1 #ifndef __XPATHPARSER_H__\r
2 #define __XPATHPARSER_H__\r
3 \r
4 /**\r
5 * Phoebe DOM Implementation.\r
6 *\r
7 * This is a C++ approximation of the W3C DOM model, which follows\r
8 * fairly closely the specifications in the various .idl files, copies of\r
9 * which are provided for reference. Most important is this one:\r
10 *\r
11 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html\r
12 *\r
13 * Authors:\r
14 * Bob Jamison\r
15 *\r
16 * Copyright (C) 2005 Bob Jamison\r
17 *\r
18 * This library is free software; you can redistribute it and/or\r
19 * modify it under the terms of the GNU Lesser General Public\r
20 * License as published by the Free Software Foundation; either\r
21 * version 2.1 of the License, or (at your option) any later version.\r
22 *\r
23 * This library is distributed in the hope that it will be useful,\r
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
26 * Lesser General Public License for more details.\r
27 *\r
28 * You should have received a copy of the GNU Lesser General Public\r
29 * License along with this library; if not, write to the Free Software\r
30 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\r
31 */\r
32 \r
33 \r
34 #include <stdio.h>\r
35 #include <stdarg.h>\r
36 \r
37 #include <string>\r
38 #include <vector>\r
39 \r
40 #include "dom.h"\r
41 #include "xpathtoken.h"\r
42 \r
43 namespace org\r
44 {\r
45 namespace w3c\r
46 {\r
47 namespace dom\r
48 {\r
49 namespace xpath\r
50 {\r
51 \r
52 typedef dom::DOMString DOMString;\r
53 typedef dom::Node Node;\r
54 typedef dom::NodeList NodeList;\r
55 \r
56 \r
57 \r
58 //########################################################################\r
59 //# L E X I C A L D E F I N I T I O N S\r
60 //########################################################################\r
61 \r
62 \r
/**
 * One row of a lookup table: an integer code paired with its text.
 * The tables in this header end with a row whose sval is NULL.
 *
 * NOTE(review): the tables initialise sval from string literals, so the
 * text must never be written through this pointer.
 */
struct LookupEntry
{
    int   ival;   // integer code
    char *sval;   // text for the code; NULL marks the end of a table
};
68 \r
69 \r
70 \r
71 //Note: in the following definitions, where the starts of\r
72 //strings are similar, put the longer definitions first\r
73 \r
/**
 * Codes for the node-type names listed in nodeTypeTable.
 */
enum NodeType
{
    COMMENT                = 0,
    TEXT                   = 1,
    PROCESSING_INSTRUCTION = 2,
    NODE                   = 3
};
84 \r
85 \r
86 static LookupEntry nodeTypeTable [] =\r
87 {\r
88 { COMMENT, "comment" },\r
89 { TEXT, "text" },\r
90 { PROCESSING_INSTRUCTION, "processing-instruction" },\r
91 { NODE, "node" },\r
92 { -1, NULL }\r
93 };\r
94 \r
95 \r
/**
 * Codes for the axis names listed in axisNameTable.  Where one name is a
 * prefix of another (e.g. "ancestor" / "ancestor-or-self"), the longer
 * one is declared first, mirroring the table order.
 */
enum AxisNameType
{
    ANCESTOR_OR_SELF   = 0,
    ANCESTOR           = 1,
    ATTRIBUTE          = 2,
    CHILD              = 3,
    DESCENDANT_OR_SELF = 4,
    DESCENDANT         = 5,
    FOLLOWING_SIBLING  = 6,
    FOLLOWING          = 7,
    NAMESPACE          = 8,
    PARENT             = 9,
    PRECEDING_SIBLING  = 10,
    PRECEDING          = 11,
    SELF               = 12
};
115 \r
116 \r
117 static LookupEntry axisNameTable [] =\r
118 {\r
119 { ANCESTOR_OR_SELF, "ancestor-or-self" },\r
120 { ANCESTOR, "ancestor" },\r
121 { ATTRIBUTE, "attribute" },\r
122 { CHILD, "child" },\r
123 { DESCENDANT_OR_SELF, "descendant-or-self"},\r
124 { DESCENDANT, "descendant" },\r
125 { FOLLOWING_SIBLING, "following-sibling" },\r
126 { FOLLOWING, "following" },\r
127 { NAMESPACE, "namespace" },\r
128 { PARENT, "parent" },\r
129 { PRECEDING_SIBLING, "preceding-sibling" },\r
130 { PRECEDING, "preceding" },\r
131 { SELF, "self" },\r
132 { -1, NULL }\r
133 };\r
134 \r
135 \r
/**
 * Lexical token types.  The first group (NONE .. VARIABLE_REFERENCE) has
 * its text in exprTokenTable; the second group (AND .. GREATER_THAN) has
 * its text in operatorTable.
 */
enum LexTokType
{
    NONE                = 0,
    CHAR                = 1,    // default if none of the below

    // Expr tokens
    LPAREN              = 2,
    RPAREN              = 3,
    LBRACKET            = 4,
    RBRACKET            = 5,
    DOUBLE_DOT          = 6,
    DOT                 = 7,
    AMPR                = 8,
    COMMA               = 9,
    DOUBLE_COLON        = 10,
    NAME_TEST           = 11,
    NODE_TYPE           = 12,
    OPERATOR            = 13,
    FUNCTION_NAME       = 14,
    AXIS_NAME           = 15,
    LITERAL             = 16,
    NUMBER              = 17,
    VARIABLE_REFERENCE  = 18,

    // Operator tokens
    AND                 = 19,
    OR                  = 20,
    MOD                 = 21,
    DIV                 = 22,
    MULTIPLY            = 23,
    DOUBLE_SLASH        = 24,
    SLASH               = 25,
    PIPE                = 26,
    PLUS                = 27,
    MINUS               = 28,
    EQUALS              = 29,
    NOT_EQUALS          = 30,
    LESS_THAN_EQUALS    = 31,
    LESS_THAN           = 32,
    GREATER_THAN_EQUALS = 33,
    GREATER_THAN        = 34
};
179 \r
180 \r
181 /*\r
182 * Be VERY careful that this table matches the LexicalTokenType enum\r
183 * declaration above.\r
184 */\r
185 static LookupEntry exprTokenTable [] =\r
186 {\r
187 { NONE, "xxNONExx" },\r
188 { CHAR, "CHAR" },\r
189 //Expr tokens\r
190 { LPAREN, "(" },\r
191 { RPAREN, ")" },\r
192 { LBRACKET, "[" },\r
193 { RBRACKET, "]" },\r
194 { DOUBLE_DOT, ".." },\r
195 { DOT, "." },\r
196 { AMPR, "@" },\r
197 { COMMA, "," },\r
198 { DOUBLE_COLON, "::" },\r
199 { NAME_TEST, "NameTest" },\r
200 { NODE_TYPE, "NodeType" },\r
201 { OPERATOR, "Operator" },\r
202 { FUNCTION_NAME, "FunctionName" },\r
203 { AXIS_NAME, "AxisName" },\r
204 { LITERAL, "Literal" },\r
205 { NUMBER, "Number" },\r
206 { VARIABLE_REFERENCE, "VariableReference" },\r
207 { -1, NULL }\r
208 };\r
209 \r
210 static LookupEntry operatorTable [] =\r
211 {\r
212 { NONE, "xxNONExx" },\r
213 //Operator tokens\r
214 { AND, "and" },\r
215 { OR, "or" },\r
216 { MOD, "mod" },\r
217 { DIV, "div" },\r
218 { MULTIPLY, "*" },\r
219 { DOUBLE_SLASH, "//" },\r
220 { SLASH, "/" },\r
221 { PIPE, "|" },\r
222 { PLUS, "+" },\r
223 { MINUS, "-" },\r
224 { EQUALS, "=" },\r
225 { NOT_EQUALS, "!=" },\r
226 { LESS_THAN_EQUALS, "<=" },\r
227 { LESS_THAN, "<" },\r
228 { GREATER_THAN_EQUALS, ">=" },\r
229 { GREATER_THAN, ">" },\r
230 { -1, NULL }\r
231 };\r
232 \r
233 \r
234 /**\r
235 *\r
236 */\r
237 class LexTok\r
238 {\r
239 public:\r
240 LexTok(const LexTok &tok)\r
241 {\r
242 type = tok.type;\r
243 location = tok.location;\r
244 sval = tok.sval;\r
245 dval = tok.dval;\r
246 ival = tok.ival;\r
247 }\r
248 LexTok()\r
249 { init(); }\r
250 LexTok(int theType, int loc)\r
251 { init(); type = theType; location = loc;}\r
252 LexTok(int theType, int loc, const DOMString &val)\r
253 { init(); type = theType; location = loc; sval = val; }\r
254 LexTok(int theType, int loc, double val)\r
255 { init(); type = theType; location = loc; dval = val; }\r
256 LexTok(int theType, int loc, long val)\r
257 { init(); type = theType; location = loc; ival = val; }\r
258 \r
259 void print()\r
260 {\r
261 if (type == OPERATOR)\r
262 {\r
263 char *tokenStr = "unknown";\r
264 for (LookupEntry *entry = operatorTable; entry->sval ; entry++)\r
265 {\r
266 if (entry->ival == ival)\r
267 {\r
268 tokenStr = entry->sval;\r
269 break;\r
270 }\r
271 }\r
272 printf("(%s)\n", tokenStr);\r
273 }\r
274 else if (type == NODE_TYPE)\r
275 {\r
276 char *tokenStr = "unknown";\r
277 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)\r
278 {\r
279 if (entry->ival == ival)\r
280 {\r
281 tokenStr = entry->sval;\r
282 break;\r
283 }\r
284 }\r
285 printf("{{%s}}\n", tokenStr);\r
286 }\r
287 else if (type == AXIS_NAME)\r
288 {\r
289 char *tokenStr = "unknown";\r
290 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)\r
291 {\r
292 if (entry->ival == ival)\r
293 {\r
294 tokenStr = entry->sval;\r
295 break;\r
296 }\r
297 }\r
298 printf("{%s}\n", tokenStr);\r
299 }\r
300 else if (type == CHAR)\r
301 printf("'%c'\n", (char)ival);\r
302 else if (type == NAME_TEST)\r
303 printf("\"%s\"\n", sval.c_str());\r
304 else if (type == LITERAL)\r
305 printf("L'%s'\n", sval.c_str());\r
306 else if (type == FUNCTION_NAME)\r
307 printf("%s()\n", sval.c_str());\r
308 else if (type == NUMBER)\r
309 printf("#%f\n", dval);\r
310 else\r
311 {\r
312 char *tokenStr = "unknown";\r
313 for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)\r
314 {\r
315 if (entry->ival == type)\r
316 {\r
317 tokenStr = entry->sval;\r
318 break;\r
319 }\r
320 }\r
321 printf("%s\n", tokenStr);\r
322 //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);\r
323 }\r
324 }\r
325 \r
326 int getType()\r
327 { return type; }\r
328 int getLocation()\r
329 { return location; }\r
330 DOMString &getStringValue()\r
331 { return sval; }\r
332 double getDoubleValue()\r
333 { return dval; }\r
334 long getIntValue()\r
335 { return ival; }\r
336 \r
337 private:\r
338 void init()\r
339 {\r
340 type = NONE;\r
341 location = 0;\r
342 dval = 0.0;\r
343 ival = 0;\r
344 }\r
345 \r
346 int type;\r
347 int location;\r
348 DOMString sval;\r
349 double dval;\r
350 long ival;\r
351 };\r
352 \r
353 \r
354 \r
355 \r
356 \r
357 //########################################################################\r
358 //# P A R S E R\r
359 //########################################################################\r
360 \r
361 class XPathParser\r
362 {\r
363 public:\r
364 \r
365 //#################################\r
366 //# CONSTRUCTOR\r
367 //#################################\r
368 \r
369 /**\r
370 *\r
371 */\r
372 XPathParser()\r
373 {\r
374 debug = false;\r
375 }\r
376 \r
377 /**\r
378 *\r
379 */\r
380 ~XPathParser() {}\r
381 \r
382 /**\r
383 *\r
384 */\r
385 bool getDebug()\r
386 { return debug; }\r
387 \r
388 /**\r
389 *\r
390 */\r
391 void setDebug(bool val)\r
392 { debug = val; }\r
393 \r
394 \r
395 \r
396 /**\r
397 * Normally not called directly unless for string parsing testing\r
398 */\r
399 bool parse(const DOMString &str);\r
400 \r
401 /**\r
402 * This is the big one. Called by the xpath-dom api to fetch\r
403 * nodes from a DOM tree.\r
404 */\r
405 NodeList evaluate(const Node *root, const DOMString &str);\r
406 \r
407 \r
408 \r
409 private:\r
410 \r
411 //#################################\r
412 //# MESSAGES\r
413 //#################################\r
414 \r
415 /**\r
416 *\r
417 */\r
418 void trace(const char *fmt, ...);\r
419 \r
420 /**\r
421 *\r
422 */\r
423 void traceStack(const char *name, int pos, int depth);\r
424 \r
425 /**\r
426 *\r
427 */\r
428 void error(const char *fmt, ...);\r
429 \r
430 //#################################\r
431 //# LEXICAL SCANNING\r
432 //#################################\r
433 \r
434 /**\r
435 * Add a lexical token of a given type to the list\r
436 */\r
437 void lexTokAdd(int type, int loc);\r
438 void lexTokAdd(int type, int loc, const DOMString &val);\r
439 void lexTokAdd(int type, int loc, double val);\r
440 void lexTokAdd(int type, int loc, long val);\r
441 \r
442 /**\r
443 *\r
444 */\r
445 void lexicalTokenDump();\r
446 \r
447 /**\r
448 *\r
449 */\r
450 LexTok lexTok(int p);\r
451 \r
452 /**\r
453 *\r
454 */\r
455 int lexTokType(int p);\r
456 \r
457 /**\r
458 *\r
459 */\r
460 int peek(int p);\r
461 \r
462 /**\r
463 *\r
464 */\r
465 int get(int p);\r
466 \r
467 /**\r
468 *\r
469 */\r
470 int getword(int p, DOMString &str);\r
471 \r
472 /**\r
473 *\r
474 */\r
475 int match(int p, const char *str);\r
476 \r
477 /**\r
478 *\r
479 */\r
480 int skipwhite(int p);\r
481 \r
482 /**\r
483 *\r
484 */\r
485 int getNumber(int p, double &dresult);\r
486 \r
487 /**\r
488 *\r
489 */\r
490 int getLiteral(int p, DOMString &result);\r
491 \r
492 /**\r
493 *\r
494 */\r
495 int getNameTest(int p0, DOMString &result);\r
496 \r
497 /**\r
498 *\r
499 */\r
500 int getNCName(int p0, DOMString &result);\r
501 \r
502 \r
503 \r
504 \r
505 /**\r
506 *\r
507 */\r
508 int lexicalScan();\r
509 \r
510 \r
511 //#################################\r
512 //# GRAMMAR PARSING\r
513 //#################################\r
514 \r
515 /**\r
516 * The grammar definitions marked [1]-[39] are directly\r
517 * from the W3C XPath grammar spacification.\r
518 */\r
519 \r
520 /**\r
521 * [1]\r
522 */\r
523 int getLocationPath(int p0, int depth);\r
524 \r
525 /**\r
526 * [2]\r
527 */\r
528 int getAbsoluteLocationPath(int p0, int depth);\r
529 \r
530 /**\r
531 * [3]\r
532 */\r
533 int getRelativeLocationPath(int p0, int depth);\r
534 \r
535 /**\r
536 * [4]\r
537 */\r
538 int getStep(int p0, int depth);\r
539 \r
540 /**\r
541 * [5]\r
542 */\r
543 int getAxisSpecifier(int p0, int depth);\r
544 \r
545 /**\r
546 * [6]\r
547 */\r
548 int getAxisName(int p0, int depth);\r
549 \r
550 /**\r
551 * [7]\r
552 */\r
553 int getNodeTest(int p0, int depth);\r
554 \r
555 /**\r
556 * [8]\r
557 */\r
558 int getPredicate(int p0, int depth);\r
559 \r
560 /**\r
561 * [9]\r
562 */\r
563 int getPredicateExpr(int p0, int depth);\r
564 \r
565 /**\r
566 * [10]\r
567 */\r
568 int getAbbreviatedAbsoluteLocationPath(int p0, int depth);\r
569 /**\r
570 * [11]\r
571 */\r
572 int getAbbreviatedRelativeLocationPath(int p0, int depth);\r
573 /**\r
574 * [12]\r
575 */\r
576 int getAbbreviatedStep(int p0, int depth);\r
577 \r
578 /**\r
579 * [13]\r
580 */\r
581 int getAbbreviatedAxisSpecifier(int p0, int depth);\r
582 \r
583 /**\r
584 * [14]\r
585 */\r
586 int getExpr(int p0, int depth);\r
587 \r
588 /**\r
589 * [15]\r
590 */\r
591 int getPrimaryExpr(int p0, int depth);\r
592 \r
593 /**\r
594 * [16]\r
595 */\r
596 int getFunctionCall(int p0, int depth);\r
597 \r
598 /**\r
599 * [17]\r
600 */\r
601 int getArgument(int p0, int depth);\r
602 \r
603 /**\r
604 * [18]\r
605 */\r
606 int getUnionExpr(int p0, int depth);\r
607 \r
608 /**\r
609 * [19]\r
610 */\r
611 int getPathExpr(int p0, int depth);\r
612 \r
613 /**\r
614 * [20]\r
615 */\r
616 int getFilterExpr(int p0, int depth);\r
617 \r
618 /**\r
619 * [21]\r
620 */\r
621 int getOrExpr(int p0, int depth);\r
622 \r
623 /**\r
624 * [22]\r
625 */\r
626 int getAndExpr(int p0, int depth);\r
627 \r
628 /**\r
629 * [23]\r
630 */\r
631 int getEqualityExpr(int p0, int depth);\r
632 \r
633 /**\r
634 * [24]\r
635 */\r
636 int getRelationalExpr(int p0, int depth);\r
637 \r
638 /**\r
639 * [25]\r
640 */\r
641 int getAdditiveExpr(int p0, int depth);\r
642 \r
643 /**\r
644 * [26]\r
645 */\r
646 int getMultiplicativeExpr(int p0, int depth);\r
647 \r
648 /**\r
649 * [27]\r
650 */\r
651 int getUnaryExpr(int p0, int depth);\r
652 \r
653 /**\r
654 * [28]\r
655 */\r
656 int getExprToken(int p0, int depth);\r
657 \r
658 /**\r
659 * [29]\r
660 */\r
661 int getLiteral(int p0, int depth);\r
662 \r
663 /**\r
664 * [30]\r
665 */\r
666 int getNumber(int p0, int depth);\r
667 \r
668 /**\r
669 * [31]\r
670 */\r
671 int getDigits(int p0, int depth);\r
672 \r
673 /**\r
674 * [32]\r
675 */\r
676 int getOperator(int p0, int depth);\r
677 \r
678 /**\r
679 * [33]\r
680 */\r
681 int getOperatorName(int p0, int depth);\r
682 \r
683 /**\r
684 * [34]\r
685 */\r
686 int getMultiplyOperator(int p0, int depth);\r
687 \r
688 /**\r
689 * [35]\r
690 */\r
691 int getFunctionName(int p0, int depth);\r
692 \r
693 /**\r
694 * [36]\r
695 */\r
696 int getVariableReference(int p0, int depth);\r
697 \r
698 /**\r
699 * [37]\r
700 */\r
701 int getNameTest(int p0, int depth);\r
702 \r
703 /**\r
704 * [38]\r
705 */\r
706 int getNodeType(int p0, int depth);\r
707 \r
708 /**\r
709 * [39]\r
710 */\r
711 int getExprWhitespace(int p0, int depth);\r
712 \r
713 \r
714 \r
715 //#################################\r
716 //# DATA ITEMS\r
717 //#################################\r
718 \r
719 /**\r
720 *\r
721 */\r
722 bool debug;\r
723 \r
724 /**\r
725 *\r
726 */\r
727 char *parsebuf;\r
728 \r
729 /**\r
730 *\r
731 */\r
732 int parselen;\r
733 \r
734 /**\r
735 *\r
736 */\r
737 int position;\r
738 \r
739 /**\r
740 *\r
741 */\r
742 DOMString numberString;\r
743 \r
744 /**\r
745 *\r
746 */\r
747 double number;\r
748 \r
749 \r
750 /**\r
751 * The result of the first lexical scan\r
752 */\r
753 std::vector<LexTok> lexicalTokens;\r
754 \r
755 /**\r
756 * The result of parsing. If parsing was successful, then\r
757 * this is executable via execute()\r
758 */\r
759 TokenList tokens;\r
760 \r
761 \r
762 \r
763 \r
764 };\r
765 \r
766 \r
767 \r
768 \r
769 \r
770 \r
771 } // namespace xpath\r
772 } // namespace dom\r
773 } // namespace w3c\r
774 } // namespace org\r
775 #endif /* __XPATHPARSER_H__ */\r
776 //#########################################################################\r
777 //# E N D O F F I L E\r
778 //#########################################################################\r
779 \r
780 \r
781 \r
782 \r
783 \r
784 \r
785 \r
786 \r