1 #ifndef __XPATHPARSER_H__
2 #define __XPATHPARSER_H__
4 /**
5 * Phoebe DOM Implementation.
6 *
7 * This is a C++ approximation of the W3C DOM model, which follows
8 * fairly closely the specifications in the various .idl files, copies of
9 * which are provided for reference. Most important is this one:
10 *
11 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
12 *
13 * Authors:
14 * Bob Jamison
15 *
16 * Copyright (C) 2005 Bob Jamison
17 *
18 * This library is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU Lesser General Public
20 * License as published by the Free Software Foundation; either
21 * version 2.1 of the License, or (at your option) any later version.
22 *
23 * This library is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public
29 * License along with this library; if not, write to the Free Software
30 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 */
34 #include <stdio.h>
35 #include <stdarg.h>
37 #include <string>
38 #include <vector>
40 #include "dom.h"
41 #include "xpathtoken.h"
43 namespace org
44 {
45 namespace w3c
46 {
47 namespace dom
48 {
49 namespace xpath
50 {
52 typedef dom::DOMString DOMString;
53 typedef dom::Node Node;
54 typedef dom::NodeList NodeList;
58 //########################################################################
59 //# L E X I C A L D E F I N I T I O N S
60 //########################################################################
/**
 * One row of a lookup table that pairs an integer code with its text.
 *
 * The tables built from this struct end with a row whose sval is NULL,
 * which is what the scanning loops in this header test for.
 */
struct LookupEntry
{
    int   ival;   // integer code (an enumerator value, or -1 in the end row)
    char *sval;   // text for the code; NULL marks the end of a table
};
71 //Note: in the following definitions, where the starts of
72 //strings are similar, put the longer definitions first
/**
 * Codes for the node-type names listed in nodeTypeTable.
 * Values are assigned implicitly, starting at 0.
 */
enum NodeType
{
    COMMENT,                  // "comment"
    TEXT,                     // "text"
    PROCESSING_INSTRUCTION,   // "processing-instruction"
    NODE                      // "node"
};
86 static LookupEntry nodeTypeTable [] =
87 {
88 { COMMENT, "comment" },
89 { TEXT, "text" },
90 { PROCESSING_INSTRUCTION, "processing-instruction" },
91 { NODE, "node" },
92 { -1, NULL }
93 };
/**
 * Codes for the axis names listed in axisNameTable.
 * Values are assigned implicitly, starting at 0.  Where two names share a
 * prefix the longer one is declared first, mirroring the table order.
 */
enum AxisNameType
{
    ANCESTOR_OR_SELF,     // "ancestor-or-self"
    ANCESTOR,             // "ancestor"
    ATTRIBUTE,            // "attribute"
    CHILD,                // "child"
    DESCENDANT_OR_SELF,   // "descendant-or-self"
    DESCENDANT,           // "descendant"
    FOLLOWING_SIBLING,    // "following-sibling"
    FOLLOWING,            // "following"
    NAMESPACE,            // "namespace"
    PARENT,               // "parent"
    PRECEDING_SIBLING,    // "preceding-sibling"
    PRECEDING,            // "preceding"
    SELF                  // "self"
};
117 static LookupEntry axisNameTable [] =
118 {
119 { ANCESTOR_OR_SELF, "ancestor-or-self" },
120 { ANCESTOR, "ancestor" },
121 { ATTRIBUTE, "attribute" },
122 { CHILD, "child" },
123 { DESCENDANT_OR_SELF, "descendant-or-self"},
124 { DESCENDANT, "descendant" },
125 { FOLLOWING_SIBLING, "following-sibling" },
126 { FOLLOWING, "following" },
127 { NAMESPACE, "namespace" },
128 { PARENT, "parent" },
129 { PRECEDING_SIBLING, "preceding-sibling" },
130 { PRECEDING, "preceding" },
131 { SELF, "self" },
132 { -1, NULL }
133 };
/**
 * Type codes for lexical tokens.
 *
 * NONE is fixed at 0 and the remaining values follow implicitly.  The
 * first group (through VARIABLE_REFERENCE) has matching rows in
 * exprTokenTable; the second group (AND onward) has matching rows in
 * operatorTable.
 */
enum LexTokType
{
    NONE = 0,
    CHAR,                 // default if none of the below

    // Expr tokens
    LPAREN,
    RPAREN,
    LBRACKET,
    RBRACKET,
    DOUBLE_DOT,
    DOT,
    AMPR,
    COMMA,
    DOUBLE_COLON,
    NAME_TEST,
    NODE_TYPE,
    OPERATOR,
    FUNCTION_NAME,
    AXIS_NAME,
    LITERAL,
    NUMBER,
    VARIABLE_REFERENCE,

    // Operator tokens
    AND,
    OR,
    MOD,
    DIV,
    MULTIPLY,
    DOUBLE_SLASH,
    SLASH,
    PIPE,
    PLUS,
    MINUS,
    EQUALS,
    NOT_EQUALS,
    LESS_THAN_EQUALS,
    LESS_THAN,
    GREATER_THAN_EQUALS,
    GREATER_THAN
};
/*
 * Be VERY careful that this table matches the LexTokType enum
 * declaration above.
 */
185 static LookupEntry exprTokenTable [] =
186 {
187 { NONE, "xxNONExx" },
188 { CHAR, "CHAR" },
189 //Expr tokens
190 { LPAREN, "(" },
191 { RPAREN, ")" },
192 { LBRACKET, "[" },
193 { RBRACKET, "]" },
194 { DOUBLE_DOT, ".." },
195 { DOT, "." },
196 { AMPR, "@" },
197 { COMMA, "," },
198 { DOUBLE_COLON, "::" },
199 { NAME_TEST, "NameTest" },
200 { NODE_TYPE, "NodeType" },
201 { OPERATOR, "Operator" },
202 { FUNCTION_NAME, "FunctionName" },
203 { AXIS_NAME, "AxisName" },
204 { LITERAL, "Literal" },
205 { NUMBER, "Number" },
206 { VARIABLE_REFERENCE, "VariableReference" },
207 { -1, NULL }
208 };
210 static LookupEntry operatorTable [] =
211 {
212 { NONE, "xxNONExx" },
213 //Operator tokens
214 { AND, "and" },
215 { OR, "or" },
216 { MOD, "mod" },
217 { DIV, "div" },
218 { MULTIPLY, "*" },
219 { DOUBLE_SLASH, "//" },
220 { SLASH, "/" },
221 { PIPE, "|" },
222 { PLUS, "+" },
223 { MINUS, "-" },
224 { EQUALS, "=" },
225 { NOT_EQUALS, "!=" },
226 { LESS_THAN_EQUALS, "<=" },
227 { LESS_THAN, "<" },
228 { GREATER_THAN_EQUALS, ">=" },
229 { GREATER_THAN, ">" },
230 { -1, NULL }
231 };
234 /**
235 *
236 */
237 class LexTok
238 {
239 public:
240 LexTok(const LexTok &tok)
241 {
242 type = tok.type;
243 location = tok.location;
244 sval = tok.sval;
245 dval = tok.dval;
246 ival = tok.ival;
247 }
248 LexTok()
249 { init(); }
250 LexTok(int theType, int loc)
251 { init(); type = theType; location = loc;}
252 LexTok(int theType, int loc, const DOMString &val)
253 { init(); type = theType; location = loc; sval = val; }
254 LexTok(int theType, int loc, double val)
255 { init(); type = theType; location = loc; dval = val; }
256 LexTok(int theType, int loc, long val)
257 { init(); type = theType; location = loc; ival = val; }
259 void print()
260 {
261 if (type == OPERATOR)
262 {
263 char *tokenStr = "unknown";
264 for (LookupEntry *entry = operatorTable; entry->sval ; entry++)
265 {
266 if (entry->ival == ival)
267 {
268 tokenStr = entry->sval;
269 break;
270 }
271 }
272 printf("(%s)\n", tokenStr);
273 }
274 else if (type == NODE_TYPE)
275 {
276 char *tokenStr = "unknown";
277 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
278 {
279 if (entry->ival == ival)
280 {
281 tokenStr = entry->sval;
282 break;
283 }
284 }
285 printf("{{%s}}\n", tokenStr);
286 }
287 else if (type == AXIS_NAME)
288 {
289 char *tokenStr = "unknown";
290 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
291 {
292 if (entry->ival == ival)
293 {
294 tokenStr = entry->sval;
295 break;
296 }
297 }
298 printf("{%s}\n", tokenStr);
299 }
300 else if (type == CHAR)
301 printf("'%c'\n", (char)ival);
302 else if (type == NAME_TEST)
303 printf("\"%s\"\n", sval.c_str());
304 else if (type == LITERAL)
305 printf("L'%s'\n", sval.c_str());
306 else if (type == FUNCTION_NAME)
307 printf("%s()\n", sval.c_str());
308 else if (type == NUMBER)
309 printf("#%f\n", dval);
310 else
311 {
312 char *tokenStr = "unknown";
313 for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)
314 {
315 if (entry->ival == type)
316 {
317 tokenStr = entry->sval;
318 break;
319 }
320 }
321 printf("%s\n", tokenStr);
322 //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);
323 }
324 }
326 int getType()
327 { return type; }
328 int getLocation()
329 { return location; }
330 DOMString &getStringValue()
331 { return sval; }
332 double getDoubleValue()
333 { return dval; }
334 long getIntValue()
335 { return ival; }
337 private:
338 void init()
339 {
340 type = NONE;
341 location = 0;
342 dval = 0.0;
343 ival = 0;
344 }
346 int type;
347 int location;
348 DOMString sval;
349 double dval;
350 long ival;
351 };
357 //########################################################################
358 //# P A R S E R
359 //########################################################################
361 class XPathParser
362 {
363 public:
365 //#################################
366 //# CONSTRUCTOR
367 //#################################
369 /**
370 *
371 */
372 XPathParser()
373 {
374 debug = false;
375 }
377 /**
378 *
379 */
380 ~XPathParser() {}
382 /**
383 *
384 */
385 bool getDebug()
386 { return debug; }
388 /**
389 *
390 */
391 void setDebug(bool val)
392 { debug = val; }
396 /**
397 * Normally not called directly unless for string parsing testing
398 */
399 bool parse(const DOMString &str);
401 /**
402 * This is the big one. Called by the xpath-dom api to fetch
403 * nodes from a DOM tree.
404 */
405 NodeList evaluate(const NodePtr root, const DOMString &str);
409 private:
411 //#################################
412 //# MESSAGES
413 //#################################
415 /**
416 *
417 */
418 void trace(const char *fmt, ...);
420 /**
421 *
422 */
423 void traceStack(const char *name, int pos, int depth);
425 /**
426 *
427 */
428 void error(const char *fmt, ...);
430 //#################################
431 //# LEXICAL SCANNING
432 //#################################
434 /**
435 * Add a lexical token of a given type to the list
436 */
437 void lexTokAdd(int type, int loc);
438 void lexTokAdd(int type, int loc, const DOMString &val);
439 void lexTokAdd(int type, int loc, double val);
440 void lexTokAdd(int type, int loc, long val);
442 /**
443 *
444 */
445 void lexicalTokenDump();
447 /**
448 *
449 */
450 LexTok lexTok(int p);
452 /**
453 *
454 */
455 int lexTokType(int p);
457 /**
458 *
459 */
460 int peek(int p);
462 /**
463 *
464 */
465 int get(int p);
467 /**
468 *
469 */
470 int getword(int p, DOMString &str);
472 /**
473 *
474 */
475 int match(int p, const char *str);
477 /**
478 *
479 */
480 int skipwhite(int p);
482 /**
483 *
484 */
485 int getNumber(int p, double &dresult);
487 /**
488 *
489 */
490 int getLiteral(int p, DOMString &result);
492 /**
493 *
494 */
495 int getNameTest(int p0, DOMString &result);
497 /**
498 *
499 */
500 int getNCName(int p0, DOMString &result);
505 /**
506 *
507 */
508 int lexicalScan();
511 //#################################
512 //# GRAMMAR PARSING
513 //#################################
515 /**
516 * Add a newly derived token to the token list;
517 */
518 void tokAdd(const Token &token);
520 void tokAdd(int type);
522 void tokAdd(int type, long val);
524 void tokAdd(int type, double val);
526 void tokAdd(int type, const DOMString &val);
529 /**
530 * The grammar definitions marked [1]-[39] are directly
531 * from the W3C XPath grammar spacification.
532 */
534 /**
535 * [1]
536 */
537 int getLocationPath(int p0, int depth);
539 /**
540 * [2]
541 */
542 int getAbsoluteLocationPath(int p0, int depth);
544 /**
545 * [3]
546 */
547 int getRelativeLocationPath(int p0, int depth);
549 /**
550 * [4]
551 */
552 int getStep(int p0, int depth);
554 /**
555 * [5]
556 */
557 int getAxisSpecifier(int p0, int depth);
559 /**
560 * [6]
561 */
562 int getAxisName(int p0, int depth);
564 /**
565 * [7]
566 */
567 int getNodeTest(int p0, int depth);
569 /**
570 * [8]
571 */
572 int getPredicate(int p0, int depth);
574 /**
575 * [9]
576 */
577 int getPredicateExpr(int p0, int depth);
579 /**
580 * [10]
581 */
582 int getAbbreviatedAbsoluteLocationPath(int p0, int depth);
583 /**
584 * [11]
585 */
586 int getAbbreviatedRelativeLocationPath(int p0, int depth);
587 /**
588 * [12]
589 */
590 int getAbbreviatedStep(int p0, int depth);
592 /**
593 * [13]
594 */
595 int getAbbreviatedAxisSpecifier(int p0, int depth);
597 /**
598 * [14]
599 */
600 int getExpr(int p0, int depth);
602 /**
603 * [15]
604 */
605 int getPrimaryExpr(int p0, int depth);
607 /**
608 * [16]
609 */
610 int getFunctionCall(int p0, int depth);
612 /**
613 * [17]
614 */
615 int getArgument(int p0, int depth);
617 /**
618 * [18]
619 */
620 int getUnionExpr(int p0, int depth);
622 /**
623 * [19]
624 */
625 int getPathExpr(int p0, int depth);
627 /**
628 * [20]
629 */
630 int getFilterExpr(int p0, int depth);
632 /**
633 * [21]
634 */
635 int getOrExpr(int p0, int depth);
637 /**
638 * [22]
639 */
640 int getAndExpr(int p0, int depth);
642 /**
643 * [23]
644 */
645 int getEqualityExpr(int p0, int depth);
647 /**
648 * [24]
649 */
650 int getRelationalExpr(int p0, int depth);
652 /**
653 * [25]
654 */
655 int getAdditiveExpr(int p0, int depth);
657 /**
658 * [26]
659 */
660 int getMultiplicativeExpr(int p0, int depth);
662 /**
663 * [27]
664 */
665 int getUnaryExpr(int p0, int depth);
667 /**
668 * [28]
669 */
670 int getExprToken(int p0, int depth);
672 /**
673 * [29]
674 */
675 int getLiteral(int p0, int depth);
677 /**
678 * [30]
679 */
680 int getNumber(int p0, int depth);
682 /**
683 * [31]
684 */
685 int getDigits(int p0, int depth);
687 /**
688 * [32]
689 */
690 int getOperator(int p0, int depth);
692 /**
693 * [33]
694 */
695 int getOperatorName(int p0, int depth);
697 /**
698 * [34]
699 */
700 int getMultiplyOperator(int p0, int depth);
702 /**
703 * [35]
704 */
705 int getFunctionName(int p0, int depth);
707 /**
708 * [36]
709 */
710 int getVariableReference(int p0, int depth);
712 /**
713 * [37]
714 */
715 int getNameTest(int p0, int depth);
717 /**
718 * [38]
719 */
720 int getNodeType(int p0, int depth);
722 /**
723 * [39]
724 */
725 int getExprWhitespace(int p0, int depth);
729 //#################################
730 //# DATA ITEMS
731 //#################################
733 /**
734 *
735 */
736 bool debug;
738 /**
739 *
740 */
741 char *parsebuf;
743 /**
744 *
745 */
746 int parselen;
748 /**
749 *
750 */
751 int position;
753 /**
754 *
755 */
756 DOMString numberString;
758 /**
759 *
760 */
761 double number;
764 /**
765 * The result of the first lexical scan
766 */
767 std::vector<LexTok> lexicalTokens;
769 /**
770 * The result of parsing. If parsing was successful, then
771 * this is executable via execute()
772 */
773 TokenList tokens;
778 };
785 } // namespace xpath
786 } // namespace dom
787 } // namespace w3c
788 } // namespace org
789 #endif /* __XPATHPARSER_H__ */
790 //#########################################################################
791 //# E N D O F F I L E
792 //#########################################################################