1 #ifndef __XPATHPARSER_H__
2 #define __XPATHPARSER_H__
4 /**
5 * Phoebe DOM Implementation.
6 *
7 * This is a C++ approximation of the W3C DOM model, which follows
8 * fairly closely the specifications in the various .idl files, copies of
9 * which are provided for reference. Most important is this one:
10 *
11 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
12 *
13 * Authors:
14 * Bob Jamison
15 *
16 * Copyright (C) 2005 Bob Jamison
17 *
18 * This library is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU Lesser General Public
20 * License as published by the Free Software Foundation; either
21 * version 2.1 of the License, or (at your option) any later version.
22 *
23 * This library is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public
29 * License along with this library; if not, write to the Free Software
30 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 */
34 #include <stdio.h>
35 #include <stdarg.h>
37 #include <string>
38 #include <vector>
40 #include "dom.h"
42 namespace org
43 {
44 namespace w3c
45 {
46 namespace dom
47 {
48 namespace xpath
49 {
51 typedef dom::DOMString DOMString;
52 typedef dom::Node Node;
53 typedef dom::NodeList NodeList;
57 //########################################################################
58 //# L E X I C A L D E F I N I T I O N S
59 //########################################################################
/**
 * One row of a lookup table: an integer code paired with its text form.
 * The tables in this header are terminated by a row whose sval is NULL.
 *
 * NOTE(review): the tables initialise sval from string literals, so
 * `const char *` would be the more correct type; changing it requires
 * updating every reader of sval at the same time.
 */
struct LookupEntry
{
    int   ival;   // numeric code of the entry
    char *sval;   // text form of the entry; NULL in the terminating row
};
70 //Note: in the following definitions, where the starts of
71 //strings are similar, put the longer definitions first
/**
 * Codes for the node-type names that nodeTypeTable maps to text.
 */
enum NodeType
{
    COMMENT = 0,              // "comment"
    TEXT,                     // "text"
    PROCESSING_INSTRUCTION,   // "processing-instruction"
    NODE                      // "node"
};
85 static LookupEntry nodeTypeTable [] =
86 {
87 { COMMENT, "comment" },
88 { TEXT, "text" },
89 { PROCESSING_INSTRUCTION, "processing-instruction" },
90 { NODE, "node" },
91 { -1, NULL }
92 };
/**
 * Codes for the axis names that axisNameTable maps to text.  Where two
 * names start alike, the longer one is listed first.
 */
enum AxisNameType
{
    ANCESTOR_OR_SELF = 0,   // "ancestor-or-self"
    ANCESTOR,               // "ancestor"
    ATTRIBUTE,              // "attribute"
    CHILD,                  // "child"
    DESCENDANT_OR_SELF,     // "descendant-or-self"
    DESCENDANT,             // "descendant"
    FOLLOWING_SIBLING,      // "following-sibling"
    FOLLOWING,              // "following"
    NAMESPACE,              // "namespace"
    PARENT,                 // "parent"
    PRECEDING_SIBLING,      // "preceding-sibling"
    PRECEDING,              // "preceding"
    SELF                    // "self"
};
116 static LookupEntry axisNameTable [] =
117 {
118 { ANCESTOR_OR_SELF, "ancestor-or-self" },
119 { ANCESTOR, "ancestor" },
120 { ATTRIBUTE, "attribute" },
121 { CHILD, "child" },
122 { DESCENDANT_OR_SELF, "descendant-or-self"},
123 { DESCENDANT, "descendant" },
124 { FOLLOWING_SIBLING, "following-sibling" },
125 { FOLLOWING, "following" },
126 { NAMESPACE, "namespace" },
127 { PARENT, "parent" },
128 { PRECEDING_SIBLING, "preceding-sibling" },
129 { PRECEDING, "preceding" },
130 { SELF, "self" },
131 { -1, NULL }
132 };
/**
 * Type codes for lexical tokens.  The first group (through
 * VARIABLE_REFERENCE) is described by exprTokenTable, the second group
 * (from AND onward) by operatorTable.
 */
enum LexTokType
{
    NONE = 0,
    CHAR,                  // default if none of the below

    // ---- Expr tokens ----
    LPAREN,                // "("
    RPAREN,                // ")"
    LBRACKET,              // "["
    RBRACKET,              // "]"
    DOUBLE_DOT,            // ".."
    DOT,                   // "."
    AMPR,                  // "@"
    COMMA,                 // ","
    DOUBLE_COLON,          // "::"
    NAME_TEST,
    NODE_TYPE,
    OPERATOR,
    FUNCTION_NAME,
    AXIS_NAME,
    LITERAL,
    NUMBER,
    VARIABLE_REFERENCE,

    // ---- Operator tokens ----
    AND,                   // "and"
    OR,                    // "or"
    MOD,                   // "mod"
    DIV,                   // "div"
    MULTIPLY,              // "*"
    DOUBLE_SLASH,          // "//"
    SLASH,                 // "/"
    PIPE,                  // "|"
    PLUS,                  // "+"
    MINUS,                 // "-"
    EQUALS,                // "="
    NOT_EQUALS,            // "!="
    LESS_THAN_EQUALS,      // "<="
    LESS_THAN,             // "<"
    GREATER_THAN_EQUALS,   // ">="
    GREATER_THAN           // ">"
};
180 /*
181 * Be VERY careful that this table matches the LexicalTokenType enum
182 * declaration above.
183 */
184 static LookupEntry exprTokenTable [] =
185 {
186 { NONE, "xxNONExx" },
187 { CHAR, "CHAR" },
188 //Expr tokens
189 { LPAREN, "(" },
190 { RPAREN, ")" },
191 { LBRACKET, "[" },
192 { RBRACKET, "]" },
193 { DOUBLE_DOT, ".." },
194 { DOT, "." },
195 { AMPR, "@" },
196 { COMMA, "," },
197 { DOUBLE_COLON, "::" },
198 { NAME_TEST, "NameTest" },
199 { NODE_TYPE, "NodeType" },
200 { OPERATOR, "Operator" },
201 { FUNCTION_NAME, "FunctionName" },
202 { AXIS_NAME, "AxisName" },
203 { LITERAL, "Literal" },
204 { NUMBER, "Number" },
205 { VARIABLE_REFERENCE, "VariableReference" },
206 { -1, NULL }
207 };
209 static LookupEntry operatorTable [] =
210 {
211 { NONE, "xxNONExx" },
212 //Operator tokens
213 { AND, "and" },
214 { OR, "or" },
215 { MOD, "mod" },
216 { DIV, "div" },
217 { MULTIPLY, "*" },
218 { DOUBLE_SLASH, "//" },
219 { SLASH, "/" },
220 { PIPE, "|" },
221 { PLUS, "+" },
222 { MINUS, "-" },
223 { EQUALS, "=" },
224 { NOT_EQUALS, "!=" },
225 { LESS_THAN_EQUALS, "<=" },
226 { LESS_THAN, "<" },
227 { GREATER_THAN_EQUALS, ">=" },
228 { GREATER_THAN, ">" },
229 { -1, NULL }
230 };
233 /**
234 *
235 */
236 class LexTok
237 {
238 public:
239 LexTok(const LexTok &tok)
240 {
241 type = tok.type;
242 location = tok.location;
243 sval = tok.sval;
244 dval = tok.dval;
245 ival = tok.ival;
246 }
247 LexTok(int theType, int loc)
248 { init(); type = theType; location = loc;}
249 LexTok()
250 { init(); }
251 LexTok(int theType, int loc, const DOMString &val)
252 { init(); type = theType; location = loc; sval = val; }
253 LexTok(int theType, int loc, double val)
254 { init(); type = theType; location = loc; dval = val; }
255 LexTok(int theType, int loc, long val)
256 { init(); type = theType; location = loc; ival = val; }
258 void print()
259 {
260 if (type == OPERATOR)
261 {
262 char *tokenStr = "unknown";
263 for (LookupEntry *entry = operatorTable; entry->sval ; entry++)
264 {
265 if (entry->ival == ival)
266 {
267 tokenStr = entry->sval;
268 break;
269 }
270 }
271 printf("(%s)\n", tokenStr);
272 }
273 else if (type == NODE_TYPE)
274 {
275 char *tokenStr = "unknown";
276 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
277 {
278 if (entry->ival == ival)
279 {
280 tokenStr = entry->sval;
281 break;
282 }
283 }
284 printf("{{%s}}\n", tokenStr);
285 }
286 else if (type == AXIS_NAME)
287 {
288 char *tokenStr = "unknown";
289 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
290 {
291 if (entry->ival == ival)
292 {
293 tokenStr = entry->sval;
294 break;
295 }
296 }
297 printf("{%s}\n", tokenStr);
298 }
299 else if (type == CHAR)
300 printf("'%c'\n", (char)ival);
301 else if (type == NAME_TEST)
302 printf("\"%s\"\n", sval.c_str());
303 else if (type == LITERAL)
304 printf("L'%s'\n", sval.c_str());
305 else if (type == FUNCTION_NAME)
306 printf("%s()\n", sval.c_str());
307 else if (type == NUMBER)
308 printf("#%f\n", dval);
309 else
310 {
311 char *tokenStr = "unknown";
312 for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)
313 {
314 if (entry->ival == type)
315 {
316 tokenStr = entry->sval;
317 break;
318 }
319 }
320 printf("%s\n", tokenStr);
321 //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);
322 }
323 }
325 int getType()
326 { return type; }
327 int getLocation()
328 { return location; }
329 DOMString &getStringValue()
330 { return sval; }
331 double getDoubleValue()
332 { return dval; }
333 long getIntValue()
334 { return ival; }
336 private:
337 void init()
338 {
339 type = NONE;
340 location = 0;
341 sval = "";
342 dval = 0.0;
343 ival = 0;
344 }
346 int type;
347 int location;
348 DOMString sval;
349 double dval;
350 long ival;
351 };
356 //########################################################################
357 //# G R A M M A T I C A L T O K E N S
358 //########################################################################
/**
 * Type codes for grammatical tokens (see class Token).
 */
enum TokenTypes
{
    TOK_NONE = 0,
    TOK_ABSOLUTE,
    TOK_RELATIVE,
    TOK_STEP,
    TOK_EXPR
};
370 /**
371 *
372 */
373 class Token
374 {
375 public:
376 Token()
377 { init(); }
379 Token(const Token &other)
380 {
381 init();
382 type = other.type;
383 }
385 Token(int theType)
386 {
387 init();
388 type = theType;
389 }
391 ~Token() {}
394 private:
396 void init()
397 {
398 type = TOK_NONE;
399 }
401 int type;
404 };
411 //########################################################################
412 //# P A R S E R
413 //########################################################################
415 class XPathParser
416 {
417 public:
419 //#################################
420 //# CONSTRUCTOR
421 //#################################
423 /**
424 *
425 */
426 XPathParser()
427 {
428 debug = false;
429 }
431 /**
432 *
433 */
434 virtual ~XPathParser() {}
436 /**
437 *
438 */
439 virtual bool getDebug()
440 { return debug; }
442 /**
443 *
444 */
445 virtual void setDebug(bool val)
446 { debug = val; }
450 /**
451 * Normally not called directly unless for string parsing testing
452 */
453 virtual bool parse(const DOMString &str);
455 /**
456 * Normally not called directly except for testing.
457 */
458 virtual NodeList execute(const Node *root, std::vector<Token> &toks);
460 /**
461 * This is the big one. Called by the xpath-dom api to fetch
462 * nodes from a DOM tree.
463 */
464 virtual NodeList evaluate(const Node *root, const DOMString &str);
468 private:
470 //#################################
471 //# MESSAGES
472 //#################################
474 /**
475 *
476 */
477 virtual void trace(const char *fmt, ...);
479 /**
480 *
481 */
482 virtual void traceStack(const char *name, int pos, int depth);
484 /**
485 *
486 */
487 virtual void error(const char *fmt, ...);
489 //#################################
490 //# LEXICAL SCANNING
491 //#################################
493 /**
494 * Add a lexical token of a given type to the list
495 */
496 virtual void lexTokAdd(int type, int loc);
497 virtual void lexTokAdd(int type, int loc, const DOMString &val);
498 virtual void lexTokAdd(int type, int loc, double val);
499 virtual void lexTokAdd(int type, int loc, long val);
501 /**
502 *
503 */
504 virtual void lexicalTokenDump();
506 /**
507 *
508 */
509 virtual LexTok lexTok(int p);
511 /**
512 *
513 */
514 virtual int lexTokType(int p);
516 /**
517 *
518 */
519 virtual int peek(int p);
521 /**
522 *
523 */
524 virtual int get(int p);
526 /**
527 *
528 */
529 virtual int getword(int p, DOMString &str);
531 /**
532 *
533 */
534 virtual int match(int p, const char *str);
536 /**
537 *
538 */
539 virtual int skipwhite(int p);
541 /**
542 *
543 */
544 virtual int getNumber(int p, double &dresult);
546 /**
547 *
548 */
549 virtual int getLiteral(int p, DOMString &result);
551 /**
552 *
553 */
554 virtual int getNameTest(int p0, DOMString &result);
556 /**
557 *
558 */
559 virtual int getNCName(int p0, DOMString &result);
564 /**
565 *
566 */
567 virtual int lexicalScan();
570 //#################################
571 //# GRAMMAR PARSING
572 //#################################
574 /**
575 * The grammar definitions marked [1]-[39] are directly
576 * from the W3C XPath grammar spacification.
577 */
579 /**
580 * [1]
581 */
582 virtual int getLocationPath(int p0, int depth);
584 /**
585 * [2]
586 */
587 virtual int getAbsoluteLocationPath(int p0, int depth);
589 /**
590 * [3]
591 */
592 virtual int getRelativeLocationPath(int p0, int depth);
594 /**
595 * [4]
596 */
597 virtual int getStep(int p0, int depth);
599 /**
600 * [5]
601 */
602 virtual int getAxisSpecifier(int p0, int depth);
604 /**
605 * [6]
606 */
607 virtual int getAxisName(int p0, int depth);
609 /**
610 * [7]
611 */
612 virtual int getNodeTest(int p0, int depth);
614 /**
615 * [8]
616 */
617 virtual int getPredicate(int p0, int depth);
619 /**
620 * [9]
621 */
622 virtual int getPredicateExpr(int p0, int depth);
624 /**
625 * [10]
626 */
627 virtual int getAbbreviatedAbsoluteLocationPath(int p0, int depth);
628 /**
629 * [11]
630 */
631 virtual int getAbbreviatedRelativeLocationPath(int p0, int depth);
632 /**
633 * [12]
634 */
635 virtual int getAbbreviatedStep(int p0, int depth);
637 /**
638 * [13]
639 */
640 virtual int getAbbreviatedAxisSpecifier(int p0, int depth);
642 /**
643 * [14]
644 */
645 virtual int getExpr(int p0, int depth);
647 /**
648 * [15]
649 */
650 virtual int getPrimaryExpr(int p0, int depth);
652 /**
653 * [16]
654 */
655 virtual int getFunctionCall(int p0, int depth);
657 /**
658 * [17]
659 */
660 virtual int getArgument(int p0, int depth);
662 /**
663 * [18]
664 */
665 virtual int getUnionExpr(int p0, int depth);
667 /**
668 * [19]
669 */
670 virtual int getPathExpr(int p0, int depth);
672 /**
673 * [20]
674 */
675 virtual int getFilterExpr(int p0, int depth);
677 /**
678 * [21]
679 */
680 virtual int getOrExpr(int p0, int depth);
682 /**
683 * [22]
684 */
685 virtual int getAndExpr(int p0, int depth);
687 /**
688 * [23]
689 */
690 virtual int getEqualityExpr(int p0, int depth);
692 /**
693 * [24]
694 */
695 virtual int getRelationalExpr(int p0, int depth);
697 /**
698 * [25]
699 */
700 virtual int getAdditiveExpr(int p0, int depth);
702 /**
703 * [26]
704 */
705 virtual int getMultiplicativeExpr(int p0, int depth);
707 /**
708 * [27]
709 */
710 virtual int getUnaryExpr(int p0, int depth);
712 /**
713 * [28]
714 */
715 virtual int getExprToken(int p0, int depth);
717 /**
718 * [29]
719 */
720 virtual int getLiteral(int p0, int depth);
722 /**
723 * [30]
724 */
725 virtual int getNumber(int p0, int depth);
727 /**
728 * [31]
729 */
730 virtual int getDigits(int p0, int depth);
732 /**
733 * [32]
734 */
735 virtual int getOperator(int p0, int depth);
737 /**
738 * [33]
739 */
740 virtual int getOperatorName(int p0, int depth);
742 /**
743 * [34]
744 */
745 virtual int getMultiplyOperator(int p0, int depth);
747 /**
748 * [35]
749 */
750 virtual int getFunctionName(int p0, int depth);
752 /**
753 * [36]
754 */
755 virtual int getVariableReference(int p0, int depth);
757 /**
758 * [37]
759 */
760 virtual int getNameTest(int p0, int depth);
762 /**
763 * [38]
764 */
765 virtual int getNodeType(int p0, int depth);
767 /**
768 * [39]
769 */
770 virtual int getExprWhitespace(int p0, int depth);
774 //#################################
775 //# DATA ITEMS
776 //#################################
778 /**
779 *
780 */
781 bool debug;
783 /**
784 *
785 */
786 char *parsebuf;
788 /**
789 *
790 */
791 int parselen;
793 /**
794 *
795 */
796 int position;
798 /**
799 *
800 */
801 DOMString numberString;
803 /**
804 *
805 */
806 double number;
809 /**
810 * The result of the first lexical scan
811 */
812 std::vector<LexTok> lexicalTokens;
814 /**
815 * The result of parsing. If parsing was successful, then
816 * this is executable via execute()
817 */
818 std::vector<Token> tokens;
823 };
830 } // namespace xpath
831 } // namespace dom
832 } // namespace w3c
833 } // namespace org
834 #endif /* __XPATHPARSER_H__ */
835 //#########################################################################
836 //# E N D O F F I L E
837 //#########################################################################