1 #ifndef __XPATHPARSER_H__
2 #define __XPATHPARSER_H__
4 /**
5 * Phoebe DOM Implementation.
6 *
7 * This is a C++ approximation of the W3C DOM model, which follows
8 * fairly closely the specifications in the various .idl files, copies of
9 * which are provided for reference. Most important is this one:
10 *
11 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
12 *
13 * Authors:
14 * Bob Jamison
15 *
16 * Copyright (C) 2005-2007 Bob Jamison
17 *
18 * This library is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU Lesser General Public
20 * License as published by the Free Software Foundation; either
21 * version 2.1 of the License, or (at your option) any later version.
22 *
23 * This library is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public
29 * License along with this library; if not, write to the Free Software
30 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 */
34 #include <stdio.h>
35 #include <stdarg.h>
37 #include <string>
38 #include <vector>
40 #include "dom.h"
41 #include "xpathtoken.h"
43 namespace org
44 {
45 namespace w3c
46 {
47 namespace dom
48 {
49 namespace xpath
50 {
52 typedef dom::DOMString DOMString;
53 typedef dom::Node Node;
54 typedef dom::NodeList NodeList;
58 //########################################################################
59 //# L E X I C A L D E F I N I T I O N S
60 //########################################################################
/**
 * One row of a static lookup table: an integer code paired with a
 * C string.  The tables in this file mark their end with a row whose
 * sval is NULL.
 */
struct LookupEntry
{
    int         ival;   // integer code for this row
    const char *sval;   // string associated with the code (NULL in the terminating row)
};
71 //Note: in the following definitions, where the starts of
72 //strings are similar, put the longer definitions first
/**
 * Codes for the node-type tests; each value is paired with its
 * keyword string in nodeTypeTable.
 */
enum NodeType
{
    COMMENT                = 0,
    TEXT                   = 1,
    PROCESSING_INSTRUCTION = 2,
    NODE                   = 3
};
86 static LookupEntry nodeTypeTable [] =
87 {
88 { COMMENT, "comment" },
89 { TEXT, "text" },
90 { PROCESSING_INSTRUCTION, "processing-instruction" },
91 { NODE, "node" },
92 { -1, NULL }
93 };
/**
 * Codes for the axis names; each value is paired with its keyword
 * string in axisNameTable.  Where one name is a prefix of another
 * (e.g. "ancestor" / "ancestor-or-self") the longer one is listed first,
 * following the ordering note earlier in this file.
 */
enum AxisNameType
{
    ANCESTOR_OR_SELF   = 0,
    ANCESTOR           = 1,
    ATTRIBUTE          = 2,
    CHILD              = 3,
    DESCENDANT_OR_SELF = 4,
    DESCENDANT         = 5,
    FOLLOWING_SIBLING  = 6,
    FOLLOWING          = 7,
    NAMESPACE          = 8,
    PARENT             = 9,
    PRECEDING_SIBLING  = 10,
    PRECEDING          = 11,
    SELF               = 12
};
117 static LookupEntry axisNameTable [] =
118 {
119 { ANCESTOR_OR_SELF, "ancestor-or-self" },
120 { ANCESTOR, "ancestor" },
121 { ATTRIBUTE, "attribute" },
122 { CHILD, "child" },
123 { DESCENDANT_OR_SELF, "descendant-or-self"},
124 { DESCENDANT, "descendant" },
125 { FOLLOWING_SIBLING, "following-sibling" },
126 { FOLLOWING, "following" },
127 { NAMESPACE, "namespace" },
128 { PARENT, "parent" },
129 { PRECEDING_SIBLING, "preceding-sibling" },
130 { PRECEDING, "preceding" },
131 { SELF, "self" },
132 { -1, NULL }
133 };
/**
 * Type codes carried by a LexTok.  The first group (through
 * VARIABLE_REFERENCE) has matching rows in exprTokenTable; the second
 * group (AND onward) has matching rows in operatorTable.
 */
enum LexTokType
{
    NONE                = 0,
    CHAR                = 1,    // default if none of the below

    // Expr tokens
    LPAREN              = 2,
    RPAREN              = 3,
    LBRACKET            = 4,
    RBRACKET            = 5,
    DOUBLE_DOT          = 6,
    DOT                 = 7,
    AMPR                = 8,    // paired with "@" in exprTokenTable
    COMMA               = 9,
    DOUBLE_COLON        = 10,
    NAME_TEST           = 11,
    NODE_TYPE           = 12,
    OPERATOR            = 13,
    FUNCTION_NAME       = 14,
    AXIS_NAME           = 15,
    LITERAL             = 16,
    NUMBER              = 17,
    VARIABLE_REFERENCE  = 18,

    // Operator tokens
    AND                 = 19,
    OR                  = 20,
    MOD                 = 21,
    DIV                 = 22,
    MULTIPLY            = 23,
    DOUBLE_SLASH        = 24,
    SLASH               = 25,
    PIPE                = 26,
    PLUS                = 27,
    MINUS               = 28,
    EQUALS              = 29,
    NOT_EQUALS          = 30,
    LESS_THAN_EQUALS    = 31,
    LESS_THAN           = 32,
    GREATER_THAN_EQUALS = 33,
    GREATER_THAN        = 34
};
181 /*
182 * Be VERY careful that this table matches the LexicalTokenType enum
183 * declaration above.
184 */
185 static LookupEntry exprTokenTable [] =
186 {
187 { NONE, "xxNONExx" },
188 { CHAR, "CHAR" },
189 //Expr tokens
190 { LPAREN, "(" },
191 { RPAREN, ")" },
192 { LBRACKET, "[" },
193 { RBRACKET, "]" },
194 { DOUBLE_DOT, ".." },
195 { DOT, "." },
196 { AMPR, "@" },
197 { COMMA, "," },
198 { DOUBLE_COLON, "::" },
199 { NAME_TEST, "NameTest" },
200 { NODE_TYPE, "NodeType" },
201 { OPERATOR, "Operator" },
202 { FUNCTION_NAME, "FunctionName" },
203 { AXIS_NAME, "AxisName" },
204 { LITERAL, "Literal" },
205 { NUMBER, "Number" },
206 { VARIABLE_REFERENCE, "VariableReference" },
207 { -1, NULL }
208 };
210 static LookupEntry operatorTable [] =
211 {
212 { NONE, "xxNONExx" },
213 //Operator tokens
214 { AND, "and" },
215 { OR, "or" },
216 { MOD, "mod" },
217 { DIV, "div" },
218 { MULTIPLY, "*" },
219 { DOUBLE_SLASH, "//" },
220 { SLASH, "/" },
221 { PIPE, "|" },
222 { PLUS, "+" },
223 { MINUS, "-" },
224 { EQUALS, "=" },
225 { NOT_EQUALS, "!=" },
226 { LESS_THAN_EQUALS, "<=" },
227 { LESS_THAN, "<" },
228 { GREATER_THAN_EQUALS, ">=" },
229 { GREATER_THAN, ">" },
230 { -1, NULL }
231 };
234 /**
235 *
236 */
237 class LexTok
238 {
239 public:
240 LexTok(const LexTok &tok)
241 {
242 type = tok.type;
243 location = tok.location;
244 sval = tok.sval;
245 dval = tok.dval;
246 ival = tok.ival;
247 }
248 LexTok()
249 { init(); }
250 LexTok(int theType, int loc)
251 { init(); type = theType; location = loc;}
252 LexTok(int theType, int loc, const DOMString &val)
253 { init(); type = theType; location = loc; sval = val; }
254 LexTok(int theType, int loc, double val)
255 { init(); type = theType; location = loc; dval = val; }
256 LexTok(int theType, int loc, long val)
257 { init(); type = theType; location = loc; ival = val; }
259 void print()
260 {
261 if (type == OPERATOR)
262 {
263 char const *tokenStr = "unknown";
264 for (LookupEntry const *entry = operatorTable; entry->sval ; entry++)
265 {
266 if (entry->ival == ival)
267 {
268 tokenStr = entry->sval;
269 break;
270 }
271 }
272 printf("(%s)\n", tokenStr);
273 }
274 else if (type == NODE_TYPE)
275 {
276 char const *tokenStr = "unknown";
277 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
278 {
279 if (entry->ival == ival)
280 {
281 tokenStr = entry->sval;
282 break;
283 }
284 }
285 printf("{{%s}}\n", tokenStr);
286 }
287 else if (type == AXIS_NAME)
288 {
289 char const *tokenStr = "unknown";
290 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
291 {
292 if (entry->ival == ival)
293 {
294 tokenStr = entry->sval;
295 break;
296 }
297 }
298 printf("{%s}\n", tokenStr);
299 }
300 else if (type == CHAR)
301 printf("'%c'\n", (char)ival);
302 else if (type == NAME_TEST)
303 printf("\"%s\"\n", sval.c_str());
304 else if (type == LITERAL)
305 printf("L'%s'\n", sval.c_str());
306 else if (type == FUNCTION_NAME)
307 printf("%s()\n", sval.c_str());
308 else if (type == NUMBER)
309 printf("#%f\n", dval);
310 else
311 {
312 char const *tokenStr = "unknown";
313 for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)
314 {
315 if (entry->ival == type)
316 {
317 tokenStr = entry->sval;
318 break;
319 }
320 }
321 printf("%s\n", tokenStr);
322 //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);
323 }
324 }
326 int getType()
327 { return type; }
328 int getLocation()
329 { return location; }
330 DOMString &getStringValue()
331 { return sval; }
332 double getDoubleValue()
333 { return dval; }
334 long getIntValue()
335 { return ival; }
337 private:
338 void init()
339 {
340 type = NONE;
341 location = 0;
342 dval = 0.0;
343 ival = 0;
344 }
346 int type;
347 int location;
348 DOMString sval;
349 double dval;
350 long ival;
351 };
357 //########################################################################
358 //# P A R S E R
359 //########################################################################
361 class XPathParser
362 {
363 public:
365 //#################################
366 //# CONSTRUCTOR
367 //#################################
369 /**
370 *
371 */
372 XPathParser()
373 {
374 debug = false;
375 }
377 /**
378 *
379 */
380 virtual ~XPathParser() {}
382 /**
383 *
384 */
385 bool getDebug()
386 { return debug; }
388 /**
389 *
390 */
391 void setDebug(bool val)
392 { debug = val; }
396 /**
397 * Normally not called directly unless for string parsing testing
398 */
399 bool parse(const DOMString &str);
401 /**
402 * This is the big one. Called by the xpath-dom api to fetch
403 * nodes from a DOM tree.
404 */
405 NodeList evaluate(const NodePtr root, const DOMString &str);
409 private:
411 //#################################
412 //# MESSAGES
413 //#################################
415 /**
416 *
417 */
418 void trace(const char *fmt, ...)
419 #ifdef G_GNUC_PRINTF
420 G_GNUC_PRINTF(2, 3)
421 #endif
422 ;
424 /**
425 *
426 */
427 void traceStack(const char *name, int pos, int depth);
429 /**
430 *
431 */
432 void error(const char *fmt, ...)
433 #ifdef G_GNUC_PRINTF
434 G_GNUC_PRINTF(2, 3)
435 #endif
436 ;
438 //#################################
439 //# LEXICAL SCANNING
440 //#################################
442 /**
443 * Add a lexical token of a given type to the list
444 */
445 void lexTokAdd(int type, int loc);
446 void lexTokAdd(int type, int loc, const DOMString &val);
447 void lexTokAdd(int type, int loc, double val);
448 void lexTokAdd(int type, int loc, long val);
450 /**
451 *
452 */
453 void lexicalTokenDump();
455 /**
456 *
457 */
458 LexTok lexTok(int p);
460 /**
461 *
462 */
463 int lexTokType(int p);
465 /**
466 *
467 */
468 int peek(int p);
470 /**
471 *
472 */
473 int get(int p);
475 /**
476 *
477 */
478 int getword(int p, DOMString &str);
480 /**
481 *
482 */
483 int match(int p, const char *str);
485 /**
486 *
487 */
488 int skipwhite(int p);
490 /**
491 *
492 */
493 int getNumber(int p, double &dresult);
495 /**
496 *
497 */
498 int getLiteral(int p, DOMString &result);
500 /**
501 *
502 */
503 int getNameTest(int p0, DOMString &result);
505 /**
506 *
507 */
508 int getNCName(int p0, DOMString &result);
513 /**
514 *
515 */
516 int lexicalScan();
519 //#################################
520 //# GRAMMAR PARSING
521 //#################################
523 /**
524 * Add a newly derived token to the token list;
525 */
526 void tokAdd(const Token &token);
528 void tokAdd(int type);
530 void tokAdd(int type, long val);
532 void tokAdd(int type, double val);
534 void tokAdd(int type, const DOMString &val);
537 /**
538 * The grammar definitions marked [1]-[39] are directly
539 * from the W3C XPath grammar spacification.
540 */
542 /**
543 * [1]
544 */
545 int getLocationPath(int p0, int depth);
547 /**
548 * [2]
549 */
550 int getAbsoluteLocationPath(int p0, int depth);
552 /**
553 * [3]
554 */
555 int getRelativeLocationPath(int p0, int depth);
557 /**
558 * [4]
559 */
560 int getStep(int p0, int depth);
562 /**
563 * [5]
564 */
565 int getAxisSpecifier(int p0, int depth);
567 /**
568 * [6]
569 */
570 int getAxisName(int p0, int depth);
572 /**
573 * [7]
574 */
575 int getNodeTest(int p0, int depth);
577 /**
578 * [8]
579 */
580 int getPredicate(int p0, int depth);
582 /**
583 * [9]
584 */
585 int getPredicateExpr(int p0, int depth);
587 /**
588 * [10]
589 */
590 int getAbbreviatedAbsoluteLocationPath(int p0, int depth);
591 /**
592 * [11]
593 */
594 int getAbbreviatedRelativeLocationPath(int p0, int depth);
595 /**
596 * [12]
597 */
598 int getAbbreviatedStep(int p0, int depth);
600 /**
601 * [13]
602 */
603 int getAbbreviatedAxisSpecifier(int p0, int depth);
605 /**
606 * [14]
607 */
608 int getExpr(int p0, int depth);
610 /**
611 * [15]
612 */
613 int getPrimaryExpr(int p0, int depth);
615 /**
616 * [16]
617 */
618 int getFunctionCall(int p0, int depth);
620 /**
621 * [17]
622 */
623 int getArgument(int p0, int depth);
625 /**
626 * [18]
627 */
628 int getUnionExpr(int p0, int depth);
630 /**
631 * [19]
632 */
633 int getPathExpr(int p0, int depth);
635 /**
636 * [20]
637 */
638 int getFilterExpr(int p0, int depth);
640 /**
641 * [21]
642 */
643 int getOrExpr(int p0, int depth);
645 /**
646 * [22]
647 */
648 int getAndExpr(int p0, int depth);
650 /**
651 * [23]
652 */
653 int getEqualityExpr(int p0, int depth);
655 /**
656 * [24]
657 */
658 int getRelationalExpr(int p0, int depth);
660 /**
661 * [25]
662 */
663 int getAdditiveExpr(int p0, int depth);
665 /**
666 * [26]
667 */
668 int getMultiplicativeExpr(int p0, int depth);
670 /**
671 * [27]
672 */
673 int getUnaryExpr(int p0, int depth);
675 /**
676 * [28]
677 */
678 int getExprToken(int p0, int depth);
680 /**
681 * [29]
682 */
683 int getLiteral(int p0, int depth);
685 /**
686 * [30]
687 */
688 int getNumber(int p0, int depth);
690 /**
691 * [31]
692 */
693 int getDigits(int p0, int depth);
695 /**
696 * [32]
697 */
698 int getOperator(int p0, int depth);
700 /**
701 * [33]
702 */
703 int getOperatorName(int p0, int depth);
705 /**
706 * [34]
707 */
708 int getMultiplyOperator(int p0, int depth);
710 /**
711 * [35]
712 */
713 int getFunctionName(int p0, int depth);
715 /**
716 * [36]
717 */
718 int getVariableReference(int p0, int depth);
720 /**
721 * [37]
722 */
723 int getNameTest(int p0, int depth);
725 /**
726 * [38]
727 */
728 int getNodeType(int p0, int depth);
730 /**
731 * [39]
732 */
733 int getExprWhitespace(int p0, int depth);
737 //#################################
738 //# DATA ITEMS
739 //#################################
741 /**
742 *
743 */
744 bool debug;
746 /**
747 *
748 */
749 char *parsebuf;
751 /**
752 *
753 */
754 int parselen;
756 /**
757 *
758 */
759 int position;
761 /**
762 *
763 */
764 DOMString numberString;
766 /**
767 *
768 */
769 double number;
772 /**
773 * The result of the first lexical scan
774 */
775 std::vector<LexTok> lexicalTokens;
777 /**
778 * The result of parsing. If parsing was successful, then
779 * this is executable via execute()
780 */
781 TokenList tokens;
786 };
793 } // namespace xpath
794 } // namespace dom
795 } // namespace w3c
796 } // namespace org
797 #endif /* __XPATHPARSER_H__ */
798 //#########################################################################
799 //# E N D O F F I L E
800 //#########################################################################