1 #ifndef __XPATHPARSER_H__
2 #define __XPATHPARSER_H__
4 /**
5 * Phoebe DOM Implementation.
6 *
7 * This is a C++ approximation of the W3C DOM model, which follows
8 * fairly closely the specifications in the various .idl files, copies of
9 * which are provided for reference. Most important is this one:
10 *
11 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
12 *
13 * Authors:
14 * Bob Jamison
15 *
16 * Copyright (C) 2005 Bob Jamison
17 *
18 * This library is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU Lesser General Public
20 * License as published by the Free Software Foundation; either
21 * version 2.1 of the License, or (at your option) any later version.
22 *
23 * This library is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public
29 * License along with this library; if not, write to the Free Software
30 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 */
34 #include <stdio.h>
35 #include <stdarg.h>
36 #include <glib.h>
38 #include <string>
39 #include <vector>
41 #include "dom.h"
42 #include "xpathtoken.h"
44 namespace org
45 {
46 namespace w3c
47 {
48 namespace dom
49 {
50 namespace xpath
51 {
53 typedef dom::DOMString DOMString;
54 typedef dom::Node Node;
55 typedef dom::NodeList NodeList;
59 //########################################################################
60 //# L E X I C A L D E F I N I T I O N S
61 //########################################################################
64 typedef struct
65 {
66 int ival;
67 char *sval;
68 } LookupEntry;
72 //Note: in the following definitions, where the starts of
73 //strings are similar, put the longer definitions first
75 /**
76 *
77 */
78 typedef enum
79 {
80 COMMENT,
81 TEXT,
82 PROCESSING_INSTRUCTION,
83 NODE
84 } NodeType;
87 static LookupEntry nodeTypeTable [] =
88 {
89 { COMMENT, "comment" },
90 { TEXT, "text" },
91 { PROCESSING_INSTRUCTION, "processing-instruction" },
92 { NODE, "node" },
93 { -1, NULL }
94 };
97 /**
98 *
99 */
100 typedef enum
101 {
102 ANCESTOR_OR_SELF,
103 ANCESTOR,
104 ATTRIBUTE,
105 CHILD,
106 DESCENDANT_OR_SELF,
107 DESCENDANT,
108 FOLLOWING_SIBLING,
109 FOLLOWING,
110 NAMESPACE,
111 PARENT,
112 PRECEDING_SIBLING,
113 PRECEDING,
114 SELF
115 } AxisNameType;
118 static LookupEntry axisNameTable [] =
119 {
120 { ANCESTOR_OR_SELF, "ancestor-or-self" },
121 { ANCESTOR, "ancestor" },
122 { ATTRIBUTE, "attribute" },
123 { CHILD, "child" },
124 { DESCENDANT_OR_SELF, "descendant-or-self"},
125 { DESCENDANT, "descendant" },
126 { FOLLOWING_SIBLING, "following-sibling" },
127 { FOLLOWING, "following" },
128 { NAMESPACE, "namespace" },
129 { PARENT, "parent" },
130 { PRECEDING_SIBLING, "preceding-sibling" },
131 { PRECEDING, "preceding" },
132 { SELF, "self" },
133 { -1, NULL }
134 };
137 /**
138 *
139 */
140 typedef enum
141 {
142 NONE = 0,
143 CHAR, //default if none of the below
144 //Expr tokens
145 LPAREN,
146 RPAREN,
147 LBRACKET,
148 RBRACKET,
149 DOUBLE_DOT,
150 DOT,
151 AMPR,
152 COMMA,
153 DOUBLE_COLON,
154 NAME_TEST,
155 NODE_TYPE,
156 OPERATOR,
157 FUNCTION_NAME,
158 AXIS_NAME,
159 LITERAL,
160 NUMBER,
161 VARIABLE_REFERENCE,
162 //Operator tokens
163 AND,
164 OR,
165 MOD,
166 DIV,
167 MULTIPLY,
168 DOUBLE_SLASH,
169 SLASH,
170 PIPE,
171 PLUS,
172 MINUS,
173 EQUALS,
174 NOT_EQUALS,
175 LESS_THAN_EQUALS,
176 LESS_THAN,
177 GREATER_THAN_EQUALS,
178 GREATER_THAN
179 } LexTokType;
182 /*
183 * Be VERY careful that this table matches the LexicalTokenType enum
184 * declaration above.
185 */
186 static LookupEntry exprTokenTable [] =
187 {
188 { NONE, "xxNONExx" },
189 { CHAR, "CHAR" },
190 //Expr tokens
191 { LPAREN, "(" },
192 { RPAREN, ")" },
193 { LBRACKET, "[" },
194 { RBRACKET, "]" },
195 { DOUBLE_DOT, ".." },
196 { DOT, "." },
197 { AMPR, "@" },
198 { COMMA, "," },
199 { DOUBLE_COLON, "::" },
200 { NAME_TEST, "NameTest" },
201 { NODE_TYPE, "NodeType" },
202 { OPERATOR, "Operator" },
203 { FUNCTION_NAME, "FunctionName" },
204 { AXIS_NAME, "AxisName" },
205 { LITERAL, "Literal" },
206 { NUMBER, "Number" },
207 { VARIABLE_REFERENCE, "VariableReference" },
208 { -1, NULL }
209 };
211 static LookupEntry operatorTable [] =
212 {
213 { NONE, "xxNONExx" },
214 //Operator tokens
215 { AND, "and" },
216 { OR, "or" },
217 { MOD, "mod" },
218 { DIV, "div" },
219 { MULTIPLY, "*" },
220 { DOUBLE_SLASH, "//" },
221 { SLASH, "/" },
222 { PIPE, "|" },
223 { PLUS, "+" },
224 { MINUS, "-" },
225 { EQUALS, "=" },
226 { NOT_EQUALS, "!=" },
227 { LESS_THAN_EQUALS, "<=" },
228 { LESS_THAN, "<" },
229 { GREATER_THAN_EQUALS, ">=" },
230 { GREATER_THAN, ">" },
231 { -1, NULL }
232 };
235 /**
236 *
237 */
238 class LexTok
239 {
240 public:
241 LexTok(const LexTok &tok)
242 {
243 type = tok.type;
244 location = tok.location;
245 sval = tok.sval;
246 dval = tok.dval;
247 ival = tok.ival;
248 }
249 LexTok()
250 { init(); }
251 LexTok(int theType, int loc)
252 { init(); type = theType; location = loc;}
253 LexTok(int theType, int loc, const DOMString &val)
254 { init(); type = theType; location = loc; sval = val; }
255 LexTok(int theType, int loc, double val)
256 { init(); type = theType; location = loc; dval = val; }
257 LexTok(int theType, int loc, long val)
258 { init(); type = theType; location = loc; ival = val; }
260 void print()
261 {
262 if (type == OPERATOR)
263 {
264 char *tokenStr = "unknown";
265 for (LookupEntry *entry = operatorTable; entry->sval ; entry++)
266 {
267 if (entry->ival == ival)
268 {
269 tokenStr = entry->sval;
270 break;
271 }
272 }
273 printf("(%s)\n", tokenStr);
274 }
275 else if (type == NODE_TYPE)
276 {
277 char *tokenStr = "unknown";
278 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
279 {
280 if (entry->ival == ival)
281 {
282 tokenStr = entry->sval;
283 break;
284 }
285 }
286 printf("{{%s}}\n", tokenStr);
287 }
288 else if (type == AXIS_NAME)
289 {
290 char *tokenStr = "unknown";
291 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
292 {
293 if (entry->ival == ival)
294 {
295 tokenStr = entry->sval;
296 break;
297 }
298 }
299 printf("{%s}\n", tokenStr);
300 }
301 else if (type == CHAR)
302 printf("'%c'\n", (char)ival);
303 else if (type == NAME_TEST)
304 printf("\"%s\"\n", sval.c_str());
305 else if (type == LITERAL)
306 printf("L'%s'\n", sval.c_str());
307 else if (type == FUNCTION_NAME)
308 printf("%s()\n", sval.c_str());
309 else if (type == NUMBER)
310 printf("#%f\n", dval);
311 else
312 {
313 char *tokenStr = "unknown";
314 for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)
315 {
316 if (entry->ival == type)
317 {
318 tokenStr = entry->sval;
319 break;
320 }
321 }
322 printf("%s\n", tokenStr);
323 //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);
324 }
325 }
327 int getType()
328 { return type; }
329 int getLocation()
330 { return location; }
331 DOMString &getStringValue()
332 { return sval; }
333 double getDoubleValue()
334 { return dval; }
335 long getIntValue()
336 { return ival; }
338 private:
339 void init()
340 {
341 type = NONE;
342 location = 0;
343 dval = 0.0;
344 ival = 0;
345 }
347 int type;
348 int location;
349 DOMString sval;
350 double dval;
351 long ival;
352 };
358 //########################################################################
359 //# P A R S E R
360 //########################################################################
362 class XPathParser
363 {
364 public:
366 //#################################
367 //# CONSTRUCTOR
368 //#################################
370 /**
371 *
372 */
373 XPathParser()
374 {
375 debug = false;
376 }
378 /**
379 *
380 */
381 ~XPathParser() {}
383 /**
384 *
385 */
386 bool getDebug()
387 { return debug; }
389 /**
390 *
391 */
392 void setDebug(bool val)
393 { debug = val; }
397 /**
398 * Normally not called directly unless for string parsing testing
399 */
400 bool parse(const DOMString &str);
402 /**
403 * This is the big one. Called by the xpath-dom api to fetch
404 * nodes from a DOM tree.
405 */
406 NodeList evaluate(const NodePtr root, const DOMString &str);
410 private:
412 //#################################
413 //# MESSAGES
414 //#################################
416 /**
417 *
418 */
419 void trace(const char *fmt, ...) G_GNUC_PRINTF(2,3);
421 /**
422 *
423 */
424 void traceStack(const char *name, int pos, int depth);
426 /**
427 *
428 */
429 void error(const char *fmt, ...) G_GNUC_PRINTF(2,3);
431 //#################################
432 //# LEXICAL SCANNING
433 //#################################
435 /**
436 * Add a lexical token of a given type to the list
437 */
438 void lexTokAdd(int type, int loc);
439 void lexTokAdd(int type, int loc, const DOMString &val);
440 void lexTokAdd(int type, int loc, double val);
441 void lexTokAdd(int type, int loc, long val);
443 /**
444 *
445 */
446 void lexicalTokenDump();
448 /**
449 *
450 */
451 LexTok lexTok(int p);
453 /**
454 *
455 */
456 int lexTokType(int p);
458 /**
459 *
460 */
461 int peek(int p);
463 /**
464 *
465 */
466 int get(int p);
468 /**
469 *
470 */
471 int getword(int p, DOMString &str);
473 /**
474 *
475 */
476 int match(int p, const char *str);
478 /**
479 *
480 */
481 int skipwhite(int p);
483 /**
484 *
485 */
486 int getNumber(int p, double &dresult);
488 /**
489 *
490 */
491 int getLiteral(int p, DOMString &result);
493 /**
494 *
495 */
496 int getNameTest(int p0, DOMString &result);
498 /**
499 *
500 */
501 int getNCName(int p0, DOMString &result);
506 /**
507 *
508 */
509 int lexicalScan();
512 //#################################
513 //# GRAMMAR PARSING
514 //#################################
516 /**
517 * Add a newly derived token to the token list;
518 */
519 void tokAdd(const Token &token);
521 void tokAdd(int type);
523 void tokAdd(int type, long val);
525 void tokAdd(int type, double val);
527 void tokAdd(int type, const DOMString &val);
530 /**
531 * The grammar definitions marked [1]-[39] are directly
532 * from the W3C XPath grammar spacification.
533 */
535 /**
536 * [1]
537 */
538 int getLocationPath(int p0, int depth);
540 /**
541 * [2]
542 */
543 int getAbsoluteLocationPath(int p0, int depth);
545 /**
546 * [3]
547 */
548 int getRelativeLocationPath(int p0, int depth);
550 /**
551 * [4]
552 */
553 int getStep(int p0, int depth);
555 /**
556 * [5]
557 */
558 int getAxisSpecifier(int p0, int depth);
560 /**
561 * [6]
562 */
563 int getAxisName(int p0, int depth);
565 /**
566 * [7]
567 */
568 int getNodeTest(int p0, int depth);
570 /**
571 * [8]
572 */
573 int getPredicate(int p0, int depth);
575 /**
576 * [9]
577 */
578 int getPredicateExpr(int p0, int depth);
580 /**
581 * [10]
582 */
583 int getAbbreviatedAbsoluteLocationPath(int p0, int depth);
584 /**
585 * [11]
586 */
587 int getAbbreviatedRelativeLocationPath(int p0, int depth);
588 /**
589 * [12]
590 */
591 int getAbbreviatedStep(int p0, int depth);
593 /**
594 * [13]
595 */
596 int getAbbreviatedAxisSpecifier(int p0, int depth);
598 /**
599 * [14]
600 */
601 int getExpr(int p0, int depth);
603 /**
604 * [15]
605 */
606 int getPrimaryExpr(int p0, int depth);
608 /**
609 * [16]
610 */
611 int getFunctionCall(int p0, int depth);
613 /**
614 * [17]
615 */
616 int getArgument(int p0, int depth);
618 /**
619 * [18]
620 */
621 int getUnionExpr(int p0, int depth);
623 /**
624 * [19]
625 */
626 int getPathExpr(int p0, int depth);
628 /**
629 * [20]
630 */
631 int getFilterExpr(int p0, int depth);
633 /**
634 * [21]
635 */
636 int getOrExpr(int p0, int depth);
638 /**
639 * [22]
640 */
641 int getAndExpr(int p0, int depth);
643 /**
644 * [23]
645 */
646 int getEqualityExpr(int p0, int depth);
648 /**
649 * [24]
650 */
651 int getRelationalExpr(int p0, int depth);
653 /**
654 * [25]
655 */
656 int getAdditiveExpr(int p0, int depth);
658 /**
659 * [26]
660 */
661 int getMultiplicativeExpr(int p0, int depth);
663 /**
664 * [27]
665 */
666 int getUnaryExpr(int p0, int depth);
668 /**
669 * [28]
670 */
671 int getExprToken(int p0, int depth);
673 /**
674 * [29]
675 */
676 int getLiteral(int p0, int depth);
678 /**
679 * [30]
680 */
681 int getNumber(int p0, int depth);
683 /**
684 * [31]
685 */
686 int getDigits(int p0, int depth);
688 /**
689 * [32]
690 */
691 int getOperator(int p0, int depth);
693 /**
694 * [33]
695 */
696 int getOperatorName(int p0, int depth);
698 /**
699 * [34]
700 */
701 int getMultiplyOperator(int p0, int depth);
703 /**
704 * [35]
705 */
706 int getFunctionName(int p0, int depth);
708 /**
709 * [36]
710 */
711 int getVariableReference(int p0, int depth);
713 /**
714 * [37]
715 */
716 int getNameTest(int p0, int depth);
718 /**
719 * [38]
720 */
721 int getNodeType(int p0, int depth);
723 /**
724 * [39]
725 */
726 int getExprWhitespace(int p0, int depth);
730 //#################################
731 //# DATA ITEMS
732 //#################################
734 /**
735 *
736 */
737 bool debug;
739 /**
740 *
741 */
742 char *parsebuf;
744 /**
745 *
746 */
747 int parselen;
749 /**
750 *
751 */
752 int position;
754 /**
755 *
756 */
757 DOMString numberString;
759 /**
760 *
761 */
762 double number;
765 /**
766 * The result of the first lexical scan
767 */
768 std::vector<LexTok> lexicalTokens;
770 /**
771 * The result of parsing. If parsing was successful, then
772 * this is executable via execute()
773 */
774 TokenList tokens;
779 };
786 } // namespace xpath
787 } // namespace dom
788 } // namespace w3c
789 } // namespace org
790 #endif /* __XPATHPARSER_H__ */
791 //#########################################################################
792 //# E N D O F F I L E
793 //#########################################################################