1 /**
2 * Phoebe DOM Implementation.
3 *
4 * This is a C++ approximation of the W3C DOM model, which follows
5 * fairly closely the specifications in the various .idl files, copies of
6 * which are provided for reference. Most important is this one:
7 *
8 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9 *
10 * Authors:
11 * Bob Jamison
12 *
13 * Copyright (C) 2005 Bob Jamison
14 *
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2.1 of the License, or (at your option) any later version.
19 *
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
31 #include "xpathparser.h"
32 #include "charclass.h"
34 namespace org
35 {
36 namespace w3c
37 {
38 namespace dom
39 {
40 namespace xpath
41 {
44 //#########################################################################
45 //# M E S S A G E S
46 //#########################################################################
48 void XPathParser::trace(const char *fmt, ...)
49 {
50 if (!debug)
51 return;
53 FILE *f = stdout;
55 va_list args;
56 va_start(args, fmt);
57 fprintf(f, "XPathParser: ");
58 vfprintf(f, fmt, args);
59 fprintf(f, "\n");
60 va_end(args);
61 }
63 void XPathParser::error(const char *fmt, ...)
64 {
65 FILE *f = stdout;
66 va_list args;
67 va_start(args, fmt);
68 fprintf(f, "XPathParser ERROR: ");
69 vfprintf(f, fmt, args);
70 fprintf(f, "\n");
71 va_end(args);
73 //Print location in string
74 fprintf(f, "%s\n", parsebuf);
75 for (int i=0 ; i<position ; i++)
76 fprintf(f, " ");
77 fprintf(f, "^\n");
78 }
80 void XPathParser::traceStack(const char *name, int pos, int depth)
81 {
82 if (!debug)
83 return;
84 int indent = depth;
86 for (int i=0 ; i<indent ; i++)
87 fprintf(stdout, " ");
88 fprintf(stdout, "%d %d %s\n", pos, depth, name);
90 }
93 //#########################################################################
94 //# L E X I C A L S C A N N I N G
95 //#########################################################################
96 void XPathParser::lexTokAdd(int type, int loc)
97 {
98 LexTok tok(type, loc);
99 lexicalTokens.push_back(tok);
100 }
102 void XPathParser::lexTokAdd(int type, int loc, const DOMString &val)
103 {
104 LexTok tok(type, loc, val);
105 lexicalTokens.push_back(tok);
106 }
108 void XPathParser::lexTokAdd(int type, int loc, double val)
109 {
110 LexTok tok(type, loc, val);
111 lexicalTokens.push_back(tok);
112 }
114 void XPathParser::lexTokAdd(int type, int loc, long val)
115 {
116 LexTok tok(type, loc, val);
117 lexicalTokens.push_back(tok);
118 }
120 void XPathParser::lexicalTokenDump()
121 {
122 printf("####### LEXICAL TOKENS #######\n");
123 for (int i=0 ; i<(int)lexicalTokens.size() ; i++)
124 {
125 printf("%d : ", i);
126 lexicalTokens[i].print();
127 }
128 printf("##### END LEXICAL TOKENS #####\n\n");
129 }
133 LexTok XPathParser::lexTok(int p)
134 {
135 if (p < 0 || p>=(int)lexicalTokens.size())
136 {
137 LexTok tok;
138 return tok;
139 }
140 return lexicalTokens[p];
141 }
143 int XPathParser::lexTokType(int p)
144 {
145 if (p < 0 || p>(int)lexicalTokens.size())
146 return -1;
147 return lexicalTokens[p].getType();
148 }
157 int XPathParser::peek(int p)
158 {
159 if (p >= parselen)
160 return -1;
161 position = p;
162 return parsebuf[p] ;
163 }
166 int XPathParser::get(int p)
167 {
168 if (p >= parselen)
169 return -1;
170 position = p;
171 return parsebuf[p];
172 }
174 int XPathParser::skipwhite(int p0)
175 {
176 int p = p0;
178 while (p < parselen)
179 {
180 int ch = peek(p);
181 if (!isWhitespace(ch))
182 break;
183 ch = get(p++);
184 }
185 return p;
186 }
188 int XPathParser::getword(int p0, DOMString &str)
189 {
190 int p = p0;
191 while (p < parselen)
192 {
193 int ch = peek(p);
194 if (!isLetterOrDigit(ch))
195 break;
196 ch = get(p++);
197 str.push_back(ch);
198 }
199 return p;
200 }
202 int XPathParser::match(int p, const char *str)
203 {
204 while (*str)
205 {
206 if (p >= parselen)
207 return -1;
208 if (parsebuf[p] != *str)
209 return -1;
210 p++; str++;
211 }
212 return p;
213 }
218 int XPathParser::getNumber(int p0, double &dresult)
219 {
220 int p = p0;
221 if (p >= parselen)
222 return p0;/*need at least x*/
224 bool isdouble = false;
225 bool negative = false;
227 int ch = parsebuf[p];
228 if (ch=='-')
229 {
230 p++;
231 negative = true;
232 if (p >= parselen) return p0;
233 }
235 bool seen_dot = false;
236 bool seen_e = false;
237 bool seen_eminus = false;
239 DOMString num;
241 int i = p;
242 while (i < parselen)
243 {
244 ch = parsebuf[i];
245 if (ch=='.')
246 {
247 if (seen_dot)
248 return p0;
249 seen_dot = true;
250 isdouble = true;
251 }
252 else if (ch=='e' || ch=='E')
253 {
254 if (seen_e || !seen_dot)
255 return p0;
256 seen_e = true;
257 }
258 else if (ch=='-' && seen_e)
259 {
260 if (seen_eminus || !seen_dot)
261 return p0;
262 seen_eminus = true;
263 }
264 else if (!isDigit(ch))
265 break;
266 num.push_back(ch);
267 i++;
268 }
270 if (i == p)/*no digits*/
271 return p0;
272 if (isdouble)
273 {
274 const char *begin = num.c_str();
275 char *end;
276 dresult = strtod(begin,&end);
277 if (!end)/*not a number?*/
278 {
279 error("Error formatting double: %s\n", num.c_str());
280 return p0;
281 }
282 }
283 else
284 {
285 const char *begin = num.c_str();
286 char *end;
287 dresult = (double)strtol(begin,&end,10);
288 if (!end)/*not a number?*/
289 {
290 error("Error formatting integer: %s\n", num.c_str());
291 return p0;
292 }
293 }
294 p = i;
295 return p;
296 }
298 int XPathParser::getLiteral(int p0, DOMString &result)
299 {
300 int p = p0;
301 int ch = peek(p);
302 int quotechar = 0;
303 if (ch == '"' || ch == '\'')
304 {
305 quotechar = ch;
306 }
307 else
308 return p0;
309 p++;
310 while (true)
311 {
312 if (p >= parselen)
313 {
314 error("Unterminated literal string");
315 return -1;
316 }
317 ch = peek(p);
318 if (ch == quotechar)
319 break;
320 result.push_back(ch);
321 p++;
322 }
323 p++; //skip over closing "
324 return p;
325 }
328 int XPathParser::getNCName(int p0, DOMString &result)
329 {
330 int p = p0;
331 int ch = peek(p);
332 if (ch != '_' && !isLetter(ch))
333 return p0;
335 result.push_back(ch);
336 p++;
337 while (p < parselen)
338 {
339 ch = peek(p);
340 if ( isLetterOrDigit(ch) ||
341 isCombiningChar(ch) ||
342 isExtender(ch) ||
343 ch == '.' || ch == '-' || ch == '_' )
344 {
345 result.push_back(ch);
346 p++;
347 }
348 else
349 break;
350 }
351 return p;
352 }
354 int XPathParser::getNameTest(int p0, DOMString &result)
355 {
356 int p = p0;
357 int ch = peek(p);
358 if (ch == '*')
359 {
360 result.push_back(ch);
361 p++;
362 return p;
363 }
365 DOMString ncName;
366 int p2 = getNCName(p, ncName);
367 if (p2 <= p)
368 return p0;
370 result = ncName;
371 p = p2;
373 ch = peek(p);
374 if (ch != ':' )//short name. we are done
375 {
376 return p;
377 }
379 if (peek(p+1) == ':') //was name:: which is ok
380 return p;
382 result.push_back(':');
384 p++;
385 ch = peek(p);
386 if (ch == '*')
387 {
388 result.push_back(ch);
389 p++;
390 return p;
391 }
393 DOMString ncName2;
394 p2 = getNCName(p, ncName2);
395 if (p2 <= p)
396 {
397 if (peek(p) == ':') //was name:: which is ok
398 return p0;
399 error("Nothing after ':' in QName");
400 return -1;
401 }
403 result.append(ncName2);
405 p = p2;
407 return p;
408 }
412 int XPathParser::lexicalScan()
413 {
414 lexicalTokens.clear();
416 int p = 0;
417 int p2 = p;
419 while (p < parselen)
420 {
421 p2 = skipwhite(p);
422 p = p2;
424 //trace("nextChar:%c", peek(p));
425 bool selected = false;
427 //### LITERAL EXPR TOKENS
428 for (int i=2 ; i<=10 ; i++)
429 {
430 p2 = match(p, exprTokenTable[i].sval);
431 if (p2 > p)
432 {
433 lexTokAdd(exprTokenTable[i].ival, p);
434 p = p2;
435 selected = true;
436 break;
437 }
438 }
439 if (selected)
440 continue;
442 //### OPERATORS
443 for (LookupEntry *entry = operatorTable; entry->sval ; entry++)
444 {
445 p2 = match(p, entry->sval);
446 if (p2 > p)
447 {
448 long op = (long)entry->ival;
449 //according to the disambiguating rule for * in the spec
450 if (op == MULTIPLY && lexicalTokens.size() > 0)
451 {
452 int ltyp = lexTokType(lexicalTokens.size()-1);
453 if (ltyp != AMPR && ltyp != DOUBLE_COLON &&
454 ltyp != LPAREN && ltyp != RBRACKET &&
455 ltyp != COMMA && ltyp != OPERATOR )
456 {
457 lexTokAdd(OPERATOR, p, (long)entry->ival);
458 p = p2;
459 selected = true;
460 break;
461 }
462 }
463 else
464 {
465 lexTokAdd(OPERATOR, p, (long)entry->ival);
466 p = p2;
467 selected = true;
468 break;
469 }
470 }
471 }
472 if (selected)
473 continue;
475 //### NODE TYPES
476 for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
477 {
478 p2 = match(p, entry->sval);
479 if (p2 > p)
480 {
481 lexTokAdd(NODE_TYPE, p, (long)entry->ival);
482 p = p2;
483 selected = true;
484 break;
485 }
486 }
487 if (selected)
488 continue;
490 //### AXIS NAMES
491 for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
492 {
493 p2 = match(p, entry->sval);
494 if (p2 > p)
495 {
496 lexTokAdd(AXIS_NAME, p, (long)entry->ival);
497 p = p2;
498 selected = true;
499 break;
500 }
501 }
502 if (selected)
503 continue;
505 //### NAME TEST
506 DOMString ntResult;
507 p2 = getNameTest(p, ntResult);
508 if (p2 > p)
509 {
510 int p3 = skipwhite(p2);
511 if (peek(p3) == '(')
512 lexTokAdd(FUNCTION_NAME, p, ntResult);
513 else
514 lexTokAdd(NAME_TEST, p, ntResult);
515 p = p2;
516 selected = true;
517 }
518 if (selected)
519 continue;
521 //### VARIABLE REFERENCE
522 if (peek(p) == '$')
523 {
524 p++;
525 DOMString qnResult;
526 p2 = getNCName(p, qnResult);
527 if (p2 > p)
528 {
529 lexTokAdd(VARIABLE_REFERENCE, p, qnResult);
530 p = p2;
531 selected = true;
532 }
533 else
534 {
535 error("Variable referenced with '$' requires a qualified name\n");
536 return -1;
537 }
538 }
539 if (selected)
540 continue;
542 //### NUMBER
543 double numval;
544 p2 = getNumber(p, numval);
545 if (p2 > p)
546 {
547 lexTokAdd(NUMBER, p, numval);
548 p = p2;
549 selected = true;
550 }
551 if (selected)
552 continue;
554 //### LITERAL
555 DOMString strval;
556 p2 = getLiteral(p, strval);
557 if (p2 > p)
558 {
559 lexTokAdd(LITERAL, p, strval);
560 p = p2;
561 selected = true;
562 }
563 if (selected)
564 continue;
566 //### CHAR (default, none of the above)
567 lexTokAdd(CHAR, p, (long) peek(p));
568 p++;
570 }//while p
573 return p;
574 }
597 //#########################################################################
598 //# X P A T H G R A M M A R P A R S I N G
599 //#########################################################################
601 /**
602 * [1] LocationPath ::=
603 * RelativeLocationPath
604 * | AbsoluteLocationPath
605 */
606 int XPathParser::getLocationPath(int p0, int depth)
607 {
608 traceStack("getLocationPath", p0, depth);
609 int p = p0;
611 p = skipwhite(p);
613 int p2 = getAbsoluteLocationPath(p, depth+1);
614 if (p2 > p)
615 return p2;
617 p2 = getRelativeLocationPath(p, depth+1);
618 if (p2 > p)
619 return p2;
621 return p0;
622 }
625 /**
626 * [2] AbsoluteLocationPath ::=
627 * '/' RelativeLocationPath?
628 * | AbbreviatedAbsoluteLocationPath
629 */
630 int XPathParser::getAbsoluteLocationPath(int p0, int depth)
631 {
632 traceStack("getAbsoluteLocationPath", p0, depth);
634 int p = p0;
635 LexTok t = lexTok(p);
636 if (t.getType() == OPERATOR && t.getIntValue()==SLASH)
637 {
638 p++;
639 int p2 = getRelativeLocationPath(p, depth+1);
640 if (p2 <= p)
641 {
642 error("Relative path after '/'");
643 return -1;
644 }
645 p = p2;
646 return p;
647 }
649 //AbbreviatedAbsoluteLocationPath
650 if (t.getType() == OPERATOR && t.getIntValue()==DOUBLE_SLASH)
651 {
652 p++;
653 int p2 = getRelativeLocationPath(p, depth+1);
654 if (p2 <= p)
655 {
656 error("Relative path after '//'");
657 return -1;
658 }
659 p = p2;
660 return p;
661 }
664 return p0;
665 }
668 /**
669 * [3] RelativeLocationPath ::=
670 * Step
671 * | RelativeLocationPath '/' Step
672 * | AbbreviatedRelativeLocationPath
673 */
674 int XPathParser::getRelativeLocationPath(int p0, int depth)
675 {
676 traceStack("getRelativeLocationPath", p0, depth);
677 int p = p0;
678 int p2 = getStep(p, depth+1);
679 if (p2 < 0)
680 return -1;
681 if (p2 > p)
682 {
683 p = p2;
684 LexTok t = lexTok(p);
685 if (t.getType() == OPERATOR && t.getIntValue()==SLASH)
686 {
687 p++;
688 p2 = getRelativeLocationPath(p, depth+1);
689 if (p2 < 0)
690 {
691 error("Relative path after '/'");
692 return -1;
693 }
694 p = p2;
695 return p;
696 }
697 //AbbreviatedRelativeLocationPath
698 if (t.getType() == OPERATOR && t.getIntValue()==DOUBLE_SLASH)
699 {
700 p++;
701 p2 = getRelativeLocationPath(p, depth+1);
702 if (p2 < 0)
703 {
704 error("Relative path after '//'");
705 return -1;
706 }
707 p = p2;
708 return p;
709 }
710 return p;
711 }
714 return p0;
715 }
718 /**
719 * [4] Step ::=
720 * AxisSpecifier NodeTest Predicate*
721 * | AbbreviatedStep
722 */
723 int XPathParser::getStep(int p0, int depth)
724 {
725 traceStack("getStep", p0, depth);
727 int p = p0;
729 lexTok(p).print();
731 //This can be (and usually is) 0-length
732 int p2 = getAxisSpecifier(p, depth+1);
733 if (p2 < 0)
734 {
735 error("Axis specifier in step section");
736 return -1;
737 }
739 p = p2;
740 p2 = getNodeTest(p, depth+1);
741 if (p2 < 0)
742 {
743 error("Node test in step section");
744 return -1;
745 }
747 if (p2 > p)
748 {
749 p = p2;
750 p2 = getPredicate(p, depth+1);
751 if (p2 < 0)
752 {
753 error("Predicate in step section");
754 return -1;
755 }
756 p = p2;
757 return p;
758 }
760 //AbbreviatedStep
761 if (lexTokType(p) == DOT)
762 {
763 p++;
764 return p;
765 }
767 //AbbreviatedStep
768 if (lexTokType(p) == DOUBLE_DOT)
769 {
770 p++;
771 return p;
772 }
774 return p0;
775 }
778 /**
779 * [5] AxisSpecifier ::=
780 * AxisName '::'
781 * | AbbreviatedAxisSpecifier
782 */
783 int XPathParser::getAxisSpecifier(int p0, int depth)
784 {
785 traceStack("getAxisSpecifier", p0, depth);
786 int p = p0;
787 if (lexTokType(p) == AXIS_NAME)
788 {
789 p++;
790 if (lexTokType(p) != DOUBLE_COLON)
791 {
792 error("'::' required after axis name literal");
793 return -1;
794 }
795 p++;
796 return p;
797 }
799 //AbbreviatedAxisSpecifier
800 if (lexTokType(p) == AMPR)
801 {
802 p++;
803 return p;
804 }
806 return p0;
807 }
810 /**
811 * [6] AxisName ::=
812 * 'ancestor'
813 * | 'ancestor-or-self'
814 * | 'attribute'
815 * | 'child'
816 * | 'descendant'
817 * | 'descendant-or-self'
818 * | 'following'
819 * | 'following-sibling'
820 * | 'namespace'
821 * | 'parent'
822 * | 'preceding'
823 * | 'preceding-sibling'
824 * | 'self'
825 * NOTE: This definition, and those at the bottom, is not
826 * needed. Its functionality is handled by lexical scanning.
827 * It is left here for reference.
828 */
829 int XPathParser::getAxisName(int p0, int depth)
830 {
831 traceStack("getAxisName", p0, depth);
832 return p0;
833 }
836 /**
837 * [7] NodeTest ::=
838 * NameTest
839 * | NodeType '(' ')'
840 * | 'processing-instruction' '(' Literal ')'
841 */
842 int XPathParser::getNodeTest(int p0, int depth)
843 {
844 traceStack("getNodeTest", p0, depth);
846 int p = p0;
848 LexTok t = lexTok(p);
849 if (t.getType() == NAME_TEST)
850 {
851 p++;
852 return p;
853 }
855 if (t.getType() == NODE_TYPE)
856 {
857 if (t.getIntValue() == PROCESSING_INSTRUCTION)
858 {
859 if (lexTokType(p) != LPAREN ||
860 lexTokType(p+1) != LITERAL ||
861 lexTokType(p+2) != RPAREN )
862 {
863 error("processing instruction requires (\"literal string\")");
864 return -1;
865 }
866 p += 3;
867 }
868 else
869 {
870 if (lexTokType(p+1) != LPAREN ||
871 lexTokType(p+2) != RPAREN )
872 {
873 error("processing instruction requires ()");
874 return -1;
875 }
876 p += 2;
877 }
878 return p;
879 }
881 return p0;
882 }
885 /**
886 * [8] Predicate ::=
887 * '[' PredicateExpr ']'
888 */
889 int XPathParser::getPredicate(int p0, int depth)
890 {
891 traceStack("getPredicate", p0, depth);
893 int p = p0;
894 if (lexTokType(p) != LBRACKET)
895 return p0;
897 p++;
898 int p2 = getPredicateExpr(p, depth+1);
899 if (p2 <= p)
900 {
901 error("Predicate expression in predicate");
902 return -1;
903 }
905 p = p2;
906 lexTok(p).print();
907 if (lexTokType(p) != RBRACKET)
908 {
909 error("Predicate expression requires closing ']'");
910 return -1;
911 }
912 p++;
913 return p;
914 }
917 /**
918 * [9] PredicateExpr ::=
919 * Expr
920 */
921 int XPathParser::getPredicateExpr(int p0, int depth)
922 {
923 traceStack("getPredicateExpr", p0, depth);
924 int p = p0;
925 int p2 = getExpr(p, depth+1);
926 if (p2 < 0)
927 {
928 error("Expression in predicate expression");
929 return -1;
930 }
931 p = p2;
932 return p;
933 }
936 /**
937 * [10] AbbreviatedAbsoluteLocationPath ::=
938 * '//' RelativeLocationPath
939 * NOTE: not used. handled in getAbsoluteLocationPath()
940 */
941 int XPathParser::getAbbreviatedAbsoluteLocationPath(int p0, int depth)
942 {
943 traceStack("getAbbreviatedAbsoluteLocationPath", p0, depth);
945 return p0;
946 }
948 /**
949 * [11] AbbreviatedRelativeLocationPath ::=
950 * RelativeLocationPath '//' Step
951 * NOTE: not used. handled in getRelativeLocationPath()
952 */
953 int XPathParser::getAbbreviatedRelativeLocationPath(int p0, int depth)
954 {
955 traceStack("getAbbreviatedRelativeLocationPath", p0, depth);
956 return p0;
957 }
959 /**
960 * [12] AbbreviatedStep ::=
961 * '.'
962 * | '..'
963 * NOTE: not used. handled in getStep()
964 */
965 int XPathParser::getAbbreviatedStep(int p0, int depth)
966 {
967 traceStack("getAbbreviatedStep", p0, depth);
968 return p0;
969 }
972 /**
973 * [13] AbbreviatedAxisSpecifier ::=
974 * '@'?
975 * NOTE: not used. handled in getAxisSpecifier()
976 */
977 int XPathParser::getAbbreviatedAxisSpecifier(int p0, int depth)
978 {
979 traceStack("getAbbreviatedAxisSpecifier", p0, depth);
980 return p0;
981 }
984 /**
985 * [14] Expr ::=
986 * OrExpr
987 */
988 int XPathParser::getExpr(int p0, int depth)
989 {
990 traceStack("getExpr", p0, depth);
992 int p = p0;
994 int p2 = getOrExpr(p, depth+1);
995 if (p2 < 0)
996 {
997 error("OR expression in expression");
998 return -1;
999 }
1000 p = p2;
1002 return p;
1003 }
1006 /**
1007 * [15] PrimaryExpr ::=
1008 * VariableReference
1009 * | '(' Expr ')'
1010 * | Literal
1011 * | Number
1012 * | FunctionCall
1013 */
1014 int XPathParser::getPrimaryExpr(int p0, int depth)
1015 {
1016 traceStack("getPrimaryExpr", p0, depth);
1017 int p = p0;
1018 int p2 = p;
1020 if (lexTokType(p) == VARIABLE_REFERENCE)
1021 {
1022 p++;
1023 return p;
1024 }
1026 if (lexTokType(p) == LPAREN)
1027 {
1028 p++;
1029 p2 = getExpr(p, depth+1);
1030 if (p2 <= p)
1031 {
1032 error("Expression in primary expression");
1033 return -1;
1034 }
1035 p += p2;
1036 if (lexTokType(p) != RPAREN)
1037 {
1038 error("Primary expression requires closing ')'");
1039 return -1;
1040 }
1041 }
1043 if (lexTokType(p) == LITERAL)
1044 {
1045 p++;
1046 return p;
1047 }
1049 if (lexTokType(p) == NUMBER)
1050 {
1051 p++;
1052 return p;
1053 }
1055 p2 = getFunctionCall(p, depth+1);
1056 if (p2 < 0)
1057 {
1058 error("Function call in primary expression");
1059 return -1;
1060 }
1061 if (p2 > p)
1062 {
1063 p = p2;
1064 return p;
1065 }
1067 return p0;
1068 }
1071 /**
1072 * [16] FunctionCall ::=
1073 * FunctionName '(' ( Argument ( ',' Argument )* )? ')'
1074 */
1075 int XPathParser::getFunctionCall(int p0, int depth)
1076 {
1077 traceStack("getFunctionCall", p0, depth);
1078 int p = p0;
1080 if (lexTokType(p) != FUNCTION_NAME)
1081 return p0;
1083 p++;
1085 if (lexTokType(p) != LPAREN) //this makes a function
1086 return p0;
1087 p++;
1089 int p2 = getArgument(p, depth+1);
1090 if (p2 < 0)
1091 {
1092 error("Error in function argument");
1093 return -1;
1094 }
1095 if (p2 > p)
1096 {
1097 p = p2;
1098 while (lexTokType(p) == COMMA)
1099 {
1100 p++;
1101 p2 = getArgument(p, depth+1);
1102 if (p2 <= p)
1103 {
1104 error("Error in function argument");
1105 return -1;
1106 }
1107 p = p2;
1108 }
1109 }
1111 if (lexTokType(p) != RPAREN) //mandatory
1112 {
1113 error("Function requires closing ')'");
1114 return -1;
1115 }
1116 p++;
1118 return p;
1119 }
1122 /**
1123 * [17] Argument ::=
1124 * Expr
1125 */
1126 int XPathParser::getArgument(int p0, int depth)
1127 {
1128 traceStack("getArgument", p0, depth);
1129 int p = p0;
1130 int p2 = getExpr(p, depth+1);
1131 if (p2 < 0)
1132 {
1133 error("Argument expression");
1134 return -1;
1135 }
1136 p = p2;
1137 return p;
1138 }
1141 /**
1142 * [18] UnionExpr ::=
1143 * PathExpr
1144 * | UnionExpr '|' PathExpr
1145 */
1146 int XPathParser::getUnionExpr(int p0, int depth)
1147 {
1148 traceStack("getUnionExpr", p0, depth);
1149 int p = p0;
1150 int p2 = getPathExpr(p, depth+1);
1151 if (p2 < 0)
1152 {
1153 error("Path expression for union");
1154 return -1;
1155 }
1156 p = p2;
1157 LexTok t = lexTok(p);
1158 if (t.getType() == OPERATOR && t.getIntValue() == PIPE)
1159 {
1160 p++;
1161 p2 = getUnionExpr(p, depth+1);
1162 if (p2 < 0)
1163 {
1164 error("OR (|) requires union expression on the left");
1165 return -1;
1166 }
1167 p = p2;
1168 }
1169 return p;
1170 }
1173 /**
1174 * [19] PathExpr ::=
1175 * LocationPath
1176 * | FilterExpr
1177 * | FilterExpr '/' RelativeLocationPath
1178 * | FilterExpr '//' RelativeLocationPath
1179 */
1180 int XPathParser::getPathExpr(int p0, int depth)
1181 {
1182 traceStack("getPathExpr", p0, depth);
1183 int p = p0;
1184 int p2;
1186 p2 = getLocationPath(p, depth+1);
1187 if (p2 < 0)
1188 {
1189 error("Location path in path expression");
1190 return -1;
1191 }
1192 if (p2 > p)
1193 {
1194 p = p2;
1195 return p;
1196 }
1198 p2 = getFilterExpr(p, depth+1);
1199 if (p2 < 0)
1200 {
1201 error("Filter expression in path expression");
1202 return -1;
1203 }
1204 if (p2 <= p)
1205 return p0;
1206 p = p2;
1208 LexTok t = lexTok(p);
1209 if (t.getType() == OPERATOR && t.getIntValue() == SLASH)
1210 {
1211 p++;
1212 p2 = getRelativeLocationPath(p, depth+1);
1213 if (p2 < 0)
1214 {
1215 error("Relative location after / in path expression");
1216 return -1;
1217 }
1218 p = p2;
1219 return p;
1220 }
1222 if (t.getType() == OPERATOR && t.getIntValue() == DOUBLE_SLASH)
1223 {
1224 p++;
1225 p2 = getRelativeLocationPath(p, depth+1);
1226 if (p2 < 0)
1227 {
1228 error("Relative location after // in path expression");
1229 return -1;
1230 }
1231 p = p2;
1232 return p;
1233 }
1234 return p;
1235 }
1238 /**
1239 * [20] FilterExpr ::=
1240 * PrimaryExpr
1241 * | FilterExpr Predicate
1242 */
1243 int XPathParser::getFilterExpr(int p0, int depth)
1244 {
1245 traceStack("getFilterExpr", p0, depth);
1246 int p = p0;
1248 int p2 = getPrimaryExpr(p, depth+1);
1249 if (p2 < 0)
1250 {
1251 error("Primary expression in path expression");
1252 return -1;
1253 }
1254 if (p2 > p)
1255 {
1256 p = p2;
1257 while (true)
1258 {
1259 p2 = getPredicate(p, depth+1);
1260 if (p2 < 0)
1261 {
1262 error("Predicate in primary expression");
1263 return -1;
1264 }
1265 if (p2 > p)
1266 {
1267 p = p2;
1268 }
1269 else
1270 break;
1271 }
1272 return p;
1273 }
1275 return p0;
1276 }
1279 /**
1280 * [21] OrExpr ::=
1281 * AndExpr
1282 * | OrExpr 'or' AndExpr
1283 */
1284 int XPathParser::getOrExpr(int p0, int depth)
1285 {
1286 traceStack("getOrExpr", p0, depth);
1287 int p = p0;
1288 int p2 = getAndExpr(p, depth+1);
1289 if (p2 < 0)
1290 {
1291 error("AND expression in OR expression");
1292 return -1;
1293 }
1294 if (p2 > p)
1295 {
1296 p = p2;
1297 LexTok t = lexTok(p);
1298 if (t.getType() == OPERATOR && t.getIntValue() == OR)
1299 {
1300 p++;
1301 p2 = getAndExpr(p, depth+1);
1302 if (p2 <= p)
1303 {
1304 error("AND expression in OR expression");
1305 return -1;
1306 }
1307 p = p2;
1308 return p;
1309 }
1310 return p;
1311 }
1313 return p0;
1314 }
1317 /**
1318 * [22] AndExpr ::=
1319 * EqualityExpr
1320 * | AndExpr 'and' EqualityExpr
1321 */
1322 int XPathParser::getAndExpr(int p0, int depth)
1323 {
1324 traceStack("getAndExpr", p0, depth);
1325 int p = p0;
1326 int p2 = getEqualityExpr(p, depth+1);
1327 if (p2 < 0)
1328 {
1329 error("Equality expression in AND expression");
1330 return -1;
1331 }
1332 if (p2 > p)
1333 {
1334 p = p2;
1335 LexTok t = lexTok(p);
1336 if (t.getType() == OPERATOR && t.getIntValue() == AND)
1337 {
1338 p++;
1339 p2 = getAndExpr(p, depth+1);
1340 if (p2 <= p)
1341 {
1342 error("AND expression after 'and'");
1343 return -1;
1344 }
1345 p = p2;
1346 return p;
1347 }
1348 return p;
1349 }
1351 return p0;
1352 }
1355 /**
1356 * [23] EqualityExpr ::=
1357 * RelationalExpr
1358 * | EqualityExpr '=' RelationalExpr
1359 * | EqualityExpr '!=' RelationalExpr
1360 */
1361 int XPathParser::getEqualityExpr(int p0, int depth)
1362 {
1363 traceStack("getEqualityExpr", p0, depth);
1364 int p = p0;
1365 int p2 = getRelationalExpr(p, depth+1);
1366 if (p2 < 0)
1367 {
1368 error("Relation expression in equality expression");
1369 return -1;
1370 }
1371 if (p2 > p)
1372 {
1373 p = p2;
1374 LexTok t = lexTok(p);
1375 if (t.getType() == OPERATOR && t.getIntValue() == EQUALS)
1376 {
1377 p++;
1378 p2 = getEqualityExpr(p, depth+1);
1379 if (p2 <= p)
1380 {
1381 error("Equality expression expected after ==");
1382 return -1;
1383 }
1384 p = p2;
1385 return p;
1386 }
1388 if (t.getType() == OPERATOR && t.getIntValue() == NOT_EQUALS)
1389 {
1390 p++;
1391 p2 = getEqualityExpr(p, depth+1);
1392 if (p2 <= p)
1393 {
1394 error("Equality expression expected after !=");
1395 return -1;
1396 }
1397 p = p2;
1398 return p;
1399 }
1401 return p;
1402 }
1404 return p0;
1405 }
1408 /**
1409 * [24] RelationalExpr ::=
1410 * AdditiveExpr
1411 * | RelationalExpr '<' AdditiveExpr
1412 * | RelationalExpr '>' AdditiveExpr
1413 * | RelationalExpr '<=' AdditiveExpr
1414 * | RelationalExpr '>=' AdditiveExpr
1415 */
1416 int XPathParser::getRelationalExpr(int p0, int depth)
1417 {
1418 traceStack("getRelationalExpr", p0, depth);
1419 int p = p0;
1420 int p2 = getAdditiveExpr(p, depth+1);
1421 if (p2 < 0)
1422 {
1423 error("Additive expression in relational expression");
1424 return -1;
1425 }
1426 if (p2 > p)
1427 {
1428 p = p2;
1429 LexTok t = lexTok(p);
1431 if (t.getType() == OPERATOR && t.getIntValue() == GREATER_THAN)
1432 {
1433 p++;
1434 p2 = getRelationalExpr(p, depth+1);
1435 if (p2 <= p)
1436 {
1437 error("Relational expression after '>'");
1438 return -1;
1439 }
1440 p = p2;
1441 return p;
1442 }
1443 if (t.getType() == OPERATOR && t.getIntValue() == LESS_THAN)
1444 {
1445 p++;
1446 p2 = getRelationalExpr(p, depth+1);
1447 if (p2 <= p)
1448 {
1449 error("Relational expression after '<'");
1450 return -1;
1451 }
1452 p = p2;
1453 return p;
1454 }
1455 if (t.getType() == OPERATOR && t.getIntValue() == GREATER_THAN_EQUALS)
1456 {
1457 p++;
1458 p2 = getRelationalExpr(p, depth+1);
1459 if (p2 <= p)
1460 {
1461 error("Relational expression after '>='");
1462 return -1;
1463 }
1464 p = p2;
1465 return p;
1466 }
1467 if (t.getType() == OPERATOR && t.getIntValue() == LESS_THAN_EQUALS)
1468 {
1469 p++;
1470 p2 = getRelationalExpr(p, depth+1);
1471 if (p2 <= p)
1472 {
1473 error("Relational expression after '<='");
1474 return -1;
1475 }
1476 p = p2;
1477 return p;
1478 }
1481 return p;
1482 }
1484 return p0;
1485 }
1488 /**
1489 * [25] AdditiveExp ::=
1490 * MultiplicativeExpr
1491 * | AdditiveExpr '+' MultiplicativeExpr
1492 * | AdditiveExpr '-' MultiplicativeExpr
1493 */
1494 int XPathParser::getAdditiveExpr(int p0, int depth)
1495 {
1496 traceStack("getAdditiveExpr", p0, depth);
1497 int p = p0;
1498 int p2 = getMultiplicativeExpr(p, depth+1);
1499 if (p2 < 0)
1500 {
1501 error("Multiplicative expression in additive expression");
1502 return -1;
1503 }
1504 if (p2 > p)
1505 {
1506 p = p2;
1507 LexTok t = lexTok(p);
1509 if (t.getType() == OPERATOR && t.getIntValue() == PLUS)
1510 {
1511 p++;
1512 p2 = getAdditiveExpr(p, depth+1);
1513 if (p2 <= p)
1514 {
1515 error("Additive expression after '+'");
1516 return -1;
1517 }
1518 p = p2;
1519 return p;
1520 }
1521 if (t.getType() == OPERATOR && t.getIntValue() == MINUS)
1522 {
1523 p++;
1524 p2 = getAdditiveExpr(p, depth+1);
1525 if (p2 <= p)
1526 {
1527 error("Additive expression after '-'");
1528 return -1;
1529 }
1530 p = p2;
1531 return p;
1532 }
1535 return p;
1536 }
1538 return p0;
1539 }
1542 /**
1543 * [26] MultiplicativeExpr ::=
1544 * UnaryExpr
1545 * | MultiplicativeExpr MultiplyOperator UnaryExpr
1546 * | MultiplicativeExpr 'div' UnaryExpr
1547 * | MultiplicativeExpr 'mod' UnaryExpr
1548 */
1549 int XPathParser::getMultiplicativeExpr(int p0, int depth)
1550 {
1551 traceStack("getMultiplicativeExpr", p0, depth);
1552 int p = p0;
1553 int p2 = getUnaryExpr(p, depth+1);
1554 if (p2 < 0)
1555 {
1556 error("Unary expression in multiplicative expression");
1557 return -1;
1558 }
1559 if (p2 > p)
1560 {
1561 p = p2;
1562 LexTok t = lexTok(p);
1564 if (t.getType() == OPERATOR && t.getIntValue() == MULTIPLY)
1565 {
1566 p++;
1567 p2 = getMultiplicativeExpr(p, depth+1);
1568 if (p2 <= p)
1569 {
1570 error("Multiplicative expression after '*'");
1571 return -1;
1572 }
1573 p = p2;
1574 return p;
1575 }
1577 if (t.getType() == OPERATOR && t.getIntValue() == DIV)
1578 {
1579 p++;
1580 p2 = getMultiplicativeExpr(p, depth+1);
1581 if (p2 <= p)
1582 {
1583 error("Multiplicative expression after 'div'");
1584 return -1;
1585 }
1586 p = p2;
1587 return p;
1588 }
1590 if (t.getType() == OPERATOR && t.getIntValue() == MOD)
1591 {
1592 p++;
1593 p2 = getMultiplicativeExpr(p, depth+1);
1594 if (p2 <= p)
1595 {
1596 error("Multiplicative expression after 'mod'");
1597 return -1;
1598 }
1599 p = p2;
1600 return p;
1601 }
1604 return p;
1605 }
1607 return p0;
1608 }
1611 /**
1612 * [27] UnaryExpr ::=
1613 * UnionExpr
1614 * | '-' UnaryExpr
1615 */
1616 int XPathParser::getUnaryExpr(int p0, int depth)
1617 {
1618 traceStack("getUnaryExpr", p0, depth);
1619 int p = p0;
1620 int p2 = getUnionExpr(p, depth+1);
1621 if (p2 < 0)
1622 {
1623 error("Union expression in unary expression");
1624 return -1;
1625 }
1626 if (p2 > p)
1627 {
1628 p = p2;
1629 return p;
1630 }
1632 if (lexTokType(p) == '-')
1633 {
1634 p++;
1635 p2 = getUnaryExpr(p, depth+1);
1636 if (p2 < 0)
1637 {
1638 error("Unary expression after '-'");
1639 return -1;
1640 }
1641 p = p2;
1642 return p;
1643 }
1645 return p0;
1646 }
1649 //######################################################
1650 //# NOT USED!!!
1651 //## The grammar definitions below are
1652 //## handled by lexical parsing, and will not be used
1653 //######################################################
1655 /**
1656 * [28] ExprToken ::=
1657 * '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
1658 * | NameTest
1659 * | NodeType
1660 * | Operator
1661 * | FunctionName
1662 * | AxisName
1663 * | Literal
1664 * | Number
1665 * | VariableReference
1666 */
1667 int XPathParser::getExprToken(int p0, int depth)
1668 {
1669 traceStack("getExprToken", p0, depth);
1670 return p0;
1671 }
1674 /**
1675 * [29] Literal ::=
1676 * '"' [^"]* '"'
1677 * | "'" [^']* "'"
1678 */
1679 int XPathParser::getLiteral(int p0, int depth)
1680 {
1681 traceStack("getLiteral", p0, depth);
1682 return p0;
1683 }
1686 /**
1687 * [30] Number ::=
1688 * Digits ('.' Digits?)?
1689 * | '.' Digits
1690 */
1691 int XPathParser::getNumber(int p0, int depth)
1692 {
1693 traceStack("getNumber", p0, depth);
1694 return p0;
1695 }
1698 /**
1699 * [31] Digits ::=
1700 * [0-9]+
1701 */
1702 int XPathParser::getDigits(int p0, int depth)
1703 {
1704 traceStack("getDigits", p0, depth);
1705 return p0;
1706 }
1709 /**
1710 * [32] Operator ::=
1711 * OperatorName
1712 * | MultiplyOperator
1713 * | '/' | '//' | '|' | '+' | '-' | '='
1714 * | '!=' | '<' | '<=' | '>' | '>='
1715 */
1716 int XPathParser::getOperator(int p0, int depth)
1717 {
1718 traceStack("getOperator", p0, depth);
1719 return p0;
1720 }
1723 /**
1724 * [33] OperatorName ::=
1725 * 'and' | 'or' | 'mod' | 'div'
1726 */
1727 int XPathParser::getOperatorName(int p0, int depth)
1728 {
1729 traceStack("getOperatorName", p0, depth);
1730 return p0;
1731 }
1734 /**
1735 * [34] MultiplyOperator ::=
1736 * '*'
1737 */
1738 int XPathParser::getMultiplyOperator(int p0, int depth)
1739 {
1740 traceStack("getMultiplyOperator", p0, depth);
1741 return p0;
1742 }
1745 /**
1746 * [35] FunctionName ::=
1747 * QName - NodeType
1748 */
1749 int XPathParser::getFunctionName(int p0, int depth)
1750 {
1751 traceStack("getFunctionName", p0, depth);
1752 return p0;
1753 }
1756 /**
1757 * [36] VariableReference ::=
1758 * '$' QName
1759 */
1760 int XPathParser::getVariableReference(int p0, int depth)
1761 {
1762 traceStack("getVariableReference", p0, depth);
1763 return p0;
1764 }
1767 /**
1768 * [37] NameTest ::=
1769 * '*'
1770 * | NCName ':' '*'
1771 * | QName
1772 */
1773 int XPathParser::getNameTest(int p0, int depth)
1774 {
1775 traceStack("getNameTest", p0, depth);
1776 return p0;
1777 }
1780 /**
1781 * [38] NodeType ::=
1782 * 'comment'
1783 * | 'text'
1784 * | 'processing-instruction'
1785 * | 'node'
1786 */
1787 int XPathParser::getNodeType(int p0, int depth)
1788 {
1789 traceStack("getNodeType", p0, depth);
1790 return p0;
1791 }
1794 /**
1795 * [39] ExprWhitespace ::=
1796 * S
1797 */
1798 int XPathParser::getExprWhitespace(int p0, int depth)
1799 {
1800 traceStack("getExprWhitespace", p0, depth);
1801 return p0;
1802 }
1808 //#########################################################################
1809 //# H I G H L E V E L P A R S I N G
1810 //#########################################################################
1812 /**
1813 * Parse a candidate XPath string. Leave a copy in 'tokens.'
1814 */
1815 bool XPathParser::parse(const DOMString &xpathString)
1816 {
1817 int p0 = 0;
1819 DOMString str = xpathString;
1821 parsebuf = (char *)str.c_str();
1822 parselen = (int) str.size();
1823 position = 0;
1825 trace("## parsing string: '%s'", parsebuf);
1827 lexicalScan();
1828 lexicalTokenDump();
1830 tokens.clear();//Get ready to store new tokens
1832 int p = getLocationPath(p0, 0);
1834 parsebuf = NULL;
1835 parselen = 0;
1837 if (p <= p0)
1838 {
1839 //return false;
1840 }
1842 return true;
1843 }
1849 //#########################################################################
1850 //# E V A L U A T E
1851 //#########################################################################
1853 /**
1854 * This method "executes" a list of Tokens in the context of a DOM root
1855 * Node, returning a list of Nodes that match the xpath expression.
1856 */
1857 NodeList XPathParser::execute(const Node *root,
1858 std::vector<Token> &toks)
1859 {
1860 NodeList list;
1862 if (!root)
1863 return list;
1865 //### Execute the token list
1866 std::vector<Token>::iterator iter;
1867 for (iter = toks.begin() ; iter != toks.end() ; iter++)
1868 {
1869 }
1871 return list;
1872 }
1877 /**
1878 * This wraps the two-step call to parse(), then execute() to get a NodeList
1879 * of matching DOM nodes
1880 */
1881 NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString)
1882 {
1883 NodeList list;
1885 //### Maybe do caching for speed here
1887 //### Parse and execute
1888 //### Error message can be generated as a side effect
1889 if (!parse(xpathString))
1890 return list;
1892 //### Execute the token list
1893 list = execute(root, tokens);
1895 return list;
1896 }
1900 } // namespace xpath
1901 } // namespace dom
1902 } // namespace w3c
1903 } // namespace org
1904 //#########################################################################
1905 //# E N D O F F I L E
1906 //#########################################################################