1 \r
2 \r
3 \r
4 #include <stdio.h>\r
5 #include <string.h>\r
6 #include <stdarg.h>\r
7 #include <malloc.h>\r
8 #include <sys/types.h>\r
9 #include <sys/stat.h>\r
10 \r
11 \r
12 #include "minidom.h"\r
13 \r
14 namespace MiniDom\r
15 {\r
16 \r
17 \r
18 \r
19 //########################################################################\r
20 //# E L E M E N T\r
21 //########################################################################\r
22 \r
23 void Element::findElementsRecursive(std::vector<Element *>&res, const DOMString &name)\r
24 {\r
25 if (getName() == name)\r
26 res.push_back(this);\r
27 for (int i=0; i<children.size() ; i++)\r
28 children[i]->findElementsRecursive(res, name);\r
29 }\r
30 \r
31 std::vector<Element *> Element::findElements(const DOMString &name)\r
32 {\r
33 std::vector<Element *> res;\r
34 findElementsRecursive(res, name);\r
35 return res;\r
36 }\r
37 \r
38 DOMString Element::getAttribute(const DOMString &name)\r
39 {\r
40 for (int i=0 ; i<attributes.size() ; i++)\r
41 if (attributes[i].getName() ==name)\r
42 return attributes[i].getValue();\r
43 return "";\r
44 }\r
45 \r
46 void Element::addChild(Element *child)\r
47 {\r
48 children.push_back(child);\r
49 }\r
50 \r
51 \r
52 void Element::addAttribute(const DOMString &name, const DOMString &value)\r
53 {\r
54 Attribute attr(name, value);\r
55 attributes.push_back(attr);\r
56 }\r
57 \r
58 void Element::addNamespace(const DOMString &prefix, const DOMString &namespaceURI)\r
59 {\r
60 Namespace ns(prefix, namespaceURI);\r
61 namespaces.push_back(ns);\r
62 }\r
63 \r
64 void Element::writeIndentedRecursive(FILE *f, int indent)\r
65 {\r
66 int i;\r
67 if (!f)\r
68 return;\r
69 //Opening tag, and attributes\r
70 for (i=0;i<indent;i++)\r
71 fputc(' ',f);\r
72 fprintf(f,"<%s",name.c_str());\r
73 for (int i=0 ; i<attributes.size() ; i++)\r
74 {\r
75 fprintf(f," %s=\"%s\"",\r
76 attributes[i].getName().c_str(),\r
77 attributes[i].getValue().c_str());\r
78 }\r
79 for (int i=0 ; i<namespaces.size() ; i++)\r
80 {\r
81 fprintf(f," xmlns:%s=\"%s\"",\r
82 namespaces[i].getPrefix().c_str(),\r
83 namespaces[i].getNamespaceURI().c_str());\r
84 }\r
85 fprintf(f,">\n");\r
86 \r
87 //Between the tags\r
88 if (value.size() > 0)\r
89 {\r
90 for (int i=0;i<indent;i++)\r
91 fputc(' ', f);\r
92 fprintf(f," %s\n", value.c_str());\r
93 }\r
94 \r
95 for (int i=0 ; i<children.size() ; i++)\r
96 children[i]->writeIndentedRecursive(f, indent+2);\r
97 \r
98 //Closing tag\r
99 for (int i=0; i<indent; i++)\r
100 fputc(' ',f);\r
101 fprintf(f,"</%s>\n", name.c_str());\r
102 }\r
103 \r
104 void Element::writeIndented(FILE *f)\r
105 {\r
106 writeIndentedRecursive(f, 0);\r
107 }\r
108 \r
109 void Element::print()\r
110 {\r
111 writeIndented(stdout);\r
112 }\r
113 \r
114 \r
115 //########################################################################\r
116 //# P A R S E R\r
117 //########################################################################\r
118 \r
119 \r
120 \r
/**
 * One predefined XML entity: its escaped spelling as it appears in a
 * document, and the single character it stands for.
 */
typedef struct
{
    const char *escaped;  // e.g. "&amp;"; NULL in the terminating entry
    char        value;    // decoded character; '\0' in the terminating entry
} EntityEntry;

/**
 * The five entities predefined by XML. The table ends with an entry whose
 * 'value' is '\0'; the lookup loops stop on that sentinel.
 */
static EntityEntry entities[] =
{
    { "&amp;"  , '&'  },
    { "&lt;"   , '<'  },
    { "&gt;"   , '>'  },
    { "&apos;" , '\'' },
    { "&quot;" , '"'  },
    { NULL     , '\0' }
};
136 \r
137 \r
138 \r
139 void Parser::getLineAndColumn(long pos, long *lineNr, long *colNr)\r
140 {\r
141 long line = 1;\r
142 long col = 1;\r
143 for (long i=0 ; i<pos ; i++)\r
144 {\r
145 XMLCh ch = parsebuf[i];\r
146 if (ch == '\n' || ch == '\r')\r
147 {\r
148 col = 0;\r
149 line ++;\r
150 }\r
151 else\r
152 col++;\r
153 }\r
154 *lineNr = line;\r
155 *colNr = col;\r
156 \r
157 }\r
158 \r
159 \r
160 void Parser::error(char *fmt, ...)\r
161 {\r
162 long lineNr;\r
163 long colNr;\r
164 getLineAndColumn(currentPosition, &lineNr, &colNr);\r
165 va_list args;\r
166 fprintf(stderr, "xml error at line %d, column %d:", lineNr, colNr);\r
167 va_start(args,fmt);\r
168 vfprintf(stderr,fmt,args);\r
169 va_end(args) ;\r
170 fprintf(stderr, "\n");\r
171 }\r
172 \r
173 \r
174 \r
175 int Parser::peek(long pos)\r
176 {\r
177 if (pos >= parselen)\r
178 return -1;\r
179 currentPosition = pos;\r
180 int ch = parsebuf[pos];\r
181 //printf("ch:%c\n", ch);\r
182 return ch;\r
183 }\r
184 \r
185 \r
186 \r
187 int Parser::match(long p0, const char *text)\r
188 {\r
189 int p = p0;\r
190 while (*text)\r
191 {\r
192 if (peek(p) != *text)\r
193 return p0;\r
194 p++; text++;\r
195 }\r
196 return p;\r
197 }\r
198 \r
199 \r
200 \r
201 int Parser::skipwhite(long p)\r
202 {\r
203 \r
204 while (p<parselen)\r
205 {\r
206 int p2 = match(p, "<!--");\r
207 if (p2 > p)\r
208 {\r
209 p = p2;\r
210 while (p<parselen)\r
211 {\r
212 p2 = match(p, "-->");\r
213 if (p2 > p)\r
214 {\r
215 p = p2;\r
216 break;\r
217 }\r
218 p++;\r
219 }\r
220 }\r
221 XMLCh b = peek(p);\r
222 if (!isspace(b))\r
223 break;\r
224 p++;\r
225 }\r
226 return p;\r
227 }\r
228 \r
229 /* modify this to allow all chars for an element or attribute name*/\r
230 int Parser::getWord(int p0, DOMString &buf)\r
231 {\r
232 int p = p0;\r
233 while (p<parselen)\r
234 {\r
235 XMLCh b = peek(p);\r
236 if (b<=' ' || b=='/' || b=='>' || b=='=')\r
237 break;\r
238 buf.push_back(b);\r
239 p++;\r
240 }\r
241 return p;\r
242 }\r
243 \r
244 int Parser::getQuoted(int p0, DOMString &buf, int do_i_parse)\r
245 {\r
246 \r
247 int p = p0;\r
248 if (peek(p) != '"' && peek(p) != '\'')\r
249 return p0;\r
250 p++;\r
251 \r
252 while ( p<parselen )\r
253 {\r
254 XMLCh b = peek(p);\r
255 if (b=='"' || b=='\'')\r
256 break;\r
257 if (b=='&' && do_i_parse)\r
258 {\r
259 bool found = false;\r
260 for (EntityEntry *ee = entities ; ee->value ; ee++)\r
261 {\r
262 int p2 = match(p, ee->escaped);\r
263 if (p2>p)\r
264 {\r
265 buf.push_back(ee->value);\r
266 p = p2;\r
267 found = true;\r
268 break;\r
269 }\r
270 }\r
271 if (!found)\r
272 {\r
273 error("unterminated entity");\r
274 return false;\r
275 }\r
276 }\r
277 else\r
278 {\r
279 buf.push_back(b);\r
280 p++;\r
281 }\r
282 }\r
283 return p;\r
284 }\r
285 \r
286 int Parser::parseVersion(int p0)\r
287 {\r
288 //printf("### parseVersion: %d\n", p0);\r
289 \r
290 int p = p0;\r
291 \r
292 p = skipwhite(p0);\r
293 \r
294 if (peek(p) != '<')\r
295 return p0;\r
296 \r
297 p++;\r
298 if (p>=parselen || peek(p)!='?')\r
299 return p0;\r
300 \r
301 p++;\r
302 \r
303 DOMString buf;\r
304 \r
305 while (p<parselen)\r
306 {\r
307 XMLCh ch = peek(p++);\r
308 if (ch=='?')\r
309 break;\r
310 buf.push_back(ch);\r
311 }\r
312 if (peek(p) != '>')\r
313 return p0;\r
314 p++;\r
315 \r
316 //printf("Got version:%s\n",buf.c_str());\r
317 return p;\r
318 }\r
319 \r
320 int Parser::parseDoctype(int p0)\r
321 {\r
322 //printf("### parseDoctype: %d\n", p0);\r
323 \r
324 int p = p0;\r
325 p = skipwhite(p);\r
326 \r
327 if (p>=parselen || peek(p)!='<')\r
328 return p0;\r
329 \r
330 p++;\r
331 \r
332 if (peek(p)!='!' || peek(p+1)=='-')\r
333 return p0;\r
334 p++;\r
335 \r
336 DOMString buf;\r
337 while (p<parselen)\r
338 {\r
339 XMLCh ch = peek(p);\r
340 if (ch=='>')\r
341 {\r
342 p++;\r
343 break;\r
344 }\r
345 buf.push_back(ch);\r
346 p++;\r
347 }\r
348 \r
349 //printf("Got doctype:%s\n",buf.c_str());\r
350 return p;\r
351 }\r
352 \r
353 int Parser::parseElement(int p0, Element *par,int depth)\r
354 {\r
355 \r
356 int p = p0;\r
357 \r
358 int p2 = p;\r
359 \r
360 p = skipwhite(p);\r
361 \r
362 //## Get open tag\r
363 XMLCh ch = peek(p);\r
364 if (ch!='<')\r
365 return p0;\r
366 \r
367 p++;\r
368 \r
369 DOMString openTagName;\r
370 p = skipwhite(p);\r
371 p = getWord(p, openTagName);\r
372 //printf("####tag :%s\n", openTagName.c_str());\r
373 p = skipwhite(p);\r
374 \r
375 //Add element to tree\r
376 Element *n = new Element(openTagName);\r
377 n->parent = par;\r
378 par->addChild(n);\r
379 \r
380 // Get attributes\r
381 if (peek(p) != '>')\r
382 {\r
383 while (p<parselen)\r
384 {\r
385 p = skipwhite(p);\r
386 ch = peek(p);\r
387 //printf("ch:%c\n",ch);\r
388 if (ch=='>')\r
389 break;\r
390 else if (ch=='/' && p<parselen+1)\r
391 {\r
392 p++;\r
393 p = skipwhite(p);\r
394 ch = peek(p);\r
395 if (ch=='>')\r
396 {\r
397 p++;\r
398 //printf("quick close\n");\r
399 return p;\r
400 }\r
401 }\r
402 DOMString attrName;\r
403 p2 = getWord(p, attrName);\r
404 if (p2==p)\r
405 break;\r
406 //printf("name:%s",buf);\r
407 p=p2;\r
408 p = skipwhite(p);\r
409 ch = peek(p);\r
410 //printf("ch:%c\n",ch);\r
411 if (ch!='=')\r
412 break;\r
413 p++;\r
414 p = skipwhite(p);\r
415 // ch = parsebuf[p];\r
416 // printf("ch:%c\n",ch);\r
417 DOMString attrVal;\r
418 p2 = getQuoted(p, attrVal, true);\r
419 p=p2+1;\r
420 //printf("name:'%s' value:'%s'\n",attrName.c_str(),attrVal.c_str());\r
421 char *namestr = (char *)attrName.c_str();\r
422 if (strncmp(namestr, "xmlns:", 6)==0)\r
423 n->addNamespace(attrName, attrVal);\r
424 else\r
425 n->addAttribute(attrName, attrVal);\r
426 }\r
427 }\r
428 \r
429 bool cdata = false;\r
430 \r
431 p++;\r
432 // ### Get intervening data ### */\r
433 DOMString data;\r
434 while (p<parselen)\r
435 {\r
436 //# COMMENT\r
437 p2 = match(p, "<!--");\r
438 if (!cdata && p2>p)\r
439 {\r
440 p = p2;\r
441 while (p<parselen)\r
442 {\r
443 p2 = match(p, "-->");\r
444 if (p2 > p)\r
445 {\r
446 p = p2;\r
447 break;\r
448 }\r
449 p++;\r
450 }\r
451 }\r
452 \r
453 ch = peek(p);\r
454 //# END TAG\r
455 if (ch=='<' && !cdata && peek(p+1)=='/')\r
456 {\r
457 break;\r
458 }\r
459 //# CDATA\r
460 p2 = match(p, "<![CDATA[");\r
461 if (p2 > p)\r
462 {\r
463 cdata = true;\r
464 p = p2;\r
465 continue;\r
466 }\r
467 \r
468 //# CHILD ELEMENT\r
469 if (ch == '<')\r
470 {\r
471 p2 = parseElement(p, n, depth+1);\r
472 if (p2 == p)\r
473 {\r
474 /*\r
475 printf("problem on element:%s. p2:%d p:%d\n",\r
476 openTagName.c_str(), p2, p);\r
477 */\r
478 return p0;\r
479 }\r
480 p = p2;\r
481 continue;\r
482 }\r
483 //# ENTITY\r
484 if (ch=='&' && !cdata)\r
485 {\r
486 bool found = false;\r
487 for (EntityEntry *ee = entities ; ee->value ; ee++)\r
488 {\r
489 int p2 = match(p, ee->escaped);\r
490 if (p2>p)\r
491 {\r
492 data.push_back(ee->value);\r
493 p = p2;\r
494 found = true;\r
495 break;\r
496 }\r
497 }\r
498 if (!found)\r
499 {\r
500 error("unterminated entity");\r
501 return -1;\r
502 }\r
503 continue;\r
504 }\r
505 \r
506 //# NONE OF THE ABOVE\r
507 data.push_back(ch);\r
508 p++;\r
509 }/*while*/\r
510 \r
511 \r
512 n->value = data;\r
513 //printf("%d : data:%s\n",p,data.c_str());\r
514 \r
515 //## Get close tag\r
516 p = skipwhite(p);\r
517 ch = peek(p);\r
518 if (ch != '<')\r
519 {\r
520 error("no < for end tag\n");\r
521 return p0;\r
522 }\r
523 p++;\r
524 ch = peek(p);\r
525 if (ch != '/')\r
526 {\r
527 error("no / on end tag");\r
528 return p0;\r
529 }\r
530 p++;\r
531 ch = peek(p);\r
532 p = skipwhite(p);\r
533 DOMString closeTagName;\r
534 p = getWord(p, closeTagName);\r
535 if (openTagName != closeTagName)\r
536 {\r
537 error("Mismatched closing tag. Expected </%S>. Got '%S'.",\r
538 openTagName.c_str(), closeTagName.c_str());\r
539 return p0;\r
540 }\r
541 p = skipwhite(p);\r
542 if (peek(p) != '>')\r
543 {\r
544 error("no > on end tag for '%s'", closeTagName.c_str());\r
545 return p0;\r
546 }\r
547 p++;\r
548 // printf("close element:%s\n",closeTagName.c_str());\r
549 p = skipwhite(p);\r
550 return p;\r
551 }\r
552 \r
553 \r
554 \r
555 \r
556 Element *Parser::parse(XMLCh *buf,int pos,int len)\r
557 {\r
558 parselen = len;\r
559 parsebuf = buf;\r
560 Element *rootNode = new Element("root");\r
561 pos = parseVersion(pos);\r
562 pos = parseDoctype(pos);\r
563 pos = parseElement(pos, rootNode, 0);\r
564 return rootNode;\r
565 }\r
566 \r
567 \r
568 Element *Parser::parse(const char *buf, int pos, int len)\r
569 {\r
570 \r
571 XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));\r
572 long i = 0;\r
573 while (i< len)\r
574 {\r
575 charbuf[i] = (XMLCh)buf[i];\r
576 i++;\r
577 }\r
578 charbuf[i] = '\0';\r
579 Element *n = parse(charbuf, 0, len-1);\r
580 free(charbuf);\r
581 return n;\r
582 }\r
583 \r
584 Element *Parser::parse(const DOMString &buf)\r
585 {\r
586 long len = buf.size();\r
587 XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));\r
588 long i = 0;\r
589 while (i< len)\r
590 {\r
591 charbuf[i] = (XMLCh)buf[i];\r
592 i++;\r
593 }\r
594 charbuf[i] = '\0';\r
595 Element *n = parse(charbuf, 0, len-1);\r
596 free(charbuf);\r
597 return n;\r
598 }\r
599 \r
600 Element *Parser::parseFile(const char *fileName)\r
601 {\r
602 \r
603 //##### LOAD INTO A CHAR BUF, THEN CONVERT TO XMLCh\r
604 if (!fileName)\r
605 return NULL;\r
606 \r
607 FILE *f = fopen(fileName, "rb");\r
608 if (!f)\r
609 return NULL;\r
610 \r
611 struct stat statBuf;\r
612 if (fstat(fileno(f),&statBuf)<0)\r
613 {\r
614 fclose(f);\r
615 return NULL;\r
616 }\r
617 long filelen = statBuf.st_size;\r
618 \r
619 //printf("length:%d\n",filelen);\r
620 XMLCh *charbuf = (XMLCh *)malloc((filelen+1) * sizeof(XMLCh));\r
621 for (XMLCh *p=charbuf ; !feof(f) ; p++)\r
622 {\r
623 *p = (XMLCh)fgetc(f);\r
624 }\r
625 fclose(f);\r
626 charbuf[filelen] = '\0';\r
627 \r
628 \r
629 /*\r
630 printf("nrbytes:%d\n",wc_count);\r
631 printf("buf:%ls\n======\n",charbuf);\r
632 */\r
633 Element *n = parse(charbuf, 0, filelen-1);\r
634 free(charbuf);\r
635 return n;\r
636 }\r
637 \r
638 \r
639 \r
640 \r
641 \r
642 \r
643 \r
644 }//namespace MiniDom\r
645 //########################################################################\r
646 //# T E S T\r
647 //########################################################################\r
648 \r
649 bool doTest(char *fileName)\r
650 {\r
651 MiniDom::Parser parser;\r
652 \r
653 MiniDom::Element *elem = parser.parseFile(fileName);\r
654 \r
655 if (!elem)\r
656 {\r
657 printf("Parsing failed\n");\r
658 return false;\r
659 }\r
660 \r
661 elem->print();\r
662 \r
663 delete elem;\r
664 \r
665 return true;\r
666 }\r
667 \r
668 \r
669 \r
670 int main(int argc, char **argv)\r
671 {\r
672 if (argc != 2)\r
673 {\r
674 printf("usage: %s <xmlfile>\n", argv[0]);\r
675 return 1;\r
676 }\r
677 \r
678 if (!doTest(argv[1]))\r
679 return 1;\r
680 \r
681 return 0;\r
682 }\r
683 \r
684 \r
685 \r
686 //########################################################################\r
687 //# E N D O F F I L E\r
688 //########################################################################\r
689 \r
690 \r