4 #include <stdio.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <malloc.h>
8 #include <sys/types.h>
9 #include <sys/stat.h>
12 #include "minidom.h"
14 namespace MiniDom
15 {
19 //########################################################################
20 //# E L E M E N T
21 //########################################################################
23 void Element::findElementsRecursive(std::vector<Element *>&res, const DOMString &name)
24 {
25 if (getName() == name)
26 res.push_back(this);
27 for (unsigned int i=0; i<children.size() ; i++)
28 children[i]->findElementsRecursive(res, name);
29 }
31 std::vector<Element *> Element::findElements(const DOMString &name)
32 {
33 std::vector<Element *> res;
34 findElementsRecursive(res, name);
35 return res;
36 }
38 DOMString Element::getAttribute(const DOMString &name)
39 {
40 for (unsigned int i=0 ; i<attributes.size() ; i++)
41 if (attributes[i].getName() ==name)
42 return attributes[i].getValue();
43 return "";
44 }
46 void Element::addChild(Element *child)
47 {
48 children.push_back(child);
49 }
52 void Element::addAttribute(const DOMString &name, const DOMString &value)
53 {
54 Attribute attr(name, value);
55 attributes.push_back(attr);
56 }
58 void Element::addNamespace(const DOMString &prefix, const DOMString &namespaceURI)
59 {
60 Namespace ns(prefix, namespaceURI);
61 namespaces.push_back(ns);
62 }
64 void Element::writeIndentedRecursive(FILE *f, int indent)
65 {
66 int i;
67 if (!f)
68 return;
69 //Opening tag, and attributes
70 for (i=0;i<indent;i++)
71 fputc(' ',f);
72 fprintf(f,"<%s",name.c_str());
73 for (unsigned int i=0 ; i<attributes.size() ; i++)
74 {
75 fprintf(f," %s=\"%s\"",
76 attributes[i].getName().c_str(),
77 attributes[i].getValue().c_str());
78 }
79 for (unsigned int i=0 ; i<namespaces.size() ; i++)
80 {
81 fprintf(f," xmlns:%s=\"%s\"",
82 namespaces[i].getPrefix().c_str(),
83 namespaces[i].getNamespaceURI().c_str());
84 }
85 fprintf(f,">\n");
87 //Between the tags
88 if (value.size() > 0)
89 {
90 for (int i=0;i<indent;i++)
91 fputc(' ', f);
92 fprintf(f," %s\n", value.c_str());
93 }
95 for (unsigned int i=0 ; i<children.size() ; i++)
96 children[i]->writeIndentedRecursive(f, indent+2);
98 //Closing tag
99 for (int i=0; i<indent; i++)
100 fputc(' ',f);
101 fprintf(f,"</%s>\n", name.c_str());
102 }
104 void Element::writeIndented(FILE *f)
105 {
106 writeIndentedRecursive(f, 0);
107 }
109 void Element::print()
110 {
111 writeIndented(stdout);
112 }
115 //########################################################################
116 //# P A R S E R
117 //########################################################################
/**
 * One predefined XML entity: the escaped spelling as it appears in a
 * document, and the single character it stands for.
 */
typedef struct
{
    const char *escaped;   // entity reference text, e.g. "&amp;"
    char        value;     // character the reference expands to
} EntityEntry;

/**
 * The five predefined XML entities.  The table ends with an entry whose
 * value is '\0'; loops over the table stop on that sentinel.
 */
static EntityEntry entities[] =
{
    { "&amp;"  , '&'  },
    { "&lt;"   , '<'  },
    { "&gt;"   , '>'  },
    { "&apos;" , '\'' },
    { "&quot;" , '"'  },
    { NULL     , '\0' }
};
139 int Parser::countLines(int begin, int end)
140 {
141 int count = 0;
142 for (int i=begin ; i<end ; i++)
143 {
144 XMLCh ch = parsebuf[i];
145 if (ch == '\n' || ch == '\r')
146 count++;
147 }
148 return count;
149 }
152 void Parser::getLineAndColumn(int pos, int *lineNr, int *colNr)
153 {
154 int line = 1;
155 int col = 1;
156 for (int i=0 ; i<pos ; i++)
157 {
158 XMLCh ch = parsebuf[i];
159 if (ch == '\n' || ch == '\r')
160 {
161 col = 0;
162 line ++;
163 }
164 else
165 col++;
166 }
167 *lineNr = line;
168 *colNr = col;
170 }
173 void Parser::error(char *fmt, ...)
174 {
175 int lineNr;
176 int colNr;
177 getLineAndColumn(currentPosition, &lineNr, &colNr);
178 va_list args;
179 fprintf(stderr, "xml error at line %d, column %d:", lineNr, colNr);
180 va_start(args,fmt);
181 vfprintf(stderr,fmt,args);
182 va_end(args) ;
183 fprintf(stderr, "\n");
184 }
188 int Parser::peek(int pos)
189 {
190 if (pos >= parselen)
191 return -1;
192 currentPosition = pos;
193 int ch = parsebuf[pos];
194 //printf("ch:%c\n", ch);
195 return ch;
196 }
200 int Parser::match(int p0, const char *text)
201 {
202 int p = p0;
203 while (*text)
204 {
205 if (peek(p) != *text)
206 return p0;
207 p++; text++;
208 }
209 return p;
210 }
214 int Parser::skipwhite(int p)
215 {
217 while (p<parselen)
218 {
219 int p2 = match(p, "<!--");
220 if (p2 > p)
221 {
222 p = p2;
223 while (p<parselen)
224 {
225 p2 = match(p, "-->");
226 if (p2 > p)
227 {
228 p = p2;
229 break;
230 }
231 p++;
232 }
233 }
234 XMLCh b = peek(p);
235 if (!isspace(b))
236 break;
237 p++;
238 }
239 return p;
240 }
242 /* modify this to allow all chars for an element or attribute name*/
243 int Parser::getWord(int p0, DOMString &buf)
244 {
245 int p = p0;
246 while (p<parselen)
247 {
248 XMLCh b = peek(p);
249 if (b<=' ' || b=='/' || b=='>' || b=='=')
250 break;
251 buf.push_back(b);
252 p++;
253 }
254 return p;
255 }
257 int Parser::getQuoted(int p0, DOMString &buf, int do_i_parse)
258 {
260 int p = p0;
261 if (peek(p) != '"' && peek(p) != '\'')
262 return p0;
263 p++;
265 while ( p<parselen )
266 {
267 XMLCh b = peek(p);
268 if (b=='"' || b=='\'')
269 break;
270 if (b=='&' && do_i_parse)
271 {
272 bool found = false;
273 for (EntityEntry *ee = entities ; ee->value ; ee++)
274 {
275 int p2 = match(p, ee->escaped);
276 if (p2>p)
277 {
278 buf.push_back(ee->value);
279 p = p2;
280 found = true;
281 break;
282 }
283 }
284 if (!found)
285 {
286 error("unterminated entity");
287 return false;
288 }
289 }
290 else
291 {
292 buf.push_back(b);
293 p++;
294 }
295 }
296 return p;
297 }
299 int Parser::parseVersion(int p0)
300 {
301 //printf("### parseVersion: %d\n", p0);
303 int p = p0;
305 p = skipwhite(p0);
307 if (peek(p) != '<')
308 return p0;
310 p++;
311 if (p>=parselen || peek(p)!='?')
312 return p0;
314 p++;
316 DOMString buf;
318 while (p<parselen)
319 {
320 XMLCh ch = peek(p++);
321 if (ch=='?')
322 break;
323 buf.push_back(ch);
324 }
325 if (peek(p) != '>')
326 return p0;
327 p++;
329 //printf("Got version:%s\n",buf.c_str());
330 return p;
331 }
333 int Parser::parseDoctype(int p0)
334 {
335 //printf("### parseDoctype: %d\n", p0);
337 int p = p0;
338 p = skipwhite(p);
340 if (p>=parselen || peek(p)!='<')
341 return p0;
343 p++;
345 if (peek(p)!='!' || peek(p+1)=='-')
346 return p0;
347 p++;
349 DOMString buf;
350 while (p<parselen)
351 {
352 XMLCh ch = peek(p);
353 if (ch=='>')
354 {
355 p++;
356 break;
357 }
358 buf.push_back(ch);
359 p++;
360 }
362 //printf("Got doctype:%s\n",buf.c_str());
363 return p;
364 }
366 int Parser::parseElement(int p0, Element *par,int lineNr)
367 {
369 int p = p0;
371 int p2 = p;
373 p = skipwhite(p);
375 //## Get open tag
376 XMLCh ch = peek(p);
377 if (ch!='<')
378 return p0;
380 p++;
382 DOMString openTagName;
383 p = skipwhite(p);
384 p = getWord(p, openTagName);
385 //printf("####tag :%s\n", openTagName.c_str());
386 p = skipwhite(p);
388 //Add element to tree
389 Element *n = new Element(openTagName);
390 n->parent = par;
391 n->line = lineNr + countLines(p0, p);
392 par->addChild(n);
394 // Get attributes
395 if (peek(p) != '>')
396 {
397 while (p<parselen)
398 {
399 p = skipwhite(p);
400 ch = peek(p);
401 //printf("ch:%c\n",ch);
402 if (ch=='>')
403 break;
404 else if (ch=='/' && p<parselen+1)
405 {
406 p++;
407 p = skipwhite(p);
408 ch = peek(p);
409 if (ch=='>')
410 {
411 p++;
412 //printf("quick close\n");
413 return p;
414 }
415 }
416 DOMString attrName;
417 p2 = getWord(p, attrName);
418 if (p2==p)
419 break;
420 //printf("name:%s",buf);
421 p=p2;
422 p = skipwhite(p);
423 ch = peek(p);
424 //printf("ch:%c\n",ch);
425 if (ch!='=')
426 break;
427 p++;
428 p = skipwhite(p);
429 // ch = parsebuf[p];
430 // printf("ch:%c\n",ch);
431 DOMString attrVal;
432 p2 = getQuoted(p, attrVal, true);
433 p=p2+1;
434 //printf("name:'%s' value:'%s'\n",attrName.c_str(),attrVal.c_str());
435 char *namestr = (char *)attrName.c_str();
436 if (strncmp(namestr, "xmlns:", 6)==0)
437 n->addNamespace(attrName, attrVal);
438 else
439 n->addAttribute(attrName, attrVal);
440 }
441 }
443 bool cdata = false;
445 p++;
446 // ### Get intervening data ### */
447 DOMString data;
448 while (p<parselen)
449 {
450 //# COMMENT
451 p2 = match(p, "<!--");
452 if (!cdata && p2>p)
453 {
454 p = p2;
455 while (p<parselen)
456 {
457 p2 = match(p, "-->");
458 if (p2 > p)
459 {
460 p = p2;
461 break;
462 }
463 p++;
464 }
465 }
467 ch = peek(p);
468 //# END TAG
469 if (ch=='<' && !cdata && peek(p+1)=='/')
470 {
471 break;
472 }
473 //# CDATA
474 p2 = match(p, "<![CDATA[");
475 if (p2 > p)
476 {
477 cdata = true;
478 p = p2;
479 continue;
480 }
482 //# CHILD ELEMENT
483 if (ch == '<')
484 {
485 p2 = parseElement(p, n, lineNr + countLines(p0, p));
486 if (p2 == p)
487 {
488 /*
489 printf("problem on element:%s. p2:%d p:%d\n",
490 openTagName.c_str(), p2, p);
491 */
492 return p0;
493 }
494 p = p2;
495 continue;
496 }
497 //# ENTITY
498 if (ch=='&' && !cdata)
499 {
500 bool found = false;
501 for (EntityEntry *ee = entities ; ee->value ; ee++)
502 {
503 int p2 = match(p, ee->escaped);
504 if (p2>p)
505 {
506 data.push_back(ee->value);
507 p = p2;
508 found = true;
509 break;
510 }
511 }
512 if (!found)
513 {
514 error("unterminated entity");
515 return -1;
516 }
517 continue;
518 }
520 //# NONE OF THE ABOVE
521 data.push_back(ch);
522 p++;
523 }/*while*/
526 n->value = data;
527 //printf("%d : data:%s\n",p,data.c_str());
529 //## Get close tag
530 p = skipwhite(p);
531 ch = peek(p);
532 if (ch != '<')
533 {
534 error("no < for end tag\n");
535 return p0;
536 }
537 p++;
538 ch = peek(p);
539 if (ch != '/')
540 {
541 error("no / on end tag");
542 return p0;
543 }
544 p++;
545 ch = peek(p);
546 p = skipwhite(p);
547 DOMString closeTagName;
548 p = getWord(p, closeTagName);
549 if (openTagName != closeTagName)
550 {
551 error("Mismatched closing tag. Expected </%s>. Got '%s'.",
552 openTagName.c_str(), closeTagName.c_str());
553 return p0;
554 }
555 p = skipwhite(p);
556 if (peek(p) != '>')
557 {
558 error("no > on end tag for '%s'", closeTagName.c_str());
559 return p0;
560 }
561 p++;
562 // printf("close element:%s\n",closeTagName.c_str());
563 p = skipwhite(p);
564 return p;
565 }
570 Element *Parser::parse(XMLCh *buf,int pos,int len)
571 {
572 parselen = len;
573 parsebuf = buf;
574 Element *rootNode = new Element("root");
575 pos = parseVersion(pos);
576 pos = parseDoctype(pos);
577 pos = parseElement(pos, rootNode, 1);
578 return rootNode;
579 }
582 Element *Parser::parse(const char *buf, int pos, int len)
583 {
585 XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));
586 long i = 0;
587 while (i< len)
588 {
589 charbuf[i] = (XMLCh)buf[i];
590 i++;
591 }
592 charbuf[i] = '\0';
593 Element *n = parse(charbuf, 0, len-1);
594 free(charbuf);
595 return n;
596 }
598 Element *Parser::parse(const DOMString &buf)
599 {
600 long len = buf.size();
601 XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));
602 long i = 0;
603 while (i< len)
604 {
605 charbuf[i] = (XMLCh)buf[i];
606 i++;
607 }
608 charbuf[i] = '\0';
609 Element *n = parse(charbuf, 0, len-1);
610 free(charbuf);
611 return n;
612 }
614 Element *Parser::parseFile(const char *fileName)
615 {
617 //##### LOAD INTO A CHAR BUF, THEN CONVERT TO XMLCh
618 if (!fileName)
619 return NULL;
621 FILE *f = fopen(fileName, "rb");
622 if (!f)
623 return NULL;
625 struct stat statBuf;
626 if (fstat(fileno(f),&statBuf)<0)
627 {
628 fclose(f);
629 return NULL;
630 }
631 long filelen = statBuf.st_size;
633 //printf("length:%d\n",filelen);
634 XMLCh *charbuf = (XMLCh *)malloc((filelen+1) * sizeof(XMLCh));
635 for (XMLCh *p=charbuf ; !feof(f) ; p++)
636 {
637 *p = (XMLCh)fgetc(f);
638 }
639 fclose(f);
640 charbuf[filelen] = '\0';
643 /*
644 printf("nrbytes:%d\n",wc_count);
645 printf("buf:%ls\n======\n",charbuf);
646 */
647 Element *n = parse(charbuf, 0, filelen-1);
648 free(charbuf);
649 return n;
650 }
658 }//namespace MiniDom
659 //########################################################################
660 //# T E S T
661 //########################################################################
663 bool doTest(char *fileName)
664 {
665 MiniDom::Parser parser;
667 MiniDom::Element *elem = parser.parseFile(fileName);
669 if (!elem)
670 {
671 printf("Parsing failed\n");
672 return false;
673 }
675 elem->print();
677 delete elem;
679 return true;
680 }
684 int main(int argc, char **argv)
685 {
686 if (argc != 2)
687 {
688 printf("usage: %s <xmlfile>\n", argv[0]);
689 return 1;
690 }
692 if (!doTest(argv[1]))
693 return 1;
695 return 0;
696 }
700 //########################################################################
701 //# E N D O F F I L E
702 //########################################################################