diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp
index c36eec961c7dbd05e7a4f9dddc2042cc2d10e796..6e6db723dbe22a39b63369a49f78bc0996f357e3 100644 (file)
--- a/src/dom/xmlreader.cpp
+++ b/src/dom/xmlreader.cpp
-/**\r
- * Phoebe DOM Implementation.\r
- *\r
- * This is a C++ approximation of the W3C DOM model, which follows\r
- * fairly closely the specifications in the various .idl files, copies of\r
- * which are provided for reference. Most important is this one:\r
- *\r
- * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html\r
- *\r
- * Authors:\r
- * Bob Jamison\r
- *\r
- * Copyright (C) 2005 Bob Jamison\r
- *\r
- * This library is free software; you can redistribute it and/or\r
- * modify it under the terms of the GNU Lesser General Public\r
- * License as published by the Free Software Foundation; either\r
- * version 2.1 of the License, or (at your option) any later version.\r
- *\r
- * This library is distributed in the hope that it will be useful,\r
- * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
- * Lesser General Public License for more details.\r
- *\r
- * You should have received a copy of the GNU Lesser General Public\r
- * License along with this library; if not, write to the Free Software\r
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\r
- */\r
-\r
-\r
-\r
-#include "xmlreader.h"\r
-#include "charclass.h"\r
-#include "domimpl.h"\r
-#include "svg/svgimpl.h"\r
-\r
-#include <stdio.h>\r
-#include <stdarg.h>\r
-\r
-namespace org\r
-{\r
-namespace w3c\r
-{\r
-namespace dom\r
-{\r
-\r
-\r
-//#########################################################################\r
-//# E N T I T Y T A B L E\r
-//#########################################################################\r
-struct EntityInfo\r
-{\r
- char *escape;\r
- int escapeLength;\r
- char *value;\r
-};\r
-\r
-\r
-static EntityInfo entityTable[] =\r
-{\r
- { "&" , 5 , "&" },\r
- { "<" , 4 , "<" },\r
- { ">" , 4 , ">" },\r
- { "'" , 6 , "'" },\r
- { """ , 6 , "\"" },\r
- { NULL , 0 , "\0" }\r
-};\r
-\r
-\r
-\r
-//#########################################################################\r
-//# M E S S A G E S\r
-//#########################################################################\r
-\r
-\r
-/**\r
- *\r
- */\r
-void XmlReader::error(char *fmt, ...)\r
-{\r
- va_list args;\r
- fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr);\r
- va_start(args, fmt);\r
- vfprintf(stderr, fmt, args);\r
- va_end(args) ;\r
- fprintf(stderr, "\n");\r
-}\r
-\r
-\r
-\r
-//#########################################################################\r
-//# U T I L I T Y\r
-//#########################################################################\r
-\r
-static void trim(DOMString &str)\r
-{\r
- int len = str.size();\r
- if (len<1)\r
- return;\r
-\r
- int start = 0;\r
- int end = 0;\r
- for (start=0 ; start<len ; start++)\r
- {\r
- int ch = str[start];\r
- if (ch<=' ' || ch>126)\r
- break;\r
- }\r
- for (end=len-1 ; end>=0 ; end--)\r
- {\r
- int ch = str[end];\r
- if (ch<=' ' || ch>126)\r
- break;\r
- }\r
- if (start<end)\r
- {\r
- str = str.substr(start, end+1);\r
- }\r
-}\r
-\r
-//#########################################################################\r
-//# P A R S I N G\r
-//#########################################################################\r
-\r
-/**\r
- * Get the character at the position and record the fact\r
- */\r
-int XmlReader::get(int p)\r
-{\r
- if (p >= len)\r
- return -1;\r
- int ch = parsebuf[p];\r
- //printf("%c", ch);\r
- if (ch == '\n' || ch == '\r')\r
- {\r
- colNr = 0;\r
- lineNr++;\r
- }\r
- else\r
- colNr++;\r
- return ch;\r
-}\r
-\r
-/**\r
- * Look at the character at the position, but don't note the fact\r
- */\r
-int XmlReader::peek(int p)\r
-{\r
- if (p >= len)\r
- return -1;\r
- int ch = parsebuf[p];\r
- return ch;\r
-}\r
-\r
-\r
-/**\r
- * Test if the given substring exists at the given position\r
- * in parsebuf. Use peek() in case of out-of-bounds\r
- */\r
-bool XmlReader::match(int pos, char *str)\r
-{\r
- while (*str)\r
- {\r
- if (peek(pos++) != *str++)\r
- return false;\r
- }\r
- return true;\r
-}\r
-\r
-\r
-\r
-/**\r
- * Test if the given substring exists at the given position\r
- * in a given buffer\r
- */\r
-/*\r
-static bool bufMatch(const DOMString &buf, int pos, char *str)\r
-{\r
- while (*str)\r
- {\r
- if (buf[pos++] != *str++)\r
- return false;\r
- }\r
- return true;\r
-}\r
-*/\r
-\r
-\r
-/**\r
- *\r
- */\r
-int XmlReader::skipwhite(int p)\r
-{\r
- while (p < len)\r
- {\r
- int b = get(p);\r
- if (!isWhitespace(b))\r
- break;\r
- p++;\r
- }\r
- return p;\r
-}\r
-\r
-/**\r
- * modify this to allow all chars for an element or attribute name\r
- */\r
-int XmlReader::getWord(int p, DOMString &result)\r
-{\r
- while (p<len)\r
- {\r
- int b = get(p);\r
- if (b<=' ' || b=='/' || b=='>' || b=='=')\r
- break;\r
- result.push_back((XMLCh)b);\r
- p++;\r
- }\r
- return p;\r
-}\r
-\r
-/**\r
- * get a name and prefix, if any\r
- */\r
-int XmlReader::getPrefixedWord(int p, DOMString &prefix,\r
- DOMString &shortWord, DOMString &fullWord)\r
-{\r
- while (p<len)\r
- {\r
- int b = get(p);\r
- if (b<=' ' || b=='/' || b=='>' || b=='=')\r
- break;\r
- else if (b == ':')\r
- {\r
- prefix = shortWord;\r
- shortWord = "";\r
- }\r
- else\r
- shortWord.push_back((XMLCh)b);\r
- p++;\r
- }\r
- if (prefix.size() > 0)\r
- fullWord = prefix + ":" + shortWord;\r
- else\r
- fullWord = shortWord;\r
- return p;\r
-}\r
-\r
-\r
-/**\r
- * Assume that we are starting on a quote. Ends on the char\r
- * after the final '"'\r
- */\r
-int XmlReader::getQuoted(int p0, DOMString &result)\r
-{\r
-\r
- int p = p0;\r
-\r
- if (peek(p)!='"' && peek(p)!='\'')\r
- return p0;\r
-\r
- int b = get(p++); //go to next char\r
-\r
- DOMString buf;\r
-\r
- while (p<len )\r
- {\r
- b = get(p++);\r
- if (b=='"' || b=='\'')\r
- break;\r
- else if (b=='&')\r
- {\r
- p = parseEntity(p, result);\r
- if (p < 0)\r
- return p0;\r
- }\r
- else\r
- {\r
- buf.push_back((XMLCh)b);\r
- }\r
- }\r
-\r
- //printf("quoted text:'%s'\n", buf.c_str());\r
-\r
- result.append(buf);\r
-\r
- return p;\r
-}\r
-\r
-\r
-\r
-/**\r
- * Parse a <!xml> tag. Node may be null. Assumes current char is '<'\r
- * ends on char after '>'\r
- */\r
-int XmlReader::parseVersion(int p0)\r
-{\r
- int p = p0;\r
-\r
- if (!match(p, "<?xml"))\r
- return p0;\r
-\r
- p += 5;\r
- colNr += 5;\r
-\r
- bool quickCloseDummy;\r
- Node *node = new NodeImpl();\r
- int p2 = parseAttributes(p, node, &quickCloseDummy);\r
- if (p2 < p)\r
- {\r
- delete node;\r
- return p0;\r
- }\r
- p = p2;\r
-\r
- //get the attributes that we need\r
- NamedNodeMap attributes = node->getAttributes();\r
- Node *attr = attributes.getNamedItem("version");\r
- if (attr)\r
- document->setXmlVersion(attr->getNodeValue());\r
- attr = attributes.getNamedItem("encoding");\r
- if (attr)\r
- { /*document->setXmlEncoding(attr->getNodeValue());*/ }\r
- attr = attributes.getNamedItem("standalone");\r
- if (attr)\r
- document->setXmlStandalone((attr->getNodeValue() == "yes"));\r
- delete node;\r
-\r
- //#now we should be pointing at '?>'\r
- if (!match(p, "?>"))\r
- {\r
- return p0;\r
- }\r
-\r
- //skip over '?>'\r
- get(p++);\r
- get(p++);\r
-\r
- return p;\r
-}\r
-\r
-\r
-/**\r
- * Parse a <!DOCTYPE> tag. doctype may be null. Expects '<'\r
- * on start. Ends pointing at char after '>'\r
- */\r
-int XmlReader::parseDoctype(int p0)\r
-{\r
- int p = p0;\r
-\r
- if (!match(p, "<!DOCTYPE"))\r
- return p0;\r
-\r
- p += 9;\r
- colNr += 9;\r
-\r
- DocumentType *doctype = document->getDoctype();\r
- if (!doctype)\r
- return p0;\r
-\r
-\r
- //### get the root name of the document\r
- p = skipwhite(p);\r
- DOMString rootName;\r
- int p2 = getWord(p, rootName);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- //printf("doctype root '%s'\n", rootName.c_str());\r
-\r
-\r
- while (p < len)\r
- {\r
- p = skipwhite(p);\r
- if (peek(p) == '>')\r
- break;\r
- else if (peek(p) == '[') //just ignore 'internal' [] stuff\r
- {\r
- while (p < len)\r
- {\r
- int ch = get(p++);\r
- if (ch == ']')\r
- break;\r
- }\r
- p++;\r
- }\r
- else if (match(p, "PUBLIC"))\r
- {\r
- p += 6;\r
- colNr += 6;\r
- p = skipwhite(p);\r
- DOMString pubIdLiteral;\r
- int p2 = getQuoted(p, pubIdLiteral);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- p = skipwhite(p);\r
- DOMString systemLiteral;\r
- p2 = getQuoted(p, systemLiteral);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- //printf("PUBLIC \"%s\" \"%s\" \n",\r
- // pubIdLiteral.c_str(), systemLiteral.c_str());\r
- }\r
- else if (match(p, "SYSTEM"))\r
- {\r
- p += 6;\r
- colNr += 6;\r
- p = skipwhite(p);\r
- DOMString systemLiteral;\r
- int p2 = getQuoted(p, systemLiteral);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- //printf("SYSTEM \"%s\" \n", systemLiteral.c_str());\r
- }\r
- }\r
-\r
-\r
- //skip over '>'\r
- get(p++);\r
-\r
- return p;\r
-}\r
-\r
-\r
-\r
-/**\r
- * Expects '<' on startup, ends on char after '>'\r
- */\r
-int XmlReader::parseComment(int p0, Comment *comment)\r
-{\r
- int p = p0;\r
-\r
- if (!match(p, "<!--"))\r
- return p0;\r
-\r
- colNr += 4;\r
- p += 4;\r
-\r
- DOMString buf;\r
-\r
- while (p<len-3)\r
- {\r
- if (match(p, "-->"))\r
- {\r
- p += 3;\r
- colNr += 3;\r
- break;\r
- }\r
- int ch = get(p++);\r
- buf.push_back((XMLCh)ch);\r
- }\r
-\r
- comment->setNodeValue(buf);\r
-\r
- return p;\r
-}\r
-\r
-\r
-\r
-/**\r
- *\r
- */\r
-int XmlReader::parseCDATA(int p0, CDATASection *cdata)\r
-{\r
-\r
- int p = p0;\r
-\r
- if (!match(p, "<![CDATA["))\r
- return p0;\r
-\r
- colNr += 9;\r
- p += 9;\r
-\r
- DOMString buf;\r
-\r
- while (p<len)\r
- {\r
- if (match(p, "]]>"))\r
- {\r
- p +=3;\r
- colNr += 3;\r
- break;\r
- }\r
- int ch = get(p++);\r
- buf.push_back((XMLCh)ch);\r
- }\r
-\r
- /*printf("Got CDATA:%s\n",buf.c_str());*/\r
- cdata->setNodeValue(buf);\r
-\r
- return p;\r
-}\r
-\r
-\r
-\r
-/**\r
- *\r
- */\r
-int XmlReader::parseText(int p0, Text *text)\r
-{\r
-\r
- int p = p0;\r
-\r
- DOMString buf;\r
-\r
- while (p<len)\r
- {\r
- if (peek(p) == '&')\r
- {\r
- p = parseEntity(p, buf);\r
- if (p < 0) //error?\r
- return p0;\r
- }\r
- else if (peek(p) == '<')\r
- {\r
- break;\r
- }\r
- else\r
- {\r
- int ch = get(p++);\r
- buf.push_back((XMLCh)ch);\r
- }\r
- }\r
-\r
- /*printf("Got Text:%s\n",buf.c_str());*/\r
- text->setNodeValue(buf);\r
-\r
- return p;\r
-}\r
-\r
-\r
-\r
-\r
-\r
-/**\r
- * Parses attributes of a node. Should end pointing at either the\r
- * '?' of a version or doctype tag, or a '>' of a normal tag\r
- */\r
-int XmlReader::parseAttributes(int p0, Node *node, bool *quickClose)\r
-{\r
- *quickClose = false;\r
-\r
- int p = p0;\r
-\r
- NamedNodeMap attributes;\r
-\r
- while (p<len)\r
- {\r
- /*printf("ch:%c\n",ch);*/\r
- p = skipwhite(p);\r
- int ch = get(p);\r
-\r
- /*printf("ch:%c\n",ch);*/\r
- if (ch == '?' || ch == '>')//done\r
- break;\r
- else if (ch=='/' && p<len+1)\r
- {\r
- p++;\r
- p = skipwhite(p);\r
- ch = peek(p);\r
- if (ch == '>')\r
- {\r
- p++;\r
- *quickClose = true;\r
- /*printf("quick close\n");*/\r
- return p;\r
- }\r
- }\r
- DOMString shortName;\r
- DOMString prefix;\r
- DOMString qualifiedName;\r
- int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName);\r
- if (p2 <= p)\r
- break;\r
-\r
- /*printf("name:%s",buf);*/\r
- p = p2;\r
- p = skipwhite(p);\r
- ch = get(p);\r
- /*printf("ch:%c\n",ch);*/\r
- if (ch != '=')\r
- break;\r
- p++;\r
- p = skipwhite(p);\r
- /*ch = parsebuf[p];*/\r
- /*printf("ch:%c\n",ch);*/\r
- DOMString attrValue;\r
- p2 = getQuoted(p, attrValue);\r
- p = p2;\r
- /*printf("name:'%s' value:'%s'\n",buf,buf2);*/\r
-\r
- DOMString namespaceURI = "";\r
- if (prefix == "xmlns" || shortName == "xmlns")\r
- namespaceURI = XMLNSNAME;\r
-\r
- //## Now let us make the attribute and give it to the node\r
- Attr *attr = document->createAttributeNS(namespaceURI, qualifiedName);\r
- attr->setValue(attrValue);\r
- node->getAttributes().setNamedItemNS(attr);\r
-\r
- }//while p<len\r
-\r
- return p;\r
-}\r
-\r
-/**\r
- * Appends the value of an entity to the buffer\r
- */\r
-int XmlReader::parseEntity(int p0, DOMString &buf)\r
-{\r
- int p = p0;\r
- for (EntityInfo *info = entityTable ; info->escape ; info++)\r
- {\r
- if (match(p, info->escape))\r
- {\r
- p += info->escapeLength;\r
- colNr += info->escapeLength;\r
- buf += info->value;\r
- return p;\r
- }\r
- }\r
-\r
- error("unterminated entity");\r
- return -1;\r
-}\r
-\r
-\r
-//#########################################################################\r
-//# P A R S E A N O D E\r
-//#########################################################################\r
-\r
-/**\r
- * Parse as a document, preserving the original structure as much as\r
- * possible\r
- */\r
-int XmlReader::parseNode(int p0, Node *node, int depth)\r
-{\r
-\r
- int p = p0;\r
-\r
-\r
- //### OPEN TAG\r
- int ch = get(p++);\r
- if (ch != '<')\r
- return p0;\r
-\r
- p = skipwhite(p);\r
- DOMString openTagName;\r
- DOMString openTagNamePrefix;\r
- DOMString openTagQualifiedName;\r
- int p2 = getPrefixedWord(p,openTagNamePrefix,\r
- openTagName, openTagQualifiedName);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- p = skipwhite(p);\r
-\r
- //printf("qualifiedName:%s\n", openTagQualifiedName.c_str());\r
- DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix);\r
- document->renameNode(node, namespaceURI, openTagQualifiedName);\r
-\r
- //### ATTRIBUTES\r
- bool quickClose;\r
- p = parseAttributes(p, node, &quickClose);\r
- if (quickClose) //trivial tag: <name/>\r
- return p;\r
-\r
- p++; //skip over '>'\r
-\r
-\r
- DOMString nodeValue;\r
-\r
- /* ### Get intervening data ### */\r
- while (p<len && keepGoing)\r
- {\r
- //### COMMENT\r
- if (match(p, "<!--"))\r
- {\r
- Comment *comment = document->createComment("");\r
- p2 = parseComment(p, comment);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- if (parseAsData)\r
- { //throw away\r
- delete comment;\r
- }\r
- else\r
- {\r
- node->appendChild(comment);\r
- }\r
- }\r
- //### VERSION\r
- else if (match(p, "<?xml"))\r
- {\r
- p2 = parseVersion(p);\r
- if (p2 <= p)\r
- return p0;\r
- }\r
- //### DOCTYPE\r
- else if (match(p, "<!DOCTYPE"))\r
- {\r
- p2 = parseDoctype(p);\r
- if (p2 <= p)\r
- return p0;\r
- }\r
- //### CDATA\r
- else if (match(p, "<![CDATA["))\r
- {\r
- CDATASection *cdata = document->createCDATASection("");\r
- p2 = parseCDATA(p, cdata);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- if (parseAsData)\r
- {\r
- nodeValue += cdata->getNodeValue();\r
- delete cdata;\r
- }\r
- else\r
- {\r
- node->appendChild(cdata);\r
- }\r
- }\r
- //### OPEN OR CLOSE TAG\r
- else if (peek(p) == '<')\r
- {\r
- p2 = skipwhite(p+1);\r
- if (peek(p2) =='/')\r
- {\r
- p = p2;\r
- break;\r
- }\r
- else\r
- {\r
- /*Add element to tree*/\r
- Element *elem = document->createElement(""); //fill in name later\r
- node->appendChild(elem);\r
- p2 = parseNode(p, elem, depth+1);\r
- if (p2 <= p)\r
- {\r
- /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/\r
- return p0;\r
- }\r
- p = p2;\r
- }\r
- }\r
- //### TEXT\r
- else\r
- {\r
- Text *text = document->createTextNode("");\r
- p2 = parseText(p, text);\r
- if (p2 <= p)\r
- return p0;\r
- p = p2;\r
- if (parseAsData)\r
- {\r
- nodeValue += text->getNodeValue();\r
- delete text;\r
- }\r
- else\r
- {\r
- node->appendChild(text);\r
- }\r
- }\r
-\r
- }//while (p<len)\r
-\r
- //printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str());\r
- trim(nodeValue);\r
- node->setNodeValue(nodeValue);\r
-\r
- //### get close tag. we should be pointing at '/'\r
- p = skipwhite(p);\r
- ch = get(p);\r
- if (ch != '/')\r
- {\r
- error("no / on end tag");\r
- return p0;\r
- }\r
- p++;\r
-\r
- //### get word after '/'\r
- p = skipwhite(p);\r
- DOMString closeTagName;\r
- DOMString closeTagNamePrefix;\r
- DOMString closeTagQualifiedName;\r
- p = getPrefixedWord(p, closeTagNamePrefix, closeTagName,\r
- closeTagQualifiedName);\r
- if (openTagQualifiedName != closeTagQualifiedName)\r
- {\r
- error("Mismatched closing tag. Expected </%S>. Got '%S'.",\r
- openTagQualifiedName.c_str(), closeTagQualifiedName.c_str());\r
- return p0;\r
- }\r
- p = skipwhite(p);\r
- if (parsebuf[p] != '>')\r
- {\r
- error("no > on end tag");\r
- return p0;\r
- }\r
- p++;\r
- /*printf("close element:%ls\n",buf);*/\r
- return p;\r
-}\r
-\r
-\r
-/**\r
- *\r
- */\r
-org::w3c::dom::Document *\r
-XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen)\r
-{\r
- len = parseLen;\r
- parsebuf = buf;\r
-\r
- DOMImplementationSourceImpl source;\r
- DOMImplementation *domImpl = source.getDOMImplementation("");\r
-\r
- keepGoing = true;\r
-\r
- document = domImpl->createDocument("", "", NULL);\r
- //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL);\r
-\r
- int p = bufferOffset;\r
- int p2 = 0;\r
-\r
- while (p<len && keepGoing)\r
- {\r
- p = skipwhite(p);\r
- //### COMMENT\r
- if (match(p, "<!--"))\r
- {\r
- Comment *comment = document->createComment("");\r
- p2 = parseComment(p, comment);\r
- if (p2 <= p)\r
- return document;\r
- p = p2;\r
- if (parseAsData)\r
- { //throw away\r
- delete comment;\r
- }\r
- else\r
- {\r
- document->appendChild(comment);\r
- }\r
- }\r
- //### VERSION\r
- else if (match(p, "<?xml"))\r
- {\r
- p2 = parseVersion(p);\r
- if (p2 <= p)\r
- return document;\r
- p = p2;\r
- }\r
- //### DOCTYPE\r
- else if (match(p, "<!DOCTYPE"))\r
- {\r
- p2 = parseDoctype(p);\r
- if (p2 <= p)\r
- return document;\r
- p = p2;\r
- }\r
- else\r
- {\r
- break;\r
- }\r
- }\r
-\r
- p = skipwhite(p);\r
- p = parseNode(p, document->getDocumentElement(), 0);\r
-\r
- keepGoing = false;\r
-\r
- return document;\r
-}\r
-\r
-\r
-/**\r
- *\r
- */\r
-org::w3c::dom::Document *\r
-XmlReader::parse(const DOMString &str)\r
-{\r
-\r
- Document *doc = parse(str, 0, str.size());\r
- doc->normalizeDocument();\r
-\r
- return doc;\r
-}\r
-\r
-/**\r
- *\r
- */\r
-org::w3c::dom::Document *\r
-XmlReader::parseFile(char *fileName)\r
-{\r
-\r
- DOMString buf = loadFile(fileName);\r
-\r
- Document *doc = parse(buf, 0, buf.size());\r
-\r
- return doc;\r
-}\r
-\r
-\r
-\r
-//#########################################################################\r
-//# S T R E A M R E A D I N G\r
-//#########################################################################\r
-\r
-/**\r
- *\r
- */\r
-org::w3c::dom::DOMString\r
-XmlReader::loadFile(char *fileName)\r
-{\r
-\r
- if (!fileName)\r
- return NULL;\r
- FILE *f = fopen(fileName, "rb");\r
- if (!f)\r
- return NULL;\r
-\r
- DOMString buf;\r
- while (!feof(f))\r
- {\r
- int ch = fgetc(f);\r
- if (ch<0)\r
- break;\r
- buf.push_back((XMLCh)ch);\r
- }\r
- fclose(f);\r
-\r
- return buf;\r
-}\r
-\r
-\r
-//#########################################################################\r
-//# C O N S T R U C T O R / D E S T R U C T O R\r
-//#########################################################################\r
-\r
-\r
-/**\r
- *\r
- */\r
-XmlReader::XmlReader()\r
-{\r
- len = 0;\r
- lineNr = 1;\r
- colNr = 0;\r
- parseAsData = false;\r
- keepGoing = false;\r
-}\r
-\r
-/**\r
- *\r
- */\r
-XmlReader::XmlReader(bool parseAsDataArg)\r
-{\r
- len = 0;\r
- lineNr = 1;\r
- colNr = 0;\r
- parseAsData = parseAsDataArg;\r
- keepGoing = false;\r
-}\r
-\r
-\r
-\r
-/**\r
- *\r
- */\r
-XmlReader::~XmlReader()\r
-{\r
-}\r
-\r
-\r
-} //namespace dom\r
-} //namespace w3c\r
-} //namespace org\r
-\r
-\r
-//#########################################################################\r
-//# E N D O F F I L E\r
-//#########################################################################\r
-\r
+/**
+ * Phoebe DOM Implementation.
+ *
+ * This is a C++ approximation of the W3C DOM model, which follows
+ * fairly closely the specifications in the various .idl files, copies of
+ * which are provided for reference. Most important is this one:
+ *
+ * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
+ *
+ * Authors:
+ * Bob Jamison
+ *
+ * Copyright (C) 2005 Bob Jamison
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+
+#include "xmlreader.h"
+#include "charclass.h"
+#include "domimpl.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+
+namespace org
+{
+namespace w3c
+{
+namespace dom
+{
+
+
+//#########################################################################
+//# E N T I T Y T A B L E
+//#########################################################################
+struct EntityInfo // one row of the entity lookup table scanned by parseEntity()
+{
+ char *escape; // entity text as it appears in the input; NULL marks the end of the table
+ int escapeLength; // number of input characters parseEntity() skips when 'escape' matches
+ char *value; // replacement text parseEntity() appends to the output buffer
+};
+
+
+/**
+ * The five predefined XML entities, in the order parseEntity() tries them.
+ * Each row is { text in the input, length of that text, replacement }.
+ * The length column must equal strlen() of the first column, because
+ * parseEntity() advances the read position by exactly that amount.
+ * The row whose first column is NULL terminates the table.
+ */
+static EntityInfo entityTable[] =
+{
+    { "&amp;"  , 5 , "&"  },
+    { "&lt;"   , 4 , "<"  },
+    { "&gt;"   , 4 , ">"  },
+    { "&apos;" , 6 , "'"  },
+    { "&quot;" , 6 , "\"" },
+    { NULL     , 0 , "\0" }
+};
+
+
+
+//#########################################################################
+//# M E S S A G E S
+//#########################################################################
+
+
+/**
+ * Report a parse problem on stderr.
+ *
+ * The message is prefixed with the reader's current lineNr/colNr and
+ * followed by a newline.
+ *
+ * @param fmt printf-style format string; the remaining arguments are
+ *            the values it refers to
+ */
+void XmlReader::error(char *fmt, ...)
+{
+    //where we are in the input
+    fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr);
+
+    //the caller's own message
+    va_list varArgs;
+    va_start(varArgs, fmt);
+    vfprintf(stderr, fmt, varArgs);
+    va_end(varArgs);
+
+    fputc('\n', stderr);
+}
+
+
+
+//#########################################################################
+//# U T I L I T Y
+//#########################################################################
+
+/**
+ * Strip leading and trailing blanks from str, in place.
+ *
+ * A character is treated as blank when it is <= ' ' or > 126, which is
+ * the classification this function has always used.
+ * NOTE(review): that also classifies non-ASCII characters as blank, so
+ * they are stripped from either end -- confirm this is intended.
+ *
+ * A string consisting only of blanks becomes the empty string.
+ */
+static void trim(DOMString &str)
+{
+    int len = str.size();
+    if (len<1)
+        return;
+
+    //find the first character that is NOT blank
+    int start = 0;
+    for ( ; start<len ; start++)
+    {
+        int ch = str[start];
+        if (ch>' ' && ch<=126)
+            break;
+    }
+    if (start >= len)
+    {
+        //nothing but blanks
+        str = "";
+        return;
+    }
+
+    //find the last character that is NOT blank.  The scan cannot run
+    //past 'start', which is already known to be non-blank.
+    int end = len-1;
+    for ( ; end>start ; end--)
+    {
+        int ch = str[end];
+        if (ch>' ' && ch<=126)
+            break;
+    }
+
+    //substr() takes (position, count), not (first, last)
+    str = str.substr(start, end-start+1);
+}
+
+//#########################################################################
+//# P A R S I N G
+//#########################################################################
+
+/**
+ * Read the character at position p and account for it in the
+ * line/column bookkeeping.
+ *
+ * A '\n' or '\r' bumps lineNr and resets colNr to 0; any other
+ * character bumps colNr.
+ *
+ * @return the character, or -1 (counters untouched) when p >= len
+ */
+int XmlReader::get(int p)
+{
+    if (p >= len)
+        return -1;
+
+    int current = parsebuf[p];
+    switch (current)
+    {
+        case '\n':
+        case '\r':
+            //start of a new line
+            colNr = 0;
+            lineNr++;
+            break;
+        default:
+            colNr++;
+            break;
+    }
+    return current;
+}
+
+/**
+ * Read the character at position p without touching lineNr/colNr.
+ *
+ * @return the character, or -1 when p >= len
+ */
+int XmlReader::peek(int p)
+{
+    if (p < len)
+    {
+        int current = parsebuf[p];
+        return current;
+    }
+    return -1;
+}
+
+
+/**
+ * Check whether the NUL-terminated text str occurs in parsebuf
+ * starting at position pos.
+ *
+ * Characters are fetched with peek(), so running off the end of the
+ * input simply yields a mismatch and the counters are not changed.
+ *
+ * @return true when every character of str matched (also for an
+ *         empty str), false otherwise
+ */
+bool XmlReader::match(int pos, char *str)
+{
+    for (char *expected = str ; *expected ; expected++, pos++)
+    {
+        if (peek(pos) != *expected)
+            return false;
+    }
+    return true;
+}
+
+
+
+/**
+ * Test if the given substring exists at the given position
+ * in a given buffer
+ */
+/*
+static bool bufMatch(const DOMString &buf, int pos, char *str)
+{
+ while (*str)
+ {
+ if (buf[pos++] != *str++)
+ return false;
+ }
+ return true;
+}
+*/
+
+
+/**
+ * Advance past whitespace, as classified by isWhitespace().
+ *
+ * Every character examined goes through get(), including the first
+ * non-whitespace one that stops the scan, so the line/column counters
+ * are updated for it as well.
+ *
+ * @return position of the first non-whitespace character at or after
+ *         p, or the position at which the input ran out
+ */
+int XmlReader::skipwhite(int p)
+{
+    for ( ; p < len ; p++)
+    {
+        int current = get(p);
+        if (!isWhitespace(current))
+            break;
+    }
+    return p;
+}
+
+/**
+ * Append to result the run of characters starting at p, up to but not
+ * including the first character that is <= ' ' or one of '/', '>', '='.
+ *
+ * TODO: modify this to allow all chars for an element or attribute name
+ *
+ * @return position of the terminating character, or of the end of the
+ *         input if no terminator was seen
+ */
+int XmlReader::getWord(int p, DOMString &result)
+{
+    for ( ; p<len ; p++)
+    {
+        int current = get(p);
+        bool terminator = (current<=' ') || (current=='/') ||
+                          (current=='>') || (current=='=');
+        if (terminator)
+            break;
+        result.push_back((XMLCh)current);
+    }
+    return p;
+}
+
+/**
+ * Read a possibly namespace-prefixed name starting at p.
+ *
+ * Scanning stops at the first character that is <= ' ' or one of
+ * '/', '>', '='.  Each ':' encountered moves what has been collected
+ * so far into prefix and restarts shortWord.
+ *
+ * @param prefix    receives the text before the last ':', if any
+ * @param shortWord receives the text after the last ':' (characters
+ *                  are appended to whatever it already holds)
+ * @param fullWord  receives "prefix:shortWord", or just shortWord
+ *                  when prefix is empty
+ * @return position of the terminating character, or of the end of the
+ *         input if no terminator was seen
+ */
+int XmlReader::getPrefixedWord(int p, DOMString &prefix,
+                DOMString &shortWord, DOMString &fullWord)
+{
+    for ( ; p<len ; p++)
+    {
+        int current = get(p);
+        if (current<=' ' || current=='/' || current=='>' || current=='=')
+            break;
+        if (current == ':')
+        {
+            //everything so far was the prefix
+            prefix = shortWord;
+            shortWord = "";
+            continue;
+        }
+        shortWord.push_back((XMLCh)current);
+    }
+
+    if (prefix.size() == 0)
+    {
+        fullWord = shortWord;
+        return p;
+    }
+    fullWord = prefix + ":" + shortWord;
+    return p;
+}
+
+
+/**
+ * Read a quoted literal that starts at p0 and append its content,
+ * with entities expanded, to result.
+ *
+ * The literal may be delimited by either '"' or '\''; it ends at the
+ * next occurrence of the SAME quote character that opened it, so the
+ * other quote character may appear inside the value.
+ *
+ * @return the position just after the closing quote (or the end of
+ *         the input if the literal is unterminated).  Returns p0,
+ *         leaving result untouched, when p0 is not on a quote or when
+ *         the value contains an entity parseEntity() rejects.
+ */
+int XmlReader::getQuoted(int p0, DOMString &result)
+{
+
+    int p = p0;
+
+    int quote = peek(p);
+    if (quote!='"' && quote!='\'')
+        return p0;
+
+    get(p++); //consume the opening quote
+
+    DOMString buf;
+
+    while (p<len)
+    {
+        int b = peek(p);
+        if (b == quote)
+        {
+            get(p++); //consume the closing quote
+            break;
+        }
+        else if (b=='&')
+        {
+            //parseEntity() must start ON the '&', and its output goes
+            //into buf so that it stays in order with the literal text
+            p = parseEntity(p, buf);
+            if (p < 0)
+                return p0;
+        }
+        else
+        {
+            get(p++);
+            buf.push_back((XMLCh)b);
+        }
+    }
+
+    //printf("quoted text:'%s'\n", buf.c_str());
+
+    result.append(buf);
+
+    return p;
+}
+
+
+
+/**
+ * Parse an XML declaration, "<?xml ... ?>", starting at p0.
+ *
+ * The pseudo-attributes are collected on a scratch node.  "version"
+ * and "standalone" are copied to the document; "encoding" is looked
+ * up but currently ignored.
+ *
+ * @return the position just after "?>", or p0 when the text at p0 is
+ *         not an XML declaration or is not closed by "?>"
+ */
+int XmlReader::parseVersion(int p0)
+{
+    if (!match(p0, "<?xml"))
+        return p0;
+
+    //step over "<?xml"
+    int p = p0 + 5;
+    colNr += 5;
+
+    //scratch node to hang the pseudo-attributes on; it is held by a
+    //smart pointer, so it must not be deleted by hand
+    bool ignoredQuickClose;
+    NodePtr node = new NodeImpl();
+    int afterAttributes = parseAttributes(p, node, &ignoredQuickClose);
+    if (afterAttributes < p)
+        return p0;
+    p = afterAttributes;
+
+    //pick out the pseudo-attributes we care about
+    NamedNodeMap attributes = node->getAttributes();
+
+    NodePtr item = attributes.getNamedItem("version");
+    if (item.get())
+        document->setXmlVersion(item->getNodeValue());
+
+    item = attributes.getNamedItem("encoding");
+    if (item.get())
+        { /*document->setXmlEncoding(item->getNodeValue());*/ }
+
+    item = attributes.getNamedItem("standalone");
+    if (item.get())
+        document->setXmlStandalone((item->getNodeValue() == "yes"));
+
+    //we should now be looking at the closing "?>"
+    if (!match(p, "?>"))
+        return p0;
+
+    //consume '?' and '>'
+    get(p);
+    get(p + 1);
+
+    return p + 2;
+}
+
+
+/**
+ * Parse a "<!DOCTYPE ...>" declaration starting at p0.
+ *
+ * The declaration is only skipped over: the root name, the PUBLIC and
+ * SYSTEM literals and the internal "[ ... ]" subset are read and then
+ * discarded.
+ *
+ * @return the position just after the closing '>'.  Returns p0 when
+ *         the text at p0 is not a DOCTYPE declaration, when the
+ *         document has no doctype object, when the root name is
+ *         missing, or when a PUBLIC/SYSTEM keyword is not followed by
+ *         the quoted literal(s) it requires.
+ */
+int XmlReader::parseDoctype(int p0)
+{
+    int p = p0;
+
+    if (!match(p, "<!DOCTYPE"))
+        return p0;
+
+    p += 9;
+    colNr += 9;
+
+    DocumentTypePtr doctype = document->getDoctype();
+    if (!doctype)
+        return p0;
+
+
+    //### get the root name of the document
+    p = skipwhite(p);
+    DOMString rootName;
+    int p2 = getWord(p, rootName);
+    if (p2 <= p)
+        return p0;
+    p = p2;
+    //printf("doctype root '%s'\n", rootName.c_str());
+
+
+    while (p < len)
+    {
+        p = skipwhite(p);
+        if (p >= len)
+            break;
+        if (peek(p) == '>')
+            break;
+        else if (peek(p) == '[') //just ignore 'internal' [] stuff
+        {
+            while (p < len)
+            {
+                int ch = get(p++);
+                if (ch == ']')
+                    break;
+            }
+            //p is already just past the ']'.  Stepping any further
+            //would skip the '>' that closes the declaration.
+        }
+        else if (match(p, "PUBLIC"))
+        {
+            p += 6;
+            colNr += 6;
+            p = skipwhite(p);
+            DOMString pubIdLiteral;
+            p2 = getQuoted(p, pubIdLiteral);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            p = skipwhite(p);
+            DOMString systemLiteral;
+            p2 = getQuoted(p, systemLiteral);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            //printf("PUBLIC \"%s\" \"%s\" \n",
+            //     pubIdLiteral.c_str(), systemLiteral.c_str());
+        }
+        else if (match(p, "SYSTEM"))
+        {
+            p += 6;
+            colNr += 6;
+            p = skipwhite(p);
+            DOMString systemLiteral;
+            p2 = getQuoted(p, systemLiteral);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            //printf("SYSTEM \"%s\" \n", systemLiteral.c_str());
+        }
+        else
+        {
+            //something we do not understand: step over it so that the
+            //loop always makes progress instead of spinning forever
+            get(p++);
+        }
+    }
+
+
+    //skip over '>'
+    get(p++);
+
+    return p;
+}
+
+
+
+/**
+ * Parse a comment, "<!-- ... -->", starting at p0 and store its text
+ * (without the delimiters) as the node value of comment.
+ *
+ * @return the position just after "-->", or the end of the input when
+ *         the comment is never closed.  Returns p0, leaving comment
+ *         untouched, when the text at p0 does not start with "<!--".
+ */
+int XmlReader::parseComment(int p0, CommentPtr comment)
+{
+    int p = p0;
+
+    if (!match(p, "<!--"))
+        return p0;
+
+    colNr += 4;
+    p += 4;
+
+    DOMString buf;
+
+    //match() is bounds-safe, so scan right up to the end of the input;
+    //stopping three characters short would miss a "-->" that ends
+    //exactly at the end of the buffer
+    while (p<len)
+    {
+        if (match(p, "-->"))
+        {
+            p += 3;
+            colNr += 3;
+            break;
+        }
+        int ch = get(p++);
+        buf.push_back((XMLCh)ch);
+    }
+
+    comment->setNodeValue(buf);
+
+    return p;
+}
+
+
+
+/**
+ * Parse a CDATA section, "<![CDATA[ ... ]]>", starting at p0 and
+ * store its raw content (without the delimiters) as the node value
+ * of cdata.
+ *
+ * @return the position just after "]]>", or the end of the input when
+ *         the section is never closed.  Returns p0, leaving cdata
+ *         untouched, when the text at p0 does not start with
+ *         "<![CDATA[".
+ */
+int XmlReader::parseCDATA(int p0, CDATASectionPtr cdata)
+{
+    if (!match(p0, "<![CDATA["))
+        return p0;
+
+    //step over the opening delimiter
+    int p = p0 + 9;
+    colNr += 9;
+
+    DOMString content;
+
+    for (;;)
+    {
+        if (p >= len)
+            break;
+        if (match(p, "]]>"))
+        {
+            //step over the closing delimiter
+            colNr += 3;
+            p += 3;
+            break;
+        }
+        int current = get(p++);
+        content.push_back((XMLCh)current);
+    }
+
+    cdata->setNodeValue(content);
+
+    return p;
+}
+
+
+
+/**
+ * Parse character data starting at p0, up to the next '<' or the end
+ * of the input, and store it as the node value of text.  Entities are
+ * expanded through parseEntity().
+ *
+ * @return the position of the '<' that ended the text, or the end of
+ *         the input.  Returns p0, leaving text untouched, when
+ *         parseEntity() rejects an entity.
+ */
+int XmlReader::parseText(int p0, TextPtr text)
+{
+    int p = p0;
+
+    DOMString content;
+
+    while (p<len)
+    {
+        int next = peek(p);
+
+        //start of the next tag: the text run is over
+        if (next == '<')
+            break;
+
+        if (next == '&')
+        {
+            p = parseEntity(p, content);
+            if (p < 0) //error?
+                return p0;
+            continue;
+        }
+
+        int current = get(p++);
+        content.push_back((XMLCh)current);
+    }
+
+    text->setNodeValue(content);
+
+    return p;
+}
+
+
+
+
+
+/**
+ * Parse the attributes of a tag, starting at p0, and attach each one
+ * to node.
+ *
+ * Each attribute is read as  name [ws] '=' [ws] quoted-value.  An
+ * attribute whose prefix or local name is "xmlns" is created in the
+ * XMLNSNAME namespace; all others are created with an empty namespace
+ * URI.
+ *
+ * @param quickClose set to true when the tag ends with "/>", false
+ *                   otherwise
+ * @return when quickClose is set, the position just after "/>".
+ *         Otherwise the position of the character that stopped the
+ *         scan: normally the '?' of an XML declaration or the '>' of
+ *         an ordinary tag, which is left for the caller to step over.
+ *         The scan also stops at a name that cannot be read or that is
+ *         not followed by '='.
+ */
+int XmlReader::parseAttributes(int p0, NodePtr node, bool *quickClose)
+{
+    *quickClose = false;
+
+    int p = p0;
+
+    while (p<len)
+    {
+        p = skipwhite(p);
+        int ch = get(p);
+
+        if (ch == '?' || ch == '>')//done
+            break;
+        else if (ch=='/')
+        {
+            //possible "/>" -- whitespace is tolerated between the two
+            p++;
+            p = skipwhite(p);
+            ch = peek(p);
+            if (ch == '>')
+            {
+                p++;
+                *quickClose = true;
+                return p;
+            }
+        }
+
+        //## attribute name
+        DOMString shortName;
+        DOMString prefix;
+        DOMString qualifiedName;
+        int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName);
+        if (p2 <= p)
+            break;
+        p = p2;
+
+        //## '='
+        p = skipwhite(p);
+        ch = get(p);
+        if (ch != '=')
+            break;
+        p++;
+
+        //## attribute value.  getQuoted() returns its argument
+        //## unchanged when no quoted literal is present, in which
+        //## case the attribute is created with an empty value.
+        p = skipwhite(p);
+        DOMString attrValue;
+        p2 = getQuoted(p, attrValue);
+        p = p2;
+
+        DOMString namespaceURI = "";
+        if (prefix == "xmlns" || shortName == "xmlns")
+            namespaceURI = XMLNSNAME;
+
+        //## Now let us make the attribute and give it to the node
+        AttrPtr attr = document->createAttributeNS(namespaceURI, qualifiedName);
+        attr->setValue(attrValue);
+        node->getAttributes().setNamedItemNS(attr);
+
+    }//while p<len
+
+    return p;
+}
+
+/**
+ * Expand the entity that starts at p0.
+ *
+ * The rows of entityTable are tried in order; on the first whose
+ * escape text matches at p0, its replacement value is appended to buf
+ * and colNr is advanced by the length of the escape text.
+ *
+ * @return the position just after the matched entity, or -1 (after
+ *         reporting through error()) when no table row matches
+ */
+int XmlReader::parseEntity(int p0, DOMString &buf)
+{
+    EntityInfo *entry = entityTable;
+    while (entry->escape)
+    {
+        if (!match(p0, entry->escape))
+        {
+            entry++;
+            continue;
+        }
+
+        //found it: emit the replacement and step over the escape text
+        colNr += entry->escapeLength;
+        buf += entry->value;
+        return p0 + entry->escapeLength;
+    }
+
+    error("unterminated entity");
+    return -1;
+}
+
+
+//#########################################################################
+//# P A R S E A N O D E
+//#########################################################################
+
+/**
+ * Parses one element, starting at its '<', into the given node, preserving
+ * the original structure as much as possible.
+ *
+ * The node is renamed to the qualified name of the open tag, its attributes
+ * are read, and the content up to the closing tag is consumed: comments,
+ * CDATA sections, text and child elements (which are parsed recursively).
+ * When parseAsData is set, comments are dropped and text/CDATA content is
+ * collected into the node value instead of being appended as child nodes.
+ *
+ * @param p0    buffer position at which the element is expected to start
+ * @param node  node that receives the name, attributes and children
+ * @param depth nesting level of this element; it is only passed on,
+ *              incremented, to the recursive calls
+ * @return the position just past the element, or p0 if no element starts
+ *         at p0 or if parsing it failed
+ */
+int XmlReader::parseNode(int p0, NodePtr node, int depth)
+{
+
+    int p = p0;
+
+
+    //### OPEN TAG
+    int ch = get(p++);
+    if (ch != '<')
+        return p0;
+
+    p = skipwhite(p);
+    DOMString openTagName;
+    DOMString openTagNamePrefix;
+    DOMString openTagQualifiedName;
+    int p2 = getPrefixedWord(p,openTagNamePrefix,
+                openTagName, openTagQualifiedName);
+    if (p2 <= p)
+        return p0;
+    p = p2;
+    p = skipwhite(p);
+
+    //printf("qualifiedName:%s\n", openTagQualifiedName.c_str());
+    // The node was created without a name; give it the one we just read
+    DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix);
+    document->renameNode(node, namespaceURI, openTagQualifiedName);
+
+    //### ATTRIBUTES
+    bool quickClose;
+    p = parseAttributes(p, node, &quickClose);
+    if (quickClose)  //trivial tag:  <name/>
+        return p;
+
+    p++; //skip over '>'
+
+
+    // Text and CDATA content collected when parsing as data
+    DOMString nodeValue;
+
+    /* ### Get intervening data ### */
+    while (p<len && keepGoing)
+    {
+        //### COMMENT
+        if (match(p, "<!--"))
+        {
+            CommentPtr comment = document->createComment("");
+            p2 = parseComment(p, comment);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            if (parseAsData)
+            { //throw away
+            }
+            else
+            {
+                node->appendChild(comment);
+            }
+        }
+        //### VERSION
+        else if (match(p, "<?xml"))
+        {
+            p2 = parseVersion(p);
+            if (p2 <= p)
+                return p0;
+            // Step past the declaration; without this the loop would
+            // parse the same declaration again and never terminate.
+            p = p2;
+        }
+        //### DOCTYPE
+        else if (match(p, "<!DOCTYPE"))
+        {
+            p2 = parseDoctype(p);
+            if (p2 <= p)
+                return p0;
+            // Step past the doctype, for the same reason as above.
+            p = p2;
+        }
+        //### CDATA
+        else if (match(p, "<![CDATA["))
+        {
+            CDATASectionPtr cdata = document->createCDATASection("");
+            p2 = parseCDATA(p, cdata);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            if (parseAsData)
+            {
+                nodeValue += cdata->getNodeValue();
+            }
+            else
+            {
+                node->appendChild(cdata);
+            }
+        }
+        //### OPEN OR CLOSE TAG
+        else if (peek(p) == '<')
+        {
+            p2 = skipwhite(p+1);
+            if (peek(p2) =='/')
+            {
+                // Start of a close tag: leave p on the '/' and stop
+                p = p2;
+                break;
+            }
+            else
+            {
+                /*Add element to tree*/
+                ElementPtr elem = document->createElement(""); //fill in name later
+                node->appendChild(elem);
+                p2 = parseNode(p, elem, depth+1);
+                if (p2 <= p)
+                {
+                    /*printf("problem on element:%ls.  p2:%d p:%d\n",n->name, p2, p);*/
+                    return p0;
+                }
+                p = p2;
+            }
+        }
+        //### TEXT
+        else
+        {
+            TextPtr text = document->createTextNode("");
+            p2 = parseText(p, text);
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            if (parseAsData)
+            {
+                nodeValue += text->getNodeValue();
+            }
+            else
+            {
+                node->appendChild(text);
+            }
+        }
+
+    }//while (p<len)
+
+    //printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str());
+    trim(nodeValue);
+    node->setNodeValue(nodeValue);
+
+    //### get close tag.  we should be pointing at '/'
+    p = skipwhite(p);
+    ch = get(p);
+    if (ch != '/')
+    {
+        error("no / on end tag");
+        return p0;
+    }
+    p++;
+
+    //### get word after '/'
+    p = skipwhite(p);
+    DOMString closeTagName;
+    DOMString closeTagNamePrefix;
+    DOMString closeTagQualifiedName;
+    p = getPrefixedWord(p, closeTagNamePrefix, closeTagName,
+                        closeTagQualifiedName);
+    if (openTagQualifiedName != closeTagQualifiedName)
+    {
+        error("Mismatched closing tag.  Expected </%s>. Got '%s'.",
+              openTagQualifiedName.c_str(), closeTagQualifiedName.c_str());
+        return p0;
+    }
+    p = skipwhite(p);
+    if (parsebuf[p] != '>')
+    {
+        error("no > on end tag");
+        return p0;
+    }
+    p++;
+    /*printf("close element:%ls\n",buf);*/
+    return p;
+}
+
+
+/**
+ * Parses a document out of a character buffer.
+ *
+ * A new, empty document is created first.  Whitespace, comments, the
+ * "<?xml" declaration and "<!DOCTYPE" found ahead of the first element are
+ * consumed, then the element that follows is parsed into the document
+ * element.  Comments are appended to the document unless parseAsData is set.
+ *
+ * @param buf          the text to parse; it is copied into parsebuf
+ * @param bufferOffset position in buf at which parsing starts
+ * @param parseLen     stored in len, the upper bound used by the parse loops
+ * @return the document created for this call; it is returned as it stands
+ *         when one of the leading constructs cannot be parsed
+ */
+org::w3c::dom::DocumentPtr
+XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen)
+{
+    // Install the input and arm the parse loops
+    len       = parseLen;
+    parsebuf  = buf;
+    keepGoing = true;
+
+    DOMImplementationSourceImpl implSource;
+    DOMImplementation *impl = implSource.getDOMImplementation("");
+    document = impl->createDocument("", "", NULL);
+
+    int pos = bufferOffset;
+
+    //### Everything that may come before the first element
+    while (keepGoing && pos < len)
+    {
+        pos = skipwhite(pos);
+
+        //### COMMENT
+        if (match(pos, "<!--"))
+        {
+            CommentPtr comment = document->createComment("");
+            const int next = parseComment(pos, comment);
+            if (next <= pos)
+                return document;
+            pos = next;
+            if (!parseAsData)
+                document->appendChild(comment);
+            continue;
+        }
+
+        //### VERSION
+        if (match(pos, "<?xml"))
+        {
+            const int next = parseVersion(pos);
+            if (next <= pos)
+                return document;
+            pos = next;
+            continue;
+        }
+
+        //### DOCTYPE
+        if (match(pos, "<!DOCTYPE"))
+        {
+            const int next = parseDoctype(pos);
+            if (next <= pos)
+                return document;
+            pos = next;
+            continue;
+        }
+
+        // Anything else is taken to be the start of the first element
+        break;
+    }
+
+    //### The element itself
+    pos = skipwhite(pos);
+    parseNode(pos, document->getDocumentElement(), 0);
+
+    keepGoing = false;
+
+    return document;
+}
+
+
+/**
+ * Parses a whole string as a document and normalizes the result.
+ *
+ * @param str the text to parse, from offset 0 up to str.size()
+ * @return the parsed document, after normalizeDocument() has been called
+ *         on it
+ */
+org::w3c::dom::DocumentPtr
+XmlReader::parse(const DOMString &str)
+{
+    DocumentPtr parsed = parse(str, 0, str.size());
+    parsed->normalizeDocument();
+    return parsed;
+}
+
+/**
+ * Loads a file with loadFile() and parses its entire contents.
+ *
+ * Unlike parse(const DOMString &), this does not call normalizeDocument()
+ * on the result.
+ *
+ * @param fileName name of the file to load and parse
+ * @return the document produced by parsing the file contents
+ */
+org::w3c::dom::DocumentPtr
+XmlReader::parseFile(char *fileName)
+{
+    DOMString contents = loadFile(fileName);
+    return parse(contents, 0, contents.size());
+}
+
+
+
+//#########################################################################
+//# S T R E A M R E A D I N G
+//#########################################################################
+
+/**
+ * Reads the entire contents of a file into a string.
+ *
+ * The file is opened in binary mode and read one byte at a time; each byte
+ * is converted to XMLCh and appended to the result.
+ *
+ * @param fileName name of the file to read; may be NULL
+ * @return the contents of the file, or an empty string if fileName is NULL
+ *         or the file cannot be opened
+ */
+org::w3c::dom::DOMString
+XmlReader::loadFile(char *fileName)
+{
+    DOMString buf;
+
+    // Failure yields an empty string: a DOMString must not be constructed
+    // from a null pointer.
+    if (!fileName)
+        return buf;
+    FILE *f = fopen(fileName, "rb");
+    if (!f)
+        return buf;
+
+    // fgetc() returns EOF both at end of file and on a read error, so one
+    // check ends the loop in either case.
+    int ch;
+    while ((ch = fgetc(f)) != EOF)
+        buf.push_back((XMLCh)ch);
+    fclose(f);
+
+    return buf;
+}
+
+
+//#########################################################################
+//# C O N S T R U C T O R / D E S T R U C T O R
+//#########################################################################
+
+
+/**
+ * Creates a reader that keeps the document structure: parseAsData is off.
+ * The position counters start at line 1, column 0, and no parse is active.
+ */
+XmlReader::XmlReader()
+{
+    // Mode flags
+    parseAsData = false;
+    keepGoing   = false;
+
+    // Input length and position tracking
+    len    = 0;
+    lineNr = 1;
+    colNr  = 0;
+}
+
+/**
+ * Creates a reader with an explicit parseAsData setting.
+ * The position counters start at line 1, column 0, and no parse is active.
+ *
+ * @param parseAsDataArg value stored in parseAsData
+ */
+XmlReader::XmlReader(bool parseAsDataArg)
+{
+    // Mode flags
+    parseAsData = parseAsDataArg;
+    keepGoing   = false;
+
+    // Input length and position tracking
+    len    = 0;
+    lineNr = 1;
+    colNr  = 0;
+}
+
+
+
+/**
+ * Destructor.  There is nothing to release explicitly here.
+ */
+XmlReader::~XmlReader() {}
+
+
+} //namespace dom
+} //namespace w3c
+} //namespace org
+
+
+//#########################################################################
+//# E N D O F F I L E
+//#########################################################################
+