X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=src%2Fdom%2Fxmlreader.cpp;h=6e6db723dbe22a39b63369a49f78bc0996f357e3;hb=a8e0310afe0d9b31d25a143238dbbc0bc4a91860;hp=c36eec961c7dbd05e7a4f9dddc2042cc2d10e796;hpb=b4faf05b01509f231c7ee23cf675ee964896042a;p=inkscape.git diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp index c36eec961..6e6db723d 100644 --- a/src/dom/xmlreader.cpp +++ b/src/dom/xmlreader.cpp @@ -1,987 +1,985 @@ -/** - * Phoebe DOM Implementation. - * - * This is a C++ approximation of the W3C DOM model, which follows - * fairly closely the specifications in the various .idl files, copies of - * which are provided for reference. Most important is this one: - * - * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html - * - * Authors: - * Bob Jamison - * - * Copyright (C) 2005 Bob Jamison - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - - - -#include "xmlreader.h" -#include "charclass.h" -#include "domimpl.h" -#include "svg/svgimpl.h" - -#include -#include - -namespace org -{ -namespace w3c -{ -namespace dom -{ - - -//######################################################################### -//# E N T I T Y T A B L E -//######################################################################### -struct EntityInfo -{ - char *escape; - int escapeLength; - char *value; -}; - - -static EntityInfo entityTable[] = -{ - { "&" , 5 , "&" }, - { "<" , 4 , "<" }, - { ">" , 4 , ">" }, - { "'" , 6 , "'" }, - { """ , 6 , "\"" }, - { NULL , 0 , "\0" } -}; - - - -//######################################################################### -//# M E S S A G E S -//######################################################################### - - -/** - * - */ -void XmlReader::error(char *fmt, ...) -{ - va_list args; - fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args) ; - fprintf(stderr, "\n"); -} - - - -//######################################################################### -//# U T I L I T Y -//######################################################################### - -static void trim(DOMString &str) -{ - int len = str.size(); - if (len<1) - return; - - int start = 0; - int end = 0; - for (start=0 ; start126) - break; - } - for (end=len-1 ; end>=0 ; end--) - { - int ch = str[end]; - if (ch<=' ' || ch>126) - break; - } - if (start= len) - return -1; - int ch = parsebuf[p]; - //printf("%c", ch); - if (ch == '\n' || ch == '\r') - { - colNr = 0; - lineNr++; - } - else - colNr++; - return ch; -} - -/** - * Look at the character at the position, but don't note the fact - */ -int XmlReader::peek(int p) -{ - if (p >= len) - return -1; - int ch = parsebuf[p]; - return ch; -} - - -/** - * Test if the given substring exists at the given position - * in parsebuf. Use peek() in case of out-of-bounds - */ -bool XmlReader::match(int pos, char *str) -{ - while (*str) - { - if (peek(pos++) != *str++) - return false; - } - return true; -} - - - -/** - * Test if the given substring exists at the given position - * in a given buffer - */ -/* -static bool bufMatch(const DOMString &buf, int pos, char *str) -{ - while (*str) - { - if (buf[pos++] != *str++) - return false; - } - return true; -} -*/ - - -/** - * - */ -int XmlReader::skipwhite(int p) -{ - while (p < len) - { - int b = get(p); - if (!isWhitespace(b)) - break; - p++; - } - return p; -} - -/** - * modify this to allow all chars for an element or attribute name - */ -int XmlReader::getWord(int p, DOMString &result) -{ - while (p' || b=='=') - break; - result.push_back((XMLCh)b); - p++; - } - return p; -} - -/** - * get a name and prefix, if any - */ -int XmlReader::getPrefixedWord(int p, DOMString &prefix, - DOMString &shortWord, DOMString &fullWord) -{ - while (p' || b=='=') - break; - else if (b == ':') - { - prefix = shortWord; - shortWord = ""; - } - else - shortWord.push_back((XMLCh)b); - p++; - } - if (prefix.size() > 0) - fullWord = prefix + ":" + shortWord; - else - fullWord = shortWord; - return p; -} - - -/** - * Assume that we are starting on a quote. Ends on the char - * after the final '"' - */ -int XmlReader::getQuoted(int p0, DOMString &result) -{ - - int p = p0; - - if (peek(p)!='"' && peek(p)!='\'') - return p0; - - int b = get(p++); //go to next char - - DOMString buf; - - while (p tag. Node may be null. Assumes current char is '<' - * ends on char after '>' - */ -int XmlReader::parseVersion(int p0) -{ - int p = p0; - - if (!match(p, "getAttributes(); - Node *attr = attributes.getNamedItem("version"); - if (attr) - document->setXmlVersion(attr->getNodeValue()); - attr = attributes.getNamedItem("encoding"); - if (attr) - { /*document->setXmlEncoding(attr->getNodeValue());*/ } - attr = attributes.getNamedItem("standalone"); - if (attr) - document->setXmlStandalone((attr->getNodeValue() == "yes")); - delete node; - - //#now we should be pointing at '?>' - if (!match(p, "?>")) - { - return p0; - } - - //skip over '?>' - get(p++); - get(p++); - - return p; -} - - -/** - * Parse a tag. doctype may be null. Expects '<' - * on start. Ends pointing at char after '>' - */ -int XmlReader::parseDoctype(int p0) -{ - int p = p0; - - if (!match(p, "getDoctype(); - if (!doctype) - return p0; - - - //### get the root name of the document - p = skipwhite(p); - DOMString rootName; - int p2 = getWord(p, rootName); - if (p2 <= p) - return p0; - p = p2; - //printf("doctype root '%s'\n", rootName.c_str()); - - - while (p < len) - { - p = skipwhite(p); - if (peek(p) == '>') - break; - else if (peek(p) == '[') //just ignore 'internal' [] stuff - { - while (p < len) - { - int ch = get(p++); - if (ch == ']') - break; - } - p++; - } - else if (match(p, "PUBLIC")) - { - p += 6; - colNr += 6; - p = skipwhite(p); - DOMString pubIdLiteral; - int p2 = getQuoted(p, pubIdLiteral); - if (p2 <= p) - return p0; - p = p2; - p = skipwhite(p); - DOMString systemLiteral; - p2 = getQuoted(p, systemLiteral); - if (p2 <= p) - return p0; - p = p2; - //printf("PUBLIC \"%s\" \"%s\" \n", - // pubIdLiteral.c_str(), systemLiteral.c_str()); - } - else if (match(p, "SYSTEM")) - { - p += 6; - colNr += 6; - p = skipwhite(p); - DOMString systemLiteral; - int p2 = getQuoted(p, systemLiteral); - if (p2 <= p) - return p0; - p = p2; - //printf("SYSTEM \"%s\" \n", systemLiteral.c_str()); - } - } - - - //skip over '>' - get(p++); - - return p; -} - - - -/** - * Expects '<' on startup, ends on char after '>' - */ -int XmlReader::parseComment(int p0, Comment *comment) -{ - int p = p0; - - if (!match(p, "")) - { - p += 3; - colNr += 3; - break; - } - int ch = get(p++); - buf.push_back((XMLCh)ch); - } - - comment->setNodeValue(buf); - - return p; -} - - - -/** - * - */ -int XmlReader::parseCDATA(int p0, CDATASection *cdata) -{ - - int p = p0; - - if (!match(p, "")) - { - p +=3; - colNr += 3; - break; - } - int ch = get(p++); - buf.push_back((XMLCh)ch); - } - - /*printf("Got CDATA:%s\n",buf.c_str());*/ - cdata->setNodeValue(buf); - - return p; -} - - - -/** - * - */ -int XmlReader::parseText(int p0, Text *text) -{ - - int p = p0; - - DOMString buf; - - while (psetNodeValue(buf); - - return p; -} - - - - - -/** - * Parses attributes of a node. Should end pointing at either the - * '?' of a version or doctype tag, or a '>' of a normal tag - */ -int XmlReader::parseAttributes(int p0, Node *node, bool *quickClose) -{ - *quickClose = false; - - int p = p0; - - NamedNodeMap attributes; - - while (p')//done - break; - else if (ch=='/' && p') - { - p++; - *quickClose = true; - /*printf("quick close\n");*/ - return p; - } - } - DOMString shortName; - DOMString prefix; - DOMString qualifiedName; - int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName); - if (p2 <= p) - break; - - /*printf("name:%s",buf);*/ - p = p2; - p = skipwhite(p); - ch = get(p); - /*printf("ch:%c\n",ch);*/ - if (ch != '=') - break; - p++; - p = skipwhite(p); - /*ch = parsebuf[p];*/ - /*printf("ch:%c\n",ch);*/ - DOMString attrValue; - p2 = getQuoted(p, attrValue); - p = p2; - /*printf("name:'%s' value:'%s'\n",buf,buf2);*/ - - DOMString namespaceURI = ""; - if (prefix == "xmlns" || shortName == "xmlns") - namespaceURI = XMLNSNAME; - - //## Now let us make the attribute and give it to the node - Attr *attr = document->createAttributeNS(namespaceURI, qualifiedName); - attr->setValue(attrValue); - node->getAttributes().setNamedItemNS(attr); - - }//while pescape ; info++) - { - if (match(p, info->escape)) - { - p += info->escapeLength; - colNr += info->escapeLength; - buf += info->value; - return p; - } - } - - error("unterminated entity"); - return -1; -} - - -//######################################################################### -//# P A R S E A N O D E -//######################################################################### - -/** - * Parse as a document, preserving the original structure as much as - * possible - */ -int XmlReader::parseNode(int p0, Node *node, int depth) -{ - - int p = p0; - - - //### OPEN TAG - int ch = get(p++); - if (ch != '<') - return p0; - - p = skipwhite(p); - DOMString openTagName; - DOMString openTagNamePrefix; - DOMString openTagQualifiedName; - int p2 = getPrefixedWord(p,openTagNamePrefix, - openTagName, openTagQualifiedName); - if (p2 <= p) - return p0; - p = p2; - p = skipwhite(p); - - //printf("qualifiedName:%s\n", openTagQualifiedName.c_str()); - DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix); - document->renameNode(node, namespaceURI, openTagQualifiedName); - - //### ATTRIBUTES - bool quickClose; - p = parseAttributes(p, node, &quickClose); - if (quickClose) //trivial tag: - return p; - - p++; //skip over '>' - - - DOMString nodeValue; - - /* ### Get intervening data ### */ - while (pcreateComment(""); - p2 = parseComment(p, comment); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { //throw away - delete comment; - } - else - { - node->appendChild(comment); - } - } - //### VERSION - else if (match(p, "createCDATASection(""); - p2 = parseCDATA(p, cdata); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { - nodeValue += cdata->getNodeValue(); - delete cdata; - } - else - { - node->appendChild(cdata); - } - } - //### OPEN OR CLOSE TAG - else if (peek(p) == '<') - { - p2 = skipwhite(p+1); - if (peek(p2) =='/') - { - p = p2; - break; - } - else - { - /*Add element to tree*/ - Element *elem = document->createElement(""); //fill in name later - node->appendChild(elem); - p2 = parseNode(p, elem, depth+1); - if (p2 <= p) - { - /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/ - return p0; - } - p = p2; - } - } - //### TEXT - else - { - Text *text = document->createTextNode(""); - p2 = parseText(p, text); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { - nodeValue += text->getNodeValue(); - delete text; - } - else - { - node->appendChild(text); - } - } - - }//while (psetNodeValue(nodeValue); - - //### get close tag. we should be pointing at '/' - p = skipwhite(p); - ch = get(p); - if (ch != '/') - { - error("no / on end tag"); - return p0; - } - p++; - - //### get word after '/' - p = skipwhite(p); - DOMString closeTagName; - DOMString closeTagNamePrefix; - DOMString closeTagQualifiedName; - p = getPrefixedWord(p, closeTagNamePrefix, closeTagName, - closeTagQualifiedName); - if (openTagQualifiedName != closeTagQualifiedName) - { - error("Mismatched closing tag. Expected . Got '%S'.", - openTagQualifiedName.c_str(), closeTagQualifiedName.c_str()); - return p0; - } - p = skipwhite(p); - if (parsebuf[p] != '>') - { - error("no > on end tag"); - return p0; - } - p++; - /*printf("close element:%ls\n",buf);*/ - return p; -} - - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen) -{ - len = parseLen; - parsebuf = buf; - - DOMImplementationSourceImpl source; - DOMImplementation *domImpl = source.getDOMImplementation(""); - - keepGoing = true; - - document = domImpl->createDocument("", "", NULL); - //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL); - - int p = bufferOffset; - int p2 = 0; - - while (pcreateComment(""); - p2 = parseComment(p, comment); - if (p2 <= p) - return document; - p = p2; - if (parseAsData) - { //throw away - delete comment; - } - else - { - document->appendChild(comment); - } - } - //### VERSION - else if (match(p, "getDocumentElement(), 0); - - keepGoing = false; - - return document; -} - - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parse(const DOMString &str) -{ - - Document *doc = parse(str, 0, str.size()); - doc->normalizeDocument(); - - return doc; -} - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parseFile(char *fileName) -{ - - DOMString buf = loadFile(fileName); - - Document *doc = parse(buf, 0, buf.size()); - - return doc; -} - - - -//######################################################################### -//# S T R E A M R E A D I N G -//######################################################################### - -/** - * - */ -org::w3c::dom::DOMString -XmlReader::loadFile(char *fileName) -{ - - if (!fileName) - return NULL; - FILE *f = fopen(fileName, "rb"); - if (!f) - return NULL; - - DOMString buf; - while (!feof(f)) - { - int ch = fgetc(f); - if (ch<0) - break; - buf.push_back((XMLCh)ch); - } - fclose(f); - - return buf; -} - - -//######################################################################### -//# C O N S T R U C T O R / D E S T R U C T O R -//######################################################################### - - -/** - * - */ -XmlReader::XmlReader() -{ - len = 0; - lineNr = 1; - colNr = 0; - parseAsData = false; - keepGoing = false; -} - -/** - * - */ -XmlReader::XmlReader(bool parseAsDataArg) -{ - len = 0; - lineNr = 1; - colNr = 0; - parseAsData = parseAsDataArg; - keepGoing = false; -} - - - -/** - * - */ -XmlReader::~XmlReader() -{ -} - - -} //namespace dom -} //namespace w3c -} //namespace org - - -//######################################################################### -//# E N D O F F I L E -//######################################################################### - +/** + * Phoebe DOM Implementation. + * + * This is a C++ approximation of the W3C DOM model, which follows + * fairly closely the specifications in the various .idl files, copies of + * which are provided for reference. Most important is this one: + * + * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html + * + * Authors: + * Bob Jamison + * + * Copyright (C) 2005 Bob Jamison + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + + +#include "xmlreader.h" +#include "charclass.h" +#include "domimpl.h" + +#include +#include + +namespace org +{ +namespace w3c +{ +namespace dom +{ + + +//######################################################################### +//# E N T I T Y T A B L E +//######################################################################### +struct EntityInfo +{ + char *escape; + int escapeLength; + char *value; +}; + + +static EntityInfo entityTable[] = +{ + { "&" , 5 , "&" }, + { "<" , 4 , "<" }, + { ">" , 4 , ">" }, + { "'" , 6 , "'" }, + { """ , 6 , "\"" }, + { NULL , 0 , "\0" } +}; + + + +//######################################################################### +//# M E S S A G E S +//######################################################################### + + +/** + * + */ +void XmlReader::error(char *fmt, ...) +{ + va_list args; + fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args) ; + fprintf(stderr, "\n"); +} + + + +//######################################################################### +//# U T I L I T Y +//######################################################################### + +static void trim(DOMString &str) +{ + int len = str.size(); + if (len<1) + return; + + int start = 0; + int end = 0; + for (start=0 ; start126) + break; + } + for (end=len-1 ; end>=0 ; end--) + { + int ch = str[end]; + if (ch<=' ' || ch>126) + break; + } + if (start= len) + return -1; + int ch = parsebuf[p]; + //printf("%c", ch); + if (ch == '\n' || ch == '\r') + { + colNr = 0; + lineNr++; + } + else + colNr++; + return ch; +} + +/** + * Look at the character at the position, but don't note the fact + */ +int XmlReader::peek(int p) +{ + if (p >= len) + return -1; + int ch = parsebuf[p]; + return ch; +} + + +/** + * Test if the given substring exists at the given position + * in parsebuf. Use peek() in case of out-of-bounds + */ +bool XmlReader::match(int pos, char *str) +{ + while (*str) + { + if (peek(pos++) != *str++) + return false; + } + return true; +} + + + +/** + * Test if the given substring exists at the given position + * in a given buffer + */ +/* +static bool bufMatch(const DOMString &buf, int pos, char *str) +{ + while (*str) + { + if (buf[pos++] != *str++) + return false; + } + return true; +} +*/ + + +/** + * + */ +int XmlReader::skipwhite(int p) +{ + while (p < len) + { + int b = get(p); + if (!isWhitespace(b)) + break; + p++; + } + return p; +} + +/** + * modify this to allow all chars for an element or attribute name + */ +int XmlReader::getWord(int p, DOMString &result) +{ + while (p' || b=='=') + break; + result.push_back((XMLCh)b); + p++; + } + return p; +} + +/** + * get a name and prefix, if any + */ +int XmlReader::getPrefixedWord(int p, DOMString &prefix, + DOMString &shortWord, DOMString &fullWord) +{ + while (p' || b=='=') + break; + else if (b == ':') + { + prefix = shortWord; + shortWord = ""; + } + else + shortWord.push_back((XMLCh)b); + p++; + } + if (prefix.size() > 0) + fullWord = prefix + ":" + shortWord; + else + fullWord = shortWord; + return p; +} + + +/** + * Assume that we are starting on a quote. Ends on the char + * after the final '"' + */ +int XmlReader::getQuoted(int p0, DOMString &result) +{ + + int p = p0; + + if (peek(p)!='"' && peek(p)!='\'') + return p0; + + int b = get(p++); //go to next char + + DOMString buf; + + while (p tag. Node may be null. Assumes current char is '<' + * ends on char after '>' + */ +int XmlReader::parseVersion(int p0) +{ + int p = p0; + + if (!match(p, "getAttributes(); + NodePtr attr = attributes.getNamedItem("version"); + if (attr.get()) + document->setXmlVersion(attr->getNodeValue()); + attr = attributes.getNamedItem("encoding"); + if (attr.get()) + { /*document->setXmlEncoding(attr->getNodeValue());*/ } + attr = attributes.getNamedItem("standalone"); + if (attr.get()) + document->setXmlStandalone((attr->getNodeValue() == "yes")); + + //#now we should be pointing at '?>' + if (!match(p, "?>")) + { + return p0; + } + + //skip over '?>' + get(p++); + get(p++); + + return p; +} + + +/** + * Parse a tag. doctype may be null. Expects '<' + * on start. Ends pointing at char after '>' + */ +int XmlReader::parseDoctype(int p0) +{ + int p = p0; + + if (!match(p, "getDoctype(); + if (!doctype) + return p0; + + + //### get the root name of the document + p = skipwhite(p); + DOMString rootName; + int p2 = getWord(p, rootName); + if (p2 <= p) + return p0; + p = p2; + //printf("doctype root '%s'\n", rootName.c_str()); + + + while (p < len) + { + p = skipwhite(p); + if (peek(p) == '>') + break; + else if (peek(p) == '[') //just ignore 'internal' [] stuff + { + while (p < len) + { + int ch = get(p++); + if (ch == ']') + break; + } + p++; + } + else if (match(p, "PUBLIC")) + { + p += 6; + colNr += 6; + p = skipwhite(p); + DOMString pubIdLiteral; + int p2 = getQuoted(p, pubIdLiteral); + if (p2 <= p) + return p0; + p = p2; + p = skipwhite(p); + DOMString systemLiteral; + p2 = getQuoted(p, systemLiteral); + if (p2 <= p) + return p0; + p = p2; + //printf("PUBLIC \"%s\" \"%s\" \n", + // pubIdLiteral.c_str(), systemLiteral.c_str()); + } + else if (match(p, "SYSTEM")) + { + p += 6; + colNr += 6; + p = skipwhite(p); + DOMString systemLiteral; + int p2 = getQuoted(p, systemLiteral); + if (p2 <= p) + return p0; + p = p2; + //printf("SYSTEM \"%s\" \n", systemLiteral.c_str()); + } + } + + + //skip over '>' + get(p++); + + return p; +} + + + +/** + * Expects '<' on startup, ends on char after '>' + */ +int XmlReader::parseComment(int p0, CommentPtr comment) +{ + int p = p0; + + if (!match(p, "")) + { + p += 3; + colNr += 3; + break; + } + int ch = get(p++); + buf.push_back((XMLCh)ch); + } + + comment->setNodeValue(buf); + + return p; +} + + + +/** + * + */ +int XmlReader::parseCDATA(int p0, CDATASectionPtr cdata) +{ + + int p = p0; + + if (!match(p, "")) + { + p +=3; + colNr += 3; + break; + } + int ch = get(p++); + buf.push_back((XMLCh)ch); + } + + /*printf("Got CDATA:%s\n",buf.c_str());*/ + cdata->setNodeValue(buf); + + return p; +} + + + +/** + * + */ +int XmlReader::parseText(int p0, TextPtr text) +{ + + int p = p0; + + DOMString buf; + + while (psetNodeValue(buf); + + return p; +} + + + + + +/** + * Parses attributes of a node. Should end pointing at either the + * '?' of a version or doctype tag, or a '>' of a normal tag + */ +int XmlReader::parseAttributes(int p0, NodePtr node, bool *quickClose) +{ + *quickClose = false; + + int p = p0; + + NamedNodeMap attributes; + + while (p')//done + break; + else if (ch=='/' && p') + { + p++; + *quickClose = true; + /*printf("quick close\n");*/ + return p; + } + } + DOMString shortName; + DOMString prefix; + DOMString qualifiedName; + int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName); + if (p2 <= p) + break; + + /*printf("name:%s",buf);*/ + p = p2; + p = skipwhite(p); + ch = get(p); + /*printf("ch:%c\n",ch);*/ + if (ch != '=') + break; + p++; + p = skipwhite(p); + /*ch = parsebuf[p];*/ + /*printf("ch:%c\n",ch);*/ + DOMString attrValue; + p2 = getQuoted(p, attrValue); + p = p2; + /*printf("name:'%s' value:'%s'\n",buf,buf2);*/ + + DOMString namespaceURI = ""; + if (prefix == "xmlns" || shortName == "xmlns") + namespaceURI = XMLNSNAME; + + //## Now let us make the attribute and give it to the node + AttrPtr attr = document->createAttributeNS(namespaceURI, qualifiedName); + attr->setValue(attrValue); + node->getAttributes().setNamedItemNS(attr); + + }//while pescape ; info++) + { + if (match(p, info->escape)) + { + p += info->escapeLength; + colNr += info->escapeLength; + buf += info->value; + return p; + } + } + + error("unterminated entity"); + return -1; +} + + +//######################################################################### +//# P A R S E A N O D E +//######################################################################### + +/** + * Parse as a document, preserving the original structure as much as + * possible + */ +int XmlReader::parseNode(int p0, NodePtr node, int depth) +{ + + int p = p0; + + + //### OPEN TAG + int ch = get(p++); + if (ch != '<') + return p0; + + p = skipwhite(p); + DOMString openTagName; + DOMString openTagNamePrefix; + DOMString openTagQualifiedName; + int p2 = getPrefixedWord(p,openTagNamePrefix, + openTagName, openTagQualifiedName); + if (p2 <= p) + return p0; + p = p2; + p = skipwhite(p); + + //printf("qualifiedName:%s\n", openTagQualifiedName.c_str()); + DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix); + document->renameNode(node, namespaceURI, openTagQualifiedName); + + //### ATTRIBUTES + bool quickClose; + p = parseAttributes(p, node, &quickClose); + if (quickClose) //trivial tag: + return p; + + p++; //skip over '>' + + + DOMString nodeValue; + + /* ### Get intervening data ### */ + while (pcreateComment(""); + p2 = parseComment(p, comment); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { //throw away + //delete comment; + } + else + { + node->appendChild(comment); + } + } + //### VERSION + else if (match(p, "createCDATASection(""); + p2 = parseCDATA(p, cdata); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { + nodeValue += cdata->getNodeValue(); + //delete cdata; + } + else + { + node->appendChild(cdata); + } + } + //### OPEN OR CLOSE TAG + else if (peek(p) == '<') + { + p2 = skipwhite(p+1); + if (peek(p2) =='/') + { + p = p2; + break; + } + else + { + /*Add element to tree*/ + ElementPtr elem = document->createElement(""); //fill in name later + node->appendChild(elem); + p2 = parseNode(p, elem, depth+1); + if (p2 <= p) + { + /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/ + return p0; + } + p = p2; + } + } + //### TEXT + else + { + TextPtr text = document->createTextNode(""); + p2 = parseText(p, text); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { + nodeValue += text->getNodeValue(); + //delete text; + } + else + { + node->appendChild(text); + } + } + + }//while (psetNodeValue(nodeValue); + + //### get close tag. we should be pointing at '/' + p = skipwhite(p); + ch = get(p); + if (ch != '/') + { + error("no / on end tag"); + return p0; + } + p++; + + //### get word after '/' + p = skipwhite(p); + DOMString closeTagName; + DOMString closeTagNamePrefix; + DOMString closeTagQualifiedName; + p = getPrefixedWord(p, closeTagNamePrefix, closeTagName, + closeTagQualifiedName); + if (openTagQualifiedName != closeTagQualifiedName) + { + error("Mismatched closing tag. Expected . Got '%s'.", + openTagQualifiedName.c_str(), closeTagQualifiedName.c_str()); + return p0; + } + p = skipwhite(p); + if (parsebuf[p] != '>') + { + error("no > on end tag"); + return p0; + } + p++; + /*printf("close element:%ls\n",buf);*/ + return p; +} + + +/** + * + */ +org::w3c::dom::DocumentPtr +XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen) +{ + len = parseLen; + parsebuf = buf; + + keepGoing = true; + + DOMImplementationSourceImpl source; + DOMImplementation *domImpl = source.getDOMImplementation(""); + + document = domImpl->createDocument("", "", NULL); + //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL); + + int p = bufferOffset; + int p2 = 0; + + while (pcreateComment(""); + p2 = parseComment(p, comment); + if (p2 <= p) + return document; + p = p2; + if (parseAsData) + { //throw away + //delete comment; + } + else + { + document->appendChild(comment); + } + } + //### VERSION + else if (match(p, "getDocumentElement(), 0); + + keepGoing = false; + + return document; +} + + +/** + * + */ +org::w3c::dom::DocumentPtr +XmlReader::parse(const DOMString &str) +{ + + DocumentPtr doc = parse(str, 0, str.size()); + doc->normalizeDocument(); + + return doc; +} + +/** + * + */ +org::w3c::dom::DocumentPtr +XmlReader::parseFile(char *fileName) +{ + + DOMString buf = loadFile(fileName); + + DocumentPtr doc = parse(buf, 0, buf.size()); + + return doc; +} + + + +//######################################################################### +//# S T R E A M R E A D I N G +//######################################################################### + +/** + * + */ +org::w3c::dom::DOMString +XmlReader::loadFile(char *fileName) +{ + + if (!fileName) + return NULL; + FILE *f = fopen(fileName, "rb"); + if (!f) + return NULL; + + DOMString buf; + while (!feof(f)) + { + int ch = fgetc(f); + if (ch<0) + break; + buf.push_back((XMLCh)ch); + } + fclose(f); + + return buf; +} + + +//######################################################################### +//# C O N S T R U C T O R / D E S T R U C T O R +//######################################################################### + + +/** + * + */ +XmlReader::XmlReader() +{ + len = 0; + lineNr = 1; + colNr = 0; + parseAsData = false; + keepGoing = false; +} + +/** + * + */ +XmlReader::XmlReader(bool parseAsDataArg) +{ + len = 0; + lineNr = 1; + colNr = 0; + parseAsData = parseAsDataArg; + keepGoing = false; +} + + + +/** + * + */ +XmlReader::~XmlReader() +{ +} + + +} //namespace dom +} //namespace w3c +} //namespace org + + +//######################################################################### +//# E N D O F F I L E +//######################################################################### +