From f8d2244c881c2b35b0daa51ab533c09a8bb5692e Mon Sep 17 00:00:00 2001 From: ishmal Date: Tue, 28 Feb 2006 20:28:32 +0000 Subject: [PATCH] Replace isspace() with isWhitespace(). Use pure unicode character classes. --- src/dom/charclass.cpp | 111 ++++++++++++++++++++++++++++++++++++++++ src/dom/charclass.h | 38 ++++++++++++++ src/dom/cssparser.cpp | 2 +- src/dom/domstream.cpp | 3 +- src/dom/svgparser.cpp | 2 +- src/dom/xmlreader.cpp | 12 +++-- src/dom/xpathparser.cpp | 20 ++++---- 7 files changed, 171 insertions(+), 17 deletions(-) diff --git a/src/dom/charclass.cpp b/src/dom/charclass.cpp index 81c3fb18e..086256f48 100755 --- a/src/dom/charclass.cpp +++ b/src/dom/charclass.cpp @@ -442,6 +442,117 @@ bool isExtender(int ch) +/** + * + * The following character classes come from unicode.org: they are taken + * from the UnicodeData file of the Unicode Character Database. + */ + +/** + * Tests for UNICODE general class Zs (space separators) against the fixed list of code points below. NOTE(review): 0x180E was moved from Zs to Cf in Unicode 6.3; TODO confirm it should still be listed. + */ +bool isSpaceSeparator(int ch) +{ + if (ch == 0x0020 || + ch == 0x200A || + ch == 0x2003 || + ch == 0x205F || + ch == 0x2005 || + ch == 0x202F || + ch == 0x2000 || + ch == 0x180E || + ch == 0x2001 || + ch == 0x2004 || + ch == 0x3000 || + ch == 0x2008 || + ch == 0x2006 || + ch == 0x2002 || + ch == 0x2007 || + ch == 0x2009 || + ch == 0x00A0 || + ch == 0x1680) + return true; + return false; +} + +/** + * Tests for UNICODE general class Zl (line separators); true only for 0x2028. + */ +bool isLineSeparator(int ch) +{ + if (ch == 0x2028) + return true; + return false; +} + +/** + * Tests for UNICODE general class Zp (paragraph separators); true only for 0x2029. + */ +bool isParagraphSeparator(int ch) +{ + if (ch == 0x2029) + return true; + return false; +} + +/** + * The union of the 3 space types: true if ch is a space, line or paragraph separator. 
+ */ +bool isSpaceChar(int ch) +{ + if ( isSpaceSeparator(ch) || + isLineSeparator(ch) || + isParagraphSeparator(ch)) + return true; + return false; +} + +/** + * The 3 spaces accepted by isSpaceChar() which don't break: 0x00A0, 0x2007 and 0x202F. + */ +bool isNonBreakingSpace(int ch) +{ + if (ch == 0x00A0 || ch == 0x2007 || ch == 0x202F) + return true; + return false; +} + +/** + * True if ch is a space char (see isSpaceChar) that is not a non-breaking space, or one of the control characters 0x0009 to 0x000D and 0x001C to 0x001F. NOTE(review): this accepts more than the XML 1.0 S production (0x20, 0x9, 0xD, 0xA); TODO confirm that is intended for the parsers that call it. + */ +bool isWhitespace(int ch) +{ + if (isSpaceChar(ch) && !isNonBreakingSpace(ch)) + return true; + if (ch == 0x0009 || // HORIZONTAL TABULATION + ch == 0x000A || // LINE FEED. + ch == 0x000B || // VERTICAL TABULATION. + ch == 0x000C || // FORM FEED. + ch == 0x000D || // CARRIAGE RETURN. + ch == 0x001C || // FILE SEPARATOR. + ch == 0x001D || // GROUP SEPARATOR. + ch == 0x001E || // RECORD SEPARATOR. + ch == 0x001F) // UNIT SEPARATOR. + return true; + return false; +} + + + + + + + + + + + + + + + + + diff --git a/src/dom/charclass.h b/src/dom/charclass.h index 5de08c0d4..800fd1c42 100755 --- a/src/dom/charclass.h +++ b/src/dom/charclass.h @@ -165,6 +165,44 @@ bool isExtender(int ch); +/** + * + * The following character classes come from unicode.org: they are taken + * from the UnicodeData file of the Unicode Character Database. 
+ */ + +/** + * Is ch in UNICODE general class Zs (space separator)? + */ +bool isSpaceSeparator(int ch); + +/** + * Is ch in UNICODE general class Zl (line separator)? + */ +bool isLineSeparator(int ch); + +/** + * Is ch in UNICODE general class Zp (paragraph separator)? + */ +bool isParagraphSeparator(int ch); + +/** + * Is ch a space, line or paragraph separator? + */ +bool isSpaceChar(int ch); + +/** + * Is ch one of the non-breaking spaces 0x00A0, 0x2007 or 0x202F? + */ +bool isNonBreakingSpace(int ch); + +/** + * Is ch whitespace: a breaking space char, or one of the control characters 0x0009 to 0x000D and 0x001C to 0x001F? + */ +bool isWhitespace(int ch); + + + #endif /* __CHARCLASS_H__ */ diff --git a/src/dom/cssparser.cpp b/src/dom/cssparser.cpp index 8f3b201ee..bea0c18ed 100755 --- a/src/dom/cssparser.cpp +++ b/src/dom/cssparser.cpp @@ -191,7 +191,7 @@ int CssParser::skipwhite(int p) return -1; } } - else if (!isspace(get(p))) + else if (!isWhitespace(get(p))) break; else p++; diff --git a/src/dom/domstream.cpp b/src/dom/domstream.cpp index 1129a7f46..e09743043 100755 --- a/src/dom/domstream.cpp +++ b/src/dom/domstream.cpp @@ -43,6 +43,7 @@ #include #include "domstream.h" +#include "charclass.h" namespace org { @@ 
-245,7 +246,7 @@ DOMString BasicReader::readWord() while (available() > 0) { XMLCh ch = get(); - if (!isprint(ch)) + if (isWhitespace(ch)) break; str.push_back(ch); } diff --git a/src/dom/svgparser.cpp b/src/dom/svgparser.cpp index 640fe9237..e698beee7 100755 --- a/src/dom/svgparser.cpp +++ b/src/dom/svgparser.cpp @@ -150,7 +150,7 @@ int SvgParser::skipwhite(int p) return -1; } } - else if (!isspace(get(p))) + else if (!isWhitespace(get(p))) break; else p++; diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp index d475afdea..dd88c2bb8 100755 --- a/src/dom/xmlreader.cpp +++ b/src/dom/xmlreader.cpp @@ -30,13 +30,17 @@ #include "xmlreader.h" +#include "charclass.h" #include "svgimpl.h" #include -namespace org { -namespace w3c { -namespace dom { +namespace org +{ +namespace w3c +{ +namespace dom +{ //######################################################################### @@ -188,7 +192,7 @@ int XmlReader::skipwhite(int p) while (p < len) { int b = get(p); - if (!isspace(b)) + if (!isWhitespace(b)) break; p++; } diff --git a/src/dom/xpathparser.cpp b/src/dom/xpathparser.cpp index ef306d795..a2d3dc56b 100755 --- a/src/dom/xpathparser.cpp +++ b/src/dom/xpathparser.cpp @@ -178,7 +178,7 @@ int XPathParser::skipwhite(int p0) while (p < parselen) { int ch = peek(p); - if (!isspace(ch)) + if (!isWhitespace(ch)) break; ch = get(p++); } @@ -191,7 +191,7 @@ int XPathParser::getword(int p0, DOMString &str) while (p < parselen) { int ch = peek(p); - if (!isalnum(ch)) + if (!isLetterOrDigit(ch)) break; ch = get(p++); str.push_back(ch); @@ -1817,7 +1817,7 @@ bool XPathParser::parse(const DOMString &xpathString) int p0 = 0; DOMString str = xpathString; - + parsebuf = (char *)str.c_str(); parselen = (int) str.size(); position = 0; @@ -1828,7 +1828,7 @@ bool XPathParser::parse(const DOMString &xpathString) lexicalTokenDump(); tokens.clear();//Get ready to store new tokens - + int p = getLocationPath(p0, 0); parsebuf = NULL; @@ -1853,21 +1853,21 @@ bool XPathParser::parse(const 
DOMString &xpathString) /** * This method "executes" a list of Tokens in the context of a DOM root * Node, returning a list of Nodes that match the xpath expression. - */ -NodeList XPathParser::execute(const Node *root, + */ +NodeList XPathParser::execute(const Node *root, std::vector &toks) { NodeList list; if (!root) return list; - + //### Execute the token list std::vector::iterator iter; for (iter = toks.begin() ; iter != toks.end() ; iter++) { } - + return list; } @@ -1883,7 +1883,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString) NodeList list; //### Maybe do caching for speed here - + //### Parse and execute //### Error message can be generated as a side effect if (!parse(xpathString)) @@ -1891,7 +1891,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString) //### Execute the token list list = execute(root, tokens); - + return list; } -- 2.30.2