summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 8df0cf0)
raw | patch | inline | side by side (parent: 8df0cf0)
author | ishmal <ishmal@users.sourceforge.net> | |
Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000) | ||
committer | ishmal <ishmal@users.sourceforge.net> | |
Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000) |
diff --git a/src/dom/charclass.cpp b/src/dom/charclass.cpp
index 81c3fb18e6f105720b08ba2addbd7cb10dba2f53..086256f48fdf1ce89c95549b1c23af26756e1525 100755 (executable)
--- a/src/dom/charclass.cpp
+++ b/src/dom/charclass.cpp
+/**
+ *
+ * The following character classifications are taken from the UnicodeData
+ * file of the Unicode Character Database, published at unicode.org.
+ */
+
+/**
+ * UNICODE general class Zs (space separator).
+ *
+ * Compares ch against a fixed list of 18 code points: U+0020, U+00A0,
+ * U+1680, U+180E, U+2000 through U+200A, U+202F, U+205F and U+3000.
+ * Returns true if ch equals one of them and false for any other value.
+ *
+ * NOTE(review): U+180E (MONGOLIAN VOWEL SEPARATOR) was moved from Zs to Cf
+ * in Unicode 6.3.0 -- confirm whether it should still be listed here.
+ */
+bool isSpaceSeparator(int ch)
+{
+ if (ch == 0x0020 ||
+ ch == 0x200A ||
+ ch == 0x2003 ||
+ ch == 0x205F ||
+ ch == 0x2005 ||
+ ch == 0x202F ||
+ ch == 0x2000 ||
+ ch == 0x180E ||
+ ch == 0x2001 ||
+ ch == 0x2004 ||
+ ch == 0x3000 ||
+ ch == 0x2008 ||
+ ch == 0x2006 ||
+ ch == 0x2002 ||
+ ch == 0x2007 ||
+ ch == 0x2009 ||
+ ch == 0x00A0 ||
+ ch == 0x1680)
+ return true;
+ return false;
+}
+
+/**
+ * UNICODE general class Zl (line separator).
+ *
+ * Returns true only when ch is U+2028, the single code point tested for
+ * this class; every other value yields false.
+ */
+bool isLineSeparator(int ch)
+{
+ if (ch == 0x2028)
+ return true;
+ return false;
+}
+
+/**
+ * UNICODE general class Zp (paragraph separator).
+ *
+ * Returns true only when ch is U+2029, the single code point tested for
+ * this class; every other value yields false.
+ */
+bool isParagraphSeparator(int ch)
+{
+ if (ch == 0x2029)
+ return true;
+ return false;
+}
+
+/**
+ * The union of the 3 space types.
+ *
+ * Returns true if ch is accepted by isSpaceSeparator() (Zs),
+ * isLineSeparator() (Zl) or isParagraphSeparator() (Zp), and false
+ * otherwise. Control characters such as tab or line feed are not covered
+ * by any of those three tests.
+ */
+bool isSpaceChar(int ch)
+{
+ if ( isSpaceSeparator(ch) ||
+ isLineSeparator(ch) ||
+ isParagraphSeparator(ch))
+ return true;
+ return false;
+}
+
+/**
+ * The 3 spaces accepted by isSpaceChar() which do not break a line.
+ *
+ * Returns true when ch is U+00A0 (NO-BREAK SPACE), U+2007 (FIGURE SPACE)
+ * or U+202F (NARROW NO-BREAK SPACE), and false for any other value.
+ */
+bool isNonBreakingSpace(int ch)
+{
+ if (ch == 0x00A0 || ch == 0x2007 || ch == 0x202F)
+ return true;
+ return false;
+}
+
+/**
+ * Test whether ch counts as whitespace.
+ *
+ * Returns true in either of two cases:
+ *  - ch is accepted by isSpaceChar() and is not one of the non-breaking
+ *    spaces recognised by isNonBreakingSpace();
+ *  - ch is one of the control characters U+0009 through U+000D or
+ *    U+001C through U+001F.
+ * Any other value yields false.
+ *
+ * NOTE(review): this looks like the definition used by Java's
+ * Character.isWhitespace() -- confirm. It accepts more code points than
+ * the four whitespace characters of the XML specification.
+ */
+bool isWhitespace(int ch)
+{
+ if (isSpaceChar(ch) && !isNonBreakingSpace(ch))
+ return true;
+ if (ch == 0x0009 || // HORIZONTAL TABULATION
+ ch == 0x000A || // LINE FEED.
+ ch == 0x000B || // VERTICAL TABULATION.
+ ch == 0x000C || // FORM FEED.
+ ch == 0x000D || // CARRIAGE RETURN.
+ ch == 0x001C || // FILE SEPARATOR.
+ ch == 0x001D || // GROUP SEPARATOR.
+ ch == 0x001E || // RECORD SEPARATOR.
+ ch == 0x001F) // UNIT SEPARATOR.
+ return true;
+ return false;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/dom/charclass.h b/src/dom/charclass.h
index 5de08c0d4febac8519db6924ac9ad74c14ad34d6..800fd1c42f0bb38e01486da92fe4887ddaa8543e 100755 (executable)
--- a/src/dom/charclass.h
+++ b/src/dom/charclass.h
+/**
+ *
+ * The following character classifications are taken from the UnicodeData
+ * file of the Unicode Character Database, published at unicode.org.
+ */
+
+/**
+ * Test whether ch belongs to UNICODE general class Zs (space separator).
+ * Returns true for the code points listed in the definition in
+ * charclass.cpp and false for any other value.
+ */
+bool isSpaceSeparator(int ch);
+
+/**
+ * Test whether ch belongs to UNICODE general class Zl (line separator).
+ * Returns true only for U+2028.
+ */
+bool isLineSeparator(int ch);
+
+/**
+ * Test whether ch belongs to UNICODE general class Zp (paragraph
+ * separator). Returns true only for U+2029.
+ */
+bool isParagraphSeparator(int ch);
+
+/**
+ * Test whether ch is a space of any of the three separator types.
+ * Returns true if isSpaceSeparator(), isLineSeparator() or
+ * isParagraphSeparator() accepts ch.
+ */
+bool isSpaceChar(int ch);
+
+/**
+ * Test whether ch is one of the three non-breaking spaces: U+00A0,
+ * U+2007 or U+202F.
+ */
+bool isNonBreakingSpace(int ch);
+
+/**
+ * Test whether ch counts as whitespace. Returns true if ch is accepted by
+ * isSpaceChar() and is not a non-breaking space, or if ch is one of the
+ * control characters U+0009 through U+000D or U+001C through U+001F.
+ */
+bool isWhitespace(int ch);
+
+
+
#endif /* __CHARCLASS_H__ */
diff --git a/src/dom/cssparser.cpp b/src/dom/cssparser.cpp
index 8f3b201ee3e738420657c6b7c6263921e6862957..bea0c18ed386465af6fec77cb782c6b50dc88dab 100755 (executable)
--- a/src/dom/cssparser.cpp
+++ b/src/dom/cssparser.cpp
return -1;
}
}
- else if (!isspace(get(p)))
+ else if (!isWhitespace(get(p)))
break;
else
p++;
diff --git a/src/dom/domstream.cpp b/src/dom/domstream.cpp
index 1129a7f46ca46651553e6c0a0ab6408ce209a5ac..e0974304378807f94fa3e225cf72e63bf80608ef 100755 (executable)
--- a/src/dom/domstream.cpp
+++ b/src/dom/domstream.cpp
#include <stdarg.h>
#include "domstream.h"
+#include "charclass.h"
namespace org
{
while (available() > 0)
{
XMLCh ch = get();
- if (!isprint(ch))
+ if (isWhitespace(ch))
break;
str.push_back(ch);
}
diff --git a/src/dom/svgparser.cpp b/src/dom/svgparser.cpp
index 640fe9237f9d0d1955edfbb775cad51d60ae62b1..e698beee77399e212ba3f2919cd6b31bb613db0d 100755 (executable)
--- a/src/dom/svgparser.cpp
+++ b/src/dom/svgparser.cpp
return -1;
}
}
- else if (!isspace(get(p)))
+ else if (!isWhitespace(get(p)))
break;
else
p++;
diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp
index d475afdea7f8ec46df9aec385f56d387e277bdb2..dd88c2bb8ed6c3c5ad9bc54814ec74f8587437c5 100755 (executable)
--- a/src/dom/xmlreader.cpp
+++ b/src/dom/xmlreader.cpp
#include "xmlreader.h"
+#include "charclass.h"
#include "svgimpl.h"
#include <stdarg.h>
-namespace org {
-namespace w3c {
-namespace dom {
+namespace org
+{
+namespace w3c
+{
+namespace dom
+{
//#########################################################################
while (p < len)
{
int b = get(p);
- if (!isspace(b))
+ if (!isWhitespace(b))
break;
p++;
}
index ef306d795311ddd1f458350778522eb51310312e..a2d3dc56b9a1efa491842aaf313c25d8daa60f61 100755 (executable)
--- a/src/dom/xpathparser.cpp
+++ b/src/dom/xpathparser.cpp
while (p < parselen)
{
int ch = peek(p);
- if (!isspace(ch))
+ if (!isWhitespace(ch))
break;
ch = get(p++);
}
while (p < parselen)
{
int ch = peek(p);
- if (!isalnum(ch))
+ if (!isLetterOrDigit(ch))
break;
ch = get(p++);
str.push_back(ch);
int p0 = 0;
DOMString str = xpathString;
-
+
parsebuf = (char *)str.c_str();
parselen = (int) str.size();
position = 0;
lexicalTokenDump();
tokens.clear();//Get ready to store new tokens
-
+
int p = getLocationPath(p0, 0);
parsebuf = NULL;
/**
* This method "executes" a list of Tokens in the context of a DOM root
* Node, returning a list of Nodes that match the xpath expression.
- */
-NodeList XPathParser::execute(const Node *root,
+ */
+NodeList XPathParser::execute(const Node *root,
std::vector<Token> &toks)
{
NodeList list;
if (!root)
return list;
-
+
//### Execute the token list
std::vector<Token>::iterator iter;
for (iter = toks.begin() ; iter != toks.end() ; iter++)
{
}
-
+
return list;
}
@@ -1883,7 +1883,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString)
NodeList list;
//### Maybe do caching for speed here
-
+
//### Parse and execute
//### Error message can be generated as a side effect
if (!parse(xpathString))
@@ -1891,7 +1891,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString)
//### Execute the token list
list = execute(root, tokens);
-
+
return list;
}