Replace isspace() with isWhitespace(). Use pure unicode character classes.

author ishmal <ishmal@users.sourceforge.net>

Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)

committer ishmal <ishmal@users.sourceforge.net>

Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)
author ishmal <ishmal@users.sourceforge.net>
Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)
committer ishmal <ishmal@users.sourceforge.net>
Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)
diff --git a/src/dom/charclass.cpp b/src/dom/charclass.cpp

index 81c3fb18e6f105720b08ba2addbd7cb10dba2f53..086256f48fdf1ce89c95549b1c23af26756e1525 100755 (executable)
--- a/src/dom/charclass.cpp
+++ b/src/dom/charclass.cpp
@@ -442,6 +442,117 @@ bool isExtender(int ch)
  
  
  
+/**
+ *
+ * The following classifications are taken from the UnicodeData file
+ * of the Unicode Character Database at unicode.org.
+ */
+
+/**
+ * UNICODE general class Zs (space separators).
+ *
+ * Returns true when ch is equal to one of the code points compared
+ * below, false for every other value.  The comparisons are a plain
+ * chain of equality tests; their order has no effect on the result.
+ *
+ * NOTE(review): U+180E (MONGOLIAN VOWEL SEPARATOR) was moved from Zs to
+ * Cf in Unicode 6.3 -- confirm whether it should still be listed here.
+ */
+bool isSpaceSeparator(int ch)
+{
+    if (ch == 0x0020 || // SPACE
+        ch == 0x200A || // HAIR SPACE
+        ch == 0x2003 || // EM SPACE
+        ch == 0x205F || // MEDIUM MATHEMATICAL SPACE
+        ch == 0x2005 || // FOUR-PER-EM SPACE
+        ch == 0x202F || // NARROW NO-BREAK SPACE
+        ch == 0x2000 || // EN QUAD
+        ch == 0x180E || // MONGOLIAN VOWEL SEPARATOR
+        ch == 0x2001 || // EM QUAD
+        ch == 0x2004 || // THREE-PER-EM SPACE
+        ch == 0x3000 || // IDEOGRAPHIC SPACE
+        ch == 0x2008 || // PUNCTUATION SPACE
+        ch == 0x2006 || // SIX-PER-EM SPACE
+        ch == 0x2002 || // EN SPACE
+        ch == 0x2007 || // FIGURE SPACE
+        ch == 0x2009 || // THIN SPACE
+        ch == 0x00A0 || // NO-BREAK SPACE
+        ch == 0x1680)   // OGHAM SPACE MARK
+        return true;
+    return false;
+}
+
+/**
+ * UNICODE general class Zl (line separator).
+ *
+ * Returns true only when ch is U+2028 (LINE SEPARATOR), the single
+ * code point tested here; false otherwise.
+ */
+bool isLineSeparator(int ch)
+{
+    if (ch == 0x2028) // LINE SEPARATOR
+        return true;
+    return false;
+}
+
+/**
+ * UNICODE general class Zp (paragraph separator).
+ *
+ * Returns true only when ch is U+2029 (PARAGRAPH SEPARATOR), the single
+ * code point tested here; false otherwise.
+ */
+bool isParagraphSeparator(int ch)
+{
+    if (ch == 0x2029) // PARAGRAPH SEPARATOR
+        return true;
+    return false;
+}
+
+/**
+ * The union of the 3 space types.
+ *
+ * Returns true when ch is accepted by isSpaceSeparator(),
+ * isLineSeparator() or isParagraphSeparator(); false otherwise.
+ * Non-breaking spaces are included, because isSpaceSeparator()
+ * accepts them.
+ */
+bool isSpaceChar(int ch)
+{
+    if ( isSpaceSeparator(ch) ||
+         isLineSeparator(ch)  ||
+         isParagraphSeparator(ch))
+        return true;
+    return false;
+}
+
+/**
+ * The 3 spaces accepted by isSpaceChar() which do not break:
+ * U+00A0 (NO-BREAK SPACE), U+2007 (FIGURE SPACE) and
+ * U+202F (NARROW NO-BREAK SPACE).
+ *
+ * Returns true when ch is one of those three code points, false
+ * otherwise.
+ */
+bool isNonBreakingSpace(int ch)
+{
+    if (ch == 0x00A0 || ch == 0x2007 || ch == 0x202F)
+        return true;
+    return false;
+}
+
+/**
+ * Whitespace test used by the parsers in place of isspace().
+ *
+ * Returns true when ch is either
+ *   - accepted by isSpaceChar() and not by isNonBreakingSpace(), or
+ *   - one of the control characters U+0009..U+000D or U+001C..U+001F.
+ * Returns false for everything else, so the non-breaking spaces
+ * U+00A0, U+2007 and U+202F are NOT whitespace here.
+ *
+ * NOTE(review): this is the same rule as Java's
+ * Character.isWhitespace(); presumably modeled on it -- confirm.
+ */
+bool isWhitespace(int ch)
+{
+    // Unicode Zs/Zl/Zp characters, minus the non-breaking spaces
+    if (isSpaceChar(ch) && !isNonBreakingSpace(ch))
+        return true;
+    // Control characters that are also treated as whitespace
+    if (ch == 0x0009 || // HORIZONTAL TABULATION
+        ch == 0x000A || // LINE FEED.
+        ch == 0x000B || // VERTICAL TABULATION.
+        ch == 0x000C || // FORM FEED.
+        ch == 0x000D || // CARRIAGE RETURN.
+        ch == 0x001C || // FILE SEPARATOR.
+        ch == 0x001D || // GROUP SEPARATOR.
+        ch == 0x001E || // RECORD SEPARATOR.
+        ch == 0x001F)   // UNIT SEPARATOR.
+        return true;
+    return false;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
  
  
  
diff --git a/src/dom/charclass.h b/src/dom/charclass.h

index 5de08c0d4febac8519db6924ac9ad74c14ad34d6..800fd1c42f0bb38e01486da92fe4887ddaa8543e 100755 (executable)
--- a/src/dom/charclass.h
+++ b/src/dom/charclass.h
@@ -165,6 +165,44 @@ bool isExtender(int ch);
  
  
  
+/**
+ *
+ * The following classifications are taken from the UnicodeData file
+ * of the Unicode Character Database at unicode.org.
+ */
+
+/**
+ * Returns true if ch is in Unicode general class Zs (space separator).
+ */
+bool isSpaceSeparator(int ch);
+
+/**
+ * Returns true if ch is in Unicode general class Zl (U+2028).
+ */
+bool isLineSeparator(int ch);
+
+/**
+ * Returns true if ch is in Unicode general class Zp (U+2029).
+ */
+bool isParagraphSeparator(int ch);
+
+/**
+ * Returns true if ch is a space, line or paragraph separator
+ * (the union of the Zs, Zl and Zp tests).
+ */
+bool isSpaceChar(int ch);
+
+/**
+ * Returns true if ch is U+00A0, U+2007 or U+202F (non-breaking spaces).
+ */
+bool isNonBreakingSpace(int ch);
+
+/**
+ * Returns true if ch is a breaking Unicode space character (Zs, Zl or
+ * Zp, excluding the non-breaking spaces) or one of the control
+ * characters U+0009..U+000D, U+001C..U+001F.
+ */
+bool isWhitespace(int ch);
+
+
+
  #endif  /* __CHARCLASS_H__ */
  
  
diff --git a/src/dom/cssparser.cpp b/src/dom/cssparser.cpp

index 8f3b201ee3e738420657c6b7c6263921e6862957..bea0c18ed386465af6fec77cb782c6b50dc88dab 100755 (executable)
--- a/src/dom/cssparser.cpp
+++ b/src/dom/cssparser.cpp
@@ -191,7 +191,7 @@ int CssParser::skipwhite(int p)
              return -1;
              }
          }
-    else if (!isspace(get(p)))
+    else if (!isWhitespace(get(p)))
          break;
      else
          p++;
diff --git a/src/dom/domstream.cpp b/src/dom/domstream.cpp

index 1129a7f46ca46651553e6c0a0ab6408ce209a5ac..e0974304378807f94fa3e225cf72e63bf80608ef 100755 (executable)
--- a/src/dom/domstream.cpp
+++ b/src/dom/domstream.cpp
@@ -43,6 +43,7 @@
  #include <stdarg.h>
  
  #include "domstream.h"
+#include "charclass.h"
  
  namespace org
  {
@@ -245,7 +246,7 @@ DOMString BasicReader::readWord()
      while (available() > 0)
          {
          XMLCh ch = get();
-        if (!isprint(ch))
+        if (isWhitespace(ch))
              break;
          str.push_back(ch);
          }
diff --git a/src/dom/svgparser.cpp b/src/dom/svgparser.cpp

index 640fe9237f9d0d1955edfbb775cad51d60ae62b1..e698beee77399e212ba3f2919cd6b31bb613db0d 100755 (executable)
--- a/src/dom/svgparser.cpp
+++ b/src/dom/svgparser.cpp
@@ -150,7 +150,7 @@ int SvgParser::skipwhite(int p)
              return -1;
              }
          }
-    else if (!isspace(get(p)))
+    else if (!isWhitespace(get(p)))
          break;
      else
          p++;
diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp

index d475afdea7f8ec46df9aec385f56d387e277bdb2..dd88c2bb8ed6c3c5ad9bc54814ec74f8587437c5 100755 (executable)
--- a/src/dom/xmlreader.cpp
+++ b/src/dom/xmlreader.cpp
@@ -30,13 +30,17 @@
  
  
  #include "xmlreader.h"
+#include "charclass.h"
  #include "svgimpl.h"
  
  #include <stdarg.h>
  
-namespace org {
-namespace w3c {
-namespace dom {
+namespace org
+{
+namespace w3c
+{
+namespace dom
+{
  
  
  //#########################################################################
@@ -188,7 +192,7 @@ int XmlReader::skipwhite(int p)
    while (p < len)
      {
      int b = get(p);
-    if (!isspace(b))
+    if (!isWhitespace(b))
          break;
      p++;
      }
diff --git a/src/dom/xpathparser.cpp b/src/dom/xpathparser.cpp

index ef306d795311ddd1f458350778522eb51310312e..a2d3dc56b9a1efa491842aaf313c25d8daa60f61 100755 (executable)
--- a/src/dom/xpathparser.cpp
+++ b/src/dom/xpathparser.cpp
@@ -178,7 +178,7 @@ int XPathParser::skipwhite(int p0)
      while (p < parselen)
          {
          int ch = peek(p);
-        if (!isspace(ch))
+        if (!isWhitespace(ch))
              break;
          ch = get(p++);
          }
@@ -191,7 +191,7 @@ int XPathParser::getword(int p0, DOMString &str)
      while (p < parselen)
          {
          int ch = peek(p);
-        if (!isalnum(ch))
+        if (!isLetterOrDigit(ch))
              break;
          ch = get(p++);
          str.push_back(ch);
@@ -1817,7 +1817,7 @@ bool XPathParser::parse(const DOMString &xpathString)
      int p0 = 0;
  
      DOMString str = xpathString;
-    
+
      parsebuf = (char *)str.c_str();
      parselen = (int)   str.size();
      position = 0;
@@ -1828,7 +1828,7 @@ bool XPathParser::parse(const DOMString &xpathString)
      lexicalTokenDump();
  
      tokens.clear();//Get ready to store new tokens
-    
+
      int p = getLocationPath(p0, 0);
  
      parsebuf = NULL;
@@ -1853,21 +1853,21 @@ bool XPathParser::parse(const DOMString &xpathString)
  /**
   *  This method "executes" a list of Tokens in the context of a DOM root
   *  Node, returning a list of Nodes that match the xpath expression.
- */ 
-NodeList XPathParser::execute(const Node *root, 
+ */
+NodeList XPathParser::execute(const Node *root,
                                std::vector<Token> &toks)
  {
      NodeList list;
  
      if (!root)
          return list;
-        
+
      //### Execute the token list
      std::vector<Token>::iterator iter;
      for (iter = toks.begin() ; iter != toks.end() ; iter++)
          {
          }
-        
+
      return list;
  }
  
@@ -1883,7 +1883,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString)
      NodeList list;
  
      //### Maybe do caching for speed here
-    
+
      //### Parse and execute
      //### Error message can be generated as a side effect
      if (!parse(xpathString))
@@ -1891,7 +1891,7 @@ NodeList XPathParser::evaluate(const Node *root, const DOMString &xpathString)
  
      //### Execute the token list
      list = execute(root, tokens);
-        
+
      return list;
  }
author	ishmal <ishmal@users.sourceforge.net>
	Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)
committer	ishmal <ishmal@users.sourceforge.net>
	Tue, 28 Feb 2006 20:28:32 +0000 (20:28 +0000)
src/dom/charclass.cpp		patch \| blob \| history
src/dom/charclass.h		patch \| blob \| history
src/dom/cssparser.cpp		patch \| blob \| history
src/dom/domstream.cpp		patch \| blob \| history
src/dom/svgparser.cpp		patch \| blob \| history
src/dom/xmlreader.cpp		patch \| blob \| history
src/dom/xpathparser.cpp		patch \| blob \| history