Warning cleanup

[inkscape.git] / src / dom / uri.cpp
diff --git a/src/dom/uri.cpp b/src/dom/uri.cpp

index 286857e4177c7c87bb160991f90fd613f3f5f729..13b76c413d3a962a685bd505cec1e5be3a842609 100644 (file)
--- a/src/dom/uri.cpp
+++ b/src/dom/uri.cpp
@@ -10,7 +10,7 @@
   * Authors:
   *   Bob Jamison
   *
- * Copyright (C) 2005 Bob Jamison
+ * Copyright (C) 2005-2007 Bob Jamison
   *
   *  This library is free software; you can redistribute it and/or
   *  modify it under the terms of the GNU Lesser General Public
@@ -35,7 +35,7 @@
  
  #include <stdio.h>
  #include <stdarg.h>
-
+#include <vector>
  
  
  namespace org
@@ -49,11 +49,11 @@ namespace dom
  typedef struct
  {
      int  ival;
-    char *sval;
+    char const *sval;
      int  port;
  } LookupEntry;
  
-LookupEntry schemes[] =
+static LookupEntry schemes[] =
  {
      { URI::SCHEME_DATA,   "data:",    0 },
      { URI::SCHEME_HTTP,   "http:",   80 },
@@ -142,14 +142,14 @@ void URI::init()
      parsebuf  = NULL;
      parselen  = 0;
      scheme    = SCHEME_NONE;
-    schemeStr = "";
+    schemeStr.clear();
      port      = 0;
-    authority = "";
-    path      = "";
+    authority.clear();
+    path.clear();
      absolute  = false;
      opaque    = false;
-    query     = "";
-    fragment  = "";
+    query.clear();
+    fragment.clear();
  }
  
  
@@ -173,6 +173,29 @@ void URI::assign(const URI &other)
  //#########################################################################
  //#A T T R I B U T E S
  //#########################################################################
+static char *hexChars = "0123456789abcdef";
+
+static DOMString toStr(const std::vector<int> &arr)
+{
+    DOMString buf;
+    std::vector<int>::const_iterator iter;
+    for (iter=arr.begin() ; iter!=arr.end() ; iter++)
+        {
+        int ch = *iter;
+        if (isprint(ch))
+            buf.push_back((XMLCh)ch);
+        else
+            {
+            buf.push_back('%');
+            int hi = ((ch>>4) & 0xf);
+            buf.push_back(hexChars[hi]);
+            int lo = ((ch   ) & 0xf);
+            buf.push_back(hexChars[lo]);
+            }
+        }
+    return buf;
+}
+
  
  DOMString URI::toString() const
  {
@@ -180,18 +203,18 @@ DOMString URI::toString() const
      if (authority.size() > 0)
          {
          str.append("//");
-        str.append(authority);
+        str.append(toStr(authority));
          }
-    str.append(path);
+    str.append(toStr(path));
      if (query.size() > 0)
          {
          str.append("?");
-        str.append(query);
+        str.append(toStr(query));
          }
      if (fragment.size() > 0)
          {
          str.append("#");
-        str.append(fragment);
+        str.append(toStr(fragment));
          }
      return str;
  }
@@ -210,7 +233,7 @@ DOMString URI::getSchemeStr() const
  
  DOMString URI::getAuthority() const
  {
-    DOMString ret = authority;
+    DOMString ret = toStr(authority);
      if (portSpecified && port>=0)
          {
          char buf[7];
@@ -222,7 +245,8 @@ DOMString URI::getAuthority() const
  
  DOMString URI::getHost() const
  {
-    return authority;
+    DOMString str = toStr(authority);
+    return str;
  }
  
  int URI::getPort() const
@@ -233,7 +257,35 @@ int URI::getPort() const
  
  DOMString URI::getPath() const
  {
-    return path;
+    DOMString str = toStr(path);
+    return str;
+}
+
+DOMString URI::getNativePath() const
+{
+    DOMString pathStr = toStr(path);
+    DOMString npath;
+#ifdef __WIN32__
+    unsigned int firstChar = 0;
+    if (pathStr.size() >= 3)
+        {
+        if (pathStr[0] == '/' &&
+            isLetter(pathStr[1]) &&
+            pathStr[2] == ':')
+            firstChar++;
+         }
+    for (unsigned int i=firstChar ; i<pathStr.size() ; i++)
+        {
+        XMLCh ch = (XMLCh) pathStr[i];
+        if (ch == '/')
+            npath.push_back((XMLCh)'\\');
+        else
+            npath.push_back(ch);
+        }
+#else
+    npath = pathStr;
+#endif
+    return npath;
  }
  
  
@@ -250,13 +302,71 @@ bool URI::isOpaque() const
  
  DOMString URI::getQuery() const
  {
-    return query;
+    DOMString str = toStr(query);
+    return str;
  }
  
  
  DOMString URI::getFragment() const
  {
-    return fragment;
+    DOMString str = toStr(fragment);
+    return str;
+}
+
+
+
+
+static int find(const std::vector<int> &str, int ch, int startpos)
+{
+    for (unsigned int i = startpos ; i < str.size() ; i++)
+        {
+        if (ch == str[i])
+            return i;
+        }
+    return -1;
+}
+
+
+static int findLast(const std::vector<int> &str, int ch)
+{
+    // TODO FIXME BUGBUG
+    // This loop appears to be infinite, so it is probably not being called.
+    // Test for a problem, then fix after it has been observed locking up.
+    for (unsigned int i = str.size()-1 ; i>=0 ; i--)
+        {
+        if (ch == str[i])
+            return i;
+        }
+    return -1;
+}
+
+
+static bool sequ(const std::vector<int> &str, char *key)
+{
+    char *c = key;
+    for (unsigned int i=0 ; i<str.size() ; i++)
+        {
+        if (! (*c))
+            return false;
+        if (*c != str[i])
+            return false;
+        }
+    return true;
+}
+
+
+static std::vector<int> substr(const std::vector<int> &str,
+                      int startpos, int len)
+{
+    std::vector<int> buf;
+    unsigned int pos = startpos;
+    for (int i=0 ; i<len ; i++)
+        {
+        if (pos >= str.size())
+            break;
+        buf.push_back(str[pos++]);
+        }
+    return buf;
  }
  
  
@@ -306,30 +416,119 @@ URI URI::resolve(const URI &other) const
              }
          else
              {
-            unsigned int pos = path.rfind('/');
-            if (pos != path.npos)
+            int pos = findLast(path, '/');
+            if (pos >= 0)
                  {
-                DOMString tpath = path.substr(pos);
-                tpath.append(other.path);
-                newUri.path = tpath;
-                newUri.normalize();
+                newUri.path.clear();
+                //# append my path up to and including the '/'
+                for (int i = 0; i<=pos ; i++)
+                       newUri.path.push_back(path[i]);
+                //# append other path
+                for (unsigned int i = 0; i<other.path.size() ; i++)
+                       newUri.path.push_back(other.path[i]);
                  }
+            else
+                newUri.path = other.path;
              }
          }
+
+    newUri.normalize();
+
      return newUri;
  }
  
  
  /**
- *
+ *  This follows the Java URI algorithm:
+ *   1. All "." segments are removed.
+ *   2. If a ".." segment is preceded by a non-".." segment
+ *          then both of these segments are removed. This step
+ *          is repeated until it is no longer applicable.
+ *   3. If the path is relative, and if its first segment
+ *          contains a colon character (':'), then a "." segment
+ *          is prepended. This prevents a relative URI with a path
+ *          such as "a:b/c/d" from later being re-parsed as an
+ *          opaque URI with a scheme of "a" and a scheme-specific
+ *          part of "b/c/d". (Deviation from RFC 2396)
   */
-void URI::normalize() const
+void URI::normalize()
  {
+    std::vector< std::vector<int> > segments;
  
+    //## Collect segments
+    if (path.size()<2)
+        return;
+    bool abs = false;
+    int pos=0;
+    int len = (int) path.size();
  
+    if (path[0]=='/')
+        {
+        abs = true;
+        pos++;
+        }
  
+    while (pos < len)
+        {
+        int pos2 = find(path, '/', pos);
+        if (pos2 < 0)
+            {
+            std::vector<int> seg = substr(path, pos, path.size()-pos);
+            //printf("last segment:%s\n", toStr(seg).c_str());
+            segments.push_back(seg);
+            break;
+            }
+        if (pos2>pos)
+            {
+            std::vector<int> seg = substr(path, pos, pos2-pos);
+            //printf("segment:%s\n", toStr(seg).c_str());
+            segments.push_back(seg);
+            }
+        pos = pos2;
+        pos++;
+        }
  
+    //## Clean up (normalize) segments
+    bool edited = false;
+    std::vector< std::vector<int> >::iterator iter;
+    for (iter=segments.begin() ; iter!=segments.end() ; )
+        {
+        std::vector<int> s = *iter;
+        if (sequ(s,"."))
+            {
+            iter = segments.erase(iter);
+            edited = true;
+            }
+        else if (sequ(s, "..") && iter != segments.begin() &&
+                 !sequ(*(iter-1), ".."))
+            {
+            iter--; //back up, then erase two entries
+            iter = segments.erase(iter);
+            iter = segments.erase(iter);
+            edited = true;
+            }
+        else
+            iter++;
+        }
  
+    //## Rebuild path, if necessary
+    if (edited)
+        {
+        path.clear();
+        if (abs)
+            {
+            path.push_back('/');
+            }
+        std::vector< std::vector<int> >::iterator iter;
+        for (iter=segments.begin() ; iter!=segments.end() ; iter++)
+            {
+            if (iter != segments.begin())
+                path.push_back('/');
+            std::vector<int> seg = *iter;
+            for (unsigned int i = 0; i<seg.size() ; i++)
+                path.push_back(seg[i]);
+            }
+        }
  
  }
  
@@ -376,7 +575,7 @@ int URI::peek(int p)
  
  
  
-int URI::match(int p0, char *key)
+int URI::match(int p0, char const *key)
  {
      int p = p0;
      while (p < parselen)
@@ -395,6 +594,91 @@ int URI::match(int p0, char *key)
  //#  http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
  //#########################################################################
  
+int URI::parseHex(int p0, int &result)
+{
+    int p = p0;
+    int val = 0;
+
+    //# Upper 4
+    int ch = peek(p);
+    if (ch >= '0' && ch <= '9')
+        val += (ch - '0');
+    else if (ch >= 'a' && ch <= 'f')
+        val += (10 + ch - 'a');
+    else if (ch >= 'A' && ch <= 'F')
+        val += (10 + ch - 'A');
+    else
+        {
+        error("parseHex : unexpected character : %c", ch);
+        return -1;
+        }
+    p++;
+    val <<= 4;
+
+    //# Lower 4
+    ch = peek(p);
+    if (ch >= '0' && ch <= '9')
+        val += (ch - '0');
+    else if (ch >= 'a' && ch <= 'f')
+        val += (10 + ch - 'a');
+    else if (ch >= 'A' && ch <= 'F')
+        val += (10 + ch - 'A');
+    else
+        {
+        error("parseHex : unexpected character : %c", ch);
+        return -1;
+        }
+    p++;
+    result = val;
+    return p;
+}
+
+
+
+int URI::parseEntity(int p0, int &result)
+{
+    int p = p0;
+    int ch = peek(p);
+    if (ch != '&')
+        return p0;
+    p++;
+    if (!match(p, "#x"))
+        {
+        error("parseEntity: expected '#x'");
+        return -1;
+        }
+    p += 2;
+    int val;
+    p = parseHex(p, val);
+    if (p<0)
+        return -1;
+    ch = peek(p);
+    if (ch != ';')
+        {
+        error("parseEntity: expected ';'");
+        return -1;
+        }
+    p++;
+    result = val;
+    return p;
+}
+
+int URI::parseAsciiEntity(int p0, int &result)
+{
+    int p = p0;
+    int ch = peek(p);
+    if (ch != '%')
+        return p0;
+    p++;
+    int val;
+    p = parseHex(p, val);
+    if (p<0)
+        return -1;
+    result = val;
+    return p;
+}
+
+
  int URI::parseScheme(int p0)
  {
      int p = p0;
@@ -432,13 +716,43 @@ int URI::parseHierarchicalPart(int p0)
              ch = peek(p);
              if (ch == '/')
                  break;
+            else if (ch == '&') //IRI entity
+                {
+                int val;
+                p2 = parseEntity(p, val);
+                if (p2<p)
+                    {
+                    return -1;
+                    }
+                p = p2;
+                authority.push_back((XMLCh)val);
+                }
+            else if (ch == '%') //ascii hex excape
+                {
+                int val;
+                p2 = parseAsciiEntity(p, val);
+                if (p2<p)
+                    {
+                    return -1;
+                    }
+                p = p2;
+                authority.push_back((XMLCh)val);
+                }
              else if (ch == ':')
+                {
                  portSpecified = true;
+                p++;
+                }
              else if (portSpecified)
+                {
                  portStr.push_back((XMLCh)ch);
+                p++;
+                }
              else
+                {
                  authority.push_back((XMLCh)ch);
-            p++;
+                p++;
+                }
              }
          if (portStr.size() > 0)
              {
@@ -452,7 +766,12 @@ int URI::parseHierarchicalPart(int p0)
  
      //# Are we absolute?
      ch = peek(p);
-    if (ch == '/')
+    if (isLetter(ch) && peek(p+1)==':')
+        {
+        absolute = true;
+        path.push_back((XMLCh)'/');
+        }
+    else if (ch == '/')
          {
          absolute = true;
          if (p>p0) //in other words, if '/' is not the first char
@@ -466,10 +785,35 @@ int URI::parseHierarchicalPart(int p0)
          ch = peek(p);
          if (ch == '?' || ch == '#')
              break;
-        path.push_back((XMLCh)ch);
-        p++;
+        else if (ch == '&') //IRI entity
+            {
+            int val;
+            p2 = parseEntity(p, val);
+            if (p2<p)
+                {
+                return -1;
+                }
+            p = p2;
+            path.push_back((XMLCh)val);
+            }
+        else if (ch == '%') //ascii hex excape
+            {
+            int val;
+            p2 = parseAsciiEntity(p, val);
+            if (p2<p)
+                {
+                return -1;
+                }
+            p = p2;
+            path.push_back((XMLCh)val);
+            }
+        else
+            {
+            path.push_back((XMLCh)ch);
+            p++;
+            }
          }
-
+    //trace("path:%s", toStr(path).c_str());
      return p;
  }
  
@@ -508,7 +852,7 @@ int URI::parseFragment(int p0)
          ch = peek(p);
          if (ch == '?')
              break;
-        fragment.push_back((XMLCh)ch);
+        fragment.push_back(ch);
          p++;
          }
  
@@ -566,11 +910,29 @@ bool URI::parse(const DOMString &str)
  {
  
      parselen = str.size();
-    DOMString tmp = str;
-    parsebuf = (char *) tmp.c_str();
+    parsebuf = new int[str.size()];
+    if (!parsebuf)
+        {
+        error("parse : could not allocate parsebuf");
+        return false;
+        }
+
+    DOMString::const_iterator iter;
+    unsigned int i=0;
+    for (iter= str.begin() ; iter!=str.end() ; iter++)
+        {
+        int ch = *iter;
+        if (ch == '\\')
+            parsebuf[i++] = '/';
+        else
+            parsebuf[i++] = ch;
+        }
  
  
      int p = parse(0);
+    normalize();
+
+    delete[] parsebuf;
  
      if (p < 0)
          {
@@ -579,7 +941,7 @@ bool URI::parse(const DOMString &str)
          }
  
      //printf("uri:%s\n", toString().c_str());
-    //printf("path:%s\n", path.c_str());
+    //printf("parse:%s\n", toStr(path).c_str());
  
      return true;