Warning cleanup

[inkscape.git] / src / dom / uri.cpp
diff --git a/src/dom/uri.cpp b/src/dom/uri.cpp

index 8f5cf976d9ee92c7b4fb200d496e715b8e1bf8e5..13b76c413d3a962a685bd505cec1e5be3a842609 100644 (file)
--- a/src/dom/uri.cpp
+++ b/src/dom/uri.cpp
@@ -10,7 +10,7 @@
   * Authors:
   *   Bob Jamison
   *
- * Copyright (C) 2005 Bob Jamison
+ * Copyright (C) 2005-2007 Bob Jamison
   *
   *  This library is free software; you can redistribute it and/or
   *  modify it under the terms of the GNU Lesser General Public
@@ -35,7 +35,7 @@
  
  #include <stdio.h>
  #include <stdarg.h>
-
+#include <vector>
  
  
  namespace org
@@ -49,11 +49,11 @@ namespace dom
  typedef struct
  {
      int  ival;
-    char *sval;
+    char const *sval;
      int  port;
  } LookupEntry;
  
-LookupEntry schemes[] =
+static LookupEntry schemes[] =
  {
      { URI::SCHEME_DATA,   "data:",    0 },
      { URI::SCHEME_HTTP,   "http:",   80 },
@@ -108,14 +108,18 @@ URI::URI(const char *str)
  URI::URI(const URI &other)
  {
      init();
-    scheme    = other.scheme;
-    schemeStr = other.schemeStr;
-    authority = other.authority;
-    port      = other.port;
-    path      = other.path;
-    absolute  = other.absolute;
-    query     = other.query;
-    fragment  = other.fragment;
+    assign(other);
+}
+
+
+/**
+ * Copy-assignment operator.  Resets this URI with init() and then copies
+ * the members handled by assign() from 'other'.
+ *
+ * Assigning a URI to itself is a no-op: init() clears the very members
+ * that assign() would read afterwards, so without the guard 'u = u'
+ * would leave the URI empty.
+ *
+ * @param other the URI to copy from
+ * @return a reference to this URI
+ */
+URI &URI::operator=(const URI &other)
+{
+    if (this != &other)
+        {
+        init();
+        assign(other);
+        }
+    return *this;
  }
  
  
@@ -138,20 +142,60 @@ void URI::init()
      parsebuf  = NULL;
      parselen  = 0;
      scheme    = SCHEME_NONE;
-    schemeStr = "";
+    schemeStr.clear();
      port      = 0;
-    authority = "";
-    path      = "";
+    authority.clear();
+    path.clear();
      absolute  = false;
-    query     = "";
-    fragment  = "";
+    opaque    = false;
+    query.clear();
+    fragment.clear();
  }
  
  
+/**
+ * Copies the URI components of 'other' into this object, one member at a
+ * time.  Used by both the copy constructor and operator=.
+ *
+ * NOTE(review): portSpecified, which getAuthority() consults, is not among
+ * the members copied here -- confirm whether that is intended.
+ */
+void URI::assign(const URI &other)
+{
+    //# scheme
+    schemeStr = other.schemeStr;
+    scheme    = other.scheme;
+    //# authority
+    port      = other.port;
+    authority = other.authority;
+    //# path and its flags
+    opaque    = other.opaque;
+    absolute  = other.absolute;
+    path      = other.path;
+    //# trailing components
+    fragment  = other.fragment;
+    query     = other.query;
+}
+
  
  //#########################################################################
  //#A T T R I B U T E S
  //#########################################################################
+/** Lower-case hexadecimal digits, indexed by nibble value (used by toStr()). */
+static char const *hexChars = "0123456789abcdef";
+
+/**
+ * Converts a sequence of character codes into a DOMString.
+ *
+ * Codes in the range 0..255 that isprint() accepts are appended unchanged.
+ * Every other code is written as '%' followed by two lower-case hex digits
+ * taken from its low 8 bits (higher bits are not represented).
+ *
+ * @param arr the character codes to convert
+ * @return the resulting string
+ */
+static DOMString toStr(const std::vector<int> &arr)
+{
+    DOMString buf;
+    std::vector<int>::const_iterator iter;
+    for (iter=arr.begin() ; iter!=arr.end() ; ++iter)
+        {
+        int ch = *iter;
+        // isprint() is only defined for EOF and unsigned char values, so
+        // check the range first; anything outside it is escaped instead.
+        if (ch >= 0 && ch <= 0xff && isprint(ch))
+            buf.push_back((XMLCh)ch);
+        else
+            {
+            buf.push_back('%');
+            int hi = ((ch>>4) & 0xf);
+            buf.push_back(hexChars[hi]);
+            int lo = ((ch   ) & 0xf);
+            buf.push_back(hexChars[lo]);
+            }
+        }
+    return buf;
+}
+
  
  DOMString URI::toString() const
  {
@@ -159,18 +203,18 @@ DOMString URI::toString() const
      if (authority.size() > 0)
          {
          str.append("//");
-        str.append(authority);
+        str.append(toStr(authority));
          }
-    str.append(path);
+    str.append(toStr(path));
      if (query.size() > 0)
          {
          str.append("?");
-        str.append(query);
+        str.append(toStr(query));
          }
      if (fragment.size() > 0)
          {
          str.append("#");
-        str.append(fragment);
+        str.append(toStr(fragment));
          }
      return str;
  }
@@ -189,7 +233,7 @@ DOMString URI::getSchemeStr() const
  
  DOMString URI::getAuthority() const
  {
-    DOMString ret = authority;
+    DOMString ret = toStr(authority);
      if (portSpecified && port>=0)
          {
          char buf[7];
@@ -201,7 +245,8 @@ DOMString URI::getAuthority() const
  
  DOMString URI::getHost() const
  {
-    return authority;
+    DOMString str = toStr(authority);
+    return str;
  }
  
  int URI::getPort() const
@@ -212,25 +257,279 @@ int URI::getPort() const
  
  DOMString URI::getPath() const
  {
-    return path;
+    DOMString str = toStr(path);
+    return str;
+}
+
+/**
+ * Returns the path in the form used by the host platform.
+ *
+ * When __WIN32__ is defined: if the path starts with '/', a character
+ * accepted by isLetter() and ':', the leading '/' is dropped; every '/'
+ * that is copied is replaced by a backslash.  On other builds the result
+ * is simply toStr(path).
+ */
+DOMString URI::getNativePath() const
+{
+    DOMString uriPath = toStr(path);
+#ifdef __WIN32__
+    DOMString native;
+    //# "/C:..." : start copying after the leading slash
+    unsigned int start = 0;
+    if (uriPath.size() >= 3 &&
+        uriPath[0] == '/' &&
+        isLetter(uriPath[1]) &&
+        uriPath[2] == ':')
+        start = 1;
+    for (unsigned int idx=start ; idx<uriPath.size() ; idx++)
+        {
+        XMLCh ch = (XMLCh) uriPath[idx];
+        native.push_back(ch == '/' ? (XMLCh)'\\' : ch);
+        }
+    return native;
+#else
+    return uriPath;
+#endif
  }
  
  
-bool URI::getIsAbsolute() const
+bool URI::isAbsolute() const
  {
      return absolute;
  }
  
+/**
+ * Returns the 'opaque' flag.  init() resets it to false and
+ * parseHierarchicalPart() sets it when the '/' that starts the path is not
+ * the first character it was asked to parse.
+ */
+bool URI::isOpaque() const
+{
+    return opaque;
+}
+
  
  DOMString URI::getQuery() const
  {
-    return query;
+    DOMString str = toStr(query);
+    return str;
  }
  
  
  DOMString URI::getFragment() const
  {
-    return fragment;
+    DOMString str = toStr(fragment);
+    return str;
+}
+
+
+
+
+/**
+ * Searches 'str' for the first element equal to 'ch', beginning at index
+ * 'startpos'.
+ *
+ * @return the index of the match, or -1 when there is none
+ */
+static int find(const std::vector<int> &str, int ch, int startpos)
+{
+    unsigned int idx = startpos;
+    while (idx < str.size())
+        {
+        if (str[idx] == ch)
+            return idx;
+        idx++;
+        }
+    return -1;
+}
+
+
+/**
+ * Searches 'str' backwards for the last element equal to 'ch'.
+ *
+ * @return the index of the match, or -1 when there is none (including
+ *         when 'str' is empty)
+ */
+static int findLast(const std::vector<int> &str, int ch)
+{
+    // The index must be signed: with an unsigned index 'i>=0' is always
+    // true, so a failed search wrapped around and indexed out of bounds.
+    for (int i = (int)str.size()-1 ; i>=0 ; i--)
+        {
+        if (ch == str[i])
+            return i;
+        }
+    return -1;
+}
+
+
+/**
+ * Tests whether 'str' holds exactly the characters of the C string 'key'.
+ *
+ * @param str the character codes to compare
+ * @param key the NUL-terminated text to compare against
+ * @return true only if both have the same length and the same contents
+ */
+static bool sequ(const std::vector<int> &str, char const *key)
+{
+    char const *c = key;
+    for (unsigned int i=0 ; i<str.size() ; i++)
+        {
+        if (! (*c))
+            return false; // key is shorter than str
+        if (*c != str[i])
+            return false;
+        c++; // was never advanced, so only key[0] was ever compared
+        }
+    // key must be used up as well, otherwise any prefix of key (even an
+    // empty str) would count as equal
+    return !(*c);
+}
+
+
+/**
+ * Copies at most 'len' elements of 'str', starting at index 'startpos'.
+ * Copying stops early when the end of 'str' is reached.
+ *
+ * @return the copied elements (empty if nothing could be copied)
+ */
+static std::vector<int> substr(const std::vector<int> &str,
+                      int startpos, int len)
+{
+    std::vector<int> result;
+    unsigned int src = startpos;
+    int copied = 0;
+    while (copied < len && src < str.size())
+        {
+        result.push_back(str[src]);
+        src++;
+        copied++;
+        }
+    return result;
+}
+
+
+/**
+ * Resolves 'other' against this URI and returns the result by value;
+ * neither this URI nor 'other' is modified.
+ *
+ *  1. If this URI is opaque or 'other' is absolute, a copy of 'other' is
+ *     returned.
+ *  2. If 'other' consists of a fragment only, a copy of this URI with that
+ *     fragment is returned.
+ *  3. Otherwise a new URI is assembled from this URI's scheme, 'other's
+ *     query and fragment, and a path/authority chosen as described at the
+ *     individual steps below; normalize() is applied to it before it is
+ *     returned.
+ *
+ * NOTE(review): in step 3.3 nothing assigns newUri.authority or
+ * newUri.port, so this URI's authority does not appear to reach the
+ * result -- confirm whether that is intended.
+ */
+URI URI::resolve(const URI &other) const
+{
+    //### According to w3c, this is handled in 3 cases
+
+    //## 1 : nothing to resolve against, or nothing to resolve
+    if (opaque || other.isAbsolute())
+        return other;
+
+    //## 2 : 'other' is a bare fragment reference
+    if (other.fragment.size()  >  0 &&
+        other.path.size()      == 0 &&
+        other.scheme           == SCHEME_NONE &&
+        other.authority.size() == 0 &&
+        other.query.size()     == 0 )
+        {
+        URI fragUri = *this;
+        fragUri.fragment = other.fragment;
+        return fragUri;
+        }
+
+    //## 3 http://www.ietf.org/rfc/rfc2396.txt, section 5.2
+    URI newUri;
+    //# 3.1 : scheme from this URI, query and fragment from 'other'
+    newUri.scheme    = scheme;
+    newUri.schemeStr = schemeStr;
+    newUri.query     = other.query;
+    newUri.fragment  = other.fragment;
+    if (other.authority.size() > 0)
+        {
+        //# 3.2 : 'other' names its own authority, so take its path as well
+        if (absolute || other.absolute)
+            newUri.absolute = true;
+        newUri.authority = other.authority;
+        newUri.port      = other.port;//part of authority
+        newUri.path      = other.path;
+        }
+    else
+        {
+        //# 3.3 : no authority in 'other'
+        if (other.absolute)
+            {
+            newUri.absolute = true;
+            newUri.path     = other.path;
+            }
+        else
+            {
+            //# merge: keep this path through its last '/', then add
+            //# other's path; with no '/' in this path, other's path is
+            //# used on its own
+            int pos = findLast(path, '/');
+            if (pos >= 0)
+                {
+                newUri.path.clear();
+                //# append my path up to and including the '/'
+                for (int i = 0; i<=pos ; i++)
+                       newUri.path.push_back(path[i]);
+                //# append other path
+                for (unsigned int i = 0; i<other.path.size() ; i++)
+                       newUri.path.push_back(other.path[i]);
+                }
+            else
+                newUri.path = other.path;
+            }
+        }
+
+    //# remove "." and ".." segments left over from the merge
+    newUri.normalize();
+
+    return newUri;
+}
+
+
+/**
+ *  This follows the Java URI algorithm:
+ *   1. All "." segments are removed.
+ *   2. If a ".." segment is preceded by a non-".." segment
+ *          then both of these segments are removed. This step
+ *          is repeated until it is no longer applicable.
+ *   3. If the path is relative, and if its first segment
+ *          contains a colon character (':'), then a "." segment
+ *          is prepended. This prevents a relative URI with a path
+ *          such as "a:b/c/d" from later being re-parsed as an
+ *          opaque URI with a scheme of "a" and a scheme-specific
+ *          part of "b/c/d". (Deviation from RFC 2396)
+ *
+ *  NOTE(review): the code below performs no step 3; nothing is ever
+ *  prepended to the path.
+ *
+ *  The path is modified in place, and only if at least one segment was
+ *  removed.  When it is rebuilt, segments are joined by single '/'
+ *  characters: empty segments are not kept and no trailing '/' is written.
+ *  Paths shorter than two characters are left untouched.
+ */
+void URI::normalize()
+{
+    std::vector< std::vector<int> > segments;
+
+    //## Collect segments
+    if (path.size()<2)
+        return;
+    bool abs = false; // true when the path starts with '/'
+    int pos=0;
+    int len = (int) path.size();
+
+    if (path[0]=='/')
+        {
+        abs = true;
+        pos++;
+        }
+
+    while (pos < len)
+        {
+        int pos2 = find(path, '/', pos);
+        if (pos2 < 0)
+            {
+            //# no further '/': the rest of the path is the last segment
+            std::vector<int> seg = substr(path, pos, path.size()-pos);
+            //printf("last segment:%s\n", toStr(seg).c_str());
+            segments.push_back(seg);
+            break;
+            }
+        if (pos2>pos) //# skip empty segments (two '/' in a row)
+            {
+            std::vector<int> seg = substr(path, pos, pos2-pos);
+            //printf("segment:%s\n", toStr(seg).c_str());
+            segments.push_back(seg);
+            }
+        pos = pos2;
+        pos++;
+        }
+
+    //## Clean up (normalize) segments
+    bool edited = false;
+    std::vector< std::vector<int> >::iterator iter;
+    //# no increment in the loop header: erase() already yields the
+    //# iterator of the next segment
+    for (iter=segments.begin() ; iter!=segments.end() ; )
+        {
+        std::vector<int> s = *iter;
+        if (sequ(s,"."))
+            {
+            iter = segments.erase(iter);
+            edited = true;
+            }
+        else if (sequ(s, "..") && iter != segments.begin() &&
+                 !sequ(*(iter-1), ".."))
+            {
+            iter--; //back up, then erase two entries
+            iter = segments.erase(iter);
+            iter = segments.erase(iter);
+            edited = true;
+            }
+        else
+            iter++;
+        }
+
+    //## Rebuild path, if necessary
+    if (edited)
+        {
+        path.clear();
+        if (abs)
+            {
+            path.push_back('/');
+            }
+        //# this declaration hides the 'iter' declared above
+        std::vector< std::vector<int> >::iterator iter;
+        for (iter=segments.begin() ; iter!=segments.end() ; iter++)
+            {
+            if (iter != segments.begin())
+                path.push_back('/');
+            std::vector<int> seg = *iter;
+            for (unsigned int i = 0; i<seg.size() ; i++)
+                path.push_back(seg[i]);
+            }
+        }
+
  }
  
  
@@ -276,7 +575,7 @@ int URI::peek(int p)
  
  
  
-int URI::match(int p0, char *key)
+int URI::match(int p0, char const *key)
  {
      int p = p0;
      while (p < parselen)
@@ -295,6 +594,91 @@ int URI::match(int p0, char *key)
  //#  http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
  //#########################################################################
  
+/**
+ * Reads two hexadecimal digits (either case) from the parse buffer,
+ * starting at p0, and combines them into one value; the first digit
+ * supplies the upper four bits.
+ *
+ * @param p0 position of the first digit
+ * @param result receives the value; not assigned on failure
+ * @return the position after the second digit, or -1 (after reporting
+ *         through error()) if either character is not a hex digit
+ */
+int URI::parseHex(int p0, int &result)
+{
+    int val = 0;
+
+    //# first pass: upper 4 bits, second pass: lower 4 bits
+    for (int p = p0 ; p < p0+2 ; p++)
+        {
+        int ch = peek(p);
+        int digit;
+        if (ch >= '0' && ch <= '9')
+            digit = ch - '0';
+        else if (ch >= 'a' && ch <= 'f')
+            digit = 10 + ch - 'a';
+        else if (ch >= 'A' && ch <= 'F')
+            digit = 10 + ch - 'A';
+        else
+            {
+            error("parseHex : unexpected character : %c", ch);
+            return -1;
+            }
+        val = (val << 4) + digit;
+        }
+
+    result = val;
+    return p0 + 2;
+}
+
+
+
+/**
+ * Parses an entity of the form '&' "#x" <hex digits> ';' at position p0.
+ * The digits are read by parseHex().
+ *
+ * @param p0 position to parse from
+ * @param result receives the decoded value; assigned only on success
+ * @return p0 if the character at p0 is not '&'; the position after the
+ *         ';' on success; -1 after an error has been reported
+ */
+int URI::parseEntity(int p0, int &result)
+{
+    int p = p0;
+    int ch = peek(p);
+    if (ch != '&')
+        return p0;
+    p++;
+    // NOTE(review): this test only fires if match() returns 0 when the key
+    // is absent; match()'s body is not visible here -- confirm its return
+    // convention.
+    if (!match(p, "#x"))
+        {
+        error("parseEntity: expected '#x'");
+        return -1;
+        }
+    p += 2; // step over "#x"
+    int val;
+    p = parseHex(p, val);
+    if (p<0)
+        return -1;
+    ch = peek(p);
+    if (ch != ';')
+        {
+        error("parseEntity: expected ';'");
+        return -1;
+        }
+    p++;
+    result = val;
+    return p;
+}
+
+/**
+ * Parses a '%' escape at position p0: a '%' followed by the two hex digits
+ * that parseHex() reads.
+ *
+ * @param p0 position to parse from
+ * @param result receives the decoded value; assigned only on success
+ * @return p0 if the character at p0 is not '%'; the position after the
+ *         digits on success; -1 if parseHex() fails
+ */
+int URI::parseAsciiEntity(int p0, int &result)
+{
+    if (peek(p0) != '%')
+        return p0;
+
+    int decoded;
+    int next = parseHex(p0+1, decoded);
+    if (next < 0)
+        return -1;
+
+    result = decoded;
+    return next;
+}
+
+
  int URI::parseScheme(int p0)
  {
      int p = p0;
@@ -332,13 +716,43 @@ int URI::parseHierarchicalPart(int p0)
              ch = peek(p);
              if (ch == '/')
                  break;
+            else if (ch == '&') //IRI entity
+                {
+                int val;
+                p2 = parseEntity(p, val);
+                if (p2<p)
+                    {
+                    return -1;
+                    }
+                p = p2;
+                authority.push_back((XMLCh)val);
+                }
+            else if (ch == '%') //ascii hex excape
+                {
+                int val;
+                p2 = parseAsciiEntity(p, val);
+                if (p2<p)
+                    {
+                    return -1;
+                    }
+                p = p2;
+                authority.push_back((XMLCh)val);
+                }
              else if (ch == ':')
+                {
                  portSpecified = true;
+                p++;
+                }
              else if (portSpecified)
-                portStr.push_back(ch);
+                {
+                portStr.push_back((XMLCh)ch);
+                p++;
+                }
              else
-                authority.push_back(ch);
-            p++;
+                {
+                authority.push_back((XMLCh)ch);
+                p++;
+                }
              }
          if (portStr.size() > 0)
              {
@@ -352,10 +766,17 @@ int URI::parseHierarchicalPart(int p0)
  
      //# Are we absolute?
      ch = peek(p);
-    if (ch == '/')
+    if (isLetter(ch) && peek(p+1)==':')
+        {
+        absolute = true;
+        path.push_back((XMLCh)'/');
+        }
+    else if (ch == '/')
          {
          absolute = true;
-        path.push_back(ch);
+        if (p>p0) //in other words, if '/' is not the first char
+            opaque = true;
+        path.push_back((XMLCh)ch);
          p++;
          }
  
@@ -364,10 +785,35 @@ int URI::parseHierarchicalPart(int p0)
          ch = peek(p);
          if (ch == '?' || ch == '#')
              break;
-        path.push_back(ch);
-        p++;
+        else if (ch == '&') //IRI entity
+            {
+            int val;
+            p2 = parseEntity(p, val);
+            if (p2<p)
+                {
+                return -1;
+                }
+            p = p2;
+            path.push_back((XMLCh)val);
+            }
+        else if (ch == '%') //ascii hex excape
+            {
+            int val;
+            p2 = parseAsciiEntity(p, val);
+            if (p2<p)
+                {
+                return -1;
+                }
+            p = p2;
+            path.push_back((XMLCh)val);
+            }
+        else
+            {
+            path.push_back((XMLCh)ch);
+            p++;
+            }
          }
-
+    //trace("path:%s", toStr(path).c_str());
      return p;
  }
  
@@ -384,7 +830,7 @@ int URI::parseQuery(int p0)
          ch = peek(p);
          if (ch == '#')
              break;
-        query.push_back(ch);
+        query.push_back((XMLCh)ch);
          p++;
          }
  
@@ -464,11 +910,29 @@ bool URI::parse(const DOMString &str)
  {
  
      parselen = str.size();
-    DOMString tmp = str;
-    parsebuf = (char *) tmp.c_str();
+    parsebuf = new int[str.size()];
+    if (!parsebuf)
+        {
+        error("parse : could not allocate parsebuf");
+        return false;
+        }
+
+    DOMString::const_iterator iter;
+    unsigned int i=0;
+    for (iter= str.begin() ; iter!=str.end() ; iter++)
+        {
+        int ch = *iter;
+        if (ch == '\\')
+            parsebuf[i++] = '/';
+        else
+            parsebuf[i++] = ch;
+        }
  
  
      int p = parse(0);
+    normalize();
+
+    delete[] parsebuf;
  
      if (p < 0)
          {
@@ -477,7 +941,7 @@ bool URI::parse(const DOMString &str)
          }
  
      //printf("uri:%s\n", toString().c_str());
-    //printf("path:%s\n", path.c_str());
+    //printf("parse:%s\n", toStr(path).c_str());
  
      return true;