diff --git a/src/dom/uri.cpp b/src/dom/uri.cpp
index 59a965b8d942b84ab1c6ee8e95053d3e7ce1c02a..13b76c413d3a962a685bd505cec1e5be3a842609 100644 (file)
--- a/src/dom/uri.cpp
+++ b/src/dom/uri.cpp
* Authors:
* Bob Jamison
*
- * Copyright (C) 2005 Bob Jamison
+ * Copyright (C) 2005-2007 Bob Jamison
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
#include <stdio.h>
#include <stdarg.h>
-
+#include <vector>
namespace org
+/**
+ * One row of a lookup table such as schemes[] below.
+ *   ival : identifier constant for the entry (the table stores
+ *          URI::SCHEME_* values here)
+ *   sval : the literal text for the entry, e.g. "http:".  Declared const
+ *          because the table rows are initialised from string literals.
+ *   port : port number paired with the entry (80 for "http:", 0 for
+ *          "data:") -- presumably the scheme's default port; confirm
+ *          against the code that reads the table.
+ */
typedef struct
{
int ival;
- char *sval;
+ char const *sval;
int port;
} LookupEntry;
-LookupEntry schemes[] =
+static LookupEntry schemes[] =
{
{ URI::SCHEME_DATA, "data:", 0 },
{ URI::SCHEME_HTTP, "http:", 80 },
parsebuf = NULL;
parselen = 0;
scheme = SCHEME_NONE;
- schemeStr = "";
+ schemeStr.clear();
port = 0;
- authority = "";
- path = "";
+ authority.clear();
+ path.clear();
absolute = false;
opaque = false;
- query = "";
- fragment = "";
+ query.clear();
+ fragment.clear();
}
//#########################################################################
//#A T T R I B U T E S
//#########################################################################
+//# Lower-case hex digits, indexed by nibble value.  Points at a string
+//# literal, so it has to be a pointer-to-const.
+static char const *hexChars = "0123456789abcdef";
+
+/**
+ * Render a URI component, held as a vector of character codes, as text.
+ * Codes that isprint() accepts are copied through unchanged; every other
+ * code is written as '%' followed by two lower-case hex digits taken from
+ * its low 8 bits.
+ *
+ * NOTE(review): '%' is itself printable, so it is copied through
+ * unescaped -- confirm that callers never need the output to be
+ * unambiguous on re-parse.
+ *
+ * @param arr the character codes to render
+ * @return the rendered text
+ */
+static DOMString toStr(const std::vector<int> &arr)
+{
+    DOMString buf;
+    std::vector<int>::const_iterator iter;
+    for (iter = arr.begin() ; iter != arr.end() ; ++iter)
+    {
+        int ch = *iter;
+        //# isprint() is only defined for EOF and values representable as
+        //# unsigned char, so range-check before asking it.
+        if (ch >= 0 && ch <= 0xff && isprint(ch))
+            buf.push_back((XMLCh)ch);
+        else
+        {
+            buf.push_back('%');
+            buf.push_back(hexChars[(ch >> 4) & 0xf]);
+            buf.push_back(hexChars[ch & 0xf]);
+        }
+    }
+    return buf;
+}
+
DOMString URI::toString() const
{
if (authority.size() > 0)
{
str.append("//");
- str.append(authority);
+ str.append(toStr(authority));
}
- str.append(path);
+ str.append(toStr(path));
if (query.size() > 0)
{
str.append("?");
- str.append(query);
+ str.append(toStr(query));
}
if (fragment.size() > 0)
{
str.append("#");
- str.append(fragment);
+ str.append(toStr(fragment));
}
return str;
}
DOMString URI::getAuthority() const
{
- DOMString ret = authority;
+ DOMString ret = toStr(authority);
if (portSpecified && port>=0)
{
char buf[7];
DOMString URI::getHost() const
{
- return authority;
+    //# hand back the stored authority in its rendered (escaped) text form
+    return toStr(authority);
}
int URI::getPort() const
DOMString URI::getPath() const
{
- return path;
+    //# hand back the stored path in its rendered (escaped) text form
+    return toStr(path);
+}
+
+/**
+ * Return the path in the form used by the host platform.
+ * When __WIN32__ is defined, a leading '/' in front of a "X:" drive
+ * prefix is dropped and every '/' becomes '\'.  Otherwise the rendered
+ * path is returned unchanged.
+ */
+DOMString URI::getNativePath() const
+{
+    DOMString uriPath = toStr(path);
+#ifdef __WIN32__
+    DOMString native;
+    //# "/C:..." : start copying after the slash that precedes the drive
+    unsigned int start = 0;
+    if (uriPath.size() >= 3 &&
+        uriPath[0] == '/' &&
+        isLetter(uriPath[1]) &&
+        uriPath[2] == ':')
+        start = 1;
+    for (unsigned int idx = start ; idx < uriPath.size() ; idx++)
+    {
+        XMLCh ch = (XMLCh) uriPath[idx];
+        native.push_back((ch == '/') ? (XMLCh)'\\' : ch);
+    }
+    return native;
+#else
+    //# nothing to translate on other platforms
+    return uriPath;
+#endif
}
DOMString URI::getQuery() const
{
- return query;
+    //# hand back the stored query in its rendered (escaped) text form
+    return toStr(query);
}
DOMString URI::getFragment() const
{
- return fragment;
+    //# hand back the stored fragment in its rendered (escaped) text form
+    return toStr(fragment);
+}
+
+
+
+
+/**
+ * Search str for ch, beginning at index startpos.
+ * @return the index of the first match, or -1 if there is none
+ */
+static int find(const std::vector<int> &str, int ch, int startpos)
+{
+    unsigned int idx = startpos;
+    while (idx < str.size())
+    {
+        if (str[idx] == ch)
+            return idx;
+        idx++;
+    }
+    return -1;
+}
+
+
+/**
+ * Search str backwards for ch.
+ * @return the index of the last match, or -1 if there is none
+ *         (including when str is empty)
+ */
+static int findLast(const std::vector<int> &str, int ch)
+{
+    //# The index must be signed: an unsigned counter can never drop below
+    //# zero, so the loop would never end and would read out of bounds.
+    for (int i = (int)str.size() - 1 ; i >= 0 ; i--)
+    {
+        if (ch == str[i])
+            return i;
+    }
+    return -1;
+}
+
+
+/**
+ * Test whether the character codes in str spell exactly the
+ * NUL-terminated string key.
+ * @param str the sequence of character codes to test
+ * @param key the text to compare against; a null key never matches
+ * @return true only if both have the same length and the same content
+ */
+static bool sequ(const std::vector<int> &str, char const *key)
+{
+    if (!key)
+        return false;
+    char const *c = key;
+    //# walk both in step; the key pointer has to advance with the index
+    for (unsigned int i = 0 ; i < str.size() ; i++, c++)
+    {
+        if (! (*c))        //key ended before str did
+            return false;
+        if (*c != str[i])
+            return false;
+    }
+    //# equal only if the key has been used up as well
+    return *c == '\0';
+}
+
+
+/**
+ * Copy up to len elements of str, starting at index startpos.
+ * Copying stops early when the end of str is reached.
+ * @return the copied elements (empty if startpos is past the end)
+ */
+static std::vector<int> substr(const std::vector<int> &str,
+                               int startpos, int len)
+{
+    std::vector<int> piece;
+    unsigned int src = startpos;
+    int copied = 0;
+    while (copied < len && src < str.size())
+    {
+        piece.push_back(str[src]);
+        src++;
+        copied++;
+    }
+    return piece;
}
}
else
{
- unsigned int pos = path.rfind('/');
- if (pos != path.npos)
+ int pos = findLast(path, '/');
+ if (pos >= 0)
{
- DOMString tpath = path.substr(0, pos+1);
- tpath.append(other.path);
- newUri.path = tpath;
- newUri.normalize();
+ newUri.path.clear();
+ //# append my path up to and including the '/'
+ for (int i = 0; i<=pos ; i++)
+ newUri.path.push_back(path[i]);
+ //# append other path
+ for (unsigned int i = 0; i<other.path.size() ; i++)
+ newUri.path.push_back(other.path[i]);
}
+ else
+ newUri.path = other.path;
}
}
+
+ newUri.normalize();
+
return newUri;
}
*/
void URI::normalize()
{
- std::vector<DOMString> segments;
+ std::vector< std::vector<int> > segments;
//## Collect segments
if (path.size()<2)
return;
- unsigned int pos=0;
- while (pos < path.size())
+ bool abs = false;
+ int pos=0;
+ int len = (int) path.size();
+
+ if (path[0]=='/')
+ {
+ abs = true;
+ pos++;
+ }
+
+ while (pos < len)
{
- unsigned int pos2 = path.find(pos);
- if (pos2==path.npos)
+ int pos2 = find(path, '/', pos);
+ if (pos2 < 0)
+ {
+ std::vector<int> seg = substr(path, pos, path.size()-pos);
+ //printf("last segment:%s\n", toStr(seg).c_str());
+ segments.push_back(seg);
break;
+ }
if (pos2>pos)
{
- DOMString seg = path.substr(pos, pos2);
+ std::vector<int> seg = substr(path, pos, pos2-pos);
+ //printf("segment:%s\n", toStr(seg).c_str());
segments.push_back(seg);
}
pos = pos2;
//## Clean up (normalize) segments
bool edited = false;
- std::vector<DOMString>::iterator iter;
+ std::vector< std::vector<int> >::iterator iter;
for (iter=segments.begin() ; iter!=segments.end() ; )
{
- DOMString s = *iter;
- if (s == ".")
+ std::vector<int> s = *iter;
+ if (sequ(s,"."))
{
iter = segments.erase(iter);
edited = true;
}
- else if (s == ".." &&
- iter != segments.begin() &&
- *(iter-1) != "..")
+ else if (sequ(s, "..") && iter != segments.begin() &&
+ !sequ(*(iter-1), ".."))
{
iter--; //back up, then erase two entries
iter = segments.erase(iter);
if (edited)
{
path.clear();
- if (absolute)
- path.append("/");
- std::vector<DOMString>::iterator iter;
+ if (abs)
+ {
+ path.push_back('/');
+ }
+ std::vector< std::vector<int> >::iterator iter;
for (iter=segments.begin() ; iter!=segments.end() ; iter++)
{
- path.append(*iter);
- path.append("/");
+ if (iter != segments.begin())
+ path.push_back('/');
+ std::vector<int> seg = *iter;
+ for (unsigned int i = 0; i<seg.size() ; i++)
+ path.push_back(seg[i]);
}
}
-int URI::match(int p0, char *key)
+int URI::match(int p0, char const *key)
{
int p = p0;
while (p < parselen)
//# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
//#########################################################################
+/**
+ * Value of one hexadecimal digit character (either case),
+ * or -1 if ch is not a hex digit.
+ */
+static int hexDigitValue(int ch)
+{
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+    if (ch >= 'a' && ch <= 'f')
+        return 10 + ch - 'a';
+    if (ch >= 'A' && ch <= 'F')
+        return 10 + ch - 'A';
+    return -1;
+}
+
+/**
+ * Read exactly two hex digits from the parse buffer, starting at p0.
+ * @param p0 position of the first digit
+ * @param result receives the decoded value (0..255); left untouched
+ *        on failure
+ * @return the position just past the second digit, or -1 after
+ *         reporting an error if either character is not a hex digit
+ */
+int URI::parseHex(int p0, int &result)
+{
+    int p = p0;
+    int val = 0;
+
+    //# Two digits: upper 4 bits first, then lower 4
+    for (int i = 0 ; i < 2 ; i++)
+    {
+        int ch = peek(p);
+        int digit = hexDigitValue(ch);
+        if (digit < 0)
+        {
+            error("parseHex : unexpected character : %c", ch);
+            return -1;
+        }
+        val = (val << 4) + digit;
+        p++;
+    }
+    result = val;
+    return p;
+}
+
+
+
+/**
+ * Parse an entity of the form "&#xHH;" (two hex digits) at p0.
+ * @param p0 position to start at
+ * @param result receives the decoded value on success
+ * @return p0 if the character at p0 is not '&' (nothing consumed),
+ *         -1 after a reported error, otherwise the position just
+ *         past the ';'
+ */
+int URI::parseEntity(int p0, int &result)
+{
+    //# not an entity at all: consume nothing
+    if (peek(p0) != '&')
+        return p0;
+
+    int pos = p0 + 1;
+    if (!match(pos, "#x"))
+    {
+        error("parseEntity: expected '#x'");
+        return -1;
+    }
+
+    //# skip the "#x" and read the two hex digits
+    int code;
+    pos = parseHex(pos + 2, code);
+    if (pos < 0)
+        return -1;
+
+    if (peek(pos) != ';')
+    {
+        error("parseEntity: expected ';'");
+        return -1;
+    }
+    result = code;
+    return pos + 1;
+}
+
+/**
+ * Parse a "%HH" escape (two hex digits) at p0.
+ * @param p0 position to start at
+ * @param result receives the decoded value on success
+ * @return p0 if the character at p0 is not '%' (nothing consumed),
+ *         -1 if the hex digits could not be read, otherwise the
+ *         position just past the second digit
+ */
+int URI::parseAsciiEntity(int p0, int &result)
+{
+    //# not an escape at all: consume nothing
+    if (peek(p0) != '%')
+        return p0;
+
+    int code;
+    int pos = parseHex(p0 + 1, code);
+    if (pos < 0)
+        return -1;
+    result = code;
+    return pos;
+}
+
+
int URI::parseScheme(int p0)
{
int p = p0;
ch = peek(p);
if (ch == '/')
break;
+ else if (ch == '&') //IRI entity
+ {
+ int val;
+ p2 = parseEntity(p, val);
+ if (p2<p)
+ {
+ return -1;
+ }
+ p = p2;
+ authority.push_back((XMLCh)val);
+ }
+ else if (ch == '%') //ascii hex escape
+ {
+ int val;
+ p2 = parseAsciiEntity(p, val);
+ if (p2<p)
+ {
+ return -1;
+ }
+ p = p2;
+ authority.push_back((XMLCh)val);
+ }
else if (ch == ':')
+ {
portSpecified = true;
+ p++;
+ }
else if (portSpecified)
+ {
portStr.push_back((XMLCh)ch);
+ p++;
+ }
else
+ {
authority.push_back((XMLCh)ch);
- p++;
+ p++;
+ }
}
if (portStr.size() > 0)
{
//# Are we absolute?
ch = peek(p);
- if (ch == '/')
+ if (isLetter(ch) && peek(p+1)==':')
+ {
+ absolute = true;
+ path.push_back((XMLCh)'/');
+ }
+ else if (ch == '/')
{
absolute = true;
if (p>p0) //in other words, if '/' is not the first char
ch = peek(p);
if (ch == '?' || ch == '#')
break;
- path.push_back((XMLCh)ch);
- p++;
+ else if (ch == '&') //IRI entity
+ {
+ int val;
+ p2 = parseEntity(p, val);
+ if (p2<p)
+ {
+ return -1;
+ }
+ p = p2;
+ path.push_back((XMLCh)val);
+ }
+ else if (ch == '%') //ascii hex escape
+ {
+ int val;
+ p2 = parseAsciiEntity(p, val);
+ if (p2<p)
+ {
+ return -1;
+ }
+ p = p2;
+ path.push_back((XMLCh)val);
+ }
+ else
+ {
+ path.push_back((XMLCh)ch);
+ p++;
+ }
}
-
+ //trace("path:%s", toStr(path).c_str());
return p;
}
ch = peek(p);
if (ch == '?')
break;
- fragment.push_back((XMLCh)ch);
+ fragment.push_back(ch);
p++;
}
{
parselen = str.size();
- DOMString tmp = str;
- parsebuf = (char *) tmp.c_str();
+ parsebuf = new int[str.size()];
+ if (!parsebuf)
+ {
+ error("parse : could not allocate parsebuf");
+ return false;
+ }
+
+ DOMString::const_iterator iter;
+ unsigned int i=0;
+ for (iter= str.begin() ; iter!=str.end() ; iter++)
+ {
+ int ch = *iter;
+ if (ch == '\\')
+ parsebuf[i++] = '/';
+ else
+ parsebuf[i++] = ch;
+ }
int p = parse(0);
+ normalize();
+
+ delete[] parsebuf;
if (p < 0)
{
}
//printf("uri:%s\n", toString().c_str());
- //printf("path:%s\n", path.c_str());
+ //printf("parse:%s\n", toStr(path).c_str());
return true;