1 /**
2 * Phoebe DOM Implementation.
3 *
4 * This is a C++ approximation of the W3C DOM model, which follows
5 * fairly closely the specifications in the various .idl files, copies of
6 * which are provided for reference. Most important is this one:
7 *
8 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9 *
10 * Authors:
11 * Bob Jamison
12 *
13 * Copyright (C) 2005 Bob Jamison
14 *
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2.1 of the License, or (at your option) any later version.
19 *
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
33 #include "uri.h"
35 #include <stdarg.h>
39 namespace org
40 {
41 namespace w3c
42 {
43 namespace dom
44 {
/**
 * One row of a lookup table: an integer code paired with a C string.
 * The table built from it in this file ends with a row whose sval is NULL.
 */
struct LookupEntry
{
    int   ival;   // integer code of the row
    char *sval;   // text associated with the code
};
53 LookupEntry schemes[] =
54 {
55 { URI::SCHEME_DATA, "data:" },
56 { URI::SCHEME_HTTP, "http:" },
57 { URI::SCHEME_FTP, "ftp" },
58 { URI::SCHEME_FILE, "file:" },
59 { URI::SCHEME_LDAP, "ldap:" },
60 { URI::SCHEME_MAILTO, "mailto:" },
61 { URI::SCHEME_NEWS, "news:" },
62 { URI::SCHEME_TELNET, "telnet:" },
63 { 0, NULL }
64 };
68 //#########################################################################
69 //# C O N S T R U C T O R
70 //#########################################################################
72 /**
73 *
74 */
75 URI::URI()
76 {
77 init();
78 }
80 /**
81 *
82 */
83 URI::URI(const DOMString &str)
84 {
85 init();
86 parse(str);
87 }
90 /**
91 *
92 */
93 URI::URI(const char *str)
94 {
95 init();
96 DOMString domStr = str;
97 parse(domStr);
98 }
101 /**
102 *
103 */
104 URI::URI(const URI &other)
105 {
106 init();
107 scheme = other.scheme;
108 schemeStr = other.schemeStr;
109 authority = other.authority;
110 path = other.path;
111 absolute = other.absolute;
112 query = other.query;
113 fragment = other.fragment;
114 }
117 /**
118 *
119 */
120 URI::~URI()
121 {
122 }
128 /**
129 *
130 */
131 void URI::init()
132 {
133 parsebuf = NULL;
134 parselen = 0;
135 scheme = SCHEME_NONE;
136 schemeStr = "";
137 authority = "";
138 path = "";
139 absolute = false;
140 query = "";
141 fragment = "";
142 }
146 //#########################################################################
147 //#A T T R I B U T E S
148 //#########################################################################
150 DOMString URI::toString()
151 {
152 DOMString str = schemeStr;
153 if (authority.size()>0)
154 {
155 str.append("//");
156 str.append(authority);
157 }
158 str.append(path);
159 if (query.size() > 0)
160 {
161 str.append("?");
162 str.append(query);
163 }
164 if (fragment.size() > 0)
165 {
166 str.append("#");
167 str.append(fragment);
168 }
169 return str;
170 }
173 int URI::getScheme()
174 {
175 return scheme;
176 }
178 DOMString URI::getSchemeStr()
179 {
180 return schemeStr;
181 }
184 DOMString URI::getAuthority()
185 {
186 return authority;
187 }
190 DOMString URI::getPath()
191 {
192 return path;
193 }
196 bool URI::getIsAbsolute()
197 {
198 return absolute;
199 }
202 DOMString URI::getQuery()
203 {
204 return query;
205 }
208 DOMString URI::getFragment()
209 {
210 return fragment;
211 }
215 //#########################################################################
216 //# M E S S A G E S
217 //#########################################################################
219 void URI::error(const char *fmt, ...)
220 {
221 va_list args;
222 fprintf(stderr, "URI error: ");
223 va_start(args, fmt);
224 vfprintf(stderr, fmt, args);
225 va_end(args);
226 fprintf(stderr, "\n");
227 }
229 void URI::trace(const char *fmt, ...)
230 {
231 va_list args;
232 fprintf(stdout, "URI: ");
233 va_start(args, fmt);
234 vfprintf(stdout, fmt, args);
235 va_end(args);
236 fprintf(stdout, "\n");
237 }
241 //#########################################################################
242 //# P A R S I N G
243 //#########################################################################
247 int URI::peek(int p)
248 {
249 if (p<0 || p>=parselen)
250 return -1;
251 return parsebuf[p];
252 }
256 int URI::match(int p0, char *key)
257 {
258 int p = p0;
259 while (p < parselen)
260 {
261 if (*key != parsebuf[p])
262 break;
263 p++; key++;
264 }
265 return p;
266 }
268 //#########################################################################
269 //# Parsing is performed according to:
270 //# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
271 //#########################################################################
273 int URI::parseScheme(int p0)
274 {
275 int p = p0;
276 for (LookupEntry *entry = schemes; entry->sval ; entry++)
277 {
278 int p2 = match(p, entry->sval);
279 if (p2 > p)
280 {
281 schemeStr = entry->sval;
282 scheme = entry->ival;
283 p = p2;
284 return p;
285 }
286 }
288 return p;
289 }
292 int URI::parseHierarchicalPart(int p0)
293 {
294 int p = p0;
295 int ch;
297 //# Authority field (host and port, for example)
298 int p2 = match(p, "//");
299 if (p2 > p)
300 {
301 p = p2;
302 while (p < parselen)
303 {
304 ch = peek(p);
305 if (ch == '/')
306 break;
307 authority.push_back(ch);
308 p++;
309 }
310 }
312 //# Are we absolute?
313 ch = peek(p);
314 if (ch == '/')
315 {
316 absolute = true;
317 path.push_back(ch);
318 p++;
319 }
321 while (p < parselen)
322 {
323 ch = peek(p);
324 if (ch == '?' || ch == '#')
325 break;
326 path.push_back(ch);
327 p++;
328 }
330 return p;
331 }
333 int URI::parseQuery(int p0)
334 {
335 int p = p0;
336 int ch = peek(p);
337 if (ch != '?')
338 return p0;
340 p++;
341 while (p < parselen)
342 {
343 ch = peek(p);
344 if (ch == '#')
345 break;
346 query.push_back(ch);
347 p++;
348 }
351 return p;
352 }
354 int URI::parseFragment(int p0)
355 {
357 int p = p0;
358 int ch = peek(p);
359 if (ch != '#')
360 return p0;
362 p++;
363 while (p < parselen)
364 {
365 ch = peek(p);
366 if (ch == '?')
367 break;
368 fragment.push_back(ch);
369 p++;
370 }
373 return p;
374 }
377 int URI::parse(int p0)
378 {
380 int p = p0;
382 int p2 = parseScheme(p);
383 if (p2 < 0)
384 {
385 error("Scheme");
386 return -1;
387 }
388 p = p2;
391 p2 = parseHierarchicalPart(p);
392 if (p2 < 0)
393 {
394 error("Hierarchical part");
395 return -1;
396 }
397 p = p2;
399 p2 = parseQuery(p);
400 if (p2 < 0)
401 {
402 error("Query");
403 return -1;
404 }
405 p = p2;
408 p2 = parseFragment(p);
409 if (p2 < 0)
410 {
411 error("Fragment");
412 return -1;
413 }
414 p = p2;
416 return p;
418 }
422 bool URI::parse(const DOMString &str)
423 {
425 parselen = str.size();
426 DOMString tmp = str;
427 parsebuf = (char *) tmp.c_str();
430 int p = parse(0);
432 if (p < 0)
433 {
434 error("Syntax error");
435 return false;
436 }
438 return true;
440 }
446 } //namespace dom
447 } //namespace w3c
448 } //namespace org
449 //#########################################################################
450 //# E N D O F F I L E
451 //#########################################################################