1 /**
2 * Phoebe DOM Implementation.
3 *
4 * This is a C++ approximation of the W3C DOM model, which follows
5 * fairly closely the specifications in the various .idl files, copies of
6 * which are provided for reference. Most important is this one:
7 *
8 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9 *
10 * Authors:
11 * Bob Jamison
12 *
13 * Copyright (C) 2005 Bob Jamison
14 *
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2.1 of the License, or (at your option) any later version.
19 *
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
33 #include "uri.h"
34 #include "charclass.h"
36 #include <stdio.h>
37 #include <stdarg.h>
41 namespace org
42 {
43 namespace w3c
44 {
45 namespace dom
46 {
/**
 * One row of the scheme lookup table: a scheme identifier, the text that
 * introduces that scheme at the start of a URI string, and the port
 * number stored when that scheme is recognized.
 */
struct LookupEntry
{
    int   ival;  // scheme identifier (the table uses URI::SCHEME_* values)
    char *sval;  // scheme text to match; NULL marks the end of the table
    int   port;  // port assigned to the URI when this scheme matches
};
56 LookupEntry schemes[] =
57 {
58 { URI::SCHEME_DATA, "data:", 0 },
59 { URI::SCHEME_HTTP, "http:", 80 },
60 { URI::SCHEME_HTTPS, "https:", 443 },
61 { URI::SCHEME_FTP, "ftp", 12 },
62 { URI::SCHEME_FILE, "file:", 0 },
63 { URI::SCHEME_LDAP, "ldap:", 123 },
64 { URI::SCHEME_MAILTO, "mailto:", 25 },
65 { URI::SCHEME_NEWS, "news:", 117 },
66 { URI::SCHEME_TELNET, "telnet:", 23 },
67 { 0, NULL, 0 }
68 };
72 //#########################################################################
73 //# C O N S T R U C T O R
74 //#########################################################################
76 /**
77 *
78 */
79 URI::URI()
80 {
81 init();
82 }
84 /**
85 *
86 */
87 URI::URI(const DOMString &str)
88 {
89 init();
90 parse(str);
91 }
94 /**
95 *
96 */
97 URI::URI(const char *str)
98 {
99 init();
100 DOMString domStr = str;
101 parse(domStr);
102 }
105 /**
106 *
107 */
108 URI::URI(const URI &other)
109 {
110 init();
111 scheme = other.scheme;
112 schemeStr = other.schemeStr;
113 authority = other.authority;
114 port = other.port;
115 path = other.path;
116 absolute = other.absolute;
117 query = other.query;
118 fragment = other.fragment;
119 }
122 /**
123 *
124 */
125 URI::~URI()
126 {
127 }
133 /**
134 *
135 */
136 void URI::init()
137 {
138 parsebuf = NULL;
139 parselen = 0;
140 scheme = SCHEME_NONE;
141 schemeStr = "";
142 port = 0;
143 authority = "";
144 path = "";
145 absolute = false;
146 query = "";
147 fragment = "";
148 }
152 //#########################################################################
//# A T T R I B U T E S
154 //#########################################################################
156 DOMString URI::toString() const
157 {
158 DOMString str = schemeStr;
159 if (authority.size() > 0)
160 {
161 str.append("//");
162 str.append(authority);
163 }
164 str.append(path);
165 if (query.size() > 0)
166 {
167 str.append("?");
168 str.append(query);
169 }
170 if (fragment.size() > 0)
171 {
172 str.append("#");
173 str.append(fragment);
174 }
175 return str;
176 }
179 int URI::getScheme() const
180 {
181 return scheme;
182 }
184 DOMString URI::getSchemeStr() const
185 {
186 return schemeStr;
187 }
190 DOMString URI::getAuthority() const
191 {
192 DOMString ret = authority;
193 if (portSpecified && port>=0)
194 {
195 char buf[7];
196 snprintf(buf, 6, ":%6d", port);
197 ret.append(buf);
198 }
199 return ret;
200 }
202 DOMString URI::getHost() const
203 {
204 return authority;
205 }
207 int URI::getPort() const
208 {
209 return port;
210 }
213 DOMString URI::getPath() const
214 {
215 return path;
216 }
219 bool URI::getIsAbsolute() const
220 {
221 return absolute;
222 }
225 DOMString URI::getQuery() const
226 {
227 return query;
228 }
231 DOMString URI::getFragment() const
232 {
233 return fragment;
234 }
238 //#########################################################################
239 //# M E S S A G E S
240 //#########################################################################
242 void URI::error(const char *fmt, ...)
243 {
244 va_list args;
245 fprintf(stderr, "URI error: ");
246 va_start(args, fmt);
247 vfprintf(stderr, fmt, args);
248 va_end(args);
249 fprintf(stderr, "\n");
250 }
252 void URI::trace(const char *fmt, ...)
253 {
254 va_list args;
255 fprintf(stdout, "URI: ");
256 va_start(args, fmt);
257 vfprintf(stdout, fmt, args);
258 va_end(args);
259 fprintf(stdout, "\n");
260 }
264 //#########################################################################
265 //# P A R S I N G
266 //#########################################################################
270 int URI::peek(int p)
271 {
272 if (p<0 || p>=parselen)
273 return -1;
274 return parsebuf[p];
275 }
279 int URI::match(int p0, char *key)
280 {
281 int p = p0;
282 while (p < parselen)
283 {
284 if (*key == '\0')
285 return p;
286 else if (*key != parsebuf[p])
287 break;
288 p++; key++;
289 }
290 return p0;
291 }
293 //#########################################################################
294 //# Parsing is performed according to:
295 //# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
296 //#########################################################################
298 int URI::parseScheme(int p0)
299 {
300 int p = p0;
301 for (LookupEntry *entry = schemes; entry->sval ; entry++)
302 {
303 int p2 = match(p, entry->sval);
304 if (p2 > p)
305 {
306 schemeStr = entry->sval;
307 scheme = entry->ival;
308 port = entry->port;
309 p = p2;
310 return p;
311 }
312 }
314 return p;
315 }
318 int URI::parseHierarchicalPart(int p0)
319 {
320 int p = p0;
321 int ch;
323 //# Authority field (host and port, for example)
324 int p2 = match(p, "//");
325 if (p2 > p)
326 {
327 p = p2;
328 portSpecified = false;
329 DOMString portStr;
330 while (p < parselen)
331 {
332 ch = peek(p);
333 if (ch == '/')
334 break;
335 else if (ch == ':')
336 portSpecified = true;
337 else if (portSpecified)
338 portStr.push_back(ch);
339 else
340 authority.push_back(ch);
341 p++;
342 }
343 if (portStr.size() > 0)
344 {
345 char *pstr = (char *)portStr.c_str();
346 char *endStr;
347 long val = strtol(pstr, &endStr, 10);
348 if (endStr > pstr) //successful parse?
349 port = val;
350 }
351 }
353 //# Are we absolute?
354 ch = peek(p);
355 if (ch == '/')
356 {
357 absolute = true;
358 path.push_back(ch);
359 p++;
360 }
362 while (p < parselen)
363 {
364 ch = peek(p);
365 if (ch == '?' || ch == '#')
366 break;
367 path.push_back(ch);
368 p++;
369 }
371 return p;
372 }
374 int URI::parseQuery(int p0)
375 {
376 int p = p0;
377 int ch = peek(p);
378 if (ch != '?')
379 return p0;
381 p++;
382 while (p < parselen)
383 {
384 ch = peek(p);
385 if (ch == '#')
386 break;
387 query.push_back(ch);
388 p++;
389 }
392 return p;
393 }
395 int URI::parseFragment(int p0)
396 {
398 int p = p0;
399 int ch = peek(p);
400 if (ch != '#')
401 return p0;
403 p++;
404 while (p < parselen)
405 {
406 ch = peek(p);
407 if (ch == '?')
408 break;
409 fragment.push_back(ch);
410 p++;
411 }
414 return p;
415 }
418 int URI::parse(int p0)
419 {
421 int p = p0;
423 int p2 = parseScheme(p);
424 if (p2 < 0)
425 {
426 error("Scheme");
427 return -1;
428 }
429 p = p2;
432 p2 = parseHierarchicalPart(p);
433 if (p2 < 0)
434 {
435 error("Hierarchical part");
436 return -1;
437 }
438 p = p2;
440 p2 = parseQuery(p);
441 if (p2 < 0)
442 {
443 error("Query");
444 return -1;
445 }
446 p = p2;
449 p2 = parseFragment(p);
450 if (p2 < 0)
451 {
452 error("Fragment");
453 return -1;
454 }
455 p = p2;
457 return p;
459 }
463 bool URI::parse(const DOMString &str)
464 {
466 parselen = str.size();
467 DOMString tmp = str;
468 parsebuf = (char *) tmp.c_str();
471 int p = parse(0);
473 if (p < 0)
474 {
475 error("Syntax error");
476 return false;
477 }
479 //printf("uri:%s\n", toString().c_str());
480 //printf("path:%s\n", path.c_str());
482 return true;
484 }
490 } //namespace dom
491 } //namespace w3c
492 } //namespace org
493 //#########################################################################
494 //# E N D O F F I L E
495 //#########################################################################