286857e4177c7c87bb160991f90fd613f3f5f729
1 /**
2 * Phoebe DOM Implementation.
3 *
4 * This is a C++ approximation of the W3C DOM model, which follows
5 * fairly closely the specifications in the various .idl files, copies of
6 * which are provided for reference. Most important is this one:
7 *
8 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9 *
10 * Authors:
11 * Bob Jamison
12 *
13 * Copyright (C) 2005 Bob Jamison
14 *
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2.1 of the License, or (at your option) any later version.
19 *
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
33 #include "uri.h"
34 #include "charclass.h"
36 #include <stdio.h>
37 #include <stdarg.h>
41 namespace org
42 {
43 namespace w3c
44 {
45 namespace dom
46 {
/**
 * One row of the scheme lookup table: ties a scheme identifier to the
 * prefix text that selects it and to the port recorded for that scheme.
 */
struct LookupEntry
{
    int   ival;  // scheme identifier
    char *sval;  // scheme prefix text; a NULL value marks the end of a table
    int   port;  // port stored on the URI when this scheme is recognized
};
56 LookupEntry schemes[] =
57 {
58 { URI::SCHEME_DATA, "data:", 0 },
59 { URI::SCHEME_HTTP, "http:", 80 },
60 { URI::SCHEME_HTTPS, "https:", 443 },
61 { URI::SCHEME_FTP, "ftp", 12 },
62 { URI::SCHEME_FILE, "file:", 0 },
63 { URI::SCHEME_LDAP, "ldap:", 123 },
64 { URI::SCHEME_MAILTO, "mailto:", 25 },
65 { URI::SCHEME_NEWS, "news:", 117 },
66 { URI::SCHEME_TELNET, "telnet:", 23 },
67 { 0, NULL, 0 }
68 };
72 //#########################################################################
73 //# C O N S T R U C T O R
74 //#########################################################################
76 /**
77 *
78 */
79 URI::URI()
80 {
81 init();
82 }
84 /**
85 *
86 */
87 URI::URI(const DOMString &str)
88 {
89 init();
90 parse(str);
91 }
94 /**
95 *
96 */
97 URI::URI(const char *str)
98 {
99 init();
100 DOMString domStr = str;
101 parse(domStr);
102 }
105 /**
106 *
107 */
108 URI::URI(const URI &other)
109 {
110 init();
111 assign(other);
112 }
115 /**
116 *
117 */
118 URI &URI::operator=(const URI &other)
119 {
120 init();
121 assign(other);
122 return *this;
123 }
126 /**
127 *
128 */
129 URI::~URI()
130 {
131 }
137 /**
138 *
139 */
140 void URI::init()
141 {
142 parsebuf = NULL;
143 parselen = 0;
144 scheme = SCHEME_NONE;
145 schemeStr = "";
146 port = 0;
147 authority = "";
148 path = "";
149 absolute = false;
150 opaque = false;
151 query = "";
152 fragment = "";
153 }
156 /**
157 *
158 */
159 void URI::assign(const URI &other)
160 {
161 scheme = other.scheme;
162 schemeStr = other.schemeStr;
163 authority = other.authority;
164 port = other.port;
165 path = other.path;
166 absolute = other.absolute;
167 opaque = other.opaque;
168 query = other.query;
169 fragment = other.fragment;
170 }
173 //#########################################################################
//# A T T R I B U T E S
175 //#########################################################################
177 DOMString URI::toString() const
178 {
179 DOMString str = schemeStr;
180 if (authority.size() > 0)
181 {
182 str.append("//");
183 str.append(authority);
184 }
185 str.append(path);
186 if (query.size() > 0)
187 {
188 str.append("?");
189 str.append(query);
190 }
191 if (fragment.size() > 0)
192 {
193 str.append("#");
194 str.append(fragment);
195 }
196 return str;
197 }
200 int URI::getScheme() const
201 {
202 return scheme;
203 }
205 DOMString URI::getSchemeStr() const
206 {
207 return schemeStr;
208 }
211 DOMString URI::getAuthority() const
212 {
213 DOMString ret = authority;
214 if (portSpecified && port>=0)
215 {
216 char buf[7];
217 snprintf(buf, 6, ":%6d", port);
218 ret.append(buf);
219 }
220 return ret;
221 }
223 DOMString URI::getHost() const
224 {
225 return authority;
226 }
228 int URI::getPort() const
229 {
230 return port;
231 }
234 DOMString URI::getPath() const
235 {
236 return path;
237 }
240 bool URI::isAbsolute() const
241 {
242 return absolute;
243 }
245 bool URI::isOpaque() const
246 {
247 return opaque;
248 }
251 DOMString URI::getQuery() const
252 {
253 return query;
254 }
257 DOMString URI::getFragment() const
258 {
259 return fragment;
260 }
263 URI URI::resolve(const URI &other) const
264 {
265 //### According to w3c, this is handled in 3 cases
267 //## 1
268 if (opaque || other.isAbsolute())
269 return other;
271 //## 2
272 if (other.fragment.size() > 0 &&
273 other.path.size() == 0 &&
274 other.scheme == SCHEME_NONE &&
275 other.authority.size() == 0 &&
276 other.query.size() == 0 )
277 {
278 URI fragUri = *this;
279 fragUri.fragment = other.fragment;
280 return fragUri;
281 }
283 //## 3 http://www.ietf.org/rfc/rfc2396.txt, section 5.2
284 URI newUri;
285 //# 3.1
286 newUri.scheme = scheme;
287 newUri.schemeStr = schemeStr;
288 newUri.query = other.query;
289 newUri.fragment = other.fragment;
290 if (other.authority.size() > 0)
291 {
292 //# 3.2
293 if (absolute || other.absolute)
294 newUri.absolute = true;
295 newUri.authority = other.authority;
296 newUri.port = other.port;//part of authority
297 newUri.path = other.path;
298 }
299 else
300 {
301 //# 3.3
302 if (other.absolute)
303 {
304 newUri.absolute = true;
305 newUri.path = other.path;
306 }
307 else
308 {
309 unsigned int pos = path.rfind('/');
310 if (pos != path.npos)
311 {
312 DOMString tpath = path.substr(pos);
313 tpath.append(other.path);
314 newUri.path = tpath;
315 newUri.normalize();
316 }
317 }
318 }
319 return newUri;
320 }
323 /**
324 *
325 */
326 void URI::normalize() const
327 {
334 }
338 //#########################################################################
339 //# M E S S A G E S
340 //#########################################################################
342 void URI::error(const char *fmt, ...)
343 {
344 va_list args;
345 fprintf(stderr, "URI error: ");
346 va_start(args, fmt);
347 vfprintf(stderr, fmt, args);
348 va_end(args);
349 fprintf(stderr, "\n");
350 }
352 void URI::trace(const char *fmt, ...)
353 {
354 va_list args;
355 fprintf(stdout, "URI: ");
356 va_start(args, fmt);
357 vfprintf(stdout, fmt, args);
358 va_end(args);
359 fprintf(stdout, "\n");
360 }
364 //#########################################################################
365 //# P A R S I N G
366 //#########################################################################
370 int URI::peek(int p)
371 {
372 if (p<0 || p>=parselen)
373 return -1;
374 return parsebuf[p];
375 }
379 int URI::match(int p0, char *key)
380 {
381 int p = p0;
382 while (p < parselen)
383 {
384 if (*key == '\0')
385 return p;
386 else if (*key != parsebuf[p])
387 break;
388 p++; key++;
389 }
390 return p0;
391 }
393 //#########################################################################
394 //# Parsing is performed according to:
395 //# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
396 //#########################################################################
398 int URI::parseScheme(int p0)
399 {
400 int p = p0;
401 for (LookupEntry *entry = schemes; entry->sval ; entry++)
402 {
403 int p2 = match(p, entry->sval);
404 if (p2 > p)
405 {
406 schemeStr = entry->sval;
407 scheme = entry->ival;
408 port = entry->port;
409 p = p2;
410 return p;
411 }
412 }
414 return p;
415 }
418 int URI::parseHierarchicalPart(int p0)
419 {
420 int p = p0;
421 int ch;
423 //# Authority field (host and port, for example)
424 int p2 = match(p, "//");
425 if (p2 > p)
426 {
427 p = p2;
428 portSpecified = false;
429 DOMString portStr;
430 while (p < parselen)
431 {
432 ch = peek(p);
433 if (ch == '/')
434 break;
435 else if (ch == ':')
436 portSpecified = true;
437 else if (portSpecified)
438 portStr.push_back((XMLCh)ch);
439 else
440 authority.push_back((XMLCh)ch);
441 p++;
442 }
443 if (portStr.size() > 0)
444 {
445 char *pstr = (char *)portStr.c_str();
446 char *endStr;
447 long val = strtol(pstr, &endStr, 10);
448 if (endStr > pstr) //successful parse?
449 port = val;
450 }
451 }
453 //# Are we absolute?
454 ch = peek(p);
455 if (ch == '/')
456 {
457 absolute = true;
458 if (p>p0) //in other words, if '/' is not the first char
459 opaque = true;
460 path.push_back((XMLCh)ch);
461 p++;
462 }
464 while (p < parselen)
465 {
466 ch = peek(p);
467 if (ch == '?' || ch == '#')
468 break;
469 path.push_back((XMLCh)ch);
470 p++;
471 }
473 return p;
474 }
476 int URI::parseQuery(int p0)
477 {
478 int p = p0;
479 int ch = peek(p);
480 if (ch != '?')
481 return p0;
483 p++;
484 while (p < parselen)
485 {
486 ch = peek(p);
487 if (ch == '#')
488 break;
489 query.push_back((XMLCh)ch);
490 p++;
491 }
494 return p;
495 }
497 int URI::parseFragment(int p0)
498 {
500 int p = p0;
501 int ch = peek(p);
502 if (ch != '#')
503 return p0;
505 p++;
506 while (p < parselen)
507 {
508 ch = peek(p);
509 if (ch == '?')
510 break;
511 fragment.push_back((XMLCh)ch);
512 p++;
513 }
516 return p;
517 }
520 int URI::parse(int p0)
521 {
523 int p = p0;
525 int p2 = parseScheme(p);
526 if (p2 < 0)
527 {
528 error("Scheme");
529 return -1;
530 }
531 p = p2;
534 p2 = parseHierarchicalPart(p);
535 if (p2 < 0)
536 {
537 error("Hierarchical part");
538 return -1;
539 }
540 p = p2;
542 p2 = parseQuery(p);
543 if (p2 < 0)
544 {
545 error("Query");
546 return -1;
547 }
548 p = p2;
551 p2 = parseFragment(p);
552 if (p2 < 0)
553 {
554 error("Fragment");
555 return -1;
556 }
557 p = p2;
559 return p;
561 }
565 bool URI::parse(const DOMString &str)
566 {
568 parselen = str.size();
569 DOMString tmp = str;
570 parsebuf = (char *) tmp.c_str();
573 int p = parse(0);
575 if (p < 0)
576 {
577 error("Syntax error");
578 return false;
579 }
581 //printf("uri:%s\n", toString().c_str());
582 //printf("path:%s\n", path.c_str());
584 return true;
586 }
592 } //namespace dom
593 } //namespace w3c
594 } //namespace org
595 //#########################################################################
596 //# E N D O F F I L E
597 //#########################################################################