59a965b8d942b84ab1c6ee8e95053d3e7ce1c02a
1 /**
2 * Phoebe DOM Implementation.
3 *
4 * This is a C++ approximation of the W3C DOM model, which follows
5 * fairly closely the specifications in the various .idl files, copies of
6 * which are provided for reference. Most important is this one:
7 *
8 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9 *
10 * Authors:
11 * Bob Jamison
12 *
13 * Copyright (C) 2005 Bob Jamison
14 *
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2.1 of the License, or (at your option) any later version.
19 *
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
33 #include "uri.h"
34 #include "charclass.h"
36 #include <stdio.h>
37 #include <stdarg.h>
41 namespace org
42 {
43 namespace w3c
44 {
45 namespace dom
46 {
/**
 * One row of the scheme lookup table.
 */
struct LookupEntry
{
    int   ival;   // scheme identifier (one of the URI::SCHEME_* values)
    char *sval;   // scheme prefix text as it appears in a URI; NULL ends the table
    int   port;   // port number recorded for the scheme
};
56 LookupEntry schemes[] =
57 {
58 { URI::SCHEME_DATA, "data:", 0 },
59 { URI::SCHEME_HTTP, "http:", 80 },
60 { URI::SCHEME_HTTPS, "https:", 443 },
61 { URI::SCHEME_FTP, "ftp", 12 },
62 { URI::SCHEME_FILE, "file:", 0 },
63 { URI::SCHEME_LDAP, "ldap:", 123 },
64 { URI::SCHEME_MAILTO, "mailto:", 25 },
65 { URI::SCHEME_NEWS, "news:", 117 },
66 { URI::SCHEME_TELNET, "telnet:", 23 },
67 { 0, NULL, 0 }
68 };
72 //#########################################################################
73 //# C O N S T R U C T O R
74 //#########################################################################
76 /**
77 *
78 */
79 URI::URI()
80 {
81 init();
82 }
84 /**
85 *
86 */
87 URI::URI(const DOMString &str)
88 {
89 init();
90 parse(str);
91 }
94 /**
95 *
96 */
97 URI::URI(const char *str)
98 {
99 init();
100 DOMString domStr = str;
101 parse(domStr);
102 }
105 /**
106 *
107 */
108 URI::URI(const URI &other)
109 {
110 init();
111 assign(other);
112 }
115 /**
116 *
117 */
118 URI &URI::operator=(const URI &other)
119 {
120 init();
121 assign(other);
122 return *this;
123 }
126 /**
127 *
128 */
129 URI::~URI()
130 {
131 }
137 /**
138 *
139 */
140 void URI::init()
141 {
142 parsebuf = NULL;
143 parselen = 0;
144 scheme = SCHEME_NONE;
145 schemeStr = "";
146 port = 0;
147 authority = "";
148 path = "";
149 absolute = false;
150 opaque = false;
151 query = "";
152 fragment = "";
153 }
156 /**
157 *
158 */
159 void URI::assign(const URI &other)
160 {
161 scheme = other.scheme;
162 schemeStr = other.schemeStr;
163 authority = other.authority;
164 port = other.port;
165 path = other.path;
166 absolute = other.absolute;
167 opaque = other.opaque;
168 query = other.query;
169 fragment = other.fragment;
170 }
173 //#########################################################################
174 //#A T T R I B U T E S
175 //#########################################################################
177 DOMString URI::toString() const
178 {
179 DOMString str = schemeStr;
180 if (authority.size() > 0)
181 {
182 str.append("//");
183 str.append(authority);
184 }
185 str.append(path);
186 if (query.size() > 0)
187 {
188 str.append("?");
189 str.append(query);
190 }
191 if (fragment.size() > 0)
192 {
193 str.append("#");
194 str.append(fragment);
195 }
196 return str;
197 }
200 int URI::getScheme() const
201 {
202 return scheme;
203 }
205 DOMString URI::getSchemeStr() const
206 {
207 return schemeStr;
208 }
211 DOMString URI::getAuthority() const
212 {
213 DOMString ret = authority;
214 if (portSpecified && port>=0)
215 {
216 char buf[7];
217 snprintf(buf, 6, ":%6d", port);
218 ret.append(buf);
219 }
220 return ret;
221 }
223 DOMString URI::getHost() const
224 {
225 return authority;
226 }
228 int URI::getPort() const
229 {
230 return port;
231 }
234 DOMString URI::getPath() const
235 {
236 return path;
237 }
240 bool URI::isAbsolute() const
241 {
242 return absolute;
243 }
245 bool URI::isOpaque() const
246 {
247 return opaque;
248 }
251 DOMString URI::getQuery() const
252 {
253 return query;
254 }
257 DOMString URI::getFragment() const
258 {
259 return fragment;
260 }
263 URI URI::resolve(const URI &other) const
264 {
265 //### According to w3c, this is handled in 3 cases
267 //## 1
268 if (opaque || other.isAbsolute())
269 return other;
271 //## 2
272 if (other.fragment.size() > 0 &&
273 other.path.size() == 0 &&
274 other.scheme == SCHEME_NONE &&
275 other.authority.size() == 0 &&
276 other.query.size() == 0 )
277 {
278 URI fragUri = *this;
279 fragUri.fragment = other.fragment;
280 return fragUri;
281 }
283 //## 3 http://www.ietf.org/rfc/rfc2396.txt, section 5.2
284 URI newUri;
285 //# 3.1
286 newUri.scheme = scheme;
287 newUri.schemeStr = schemeStr;
288 newUri.query = other.query;
289 newUri.fragment = other.fragment;
290 if (other.authority.size() > 0)
291 {
292 //# 3.2
293 if (absolute || other.absolute)
294 newUri.absolute = true;
295 newUri.authority = other.authority;
296 newUri.port = other.port;//part of authority
297 newUri.path = other.path;
298 }
299 else
300 {
301 //# 3.3
302 if (other.absolute)
303 {
304 newUri.absolute = true;
305 newUri.path = other.path;
306 }
307 else
308 {
309 unsigned int pos = path.rfind('/');
310 if (pos != path.npos)
311 {
312 DOMString tpath = path.substr(0, pos+1);
313 tpath.append(other.path);
314 newUri.path = tpath;
315 newUri.normalize();
316 }
317 }
318 }
319 return newUri;
320 }
323 /**
324 * This follows the Java URI algorithm:
325 * 1. All "." segments are removed.
326 * 2. If a ".." segment is preceded by a non-".." segment
327 * then both of these segments are removed. This step
328 * is repeated until it is no longer applicable.
329 * 3. If the path is relative, and if its first segment
330 * contains a colon character (':'), then a "." segment
331 * is prepended. This prevents a relative URI with a path
332 * such as "a:b/c/d" from later being re-parsed as an
333 * opaque URI with a scheme of "a" and a scheme-specific
334 * part of "b/c/d". (Deviation from RFC 2396)
335 */
336 void URI::normalize()
337 {
338 std::vector<DOMString> segments;
340 //## Collect segments
341 if (path.size()<2)
342 return;
343 unsigned int pos=0;
344 while (pos < path.size())
345 {
346 unsigned int pos2 = path.find(pos);
347 if (pos2==path.npos)
348 break;
349 if (pos2>pos)
350 {
351 DOMString seg = path.substr(pos, pos2);
352 segments.push_back(seg);
353 }
354 pos = pos2;
355 pos++;
356 }
358 //## Clean up (normalize) segments
359 bool edited = false;
360 std::vector<DOMString>::iterator iter;
361 for (iter=segments.begin() ; iter!=segments.end() ; )
362 {
363 DOMString s = *iter;
364 if (s == ".")
365 {
366 iter = segments.erase(iter);
367 edited = true;
368 }
369 else if (s == ".." &&
370 iter != segments.begin() &&
371 *(iter-1) != "..")
372 {
373 iter--; //back up, then erase two entries
374 iter = segments.erase(iter);
375 iter = segments.erase(iter);
376 edited = true;
377 }
378 else
379 iter++;
380 }
382 //## Rebuild path, if necessary
383 if (edited)
384 {
385 path.clear();
386 if (absolute)
387 path.append("/");
388 std::vector<DOMString>::iterator iter;
389 for (iter=segments.begin() ; iter!=segments.end() ; iter++)
390 {
391 path.append(*iter);
392 path.append("/");
393 }
394 }
396 }
400 //#########################################################################
401 //# M E S S A G E S
402 //#########################################################################
404 void URI::error(const char *fmt, ...)
405 {
406 va_list args;
407 fprintf(stderr, "URI error: ");
408 va_start(args, fmt);
409 vfprintf(stderr, fmt, args);
410 va_end(args);
411 fprintf(stderr, "\n");
412 }
414 void URI::trace(const char *fmt, ...)
415 {
416 va_list args;
417 fprintf(stdout, "URI: ");
418 va_start(args, fmt);
419 vfprintf(stdout, fmt, args);
420 va_end(args);
421 fprintf(stdout, "\n");
422 }
426 //#########################################################################
427 //# P A R S I N G
428 //#########################################################################
432 int URI::peek(int p)
433 {
434 if (p<0 || p>=parselen)
435 return -1;
436 return parsebuf[p];
437 }
441 int URI::match(int p0, char *key)
442 {
443 int p = p0;
444 while (p < parselen)
445 {
446 if (*key == '\0')
447 return p;
448 else if (*key != parsebuf[p])
449 break;
450 p++; key++;
451 }
452 return p0;
453 }
455 //#########################################################################
456 //# Parsing is performed according to:
457 //# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
458 //#########################################################################
460 int URI::parseScheme(int p0)
461 {
462 int p = p0;
463 for (LookupEntry *entry = schemes; entry->sval ; entry++)
464 {
465 int p2 = match(p, entry->sval);
466 if (p2 > p)
467 {
468 schemeStr = entry->sval;
469 scheme = entry->ival;
470 port = entry->port;
471 p = p2;
472 return p;
473 }
474 }
476 return p;
477 }
480 int URI::parseHierarchicalPart(int p0)
481 {
482 int p = p0;
483 int ch;
485 //# Authority field (host and port, for example)
486 int p2 = match(p, "//");
487 if (p2 > p)
488 {
489 p = p2;
490 portSpecified = false;
491 DOMString portStr;
492 while (p < parselen)
493 {
494 ch = peek(p);
495 if (ch == '/')
496 break;
497 else if (ch == ':')
498 portSpecified = true;
499 else if (portSpecified)
500 portStr.push_back((XMLCh)ch);
501 else
502 authority.push_back((XMLCh)ch);
503 p++;
504 }
505 if (portStr.size() > 0)
506 {
507 char *pstr = (char *)portStr.c_str();
508 char *endStr;
509 long val = strtol(pstr, &endStr, 10);
510 if (endStr > pstr) //successful parse?
511 port = val;
512 }
513 }
515 //# Are we absolute?
516 ch = peek(p);
517 if (ch == '/')
518 {
519 absolute = true;
520 if (p>p0) //in other words, if '/' is not the first char
521 opaque = true;
522 path.push_back((XMLCh)ch);
523 p++;
524 }
526 while (p < parselen)
527 {
528 ch = peek(p);
529 if (ch == '?' || ch == '#')
530 break;
531 path.push_back((XMLCh)ch);
532 p++;
533 }
535 return p;
536 }
538 int URI::parseQuery(int p0)
539 {
540 int p = p0;
541 int ch = peek(p);
542 if (ch != '?')
543 return p0;
545 p++;
546 while (p < parselen)
547 {
548 ch = peek(p);
549 if (ch == '#')
550 break;
551 query.push_back((XMLCh)ch);
552 p++;
553 }
556 return p;
557 }
559 int URI::parseFragment(int p0)
560 {
562 int p = p0;
563 int ch = peek(p);
564 if (ch != '#')
565 return p0;
567 p++;
568 while (p < parselen)
569 {
570 ch = peek(p);
571 if (ch == '?')
572 break;
573 fragment.push_back((XMLCh)ch);
574 p++;
575 }
578 return p;
579 }
582 int URI::parse(int p0)
583 {
585 int p = p0;
587 int p2 = parseScheme(p);
588 if (p2 < 0)
589 {
590 error("Scheme");
591 return -1;
592 }
593 p = p2;
596 p2 = parseHierarchicalPart(p);
597 if (p2 < 0)
598 {
599 error("Hierarchical part");
600 return -1;
601 }
602 p = p2;
604 p2 = parseQuery(p);
605 if (p2 < 0)
606 {
607 error("Query");
608 return -1;
609 }
610 p = p2;
613 p2 = parseFragment(p);
614 if (p2 < 0)
615 {
616 error("Fragment");
617 return -1;
618 }
619 p = p2;
621 return p;
623 }
627 bool URI::parse(const DOMString &str)
628 {
630 parselen = str.size();
631 DOMString tmp = str;
632 parsebuf = (char *) tmp.c_str();
635 int p = parse(0);
637 if (p < 0)
638 {
639 error("Syntax error");
640 return false;
641 }
643 //printf("uri:%s\n", toString().c_str());
644 //printf("path:%s\n", path.c_str());
646 return true;
648 }
654 } //namespace dom
655 } //namespace w3c
656 } //namespace org
657 //#########################################################################
658 //# E N D O F F I L E
659 //#########################################################################