1 import cStringIO, cgi, sys, urllib
2 import dps.utils
3 try:
4 from restructuredtext import Parser
5 except ImportError:
6 from dps.parsers.restructuredtext import Parser
8 # TODO: enforce model?
class DumbHTMLFormatter:
    '''Render a parsed document tree as plain HTML.

    Each node is dispatched on its ``tagname`` to the method named
    ``format_<tagname>`` (``#text`` nodes go to ``format__text``).  All
    output is accumulated in an in-memory buffer; ``format_document`` is
    the entry point and returns the finished page as a string.

    The docstrings of the ``format_*`` methods quote the content model of
    the element they handle.
    '''

    def __init__(self):
        self.out = cStringIO.StringIO()
        self.w = self.out.write
        # current heading depth: incremented on entering a section (and
        # for a document subtitle), read by format_title
        self.section = 0
        # not used by any method of this class; kept so that existing
        # code reading the attribute keeps working
        self.closers = []

    def format(self, node):
        '''Format every child of node, in order.
        '''
        for entry in node:
            self.formatOneTag(entry)

    def formatOneTag(self, tag):
        '''Dispatch a single node to its ``format_<tagname>`` method.

        Raises AttributeError when no handler exists for the tag name.
        '''
        if tag.tagname == '#text':
            meth = self.format__text
        else:
            meth = getattr(self, 'format_'+tag.tagname)
        meth(tag)

    #
    # Private helpers
    #
    def _format_children(self, node):
        '''Format the children of node, if it has any.
        '''
        if node.children:
            self.format(node)

    def _wrap(self, node, opener, closer):
        '''Write opener, the formatted children of node, then closer.
        '''
        self.w(opener)
        self._format_children(node)
        self.w(closer)

    def _write_data(self, node, prefix=''):
        '''Write the HTML-escaped data of each child of node, each one
        preceded by prefix.
        '''
        for child in node.children:
            self.w(prefix + cgi.escape(child.data))

    def _write_meta(self, name, node):
        '''Write a <meta> tag whose content is the data of node's first
        child.
        '''
        # NOTE(review): urllib.quote percent-encodes the text (a space
        # becomes %20).  Kept because the handlers that worked before
        # produced exactly this; confirm whether HTML attribute escaping
        # was intended instead.
        content = urllib.quote(node[0].data)
        self.w('<meta name="%s" content="%s">\n' % (name, content))

    def _format_rows(self, node, cell_tag):
        '''Write one <tr> per child row of node, using cell_tag ('th' or
        'td') for the cells.
        '''
        for row in node.children:
            self.w('<tr>')
            for cell in row.children:
                spans = ''
                attrs = cell.attributes
                # morecols/morerows count the *additional* columns/rows
                # covered, hence the +1
                if 'morecols' in attrs:
                    spans = spans + ' colspan=%d' % (attrs['morecols']+1)
                if 'morerows' in attrs:
                    spans = spans + ' rowspan=%d' % (attrs['morerows']+1)
                self.w('<%s valign="top" align="left"%s>' % (cell_tag, spans))
                self._format_children(cell)
                self.w('</%s>\n' % cell_tag)
            self.w('</tr>\n')

    #
    # Root Element
    #
    # ((title, subtitle?)?, docinfo?, %structure.model;)
    #
    def format_document(self, document):
        ''' ((title, subtitle?)?, docinfo?, %structure.model;)

            Format the whole document and return the HTML page.  A leading
            title becomes the page <title>, a following subtitle becomes
            the <h1> that opens the body, and a docinfo element is written
            into the <head>.
        '''
        self.document = document
        # work on a list so that a short or empty document cannot raise
        # IndexError while looking for the optional leading elements
        entries = list(document)
        count = len(entries)
        self.w('<html><head>\n')

        n = 0
        subtitle = None

        # See if there's a title
        # NOTE(review): the text is read from entry[0][0].data here while
        # the bibliographic handlers read node[0].data - confirm against
        # the node model.
        if n < count and entries[n].tagname == 'title':
            title = cgi.escape(entries[n][0][0].data)
            self.w('<title>%s</title>\n' % title)
            n += 1
        if n < count and entries[n].tagname == 'subtitle':
            # a heading is not allowed inside <head>: remember it and
            # write it once the body has been opened
            subtitle = cgi.escape(entries[n][0][0].data)
            self.section += 1
            n += 1

        # Now see if there's biblio information
        if n < count and entries[n].tagname == 'docinfo':
            self.format_docinfo(entries[n])
            n += 1

        self.w('</head>\n<body>')
        if subtitle is not None:
            self.w('<h1>%s</h1>' % subtitle)

        # now for the body
        for entry in entries[n:]:
            self.formatOneTag(entry)
        self.w('</body>\n</html>')
        return self.out.getvalue()

    #
    # Title Elements
    #
    def format_title(self, node):
        '''Write a heading whose level is the current section depth.
        '''
        # HTML only defines h1..h6
        level = min(max(self.section, 1), 6)
        self.w('<h%d>' % level)
        self._format_children(node)
        self.w('</h%d>\n' % level)

    def format_subtitle(self, node):
        '''Not implemented (a leading subtitle is consumed directly by
        format_document).
        '''
        raise NotImplementedError(node)

    #
    # Bibliographic Elements
    #
    def format_docinfo(self, node):
        ''' (((%bibliographic.elements;)+, abstract?) | abstract)

            bibliographic.elements:
             author | authors | organization | contact | version | revision
             | status | date | copyright
        '''
        self._format_children(node)

    def format_abstract(self, node):
        self._write_meta('description', node)

    def format_author(self, node):
        self._write_meta('author', node)

    def format_authors(self, node):
        ''' ((author, organization?, contact?)+)

            Writes one "author" <meta> tag listing every author child,
            separated by ", ".  Organization and contact children are not
            written.
        '''
        names = [urllib.quote(child[0].data) for child in node.children
                 if child.tagname == 'author']
        self.w('<meta name="author" content="%s">\n' % ', '.join(names))

    def format_organization(self, node):
        self._write_meta('organization', node)

    # TODO: not in DTD
    # def format_keywords(self, node):
    #     content = urllib.quote(node[0].data)
    #     self.w('<meta name="keywords" content="%s">\n'%content)

    def format_contact(self, node):
        addr = urllib.quote(node[0].data)
        self.w('<link rev="made" href="mailto:%s">\n' % addr)

    def format_version(self, node):
        self._write_meta('version', node)

    def format_revision(self, node):
        self._write_meta('revision', node)

    def format_status(self, node):
        self._write_meta('status', node)

    def format_date(self, node):
        self._write_meta('date', node)

    def format_copyright(self, node):
        self._write_meta('copyright', node)

    #
    # Structural Elements
    #
    # section
    #
    # structure.model:
    #    ( ((%body.elements; | transition)+, (%structural.elements;)*)
    #       | (%structural.elements;)+ )
    #
    def format_section(self, node):
        '''Write a named anchor, then the section contents one heading
        level deeper.
        '''
        self.w('<a name="%s"></a>' % urllib.quote(node.attributes['name']))
        self.section += 1
        self._format_children(node)
        self.section -= 1

    def format_transition(self, node):
        self.w('<hr>')

    #
    # Body Elements
    #
    # paragraph | literal_block | block_quote | doctest_block| table
    # | figure | image | footnote
    # | bullet_list | enumerated_list | definition_list | field_list
    # | option_list
    # | note | tip | hint | warning | error | caution | danger | important
    # | target | substitution_definition | comment | system_message
    #
    #
    def format_paragraph(self, node):
        ''' %text.model;
        '''
        # TODO: there are situations where the <p> </p> are unnecessary
        self._wrap(node, '<p>', '</p>\n')

    # Simple lists
    def format_bullet_list(self, node):
        ''' (list_item+)
            bullet CDATA
        '''
        # TODO: handle attribute
        self._wrap(node, '<ul>\n', '</ul>\n')

    def format_enumerated_list(self, node):
        ''' (list_item+)
            enumtype (arabic | loweralpha | upperalpha | lowerroman |
                      upperroman)
            prefix CDATA
            suffix CDATA
            start CDATA
        '''
        # TODO: handle attributes
        self._wrap(node, '<ol>\n', '</ol>\n')

    def format_list_item(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<li>', '</li>\n')

    # Definition List
    def format_definition_list(self, node):
        ''' (definition_list_item+)
        '''
        self._wrap(node, '<dl>\n', '</dl>\n')

    def format_definition_list_item(self, node):
        ''' (term, classifier?, definition)
        '''
        # the <dt> opened here is closed by format_definition
        self.w('<dt>')
        self._format_children(node)

    def format_term(self, node):
        ''' %text.model;
        '''
        self._wrap(node, '<span class="term">', '</span>')

    def format_classifier(self, node):
        ''' %text.model;
        '''
        # TODO: handle the classifier better
        self._wrap(node, '<span class="classifier">', '</span>')

    def format_definition(self, node):
        ''' (%body.elements;)+
        '''
        # closes the <dt> opened by format_definition_list_item
        self.w('</dt>\n<dd>')
        # TODO: this is way suboptimal!
        first = 1
        for child in node.children:
            if child.tagname == 'paragraph' and first:
                # just format the contents of the para
                self.format(child)
            else:
                # format the whole tag
                self.formatOneTag(child)
            first = 0
        self.w('</dd>\n')

    # Field List
    def format_field_list(self, node):
        ''' (field+)
        '''
        self._wrap(node, '<dl>', '</dl>')

    def format_field(self, node):
        ''' (field_name, field_argument*, field_body)
        '''
        # the <dt> opened here is closed by format_field_body
        self.w('<dt>')
        self._format_children(node)

    def format_field_name(self, node):
        ''' (#PCDATA)
        '''
        self._wrap(node, '<span class="field_name">', '</span>')

    def format_field_argument(self, node):
        ''' (#PCDATA)
        '''
        self._wrap(node, '<span class="field_argument">', '</span>')

    def format_field_body(self, node):
        ''' (%body.elements;)+
        '''
        # closes the <dt> opened by format_field
        self._wrap(node, '</dt>\n<dd class="field_body">', '</dd>\n')

    # Option List
    def format_option_list(self, node):
        ''' (option_list_item+)
        '''
        self.w('<table border=0 cellspacing=0 cellpadding=2><tr><th align="left" class="option_header">Option</th>\n')
        self.w('<th align="left" class="option_header">Description</th></tr>\n')
        self._format_children(node)
        self.w('</table>\n')

    def format_option_list_item(self, node):
        ''' (option+, description)
        '''
        self._wrap(node, '<tr>', '</tr>\n')

    def format_option(self, node):
        ''' ((short_option | long_option | vms_option), option_argument?)
        '''
        self._wrap(node, '<td align="left" valign="top" class="option">',
                   '</td>')

    def format_short_option(self, node):
        ''' (#PCDATA)
        '''
        self._write_data(node, '-')

    def format_long_option(self, node):
        ''' (#PCDATA)
        '''
        self._write_data(node, '--')

    def format_vms_option(self, node):
        ''' (#PCDATA)
        '''
        self._write_data(node, '/')

    def format_option_argument(self, node):
        ''' (#PCDATA)
        '''
        self._write_data(node, '=')

    def format_description(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node,
                   '<td align="left" valign="top" class="option_description">',
                   '</td>\n')

    # Literal Block
    def format_literal_block(self, node):
        self._wrap(node, '<pre>', '</pre>\n')

    # Block Quote
    def format_block_quote(self, node):
        # TODO: I believe this needs to be CSS'ified - blockquote is deprecated
        self._wrap(node, '<blockquote>', '</blockquote>\n')

    # Doctest Block
    def format_doctest_block(self, node):
        self._wrap(node, '<pre>', '</pre>\n')

    # Note, tip, hint, warning, error, caution, danger, important
    def format_note(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="note">', '</span>')

    def format_tip(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="tip">', '</span>')

    def format_hint(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="hint">', '</span>')

    def format_warning(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="warning">', '</span>')

    def format_error(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="error">', '</span>')

    def format_caution(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="caution">', '</span>')

    def format_danger(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="danger">', '</span>')

    def format_important(self, node):
        ''' (%body.elements;)+
        '''
        self._wrap(node, '<span class="important">', '</span>')

    # Footnote
    def format_footnote(self, node):
        ''' (label?, (%body.elements;)+)
            %auto.att;
        '''
        raise NotImplementedError(node)

    def format_label(self, node):
        ''' (#PCDATA)
        '''
        self._write_data(node)

    # Target
    def format_target(self, node):
        ''' (%text.model;)
            %reference.atts;
            %anonymous.att;
        '''
        # targets produce no output of their own
        pass

    # Substitution Definition
    def format_substitution_definition(self, node):
        ''' (%text.model;)
        '''
        raise NotImplementedError(node)

    # Comment
    def format_comment(self, node):
        ''' (#PCDATA)
            %fixedspace.att;
        '''
        # TODO: handle attrs
        self.w('<!--')
        self._write_data(node)
        self.w('-->')

    # Figure
    def format_figure(self, node):
        ''' (image, ((caption, legend?) | legend)
        '''
        raise NotImplementedError(node)

    def format_image(self, node):
        ''' EMPTY
            uri CDATA #REQUIRED
            alt CDATA #IMPLIED
            height NMTOKEN #IMPLIED
            width NMTOKEN #IMPLIED
            scale NMTOKEN #IMPLIED

            Raises KeyError when the required uri attribute is missing.
        '''
        attrs = node.attributes
        # values are escaped (quotes included) so that they cannot break
        # out of the attribute they are written into
        parts = ['src="%s"' % cgi.escape('%s' % attrs['uri'], 1)]
        for name in ('alt', 'height', 'width'):
            if name in attrs:
                value = cgi.escape('%s' % attrs[name], 1)
                parts.append('%s="%s"' % (name, value))
        # TODO: scale
        self.w('<img %s>' % (' '.join(parts)))

    def format_caption(self, node):
        ''' %text.model;
        '''
        raise NotImplementedError(node)

    def format_legend(self, node):
        ''' (%body.elements;)+
        '''
        raise NotImplementedError(node)

    # System Message
    def format_system_message(self, node):
        ''' (%body.elements;)+
            level NMTOKEN #IMPLIED
            type CDATA #IMPLIED
        '''
        self._wrap(node,
                   '<span class="system_message-%s">' % node.attributes['type'],
                   '</span>')

    #
    # Tables:
    #  NOT IN DOM YET
    #
    def format_table(self, node):
        '''
            +------------------------+------------+----------+----------+
            | Header row, column 1   | Header 2   | Header 3 | Header 4 |
            | (header rows optional) |            |          |          |
            +========================+============+==========+==========+
            | body row 1, column 1   | column 2   | column 3 | column 4 |
            +------------------------+------------+----------+----------+
            | body row 2             | Cells may span columns.          |
            +------------------------+------------+---------------------+
            | body row 3             | Cells may  | - Table cells       |
            +------------------------+ span rows. | - contain           |
            | body row 4             |            | - body elements.    |
            +------------------------+------------+---------------------+
        '''
        self._wrap(node, '<table border=1>\n', '</table>\n')

    def format_tgroup(self, node):
        # we get the number of columns, if that's important
        self._format_children(node)

    def format_colspec(self, node):
        # we get colwidth, but don't need it
        pass

    def format_thead(self, node):
        '''Write the header rows, using <th> cells.
        '''
        self._format_rows(node, 'th')

    def format_tbody(self, node):
        '''Write the body rows, using <td> cells.
        '''
        self._format_rows(node, 'td')

    #
    # Inline Elements
    #
    # Inline elements occur within the text contents of body elements. Some
    # nesting of inline elements is allowed by these definitions, with the
    # following caveats:
    # - An inline element may not contain a nested element of the same type
    #   (e.g. <strong> may not contain another <strong>).
    # - Nested inline elements may or may not be supported by individual
    #   applications using this DTD.
    # - The inline elements <footnote_reference>, <literal>, and <image> do
    #   not support nesting.
    #
    #  What that means is that all of these take (%text.model;) except:
    #   literal (#PCDATA)
    #   footnote_reference (#PCDATA)
    #
    # text.model:
    # (#PCDATA | %inline.elements;)*
    #
    def format_emphasis(self, node):
        ''' (%text.model;)
        '''
        self._wrap(node, '<em>', '</em>')

    def format_strong(self, node):
        ''' (%text.model;)
        '''
        self._wrap(node, '<strong>', '</strong>')

    def format_interpreted(self, node):
        ''' (%text.model;)
            type CDATA #IMPLIED
        '''
        pass #raise NotImplementedError(node)

    def format_literal(self, node):
        ''' (#PCDATA)
        '''
        self.w('<tt>')
        self._write_data(node)
        self.w('</tt>')

    def format_reference(self, node):
        ''' (%text.model;)
            %reference.atts;
            %anonymous.att;

            Writes a link for the reference.  The target is, in order of
            preference: the node's own refuri, the refuri of the explicit
            target named by refname, or an in-page anchor for an explicit
            or implicit target of that name.  When no target is found an
            error span is written and the text is left unlinked.

            Reads self.document, which is set by format_document.  Raises
            KeyError when the node has neither refuri nor refname.
        '''
        attrs = node.attributes
        doc = self.document
        href = None
        if 'refuri' in attrs:
            href = attrs['refuri']
        else:
            refname = attrs['refname']
            if refname in doc.explicit_targets:
                # an external reference has been defined
                ref = doc.explicit_targets[refname]
                if 'refuri' in ref.attributes:
                    href = ref.attributes['refuri']
                else:
                    # quoted the same way as the anchors written by
                    # format_section
                    href = '#' + urllib.quote(refname)
            elif refname in doc.implicit_targets:
                # internal reference
                href = '#' + urllib.quote(refname)

        if href is None:
            self.w('<span class="formatter_error">target "%s" '
                'undefined</span>' % cgi.escape(attrs['refname']))
        else:
            self.w('<a href="%s">' % cgi.escape(href, 1))
        self._format_children(node)
        if href is not None:
            self.w('</a>')

    def format_footnote_reference(self, node):
        ''' (#PCDATA)
            %reference.atts;
            %auto.att;
        '''
        raise NotImplementedError(node)

    def format_substitution_reference(self, node):
        ''' (%text.model;)
            %refname.att;
        '''
        raise NotImplementedError(node)

    def format_problematic(self, node):
        ''' (%text.model;)
        '''
        raise NotImplementedError(node)

    #
    # Finally, #text
    #
    def format__text(self, node):
        '''Write the HTML-escaped data of a text node.
        '''
        self.w(cgi.escape(node.data))
def main(filename, debug=0):
    '''Parse the file named filename and print the result to stdout.

    When debug is 1 the parsed document tree is printed (via its
    pformat method); otherwise the document is rendered with
    DumbHTMLFormatter and the HTML is printed.
    '''
    parser = Parser()
    source = open(filename)
    try:
        # close the file even when reading fails
        text = source.read()
    finally:
        source.close()
    document = dps.utils.newdocument()
    parser.parse(text, document)
    if debug == 1:
        output = document.pformat()
    else:
        formatter = DumbHTMLFormatter()
        output = formatter.format_document(document)
    # a single parenthesised argument prints identically whether print
    # is a statement or a function
    print(output)
if __name__ == '__main__':
    # usage: <script> FILENAME [DEBUG]
    # Any second argument switches on the debug dump of the parsed tree.
    if len(sys.argv) < 2:
        # without this check sys.argv[1] below raises IndexError
        sys.stderr.write('usage: %s FILENAME [DEBUG]\n' % sys.argv[0])
        sys.exit(2)
    if len(sys.argv) > 2:
        main(sys.argv[1], debug=1)
    else:
        main(sys.argv[1])