Code

Adding TAL to the dist
author richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Fri, 30 Aug 2002 08:23:53 +0000 (08:23 +0000)
committer richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Fri, 30 Aug 2002 08:23:53 +0000 (08:23 +0000)
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@999 57a73879-2fb5-44c3-a270-3262357dd7e2

TAL/.cvsignore [new file with mode: 0644]
TAL/HTMLParser.py [new file with mode: 0644]
TAL/README.txt [new file with mode: 0644]
TAL/TALDefs.py [new file with mode: 0644]
TAL/TALGenerator.py [new file with mode: 0644]
TAL/TALInterpreter.py [new file with mode: 0644]
TAL/TALParser.py [new file with mode: 0644]
TAL/XMLParser.py [new file with mode: 0644]
TAL/__init__.py [new file with mode: 0644]
TAL/markupbase.py [new file with mode: 0644]

diff --git a/TAL/.cvsignore b/TAL/.cvsignore
new file mode 100644 (file)
index 0000000..0cd1d94
--- /dev/null
@@ -0,0 +1,2 @@
+.path
+*.pyc
diff --git a/TAL/HTMLParser.py b/TAL/HTMLParser.py
new file mode 100644 (file)
index 0000000..5ab076b
--- /dev/null
@@ -0,0 +1,403 @@
+"""A parser for HTML and XHTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import markupbase
+import re
+import string
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')  # normal mode: stop at '&' or '<'
+interesting_cdata = re.compile(r'<(/|\Z)')  # CDATA mode: stop only at '</' or at a '<' ending the buffer
+incomplete = re.compile('&[a-zA-Z#]')  # '&' plus a letter or '#': possibly an unfinished reference
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')  # group 1 = name; also consumes one terminator char
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')  # decimal or hex reference plus one terminator char
+
+starttagopen = re.compile('<[a-zA-Z]')  # '<' followed by a letter
+piclose = re.compile('>')  # used by parse_pi() to find the end of a processing instruction
+endtagopen = re.compile('</')  # start of an end tag
+commentclose = re.compile(r'--\s*>')  # end of a comment; whitespace allowed before '>'
+tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')  # a tag name
+attrfind = re.compile(  # groups: 1 = attribute name, 2 = optional '=value' part, 3 = the value
+    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
+
+locatestarttagend = re.compile(r"""
+  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
+  (?:\s+                             # whitespace before attribute name
+    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
+      (?:\s*=\s*                     # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |\"[^\"]*\"                # LIT-enclosed value
+          |[^'\">\s]+                # bare value
+         )
+       )?
+     )
+   )*
+  \s*                                # trailing whitespace
+""", re.VERBOSE)  # matches a start tag up to, but not including, its closing '>' or '/>'
+endendtag = re.compile('>')  # used by parse_endtag() to find the closing '>'
+endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')  # a complete end tag; group 1 = tag name
+
+
+class HTMLParseError(Exception):
+    """Exception raised for all parse errors; carries msg, lineno and offset."""
+
+    def __init__(self, msg, position=(None, None)):  # position is a (lineno, offset) pair
+        assert msg
+        self.msg = msg
+        self.lineno = position[0]
+        self.offset = position[1]
+
+    def __str__(self):  # the message, followed by line and column when they are known
+        result = self.msg
+        if self.lineno is not None:
+            result = result + ", at line %d" % self.lineno
+        if self.offset is not None:
+            result = result + ", column %d" % (self.offset + 1)  # shown one higher than the stored offset
+        return result
+
+
+def _contains_at(s, sub, pos):  # true if `sub` occurs in `s` starting exactly at index `pos`
+    return s[pos:pos+len(sub)] == sub
+
+
+class HTMLParser(markupbase.ParserBase):
+    """Find tags and other markup and call handler functions.
+
+    Usage:
+        p = HTMLParser()
+        p.feed(data)
+        ...
+        p.close()
+
+    Start tags are handled by calling self.handle_starttag() or
+    self.handle_startendtag(); end tags by self.handle_endtag().  The
+    data between tags is passed from the parser to the derived class
+    by calling self.handle_data() with the data as argument (the data
+    may be split up in arbitrary chunks).  Entity references are
+    passed by calling self.handle_entityref() with the entity
+    reference as the argument.  Numeric character references are
+    passed to self.handle_charref() with the string containing the
+    reference as the argument.
+    """
+
+    CDATA_CONTENT_ELEMENTS = ("script", "style")  # parse_starttag() enters CDATA mode after these tags
+
+
+    def __init__(self):
+        """Initialize and reset this instance."""
+        self.reset()
+
+    def reset(self):
+        """Reset this instance.  Loses all unprocessed data."""
+        self.rawdata = ''  # input fed so far that goahead() has not consumed yet
+        self.stack = []
+        self.lasttag = '???'  # replaced by the lower-cased tag name in parse_starttag()
+        self.interesting = interesting_normal  # regex goahead() uses to find the next markup
+        markupbase.ParserBase.reset(self)
+
+    def feed(self, data):
+        """Feed data to the parser.
+
+        Call this as often as you want, with as little or as much text
+        as you want (may include '\n').
+        """
+        self.rawdata = self.rawdata + data
+        self.goahead(0)  # end=0: more data may follow
+
+    def close(self):
+        """Handle any buffered data."""
+        self.goahead(1)  # end=1: treat the buffer as followed by EOF
+
+    def error(self, message):  # raise HTMLParseError at the position reported by getpos()
+        raise HTMLParseError(message, self.getpos())
+
+    __starttag_text = None  # source text of the most recent start tag; set by parse_starttag()
+
+    def get_starttag_text(self):
+        """Return full source of start tag: '<...>'."""
+        return self.__starttag_text
+
+    cdata_endtag = None  # tag name given to set_cdata_mode(); reset to None by clear_cdata_mode()
+
+    def set_cdata_mode(self, endtag=None):  # switch the scanner to interesting_cdata
+        self.cdata_endtag = endtag
+        self.interesting = interesting_cdata
+
+    def clear_cdata_mode(self):  # back to normal scanning with interesting_normal
+        self.cdata_endtag = None
+        self.interesting = interesting_normal
+
+    # Internal -- handle data as far as reasonable.  May leave state
+    # and data to be processed by a subsequent call.  If 'end' is
+    # true, force handling all data as if followed by EOF marker.
+    def goahead(self, end):
+        rawdata = self.rawdata
+        i = 0
+        n = len(rawdata)
+        while i < n:
+            match = self.interesting.search(rawdata, i) # < or &
+            if match:
+                j = match.start()
+            else:
+                j = n
+            if i < j: self.handle_data(rawdata[i:j])  # plain text before the next markup
+            i = self.updatepos(i, j)
+            if i == n: break
+            if rawdata[i] == '<':
+                if starttagopen.match(rawdata, i): # < + letter
+                    k = self.parse_starttag(i)
+                elif endtagopen.match(rawdata, i): # </
+                    k = self.parse_endtag(i)
+                elif _contains_at(rawdata, "<!--", i): # <!--
+                    k = self.parse_comment(i)
+                elif _contains_at(rawdata, "<!", i): # <!
+                    k = self.parse_declaration(i)
+                elif _contains_at(rawdata, "<?", i): # <?
+                    k = self.parse_pi(i)
+                elif _contains_at(rawdata, "<?", i): # <? again: same test as the branch above, so unreachable
+                    k = self.parse_declaration(i)
+                elif (i + 1) < n:  # a lone '<' that is not the last char is passed on as data
+                    self.handle_data("<")
+                    k = i + 1
+                else:
+                    break  # '<' is the last char of the buffer: stop and keep it
+                if k < 0:  # the parse_* helper found the construct incomplete
+                    if end:
+                        self.error("EOF in middle of construct")
+                    break
+                i = self.updatepos(i, k)
+            elif rawdata[i:i+2] == "&#":
+                match = charref.match(rawdata, i)
+                if match:
+                    name = match.group()[2:-1]  # strip the leading '&#' and the terminator char
+                    self.handle_charref(name)
+                    k = match.end()
+                    if rawdata[k-1] != ';':  # terminator was not ';': leave it in the buffer
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                else:
+                    break  # no complete char ref here: stop scanning
+            elif rawdata[i] == '&':
+                match = entityref.match(rawdata, i)
+                if match:
+                    name = match.group(1)
+                    self.handle_entityref(name)
+                    k = match.end()
+                    if rawdata[k-1] != ';':  # terminator was not ';': leave it in the buffer
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                match = incomplete.match(rawdata, i)
+                if match:
+                    # match.group() will contain at least 2 chars
+                    rest = rawdata[i:]
+                    if end and match.group() == rest:
+                        self.error("EOF in middle of entity or char ref")
+                    # incomplete
+                    break
+                elif (i + 1) < n:
+                    # not the end of the buffer, and can't be confused
+                    # with some other construct
+                    self.handle_data("&")
+                    i = self.updatepos(i, i + 1)
+                else:
+                    break  # '&' is the last char of the buffer: stop and keep it
+            else:
+                assert 0, "interesting.search() lied"
+        # end while
+        if end and i < n:  # at EOF, whatever is left is passed on as data
+            self.handle_data(rawdata[i:n])
+            i = self.updatepos(i, n)
+        self.rawdata = rawdata[i:]  # keep the unprocessed tail for the next call
+
+    # Internal -- parse comment, return end or -1 if not terminated
+    def parse_comment(self, i, report=1):  # a false `report` skips the handle_comment() callback
+        rawdata = self.rawdata
+        assert rawdata[i:i+4] == '<!--', 'unexpected call to parse_comment()'
+        match = commentclose.search(rawdata, i+4)
+        if not match:
+            return -1
+        if report:
+            j = match.start()
+            self.handle_comment(rawdata[i+4: j])  # text between '<!--' and the closing '--'
+        j = match.end()
+        return j
+
+    # Internal -- parse processing instr, return end or -1 if not terminated
+    def parse_pi(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
+        match = piclose.search(rawdata, i+2) # >
+        if not match:
+            return -1
+        j = match.start()
+        self.handle_pi(rawdata[i+2: j])  # text between '<?' and the first '>'
+        j = match.end()
+        return j
+
+    # Internal -- handle starttag, return end or -1 if not terminated
+    def parse_starttag(self, i):
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and endpos into a tag and attrs
+        attrs = []
+        match = tagfind.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = string.lower(rawdata[i+1:k])
+
+        while k < endpos:
+            m = attrfind.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:  # attribute given without an '=value' part
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]  # drop the surrounding quotes
+                attrvalue = self.unescape(attrvalue)  # only quoted values are unescaped
+            attrs.append((string.lower(attrname), attrvalue))
+            k = m.end()
+
+        end = string.strip(rawdata[k:endpos])  # whatever follows the last attribute
+        if end not in (">", "/>"):
+            lineno, offset = self.getpos()  # NOTE(review): lineno/offset are computed below but never passed to self.error()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + string.count(self.__starttag_text, "\n")
+                offset = len(self.__starttag_text) \
+                         - string.rfind(self.__starttag_text, "\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            self.error("junk characters in start tag: %s"
+                       % `rawdata[k:endpos][:20]`)
+        if end[-2:] == '/>':
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)  # content is scanned in CDATA mode until the matching end tag
+        return endpos
+
+    # Internal -- check to see if we have a complete starttag; return end
+    # or -1 if incomplete.
+    def check_for_whole_start_tag(self, i):
+        rawdata = self.rawdata
+        m = locatestarttagend.match(rawdata, i)
+        if m:
+            j = m.end()
+            next = rawdata[j:j+1]  # char after the matched part; '' at the end of the buffer
+            if next == ">":
+                return j + 1
+            if next == "/":
+                s = rawdata[j:j+2]
+                if s == "/>":
+                    return j + 2
+                if s == "/":
+                    # buffer boundary
+                    return -1
+                # else bogus input
+                self.updatepos(i, j + 1)
+                self.error("malformed empty start tag")
+            if next == "":
+                # end of input
+                return -1
+            if next in ("abcdefghijklmnopqrstuvwxyz=/"
+                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+                # end of input in or before attribute value, or we have the
+                # '/' from a '/>' ending
+                return -1
+            self.updatepos(i, j)
+            self.error("malformed start tag")
+        raise AssertionError("we should not get here!")  # reached if the regex did not match, or error() returned
+
+    # Internal -- parse endtag, return end or -1 if incomplete
+    def parse_endtag(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
+        match = endendtag.search(rawdata, i+1) # >
+        if not match:
+            return -1
+        j = match.end()
+        match = endtagfind.match(rawdata, i) # </ + tag + >
+        if not match:
+            self.error("bad end tag: %s" % `rawdata[i:j]`)
+        tag = string.lower(match.group(1))
+        if (  self.cdata_endtag is not None
+              and tag != self.cdata_endtag):
+            # Should be a mismatched end tag, but we'll treat it
+            # as text anyway, since most HTML authors aren't
+            # interested in the finer points of syntax.
+            self.handle_data(match.group(0))
+        else:
+            self.handle_endtag(tag)
+            self.clear_cdata_mode()  # done on every handled end tag, whether or not CDATA mode was on
+        return j
+
+    # Overridable -- finish processing of start+end tag: <tag.../>
+    def handle_startendtag(self, tag, attrs):  # default: a start-tag call followed by an end-tag call
+        self.handle_starttag(tag, attrs)
+        self.handle_endtag(tag)
+
+    # Overridable -- handle start tag
+    def handle_starttag(self, tag, attrs):  # attrs is a list of (lower-cased name, value or None) pairs
+        pass
+
+    # Overridable -- handle end tag
+    def handle_endtag(self, tag):  # tag is lower-cased
+        pass
+
+    # Overridable -- handle character reference
+    def handle_charref(self, name):  # name is the text between '&#' and the terminator
+        pass
+
+    # Overridable -- handle entity reference
+    def handle_entityref(self, name):  # name is the text after '&', without the terminator
+        pass
+
+    # Overridable -- handle data
+    def handle_data(self, data):
+        pass
+
+    # Overridable -- handle comment
+    def handle_comment(self, data):  # data excludes the '<!--' opener and the closing '--...>'
+        pass
+
+    # Overridable -- handle declaration
+    def handle_decl(self, decl):
+        pass
+
+    # Overridable -- handle processing instruction
+    def handle_pi(self, data):  # data excludes the leading '<?' and the closing '>'
+        pass
+
+    def unknown_decl(self, data):  # default: report the declaration as a parse error
+        self.error("unknown declaration: " + `data`)
+
+    # Internal -- helper to remove special character quoting
+    def unescape(self, s):
+        if '&' not in s:
+            return s
+        s = string.replace(s, "&lt;", "<")
+        s = string.replace(s, "&gt;", ">")
+        s = string.replace(s, "&apos;", "'")
+        s = string.replace(s, "&quot;", '"')
+        s = string.replace(s, "&amp;", "&") # Must be last, so that '&amp;lt;' yields '&lt;' and not '<'
+        return s
diff --git a/TAL/README.txt b/TAL/README.txt
new file mode 100644 (file)
index 0000000..4a28816
--- /dev/null
@@ -0,0 +1,97 @@
+TAL - Template Attribute Language
+---------------------------------
+
+This is an implementation of TAL, the Zope Template Attribute
+Language.  For TAL, see the Zope Presentation Templates ZWiki:
+
+    http://dev.zope.org/Wikis/DevSite/Projects/ZPT/FrontPage
+
+It is not a Zope product nor is it designed exclusively to run inside
+of Zope, but if you have a Zope checkout that includes
+Products/ParsedXML, its Expat parser will be used.
+
+Prerequisites
+-------------
+
+You need:
+
+- A recent checkout of Zope2; don't forget to run the wo_pcgi.py
+  script to compile everything.  (See above -- this is now optional.)
+
+- A recent checkout of the Zope2 product ParsedXML, accessible
+  through <Zope2>/lib/python/Products/ParsedXML; don't forget to run
+  the setup.py script to compile Expat.  (Again, optional.)
+
+- Python 1.5.2; the driver script refuses to work with other versions
+  unless you specify the -n option; this is done so that I don't
+  accidentally use Python 2.x features.
+
+- Create a .path file containing a proper module search path; it should
+  point to the <Zope2>/lib/python directory that you want to use.
+
+How To Play
+-----------
+
+(Don't forget to edit .path, see above!)
+
+The script driver.py takes an XML file with TAL markup as argument and
+writes the expanded version to standard output.  The filename argument
+defaults to tests/input/test01.xml.
+
+Regression test
+---------------
+
+There are unit test suites in the 'tests' subdirectory; these can be
+run with tests/run.py.  This should print the testcase names plus
+progress info, followed by a final line saying "OK".  It requires that
+../unittest.py exists.
+
+There are a number of test files in the 'tests' subdirectory, named
+tests/input/test<number>.xml and tests/input/test<number>.html.  The
+Python script ./runtest.py calls driver.main() for each test file, and
+should print "<file> OK" for each one.  These tests are also run as
+part of the unit test suites, so tests/run.py is all you need.
+
+What's Here
+-----------
+
+DummyEngine.py         simple-minded TALES execution engine
+TALInterpreter.py      class to interpret intermediate code
+TALGenerator.py                class to generate intermediate code
+XMLParser.py           base class to parse XML, avoiding DOM
+TALParser.py           class to parse XML with TAL into intermediate code
+HTMLTALParser.py       class to parse HTML with TAL into intermediate code
+HTMLParser.py          HTML-parsing base class
+driver.py              script to demonstrate TAL expansion
+timer.py               script to time various processing phases
+setpath.py             hack to set sys.path and import ZODB
+__init__.py            empty file that makes this directory a package
+runtest.py             Python script to run file-comparison tests
+ndiff.py               helper for runtest.py to produce diffs
+tests/                 directory with test files and output
+tests/run.py           Python script to run all tests
+
+Author and License
+------------------
+
+This code is written by Guido van Rossum (project lead), Fred Drake,
+and Tim Peters.  It is owned by Digital Creations and can be
+redistributed under the Zope Public License.
+
+TO DO
+-----
+
+(See also http://www.zope.org/Members/jim/ZPTIssueTracker .)
+
+- Need to remove leading whitespace and newline when omitting an
+  element (either through tal:replace with a value of nothing or
+  tal:condition with a false condition).
+
+- Empty TAL/METAL attributes are ignored: tal:replace="" is ignored
+  rather than causing an error.
+
+- HTMLTALParser.py and TALParser.py are silly names.  Should be
+  HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened,
+  without "TAL"?)
+
+- Should we preserve case of tags and attribute names in HTML?
diff --git a/TAL/TALDefs.py b/TAL/TALDefs.py
new file mode 100644 (file)
index 0000000..dbc0443
--- /dev/null
@@ -0,0 +1,145 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""
+Common definitions used by TAL and METAL compilation and transformation.
+"""
+
+from types import ListType, TupleType
+
+TAL_VERSION = "1.3.2"  # compared with a program's "version" opcode by isCurrentVersion()
+
+XML_NS = "http://www.w3.org/XML/1998/namespace" # URI for XML namespace
+XMLNS_NS = "http://www.w3.org/2000/xmlns/" # URI for XML NS declarations
+
+ZOPE_TAL_NS = "http://xml.zope.org/namespaces/tal"  # URI for the TAL namespace
+ZOPE_METAL_NS = "http://xml.zope.org/namespaces/metal"  # URI for the METAL namespace
+
+NAME_RE = "[a-zA-Z_][a-zA-Z0-9_]*"  # regex text for a name: letter or '_', then letters, digits or '_'
+
+KNOWN_METAL_ATTRIBUTES = [
+    "define-macro",
+    "use-macro",
+    "define-slot",
+    "fill-slot",
+    "slot"
+    ]
+
+KNOWN_TAL_ATTRIBUTES = [
+    "define",
+    "condition",
+    "content",
+    "replace",
+    "repeat",
+    "attributes",
+    "on-error",
+    "omit-tag",
+    "tal tag",  # NOTE(review): the only entry containing a space -- confirm it is intended
+    ]
+
+class TALError(Exception):  # error carrying a message and an optional (lineno, offset) position
+
+    def __init__(self, msg, position=(None, None)):  # position is a (lineno, offset) pair
+        assert msg != ""
+        self.msg = msg
+        self.lineno = position[0]
+        self.offset = position[1]
+
+    def __str__(self):  # the message, followed by line and column when they are known
+        result = self.msg
+        if self.lineno is not None:
+            result = result + ", at line %d" % self.lineno
+        if self.offset is not None:
+            result = result + ", column %d" % (self.offset + 1)  # shown one higher than the stored offset
+        return result
+
+class METALError(TALError):  # distinct subclass of TALError; adds no behaviour of its own
+    pass
+
+class TALESError(TALError):  # distinct subclass of TALError; adds no behaviour of its own
+    pass
+
+class ErrorInfo:  # record of an error's type, value and (lineno, offset) position
+
+    def __init__(self, err, position=(None, None)):
+        if isinstance(err, Exception):  # an exception instance: keep it and remember its class
+            self.type = err.__class__
+            self.value = err
+        else:  # anything else is stored as the type itself (presumably an exception class), with no value
+            self.type = err
+            self.value = None
+        self.lineno = position[0]
+        self.offset = position[1]
+
+import re
+_attr_re = re.compile(r"\s*([^\s]+)\s+([^\s].*)\Z", re.S)  # group 1 = first word (the name), group 2 = the rest
+_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)  # group 1 = optional keyword, group 2 = the rest
+del re  # remove the name again once the patterns are compiled
+
+def parseAttributeReplacements(arg):  # parse "name expr; name expr; ..." into a {name: expr} dict
+    dict = {}
+    for part in splitParts(arg):
+        m = _attr_re.match(part)
+        if not m:
+            raise TALError("Bad syntax in attributes:" + `part`)
+        name, expr = m.group(1, 2)
+        if dict.has_key(name):  # each attribute name may be given only once
+            raise TALError("Duplicate attribute name in attributes:" + `part`)
+        dict[name] = expr
+    return dict
+
+def parseSubstitution(arg, position=(None, None)):  # split "[text|structure] expr" into (key, expr)
+    m = _subst_re.match(arg)
+    if not m:
+        raise TALError("Bad syntax in substitution text: " + `arg`, position)
+    key, expr = m.group(1, 2)
+    if not key:  # no keyword given: default to "text"
+        key = "text"
+    return key, expr
+
+def splitParts(arg):  # returns the list of pieces; pieces are not stripped of whitespace
+    # Break in pieces at undoubled semicolons and
+    # change double semicolons to singles:
+    import string
+    arg = string.replace(arg, ";;", "\0")  # hide doubled semicolons behind a NUL placeholder
+    parts = string.split(arg, ';')
+    parts = map(lambda s, repl=string.replace: repl(s, "\0", ";"), parts)  # turn each placeholder into one ';'
+    if len(parts) > 1 and not string.strip(parts[-1]):
+        del parts[-1] # It ended in a semicolon
+    return parts
+
+def isCurrentVersion(program):  # true if the program's version opcode equals TAL_VERSION
+    version = getProgramVersion(program)
+    return version == TAL_VERSION
+
+def getProgramMode(program):  # mode from the program's second instruction, or None
+    version = getProgramVersion(program)
+    if (version == TAL_VERSION and isinstance(program[1], TupleType) and
+        len(program[1]) == 2):
+        opcode, mode = program[1]
+        if opcode == "mode":
+            return mode
+    return None  # wrong version, or the second instruction is not a ("mode", ...) pair
+
+def getProgramVersion(program):  # version from the program's first instruction, or None
+    if (len(program) >= 2 and
+        isinstance(program[0], TupleType) and len(program[0]) == 2):
+        opcode, version = program[0]
+        if opcode == "version":
+            return version
+    return None  # fewer than two instructions, or the first is not a ("version", ...) pair
+
+import cgi
+def quote(s, escape=cgi.escape):  # escape is bound as a default, so it stays usable after 'del cgi'
+    return '"%s"' % escape(s, 1)  # escaped text wrapped in double quotes; 1 is escape's quote flag
+del cgi
diff --git a/TAL/TALGenerator.py b/TAL/TALGenerator.py
new file mode 100644 (file)
index 0000000..0bfadea
--- /dev/null
@@ -0,0 +1,583 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""
+Code generator for TALInterpreter intermediate code.
+"""
+
+import string
+import re
+import cgi
+
+from TALDefs import *
+
+class TALGenerator:
+
+    inMacroUse = 0
+    inMacroDef = 0
+    source_file = None
+    
+    def __init__(self, expressionCompiler=None, xml=1, source_file=None):  # xml selects "xml" or "html" mode
+        if not expressionCompiler:  # fall back to a DummyEngine instance
+            from DummyEngine import DummyEngine
+            expressionCompiler = DummyEngine()
+        self.expressionCompiler = expressionCompiler
+        self.CompilerError = expressionCompiler.getCompilerError()  # exception caught in compileExpression()
+        self.program = []  # instruction list currently being built
+        self.stack = []  # saved programs; see pushProgram() / popProgram()
+        self.todoStack = []
+        self.macros = {}
+        self.slots = {}
+        self.slotStack = []  # saved slot dicts; see pushSlots() / popSlots()
+        self.xml = xml
+        self.emit("version", TAL_VERSION)  # first instruction of every program
+        self.emit("mode", xml and "xml" or "html")  # second instruction of every program
+        if source_file is not None:
+            self.source_file = source_file
+            self.emit("setSourceFile", source_file)
+
+    def getCode(self):  # returns (optimized program, macros dict)
+        assert not self.stack  # every pushProgram() must have been popped
+        assert not self.todoStack  # every todoPush() must have been popped
+        return self.optimize(self.program), self.macros
+
+    def optimize(self, program):  # merge runs of literal output into single rawtext* instructions
+        output = []
+        collect = []  # text fragments waiting to be joined into one instruction
+        rawseen = cursor = 0  # NOTE(review): rawseen is assigned but never read in this method
+        if self.xml:
+            endsep = "/>"
+        else:
+            endsep = " />"
+        for cursor in xrange(len(program)+1):  # one extra pass so the trailing text is flushed
+            try:
+                item = program[cursor]
+            except IndexError:
+                item = (None, None)  # sentinel for the extra pass past the end
+            opcode = item[0]
+            if opcode == "rawtext":
+                collect.append(item[1])
+                continue
+            if opcode == "endTag":
+                collect.append("</%s>" % item[1])
+                continue
+            if opcode == "startTag":
+                if self.optimizeStartTag(collect, item[1], item[2], ">"):  # true: tag was added to collect as text
+                    continue
+            if opcode == "startEndTag":
+                if self.optimizeStartTag(collect, item[1], item[2], endsep):
+                    continue
+            if opcode in ("beginScope", "endScope"):
+                # Push *Scope instructions in front of any text instructions;
+                # this allows text instructions separated only by *Scope
+                # instructions to be joined together.
+                output.append(self.optimizeArgsList(item))
+                continue
+            text = string.join(collect, "")
+            if text:
+                i = string.rfind(text, "\n")
+                if i >= 0:
+                    i = len(text) - (i + 1)  # number of characters after the last newline
+                    output.append(("rawtextColumn", (text, i)))
+                else:
+                    output.append(("rawtextOffset", (text, len(text))))
+            if opcode != None:  # skip the end-of-program sentinel
+                output.append(self.optimizeArgsList(item))
+            rawseen = cursor+1
+            collect = []
+        return self.optimizeCommonTriple(output)
+
+    def optimizeArgsList(self, item):  # normalize an instruction to the two-item form (opcode, args)
+        if len(item) == 2:
+            return item
+        else:
+            return item[0], tuple(item[1:])  # pack all arguments into one tuple
+
+    actionIndex = {"replace":0, "insert":1, "metal":2, "tal":3, "xmlns":4,
+                   0: 0, 1: 1, 2: 2, 3: 3, 4: 4}
+    def optimizeStartTag(self, collect, name, attrlist, end):  # returns 1 if the tag was appended to collect as text
+        if not attrlist:
+            collect.append("<%s%s" % (name, end))
+            return 1
+        opt = 1  # cleared as soon as one attribute carries an action
+        new = ["<" + name]
+        for i in range(len(attrlist)):  # attrlist entries are rewritten in place
+            item = attrlist[i]
+            if len(item) > 2:  # (name, value, action, ...): the tag cannot be folded into plain text
+                opt = 0
+                name, value, action = item[:3]  # rebinds the `name` parameter, which was already used above
+                action = self.actionIndex[action]  # action name -> its integer index
+                attrlist[i] = (name, value, action) + item[3:]
+            else:
+                if item[1] is None:
+                    s = item[0]
+                else:
+                    s = "%s=%s" % (item[0], quote(item[1]))
+                attrlist[i] = item[0], s  # replaced by (name, rendered text)
+            if item[1] is None:
+                new.append(" " + item[0])
+            else:
+                new.append(" %s=%s" % (item[0], quote(item[1])))
+        if opt:  # `new` is only used when no attribute had an action
+            new.append(end)
+            collect.extend(new)
+        return opt
+
+    def optimizeCommonTriple(self, program):  # fuse rawtextColumn + setPosition + beginScope into rawtextBeginScope
+        if len(program) < 3:
+            return program
+        output = program[:2]
+        prev2, prev1 = output  # the two instructions before `item`
+        for item in program[2:]:
+            if (  item[0] == "beginScope"
+                  and prev1[0] == "setPosition"
+                  and prev2[0] == "rawtextColumn"):
+                position = output.pop()[1]  # argument of the setPosition instruction
+                text, column = output.pop()[1]  # arguments of the rawtextColumn instruction
+                prev1 = None, None
+                closeprev = 0
+                if output and output[-1][0] == "endScope":  # also absorb an endScope directly before the triple
+                    closeprev = 1
+                    output.pop()
+                item = ("rawtextBeginScope",
+                        (text, column, position, closeprev, item[1]))
+            output.append(item)
+            prev2 = prev1
+            prev1 = item
+        return output
+
+    def todoPush(self, todo):  # push `todo` onto todoStack
+        self.todoStack.append(todo)
+
+    def todoPop(self):  # remove and return the most recently pushed todo item
+        return self.todoStack.pop()
+
+    def compileExpression(self, expr):  # compile with the expression compiler; its errors become TALError
+        try:
+            return self.expressionCompiler.compile(expr)
+        except self.CompilerError, err:
+            raise TALError('%s in expression %s' % (err.args[0], `expr`),
+                           self.position)  # NOTE(review): self.position is not set anywhere in the visible code
+
+    def pushProgram(self):  # save the current program and start a new, empty one
+        self.stack.append(self.program)
+        self.program = []
+
+    def popProgram(self):  # restore the saved program; return the finished one, optimized
+        program = self.program
+        self.program = self.stack.pop()
+        return self.optimize(program)
+
+    def pushSlots(self):  # save the current slots dict and start a new, empty one
+        self.slotStack.append(self.slots)
+        self.slots = {}
+
+    def popSlots(self):  # restore the saved slots dict; return the one that was current
+        slots = self.slots
+        self.slots = self.slotStack.pop()
+        return slots
+
+    def emit(self, *instruction):  # append the tuple (opcode, arg, ...) to the current program
+        self.program.append(instruction)
+
+    def emitStartTag(self, name, attrlist, isend=0):  # a true isend emits "startEndTag" instead of "startTag"
+        if isend:
+            opcode = "startEndTag"
+        else:
+            opcode = "startTag"
+        self.emit(opcode, name, attrlist)
+
+    def emitEndTag(self, name):  # emit an end tag, or in XML mode collapse an empty element
+        if self.xml and self.program and self.program[-1][0] == "startTag":  # nothing emitted since the start tag
+            # Minimize empty element
+            self.program[-1] = ("startEndTag",) + self.program[-1][1:]
+        else:
+            self.emit("endTag", name)
+
+    def emitOptTag(self, name, optTag, isend):  # pops two programs (block, then start tag) and emits "optTag"
+        program = self.popProgram() #block
+        start = self.popProgram() #start tag
+        if (isend or not program) and self.xml:
+            # Minimize empty element
+            start[-1] = ("startEndTag",) + start[-1][1:]
+            isend = 1
+        cexpr = optTag[0]  # a false optTag[0] is passed through uncompiled
+        if cexpr:
+            cexpr = self.compileExpression(optTag[0])
+        self.emit("optTag", name, cexpr, optTag[1], isend, start, program)
+        
+    def emitRawText(self, text):  # emit text as a "rawtext" instruction, unchanged
+        self.emit("rawtext", text)
+
+    def emitText(self, text):  # emit text after passing it through cgi.escape()
+        self.emitRawText(cgi.escape(text))
+
+    def emitDefines(self, defines):  # emit setLocal/setGlobal for each "[global|local] name expr" part
+        for part in splitParts(defines):
+            m = re.match(
+                r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part)
+            if not m:
+                raise TALError("invalid define syntax: " + `part`,
+                               self.position)
+            scope, name, expr = m.group(1, 2, 3)
+            scope = scope or "local"  # the scope keyword is optional; default is local
+            cexpr = self.compileExpression(expr)
+            if scope == "local":
+                self.emit("setLocal", name, cexpr)
+            else:
+                self.emit("setGlobal", name, cexpr)
+
+    def emitOnError(self, name, onError):
+        """Emit an "onError" instruction pairing a block with its handler.
+
+        The current program is popped as the protected block.  The handler
+        program (already holding the start tag) is completed with the
+        substitution and an end tag, then popped as well.
+        """
+        block = self.popProgram()
+        key, expr = parseSubstitution(onError)
+        cexpr = self.compileExpression(expr)
+        if key != "text":
+            assert key == "structure"
+            self.emit("insertStructure", cexpr, {}, [])
+        else:
+            self.emit("insertText", cexpr, [])
+        self.emitEndTag(name)
+        handler = self.popProgram()
+        self.emit("onError", block, handler)
+
+    def emitCondition(self, expr):
+        """Emit "condition" guarding the program popped from the stack."""
+        compiled = self.compileExpression(expr)
+        guarded = self.popProgram()
+        self.emit("condition", compiled, guarded)
+
+    def emitRepeat(self, arg):
+        """Emit a "loop" instruction for a repeat value "name expression".
+
+        The current program is popped and becomes the loop body.
+        Raises TALError when arg does not have the expected form.
+        """
+        # Raw string, as in emitDefines: \s and \Z are regex escapes and
+        # must not depend on Python leaving unknown string escapes alone.
+        m = re.match(r"(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg)
+        if not m:
+            raise TALError("invalid repeat syntax: " + `arg`,
+                           self.position)
+        name, expr = m.group(1, 2)
+        cexpr = self.compileExpression(expr)
+        program = self.popProgram()
+        self.emit("loop", name, cexpr, program)
+
+    def emitSubstitution(self, arg, attrDict=None):
+        """Emit "insertText" or "insertStructure" for a substitution value.
+
+        arg is split by parseSubstitution() into a key ("text" or
+        "structure") and an expression.  The current program is popped and
+        attached to the instruction.  attrDict is only used for
+        "structure" and defaults to a new empty dict.
+        """
+        if attrDict is None:
+            # A fresh dict per call: a shared {} default would be embedded
+            # in every emitted instruction.
+            attrDict = {}
+        key, expr = parseSubstitution(arg)
+        cexpr = self.compileExpression(expr)
+        program = self.popProgram()
+        if key == "text":
+            self.emit("insertText", cexpr, program)
+        else:
+            assert key == "structure"
+            self.emit("insertStructure", cexpr, attrDict, program)
+
+    def emitDefineMacro(self, macroName):
+        """Register the popped program as a macro and emit "defineMacro".
+
+        Raises METALError for a duplicate or syntactically invalid name.
+        """
+        body = self.popProgram()
+        macroName = macroName.strip()
+        if self.macros.has_key(macroName):
+            raise METALError(
+                "duplicate macro definition: %s" % repr(macroName),
+                self.position)
+        if re.match('%s$' % NAME_RE, macroName) is None:
+            raise METALError("invalid macro name: %s" % repr(macroName),
+                             self.position)
+        self.macros[macroName] = body
+        self.inMacroDef -= 1
+        self.emit("defineMacro", macroName, body)
+
+    def emitUseMacro(self, expr):
+        """Emit "useMacro" with the popped program and the collected slots.
+
+        Also clears the inMacroUse flag.
+        """
+        compiled = self.compileExpression(expr)
+        body = self.popProgram()
+        self.inMacroUse = 0
+        slots = self.popSlots()
+        self.emit("useMacro", expr, compiled, slots, body)
+
+    def emitDefineSlot(self, slotName):
+        """Emit "defineSlot" for the popped program.
+
+        Raises METALError when the stripped slot name is not a valid name.
+        """
+        body = self.popProgram()
+        slotName = slotName.strip()
+        if re.match('%s$' % NAME_RE, slotName) is None:
+            raise METALError("invalid slot name: %s" % repr(slotName),
+                             self.position)
+        self.emit("defineSlot", slotName, body)
+
+    def emitFillSlot(self, slotName):
+        """Record the popped program as a slot filler and emit "fillSlot".
+
+        Sets inMacroUse back to 1.  Raises METALError for a duplicate or
+        syntactically invalid slot name.
+        """
+        body = self.popProgram()
+        slotName = slotName.strip()
+        if self.slots.has_key(slotName):
+            raise METALError(
+                "duplicate fill-slot name: %s" % repr(slotName),
+                self.position)
+        if re.match('%s$' % NAME_RE, slotName) is None:
+            raise METALError("invalid slot name: %s" % repr(slotName),
+                             self.position)
+        self.slots[slotName] = body
+        self.inMacroUse = 1
+        self.emit("fillSlot", slotName, body)
+
+    def unEmitWhitespace(self):
+        """Remove trailing whitespace from the program and return it.
+
+        Trailing "rawtext" instructions consisting only of whitespace are
+        dropped; if the instruction before them is "rawtext" too, its
+        trailing whitespace is cut off as well.  The removed text is
+        returned in its original order.
+        """
+        program = self.program
+        pieces = []
+        index = len(program) - 1
+        while index >= 0:
+            item = program[index]
+            if item[0] != "rawtext" or not re.match(r"\A\s*\Z", item[1]):
+                break
+            pieces.append(item[1])
+            index = index - 1
+        del program[index+1:]
+        if index >= 0 and program[index][0] == "rawtext":
+            text = program[index][1]
+            tail = re.search(r"\s+\Z", text)
+            if tail:
+                program[index] = ("rawtext", text[:tail.start()])
+                pieces.append(tail.group())
+        # Pieces were gathered back to front.
+        pieces.reverse()
+        return "".join(pieces)
+
+    def unEmitNewlineWhitespace(self):
+        """Remove and return a trailing "newline + indentation" run.
+
+        Scans trailing "rawtext" instructions backwards.  Instructions
+        holding only spaces/tabs are skipped until one ends in a newline
+        followed by spaces/tabs; that tail plus the skipped text is removed
+        from the program and returned.  Returns None (program untouched)
+        when no such run exists.
+        """
+        program = self.program
+        blanks = []
+        for index in range(len(program) - 1, -1, -1):
+            item = program[index]
+            if item[0] != "rawtext":
+                return None
+            text = item[1]
+            if re.match(r"\A[ \t]*\Z", text):
+                blanks.append(text)
+                continue
+            found = re.match(r"(?s)^(.*)(\n[ \t]*)\Z", text)
+            if not found:
+                return None
+            head, tail = found.group(1, 2)
+            # blanks were gathered back to front
+            blanks.reverse()
+            del program[index:]
+            if head:
+                self.emit("rawtext", head)
+            return tail + "".join(blanks)
+        return None
+
+    def replaceAttrs(self, attrlist, repldict):
+        """Merge attribute replacements into an attribute list.
+
+        attrlist items start with (name, value).  For a name present in
+        repldict the item becomes (name, value, "replace", expr).  Names
+        found only in repldict are appended as (name, None, "insert", expr).
+        An empty repldict returns attrlist itself.
+
+        repldict is not modified: TALInterpreter hands in the dict stored
+        in a compiled instruction, and consuming it here would drop the
+        replacements the next time that instruction runs.
+        """
+        if not repldict:
+            return attrlist
+        remaining = repldict.copy()
+        newlist = []
+        for item in attrlist:
+            key = item[0]
+            try:
+                expr = remaining[key]
+            except KeyError:
+                pass
+            else:
+                item = item[:2] + ("replace", expr)
+                del remaining[key]
+            newlist.append(item)
+        for key, value in remaining.items(): # Add dynamic-only attributes
+            item = (key, None, "insert", value)
+            newlist.append(item)
+        return newlist
+
+    def emitStartElement(self, name, attrlist, taldict, metaldict,
+                         position=(None, None), isend=0):
+        """Emit the instructions that open an element.
+
+        Without TAL or METAL attributes this just emits the start tag and
+        pushes an empty todo dict.  Otherwise the attributes are validated,
+        a sub-program is pushed for each construct that wraps the element,
+        and a todo dict describing the work left for emitEndElement() is
+        pushed.  When isend is true emitEndElement() is called right away.
+
+        Raises TALError or METALError for unknown attributes, missing
+        values, and invalid attribute combinations.
+        """
+        if not taldict and not metaldict:
+            # Handle the simple, common case
+            self.emitStartTag(name, attrlist, isend)
+            self.todoPush({})
+            if isend:
+                self.emitEndElement(name, isend)
+            return
+
+        self.position = position
+        for key, value in taldict.items():
+            if key not in KNOWN_TAL_ATTRIBUTES:
+                raise TALError("bad TAL attribute: " + `key`, position)
+            # omit-tag is the one TAL attribute allowed to be empty
+            if not (value or key == 'omit-tag'):
+                raise TALError("missing value for TAL attribute: " +
+                               `key`, position)
+        for key, value in metaldict.items():
+            if key not in KNOWN_METAL_ATTRIBUTES:
+                raise METALError("bad METAL attribute: " + `key`,
+                position)
+            # NOTE(review): this raises TALError although the attribute is
+            # a METAL one and the check above uses METALError -- confirm
+            # whether that is intended before changing it.
+            if not value:
+                raise TALError("missing value for METAL attribute: " +
+                               `key`, position)
+        # todo collects what emitEndElement() has to finish for this element
+        todo = {}
+        defineMacro = metaldict.get("define-macro")
+        useMacro = metaldict.get("use-macro")
+        defineSlot = metaldict.get("define-slot")
+        fillSlot = metaldict.get("fill-slot")
+        define = taldict.get("define")
+        condition = taldict.get("condition")
+        repeat = taldict.get("repeat")
+        content = taldict.get("content")
+        replace = taldict.get("replace")
+        attrsubst = taldict.get("attributes")
+        onError = taldict.get("on-error")
+        omitTag = taldict.get("omit-tag")
+        TALtag = taldict.get("tal tag")
+        if len(metaldict) > 1 and (defineMacro or useMacro):
+            raise METALError("define-macro and use-macro cannot be used "
+                             "together or with define-slot or fill-slot",
+                             position)
+        if content and replace:
+            raise TALError("content and replace are mutually exclusive",
+                           position)
+
+        repeatWhitespace = None
+        if repeat:
+            # Hack to include preceding whitespace in the loop program
+            repeatWhitespace = self.unEmitNewlineWhitespace()
+        if position != (None, None):
+            # XXX at some point we should insist on a non-trivial position
+            self.emit("setPosition", position)
+        if self.inMacroUse:
+            if fillSlot:
+                self.pushProgram()
+                if self.source_file is not None:
+                    self.emit("setSourceFile", self.source_file)
+                todo["fillSlot"] = fillSlot
+                # Cleared here so the METAL handling below runs for this
+                # element; emitFillSlot() sets the flag again.
+                self.inMacroUse = 0
+        else:
+            if fillSlot:
+                raise METALError, ("fill-slot must be within a use-macro",
+                                   position)
+        if not self.inMacroUse:
+            if defineMacro:
+                self.pushProgram()
+                # A macro program starts with its own version/mode header
+                self.emit("version", TAL_VERSION)
+                self.emit("mode", self.xml and "xml" or "html")
+                if self.source_file is not None:
+                    self.emit("setSourceFile", self.source_file)
+                todo["defineMacro"] = defineMacro
+                self.inMacroDef = self.inMacroDef + 1
+            if useMacro:
+                self.pushSlots()
+                self.pushProgram()
+                todo["useMacro"] = useMacro
+                self.inMacroUse = 1
+            if defineSlot:
+                if not self.inMacroDef:
+                    raise METALError, (
+                        "define-slot must be within a define-macro",
+                        position)
+                self.pushProgram()
+                todo["defineSlot"] = defineSlot
+
+        if taldict:
+            # The element's static attributes are handed to beginScope
+            dict = {}
+            for item in attrlist:
+                key, value = item[:2]
+                dict[key] = value
+            self.emit("beginScope", dict)
+            todo["scope"] = 1
+        if onError:
+            self.pushProgram() # handler
+            self.emitStartTag(name, list(attrlist)) # Must copy attrlist!
+            self.pushProgram() # block
+            todo["onError"] = onError
+        if define:
+            self.emitDefines(define)
+            todo["define"] = define
+        if condition:
+            self.pushProgram()
+            todo["condition"] = condition
+        if repeat:
+            todo["repeat"] = repeat
+            self.pushProgram()
+            if repeatWhitespace:
+                self.emitText(repeatWhitespace)
+        if content:
+            todo["content"] = content
+        if replace:
+            todo["replace"] = replace
+            self.pushProgram()
+        optTag = omitTag is not None or TALtag
+        if optTag:
+            todo["optional tag"] = omitTag, TALtag
+            self.pushProgram()
+        if attrsubst:
+            repldict = parseAttributeReplacements(attrsubst)
+            for key, value in repldict.items():
+                repldict[key] = self.compileExpression(value)
+        else:
+            repldict = {}
+        if replace:
+            # With replace the replacements are kept for emitEndElement()
+            # and the start tag emitted here gets none.
+            todo["repldict"] = repldict
+            repldict = {}
+        self.emitStartTag(name, self.replaceAttrs(attrlist, repldict), isend)
+        if optTag:
+            self.pushProgram()
+        if content:
+            self.pushProgram()
+        if todo and position != (None, None):
+            todo["position"] = position
+        self.todoPush(todo)
+        if isend:
+            self.emitEndElement(name, isend)
+
+    def emitEndElement(self, name, isend=0, implied=0):
+        """Emit the instructions that close an element.
+
+        Pops the todo dict pushed by emitStartElement() and finishes each
+        construct recorded in it.  With an empty todo only the end tag is
+        emitted (and not even that when isend is true).
+
+        Raises TALError or METALError when implied > 0 and the element
+        carried TAL or METAL attributes.
+        """
+        todo = self.todoPop()
+        if not todo:
+            # Shortcut
+            if not isend:
+                self.emitEndTag(name)
+            return
+
+        self.position = position = todo.get("position", (None, None))
+        defineMacro = todo.get("defineMacro")
+        useMacro = todo.get("useMacro")
+        defineSlot = todo.get("defineSlot")
+        fillSlot = todo.get("fillSlot")
+        repeat = todo.get("repeat")
+        content = todo.get("content")
+        replace = todo.get("replace")
+        condition = todo.get("condition")
+        onError = todo.get("onError")
+        define = todo.get("define")
+        repldict = todo.get("repldict", {})
+        scope = todo.get("scope")
+        optTag = todo.get("optional tag")
+
+        if implied > 0:
+            # Pick the exception class matching the kind of attribute used
+            if defineMacro or useMacro or defineSlot or fillSlot:
+                exc = METALError
+                what = "METAL"
+            else:
+                exc = TALError
+                what = "TAL"
+            raise exc("%s attributes on <%s> require explicit </%s>" %
+                      (what, name, name), position)
+
+        # Each emit* call below pops the program(s) that emitStartElement()
+        # pushed for that construct, so the order of these calls matters.
+        if content:
+            self.emitSubstitution(content, {})
+        if optTag:
+            self.emitOptTag(name, optTag, isend)
+        elif not isend:
+            self.emitEndTag(name)
+        if replace:
+            self.emitSubstitution(replace, repldict)
+        if repeat:
+            self.emitRepeat(repeat)
+        if condition:
+            self.emitCondition(condition)
+        if onError:
+            self.emitOnError(name, onError)
+        if scope:
+            self.emit("endScope")
+        if defineSlot:
+            self.emitDefineSlot(defineSlot)
+        if fillSlot:
+            self.emitFillSlot(fillSlot)
+        if useMacro:
+            self.emitUseMacro(useMacro)
+        if defineMacro:
+            self.emitDefineMacro(defineMacro)
+
+def test():
+    """Smoke test: build a nested program with pushProgram/popProgram."""
+    gen = TALGenerator()
+    gen.pushProgram()
+    gen.emit("bar")
+    inner = gen.popProgram()
+    gen.emit("foo", inner)
+
+if __name__ == "__main__":
+    test()
diff --git a/TAL/TALInterpreter.py b/TAL/TALInterpreter.py
new file mode 100644 (file)
index 0000000..0f42284
--- /dev/null
@@ -0,0 +1,626 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""
+Interpreter for a pre-compiled TAL program.
+"""
+
+import sys
+import getopt
+
+from cgi import escape
+from string import join, lower, rfind
+try:
+    from strop import lower, rfind
+except ImportError:
+    pass
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from TALDefs import quote, TAL_VERSION, TALError, METALError
+from TALDefs import isCurrentVersion, getProgramVersion, getProgramMode
+from TALGenerator import TALGenerator
+
+BOOLEAN_HTML_ATTRS = [
+    # List of Boolean attributes in HTML that should be rendered in
+    # minimized form (e.g. <img ismap> rather than <img ismap="">)
+    # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
+    # XXX The problem with this is that this is not valid XML and
+    # can't be parsed back!
+    # TALInterpreter.attrAction_tal lower-cases the attribute name before
+    # looking it up here, so the entries must stay lower case.
+    "compact", "nowrap", "ismap", "declare", "noshade", "checked",
+    "disabled", "readonly", "multiple", "selected", "noresize",
+    "defer"
+]
+
+EMPTY_HTML_TAGS = [
+    # List of HTML tags with an empty content model; these are
+    # rendered in minimized form, e.g. <img />.
+    # From http://www.w3.org/TR/xhtml1/#dtds
+    # Not referenced anywhere else in this module.
+    "base", "meta", "link", "hr", "br", "param", "img", "area",
+    "input", "col", "basefont", "isindex", "frame",
+]
+
+class AltTALGenerator(TALGenerator):
+    """TALGenerator used by TALInterpreter to re-parse inserted structure.
+
+    Output can be switched off with enable(0).  TAL and METAL attributes
+    found in the parsed text are ignored; instead the repldict given to
+    the constructor is applied to the first start tag generated while
+    enabled.
+    """
+
+    def __init__(self, repldict, expressionCompiler=None, xml=0):
+        self.repldict = repldict
+        self.enabled = 1
+        TALGenerator.__init__(self, expressionCompiler, xml)
+
+    def enable(self, enabled):
+        """Turn instruction output on (true) or off (false)."""
+        self.enabled = enabled
+
+    def emit(self, *args):
+        """Emit the instruction only while output is enabled."""
+        if not self.enabled:
+            return
+        TALGenerator.emit(self, *args)
+
+    def emitStartElement(self, name, attrlist, taldict, metaldict,
+                         position=(None, None), isend=0):
+        """Emit a start element, discarding the TAL/METAL dicts passed in."""
+        taldict = {}
+        if self.enabled and self.repldict:
+            # Dummy value: it only makes the base class go through its
+            # attribute replacement path; replaceAttrs() below supplies
+            # the real replacements.
+            taldict["attributes"] = "x x"
+        TALGenerator.emitStartElement(self, name, attrlist,
+                                      taldict, {}, position, isend)
+
+    def replaceAttrs(self, attrlist, repldict):
+        """Apply the stored repldict once, then behave like the base."""
+        if self.enabled and self.repldict:
+            repldict, self.repldict = self.repldict, None
+        return TALGenerator.replaceAttrs(self, attrlist, repldict)
+
+
+class TALInterpreter:
+
+    def __init__(self, program, macros, engine, stream=None,
+                 debug=0, wrap=60, metal=1, tal=1, showtal=-1,
+                 strictinsert=1, stackLimit=100):
+        """Prepare an interpreter for a compiled program.
+
+        stream defaults to sys.stdout.  tal selects the TAL-aware handler
+        table.  showtal must be -1, 0 or 1; -1 resolves to "not tal".
+        """
+        self.program = program
+        self.macros = macros
+        self.engine = engine
+        self.Default = engine.getDefault()
+        if not stream:
+            stream = sys.stdout
+        self.stream = stream
+        self._stream_write = self.stream.write
+        self.debug = debug
+        self.wrap = wrap
+        self.metal = metal
+        self.tal = tal
+        self.dispatch = self.bytecode_handlers
+        if tal:
+            self.dispatch = self.bytecode_handlers_tal
+        assert showtal in (-1, 0, 1)
+        if showtal == -1:
+            showtal = (not tal)
+        self.showtal = showtal
+        self.strictinsert = strictinsert
+        self.stackLimit = stackLimit
+        # Output mode; do_mode() may change these while interpreting.
+        self.html = 0
+        self.endsep = "/>"
+        self.endlen = len(self.endsep)
+        self.macroStack = []
+        self.popMacro = self.macroStack.pop
+        self.position = None, None  # (lineno, offset)
+        self.col = 0
+        self.level = 0
+        self.scopeLevel = 0
+        self.sourceFile = None
+
+    def saveState(self):
+        """Return a snapshot tuple for restoreState/restoreOutputState."""
+        state = (self.position, self.col, self.stream,
+                 self.scopeLevel, self.level)
+        return state
+
+    def restoreState(self, state):
+        """Restore a saveState() snapshot, closing scopes opened since.
+
+        The interpreter level must already equal the saved one.
+        """
+        position, col, stream, scopeLevel, level = state
+        self.position = position
+        self.col = col
+        self.stream = stream
+        self._stream_write = self.stream.write
+        assert self.level == level
+        while self.scopeLevel > scopeLevel:
+            self.engine.endScope()
+            self.scopeLevel -= 1
+        self.engine.setPosition(self.position)
+
+    def restoreOutputState(self, state):
+        """Restore only column and stream from a saveState() snapshot.
+
+        The saved position is ignored; level and scopeLevel must already
+        match the snapshot.
+        """
+        unused, col, stream, scopeLevel, level = state
+        self.col = col
+        self.stream = stream
+        self._stream_write = self.stream.write
+        assert self.level == level
+        assert self.scopeLevel == scopeLevel
+
+    def pushMacro(self, macroName, slots, entering=1):
+        """Push [macroName, slots, entering] onto the macro stack.
+
+        Raises METALError once the stack already holds stackLimit entries.
+        """
+        stack = self.macroStack
+        if len(stack) >= self.stackLimit:
+            message = ("macro nesting limit (%d) exceeded "
+                       "by %s" % (self.stackLimit, repr(macroName)))
+            raise METALError(message)
+        stack.append([macroName, slots, entering])
+
+    def macroContext(self, what):
+        """Return the index of the topmost macro stack entry whose first
+        item equals what, or -1 when there is none."""
+        stack = self.macroStack
+        for index in range(len(stack) - 1, -1, -1):
+            if stack[index][0] == what:
+                return index
+        return -1
+
+    def __call__(self):
+        """Interpret the whole program; finish the last line if needed."""
+        assert self.level == 0
+        assert self.scopeLevel == 0
+        self.interpret(self.program)
+        assert self.level == 0
+        assert self.scopeLevel == 0
+        if self.col <= 0:
+            return
+        # Output stopped in the middle of a line: terminate it.
+        self._stream_write("\n")
+        self.col = 0
+
+    def stream_write(self, s, len=len, rfind=rfind):
+        """Write s and keep self.col equal to the current output column.
+
+        len and rfind are bound as defaults; callers do not pass them.
+        """
+        self._stream_write(s)
+        newline = rfind(s, '\n')
+        if newline >= 0:
+            # Column restarts after the last newline in s.
+            self.col = len(s) - (newline + 1)
+        else:
+            self.col = self.col + len(s)
+
+    # Opcode name -> handler function.  Filled in next to each do_*
+    # method below; __init__ uses this table when tal is false.
+    bytecode_handlers = {}
+
+    def interpret(self, program):
+        """Run each (opcode, args) pair of program through self.dispatch.
+
+        self.level is raised by one for the duration of the call and
+        restored even when a handler raises.  With self.debug set, every
+        instruction is first written to stderr, shortened to 80 columns.
+
+        The former "None=None" parameter only cached the name None as a
+        local; binding None is a syntax error from Python 2.4 on, and no
+        caller in this module passes it.
+        """
+        oldlevel = self.level
+        self.level = oldlevel + 1
+        handlers = self.dispatch
+        try:
+            if self.debug:
+                for (opcode, args) in program:
+                    s = "%sdo_%s%s\n" % ("    "*self.level, opcode,
+                                      repr(args))
+                    if len(s) > 80:
+                        s = s[:76] + "...\n"
+                    sys.stderr.write(s)
+                    handlers[opcode](self, args)
+            else:
+                for (opcode, args) in program:
+                    handlers[opcode](self, args)
+        finally:
+            self.level = oldlevel
+
+    def do_version(self, version):
+        """Check that the program was compiled for this TAL_VERSION."""
+        assert version == TAL_VERSION
+    bytecode_handlers["version"] = do_version
+
+    def do_mode(self, mode):
+        """Select "html" or "xml" output and the matching empty-tag end."""
+        assert mode in ("html", "xml")
+        self.html = (mode == "html")
+        endsep = "/>"
+        if self.html:
+            endsep = " />"
+        self.endsep = endsep
+        self.endlen = len(self.endsep)
+    bytecode_handlers["mode"] = do_mode
+
+    def do_setSourceFile(self, source_file):
+        """Record the source file here and pass it on to the engine."""
+        self.sourceFile = source_file
+        self.engine.setSourceFile(source_file)
+    bytecode_handlers["setSourceFile"] = do_setSourceFile
+
+    def do_setPosition(self, position):
+        """Record the source position here and pass it on to the engine."""
+        self.position = position
+        self.engine.setPosition(position)
+    bytecode_handlers["setPosition"] = do_setPosition
+
+    def do_startEndTag(self, stuff):
+        """Write an empty-element tag: a start tag closed with endsep."""
+        self.do_startTag(stuff, self.endsep, self.endlen)
+    bytecode_handlers["startEndTag"] = do_startEndTag
+
+    def do_startTag(self, (name, attrList),
+                    end=">", endlen=1, _len=len):
+        """Write a start tag and its attributes, wrapping long lines.
+
+        Two-item entries of attrList are (name, text) with the text written
+        as is; longer entries go through the "<attrAction>" handler, which
+        returns (ok, name, text) and may veto the attribute.  end/endlen
+        give the closing delimiter and its length.  self.col is updated
+        even if writing fails part way.
+        """
+        # The bytecode generator does not cause calls to this method
+        # for start tags with no attributes; those are optimized down
+        # to rawtext events.  Hence, there is no special "fast path"
+        # for that case.
+        _stream_write = self._stream_write
+        _stream_write("<" + name)
+        namelen = _len(name)
+        col = self.col + namelen + 1
+        wrap = self.wrap
+        # Continuation lines are indented to just past the tag name ...
+        align = col + 1
+        if align >= wrap/2:
+            align = 4  # Avoid a narrow column far to the right
+        attrAction = self.dispatch["<attrAction>"]
+        try:
+            for item in attrList:
+                if _len(item) == 2:
+                    name, s = item
+                else:
+                    ok, name, s = attrAction(self, item)
+                    if not ok:
+                        continue
+                slen = _len(s)
+                # Break the line when wrapping is on and the attribute
+                # would run past the wrap column.
+                if (wrap and
+                    col >= align and
+                    col + 1 + slen > wrap):
+                    _stream_write("\n" + " "*align)
+                    col = align + slen
+                else:
+                    s = " " + s
+                    col = col + 1 + slen
+                _stream_write(s)
+            _stream_write(end)
+            col = col + endlen
+        finally:
+            self.col = col
+    bytecode_handlers["startTag"] = do_startTag
+
+    def attrAction(self, item):
+        """Decide how to render one attribute entry when TAL is off.
+
+        item starts with (name, value, action).  Returns (ok, name, text);
+        ok is 0 when the attribute must be left out, otherwise text is
+        either the bare name (value is None) or name=quoted-value.
+        """
+        name, value, action = item[:3]
+        # Drop action 1 always, and higher actions unless showtal is set.
+        if action == 1 or (action > 1 and not self.showtal):
+            return 0, name, value
+        macs = self.macroStack
+        # Per the comments below, action 2 marks a METAL attribute.
+        if action == 2 and self.metal and macs:
+            if len(macs) > 1 or not macs[-1][2]:
+                # Drop all METAL attributes at a use-depth above one.
+                return 0, name, value
+            # Clear 'entering' flag
+            macs[-1][2] = 0
+            # Convert or drop depth-one METAL attributes.
+            i = rfind(name, ":") + 1
+            prefix, suffix = name[:i], name[i:]
+            if suffix == "define-macro":
+                # Convert define-macro as we enter depth one.
+                name = prefix + "use-macro"
+                value = macs[-1][0] # Macro name
+            elif suffix == "define-slot":
+                name = prefix + "slot"
+            elif suffix == "fill-slot":
+                pass
+            else:
+                return 0, name, value
+
+        if value is None:
+            value = name
+        else:
+            value = "%s=%s" % (name, quote(value))
+        return 1, name, value
+
+    def attrAction_tal(self, item):
+        """Decide how to render one attribute entry when TAL is on.
+
+        Entries with action > 1 are delegated to attrAction().  Otherwise
+        item[3] is evaluated: as a Boolean for HTML Boolean attributes, as
+        text for everything else.  Returns (ok, name, text) like
+        attrAction(); a kept attribute is always written as name="value",
+        with the name doubling as the value when there is none.
+        """
+        name, value, action = item[:3]
+        if action > 1:
+            return self.attrAction(item)
+        ok = 1
+        if self.html and lower(name) in BOOLEAN_HTML_ATTRS:
+            evalue = self.engine.evaluateBoolean(item[3])
+            if evalue is self.Default:
+                if action == 1: # Cancelled insert
+                    ok = 0
+            elif evalue:
+                value = None
+            else:
+                ok = 0
+        else:
+            evalue = self.engine.evaluateText(item[3])
+            if evalue is self.Default:
+                if action == 1: # Cancelled insert
+                    ok = 0
+            else:
+                # A None result removes the attribute
+                if evalue is None:
+                    ok = 0
+                value = evalue
+        if ok:
+            if value is None:
+                value = name
+            value = "%s=%s" % (name, quote(value))
+        return ok, name, value
+
+    # The non-TAL table uses attrAction; the TAL table overrides this entry.
+    bytecode_handlers["<attrAction>"] = attrAction
+
+    def no_tag(self, start, program):
+        """Interpret start with its output discarded, then run program.
+
+        The start program is interpreted into a scratch StringIO and the
+        previous stream and column are restored before program runs.
+        """
+        state = self.saveState()
+        scratch = StringIO()
+        self.stream = scratch
+        self._stream_write = scratch.write
+        self.interpret(start)
+        self.restoreOutputState(state)
+        self.interpret(program)
+
+    def do_optTag(self, (name, cexpr, tag_ns, isend, start, program),
+                  omit=0):
+        """Write an optional tag without evaluating its condition.
+
+        When tag_ns is set and showtal is off the tag itself is left out
+        and only the content is written.  Otherwise the start program runs
+        and, unless isend, the content and a closing tag follow.
+        """
+        if tag_ns and not self.showtal:
+            return self.no_tag(start, program)
+
+        self.interpret(start)
+        if isend:
+            return
+        self.interpret(program)
+        closer = '</%s>' % name
+        self._stream_write(closer)
+        self.col = self.col + len(closer)
+
+    def do_optTag_tal(self, stuff):
+        """Write an optional tag, honouring its condition.
+
+        The tag is omitted when the expression is the empty string or
+        evaluates true; a None expression always defers to do_optTag().
+        """
+        cexpr = stuff[1]
+        omit = cexpr is not None and (
+            cexpr == '' or self.engine.evaluateBoolean(cexpr))
+        if omit:
+            self.no_tag(stuff[-2], stuff[-1])
+        else:
+            self.do_optTag(stuff)
+    bytecode_handlers["optTag"] = do_optTag
+
+    def dumpMacroStack(self, prefix, suffix, value):
+        """Debugging aid: write the macro stack to stderr.
+
+        Entries are unpacked the way pushMacro() stores them, as
+        [macroName, slots, entering].  The None placeholder that
+        do_defineMacro() pushes is printed as such instead of being
+        unpacked.
+        """
+        write = sys.stderr.write
+        write("+---- %s%s = %s\n" % (prefix, suffix, value))
+        for i in range(len(self.macroStack)):
+            entry = self.macroStack[i]
+            if entry is None:
+                write("| %2d. %s\n" % (i, None))
+                continue
+            macroName, slots, entering = entry
+            write("| %2d. %-12s %-12s %s\n" %
+                  (i, macroName, entering, slots and slots.keys()))
+        write("+--------------------------------------\n")
+
+    def do_rawtextBeginScope(self, (s, col, position, closeprev, dict)):
+        """Write s, set column and position, then open an engine scope.
+
+        With closeprev the previous scope is ended first and scopeLevel
+        stays as it is; otherwise scopeLevel grows by one.  dict is unused
+        in this non-TAL variant.
+        """
+        self._stream_write(s)
+        self.col = col
+        self.do_setPosition(position)
+        engine = self.engine
+        if closeprev:
+            engine.endScope()
+        engine.beginScope()
+        if not closeprev:
+            self.scopeLevel = self.scopeLevel + 1
+
+    def do_rawtextBeginScope_tal(self, (s, col, position, closeprev, dict)):
+        """Like do_rawtextBeginScope, and bind dict to the local "attrs"."""
+        self._stream_write(s)
+        self.col = col
+        self.do_setPosition(position)
+        engine = self.engine
+        if closeprev:
+            engine.endScope()
+        engine.beginScope()
+        if not closeprev:
+            self.scopeLevel = self.scopeLevel + 1
+        engine.setLocal("attrs", dict)
+    bytecode_handlers["rawtextBeginScope"] = do_rawtextBeginScope
+
+    def do_beginScope(self, dict):
+        """Open an engine scope; dict is unused in this non-TAL variant."""
+        self.engine.beginScope()
+        self.scopeLevel += 1
+
+    def do_beginScope_tal(self, dict):
+        """Open an engine scope and bind dict to the local "attrs"."""
+        scope_engine = self.engine
+        scope_engine.beginScope()
+        scope_engine.setLocal("attrs", dict)
+        self.scopeLevel += 1
+    bytecode_handlers["beginScope"] = do_beginScope
+
+    def do_endScope(self, notused=None):
+        """Close the current engine scope and lower scopeLevel by one."""
+        self.engine.endScope()
+        self.scopeLevel -= 1
+    bytecode_handlers["endScope"] = do_endScope
+
+    def do_setLocal(self, notused):
+        """Do nothing: variable definitions are ignored when TAL is off."""
+        pass
+
+    def do_setLocal_tal(self, (name, expr)):
+        """Evaluate expr and bind the result to the local variable name."""
+        engine = self.engine
+        engine.setLocal(name, engine.evaluateValue(expr))
+    bytecode_handlers["setLocal"] = do_setLocal
+
+    def do_setGlobal_tal(self, (name, expr)):
+        """Evaluate expr and bind the result to the global variable name."""
+        engine = self.engine
+        engine.setGlobal(name, engine.evaluateValue(expr))
+    # With TAL off, setGlobal shares the do-nothing do_setLocal handler.
+    bytecode_handlers["setGlobal"] = do_setLocal
+
+    def do_insertText(self, stuff):
+        """With TAL off, just interpret the original content (stuff[1])."""
+        self.interpret(stuff[1])
+
+    def do_insertText_tal(self, stuff):
+        """Write the escaped text value of the expression stuff[0].
+
+        A None result writes nothing; the engine's Default marker keeps
+        the original content (stuff[1]).
+        """
+        text = self.engine.evaluateText(stuff[0])
+        if text is None:
+            return
+        if text is self.Default:
+            self.interpret(stuff[1])
+            return
+        # stream_write() writes the string and tracks the output column.
+        self.stream_write(escape(text))
+    bytecode_handlers["insertText"] = do_insertText
+
+    def do_insertStructure(self, stuff):
+        """With TAL off, just interpret the original content (stuff[2])."""
+        self.interpret(stuff[2])
+
+    def do_insertStructure_tal(self, (expr, repldict, block)):
+        """Insert the structure value of expr into the output.
+
+        A None result writes nothing; the engine's Default marker keeps
+        the original block.  Without attribute replacements and with
+        strictinsert off the text is written unchecked; otherwise it is
+        re-parsed as HTML or XML according to the current mode.
+        """
+        structure = self.engine.evaluateStructure(expr)
+        if structure is None:
+            return
+        if structure is self.Default:
+            self.interpret(block)
+            return
+        markup = str(structure)
+        if not repldict and not self.strictinsert:
+            # Take a shortcut, no error checking
+            self.stream_write(markup)
+            return
+        if self.html:
+            insert = self.insertHTMLStructure
+        else:
+            insert = self.insertXMLStructure
+        insert(markup, repldict)
+    bytecode_handlers["insertStructure"] = do_insertStructure
+
+    def insertHTMLStructure(self, text, repldict):
+        """Parse text as HTML, compile it and interpret the result."""
+        from HTMLTALParser import HTMLTALParser
+        generator = AltTALGenerator(repldict, self.engine, 0)
+        parser = HTMLTALParser(generator)
+        # Raises an exception if text is invalid
+        parser.parseString(text)
+        program, macros = parser.getCode()
+        self.interpret(program)
+
+    def insertXMLStructure(self, text, repldict):
+        """Parse text as an XML fragment, compile it and interpret it.
+
+        The text is wrapped in a dummy <foo> document; the generator is
+        disabled while the wrapper is parsed so that only text itself
+        produces instructions.
+        """
+        from TALParser import TALParser
+        generator = AltTALGenerator(repldict, self.engine, 0)
+        parser = TALParser(generator)
+        generator.enable(0)
+        parser.parseFragment('<!DOCTYPE foo PUBLIC "foo" "bar"><foo>')
+        generator.enable(1)
+        # Raises an exception if text is invalid
+        parser.parseFragment(text)
+        generator.enable(0)
+        parser.parseFragment('</foo>', 1)
+        program, macros = generator.getCode()
+        self.interpret(program)
+
+    def do_loop(self, (name, expr, block)):
+        """With TAL off, interpret the loop body exactly once."""
+        self.interpret(block)
+
+    def do_loop_tal(self, (name, expr, block)):
+        """Interpret block once per true result of the iterator's next().
+
+        The iterator comes from engine.setRepeat(name, expr).
+        """
+        iterator = self.engine.setRepeat(name, expr)
+        while 1:
+            if not iterator.next():
+                break
+            self.interpret(block)
+    bytecode_handlers["loop"] = do_loop
+
+    def do_rawtextColumn(self, (s, col)):
+        """Write s and set the output column to the given value col."""
+        self._stream_write(s)
+        self.col = col
+    bytecode_handlers["rawtextColumn"] = do_rawtextColumn
+
+    def do_rawtextOffset(self, (s, offset)):
+        """Write s and advance the output column by the given offset."""
+        self._stream_write(s)
+        self.col = self.col + offset
+    bytecode_handlers["rawtextOffset"] = do_rawtextOffset
+
+    def do_condition(self, (condition, block)):
+        """Interpret block unless TAL is on and the condition is false."""
+        if self.tal and not self.engine.evaluateBoolean(condition):
+            return
+        self.interpret(block)
+    bytecode_handlers["condition"] = do_condition
+
+    def do_defineMacro(self, (macroName, macro)):
+        """Interpret the body of a macro definition.
+
+        When exactly one macro is in use and its 'entering' flag is already
+        cleared, a None placeholder sits on the macro stack while the body
+        runs.
+        """
+        macs = self.macroStack
+        shielded = len(macs) == 1 and not macs[-1][2]
+        if shielded:
+            macs.append(None)
+        self.interpret(macro)
+        if shielded:
+            macs.pop()
+    bytecode_handlers["defineMacro"] = do_defineMacro
+
+    def do_useMacro(self, (macroName, macroExpr, compiledSlots, block)):
+        """Expand a macro in place of block.
+
+        With METAL off the original block is interpreted.  Otherwise the
+        macro is looked up through the engine; the engine's Default marker
+        selects block itself.  Raises METALError when the macro's version
+        or mode does not match this interpreter, or (via pushMacro) when
+        the nesting limit is exceeded.
+        """
+        if not self.metal:
+            self.interpret(block)
+            return
+        macro = self.engine.evaluateMacro(macroExpr)
+        if macro is self.Default:
+            macro = block
+        else:
+            if not isCurrentVersion(macro):
+                raise METALError("macro %s has incompatible version %s" %
+                                 (`macroName`, `getProgramVersion(macro)`),
+                                 self.position)
+            mode = getProgramMode(macro)
+            if mode != (self.html and "html" or "xml"):
+                raise METALError("macro %s has incompatible mode %s" %
+                                 (`macroName`, `mode`), self.position)
+        self.pushMacro(macroName, compiledSlots)
+        saved_source = self.sourceFile
+        saved_position = self.position  # Used by Boa Constructor
+        self.interpret(macro)
+        # The macro may have switched source files; switch back.
+        if self.sourceFile != saved_source:
+            self.engine.setSourceFile(saved_source)
+            self.sourceFile = saved_source
+        self.popMacro()
+    bytecode_handlers["useMacro"] = do_useMacro
+
+    def do_fillSlot(self, (slotName, block)):
+        """Interpret the slot filler where it stands in the source."""
+        # This is only executed if the enclosing 'use-macro' evaluates
+        # to 'default'.
+        self.interpret(block)
+    bytecode_handlers["fillSlot"] = do_fillSlot
+
+    def do_defineSlot(self, (slotName, block)):
+        """Render a slot: the caller's filler if there is one, else block.
+
+        With METAL off, or when the macro stack is empty or topped by the
+        None placeholder, the slot's own block is interpreted.
+        """
+        if not self.metal:
+            self.interpret(block)
+            return
+        macs = self.macroStack
+        if macs and macs[-1] is not None:
+            saved_source = self.sourceFile
+            saved_position = self.position  # Used by Boa Constructor
+            # The macro entry is off the stack while the filler runs.
+            macroName, slots = self.popMacro()[:2]
+            slot = slots.get(slotName)
+            if slot is not None:
+                self.interpret(slot)
+                if self.sourceFile != saved_source:
+                    self.engine.setSourceFile(saved_source)
+                    self.sourceFile = saved_source
+                self.pushMacro(macroName, slots, entering=0)
+                return
+            # No filler for this slot: put the entry back unchanged.
+            self.pushMacro(macroName, slots)
+            if len(macs) == 1:
+                self.interpret(block)
+                return
+        self.interpret(block)
+    bytecode_handlers["defineSlot"] = do_defineSlot
+
+    def do_onError(self, (block, handler)):
+        """With TAL off, interpret block; the handler is never used."""
+        self.interpret(block)
+
+    def do_onError_tal(self, (block, handler)):
+        """Interpret block; if it raises, interpret handler instead.
+
+        Output of block is collected in a StringIO and only written to the
+        real stream when block finishes without an exception.  On failure
+        the saved state is restored and handler runs in a new scope with
+        the local variable 'error' set by engine.createErrorInfo().
+        """
+        state = self.saveState()
+        self.stream = stream = StringIO()
+        self._stream_write = stream.write
+        try:
+            self.interpret(block)
+        except:
+            # Deliberately catches everything: any failure in block
+            # triggers the handler.
+            exc = sys.exc_info()[1]
+            self.restoreState(state)
+            engine = self.engine
+            engine.beginScope()
+            error = engine.createErrorInfo(exc, self.position)
+            engine.setLocal('error', error)
+            try:
+                self.interpret(handler)
+            finally:
+                engine.endScope()
+        else:
+            self.restoreOutputState(state)
+            self.stream_write(stream.getvalue())
+    bytecode_handlers["onError"] = do_onError
+
+    # Handler table used when TAL is on: the plain table with the
+    # TAL-aware variants substituted.
+    bytecode_handlers_tal = bytecode_handlers.copy()
+    bytecode_handlers_tal.update({
+        "rawtextBeginScope": do_rawtextBeginScope_tal,
+        "beginScope": do_beginScope_tal,
+        "setLocal": do_setLocal_tal,
+        "setGlobal": do_setGlobal_tal,
+        "insertStructure": do_insertStructure_tal,
+        "insertText": do_insertText_tal,
+        "loop": do_loop_tal,
+        "onError": do_onError_tal,
+        "<attrAction>": attrAction_tal,
+        "optTag": do_optTag_tal,
+    })
+
+
+def test():
+    """Parse a file (argv[1], else driver.FILE) and interpret it.
+
+    NOTE(review): TALCompiler is neither imported nor defined in this
+    module, so as written this presumably fails with NameError once the
+    file has been parsed -- confirm and replace with the current compile
+    entry point.
+    """
+    from driver import FILE, parsefile
+    from DummyEngine import DummyEngine
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "")
+    except getopt.error, msg:
+        print msg
+        sys.exit(2)
+    if args:
+        file = args[0]
+    else:
+        file = FILE
+    doc = parsefile(file)
+    compiler = TALCompiler(doc)
+    program, macros = compiler()
+    engine = DummyEngine()
+    interpreter = TALInterpreter(program, macros, engine)
+    interpreter()
+
+if __name__ == "__main__":
+    test()
diff --git a/TAL/TALParser.py b/TAL/TALParser.py
new file mode 100644 (file)
index 0000000..f75414e
--- /dev/null
@@ -0,0 +1,137 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""
+Parse XML and compile to TALInterpreter intermediate code.
+"""
+
+import string
+from XMLParser import XMLParser
+from TALDefs import *
+from TALGenerator import TALGenerator
+
+class TALParser(XMLParser):
+
+    ordered_attributes = 1
+
+    def __init__(self, gen=None): # Override
+        XMLParser.__init__(self)
+        if gen is None:
+            gen = TALGenerator()
+        self.gen = gen
+        self.nsStack = []
+        self.nsDict = {XML_NS: 'xml'}
+        self.nsNew = []
+
+    def getCode(self):
+        return self.gen.getCode()
+
+    def getWarnings(self):
+        return ()
+
+    def StartNamespaceDeclHandler(self, prefix, uri):
+        self.nsStack.append(self.nsDict.copy())
+        self.nsDict[uri] = prefix
+        self.nsNew.append((prefix, uri))
+
+    def EndNamespaceDeclHandler(self, prefix):
+        self.nsDict = self.nsStack.pop()
+
+    def StartElementHandler(self, name, attrs):
+        if self.ordered_attributes:
+            # attrs is a list of alternating names and values
+            attrlist = []
+            for i in range(0, len(attrs), 2):
+                key = attrs[i]
+                value = attrs[i+1]
+                attrlist.append((key, value))
+        else:
+            # attrs is a dict of {name: value}
+            attrlist = attrs.items()
+            attrlist.sort() # For definiteness
+        name, attrlist, taldict, metaldict = self.process_ns(name, attrlist)
+        attrlist = self.xmlnsattrs() + attrlist
+        self.gen.emitStartElement(name, attrlist, taldict, metaldict)
+
+    def process_ns(self, name, attrlist):
+        taldict = {}
+        metaldict = {}
+        fixedattrlist = []
+        name, namebase, namens = self.fixname(name)
+        for key, value in attrlist:
+            key, keybase, keyns = self.fixname(key)
+            ns = keyns or namens # default to tag namespace
+            item = key, value
+            if ns == 'metal':
+                metaldict[keybase] = value
+                item = item + ("metal",)
+            elif ns == 'tal':
+                taldict[keybase] = value
+                item = item + ("tal",)
+            fixedattrlist.append(item)
+        if namens in ('metal', 'tal'):
+            taldict['tal tag'] = namens
+        return name, fixedattrlist, taldict, metaldict
+
+    def xmlnsattrs(self):
+        newlist = []
+        for prefix, uri in self.nsNew:
+            if prefix:
+                key = "xmlns:" + prefix
+            else:
+                key = "xmlns"
+            if uri in (ZOPE_METAL_NS, ZOPE_TAL_NS):
+                item = (key, uri, "xmlns")
+            else:
+                item = (key, uri)
+            newlist.append(item)
+        self.nsNew = []
+        return newlist
+
+    def fixname(self, name):
+        if ' ' in name:
+            uri, name = string.split(name, ' ')
+            prefix = self.nsDict[uri]
+            prefixed = name
+            if prefix:
+                prefixed = "%s:%s" % (prefix, name)
+            ns = 'x'
+            if uri == ZOPE_TAL_NS:
+                ns = 'tal'
+            elif uri == ZOPE_METAL_NS:
+                ns = 'metal'
+            return (prefixed, name, ns)
+        return (name, name, None)
+
+    def EndElementHandler(self, name):
+        name = self.fixname(name)[0]
+        self.gen.emitEndElement(name)
+
+    def DefaultHandler(self, text):
+        self.gen.emitRawText(text)
+
+def test():
+    import sys
+    p = TALParser()
+    file = "tests/input/test01.xml"
+    if sys.argv[1:]:
+        file = sys.argv[1]
+    p.parseFile(file)
+    program, macros = p.getCode()
+    from TALInterpreter import TALInterpreter
+    from DummyEngine import DummyEngine
+    engine = DummyEngine(macros)
+    TALInterpreter(program, macros, engine, sys.stdout, wrap=0)()
+
+if __name__ == "__main__":
+    test()
diff --git a/TAL/XMLParser.py b/TAL/XMLParser.py
new file mode 100644 (file)
index 0000000..71a65ab
--- /dev/null
@@ -0,0 +1,90 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+"""
+Generic expat-based XML parser base class.
+"""
+
+import zLOG
+
+class XMLParser:
+
+    ordered_attributes = 0
+
+    handler_names = [
+        "StartElementHandler",
+        "EndElementHandler",
+        "ProcessingInstructionHandler",
+        "CharacterDataHandler",
+        "UnparsedEntityDeclHandler",
+        "NotationDeclHandler",
+        "StartNamespaceDeclHandler",
+        "EndNamespaceDeclHandler",
+        "CommentHandler",
+        "StartCdataSectionHandler",
+        "EndCdataSectionHandler",
+        "DefaultHandler",
+        "DefaultHandlerExpand",
+        "NotStandaloneHandler",
+        "ExternalEntityRefHandler",
+        "XmlDeclHandler",
+        "StartDoctypeDeclHandler",
+        "EndDoctypeDeclHandler",
+        "ElementDeclHandler",
+        "AttlistDeclHandler"
+        ]
+
+    def __init__(self, encoding=None):
+        self.parser = p = self.createParser()
+        if self.ordered_attributes:
+            try:
+                self.parser.ordered_attributes = self.ordered_attributes
+            except AttributeError:
+                zLOG.LOG("TAL.XMLParser", zLOG.INFO, 
+                         "Can't set ordered_attributes")
+                self.ordered_attributes = 0
+        for name in self.handler_names:
+            method = getattr(self, name, None)
+            if method is not None:
+                try:
+                    setattr(p, name, method)
+                except AttributeError:
+                    zLOG.LOG("TAL.XMLParser", zLOG.PROBLEM,
+                             "Can't set expat handler %s" % name)
+
+    def createParser(self, encoding=None):
+        global XMLParseError
+        try:
+            from Products.ParsedXML.Expat import pyexpat
+            XMLParseError = pyexpat.ExpatError
+            return pyexpat.ParserCreate(encoding, ' ')
+        except ImportError:
+            from xml.parsers import expat
+            XMLParseError = expat.ExpatError
+            return expat.ParserCreate(encoding, ' ')
+
+    def parseFile(self, filename):
+        self.parseStream(open(filename))
+
+    def parseString(self, s):
+        self.parser.Parse(s, 1)
+
+    def parseURL(self, url):
+        import urllib
+        self.parseStream(urllib.urlopen(url))
+
+    def parseStream(self, stream):
+        self.parser.ParseFile(stream)
+
+    def parseFragment(self, s, end=0):
+        self.parser.Parse(s, end)
diff --git a/TAL/__init__.py b/TAL/__init__.py
new file mode 100644 (file)
index 0000000..080ed5d
--- /dev/null
@@ -0,0 +1,14 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+""" Template Attribute Language package """
diff --git a/TAL/markupbase.py b/TAL/markupbase.py
new file mode 100644 (file)
index 0000000..eab134c
--- /dev/null
@@ -0,0 +1,306 @@
+"""Shared support for scanning document type declarations in HTML and XHTML."""
+
+import re
+import string
+
+# Bound match() methods of two precompiled patterns: a declaration name
+# token, and a single- or double-quoted string literal.  Both patterns
+# also consume any whitespace that follows the token.
+_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
+_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
+
+# Only the bound methods above are kept; drop the module name itself.
+del re
+
+
+class ParserBase:
+    """Parser base class which provides some common support methods used
+    by the SGML/HTML and XHTML parsers."""
+
+    def reset(self):
+        self.lineno = 1
+        self.offset = 0
+
+    def getpos(self):
+        """Return current line number and offset."""
+        return self.lineno, self.offset
+
+    # Internal -- update line number and offset.  This should be
+    # called for each piece of data exactly once, in order -- in other
+    # words the concatenation of all the input strings to this
+    # function should be exactly the entire input.
+    def updatepos(self, i, j):
+        if i >= j:
+            return j
+        rawdata = self.rawdata
+        nlines = string.count(rawdata, "\n", i, j)
+        if nlines:
+            self.lineno = self.lineno + nlines
+            pos = string.rindex(rawdata, "\n", i, j) # Should not fail
+            self.offset = j-(pos+1)
+        else:
+            self.offset = self.offset + j-i
+        return j
+
+    _decl_otherchars = ''
+
+    # Internal -- parse declaration (for use by subclasses).
+    def parse_declaration(self, i):
+        # This is some sort of declaration; in "HTML as
+        # deployed," this should only be the document type
+        # declaration ("<!DOCTYPE html...>").
+        rawdata = self.rawdata
+        import sys
+        j = i + 2
+        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
+        if rawdata[j:j+1] in ("-", ""):
+            # Start of comment followed by buffer boundary,
+            # or just a buffer boundary.
+            return -1
+        # in practice, this should look like: ((name|stringlit) S*)+ '>'
+        n = len(rawdata)
+        decltype, j = self._scan_name(j, i)
+        if j < 0:
+            return j
+        if decltype == "doctype":
+            self._decl_otherchars = ''
+        while j < n:
+            c = rawdata[j]
+            if c == ">":
+                # end of declaration syntax
+                data = rawdata[i+2:j]
+                if decltype == "doctype":
+                    self.handle_decl(data)
+                else:
+                    self.unknown_decl(data)
+                return j + 1
+            if c in "\"'":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1 # incomplete
+                j = m.end()
+            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                name, j = self._scan_name(j, i)
+            elif c in self._decl_otherchars:
+                j = j + 1
+            elif c == "[":
+                if decltype == "doctype":
+                    j = self._parse_doctype_subset(j + 1, i)
+                else:
+                    self.error("unexpected '[' char in declaration")
+            else:
+                self.error(
+                    "unexpected %s char in declaration" % `rawdata[j]`)
+            if j < 0:
+                return j
+        return -1 # incomplete
+
+    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
+    # returning the index just past any whitespace following the trailing ']'.
+    def _parse_doctype_subset(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        j = i
+        while j < n:
+            c = rawdata[j]
+            if c == "<":
+                s = rawdata[j:j+2]
+                if s == "<":
+                    # end of buffer; incomplete
+                    return -1
+                if s != "<!":
+                    self.updatepos(declstartpos, j + 1)
+                    self.error("unexpected char in internal subset (in %s)"
+                               % `s`)
+                if (j + 2) == n:
+                    # end of buffer; incomplete
+                    return -1
+                if (j + 4) > n:
+                    # end of buffer; incomplete
+                    return -1
+                if rawdata[j:j+4] == "<!--":
+                    j = self.parse_comment(j, report=0)
+                    if j < 0:
+                        return j
+                    continue
+                name, j = self._scan_name(j + 2, declstartpos)
+                if j == -1:
+                    return -1
+                if name not in ("attlist", "element", "entity", "notation"):
+                    self.updatepos(declstartpos, j + 2)
+                    self.error(
+                        "unknown declaration %s in internal subset" % `name`)
+                # handle the individual names
+                meth = getattr(self, "_parse_doctype_" + name)
+                j = meth(j, declstartpos)
+                if j < 0:
+                    return j
+            elif c == "%":
+                # parameter entity reference
+                if (j + 1) == n:
+                    # end of buffer; incomplete
+                    return -1
+                s, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                if rawdata[j] == ";":
+                    j = j + 1
+            elif c == "]":
+                j = j + 1
+                while j < n and rawdata[j] in string.whitespace:
+                    j = j + 1
+                if j < n:
+                    if rawdata[j] == ">":
+                        return j
+                    self.updatepos(declstartpos, j)
+                    self.error("unexpected char after internal subset")
+                else:
+                    return -1
+            elif c in string.whitespace:
+                j = j + 1
+            else:
+                self.updatepos(declstartpos, j)
+                self.error("unexpected char %s in internal subset" % `c`)
+        # end of buffer reached
+        return -1
+
+    # Internal -- scan past <!ELEMENT declarations
+    def _parse_doctype_element(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        name, j = self._scan_name(i, declstartpos)
+        if j == -1:
+            return -1
+        # style content model; just skip until '>'
+        if '>' in rawdata[j:]:
+            return string.find(rawdata, ">", j) + 1
+        return -1
+
+    # Internal -- scan past <!ATTLIST declarations
+    def _parse_doctype_attlist(self, i, declstartpos):
+        rawdata = self.rawdata
+        name, j = self._scan_name(i, declstartpos)
+        c = rawdata[j:j+1]
+        if c == "":
+            return -1
+        if c == ">":
+            return j + 1
+        while 1:
+            # scan a series of attribute descriptions; simplified:
+            #   name type [value] [#constraint]
+            name, j = self._scan_name(j, declstartpos)
+            if j < 0:
+                return j
+            c = rawdata[j:j+1]
+            if c == "":
+                return -1
+            if c == "(":
+                # an enumerated type; look for ')'
+                if ")" in rawdata[j:]:
+                    j = string.find(rawdata, ")", j) + 1
+                else:
+                    return -1
+                while rawdata[j:j+1] in string.whitespace:
+                    j = j + 1
+                if not rawdata[j:]:
+                    # end of buffer, incomplete
+                    return -1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+            c = rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == "#":
+                if rawdata[j:] == "#":
+                    # end of buffer
+                    return -1
+                name, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == '>':
+                # all done
+                return j + 1
+
+    # Internal -- scan past <!NOTATION declarations
+    def _parse_doctype_notation(self, i, declstartpos):
+        name, j = self._scan_name(i, declstartpos)
+        if j < 0:
+            return j
+        rawdata = self.rawdata
+        while 1:
+            c = rawdata[j:j+1]
+            if not c:
+                # end of buffer; incomplete
+                return -1
+            if c == '>':
+                return j + 1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1
+                j = m.end()
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan past <!ENTITY declarations
+    def _parse_doctype_entity(self, i, declstartpos):
+        rawdata = self.rawdata
+        if rawdata[i:i+1] == "%":
+            j = i + 1
+            while 1:
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+                if c in string.whitespace:
+                    j = j + 1
+                else:
+                    break
+        else:
+            j = i
+        name, j = self._scan_name(j, declstartpos)
+        if j < 0:
+            return j
+        while 1:
+            c = self.rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1    # incomplete
+            elif c == ">":
+                return j + 1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan a name token and the new position and the token, or
+    # return -1 if we've reached the end of the buffer.
+    def _scan_name(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        if i == n:
+            return None, -1
+        m = _declname_match(rawdata, i)
+        if m:
+            s = m.group()
+            name = string.strip(s)
+            if (i + len(s)) == n:
+                return None, -1  # end of buffer
+            return string.lower(name), m.end()
+        else:
+            self.updatepos(declstartpos, i)
+            self.error("expected name token", self.getpos())