summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 381116f)
raw | patch | inline | side by side (parent: 381116f)
author | richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Fri, 30 Aug 2002 08:23:53 +0000 (08:23 +0000) | ||
committer | richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Fri, 30 Aug 2002 08:23:53 +0000 (08:23 +0000) |
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@999 57a73879-2fb5-44c3-a270-3262357dd7e2
TAL/.cvsignore | [new file with mode: 0644] | patch | blob |
TAL/HTMLParser.py | [new file with mode: 0644] | patch | blob |
TAL/README.txt | [new file with mode: 0644] | patch | blob |
TAL/TALDefs.py | [new file with mode: 0644] | patch | blob |
TAL/TALGenerator.py | [new file with mode: 0644] | patch | blob |
TAL/TALInterpreter.py | [new file with mode: 0644] | patch | blob |
TAL/TALParser.py | [new file with mode: 0644] | patch | blob |
TAL/XMLParser.py | [new file with mode: 0644] | patch | blob |
TAL/__init__.py | [new file with mode: 0644] | patch | blob |
TAL/markupbase.py | [new file with mode: 0644] | patch | blob |
diff --git a/TAL/.cvsignore b/TAL/.cvsignore
--- /dev/null
+++ b/TAL/.cvsignore
@@ -0,0 +1,2 @@
+.path
+*.pyc
diff --git a/TAL/HTMLParser.py b/TAL/HTMLParser.py
--- /dev/null
+++ b/TAL/HTMLParser.py
@@ -0,0 +1,403 @@
+"""A parser for HTML and XHTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import markupbase
+import re
+import string
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')
+interesting_cdata = re.compile(r'<(/|\Z)')
+incomplete = re.compile('&[a-zA-Z#]')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+
+starttagopen = re.compile('<[a-zA-Z]')
+piclose = re.compile('>')
+endtagopen = re.compile('</')
+commentclose = re.compile(r'--\s*>')
+tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
+attrfind = re.compile(
+ r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
+
+locatestarttagend = re.compile(r"""
+ <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
+ (?:\s+ # whitespace before attribute name
+ (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
+ (?:\s*=\s* # value indicator
+ (?:'[^']*' # LITA-enclosed value
+ |\"[^\"]*\" # LIT-enclosed value
+ |[^'\">\s]+ # bare value
+ )
+ )?
+ )
+ )*
+ \s* # trailing whitespace
+""", re.VERBOSE)
+endendtag = re.compile('>')
+endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+
+
+class HTMLParseError(Exception):
+ """Exception raised for all parse errors."""
+
+ def __init__(self, msg, position=(None, None)):
+ assert msg
+ self.msg = msg
+ self.lineno = position[0]
+ self.offset = position[1]
+
+ def __str__(self):
+ result = self.msg
+ if self.lineno is not None:
+ result = result + ", at line %d" % self.lineno
+ if self.offset is not None:
+ result = result + ", column %d" % (self.offset + 1)
+ return result
+
+
+def _contains_at(s, sub, pos):
+ return s[pos:pos+len(sub)] == sub
+
+
+class HTMLParser(markupbase.ParserBase):
+ """Find tags and other markup and call handler functions.
+
+ Usage:
+ p = HTMLParser()
+ p.feed(data)
+ ...
+ p.close()
+
+ Start tags are handled by calling self.handle_starttag() or
+ self.handle_startendtag(); end tags by self.handle_endtag(). The
+ data between tags is passed from the parser to the derived class
+ by calling self.handle_data() with the data as argument (the data
+ may be split up in arbitrary chunks). Entity references are
+ passed by calling self.handle_entityref() with the entity
+ reference as the argument. Numeric character references are
+ passed to self.handle_charref() with the string containing the
+ reference as the argument.
+ """
+
+ CDATA_CONTENT_ELEMENTS = ("script", "style")
+
+
+ def __init__(self):
+ """Initialize and reset this instance."""
+ self.reset()
+
+ def reset(self):
+ """Reset this instance. Loses all unprocessed data."""
+ self.rawdata = ''
+ self.stack = []
+ self.lasttag = '???'
+ self.interesting = interesting_normal
+ markupbase.ParserBase.reset(self)
+
+ def feed(self, data):
+ """Feed data to the parser.
+
+ Call this as often as you want, with as little or as much text
+ as you want (may include '\n').
+ """
+ self.rawdata = self.rawdata + data
+ self.goahead(0)
+
+ def close(self):
+ """Handle any buffered data."""
+ self.goahead(1)
+
+ def error(self, message):
+ raise HTMLParseError(message, self.getpos())
+
+ __starttag_text = None
+
+ def get_starttag_text(self):
+ """Return full source of start tag: '<...>'."""
+ return self.__starttag_text
+
+ cdata_endtag = None
+
+ def set_cdata_mode(self, endtag=None):
+ self.cdata_endtag = endtag
+ self.interesting = interesting_cdata
+
+ def clear_cdata_mode(self):
+ self.cdata_endtag = None
+ self.interesting = interesting_normal
+
+ # Internal -- handle data as far as reasonable. May leave state
+ # and data to be processed by a subsequent call. If 'end' is
+ # true, force handling all data as if followed by EOF marker.
+ def goahead(self, end):
+ rawdata = self.rawdata
+ i = 0
+ n = len(rawdata)
+ while i < n:
+ match = self.interesting.search(rawdata, i) # < or &
+ if match:
+ j = match.start()
+ else:
+ j = n
+ if i < j: self.handle_data(rawdata[i:j])
+ i = self.updatepos(i, j)
+ if i == n: break
+ if rawdata[i] == '<':
+ if starttagopen.match(rawdata, i): # < + letter
+ k = self.parse_starttag(i)
+ elif endtagopen.match(rawdata, i): # </
+ k = self.parse_endtag(i)
+ elif _contains_at(rawdata, "<!--", i): # <!--
+ k = self.parse_comment(i)
+ elif _contains_at(rawdata, "<!", i): # <!
+ k = self.parse_declaration(i)
+ elif _contains_at(rawdata, "<?", i): # <?
+ k = self.parse_pi(i)
+ elif _contains_at(rawdata, "<?", i): # <!
+ k = self.parse_declaration(i)
+ elif (i + 1) < n:
+ self.handle_data("<")
+ k = i + 1
+ else:
+ break
+ if k < 0:
+ if end:
+ self.error("EOF in middle of construct")
+ break
+ i = self.updatepos(i, k)
+ elif rawdata[i:i+2] == "&#":
+ match = charref.match(rawdata, i)
+ if match:
+ name = match.group()[2:-1]
+ self.handle_charref(name)
+ k = match.end()
+ if rawdata[k-1] != ';':
+ k = k - 1
+ i = self.updatepos(i, k)
+ continue
+ else:
+ break
+ elif rawdata[i] == '&':
+ match = entityref.match(rawdata, i)
+ if match:
+ name = match.group(1)
+ self.handle_entityref(name)
+ k = match.end()
+ if rawdata[k-1] != ';':
+ k = k - 1
+ i = self.updatepos(i, k)
+ continue
+ match = incomplete.match(rawdata, i)
+ if match:
+ # match.group() will contain at least 2 chars
+ rest = rawdata[i:]
+ if end and match.group() == rest:
+ self.error("EOF in middle of entity or char ref")
+ # incomplete
+ break
+ elif (i + 1) < n:
+ # not the end of the buffer, and can't be confused
+ # with some other construct
+ self.handle_data("&")
+ i = self.updatepos(i, i + 1)
+ else:
+ break
+ else:
+ assert 0, "interesting.search() lied"
+ # end while
+ if end and i < n:
+ self.handle_data(rawdata[i:n])
+ i = self.updatepos(i, n)
+ self.rawdata = rawdata[i:]
+
+ # Internal -- parse comment, return end or -1 if not terminated
+ def parse_comment(self, i, report=1):
+ rawdata = self.rawdata
+ assert rawdata[i:i+4] == '<!--', 'unexpected call to parse_comment()'
+ match = commentclose.search(rawdata, i+4)
+ if not match:
+ return -1
+ if report:
+ j = match.start()
+ self.handle_comment(rawdata[i+4: j])
+ j = match.end()
+ return j
+
+ # Internal -- parse processing instr, return end or -1 if not terminated
+ def parse_pi(self, i):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
+ match = piclose.search(rawdata, i+2) # >
+ if not match:
+ return -1
+ j = match.start()
+ self.handle_pi(rawdata[i+2: j])
+ j = match.end()
+ return j
+
+ # Internal -- handle starttag, return end or -1 if not terminated
+ def parse_starttag(self, i):
+ self.__starttag_text = None
+ endpos = self.check_for_whole_start_tag(i)
+ if endpos < 0:
+ return endpos
+ rawdata = self.rawdata
+ self.__starttag_text = rawdata[i:endpos]
+
+ # Now parse the data between i+1 and j into a tag and attrs
+ attrs = []
+ match = tagfind.match(rawdata, i+1)
+ assert match, 'unexpected call to parse_starttag()'
+ k = match.end()
+ self.lasttag = tag = string.lower(rawdata[i+1:k])
+
+ while k < endpos:
+ m = attrfind.match(rawdata, k)
+ if not m:
+ break
+ attrname, rest, attrvalue = m.group(1, 2, 3)
+ if not rest:
+ attrvalue = None
+ elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+ attrvalue[:1] == '"' == attrvalue[-1:]:
+ attrvalue = attrvalue[1:-1]
+ attrvalue = self.unescape(attrvalue)
+ attrs.append((string.lower(attrname), attrvalue))
+ k = m.end()
+
+ end = string.strip(rawdata[k:endpos])
+ if end not in (">", "/>"):
+ lineno, offset = self.getpos()
+ if "\n" in self.__starttag_text:
+ lineno = lineno + string.count(self.__starttag_text, "\n")
+ offset = len(self.__starttag_text) \
+ - string.rfind(self.__starttag_text, "\n")
+ else:
+ offset = offset + len(self.__starttag_text)
+ self.error("junk characters in start tag: %s"
+ % `rawdata[k:endpos][:20]`)
+ if end[-2:] == '/>':
+ # XHTML-style empty tag: <span attr="value" />
+ self.handle_startendtag(tag, attrs)
+ else:
+ self.handle_starttag(tag, attrs)
+ if tag in self.CDATA_CONTENT_ELEMENTS:
+ self.set_cdata_mode(tag)
+ return endpos
+
+ # Internal -- check to see if we have a complete starttag; return end
+ # or -1 if incomplete.
+ def check_for_whole_start_tag(self, i):
+ rawdata = self.rawdata
+ m = locatestarttagend.match(rawdata, i)
+ if m:
+ j = m.end()
+ next = rawdata[j:j+1]
+ if next == ">":
+ return j + 1
+ if next == "/":
+ s = rawdata[j:j+2]
+ if s == "/>":
+ return j + 2
+ if s == "/":
+ # buffer boundary
+ return -1
+ # else bogus input
+ self.updatepos(i, j + 1)
+ self.error("malformed empty start tag")
+ if next == "":
+ # end of input
+ return -1
+ if next in ("abcdefghijklmnopqrstuvwxyz=/"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+ # end of input in or before attribute value, or we have the
+ # '/' from a '/>' ending
+ return -1
+ self.updatepos(i, j)
+ self.error("malformed start tag")
+ raise AssertionError("we should not get here!")
+
+ # Internal -- parse endtag, return end or -1 if incomplete
+ def parse_endtag(self, i):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
+ match = endendtag.search(rawdata, i+1) # >
+ if not match:
+ return -1
+ j = match.end()
+ match = endtagfind.match(rawdata, i) # </ + tag + >
+ if not match:
+ self.error("bad end tag: %s" % `rawdata[i:j]`)
+ tag = string.lower(match.group(1))
+ if ( self.cdata_endtag is not None
+ and tag != self.cdata_endtag):
+ # Should be a mismatched end tag, but we'll treat it
+ # as text anyway, since most HTML authors aren't
+ # interested in the finer points of syntax.
+ self.handle_data(match.group(0))
+ else:
+ self.handle_endtag(tag)
+ self.clear_cdata_mode()
+ return j
+
+ # Overridable -- finish processing of start+end tag: <tag.../>
+ def handle_startendtag(self, tag, attrs):
+ self.handle_starttag(tag, attrs)
+ self.handle_endtag(tag)
+
+ # Overridable -- handle start tag
+ def handle_starttag(self, tag, attrs):
+ pass
+
+ # Overridable -- handle end tag
+ def handle_endtag(self, tag):
+ pass
+
+ # Overridable -- handle character reference
+ def handle_charref(self, name):
+ pass
+
+ # Overridable -- handle entity reference
+ def handle_entityref(self, name):
+ pass
+
+ # Overridable -- handle data
+ def handle_data(self, data):
+ pass
+
+ # Overridable -- handle comment
+ def handle_comment(self, data):
+ pass
+
+ # Overridable -- handle declaration
+ def handle_decl(self, decl):
+ pass
+
+ # Overridable -- handle processing instruction
+ def handle_pi(self, data):
+ pass
+
+ def unknown_decl(self, data):
+ self.error("unknown declaration: " + `data`)
+
+ # Internal -- helper to remove special character quoting
+ def unescape(self, s):
+ if '&' not in s:
+ return s
+ s = string.replace(s, "<", "<")
+ s = string.replace(s, ">", ">")
+ s = string.replace(s, "'", "'")
+ s = string.replace(s, """, '"')
+ s = string.replace(s, "&", "&") # Must be last
+ return s
diff --git a/TAL/README.txt b/TAL/README.txt
--- /dev/null
+++ b/TAL/README.txt
@@ -0,0 +1,97 @@
+TAL - Template Attribute Language
+---------------------------------
+
+This is an implementation of TAL, the Zope Template Attribute
+Language. For TAL, see the Zope Presentation Templates ZWiki:
+
+ http://dev.zope.org/Wikis/DevSite/Projects/ZPT/FrontPage
+
+It is not a Zope product nor is it designed exclusively to run inside
+of Zope, but if you have a Zope checkout that includes
+Products/ParsedXML, its Expat parser will be used.
+
+Prerequisites
+-------------
+
+You need:
+
+- A recent checkout of Zope2; don't forget to run the wo_pcgi.py
+ script to compile everything. (See above -- this is now optional.)
+
+- A recent checkout of the Zope2 product ParsedXML, accessible
+ throught <Zope2>/lib/python/Products/ParsedXML; don't forget to run
+ the setup.py script to compiles Expat. (Again, optional.)
+
+- Python 1.5.2; the driver script refuses to work with other versions
+ unless you specify the -n option; this is done so that I don't
+ accidentally use Python 2.x features.
+
+- Create a .path file containing proper module search path; it should
+ point the <Zope2>/lib/python directory that you want to use.
+
+How To Play
+-----------
+
+(Don't forget to edit .path, see above!)
+
+The script driver.py takes an XML file with TAL markup as argument and
+writes the expanded version to standard output. The filename argument
+defaults to tests/input/test01.xml.
+
+Regression test
+---------------
+
+There are unit test suites in the 'tests' subdirectory; these can be
+run with tests/run.py. This should print the testcase names plus
+progress info, followed by a final line saying "OK". It requires that
+../unittest.py exists.
+
+There are a number of test files in the 'tests' subdirectory, named
+tests/input/test<number>.xml and tests/input/test<number>.html. The
+Python script ./runtest.py calls driver.main() for each test file, and
+should print "<file> OK" for each one. These tests are also run as
+part of the unit test suites, so tests/run.py is all you need.
+
+What's Here
+-----------
+
+DummyEngine.py simple-minded TALES execution engine
+TALInterpreter.py class to interpret intermediate code
+TALGenerator.py class to generate intermediate code
+XMLParser.py base class to parse XML, avoiding DOM
+TALParser.py class to parse XML with TAL into intermediate code
+HTMLTALParser.py class to parse HTML with TAL into intermediate code
+HTMLParser.py HTML-parsing base class
+driver.py script to demonstrate TAL expansion
+timer.py script to time various processing phases
+setpath.py hack to set sys.path and import ZODB
+__init__.py empty file that makes this directory a package
+runtest.py Python script to run file-comparison tests
+ndiff.py helper for runtest.py to produce diffs
+tests/ drectory with test files and output
+tests/run.py Python script to run all tests
+
+Author and License
+------------------
+
+This code is written by Guido van Rossum (project lead), Fred Drake,
+and Tim Peters. It is owned by Digital Creations and can be
+redistributed under the Zope Public License.
+
+TO DO
+-----
+
+(See also http://www.zope.org/Members/jim/ZPTIssueTracker .)
+
+- Need to remove leading whitespace and newline when omitting an
+ element (either through tal:replace with a value of nothing or
+ tal:condition with a false condition).
+
+- Empty TAL/METAL attributes are ignored: tal:replace="" is ignored
+ rather than causing an error.
+
+- HTMLTALParser.py and TALParser.py are silly names. Should be
+ HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened,
+ without "TAL"?)
+
+- Should we preserve case of tags and attribute names in HTML?
diff --git a/TAL/TALDefs.py b/TAL/TALDefs.py
--- /dev/null
+++ b/TAL/TALDefs.py
@@ -0,0 +1,145 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""
+Common definitions used by TAL and METAL compilation an transformation.
+"""
+
+from types import ListType, TupleType
+
+TAL_VERSION = "1.3.2"
+
+XML_NS = "http://www.w3.org/XML/1998/namespace" # URI for XML namespace
+XMLNS_NS = "http://www.w3.org/2000/xmlns/" # URI for XML NS declarations
+
+ZOPE_TAL_NS = "http://xml.zope.org/namespaces/tal"
+ZOPE_METAL_NS = "http://xml.zope.org/namespaces/metal"
+
+NAME_RE = "[a-zA-Z_][a-zA-Z0-9_]*"
+
+KNOWN_METAL_ATTRIBUTES = [
+ "define-macro",
+ "use-macro",
+ "define-slot",
+ "fill-slot",
+ "slot"
+ ]
+
+KNOWN_TAL_ATTRIBUTES = [
+ "define",
+ "condition",
+ "content",
+ "replace",
+ "repeat",
+ "attributes",
+ "on-error",
+ "omit-tag",
+ "tal tag",
+ ]
+
+class TALError(Exception):
+
+ def __init__(self, msg, position=(None, None)):
+ assert msg != ""
+ self.msg = msg
+ self.lineno = position[0]
+ self.offset = position[1]
+
+ def __str__(self):
+ result = self.msg
+ if self.lineno is not None:
+ result = result + ", at line %d" % self.lineno
+ if self.offset is not None:
+ result = result + ", column %d" % (self.offset + 1)
+ return result
+
+class METALError(TALError):
+ pass
+
+class TALESError(TALError):
+ pass
+
+class ErrorInfo:
+
+ def __init__(self, err, position=(None, None)):
+ if isinstance(err, Exception):
+ self.type = err.__class__
+ self.value = err
+ else:
+ self.type = err
+ self.value = None
+ self.lineno = position[0]
+ self.offset = position[1]
+
+import re
+_attr_re = re.compile(r"\s*([^\s]+)\s+([^\s].*)\Z", re.S)
+_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)
+del re
+
+def parseAttributeReplacements(arg):
+ dict = {}
+ for part in splitParts(arg):
+ m = _attr_re.match(part)
+ if not m:
+ raise TALError("Bad syntax in attributes:" + `part`)
+ name, expr = m.group(1, 2)
+ if dict.has_key(name):
+ raise TALError("Duplicate attribute name in attributes:" + `part`)
+ dict[name] = expr
+ return dict
+
+def parseSubstitution(arg, position=(None, None)):
+ m = _subst_re.match(arg)
+ if not m:
+ raise TALError("Bad syntax in substitution text: " + `arg`, position)
+ key, expr = m.group(1, 2)
+ if not key:
+ key = "text"
+ return key, expr
+
+def splitParts(arg):
+ # Break in pieces at undoubled semicolons and
+ # change double semicolons to singles:
+ import string
+ arg = string.replace(arg, ";;", "\0")
+ parts = string.split(arg, ';')
+ parts = map(lambda s, repl=string.replace: repl(s, "\0", ";"), parts)
+ if len(parts) > 1 and not string.strip(parts[-1]):
+ del parts[-1] # It ended in a semicolon
+ return parts
+
+def isCurrentVersion(program):
+ version = getProgramVersion(program)
+ return version == TAL_VERSION
+
+def getProgramMode(program):
+ version = getProgramVersion(program)
+ if (version == TAL_VERSION and isinstance(program[1], TupleType) and
+ len(program[1]) == 2):
+ opcode, mode = program[1]
+ if opcode == "mode":
+ return mode
+ return None
+
+def getProgramVersion(program):
+ if (len(program) >= 2 and
+ isinstance(program[0], TupleType) and len(program[0]) == 2):
+ opcode, version = program[0]
+ if opcode == "version":
+ return version
+ return None
+
+import cgi
+def quote(s, escape=cgi.escape):
+ return '"%s"' % escape(s, 1)
+del cgi
diff --git a/TAL/TALGenerator.py b/TAL/TALGenerator.py
--- /dev/null
+++ b/TAL/TALGenerator.py
@@ -0,0 +1,583 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""
+Code generator for TALInterpreter intermediate code.
+"""
+
+import string
+import re
+import cgi
+
+from TALDefs import *
+
+class TALGenerator:
+
+ inMacroUse = 0
+ inMacroDef = 0
+ source_file = None
+
+ def __init__(self, expressionCompiler=None, xml=1, source_file=None):
+ if not expressionCompiler:
+ from DummyEngine import DummyEngine
+ expressionCompiler = DummyEngine()
+ self.expressionCompiler = expressionCompiler
+ self.CompilerError = expressionCompiler.getCompilerError()
+ self.program = []
+ self.stack = []
+ self.todoStack = []
+ self.macros = {}
+ self.slots = {}
+ self.slotStack = []
+ self.xml = xml
+ self.emit("version", TAL_VERSION)
+ self.emit("mode", xml and "xml" or "html")
+ if source_file is not None:
+ self.source_file = source_file
+ self.emit("setSourceFile", source_file)
+
+ def getCode(self):
+ assert not self.stack
+ assert not self.todoStack
+ return self.optimize(self.program), self.macros
+
+ def optimize(self, program):
+ output = []
+ collect = []
+ rawseen = cursor = 0
+ if self.xml:
+ endsep = "/>"
+ else:
+ endsep = " />"
+ for cursor in xrange(len(program)+1):
+ try:
+ item = program[cursor]
+ except IndexError:
+ item = (None, None)
+ opcode = item[0]
+ if opcode == "rawtext":
+ collect.append(item[1])
+ continue
+ if opcode == "endTag":
+ collect.append("</%s>" % item[1])
+ continue
+ if opcode == "startTag":
+ if self.optimizeStartTag(collect, item[1], item[2], ">"):
+ continue
+ if opcode == "startEndTag":
+ if self.optimizeStartTag(collect, item[1], item[2], endsep):
+ continue
+ if opcode in ("beginScope", "endScope"):
+ # Push *Scope instructions in front of any text instructions;
+ # this allows text instructions separated only by *Scope
+ # instructions to be joined together.
+ output.append(self.optimizeArgsList(item))
+ continue
+ text = string.join(collect, "")
+ if text:
+ i = string.rfind(text, "\n")
+ if i >= 0:
+ i = len(text) - (i + 1)
+ output.append(("rawtextColumn", (text, i)))
+ else:
+ output.append(("rawtextOffset", (text, len(text))))
+ if opcode != None:
+ output.append(self.optimizeArgsList(item))
+ rawseen = cursor+1
+ collect = []
+ return self.optimizeCommonTriple(output)
+
+ def optimizeArgsList(self, item):
+ if len(item) == 2:
+ return item
+ else:
+ return item[0], tuple(item[1:])
+
+ actionIndex = {"replace":0, "insert":1, "metal":2, "tal":3, "xmlns":4,
+ 0: 0, 1: 1, 2: 2, 3: 3, 4: 4}
+ def optimizeStartTag(self, collect, name, attrlist, end):
+ if not attrlist:
+ collect.append("<%s%s" % (name, end))
+ return 1
+ opt = 1
+ new = ["<" + name]
+ for i in range(len(attrlist)):
+ item = attrlist[i]
+ if len(item) > 2:
+ opt = 0
+ name, value, action = item[:3]
+ action = self.actionIndex[action]
+ attrlist[i] = (name, value, action) + item[3:]
+ else:
+ if item[1] is None:
+ s = item[0]
+ else:
+ s = "%s=%s" % (item[0], quote(item[1]))
+ attrlist[i] = item[0], s
+ if item[1] is None:
+ new.append(" " + item[0])
+ else:
+ new.append(" %s=%s" % (item[0], quote(item[1])))
+ if opt:
+ new.append(end)
+ collect.extend(new)
+ return opt
+
+ def optimizeCommonTriple(self, program):
+ if len(program) < 3:
+ return program
+ output = program[:2]
+ prev2, prev1 = output
+ for item in program[2:]:
+ if ( item[0] == "beginScope"
+ and prev1[0] == "setPosition"
+ and prev2[0] == "rawtextColumn"):
+ position = output.pop()[1]
+ text, column = output.pop()[1]
+ prev1 = None, None
+ closeprev = 0
+ if output and output[-1][0] == "endScope":
+ closeprev = 1
+ output.pop()
+ item = ("rawtextBeginScope",
+ (text, column, position, closeprev, item[1]))
+ output.append(item)
+ prev2 = prev1
+ prev1 = item
+ return output
+
+ def todoPush(self, todo):
+ self.todoStack.append(todo)
+
+ def todoPop(self):
+ return self.todoStack.pop()
+
+ def compileExpression(self, expr):
+ try:
+ return self.expressionCompiler.compile(expr)
+ except self.CompilerError, err:
+ raise TALError('%s in expression %s' % (err.args[0], `expr`),
+ self.position)
+
+ def pushProgram(self):
+ self.stack.append(self.program)
+ self.program = []
+
+ def popProgram(self):
+ program = self.program
+ self.program = self.stack.pop()
+ return self.optimize(program)
+
+ def pushSlots(self):
+ self.slotStack.append(self.slots)
+ self.slots = {}
+
+ def popSlots(self):
+ slots = self.slots
+ self.slots = self.slotStack.pop()
+ return slots
+
+ def emit(self, *instruction):
+ self.program.append(instruction)
+
+ def emitStartTag(self, name, attrlist, isend=0):
+ if isend:
+ opcode = "startEndTag"
+ else:
+ opcode = "startTag"
+ self.emit(opcode, name, attrlist)
+
+ def emitEndTag(self, name):
+ if self.xml and self.program and self.program[-1][0] == "startTag":
+ # Minimize empty element
+ self.program[-1] = ("startEndTag",) + self.program[-1][1:]
+ else:
+ self.emit("endTag", name)
+
+ def emitOptTag(self, name, optTag, isend):
+ program = self.popProgram() #block
+ start = self.popProgram() #start tag
+ if (isend or not program) and self.xml:
+ # Minimize empty element
+ start[-1] = ("startEndTag",) + start[-1][1:]
+ isend = 1
+ cexpr = optTag[0]
+ if cexpr:
+ cexpr = self.compileExpression(optTag[0])
+ self.emit("optTag", name, cexpr, optTag[1], isend, start, program)
+
+ def emitRawText(self, text):
+ self.emit("rawtext", text)
+
+ def emitText(self, text):
+ self.emitRawText(cgi.escape(text))
+
+ def emitDefines(self, defines):
+ for part in splitParts(defines):
+ m = re.match(
+ r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part)
+ if not m:
+ raise TALError("invalid define syntax: " + `part`,
+ self.position)
+ scope, name, expr = m.group(1, 2, 3)
+ scope = scope or "local"
+ cexpr = self.compileExpression(expr)
+ if scope == "local":
+ self.emit("setLocal", name, cexpr)
+ else:
+ self.emit("setGlobal", name, cexpr)
+
+ def emitOnError(self, name, onError):
+ block = self.popProgram()
+ key, expr = parseSubstitution(onError)
+ cexpr = self.compileExpression(expr)
+ if key == "text":
+ self.emit("insertText", cexpr, [])
+ else:
+ assert key == "structure"
+ self.emit("insertStructure", cexpr, {}, [])
+ self.emitEndTag(name)
+ handler = self.popProgram()
+ self.emit("onError", block, handler)
+
+ def emitCondition(self, expr):
+ cexpr = self.compileExpression(expr)
+ program = self.popProgram()
+ self.emit("condition", cexpr, program)
+
+ def emitRepeat(self, arg):
+ m = re.match("(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg)
+ if not m:
+ raise TALError("invalid repeat syntax: " + `arg`,
+ self.position)
+ name, expr = m.group(1, 2)
+ cexpr = self.compileExpression(expr)
+ program = self.popProgram()
+ self.emit("loop", name, cexpr, program)
+
+ def emitSubstitution(self, arg, attrDict={}):
+ key, expr = parseSubstitution(arg)
+ cexpr = self.compileExpression(expr)
+ program = self.popProgram()
+ if key == "text":
+ self.emit("insertText", cexpr, program)
+ else:
+ assert key == "structure"
+ self.emit("insertStructure", cexpr, attrDict, program)
+
+ def emitDefineMacro(self, macroName):
+ program = self.popProgram()
+ macroName = string.strip(macroName)
+ if self.macros.has_key(macroName):
+ raise METALError("duplicate macro definition: %s" % `macroName`,
+ self.position)
+ if not re.match('%s$' % NAME_RE, macroName):
+ raise METALError("invalid macro name: %s" % `macroName`,
+ self.position)
+ self.macros[macroName] = program
+ self.inMacroDef = self.inMacroDef - 1
+ self.emit("defineMacro", macroName, program)
+
+ def emitUseMacro(self, expr):
+ cexpr = self.compileExpression(expr)
+ program = self.popProgram()
+ self.inMacroUse = 0
+ self.emit("useMacro", expr, cexpr, self.popSlots(), program)
+
+ def emitDefineSlot(self, slotName):
+ program = self.popProgram()
+ slotName = string.strip(slotName)
+ if not re.match('%s$' % NAME_RE, slotName):
+ raise METALError("invalid slot name: %s" % `slotName`,
+ self.position)
+ self.emit("defineSlot", slotName, program)
+
+ def emitFillSlot(self, slotName):
+ program = self.popProgram()
+ slotName = string.strip(slotName)
+ if self.slots.has_key(slotName):
+ raise METALError("duplicate fill-slot name: %s" % `slotName`,
+ self.position)
+ if not re.match('%s$' % NAME_RE, slotName):
+ raise METALError("invalid slot name: %s" % `slotName`,
+ self.position)
+ self.slots[slotName] = program
+ self.inMacroUse = 1
+ self.emit("fillSlot", slotName, program)
+
+ def unEmitWhitespace(self):
+ collect = []
+ i = len(self.program) - 1
+ while i >= 0:
+ item = self.program[i]
+ if item[0] != "rawtext":
+ break
+ text = item[1]
+ if not re.match(r"\A\s*\Z", text):
+ break
+ collect.append(text)
+ i = i-1
+ del self.program[i+1:]
+ if i >= 0 and self.program[i][0] == "rawtext":
+ text = self.program[i][1]
+ m = re.search(r"\s+\Z", text)
+ if m:
+ self.program[i] = ("rawtext", text[:m.start()])
+ collect.append(m.group())
+ collect.reverse()
+ return string.join(collect, "")
+
+ def unEmitNewlineWhitespace(self):
+ collect = []
+ i = len(self.program)
+ while i > 0:
+ i = i-1
+ item = self.program[i]
+ if item[0] != "rawtext":
+ break
+ text = item[1]
+ if re.match(r"\A[ \t]*\Z", text):
+ collect.append(text)
+ continue
+ m = re.match(r"(?s)^(.*)(\n[ \t]*)\Z", text)
+ if not m:
+ break
+ text, rest = m.group(1, 2)
+ collect.reverse()
+ rest = rest + string.join(collect, "")
+ del self.program[i:]
+ if text:
+ self.emit("rawtext", text)
+ return rest
+ return None
+
+ def replaceAttrs(self, attrlist, repldict):
+ if not repldict:
+ return attrlist
+ newlist = []
+ for item in attrlist:
+ key = item[0]
+ if repldict.has_key(key):
+ item = item[:2] + ("replace", repldict[key])
+ del repldict[key]
+ newlist.append(item)
+ for key, value in repldict.items(): # Add dynamic-only attributes
+ item = (key, None, "insert", value)
+ newlist.append(item)
+ return newlist
+
+ def emitStartElement(self, name, attrlist, taldict, metaldict,
+ position=(None, None), isend=0):
+ if not taldict and not metaldict:
+ # Handle the simple, common case
+ self.emitStartTag(name, attrlist, isend)
+ self.todoPush({})
+ if isend:
+ self.emitEndElement(name, isend)
+ return
+
+ self.position = position
+ for key, value in taldict.items():
+ if key not in KNOWN_TAL_ATTRIBUTES:
+ raise TALError("bad TAL attribute: " + `key`, position)
+ if not (value or key == 'omit-tag'):
+ raise TALError("missing value for TAL attribute: " +
+ `key`, position)
+ for key, value in metaldict.items():
+ if key not in KNOWN_METAL_ATTRIBUTES:
+ raise METALError("bad METAL attribute: " + `key`,
+ position)
+ if not value:
+ raise TALError("missing value for METAL attribute: " +
+ `key`, position)
+ todo = {}
+ defineMacro = metaldict.get("define-macro")
+ useMacro = metaldict.get("use-macro")
+ defineSlot = metaldict.get("define-slot")
+ fillSlot = metaldict.get("fill-slot")
+ define = taldict.get("define")
+ condition = taldict.get("condition")
+ repeat = taldict.get("repeat")
+ content = taldict.get("content")
+ replace = taldict.get("replace")
+ attrsubst = taldict.get("attributes")
+ onError = taldict.get("on-error")
+ omitTag = taldict.get("omit-tag")
+ TALtag = taldict.get("tal tag")
+ if len(metaldict) > 1 and (defineMacro or useMacro):
+ raise METALError("define-macro and use-macro cannot be used "
+ "together or with define-slot or fill-slot",
+ position)
+ if content and replace:
+ raise TALError("content and replace are mutually exclusive",
+ position)
+
+ repeatWhitespace = None
+ if repeat:
+ # Hack to include preceding whitespace in the loop program
+ repeatWhitespace = self.unEmitNewlineWhitespace()
+ if position != (None, None):
+ # XXX at some point we should insist on a non-trivial position
+ self.emit("setPosition", position)
+ if self.inMacroUse:
+ if fillSlot:
+ self.pushProgram()
+ if self.source_file is not None:
+ self.emit("setSourceFile", self.source_file)
+ todo["fillSlot"] = fillSlot
+ self.inMacroUse = 0
+ else:
+ if fillSlot:
+ raise METALError, ("fill-slot must be within a use-macro",
+ position)
+ if not self.inMacroUse:
+ if defineMacro:
+ self.pushProgram()
+ self.emit("version", TAL_VERSION)
+ self.emit("mode", self.xml and "xml" or "html")
+ if self.source_file is not None:
+ self.emit("setSourceFile", self.source_file)
+ todo["defineMacro"] = defineMacro
+ self.inMacroDef = self.inMacroDef + 1
+ if useMacro:
+ self.pushSlots()
+ self.pushProgram()
+ todo["useMacro"] = useMacro
+ self.inMacroUse = 1
+ if defineSlot:
+ if not self.inMacroDef:
+ raise METALError, (
+ "define-slot must be within a define-macro",
+ position)
+ self.pushProgram()
+ todo["defineSlot"] = defineSlot
+
+ if taldict:
+ dict = {}
+ for item in attrlist:
+ key, value = item[:2]
+ dict[key] = value
+ self.emit("beginScope", dict)
+ todo["scope"] = 1
+ if onError:
+ self.pushProgram() # handler
+ self.emitStartTag(name, list(attrlist)) # Must copy attrlist!
+ self.pushProgram() # block
+ todo["onError"] = onError
+ if define:
+ self.emitDefines(define)
+ todo["define"] = define
+ if condition:
+ self.pushProgram()
+ todo["condition"] = condition
+ if repeat:
+ todo["repeat"] = repeat
+ self.pushProgram()
+ if repeatWhitespace:
+ self.emitText(repeatWhitespace)
+ if content:
+ todo["content"] = content
+ if replace:
+ todo["replace"] = replace
+ self.pushProgram()
+ optTag = omitTag is not None or TALtag
+ if optTag:
+ todo["optional tag"] = omitTag, TALtag
+ self.pushProgram()
+ if attrsubst:
+ repldict = parseAttributeReplacements(attrsubst)
+ for key, value in repldict.items():
+ repldict[key] = self.compileExpression(value)
+ else:
+ repldict = {}
+ if replace:
+ todo["repldict"] = repldict
+ repldict = {}
+ self.emitStartTag(name, self.replaceAttrs(attrlist, repldict), isend)
+ if optTag:
+ self.pushProgram()
+ if content:
+ self.pushProgram()
+ if todo and position != (None, None):
+ todo["position"] = position
+ self.todoPush(todo)
+ if isend:
+ self.emitEndElement(name, isend)
+
+ def emitEndElement(self, name, isend=0, implied=0):
+ todo = self.todoPop()
+ if not todo:
+ # Shortcut
+ if not isend:
+ self.emitEndTag(name)
+ return
+
+ self.position = position = todo.get("position", (None, None))
+ defineMacro = todo.get("defineMacro")
+ useMacro = todo.get("useMacro")
+ defineSlot = todo.get("defineSlot")
+ fillSlot = todo.get("fillSlot")
+ repeat = todo.get("repeat")
+ content = todo.get("content")
+ replace = todo.get("replace")
+ condition = todo.get("condition")
+ onError = todo.get("onError")
+ define = todo.get("define")
+ repldict = todo.get("repldict", {})
+ scope = todo.get("scope")
+ optTag = todo.get("optional tag")
+
+ if implied > 0:
+ if defineMacro or useMacro or defineSlot or fillSlot:
+ exc = METALError
+ what = "METAL"
+ else:
+ exc = TALError
+ what = "TAL"
+ raise exc("%s attributes on <%s> require explicit </%s>" %
+ (what, name, name), position)
+
+ if content:
+ self.emitSubstitution(content, {})
+ if optTag:
+ self.emitOptTag(name, optTag, isend)
+ elif not isend:
+ self.emitEndTag(name)
+ if replace:
+ self.emitSubstitution(replace, repldict)
+ if repeat:
+ self.emitRepeat(repeat)
+ if condition:
+ self.emitCondition(condition)
+ if onError:
+ self.emitOnError(name, onError)
+ if scope:
+ self.emit("endScope")
+ if defineSlot:
+ self.emitDefineSlot(defineSlot)
+ if fillSlot:
+ self.emitFillSlot(fillSlot)
+ if useMacro:
+ self.emitUseMacro(useMacro)
+ if defineMacro:
+ self.emitDefineMacro(defineMacro)
+
+def test():
+ t = TALGenerator()
+ t.pushProgram()
+ t.emit("bar")
+ p = t.popProgram()
+ t.emit("foo", p)
+
+if __name__ == "__main__":
+ test()
diff --git a/TAL/TALInterpreter.py b/TAL/TALInterpreter.py
--- /dev/null
+++ b/TAL/TALInterpreter.py
@@ -0,0 +1,626 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""
+Interpreter for a pre-compiled TAL program.
+"""
+
+import sys
+import getopt
+
+from cgi import escape
+from string import join, lower, rfind
+try:
+ from strop import lower, rfind
+except ImportError:
+ pass
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+from TALDefs import quote, TAL_VERSION, TALError, METALError
+from TALDefs import isCurrentVersion, getProgramVersion, getProgramMode
+from TALGenerator import TALGenerator
+
+BOOLEAN_HTML_ATTRS = [
+ # List of Boolean attributes in HTML that should be rendered in
+ # minimized form (e.g. <img ismap> rather than <img ismap="">)
+ # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
+ # XXX The problem with this is that this is not valid XML and
+ # can't be parsed back!
+ "compact", "nowrap", "ismap", "declare", "noshade", "checked",
+ "disabled", "readonly", "multiple", "selected", "noresize",
+ "defer"
+]
+
+EMPTY_HTML_TAGS = [
+ # List of HTML tags with an empty content model; these are
+ # rendered in minimized form, e.g. <img />.
+ # From http://www.w3.org/TR/xhtml1/#dtds
+ "base", "meta", "link", "hr", "br", "param", "img", "area",
+ "input", "col", "basefont", "isindex", "frame",
+]
+
+class AltTALGenerator(TALGenerator):
+
+ def __init__(self, repldict, expressionCompiler=None, xml=0):
+ self.repldict = repldict
+ self.enabled = 1
+ TALGenerator.__init__(self, expressionCompiler, xml)
+
+ def enable(self, enabled):
+ self.enabled = enabled
+
+ def emit(self, *args):
+ if self.enabled:
+ apply(TALGenerator.emit, (self,) + args)
+
+ def emitStartElement(self, name, attrlist, taldict, metaldict,
+ position=(None, None), isend=0):
+ metaldict = {}
+ taldict = {}
+ if self.enabled and self.repldict:
+ taldict["attributes"] = "x x"
+ TALGenerator.emitStartElement(self, name, attrlist,
+ taldict, metaldict, position, isend)
+
+ def replaceAttrs(self, attrlist, repldict):
+ if self.enabled and self.repldict:
+ repldict = self.repldict
+ self.repldict = None
+ return TALGenerator.replaceAttrs(self, attrlist, repldict)
+
+
+class TALInterpreter:
+
+ def __init__(self, program, macros, engine, stream=None,
+ debug=0, wrap=60, metal=1, tal=1, showtal=-1,
+ strictinsert=1, stackLimit=100):
+ self.program = program
+ self.macros = macros
+ self.engine = engine
+ self.Default = engine.getDefault()
+ self.stream = stream or sys.stdout
+ self._stream_write = self.stream.write
+ self.debug = debug
+ self.wrap = wrap
+ self.metal = metal
+ self.tal = tal
+ if tal:
+ self.dispatch = self.bytecode_handlers_tal
+ else:
+ self.dispatch = self.bytecode_handlers
+ assert showtal in (-1, 0, 1)
+ if showtal == -1:
+ showtal = (not tal)
+ self.showtal = showtal
+ self.strictinsert = strictinsert
+ self.stackLimit = stackLimit
+ self.html = 0
+ self.endsep = "/>"
+ self.endlen = len(self.endsep)
+ self.macroStack = []
+ self.popMacro = self.macroStack.pop
+ self.position = None, None # (lineno, offset)
+ self.col = 0
+ self.level = 0
+ self.scopeLevel = 0
+ self.sourceFile = None
+
+ def saveState(self):
+ return (self.position, self.col, self.stream,
+ self.scopeLevel, self.level)
+
+ def restoreState(self, state):
+ (self.position, self.col, self.stream, scopeLevel, level) = state
+ self._stream_write = self.stream.write
+ assert self.level == level
+ while self.scopeLevel > scopeLevel:
+ self.engine.endScope()
+ self.scopeLevel = self.scopeLevel - 1
+ self.engine.setPosition(self.position)
+
+ def restoreOutputState(self, state):
+ (dummy, self.col, self.stream, scopeLevel, level) = state
+ self._stream_write = self.stream.write
+ assert self.level == level
+ assert self.scopeLevel == scopeLevel
+
+ def pushMacro(self, macroName, slots, entering=1):
+ if len(self.macroStack) >= self.stackLimit:
+ raise METALError("macro nesting limit (%d) exceeded "
+ "by %s" % (self.stackLimit, `macroName`))
+ self.macroStack.append([macroName, slots, entering])
+
+ def macroContext(self, what):
+ macroStack = self.macroStack
+ i = len(macroStack)
+ while i > 0:
+ i = i-1
+ if macroStack[i][0] == what:
+ return i
+ return -1
+
+ def __call__(self):
+ assert self.level == 0
+ assert self.scopeLevel == 0
+ self.interpret(self.program)
+ assert self.level == 0
+ assert self.scopeLevel == 0
+ if self.col > 0:
+ self._stream_write("\n")
+ self.col = 0
+
+ def stream_write(self, s,
+ len=len, rfind=rfind):
+ self._stream_write(s)
+ i = rfind(s, '\n')
+ if i < 0:
+ self.col = self.col + len(s)
+ else:
+ self.col = len(s) - (i + 1)
+
+ bytecode_handlers = {}
+
+ def interpret(self, program, None=None):
+ oldlevel = self.level
+ self.level = oldlevel + 1
+ handlers = self.dispatch
+ try:
+ if self.debug:
+ for (opcode, args) in program:
+ s = "%sdo_%s%s\n" % (" "*self.level, opcode,
+ repr(args))
+ if len(s) > 80:
+ s = s[:76] + "...\n"
+ sys.stderr.write(s)
+ handlers[opcode](self, args)
+ else:
+ for (opcode, args) in program:
+ handlers[opcode](self, args)
+ finally:
+ self.level = oldlevel
+
+ def do_version(self, version):
+ assert version == TAL_VERSION
+ bytecode_handlers["version"] = do_version
+
+ def do_mode(self, mode):
+ assert mode in ("html", "xml")
+ self.html = (mode == "html")
+ if self.html:
+ self.endsep = " />"
+ else:
+ self.endsep = "/>"
+ self.endlen = len(self.endsep)
+ bytecode_handlers["mode"] = do_mode
+
+ def do_setSourceFile(self, source_file):
+ self.sourceFile = source_file
+ self.engine.setSourceFile(source_file)
+ bytecode_handlers["setSourceFile"] = do_setSourceFile
+
+ def do_setPosition(self, position):
+ self.position = position
+ self.engine.setPosition(position)
+ bytecode_handlers["setPosition"] = do_setPosition
+
+ def do_startEndTag(self, stuff):
+ self.do_startTag(stuff, self.endsep, self.endlen)
+ bytecode_handlers["startEndTag"] = do_startEndTag
+
+ def do_startTag(self, (name, attrList),
+ end=">", endlen=1, _len=len):
+ # The bytecode generator does not cause calls to this method
+ # for start tags with no attributes; those are optimized down
+ # to rawtext events. Hence, there is no special "fast path"
+ # for that case.
+ _stream_write = self._stream_write
+ _stream_write("<" + name)
+ namelen = _len(name)
+ col = self.col + namelen + 1
+ wrap = self.wrap
+ align = col + 1
+ if align >= wrap/2:
+ align = 4 # Avoid a narrow column far to the right
+ attrAction = self.dispatch["<attrAction>"]
+ try:
+ for item in attrList:
+ if _len(item) == 2:
+ name, s = item
+ else:
+ ok, name, s = attrAction(self, item)
+ if not ok:
+ continue
+ slen = _len(s)
+ if (wrap and
+ col >= align and
+ col + 1 + slen > wrap):
+ _stream_write("\n" + " "*align)
+ col = align + slen
+ else:
+ s = " " + s
+ col = col + 1 + slen
+ _stream_write(s)
+ _stream_write(end)
+ col = col + endlen
+ finally:
+ self.col = col
+ bytecode_handlers["startTag"] = do_startTag
+
+ def attrAction(self, item):
+ name, value, action = item[:3]
+ if action == 1 or (action > 1 and not self.showtal):
+ return 0, name, value
+ macs = self.macroStack
+ if action == 2 and self.metal and macs:
+ if len(macs) > 1 or not macs[-1][2]:
+ # Drop all METAL attributes at a use-depth above one.
+ return 0, name, value
+ # Clear 'entering' flag
+ macs[-1][2] = 0
+ # Convert or drop depth-one METAL attributes.
+ i = rfind(name, ":") + 1
+ prefix, suffix = name[:i], name[i:]
+ if suffix == "define-macro":
+ # Convert define-macro as we enter depth one.
+ name = prefix + "use-macro"
+ value = macs[-1][0] # Macro name
+ elif suffix == "define-slot":
+ name = prefix + "slot"
+ elif suffix == "fill-slot":
+ pass
+ else:
+ return 0, name, value
+
+ if value is None:
+ value = name
+ else:
+ value = "%s=%s" % (name, quote(value))
+ return 1, name, value
+
+ def attrAction_tal(self, item):
+ name, value, action = item[:3]
+ if action > 1:
+ return self.attrAction(item)
+ ok = 1
+ if self.html and lower(name) in BOOLEAN_HTML_ATTRS:
+ evalue = self.engine.evaluateBoolean(item[3])
+ if evalue is self.Default:
+ if action == 1: # Cancelled insert
+ ok = 0
+ elif evalue:
+ value = None
+ else:
+ ok = 0
+ else:
+ evalue = self.engine.evaluateText(item[3])
+ if evalue is self.Default:
+ if action == 1: # Cancelled insert
+ ok = 0
+ else:
+ if evalue is None:
+ ok = 0
+ value = evalue
+ if ok:
+ if value is None:
+ value = name
+ value = "%s=%s" % (name, quote(value))
+ return ok, name, value
+
+ bytecode_handlers["<attrAction>"] = attrAction
+
+ def no_tag(self, start, program):
+ state = self.saveState()
+ self.stream = stream = StringIO()
+ self._stream_write = stream.write
+ self.interpret(start)
+ self.restoreOutputState(state)
+ self.interpret(program)
+
+ def do_optTag(self, (name, cexpr, tag_ns, isend, start, program),
+ omit=0):
+ if tag_ns and not self.showtal:
+ return self.no_tag(start, program)
+
+ self.interpret(start)
+ if not isend:
+ self.interpret(program)
+ s = '</%s>' % name
+ self._stream_write(s)
+ self.col = self.col + len(s)
+
+ def do_optTag_tal(self, stuff):
+ cexpr = stuff[1]
+ if cexpr is not None and (cexpr == '' or
+ self.engine.evaluateBoolean(cexpr)):
+ self.no_tag(stuff[-2], stuff[-1])
+ else:
+ self.do_optTag(stuff)
+ bytecode_handlers["optTag"] = do_optTag
+
+ def dumpMacroStack(self, prefix, suffix, value):
+ sys.stderr.write("+---- %s%s = %s\n" % (prefix, suffix, value))
+ for i in range(len(self.macroStack)):
+ what, macroName, slots = self.macroStack[i]
+ sys.stderr.write("| %2d. %-12s %-12s %s\n" %
+ (i, what, macroName, slots and slots.keys()))
+ sys.stderr.write("+--------------------------------------\n")
+
+ def do_rawtextBeginScope(self, (s, col, position, closeprev, dict)):
+ self._stream_write(s)
+ self.col = col
+ self.do_setPosition(position)
+ if closeprev:
+ engine = self.engine
+ engine.endScope()
+ engine.beginScope()
+ else:
+ self.engine.beginScope()
+ self.scopeLevel = self.scopeLevel + 1
+
+ def do_rawtextBeginScope_tal(self, (s, col, position, closeprev, dict)):
+ self._stream_write(s)
+ self.col = col
+ self.do_setPosition(position)
+ engine = self.engine
+ if closeprev:
+ engine.endScope()
+ engine.beginScope()
+ else:
+ engine.beginScope()
+ self.scopeLevel = self.scopeLevel + 1
+ engine.setLocal("attrs", dict)
+ bytecode_handlers["rawtextBeginScope"] = do_rawtextBeginScope
+
+ def do_beginScope(self, dict):
+ self.engine.beginScope()
+ self.scopeLevel = self.scopeLevel + 1
+
+ def do_beginScope_tal(self, dict):
+ engine = self.engine
+ engine.beginScope()
+ engine.setLocal("attrs", dict)
+ self.scopeLevel = self.scopeLevel + 1
+ bytecode_handlers["beginScope"] = do_beginScope
+
+ def do_endScope(self, notused=None):
+ self.engine.endScope()
+ self.scopeLevel = self.scopeLevel - 1
+ bytecode_handlers["endScope"] = do_endScope
+
+ def do_setLocal(self, notused):
+ pass
+
+ def do_setLocal_tal(self, (name, expr)):
+ self.engine.setLocal(name, self.engine.evaluateValue(expr))
+ bytecode_handlers["setLocal"] = do_setLocal
+
+ def do_setGlobal_tal(self, (name, expr)):
+ self.engine.setGlobal(name, self.engine.evaluateValue(expr))
+ bytecode_handlers["setGlobal"] = do_setLocal
+
+ def do_insertText(self, stuff):
+ self.interpret(stuff[1])
+
+ def do_insertText_tal(self, stuff):
+ text = self.engine.evaluateText(stuff[0])
+ if text is None:
+ return
+ if text is self.Default:
+ self.interpret(stuff[1])
+ return
+ s = escape(text)
+ self._stream_write(s)
+ i = rfind(s, '\n')
+ if i < 0:
+ self.col = self.col + len(s)
+ else:
+ self.col = len(s) - (i + 1)
+ bytecode_handlers["insertText"] = do_insertText
+
+ def do_insertStructure(self, stuff):
+ self.interpret(stuff[2])
+
+ def do_insertStructure_tal(self, (expr, repldict, block)):
+ structure = self.engine.evaluateStructure(expr)
+ if structure is None:
+ return
+ if structure is self.Default:
+ self.interpret(block)
+ return
+ text = str(structure)
+ if not (repldict or self.strictinsert):
+ # Take a shortcut, no error checking
+ self.stream_write(text)
+ return
+ if self.html:
+ self.insertHTMLStructure(text, repldict)
+ else:
+ self.insertXMLStructure(text, repldict)
+ bytecode_handlers["insertStructure"] = do_insertStructure
+
+ def insertHTMLStructure(self, text, repldict):
+ from HTMLTALParser import HTMLTALParser
+ gen = AltTALGenerator(repldict, self.engine, 0)
+ p = HTMLTALParser(gen) # Raises an exception if text is invalid
+ p.parseString(text)
+ program, macros = p.getCode()
+ self.interpret(program)
+
+ def insertXMLStructure(self, text, repldict):
+ from TALParser import TALParser
+ gen = AltTALGenerator(repldict, self.engine, 0)
+ p = TALParser(gen)
+ gen.enable(0)
+ p.parseFragment('<!DOCTYPE foo PUBLIC "foo" "bar"><foo>')
+ gen.enable(1)
+ p.parseFragment(text) # Raises an exception if text is invalid
+ gen.enable(0)
+ p.parseFragment('</foo>', 1)
+ program, macros = gen.getCode()
+ self.interpret(program)
+
+ def do_loop(self, (name, expr, block)):
+ self.interpret(block)
+
+ def do_loop_tal(self, (name, expr, block)):
+ iterator = self.engine.setRepeat(name, expr)
+ while iterator.next():
+ self.interpret(block)
+ bytecode_handlers["loop"] = do_loop
+
+ def do_rawtextColumn(self, (s, col)):
+ self._stream_write(s)
+ self.col = col
+ bytecode_handlers["rawtextColumn"] = do_rawtextColumn
+
+ def do_rawtextOffset(self, (s, offset)):
+ self._stream_write(s)
+ self.col = self.col + offset
+ bytecode_handlers["rawtextOffset"] = do_rawtextOffset
+
+ def do_condition(self, (condition, block)):
+ if not self.tal or self.engine.evaluateBoolean(condition):
+ self.interpret(block)
+ bytecode_handlers["condition"] = do_condition
+
+ def do_defineMacro(self, (macroName, macro)):
+ macs = self.macroStack
+ if len(macs) == 1:
+ entering = macs[-1][2]
+ if not entering:
+ macs.append(None)
+ self.interpret(macro)
+ macs.pop()
+ return
+ self.interpret(macro)
+ bytecode_handlers["defineMacro"] = do_defineMacro
+
+ def do_useMacro(self, (macroName, macroExpr, compiledSlots, block)):
+ if not self.metal:
+ self.interpret(block)
+ return
+ macro = self.engine.evaluateMacro(macroExpr)
+ if macro is self.Default:
+ macro = block
+ else:
+ if not isCurrentVersion(macro):
+ raise METALError("macro %s has incompatible version %s" %
+ (`macroName`, `getProgramVersion(macro)`),
+ self.position)
+ mode = getProgramMode(macro)
+ if mode != (self.html and "html" or "xml"):
+ raise METALError("macro %s has incompatible mode %s" %
+ (`macroName`, `mode`), self.position)
+ self.pushMacro(macroName, compiledSlots)
+ saved_source = self.sourceFile
+ saved_position = self.position # Used by Boa Constructor
+ self.interpret(macro)
+ if self.sourceFile != saved_source:
+ self.engine.setSourceFile(saved_source)
+ self.sourceFile = saved_source
+ self.popMacro()
+ bytecode_handlers["useMacro"] = do_useMacro
+
+ def do_fillSlot(self, (slotName, block)):
+ # This is only executed if the enclosing 'use-macro' evaluates
+ # to 'default'.
+ self.interpret(block)
+ bytecode_handlers["fillSlot"] = do_fillSlot
+
+ def do_defineSlot(self, (slotName, block)):
+ if not self.metal:
+ self.interpret(block)
+ return
+ macs = self.macroStack
+ if macs and macs[-1] is not None:
+ saved_source = self.sourceFile
+ saved_position = self.position # Used by Boa Constructor
+ macroName, slots = self.popMacro()[:2]
+ slot = slots.get(slotName)
+ if slot is not None:
+ self.interpret(slot)
+ if self.sourceFile != saved_source:
+ self.engine.setSourceFile(saved_source)
+ self.sourceFile = saved_source
+ self.pushMacro(macroName, slots, entering=0)
+ return
+ self.pushMacro(macroName, slots)
+ if len(macs) == 1:
+ self.interpret(block)
+ return
+ self.interpret(block)
+ bytecode_handlers["defineSlot"] = do_defineSlot
+
+ def do_onError(self, (block, handler)):
+ self.interpret(block)
+
+ def do_onError_tal(self, (block, handler)):
+ state = self.saveState()
+ self.stream = stream = StringIO()
+ self._stream_write = stream.write
+ try:
+ self.interpret(block)
+ except:
+ exc = sys.exc_info()[1]
+ self.restoreState(state)
+ engine = self.engine
+ engine.beginScope()
+ error = engine.createErrorInfo(exc, self.position)
+ engine.setLocal('error', error)
+ try:
+ self.interpret(handler)
+ finally:
+ engine.endScope()
+ else:
+ self.restoreOutputState(state)
+ self.stream_write(stream.getvalue())
+ bytecode_handlers["onError"] = do_onError
+
+ bytecode_handlers_tal = bytecode_handlers.copy()
+ bytecode_handlers_tal["rawtextBeginScope"] = do_rawtextBeginScope_tal
+ bytecode_handlers_tal["beginScope"] = do_beginScope_tal
+ bytecode_handlers_tal["setLocal"] = do_setLocal_tal
+ bytecode_handlers_tal["setGlobal"] = do_setGlobal_tal
+ bytecode_handlers_tal["insertStructure"] = do_insertStructure_tal
+ bytecode_handlers_tal["insertText"] = do_insertText_tal
+ bytecode_handlers_tal["loop"] = do_loop_tal
+ bytecode_handlers_tal["onError"] = do_onError_tal
+ bytecode_handlers_tal["<attrAction>"] = attrAction_tal
+ bytecode_handlers_tal["optTag"] = do_optTag_tal
+
+
+def test():
+ from driver import FILE, parsefile
+ from DummyEngine import DummyEngine
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "")
+ except getopt.error, msg:
+ print msg
+ sys.exit(2)
+ if args:
+ file = args[0]
+ else:
+ file = FILE
+ doc = parsefile(file)
+ compiler = TALCompiler(doc)
+ program, macros = compiler()
+ engine = DummyEngine()
+ interpreter = TALInterpreter(program, macros, engine)
+ interpreter()
+
+if __name__ == "__main__":
+ test()
diff --git a/TAL/TALParser.py b/TAL/TALParser.py
--- /dev/null
+++ b/TAL/TALParser.py
@@ -0,0 +1,137 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""
+Parse XML and compile to TALInterpreter intermediate code.
+"""
+
+import string
+from XMLParser import XMLParser
+from TALDefs import *
+from TALGenerator import TALGenerator
+
+class TALParser(XMLParser):
+
+ ordered_attributes = 1
+
+ def __init__(self, gen=None): # Override
+ XMLParser.__init__(self)
+ if gen is None:
+ gen = TALGenerator()
+ self.gen = gen
+ self.nsStack = []
+ self.nsDict = {XML_NS: 'xml'}
+ self.nsNew = []
+
+ def getCode(self):
+ return self.gen.getCode()
+
+ def getWarnings(self):
+ return ()
+
+ def StartNamespaceDeclHandler(self, prefix, uri):
+ self.nsStack.append(self.nsDict.copy())
+ self.nsDict[uri] = prefix
+ self.nsNew.append((prefix, uri))
+
+ def EndNamespaceDeclHandler(self, prefix):
+ self.nsDict = self.nsStack.pop()
+
+ def StartElementHandler(self, name, attrs):
+ if self.ordered_attributes:
+ # attrs is a list of alternating names and values
+ attrlist = []
+ for i in range(0, len(attrs), 2):
+ key = attrs[i]
+ value = attrs[i+1]
+ attrlist.append((key, value))
+ else:
+ # attrs is a dict of {name: value}
+ attrlist = attrs.items()
+ attrlist.sort() # For definiteness
+ name, attrlist, taldict, metaldict = self.process_ns(name, attrlist)
+ attrlist = self.xmlnsattrs() + attrlist
+ self.gen.emitStartElement(name, attrlist, taldict, metaldict)
+
+ def process_ns(self, name, attrlist):
+ taldict = {}
+ metaldict = {}
+ fixedattrlist = []
+ name, namebase, namens = self.fixname(name)
+ for key, value in attrlist:
+ key, keybase, keyns = self.fixname(key)
+ ns = keyns or namens # default to tag namespace
+ item = key, value
+ if ns == 'metal':
+ metaldict[keybase] = value
+ item = item + ("metal",)
+ elif ns == 'tal':
+ taldict[keybase] = value
+ item = item + ("tal",)
+ fixedattrlist.append(item)
+ if namens in ('metal', 'tal'):
+ taldict['tal tag'] = namens
+ return name, fixedattrlist, taldict, metaldict
+
+ def xmlnsattrs(self):
+ newlist = []
+ for prefix, uri in self.nsNew:
+ if prefix:
+ key = "xmlns:" + prefix
+ else:
+ key = "xmlns"
+ if uri in (ZOPE_METAL_NS, ZOPE_TAL_NS):
+ item = (key, uri, "xmlns")
+ else:
+ item = (key, uri)
+ newlist.append(item)
+ self.nsNew = []
+ return newlist
+
+ def fixname(self, name):
+ if ' ' in name:
+ uri, name = string.split(name, ' ')
+ prefix = self.nsDict[uri]
+ prefixed = name
+ if prefix:
+ prefixed = "%s:%s" % (prefix, name)
+ ns = 'x'
+ if uri == ZOPE_TAL_NS:
+ ns = 'tal'
+ elif uri == ZOPE_METAL_NS:
+ ns = 'metal'
+ return (prefixed, name, ns)
+ return (name, name, None)
+
+ def EndElementHandler(self, name):
+ name = self.fixname(name)[0]
+ self.gen.emitEndElement(name)
+
+ def DefaultHandler(self, text):
+ self.gen.emitRawText(text)
+
+def test():
+ import sys
+ p = TALParser()
+ file = "tests/input/test01.xml"
+ if sys.argv[1:]:
+ file = sys.argv[1]
+ p.parseFile(file)
+ program, macros = p.getCode()
+ from TALInterpreter import TALInterpreter
+ from DummyEngine import DummyEngine
+ engine = DummyEngine(macros)
+ TALInterpreter(program, macros, engine, sys.stdout, wrap=0)()
+
+if __name__ == "__main__":
+ test()
diff --git a/TAL/XMLParser.py b/TAL/XMLParser.py
--- /dev/null
+++ b/TAL/XMLParser.py
@@ -0,0 +1,90 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""
+Generic expat-based XML parser base class.
+"""
+
+import zLOG
+
+class XMLParser:
+
+ ordered_attributes = 0
+
+ handler_names = [
+ "StartElementHandler",
+ "EndElementHandler",
+ "ProcessingInstructionHandler",
+ "CharacterDataHandler",
+ "UnparsedEntityDeclHandler",
+ "NotationDeclHandler",
+ "StartNamespaceDeclHandler",
+ "EndNamespaceDeclHandler",
+ "CommentHandler",
+ "StartCdataSectionHandler",
+ "EndCdataSectionHandler",
+ "DefaultHandler",
+ "DefaultHandlerExpand",
+ "NotStandaloneHandler",
+ "ExternalEntityRefHandler",
+ "XmlDeclHandler",
+ "StartDoctypeDeclHandler",
+ "EndDoctypeDeclHandler",
+ "ElementDeclHandler",
+ "AttlistDeclHandler"
+ ]
+
+ def __init__(self, encoding=None):
+ self.parser = p = self.createParser()
+ if self.ordered_attributes:
+ try:
+ self.parser.ordered_attributes = self.ordered_attributes
+ except AttributeError:
+ zLOG.LOG("TAL.XMLParser", zLOG.INFO,
+ "Can't set ordered_attributes")
+ self.ordered_attributes = 0
+ for name in self.handler_names:
+ method = getattr(self, name, None)
+ if method is not None:
+ try:
+ setattr(p, name, method)
+ except AttributeError:
+ zLOG.LOG("TAL.XMLParser", zLOG.PROBLEM,
+ "Can't set expat handler %s" % name)
+
+ def createParser(self, encoding=None):
+ global XMLParseError
+ try:
+ from Products.ParsedXML.Expat import pyexpat
+ XMLParseError = pyexpat.ExpatError
+ return pyexpat.ParserCreate(encoding, ' ')
+ except ImportError:
+ from xml.parsers import expat
+ XMLParseError = expat.ExpatError
+ return expat.ParserCreate(encoding, ' ')
+
+ def parseFile(self, filename):
+ self.parseStream(open(filename))
+
+ def parseString(self, s):
+ self.parser.Parse(s, 1)
+
+ def parseURL(self, url):
+ import urllib
+ self.parseStream(urllib.urlopen(url))
+
+ def parseStream(self, stream):
+ self.parser.ParseFile(stream)
+
+ def parseFragment(self, s, end=0):
+ self.parser.Parse(s, end)
diff --git a/TAL/__init__.py b/TAL/__init__.py
--- /dev/null
+++ b/TAL/__init__.py
@@ -0,0 +1,14 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+""" Template Attribute Language package """
diff --git a/TAL/markupbase.py b/TAL/markupbase.py
--- /dev/null
+++ b/TAL/markupbase.py
@@ -0,0 +1,306 @@
+"""Shared support for scanning document type declarations in HTML and XHTML."""
+
+import re
+import string
+
+_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
+_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
+
+del re
+
+
+class ParserBase:
+ """Parser base class which provides some common support methods used
+ by the SGML/HTML and XHTML parsers."""
+
+ def reset(self):
+ self.lineno = 1
+ self.offset = 0
+
+ def getpos(self):
+ """Return current line number and offset."""
+ return self.lineno, self.offset
+
+ # Internal -- update line number and offset. This should be
+ # called for each piece of data exactly once, in order -- in other
+ # words the concatenation of all the input strings to this
+ # function should be exactly the entire input.
+ def updatepos(self, i, j):
+ if i >= j:
+ return j
+ rawdata = self.rawdata
+ nlines = string.count(rawdata, "\n", i, j)
+ if nlines:
+ self.lineno = self.lineno + nlines
+ pos = string.rindex(rawdata, "\n", i, j) # Should not fail
+ self.offset = j-(pos+1)
+ else:
+ self.offset = self.offset + j-i
+ return j
+
+ _decl_otherchars = ''
+
+ # Internal -- parse declaration (for use by subclasses).
+ def parse_declaration(self, i):
+ # This is some sort of declaration; in "HTML as
+ # deployed," this should only be the document type
+ # declaration ("<!DOCTYPE html...>").
+ rawdata = self.rawdata
+ import sys
+ j = i + 2
+ assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
+ if rawdata[j:j+1] in ("-", ""):
+ # Start of comment followed by buffer boundary,
+ # or just a buffer boundary.
+ return -1
+ # in practice, this should look like: ((name|stringlit) S*)+ '>'
+ n = len(rawdata)
+ decltype, j = self._scan_name(j, i)
+ if j < 0:
+ return j
+ if decltype == "doctype":
+ self._decl_otherchars = ''
+ while j < n:
+ c = rawdata[j]
+ if c == ">":
+ # end of declaration syntax
+ data = rawdata[i+2:j]
+ if decltype == "doctype":
+ self.handle_decl(data)
+ else:
+ self.unknown_decl(data)
+ return j + 1
+ if c in "\"'":
+ m = _declstringlit_match(rawdata, j)
+ if not m:
+ return -1 # incomplete
+ j = m.end()
+ elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+ name, j = self._scan_name(j, i)
+ elif c in self._decl_otherchars:
+ j = j + 1
+ elif c == "[":
+ if decltype == "doctype":
+ j = self._parse_doctype_subset(j + 1, i)
+ else:
+ self.error("unexpected '[' char in declaration")
+ else:
+ self.error(
+ "unexpected %s char in declaration" % `rawdata[j]`)
+ if j < 0:
+ return j
+ return -1 # incomplete
+
+ # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
+ # returning the index just past any whitespace following the trailing ']'.
+ def _parse_doctype_subset(self, i, declstartpos):
+ rawdata = self.rawdata
+ n = len(rawdata)
+ j = i
+ while j < n:
+ c = rawdata[j]
+ if c == "<":
+ s = rawdata[j:j+2]
+ if s == "<":
+ # end of buffer; incomplete
+ return -1
+ if s != "<!":
+ self.updatepos(declstartpos, j + 1)
+ self.error("unexpected char in internal subset (in %s)"
+ % `s`)
+ if (j + 2) == n:
+ # end of buffer; incomplete
+ return -1
+ if (j + 4) > n:
+ # end of buffer; incomplete
+ return -1
+ if rawdata[j:j+4] == "<!--":
+ j = self.parse_comment(j, report=0)
+ if j < 0:
+ return j
+ continue
+ name, j = self._scan_name(j + 2, declstartpos)
+ if j == -1:
+ return -1
+ if name not in ("attlist", "element", "entity", "notation"):
+ self.updatepos(declstartpos, j + 2)
+ self.error(
+ "unknown declaration %s in internal subset" % `name`)
+ # handle the individual names
+ meth = getattr(self, "_parse_doctype_" + name)
+ j = meth(j, declstartpos)
+ if j < 0:
+ return j
+ elif c == "%":
+ # parameter entity reference
+ if (j + 1) == n:
+ # end of buffer; incomplete
+ return -1
+ s, j = self._scan_name(j + 1, declstartpos)
+ if j < 0:
+ return j
+ if rawdata[j] == ";":
+ j = j + 1
+ elif c == "]":
+ j = j + 1
+ while j < n and rawdata[j] in string.whitespace:
+ j = j + 1
+ if j < n:
+ if rawdata[j] == ">":
+ return j
+ self.updatepos(declstartpos, j)
+ self.error("unexpected char after internal subset")
+ else:
+ return -1
+ elif c in string.whitespace:
+ j = j + 1
+ else:
+ self.updatepos(declstartpos, j)
+ self.error("unexpected char %s in internal subset" % `c`)
+ # end of buffer reached
+ return -1
+
+ # Internal -- scan past <!ELEMENT declarations
+ def _parse_doctype_element(self, i, declstartpos):
+ rawdata = self.rawdata
+ n = len(rawdata)
+ name, j = self._scan_name(i, declstartpos)
+ if j == -1:
+ return -1
+ # style content model; just skip until '>'
+ if '>' in rawdata[j:]:
+ return string.find(rawdata, ">", j) + 1
+ return -1
+
+ # Internal -- scan past <!ATTLIST declarations
+ def _parse_doctype_attlist(self, i, declstartpos):
+ rawdata = self.rawdata
+ name, j = self._scan_name(i, declstartpos)
+ c = rawdata[j:j+1]
+ if c == "":
+ return -1
+ if c == ">":
+ return j + 1
+ while 1:
+ # scan a series of attribute descriptions; simplified:
+ # name type [value] [#constraint]
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+ c = rawdata[j:j+1]
+ if c == "":
+ return -1
+ if c == "(":
+ # an enumerated type; look for ')'
+ if ")" in rawdata[j:]:
+ j = string.find(rawdata, ")", j) + 1
+ else:
+ return -1
+ while rawdata[j:j+1] in string.whitespace:
+ j = j + 1
+ if not rawdata[j:]:
+ # end of buffer, incomplete
+ return -1
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if m:
+ j = m.end()
+ else:
+ return -1
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c == "#":
+ if rawdata[j:] == "#":
+ # end of buffer
+ return -1
+ name, j = self._scan_name(j + 1, declstartpos)
+ if j < 0:
+ return j
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c == '>':
+ # all done
+ return j + 1
+
+ # Internal -- scan past <!NOTATION declarations
+ def _parse_doctype_notation(self, i, declstartpos):
+ name, j = self._scan_name(i, declstartpos)
+ if j < 0:
+ return j
+ rawdata = self.rawdata
+ while 1:
+ c = rawdata[j:j+1]
+ if not c:
+ # end of buffer; incomplete
+ return -1
+ if c == '>':
+ return j + 1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if not m:
+ return -1
+ j = m.end()
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+
+ # Internal -- scan past <!ENTITY declarations
+ def _parse_doctype_entity(self, i, declstartpos):
+ rawdata = self.rawdata
+ if rawdata[i:i+1] == "%":
+ j = i + 1
+ while 1:
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c in string.whitespace:
+ j = j + 1
+ else:
+ break
+ else:
+ j = i
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+ while 1:
+ c = self.rawdata[j:j+1]
+ if not c:
+ return -1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if m:
+ j = m.end()
+ else:
+ return -1 # incomplete
+ elif c == ">":
+ return j + 1
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+
+ # Internal -- scan a name token and the new position and the token, or
+ # return -1 if we've reached the end of the buffer.
+ def _scan_name(self, i, declstartpos):
+ rawdata = self.rawdata
+ n = len(rawdata)
+ if i == n:
+ return None, -1
+ m = _declname_match(rawdata, i)
+ if m:
+ s = m.group()
+ name = string.strip(s)
+ if (i + len(s)) == n:
+ return None, -1 # end of buffer
+ return string.lower(name), m.end()
+ else:
+ self.updatepos(declstartpos, i)
+ self.error("expected name token", self.getpos())