Code

Checkin for Bug 409004: Add Scour as an output option
author jeff_schiller <jeff_schiller@users.sourceforge.net>
Tue, 4 Aug 2009 20:15:08 +0000 (20:15 +0000)
committer jeff_schiller <jeff_schiller@users.sourceforge.net>
Tue, 4 Aug 2009 20:15:08 +0000 (20:15 +0000)
share/extensions/scour.inkscape.py [new file with mode: 0755]
share/extensions/scour.inx [new file with mode: 0644]
share/extensions/scour.py [new file with mode: 0755]
share/extensions/svg_regex.py [new file with mode: 0644]

diff --git a/share/extensions/scour.inkscape.py b/share/extensions/scour.inkscape.py
new file mode 100755 (executable)
index 0000000..531dfb4
--- /dev/null
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Inkscape output-extension entry point for Scour.
+
+Reads the SVG file named by the first command-line argument, passes its
+contents through scourString() and writes the result to stdout encoded
+as UTF-8.
+"""
+import sys
+from scour import scourString
+
+# open() instead of the Python 2-only file() builtin; the handle is not
+# called 'input' so the builtin of that name is not shadowed
+svgFile = open(sys.argv[1], "r")
+try:
+       sys.stdout.write(scourString(svgFile.read()).encode("UTF-8"))
+finally:
+       # release the input handle even when scouring raises
+       svgFile.close()
+sys.stdout.close()
diff --git a/share/extensions/scour.inx b/share/extensions/scour.inx
new file mode 100644 (file)
index 0000000..30c948c
--- /dev/null
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<inkscape-extension xmlns="http://www.inkscape.org/namespace/inkscape/extension">
+    <_name>Scoured SVG Output</_name>
+    <id>org.inkscape.output.scour</id>
+    <dependency type="executable" location="extensions">scour.py</dependency>
+    <dependency type="executable" location="extensions">svg_regex.py</dependency>
+    <output>
+        <extension>.svg</extension>
+        <mimetype>image/svg+xml</mimetype>
+        <_filetypename>Scoured SVG (*.svg)</_filetypename>
+        <_filetypetooltip>Scalable Vector Graphics</_filetypetooltip>
+    </output>
+    <script>
+        <command reldir="extensions" interpreter="python">scour.inkscape.py</command>
+    </script>
+</inkscape-extension>
diff --git a/share/extensions/scour.py b/share/extensions/scour.py
new file mode 100755 (executable)
index 0000000..01c01a8
--- /dev/null
@@ -0,0 +1,2091 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#  Scour
+#
+#  Copyright 2009 Jeff Schiller
+#
+#  This file is part of Scour, http://www.codedread.com/scour/
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# Notes:
+
+# rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
+# (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
+
+# Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
+# TODO: Adapt this script into an Inkscape python plugin
+#
+# * Process Transformations
+#  * Process quadratic Bezier curves
+#  * Collapse all group based transformations
+
+# Even more ideas here: http://esw.w3.org/topic/SvgTidy
+#  * removal of more default attribute values (gradientUnits, spreadMethod, x1, y1, etc)
+#  * analysis of path elements to see if rect can be used instead?
+#  * removal of unused attributes in groups:
+#    <g fill="blue" ...>
+#      <rect fill="red" ... />
+#      <rect fill="red" ... />
+#      <rect fill="red" ... />
+#    </g>
+#    in this case, fill="blue" should be removed
+#  * Move common attributes up to a parent group:
+#    <g>
+#      <rect fill="white"/>
+#      <rect fill="white"/>
+#      <rect fill="white"/>
+#    </g>
+#    becomes:
+#    <g fill="white">
+#      <rect />
+#      <rect />
+#      <rect />
+#    </g>
+
+# Suggestion from Richard Hutch:
+#  * Put id attributes first in the serialization (or make the d attribute last)
+#    This would require my own serialization of the DOM objects (not impossible)
+
+# Next Up:
+# - add an option for svgweb compatible markup (no self-closing tags)?
+# - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
+# - remove id if it matches the Inkscape-style of IDs (also provide a switch to disable this)
+# - prevent elements from being stripped if they are referenced in a <style> element
+#   (for instance, filter, marker, pattern) - need a crude CSS parser
+# - Remove any unused glyphs from font elements?
+
+# necessary to get true division
+from __future__ import division
+
+import os
+import sys
+import xml.dom.minidom
+import re
+import math
+import base64
+import urllib
+from svg_regex import svg_parser
+import gzip
+import optparse
+
+# Python 2.3 and earlier did not have Decimal
+try:
+       from decimal import *
+except ImportError:
+       from fixedpoint import *
+       Decimal = FixedPoint    
+
+# program name, version and copyright strings
+APP = 'scour'
+VER = '0.18'
+COPYRIGHT = 'Copyright Jeff Schiller, 2009'
+
+# XML namespace URIs, keyed by a short symbolic name
+NS = {         'SVG':          'http://www.w3.org/2000/svg', 
+               'XLINK':        'http://www.w3.org/1999/xlink', 
+               'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
+               'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
+               'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
+               'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
+               'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
+               'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
+               'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
+               'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
+               'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
+               'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',     
+               'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
+               'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
+               }
+
+# every namespace in NS except SVG and XLINK, i.e. the Sodipodi, Inkscape
+# and Adobe ones
+# NOTE(review): presumably the list handed to removeNamespacedElements() and
+# removeNamespacedAttributes() -- the call site is not in this part of the file
+unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
+                               NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
+                               NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
+                               NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ] 
+
+# names of SVG styling properties, in alphabetical order
+# NOTE(review): the code that consumes this list is not in this part of the
+# file -- confirm how it is used before adding or removing entries
+svgAttributes = [
+                               'clip-rule',
+                               'display',
+                               'fill',
+                               'fill-opacity',
+                               'fill-rule',
+                               'filter',
+                               'font-family',
+                               'font-size',
+                               'font-stretch',
+                               'font-style',
+                               'font-variant',
+                               'font-weight',
+                               'line-height',
+                               'marker',
+                               'opacity',
+                               'overflow',
+                               'stop-color',
+                               'stop-opacity',
+                               'stroke',
+                               'stroke-dashoffset',
+                               'stroke-linecap',
+                               'stroke-linejoin',
+                               'stroke-miterlimit',
+                               'stroke-opacity',
+                               'stroke-width',
+                               'visibility'
+                               ]
+
+colors = {
+       'aliceblue': 'rgb(240, 248, 255)',
+       'antiquewhite': 'rgb(250, 235, 215)',
+       'aqua': 'rgb( 0, 255, 255)',
+       'aquamarine': 'rgb(127, 255, 212)',
+       'azure': 'rgb(240, 255, 255)',
+       'beige': 'rgb(245, 245, 220)',
+       'bisque': 'rgb(255, 228, 196)',
+       'black': 'rgb( 0, 0, 0)',
+       'blanchedalmond': 'rgb(255, 235, 205)',
+       'blue': 'rgb( 0, 0, 255)',
+       'blueviolet': 'rgb(138, 43, 226)',
+       'brown': 'rgb(165, 42, 42)',
+       'burlywood': 'rgb(222, 184, 135)',
+       'cadetblue': 'rgb( 95, 158, 160)',
+       'chartreuse': 'rgb(127, 255, 0)',
+       'chocolate': 'rgb(210, 105, 30)',
+       'coral': 'rgb(255, 127, 80)',
+       'cornflowerblue': 'rgb(100, 149, 237)',
+       'cornsilk': 'rgb(255, 248, 220)',
+       'crimson': 'rgb(220, 20, 60)',
+       'cyan': 'rgb( 0, 255, 255)',
+       'darkblue': 'rgb( 0, 0, 139)',
+       'darkcyan': 'rgb( 0, 139, 139)',
+       'darkgoldenrod': 'rgb(184, 134, 11)',
+       'darkgray': 'rgb(169, 169, 169)',
+       'darkgreen': 'rgb( 0, 100, 0)',
+       'darkgrey': 'rgb(169, 169, 169)',
+       'darkkhaki': 'rgb(189, 183, 107)',
+       'darkmagenta': 'rgb(139, 0, 139)',
+       'darkolivegreen': 'rgb( 85, 107, 47)',
+       'darkorange': 'rgb(255, 140, 0)',
+       'darkorchid': 'rgb(153, 50, 204)',
+       'darkred': 'rgb(139, 0, 0)',
+       'darksalmon': 'rgb(233, 150, 122)',
+       'darkseagreen': 'rgb(143, 188, 143)',
+       'darkslateblue': 'rgb( 72, 61, 139)',
+       'darkslategray': 'rgb( 47, 79, 79)',
+       'darkslategrey': 'rgb( 47, 79, 79)',
+       'darkturquoise': 'rgb( 0, 206, 209)',
+       'darkviolet': 'rgb(148, 0, 211)',
+       'deeppink': 'rgb(255, 20, 147)',
+       'deepskyblue': 'rgb( 0, 191, 255)',
+       'dimgray': 'rgb(105, 105, 105)',
+       'dimgrey': 'rgb(105, 105, 105)',
+       'dodgerblue': 'rgb( 30, 144, 255)',
+       'firebrick': 'rgb(178, 34, 34)',
+       'floralwhite': 'rgb(255, 250, 240)',
+       'forestgreen': 'rgb( 34, 139, 34)',
+       'fuchsia': 'rgb(255, 0, 255)',
+       'gainsboro': 'rgb(220, 220, 220)',
+       'ghostwhite': 'rgb(248, 248, 255)',
+       'gold': 'rgb(255, 215, 0)',
+       'goldenrod': 'rgb(218, 165, 32)',
+       'gray': 'rgb(128, 128, 128)',
+       'grey': 'rgb(128, 128, 128)',
+       'green': 'rgb( 0, 128, 0)',
+       'greenyellow': 'rgb(173, 255, 47)',
+       'honeydew': 'rgb(240, 255, 240)',
+       'hotpink': 'rgb(255, 105, 180)',
+       'indianred': 'rgb(205, 92, 92)',
+       'indigo': 'rgb( 75, 0, 130)',
+       'ivory': 'rgb(255, 255, 240)',
+       'khaki': 'rgb(240, 230, 140)',
+       'lavender': 'rgb(230, 230, 250)',
+       'lavenderblush': 'rgb(255, 240, 245)',
+       'lawngreen': 'rgb(124, 252, 0)',
+       'lemonchiffon': 'rgb(255, 250, 205)',
+       'lightblue': 'rgb(173, 216, 230)',
+       'lightcoral': 'rgb(240, 128, 128)',
+       'lightcyan': 'rgb(224, 255, 255)',
+       'lightgoldenrodyellow': 'rgb(250, 250, 210)',
+       'lightgray': 'rgb(211, 211, 211)',
+       'lightgreen': 'rgb(144, 238, 144)',
+       'lightgrey': 'rgb(211, 211, 211)',
+       'lightpink': 'rgb(255, 182, 193)',
+       'lightsalmon': 'rgb(255, 160, 122)',
+       'lightseagreen': 'rgb( 32, 178, 170)',
+       'lightskyblue': 'rgb(135, 206, 250)',
+       'lightslategray': 'rgb(119, 136, 153)',
+       'lightslategrey': 'rgb(119, 136, 153)',
+       'lightsteelblue': 'rgb(176, 196, 222)',
+       'lightyellow': 'rgb(255, 255, 224)',
+       'lime': 'rgb( 0, 255, 0)',
+       'limegreen': 'rgb( 50, 205, 50)',
+       'linen': 'rgb(250, 240, 230)',
+       'magenta': 'rgb(255, 0, 255)',
+       'maroon': 'rgb(128, 0, 0)',
+       'mediumaquamarine': 'rgb(102, 205, 170)',
+       'mediumblue': 'rgb( 0, 0, 205)',
+       'mediumorchid': 'rgb(186, 85, 211)',
+       'mediumpurple': 'rgb(147, 112, 219)',
+       'mediumseagreen': 'rgb( 60, 179, 113)',
+       'mediumslateblue': 'rgb(123, 104, 238)',
+       'mediumspringgreen': 'rgb( 0, 250, 154)',
+       'mediumturquoise': 'rgb( 72, 209, 204)',
+       'mediumvioletred': 'rgb(199, 21, 133)',
+       'midnightblue': 'rgb( 25, 25, 112)',
+       'mintcream': 'rgb(245, 255, 250)',
+       'mistyrose': 'rgb(255, 228, 225)',
+       'moccasin': 'rgb(255, 228, 181)',
+       'navajowhite': 'rgb(255, 222, 173)',
+       'navy': 'rgb( 0, 0, 128)',
+       'oldlace': 'rgb(253, 245, 230)',
+       'olive': 'rgb(128, 128, 0)',
+       'olivedrab': 'rgb(107, 142, 35)',
+       'orange': 'rgb(255, 165, 0)',
+       'orangered': 'rgb(255, 69, 0)',
+       'orchid': 'rgb(218, 112, 214)',
+       'palegoldenrod': 'rgb(238, 232, 170)',
+       'palegreen': 'rgb(152, 251, 152)',
+       'paleturquoise': 'rgb(175, 238, 238)',
+       'palevioletred': 'rgb(219, 112, 147)',
+       'papayawhip': 'rgb(255, 239, 213)',
+       'peachpuff': 'rgb(255, 218, 185)',
+       'peru': 'rgb(205, 133, 63)',
+       'pink': 'rgb(255, 192, 203)',
+       'plum': 'rgb(221, 160, 221)',
+       'powderblue': 'rgb(176, 224, 230)',
+       'purple': 'rgb(128, 0, 128)',
+       'red': 'rgb(255, 0, 0)',
+       'rosybrown': 'rgb(188, 143, 143)',
+       'royalblue': 'rgb( 65, 105, 225)',
+       'saddlebrown': 'rgb(139, 69, 19)',
+       'salmon': 'rgb(250, 128, 114)',
+       'sandybrown': 'rgb(244, 164, 96)',
+       'seagreen': 'rgb( 46, 139, 87)',
+       'seashell': 'rgb(255, 245, 238)',
+       'sienna': 'rgb(160, 82, 45)',
+       'silver': 'rgb(192, 192, 192)',
+       'skyblue': 'rgb(135, 206, 235)',
+       'slateblue': 'rgb(106, 90, 205)',
+       'slategray': 'rgb(112, 128, 144)',
+       'slategrey': 'rgb(112, 128, 144)',
+       'snow': 'rgb(255, 250, 250)',
+       'springgreen': 'rgb( 0, 255, 127)',
+       'steelblue': 'rgb( 70, 130, 180)',
+       'tan': 'rgb(210, 180, 140)',
+       'teal': 'rgb( 0, 128, 128)',
+       'thistle': 'rgb(216, 191, 216)',
+       'tomato': 'rgb(255, 99, 71)',
+       'turquoise': 'rgb( 64, 224, 208)',
+       'violet': 'rgb(238, 130, 238)',
+       'wheat': 'rgb(245, 222, 179)',
+       'white': 'rgb(255, 255, 255)',
+       'whitesmoke': 'rgb(245, 245, 245)',
+       'yellow': 'rgb(255, 255, 0)',
+       'yellowgreen': 'rgb(154, 205, 50)',
+       }
+       
+def isSameSign(a,b):
+       """
+       Returns True when a and b are both <= 0 or both >= 0.
+       Zero therefore counts as having the same sign as any number.
+       """
+       if a <= 0 and b <= 0:
+               return True
+       return a >= 0 and b >= 0
+       
+# optional '-', one or more digits, optional '.', then any further digits
+coord = re.compile("\\-?\\d+\\.?\\d*")
+# a number written with an exponent: optional sign, digits with an optional
+# decimal point, 'e' or 'E', optional sign, exponent digits
+scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
+# optional sign, then digits with an optional decimal point (no exponent)
+number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
+# the exponent part on its own; group 1 is the signed exponent value
+sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
+# one of the listed unit suffixes (or '%'), anchored at the end of the string
+unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
+
+class Unit(object):
+       """
+       Integer codes for the unit suffix of an SVG length, plus helpers that
+       convert between a suffix string and its code.
+       """
+       INVALID = -1
+       NONE = 0
+       PCT = 1
+       PX = 2
+       PT = 3
+       PC = 4
+       EM = 5
+       EX = 6
+       CM = 7
+       MM = 8
+       IN = 9
+
+       # (suffix, code) pairs; both lookups below scan them in this order
+       _suffixCodes = (
+               ('', NONE), ('%', PCT), ('px', PX), ('pt', PT), ('pc', PC),
+               ('em', EM), ('ex', EX), ('cm', CM), ('mm', MM), ('in', IN),
+       )
+
+       def get(str):
+               """
+               Returns the unit code for a suffix string.  None and '' both map
+               to Unit.NONE; an unrecognised suffix maps to Unit.INVALID.
+               """
+               # the parameter keeps its historical name (which shadows the
+               # builtin str) so that keyword callers continue to work
+               if str == None:
+                       return Unit.NONE
+               for suffix, code in Unit._suffixCodes:
+                       if str == suffix:
+                               return code
+               return Unit.INVALID
+
+       def str(u):
+               """
+               Returns the suffix string for a unit code, or 'INVALID' when the
+               code is not one of the known units.
+               """
+               for suffix, code in Unit._suffixCodes:
+                       if u == code:
+                               return suffix
+               return 'INVALID'
+
+       # plain assignment rather than decorator syntax, as in the original
+       get = staticmethod(get)
+       str = staticmethod(str)
+       
+class SVGLength(object):
+       """
+       Parses an SVG length string into a number and a unit.
+
+       After construction:
+         value - the numeric part, stored as an int when it is integral and
+                 as a float otherwise
+         units - one of the Unit constants
+
+       A string that cannot be parsed -- no leading number, or a number
+       followed by something that is not a known unit -- yields value 0 and
+       units Unit.INVALID.
+       """
+       def __init__(self, str):
+               try: # simple unitless and no scientific notation
+                       self.value = float(str)
+                       if int(self.value) == self.value:
+                               self.value = int(self.value)
+                       self.units = Unit.NONE
+               except ValueError:
+                       # we know that the length string has an exponent, a unit, both or is invalid
+
+                       # start from "invalid" so that units is always assigned, even
+                       # when the text after the number is not a recognised unit
+                       self.value = 0
+                       self.units = Unit.INVALID
+                       unitBegin = 0
+
+                       # parse out number, exponent and unit
+                       scinum = scinumber.match(str)
+                       if scinum != None:
+                               # this will always match, no need to check it
+                               numMatch = number.match(str)
+                               expMatch = sciExponent.search(str, numMatch.start(0))
+                               self.value = (float(numMatch.group(0)) *
+                                       10 ** float(expMatch.group(1)))
+                               unitBegin = expMatch.end(1)
+                       else:
+                               # unit or invalid
+                               numMatch = number.match(str)
+                               if numMatch != None:
+                                       self.value = float(numMatch.group(0))
+                                       unitBegin = numMatch.end(0)
+
+                       if int(self.value) == self.value:
+                               self.value = int(self.value)
+
+                       # unitBegin is still 0 when no leading number was found
+                       if unitBegin != 0 :
+                               unitMatch = unit.search(str, unitBegin)
+                               if unitMatch != None :
+                                       self.units = Unit.get(unitMatch.group(0))
+
+                       # invalid
+                       if self.units == Unit.INVALID:
+                               # TODO: this needs to set the default for the given attribute (how?)
+                               self.value = 0
+
+# returns the length of a property
+# TODO: eventually use the SVGLength class once it is complete
+def getSVGLength(value):
+       """
+       Returns value converted to a float, or value itself, unchanged, when it
+       is not a plain number (for example when it carries a unit suffix).
+       """
+       try:
+               return float(value)
+       except ValueError:
+               # units are not interpreted here: hand the raw string back
+               return value
+
+def findElementById(node, id):
+       """
+       Searches the subtree rooted at node depth-first, parents before
+       children and children in document order.
+
+       Returns the first element whose id attribute equals id, or None when
+       node is None, node is not an element, or nothing matches.
+       """
+       pending = [node]
+       while len(pending) > 0:
+               candidate = pending.pop()
+               # anything that is not an Element (nodeType 1) is skipped,
+               # together with whatever is below it
+               if candidate == None or candidate.nodeType != 1:
+                       continue
+               if candidate.getAttribute('id') == id:
+                       return candidate
+               # push the children back to front so the first child is popped next
+               children = list(candidate.childNodes)
+               children.reverse()
+               pending.extend(children)
+       return None
+
+def findElementsWithId(node, elems=None):
+       """
+       Collects every element in the subtree rooted at node that has a
+       non-empty id attribute.
+
+       Returns a dict mapping id -> element.  When elems is supplied the
+       entries are added to it and that same dict is returned.  If an id
+       occurs more than once, the element visited last wins.
+       """
+       if elems is None:
+               elems = {}
+       ownId = node.getAttribute('id')
+       if ownId != '' :
+               elems[ownId] = node
+       # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
+       # we are only really interested in nodes of type Element (1)
+       elementChildren = [kid for kid in node.childNodes if kid.nodeType == 1]
+       for kid in elementChildren:
+               findElementsWithId(kid, elems)
+       return elems
+
+def _recordReference(ids, refId, node):
+       """
+       Notes in ids that node references the element whose id is refId.
+       """
+       if refId in ids:
+               ids[refId][0] += 1
+               ids[refId][1].append(node)
+       else:
+               ids[refId] = [1,[node]]
+
+def _idFromUrlValue(val):
+       """
+       Returns the id named by a property value of the form url(#id),
+       url("#id") or url('#id'), or None when val has none of those forms
+       or lacks the closing delimiter.
+       """
+       if len(val) >= 7 and val[0:5] == 'url(#' :
+               opener, closer = 'url(#', ')'
+       # if the url has a quote in it, we need to compensate
+       elif len(val) >= 8 and val[0:6] == 'url("#' :
+               opener, closer = 'url("#', '")'
+       elif len(val) >= 8 and val[0:6] == "url('#" :
+               opener, closer = "url('#", "')"
+       else:
+               return None
+       end = val.find(closer, len(opener))
+       # find() returns -1 for a missing terminator; slicing with that would
+       # silently chop the last character off the id
+       if end == -1:
+               return None
+       return val[len(opener):end]
+
+def findReferencedElements(node, ids=None):
+       """
+       Finds every id referenced from node or any element below it.
+
+       Looks at xlink:href="#id" and at url(#id) values of fill, stroke,
+       filter, clip-path, mask, marker-start, marker-mid and marker-end,
+       whether given as attributes or as declarations in the style attribute.
+
+       Returns a dict mapping each referenced id to a two-item list:
+       [number of references, [referencing elements]].  When ids is supplied
+       it is updated in place and returned.
+       """
+       if ids is None:
+               ids = {}
+       # TODO: input argument ids is clunky here (see below how it is called)
+       href = node.getAttributeNS(NS['XLINK'],'href')
+
+       # if xlink:href is set, then grab the id
+       if href != '' and len(href) > 1 and href[0] == '#':
+               # we remove the hash mark from the beginning of the id
+               _recordReference(ids, href[1:], node)
+
+       # now get all style properties and the fill, stroke, filter attributes
+       styles = node.getAttribute('style').split(';')
+       referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start',
+                                               'marker-end', 'marker-mid']
+       # treat each presentation attribute as one more 'prop:value' declaration
+       for attr in referencingProps:
+               styles.append(':'.join([attr, node.getAttribute(attr)]))
+
+       for style in styles:
+               propval = style.split(':')
+               if len(propval) != 2 :
+                       continue
+               prop = propval[0].strip()
+               val = propval[1].strip()
+               if prop in referencingProps and val != '' :
+                       refId = _idFromUrlValue(val)
+                       if refId != None:
+                               _recordReference(ids, refId, node)
+
+       for child in node.childNodes:
+               if child.nodeType == 1 :
+                       findReferencedElements(child, ids)
+       return ids
+
+# module-level running totals of the work done so far; functions in this
+# file bump them through 'global' declarations
+# NOTE(review): only numIDsRemoved, numElemsRemoved and numAttrsRemoved are
+# updated in the functions directly below; the others are presumably
+# maintained further down the file
+numIDsRemoved = 0
+numElemsRemoved = 0
+numAttrsRemoved = 0
+numRastersEmbedded = 0
+numPathSegmentsReduced = 0
+numCurvesStraightened = 0
+numBytesSavedInPathData = 0
+numBytesSavedInColors = 0
+numPointsRemovedFromPolygon = 0
+
+def removeUnusedDefs(doc, defElem, elemsToRemove=None):
+       """
+       Lists the elements below defElem that nothing in doc references.
+
+       An element is listed when it has no id, or an id that is never
+       referenced, unless its tag is one of font, style, metadata, script,
+       title or desc.  SVG <g> children are never listed themselves; their
+       contents are examined recursively instead.
+
+       Nothing is removed here.  Returns the list of candidates, which is
+       elemsToRemove itself (extended) when that argument is supplied.
+       """
+       if elemsToRemove is None:
+               elemsToRemove = []
+
+       referencedIDs = findReferencedElements(doc.documentElement)
+
+       keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
+       for elem in defElem.childNodes:
+               if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
+                       elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
+                       continue
+               # text, comments and the always-kept tags are left alone
+               if elem.nodeType != 1 or elem.nodeName in keepTags:
+                       continue
+               elemId = elem.getAttribute('id')
+               if elemId == '' or elemId not in referencedIDs:
+                       elemsToRemove.append(elem)
+       return elemsToRemove
+
+def removeUnreferencedElements(doc):
+       """
+       Removes unreferenced elements from the document in two passes:
+
+       1. every <linearGradient>, <radialGradient> or <pattern> that has an
+          id which nothing references, wherever it sits in the document;
+       2. every child of a <defs> that removeUnusedDefs() reports as unused.
+
+       Returns the total number of elements removed by both passes.
+       """
+       global numElemsRemoved
+       num = 0
+       removeTags = ['linearGradient', 'radialGradient', 'pattern']
+
+       identifiedElements = findElementsWithId(doc.documentElement)
+       referencedIDs = findReferencedElements(doc.documentElement)
+
+       for id in identifiedElements:
+               if id not in referencedIDs:
+                       goner = findElementById(doc.documentElement, id)
+                       if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
+                               goner.parentNode.removeChild(goner)
+                               num += 1
+                               numElemsRemoved += 1
+
+       # num deliberately keeps counting from the first pass: resetting it here
+       # would hide the removals above from the caller
+       defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
+       for aDef in defs:
+               elemsToRemove = removeUnusedDefs(doc, aDef)
+               for elem in elemsToRemove:
+                       elem.parentNode.removeChild(elem)
+                       numElemsRemoved += 1
+                       num += 1
+       return num
+
+def removeUnreferencedIDs(referencedIDs, identifiedElements):
+       """
+       Removes the id attribute from every element whose id is not referenced.
+       Elements named 'font' always keep their id.
+
+       referencedIDs      - container of the ids that are in use, as returned
+                            by findReferencedElements()
+       identifiedElements - dict of id -> element, as returned by
+                            findElementsWithId()
+
+       Returns the number of ID attributes removed
+       """
+       global numIDsRemoved
+       keepTags = ['font']
+       num = 0
+       for id, node in identifiedElements.items():
+               # 'in' works on every Python version; dict.has_key() is Python 2 only
+               if id not in referencedIDs and node.nodeName not in keepTags:
+                       node.removeAttribute('id')
+                       numIDsRemoved += 1
+                       num += 1
+       return num
+       
+def removeNamespacedAttributes(node, namespaces):
+       """
+       Strips, from node and everything below it, each attribute whose
+       namespace URI is listed in namespaces.
+
+       Returns the number of attributes removed; the module-wide
+       numAttrsRemoved total grows by the same amount.
+       """
+       global numAttrsRemoved
+       # only Elements (nodeType 1) are processed
+       if node.nodeType != 1 :
+               return 0
+
+       # gather the names first so the attribute map is not modified while it
+       # is being indexed
+       attrMap = node.attributes
+       doomedNames = []
+       index = 0
+       while index < attrMap.length:
+               candidate = attrMap.item(index)
+               if candidate != None and candidate.namespaceURI in namespaces:
+                       doomedNames.append(candidate.nodeName)
+               index += 1
+
+       removed = 0
+       for name in doomedNames:
+               removed += 1
+               numAttrsRemoved += 1
+               node.removeAttribute(name)
+
+       # now recurse for children
+       for child in node.childNodes:
+               removed += removeNamespacedAttributes(child, namespaces)
+       return removed
+       
+def removeNamespacedElements(node, namespaces):
+       """
+       Removes, from node and everything below it, each child node whose
+       namespace URI is listed in namespaces.
+
+       Returns the number of nodes removed; the module-wide numElemsRemoved
+       total grows by the same amount.
+       """
+       global numElemsRemoved
+       # only Elements (nodeType 1) are processed
+       if node.nodeType != 1 :
+               return 0
+
+       # gather the matches before removing anything so the child list is not
+       # modified while it is being walked
+       doomed = [kid for kid in node.childNodes
+                       if kid != None and kid.namespaceURI in namespaces]
+
+       removed = 0
+       for kid in doomed:
+               removed += 1
+               numElemsRemoved += 1
+               node.removeChild(kid)
+
+       # now recurse for the children that are left
+       for kid in node.childNodes:
+               removed += removeNamespacedElements(kid, namespaces)
+       return removed
+
+def removeNestedGroups(node):
+       """
+       Walks further and further down the tree from node, removing SVG <g>
+       elements that have no attributes and no <title> or <desc> child, and
+       promoting the children of each removed group up one level.
+
+       Returns the number of groups removed; the module-wide numElemsRemoved
+       total grows by the same amount.
+       """
+       global numElemsRemoved
+       removed = 0
+
+       collapsible = []
+       for candidate in node.childNodes:
+               if candidate.nodeName != 'g' or candidate.namespaceURI != NS['SVG']:
+                       continue
+               if len(candidate.attributes) != 0:
+                       continue
+               # a title or desc directly inside the group means it must stay
+               described = [kid for kid in candidate.childNodes
+                               if kid.nodeType == 1 and kid.namespaceURI == NS['SVG']
+                               and kid.nodeName in ['title','desc']]
+               if len(described) == 0:
+                       collapsible.append(candidate)
+
+       for group in collapsible:
+               # move the children in front of the group, in order, then drop it
+               while group.childNodes.length > 0:
+                       group.parentNode.insertBefore(group.firstChild, group)
+               group.parentNode.removeChild(group)
+               numElemsRemoved += 1
+               removed += 1
+
+       # now recurse for children
+       for child in node.childNodes:
+               if child.nodeType == 1:
+                       removed += removeNestedGroups(child)
+       return removed
+
+def removeDuplicateGradientStops(doc):
+       """
+       Tidies the <stop> elements of every linearGradient and radialGradient
+       in doc.
+
+       Each stop's offset attribute is rewritten as a plain number: a
+       percentage becomes a fraction, a unitless number is kept, and anything
+       else becomes 0.  A stop is then removed when the most recent earlier
+       stop of the same gradient with the same offset also has the same
+       stop-color and stop-opacity.
+
+       Returns the number of stops removed; the module-wide numElemsRemoved
+       total grows by the same amount.
+       """
+       global numElemsRemoved
+       num = 0
+
+       for gradType in ['linearGradient', 'radialGradient']:
+               for grad in doc.getElementsByTagNameNS(NS['SVG'], gradType):
+                       # offset -> [color, opacity] of the latest stop seen there
+                       stops = {}
+                       stopsToRemove = []
+                       for stop in grad.getElementsByTagNameNS(NS['SVG'], 'stop'):
+                               # convert percentages into a floating point number
+                               offsetU = SVGLength(stop.getAttribute('offset'))
+                               if offsetU.units == Unit.PCT:
+                                       offset = offsetU.value / 100.0
+                               elif offsetU.units == Unit.NONE:
+                                       offset = offsetU.value
+                               else:
+                                       offset = 0
+                               # set the stop offset value to the integer or floating point equivalent
+                               if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
+                               else: stop.setAttribute('offset', str(offset))
+
+                               color = stop.getAttribute('stop-color')
+                               opacity = stop.getAttribute('stop-opacity')
+                               # 'in' works on every Python version; dict.has_key() is Python 2 only
+                               if offset in stops:
+                                       oldStop = stops[offset]
+                                       if oldStop[0] == color and oldStop[1] == opacity:
+                                               stopsToRemove.append(stop)
+                               stops[offset] = [color, opacity]
+
+                       # removal is deferred so the stop list is not modified mid-walk
+                       for stop in stopsToRemove:
+                               stop.parentNode.removeChild(stop)
+                               num += 1
+                               numElemsRemoved += 1
+
+       return num
+
+def collapseSinglyReferencedGradients(doc):
+       global numElemsRemoved
+       num = 0
+       
+       # make sure to reset the ref'ed ids for when we are running this in testscour
+       for rid,nodeCount in findReferencedElements(doc.documentElement).iteritems():
+               count = nodeCount[0]
+               nodes = nodeCount[1]
+               if count == 1:
+                       elem = findElementById(doc.documentElement,rid)
+                       if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
+                                       and elem.namespaceURI == NS['SVG']:
+                               # found a gradient that is referenced by only 1 other element
+                               refElem = nodes[0]
+                               if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
+                                               and refElem.namespaceURI == NS['SVG']:
+                                       # elem is a gradient referenced by only one other gradient (refElem)
+                                       
+                                       # add the stops to the referencing gradient (this removes them from elem)
+                                       if len(refElem.getElementsByTagNameNS(NS['SVG'], 'stop')) == 0:
+                                               stopsToAdd = elem.getElementsByTagNameNS(NS['SVG'], 'stop')
+                                               for stop in stopsToAdd:
+                                                       refElem.appendChild(stop)
+                                                       
+                                       # adopt the gradientUnits, spreadMethod,  gradientTransform attributes if
+                                       # they are unspecified on refElem
+                                       for attr in ['gradientUnits','spreadMethod','gradientTransform']:
+                                               if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
+                                                       refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
+                                                       
+                                       # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
+                                       # they are unspecified on refElem
+                                       if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
+                                               for attr in ['fx','fy','cx','cy','r']:
+                                                       if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
+                                                               refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
+                                       
+                                       # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if 
+                                       # they are unspecified on refElem
+                                       if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
+                                               for attr in ['x1','y1','x2','y2']:
+                                                       if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
+                                                               refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
+                                                               
+                                       # now remove the xlink:href from refElem
+                                       refElem.removeAttributeNS(NS['XLINK'], 'href')
+                                       
+                                       # now delete elem
+                                       elem.parentNode.removeChild(elem)
+                                       numElemsRemoved += 1
+                                       num += 1                
+       return num
+
+def removeDuplicateGradients(doc):
+       global numElemsRemoved
+       num = 0
+       
+       gradientsToRemove = {}
+       duplicateToMaster = {}
+
+       for gradType in ['linearGradient', 'radialGradient']:
+               grads = doc.getElementsByTagNameNS(NS['SVG'], gradType)
+               for grad in grads:
+                       # TODO: should slice grads from 'grad' here to optimize
+                       for ograd in grads:
+                               # do not compare gradient to itself
+                               if grad == ograd: continue
+
+                               # compare grad to ograd (all properties, then all stops)
+                               # if attributes do not match, go to next gradient
+                               someGradAttrsDoNotMatch = False
+                               for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
+                                       if grad.getAttribute(attr) != ograd.getAttribute(attr):
+                                               someGradAttrsDoNotMatch = True
+                                               break;
+                               
+                               if someGradAttrsDoNotMatch: continue
+
+                               # compare xlink:href values too
+                               if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
+                                       continue
+
+                               # all gradient properties match, now time to compare stops
+                               stops = grad.getElementsByTagNameNS(NS['SVG'], 'stop')
+                               ostops = ograd.getElementsByTagNameNS(NS['SVG'], 'stop')
+
+                               if stops.length != ostops.length: continue
+
+                               # now compare stops
+                               stopsNotEqual = False
+                               for i in range(stops.length):
+                                       if stopsNotEqual: break
+                                       stop = stops.item(i)
+                                       ostop = ostops.item(i)
+                                       for attr in ['offset', 'stop-color', 'stop-opacity']:
+                                               if stop.getAttribute(attr) != ostop.getAttribute(attr):
+                                                       stopsNotEqual = True
+                                                       break
+                               if stopsNotEqual: continue
+
+                               # ograd is a duplicate of grad, we schedule it to be removed UNLESS
+                               # ograd is ALREADY considered a 'master' element
+                               if not gradientsToRemove.has_key(ograd):
+                                       if not duplicateToMaster.has_key(ograd):
+                                               if not gradientsToRemove.has_key(grad):
+                                                       gradientsToRemove[grad] = []
+                                               gradientsToRemove[grad].append( ograd )
+                                               duplicateToMaster[ograd] = grad
+       
+       # get a collection of all elements that are referenced and their referencing elements
+       referencedIDs = findReferencedElements(doc.documentElement)
+       for masterGrad in gradientsToRemove.keys():
+               master_id = masterGrad.getAttribute('id')
+               for dupGrad in gradientsToRemove[masterGrad]:
+                       # if the duplicate gradient no longer has a parent that means it was
+                       # already re-mapped to another master gradient
+                       if not dupGrad.parentNode: continue
+                       dup_id = dupGrad.getAttribute('id')
+                       # for each element that referenced the gradient we are going to remove
+                       for elem in referencedIDs[dup_id][1]:
+                               # find out which attribute referenced the duplicate gradient
+                               for attr in ['fill', 'stroke']:
+                                       v = elem.getAttribute(attr)
+                                       if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
+                                               elem.setAttribute(attr, 'url(#'+master_id+')')
+                               if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
+                                       elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)
+                       
+                       # now that all referencing elements have been re-mapped to the master
+                       # it is safe to remove this gradient from the document
+                       dupGrad.parentNode.removeChild(dupGrad)
+                       numElemsRemoved += 1
+                       num += 1
+       return num
+
+def repairStyle(node, options):
+       num = 0
+       if node.nodeType == 1 and len(node.getAttribute('style')) > 0 : 
+               # get all style properties and stuff them into a dictionary
+               styleMap = { }
+               rawStyles = node.getAttribute('style').split(';')
+               for style in rawStyles:
+                       propval = style.split(':')
+                       if len(propval) == 2 :
+                               styleMap[propval[0].strip()] = propval[1].strip()
+
+               # I've seen this enough to know that I need to correct it:
+               # fill: url(#linearGradient4918) rgb(0, 0, 0);
+               for prop in ['fill', 'stroke'] :
+                       if styleMap.has_key(prop) :
+                               chunk = styleMap[prop].split(') ')
+                               if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
+                                       styleMap[prop] = chunk[0] + ')'
+                                       num += 1
+
+               # Here is where we can weed out unnecessary styles like:
+               #  opacity:1
+               if styleMap.has_key('opacity') :
+                       opacity = float(styleMap['opacity'])
+                       # opacity='1.0' is useless, remove it
+                       if opacity == 1.0 :
+                               del styleMap['opacity']
+                               num += 1
+                               
+                       # if opacity='0' then all fill and stroke properties are useless, remove them
+                       elif opacity == 0.0 :
+                               for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
+                                       'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
+                                       'stroke-dashoffset', 'stroke-opacity'] :
+                                       if styleMap.has_key(uselessStyle):
+                                               del styleMap[uselessStyle]
+                                               num += 1
+
+               #  if stroke:none, then remove all stroke-related properties (stroke-width, etc)
+               #  TODO: should also detect if the computed value of this element is stroke="none"
+               if styleMap.has_key('stroke') and styleMap['stroke'] == 'none' :
+                       for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit', 
+                                       'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
+                               if styleMap.has_key(strokestyle) :
+                                       del styleMap[strokestyle]
+                                       num += 1
+                       # TODO: This is actually a problem if a parent element has a specified stroke
+                       # we need to properly calculate computed values
+                       del styleMap['stroke']
+
+               #  if fill:none, then remove all fill-related properties (fill-rule, etc)
+               if styleMap.has_key('fill') and styleMap['fill'] == 'none' :
+                       for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
+                               if styleMap.has_key(fillstyle) :
+                                       del styleMap[fillstyle]
+                                       num += 1
+                                       
+               #  stop-opacity: 1
+               if styleMap.has_key('stop-opacity') :
+                       if float(styleMap['stop-opacity']) == 1.0 :
+                               del styleMap['stop-opacity']
+                               num += 1
+               
+               #  fill-opacity: 1 or 0
+               if styleMap.has_key('fill-opacity') :
+                       fillOpacity = float(styleMap['fill-opacity'])
+                       #  TODO: This is actually a problem if the parent element does not have fill-opacity=1
+                       if fillOpacity == 1.0 :
+                               del styleMap['fill-opacity']
+                               num += 1
+                       elif fillOpacity == 0.0 :
+                               for uselessFillStyle in [ 'fill', 'fill-rule' ] :
+                                       if styleMap.has_key(uselessFillStyle):
+                                               del styleMap[uselessFillStyle]
+                                               num += 1
+               
+               #  stroke-opacity: 1 or 0
+               if styleMap.has_key('stroke-opacity') :
+                       strokeOpacity = float(styleMap['stroke-opacity']) 
+                       #  TODO: This is actually a problem if the parent element does not have stroke-opacity=1
+                       if strokeOpacity == 1.0 :
+                               del styleMap['stroke-opacity']
+                               num += 1
+                       elif strokeOpacity == 0.0 :
+                               for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap', 
+                                                       'stroke-dasharray', 'stroke-dashoffset' ] :
+                                       if styleMap.has_key(uselessStrokeStyle): 
+                                               del styleMap[uselessStrokeStyle]
+                                               num += 1
+
+               # stroke-width: 0
+               if styleMap.has_key('stroke-width') :
+                       strokeWidth = getSVGLength(styleMap['stroke-width']) 
+                       if strokeWidth == 0.0 :
+                               for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap', 
+                                                       'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
+                                       if styleMap.has_key(uselessStrokeStyle): 
+                                               del styleMap[uselessStrokeStyle]
+                                               num += 1
+               
+               # remove font properties for non-text elements
+               # I've actually observed this in real SVG content
+               if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
+                       for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust', 
+                                                               'font-style', 'font-variant', 'font-weight', 
+                                                               'letter-spacing', 'line-height', 'kerning',
+                                                               'text-anchor', 'text-decoration', 'text-rendering',
+                                                               'unicode-bidi', 'word-spacing', 'writing-mode'] :
+                               if styleMap.has_key(fontstyle) :
+                                       del styleMap[fontstyle]
+                                       num += 1
+
+               # remove inkscape-specific styles
+               # TODO: need to get a full list of these
+               for inkscapeStyle in ['-inkscape-font-specification']:
+                       if styleMap.has_key(inkscapeStyle):
+                               del styleMap[inkscapeStyle]
+                               num += 1
+
+               # visibility: visible
+               if styleMap.has_key('visibility') :
+                       if styleMap['visibility'] == 'visible':
+                               del styleMap['visibility']
+                               num += 1
+               
+               # display: inline
+               if styleMap.has_key('display') :
+                       if styleMap['display'] == 'inline':
+                               del styleMap['display']
+                               num += 1
+                               
+               # overflow: visible or overflow specified on element other than svg, marker, pattern
+               if styleMap.has_key('overflow') :
+                       if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
+                               del styleMap['overflow']
+                               num += 1
+                               
+               # marker: none
+               if styleMap.has_key('marker') :
+                       if styleMap['marker'] == 'none':
+                               del styleMap['marker']
+                               num += 1
+               
+               # now if any of the properties match known SVG attributes we prefer attributes 
+               # over style so emit them and remove them from the style map
+               if options.style_to_xml:
+                       for propName in styleMap.keys() :
+                               if propName in svgAttributes :
+                                       node.setAttribute(propName, styleMap[propName])
+                                       del styleMap[propName]
+
+               # sew our remaining style properties back together into a style attribute
+               fixedStyle = ''
+               for prop in styleMap.keys() :
+                       fixedStyle += prop + ':' + styleMap[prop] + ';'
+                       
+               if fixedStyle != '' :
+                       node.setAttribute('style', fixedStyle)
+               else:
+                       node.removeAttribute('style')
+       
+       # recurse for our child elements
+       for child in node.childNodes :
+               num += repairStyle(child,options)
+                       
+       return num
+
+def removeDefaultAttributeValues(node, options):
+       num = 0
+       if node.nodeType != 1: return 0
+       
+       # gradientUnits: objectBoundingBox
+       if node.getAttribute('gradientUnits') == 'objectBoundingBox':
+               node.removeAttribute('gradientUnits')
+               num += 1
+               
+       # spreadMethod: pad
+       if node.getAttribute('spreadMethod') == 'pad':
+               node.removeAttribute('spreadMethod')
+               num += 1
+               
+       # x1: 0%
+       if node.getAttribute('x1') != '':
+               x1 = SVGLength(node.getAttribute('x1'))
+               if x1.value == 0:
+                       node.removeAttribute('x1')
+                       num += 1
+
+       # y1: 0%
+       if node.getAttribute('y1') != '':
+               y1 = SVGLength(node.getAttribute('y1'))
+               if y1.value == 0:
+                       node.removeAttribute('y1')
+                       num += 1
+
+       # x2: 100%
+       if node.getAttribute('x2') != '':
+               x2 = SVGLength(node.getAttribute('x2'))
+               if (x2.value == 100 and x2.units == Unit.PCT) or (x2.value == 1 and x2.units == Unit.NONE):
+                       node.removeAttribute('x2')
+                       num += 1
+
+       # y2: 0%
+       if node.getAttribute('y2') != '':
+               y2 = SVGLength(node.getAttribute('y2'))
+               if y2.value == 0:
+                       node.removeAttribute('y2')
+                       num += 1
+
+       # fx: equal to rx
+       if node.getAttribute('fx') != '':
+               if node.getAttribute('fx') == node.getAttribute('cx'):
+                       node.removeAttribute('fx')
+                       num += 1
+
+       # fy: equal to ry
+       if node.getAttribute('fy') != '':
+               if node.getAttribute('fy') == node.getAttribute('cy'):
+                       node.removeAttribute('fy')
+                       num += 1
+
+       # cx: 50%
+       if node.getAttribute('cx') != '':
+               cx = SVGLength(node.getAttribute('cx'))
+               if (cx.value == 50 and cx.units == Unit.PCT) or (cx.value == 0.5 and cx.units == Unit.NONE):
+                       node.removeAttribute('cx')
+                       num += 1
+
+       # cy: 50%
+       if node.getAttribute('cy') != '':
+               cy = SVGLength(node.getAttribute('cy'))
+               if (cy.value == 50 and cy.units == Unit.PCT) or (cy.value == 0.5 and cy.units == Unit.NONE):
+                       node.removeAttribute('cy')
+                       num += 1
+
+       # r: 50%
+       if node.getAttribute('r') != '':
+               r = SVGLength(node.getAttribute('r'))
+               if (r.value == 50 and r.units == Unit.PCT) or (r.value == 0.5 and r.units == Unit.NONE):
+                       node.removeAttribute('r')
+                       num += 1
+
+       # recurse for our child elements
+       for child in node.childNodes :
+               num += removeDefaultAttributeValues(child,options)
+       
+       return num
+
+rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
+rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
+def convertColor(value):
+       """
+               Converts the input color string and returns a #RRGGBB (or #RGB if possible) string
+       """
+       s = value
+       
+       if s in colors.keys():
+               s = colors[s]
+       
+       rgbpMatch = rgbp.match(s)
+       if rgbpMatch != None :
+               r = int(float(rgbpMatch.group(1)) * 255.0 / 100.0)
+               g = int(float(rgbpMatch.group(2)) * 255.0 / 100.0)
+               b = int(float(rgbpMatch.group(3)) * 255.0 / 100.0)
+               s  = 'rgb(%d,%d,%d)' % (r,g,b)
+       
+       rgbMatch = rgb.match(s)
+       if rgbMatch != None :
+               r = hex( int( rgbMatch.group(1) ) )[2:].upper()
+               g = hex( int( rgbMatch.group(2) ) )[2:].upper()
+               b = hex( int( rgbMatch.group(3) ) )[2:].upper()
+               if len(r) == 1: r='0'+r
+               if len(g) == 1: g='0'+g
+               if len(b) == 1: b='0'+b
+               s = '#'+r+g+b
+       
+       if s[0] == '#' and len(s)==7 and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]:
+               s = s.upper()
+               s = '#'+s[1]+s[3]+s[5]
+
+       return s
+       
+def convertColors(element) :
+       """
+               Recursively converts all color properties into #RRGGBB format if shorter
+       """
+       numBytes = 0
+       
+       if element.nodeType != 1: return 0
+
+       # set up list of color attributes for each element type
+       attrsToConvert = []
+       if element.nodeName in ['rect', 'circle', 'ellipse', 'polygon', \
+                                                       'line', 'polyline', 'path', 'g', 'a']:
+               attrsToConvert = ['fill', 'stroke']
+       elif element.nodeName in ['stop']:
+               attrsToConvert = ['stop-color']
+       elif element.nodeName in ['solidColor']:
+               attrsToConvert = ['solid-color']
+
+       # now convert all the color formats
+       for attr in attrsToConvert:
+               oldColorValue = element.getAttribute(attr)
+               if oldColorValue != '':
+                       newColorValue = convertColor(oldColorValue)
+                       oldBytes = len(oldColorValue)
+                       newBytes = len(newColorValue)
+                       if oldBytes > newBytes:
+                               element.setAttribute(attr, newColorValue)
+                               numBytes += (oldBytes - len(element.getAttribute(attr)))
+       
+       # now recurse for our child elements
+       for child in element.childNodes :
+               numBytes += convertColors(child)
+
+       return numBytes
+
+def cleanPath(element) :
+       """
+               Cleans the path string (d attribute) of the element 
+       """
+       global numBytesSavedInPathData
+       global numPathSegmentsReduced
+       global numCurvesStraightened
+       
+       # this gets the parser object from svg_regex.py
+       oldPathStr = element.getAttribute('d')
+       pathObj = svg_parser.parse(oldPathStr)
+       
+       # however, this parser object has some ugliness in it (lists of tuples of tuples of 
+       # numbers and booleans).  we just need a list of (cmd,[numbers]):
+       path = []
+       for (cmd,dataset) in pathObj:
+               if cmd in ['M','m','L','l','T','t']:
+                       # one or more tuples, each containing two numbers
+                       nums = []
+                       for t in dataset:
+                               # convert to a Decimal
+                               nums.append(Decimal(str(t[0])) * Decimal(1))
+                               nums.append(Decimal(str(t[1])) * Decimal(1))
+                                       
+                       # only create this segment if it is not empty
+                       if nums:
+                               path.append( (cmd, nums) )
+                       
+               elif cmd in ['V','v','H','h']:
+                       # one or more numbers
+                       nums = []
+                       for n in dataset:
+                               nums.append(Decimal(str(n)))
+                       if nums:
+                               path.append( (cmd, nums) )
+                       
+               elif cmd in ['C','c']:
+                       # one or more tuples, each containing three tuples of two numbers each
+                       nums = []
+                       for t in dataset:
+                               for pair in t:
+                                       nums.append(Decimal(str(pair[0])) * Decimal(1))
+                                       nums.append(Decimal(str(pair[1])) * Decimal(1))
+                       path.append( (cmd, nums) )
+                       
+               elif cmd in ['S','s','Q','q']:
+                       # one or more tuples, each containing two tuples of two numbers each
+                       nums = []
+                       for t in dataset:
+                               for pair in t:
+                                       nums.append(Decimal(str(pair[0])) * Decimal(1))
+                                       nums.append(Decimal(str(pair[1])) * Decimal(1))
+                       path.append( (cmd, nums) )
+                       
+               elif cmd in ['A','a']:
+                       # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
+                       # another boolean, and a tuple of two numbers
+                       nums = []
+                       for t in dataset:
+                               nums.append( Decimal(str(t[0][0])) * Decimal(1) )
+                               nums.append( Decimal(str(t[0][1])) * Decimal(1) )
+                               nums.append( Decimal(str(t[1])) * Decimal(1))
+                               
+                               if t[2]: nums.append( Decimal(1) )
+                               else: nums.append( Decimal(0) )
+
+                               if t[3]: nums.append( Decimal(1) )
+                               else: nums.append( Decimal(0) )
+                               
+                               nums.append( Decimal(str(t[4][0])) * Decimal(1) )
+                               nums.append( Decimal(str(t[4][1])) * Decimal(1) )
+                       path.append( (cmd, nums) )
+               
+               elif cmd in ['Z','z']:
+                       path.append( (cmd, []) )
+
+       # calculate the starting x,y coord for the second path command
+       if len(path[0][1]) == 2:
+               (x,y) = path[0][1]
+       else:
+               # we have a move and then 1 or more coords for lines
+               N = len(path[0][1])
+               if path[0] == 'M':
+                       # take the last pair of coordinates for the starting point
+                       x = path[0][1][N-2]
+                       y = path[0][1][N-1]
+               else: # relative move, accumulate coordinates for the starting point
+                       (x,y) = path[0][1][0],path[0][1][1]
+                       n = 2
+                       while n < N:
+                               x += path[0][1][n]
+                               y += path[0][1][n+1]
+                               n += 2
+       
+       # now we have the starting point at x,y so let's save it 
+       (startx,starty) = (x,y)
+       
+       # convert absolute coordinates into relative ones (start with the second subcommand
+       # and leave the first M as absolute)
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               i = 0
+               newCmd = cmd
+               newData = data
+               # adjust abs to rel
+               # only the A command has some values that we don't want to adjust (radii, rotation, flags)
+               if cmd == 'A':
+                       newCmd = 'a'
+                       newData = []
+                       while i < len(data):
+                               newData.append(data[i])
+                               newData.append(data[i+1])
+                               newData.append(data[i+2])
+                               newData.append(data[i+3])
+                               newData.append(data[i+4])
+                               newData.append(data[i+5]-x)
+                               newData.append(data[i+6]-y)
+                               x = data[i+5]
+                               y = data[i+6]
+                               i += 7
+               elif cmd == 'a':
+                       while i < len(data):
+                               x += data[i+5]
+                               y += data[i+6]
+                               i += 7                  
+               elif cmd == 'H':
+                       newCmd = 'h'
+                       newData = []
+                       while i < len(data):
+                               newData.append(data[i]-x)
+                               x = data[i]
+                               i += 1
+               elif cmd == 'h':
+                       while i < len(data):
+                               x += data[i]
+                               i += 1
+               elif cmd == 'V':
+                       newCmd = 'v'
+                       newData = []
+                       while i < len(data):
+                               newData.append(data[i] - y)
+                               y = data[i]
+                               i += 1
+               elif cmd == 'v':
+                       while i < len(data):
+                               y += data[i]
+                               i += 1
+               elif cmd in ['M']:
+                       newCmd = cmd.lower()
+                       newData = []
+                       startx = data[0]
+                       starty = data[1]
+                       while i < len(data):
+                               newData.append( data[i] - x )
+                               newData.append( data[i+1] - y )
+                               x = data[i]
+                               y = data[i+1]
+                               i += 2
+               elif cmd in ['L','T']:
+                       newCmd = cmd.lower()
+                       newData = []
+                       while i < len(data):
+                               newData.append( data[i] - x )
+                               newData.append( data[i+1] - y )
+                               x = data[i]
+                               y = data[i+1]
+                               i += 2
+               elif cmd in ['m']:
+                       startx += data[0]
+                       starty += data[1]
+                       while i < len(data):
+                               x += data[i]
+                               y += data[i+1]
+                               i += 2
+               elif cmd in ['l','t']:
+                       while i < len(data):
+                               x += data[i]
+                               y += data[i+1]
+                               i += 2
+               elif cmd in ['S','Q']:
+                       newCmd = cmd.lower()
+                       newData = []
+                       while i < len(data):
+                               newData.append( data[i] - x )
+                               newData.append( data[i+1] - y )
+                               newData.append( data[i+2] - x )
+                               newData.append( data[i+3] - y )
+                               x = data[i+2]
+                               y = data[i+3]
+                               i += 4
+               elif cmd in ['s','q']:
+                       while i < len(data):
+                               x += data[i+2]
+                               y += data[i+3]
+                               i += 4
+               elif cmd == 'C':
+                       newCmd = 'c'
+                       newData = []
+                       while i < len(data):
+                               newData.append( data[i] - x )
+                               newData.append( data[i+1] - y )
+                               newData.append( data[i+2] - x )
+                               newData.append( data[i+3] - y )
+                               newData.append( data[i+4] - x )
+                               newData.append( data[i+5] - y )
+                               x = data[i+4]
+                               y = data[i+5]
+                               i += 6
+               elif cmd == 'c':
+                       while i < len(data):
+                               x += data[i+4]
+                               y += data[i+5]
+                               i += 6
+               elif cmd in ['z','Z']:
+                       x = startx
+                       y = starty
+                       newCmd = 'z'
+               newPath.append( (newCmd, newData) )
+       path = newPath
+       
+       # remove empty segments
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               if cmd in ['m','l','t']:
+                       newData = []
+                       i = 0
+                       while i < len(data):
+                               if data[i] != 0 or data[i+1] != 0:
+                                       newData.append(data[i])
+                                       newData.append(data[i+1])
+                               else:
+                                       numPathSegmentsReduced += 1
+                               i += 2
+                       if newData:
+                               newPath.append( (cmd,newData) )
+               elif cmd == 'c':
+                       newData = []
+                       i = 0
+                       while i < len(data):
+                               if data[i+4] != 0 or data[i+5] != 0:
+                                       newData.append(data[i])
+                                       newData.append(data[i+1])
+                                       newData.append(data[i+2])
+                                       newData.append(data[i+3])
+                                       newData.append(data[i+4])
+                                       newData.append(data[i+5])
+                               else:
+                                       numPathSegmentsReduced += 1
+                               i += 6
+                       if newData:
+                               newPath.append( (cmd,newData) )
+               elif cmd == 'a':
+                       newData = []
+                       i = 0
+                       while i < len(data):
+                               if data[i+5] != 0 or data[i+6] != 0:
+                                       newData.append(data[i])
+                                       newData.append(data[i+1])
+                                       newData.append(data[i+2])
+                                       newData.append(data[i+3])
+                                       newData.append(data[i+4])
+                                       newData.append(data[i+5])
+                                       newData.append(data[i+6])
+                               else:
+                                       numPathSegmentsReduced += 1
+                               i += 7
+                       if newData:
+                               newPath.append( (cmd,newData) )
+               elif cmd == 'q':
+                       newData = []
+                       i = 0
+                       while i < len(data):
+                               if data[i+2] != 0 or data[i+3] != 0:
+                                       newData.append(data[i])
+                                       newData.append(data[i+1])
+                                       newData.append(data[i+2])
+                                       newData.append(data[i+3])
+                               else:
+                                       numPathSegmentsReduced += 1
+                               i += 4
+                       if newData:
+                               newPath.append( (cmd,newData) )
+               elif cmd in ['h','v']:
+                       newData = []
+                       i = 0
+                       while i < len(data):
+                               if data[i] != 0:
+                                       newData.append(data[i])
+                               else:
+                                       numPathSegmentsReduced += 1
+                               i += 1
+                       if newData:
+                               newPath.append( (cmd,newData) )
+               else:
+                       newPath.append( (cmd,data) )
+       path = newPath
+       
+       # convert straight curves into lines
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               i = 0
+               newData = data
+               if cmd == 'c':
+                       newData = []
+                       while i < len(data):
+                               # since all commands are now relative, we can think of previous point as (0,0)
+                               # and new point (dx,dy) is (data[i+4],data[i+5])
+                               # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
+                               (p1x,p1y) = (data[i],data[i+1])
+                               (p2x,p2y) = (data[i+2],data[i+3])
+                               dx = data[i+4]
+                               dy = data[i+5]
+                               
+                               foundStraightCurve = False
+                               
+                               if dx == 0:
+                                       if p1x == 0 and p2x == 0:
+                                               foundStraightCurve = True
+                               else:
+                                       m = dy/dx
+                                       if p1y == m*p1x and p2y == m*p2y:
+                                               foundStraightCurve = True
+
+                               if foundStraightCurve:
+                                       # flush any existing curve coords first
+                                       if newData:
+                                               newPath.append( (cmd,newData) )
+                                               newData = []
+                                       # now create a straight line segment
+                                       newPath.append( ('l', [dx,dy]) )
+                                       numCurvesStraightened += 1
+                               else:
+                                       newData.append(data[i])
+                                       newData.append(data[i+1])
+                                       newData.append(data[i+2])
+                                       newData.append(data[i+3])
+                                       newData.append(data[i+4])
+                                       newData.append(data[i+5])
+                                       
+                               i += 6
+               if newData or cmd == 'z' or cmd == 'Z':
+                       newPath.append( (cmd,newData) )
+       path = newPath
+
+       # collapse all consecutive commands of the same type into one command
+       prevCmd = ''
+       prevData = []
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               # flush the previous command if it is not the same type as the current command
+               # or it is not an h or v line
+               if prevCmd != '':
+                       if cmd != prevCmd:# or not prevCmd in ['h','v']:
+                               newPath.append( (prevCmd, prevData) )
+                               prevCmd = ''
+                               prevData = []
+               
+               # if the previous and current commands are the same type and a h/v line, collapse
+               if cmd == prevCmd: # and cmd in ['h','v','l']:
+                       for coord in data:
+                               prevData.append(coord)
+               
+               # save last command and data
+               else:
+                       prevCmd = cmd
+                       prevData = data
+       # flush last command and data
+       if prevCmd != '':
+               newPath.append( (prevCmd, prevData) )
+       path = newPath
+
+       # convert line segments into h,v where possible 
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               if cmd == 'l':
+                       i = 0
+                       lineTuples = []
+                       while i < len(data):
+                               if data[i] == 0:
+                                       # vertical
+                                       if lineTuples:
+                                               # flush the existing line command
+                                               newPath.append( ('l', lineTuples) )
+                                               lineTuples = []
+                                       # append the v and then the remaining line coords                                               
+                                       newPath.append( ('v', [data[i+1]]) )
+                                       numPathSegmentsReduced += 1
+                               elif data[i+1] == 0:
+                                       if lineTuples:
+                                               # flush the line command, then append the h and then the remaining line coords
+                                               newPath.append( ('l', lineTuples) )
+                                               lineTuples = []
+                                       newPath.append( ('h', [data[i]]) )
+                                       numPathSegmentsReduced += 1
+                               else:
+                                       lineTuples.append(data[i])
+                                       lineTuples.append(data[i+1])
+                               i += 2
+                       if lineTuples:
+                               newPath.append( ('l', lineTuples) )
+               else:
+                       newPath.append( (cmd, data) )
+       path = newPath
+
+       # for each h or v, collapse unnecessary coordinates that run in the same direction
+       # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
+       newPath = [path[0]]
+       for (cmd,data) in path[1:]:
+               if cmd in ['h','v'] and len(data) > 1:
+                       newData = []
+                       prevCoord = data[0]
+                       for coord in data[1:]:
+                               if isSameSign(prevCoord, coord):
+                                       prevCoord += coord
+                                       numPathSegmentsReduced += 1
+                               else:
+                                       newData.append(prevCoord)
+                                       prevCoord = coord
+                       newData.append(prevCoord)
+                       newPath.append( (cmd, newData) )
+               else:
+                       newPath.append( (cmd, data) )
+       path = newPath
+       
+       newPathStr = serializePath(path)
+       numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
+       element.setAttribute('d', newPathStr)
+
+def parseListOfPoints(s):
+       """
+               Parse the value of a 'points' attribute into a flat list of coordinates.
+
+               s is the raw attribute string: coordinate pairs separated by commas
+               and/or whitespace, optionally padded with whitespace at either end.
+
+               Returns a list containing an even number of coordinate strings
+               (x0, y0, x1, y1, ...).  Returns an empty list if the string holds an
+               odd number of values or if any value carries a unit.
+       """
+
+       # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
+       # coordinate-pair = coordinate comma-or-wsp coordinate
+       # coordinate = sign? integer
+       #
+       # Trim the optional outer whitespace first: splitting an untrimmed string
+       # yields empty tokens at the ends, which corrupts the pair count.
+       # The separator pattern always consumes at least one character, so it can
+       # never match the empty string between two characters.
+       nums = re.split("\\s*,\\s*|\\s+", s.strip())
+
+       # if we had an odd number of values, return empty
+       if len(nums) % 2 != 0: return []
+
+       points = []
+       i = 0
+       while i < len(nums):
+               x = SVGLength(nums[i])
+               y = SVGLength(nums[i+1])
+
+               # if the coordinates were not unitless, return empty
+               if x.units != Unit.NONE or y.units != Unit.NONE: return []
+               points.append( str(x.value) )
+               points.append( str(y.value) )
+               i += 2
+
+       return points
+       
+def cleanPolygon(elem):
+       """
+               Remove unnecessary closing point of polygon points attribute
+
+               elem is the element whose 'points' attribute is parsed with
+               parseListOfPoints and rewritten with scourCoordinates.  When the
+               last point equals the first one it is dropped and the module-level
+               counter numPointsRemovedFromPolygon is incremented.
+       """
+       global numPointsRemovedFromPolygon
+
+       pts = parseListOfPoints(elem.getAttribute('points'))
+       # pts is a flat list [x0, y0, x1, y1, ...], so it holds len(pts)//2 points
+       N = len(pts) // 2
+       if N >= 2:
+               # the first point is (pts[0], pts[1]); comparing against pts[0] twice
+               # would treat an unrelated last point as a closing point
+               (startx,starty) = (pts[0],pts[1])
+               (endx,endy) = (pts[-2],pts[-1])
+               if startx == endx and starty == endy:
+                       pts = pts[:-2]
+                       numPointsRemovedFromPolygon += 1
+       elem.setAttribute('points', scourCoordinates(pts))
+
+def cleanPolyline(elem):
+       """
+               Rewrite the points attribute of a polyline in scoured form
+       """
+       rawPoints = elem.getAttribute('points')
+       coords = parseListOfPoints(rawPoints)
+       scoured = scourCoordinates(coords)
+       elem.setAttribute('points', scoured)
+       
+def serializePath(pathObj):
+       """
+               Turn a sequence of (command, data) pairs back into a path string,
+               passing the data of every command through scourCoordinates.
+       """
+       pieces = []
+       for segment in pathObj:
+               (letter, coords) = segment
+               pieces.append(letter)
+               pieces.append(scourCoordinates(coords))
+       return "".join(pieces)
+
+def scourCoordinates(data):
+       """
+               Build the compact string form of a list of coordinates:
+                       - every value is passed through scourLength
+                       - no whitespace is emitted between values
+                       - a comma is placed between two values only when the
+                         second one is non-negative
+               Returns the empty string when data is None.
+       """
+       if data is None:
+               return ""
+
+       pieces = []
+       for (index, value) in enumerate(data):
+               # add the scoured coordinate to the output
+               pieces.append(scourLength(value))
+
+               # only need the comma if the next number is non-negative
+               if index < len(data)-1 and Decimal(data[index+1]) >= 0:
+                       pieces.append(',')
+       return "".join(pieces)
+
+def scourLength(str):
+       """
+               Serialize a length as compactly as possible.
+
+               str is the textual length; it is parsed with SVGLength.  (The
+               parameter name shadows the builtin but is kept so that existing
+               callers are unaffected.)
+
+               Returns a unicode string: the shortened number followed by the
+               unit suffix produced by Unit.str().
+       """
+       length = SVGLength(str)
+       coord = length.value
+
+       # reduce to the proper number of digits (the multiplication rounds the
+       # value to the precision of the current decimal context)
+       coord = Decimal(unicode(coord)) * Decimal(1)
+
+       # integerize if we can
+       if int(coord) == coord: coord = Decimal(unicode(int(coord)))
+
+       # Trim trailing zeros after the decimal point ourselves; Decimal offers
+       # no trim() method, so the earlier attempt to call it always failed over
+       # to this path.  Only plain notation is trimmed: in scientific notation
+       # the last characters belong to the exponent, and stripping zeros there
+       # ("1.5E-10" -> "1.5E-1") would change the value.
+       s = unicode(coord)
+       if s.find('.') != -1 and s.find('E') == -1 and s.find('e') == -1:
+               while s[-1] == '0':
+                       s = s[:-1]
+       coord = Decimal(s)
+
+       # Decimal.normalize() will use scientific notation - if that
+       # string is smaller, then use it
+       normd = coord.normalize()
+       if len(unicode(normd)) < len(unicode(coord)):
+               coord = normd
+
+       return unicode(coord)+Unit.str(length.units)
+
+def embedRasters(element, options) :
+       """
+               Converts raster references to inline images.
+               NOTE: there are size limits to base64-encoding handling in browsers
+
+               element is the node whose xlink:href attribute is inspected; when it
+               names a png, jpg or gif that can be read (local file or http:// URL)
+               the attribute is replaced by a base64 data: URI and the module-level
+               counter numRastersEmbedded is incremented.
+
+               options.infilename, when set, gives the file that relative hrefs
+               are resolved against.
+       """
+       global numRastersEmbedded
+
+       href = element.getAttributeNS(NS['XLINK'],'href')
+
+       # nothing to do unless xlink:href holds more than a single character
+       if href == '' or len(href) <= 1:
+               return
+
+       # find if href value has filename ext
+       ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
+
+       # look for 'png', 'jpg', and 'gif' extensions
+       if ext not in ('png', 'jpg', 'gif'):
+               return
+
+       # if href is neither an existing file nor an http:// URL, set the path
+       # relative to the directory of the input file
+       if not os.path.isfile(href) and href[:7] != 'http://':
+               infilename = '.'
+               if options.infilename: infilename = options.infilename
+               href = os.path.join(os.path.dirname(infilename), href)
+
+       rasterdata = ''
+       if os.path.isfile(href):
+               # open raster file as raw binary; always release the handle
+               raster = open( href, "rb")
+               try:
+                       rasterdata = raster.read()
+               finally:
+                       raster.close()
+       elif href[:7] == 'http://':
+               webFile = urllib.urlopen( href )
+               try:
+                       rasterdata = webFile.read()
+               finally:
+                       webFile.close()
+
+       # ... should we remove all images which don't resolve?
+       if rasterdata == '':
+               return
+
+       # base64-encode raster
+       b64eRaster = base64.b64encode( rasterdata )
+
+       # set href attribute to base64-encoded equivalent
+       if b64eRaster != '':
+               # PNG and GIF both have MIME Type 'image/[ext]', but
+               # JPEG has MIME Type 'image/jpeg'
+               if ext == 'jpg':
+                       ext = 'jpeg'
+
+               element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
+               numRastersEmbedded += 1
+
+def properlySizeDoc(docElement):
+       """
+               Replace a fixed pixel size on the document element by a viewBox.
+
+               docElement is the element whose width/height attributes are read.
+               Nothing is changed when either dimension uses a unit other than px,
+               or when an existing, parseable viewBox has a non-zero origin or a
+               size different from width/height.  Otherwise the viewBox is set to
+               "0 0 <width> <height>" and the width and height attributes are
+               removed.
+       """
+       # get doc width and height
+       w = SVGLength(docElement.getAttribute('width'))
+       h = SVGLength(docElement.getAttribute('height'))
+
+       # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
+       # (both dimensions have to be checked, not the width twice)
+       if ((w.units != Unit.NONE and w.units != Unit.PX) or
+               (h.units != Unit.NONE and h.units != Unit.PX)):
+               return
+
+       # else we have a statically sized image and we should try to remedy that
+
+       # parse viewBox attribute; surrounding whitespace is trimmed first so that
+       # it does not produce an empty leading token
+       vbSep = re.split("\\s*\\,?\\s*", docElement.getAttribute('viewBox').strip(), 3)
+       # if we have a valid viewBox we need to check it
+       if len(vbSep) == 4:
+               try:
+                       # if x or y are specified and non-zero then it is not ok to overwrite it
+                       vbX = float(vbSep[0])
+                       vbY = float(vbSep[1])
+                       if vbX != 0 or vbY != 0:
+                               return
+
+                       # if width or height are not equal to doc width/height then it is not ok to overwrite it
+                       vbWidth = float(vbSep[2])
+                       vbHeight = float(vbSep[3])
+                       if vbWidth != w.value or vbHeight != h.value:
+                               return
+               # if the viewBox did not parse properly it is invalid and ok to overwrite it
+               except ValueError:
+                       pass
+
+       # at this point it's safe to set the viewBox and remove width/height
+       docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
+       docElement.removeAttribute('width')
+       docElement.removeAttribute('height')
+
+def remapNamespacePrefix(node, oldprefix, newprefix):
+       """
+               Recursively change the namespace prefix of the elements in a subtree.
+
+               Every element at or below node whose prefix equals oldprefix is
+               replaced in its parent by a new element with the same namespace,
+               local name, attributes and (cloned) children, but carrying
+               newprefix; an empty newprefix yields an unprefixed element.
+               Nodes that are None or not elements are ignored.
+       """
+       # nodeType 1 is an element node
+       if node is None or node.nodeType != 1: return
+
+       if node.prefix == oldprefix:
+               localName = node.localName
+               namespace = node.namespaceURI
+               doc = node.ownerDocument
+               parent = node.parentNode
+
+               # create a replacement node; it is always created namespace-aware so
+               # that later namespace-based lookups still find it
+               if newprefix != '':
+                       newNode = doc.createElementNS(namespace, newprefix+":"+localName)
+               else:
+                       newNode = doc.createElementNS(namespace, localName)
+
+               # add all the attributes under their qualified names, so prefixed
+               # attributes keep their prefix
+               attrList = node.attributes
+               for i in range(attrList.length):
+                       attr = attrList.item(i)
+                       newNode.setAttributeNS( attr.namespaceURI, attr.nodeName, attr.nodeValue)
+
+               # clone and add all the child nodes
+               for child in node.childNodes:
+                       newNode.appendChild(child.cloneNode(True))
+
+               # replace old node with new node; replaceChild hands back the node
+               # that was removed, so carry on with the replacement explicitly
+               parent.replaceChild( newNode, node )
+               node = newNode
+
+       # now do all child nodes (iterate over a snapshot, since a child may be
+       # replaced in the list while we walk it)
+       for child in list(node.childNodes) :
+               remapNamespacePrefix(child, oldprefix, newprefix)
+
+# this is the main method
+# input is a string representation of the input XML
+# returns a string representation of the output XML
+def scourString(in_string, options=None):
+       """
+               Scour an SVG document held in a string.
+
+               in_string is the XML source of the document.  options is the values
+               object produced by _options_parser; its defaults are used when
+               options is None.
+
+               Returns the scoured document as a string with empty lines removed,
+               preceded by an XML prolog unless options.strip_xml_prolog is set.
+
+               Side effects: sets the precision of the current decimal context to
+               options.digits and updates the module-level statistics counters.
+       """
+       if options is None:
+               options = _options_parser.get_default_values()
+       getcontext().prec = options.digits
+       global numAttrsRemoved
+       global numStylePropsFixed
+       global numElemsRemoved
+       global numBytesSavedInColors
+       doc = xml.dom.minidom.parseString(in_string)
+
+       # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
+       # on the first pass, so we do it multiple times
+       # does it have to do with removal of children affecting the childlist?
+       if options.keep_editor_data == False:
+               while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
+                       pass
+               while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
+                       pass
+
+               # remove the xmlns: declarations now
+               xmlnsDeclsToRemove = []
+               attrList = doc.documentElement.attributes
+               for num in range(attrList.length) :
+                       if attrList.item(num).nodeValue in unwanted_ns :
+                               xmlnsDeclsToRemove.append(attrList.item(num).nodeName)
+
+               for attr in xmlnsDeclsToRemove :
+                       doc.documentElement.removeAttribute(attr)
+                       numAttrsRemoved += 1
+
+       # ensure namespace for SVG is declared
+       if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
+               doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
+               # TODO: throw error or warning?
+
+       # check for redundant SVG namespace declaration
+       attrList = doc.documentElement.attributes
+       xmlnsDeclsToRemove = []
+       redundantPrefixes = []
+       for i in range(attrList.length):
+               attr = attrList.item(i)
+               name = attr.nodeName
+               val = attr.nodeValue
+               if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
+                       redundantPrefixes.append(name[6:])
+                       xmlnsDeclsToRemove.append(name)
+
+       for attrName in xmlnsDeclsToRemove:
+               doc.documentElement.removeAttribute(attrName)
+
+       for prefix in redundantPrefixes:
+               remapNamespacePrefix(doc.documentElement, prefix, '')
+
+       # repair style (remove unnecessary style properties and change them into XML attributes)
+       numStylePropsFixed = repairStyle(doc.documentElement, options)
+
+       # convert colors to #RRGGBB format
+       if options.simple_colors:
+               numBytesSavedInColors = convertColors(doc.documentElement)
+
+       # remove empty defs, metadata, g
+       # NOTE: these elements will be removed even if they have (invalid) text nodes
+       for tag in ['defs', 'metadata', 'g'] :
+               for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) :
+                       removeElem = not elem.hasChildNodes()
+                       if removeElem == False :
+                               # keep the element as soon as one child is an element (1),
+                               # text (3), CDATA section (4) or comment (8) node
+                               for child in elem.childNodes :
+                                       if child.nodeType in [1, 3, 4, 8] :
+                                               break
+                               else:
+                                       removeElem = True
+                       if removeElem :
+                               elem.parentNode.removeChild(elem)
+                               numElemsRemoved += 1
+
+       # remove unreferenced gradients/patterns outside of defs
+       while removeUnreferencedElements(doc) > 0:
+               pass
+
+       if options.strip_ids:
+               bContinueLooping = True
+               while bContinueLooping:
+                       identifiedElements = findElementsWithId(doc.documentElement)
+                       referencedIDs = findReferencedElements(doc.documentElement)
+                       bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
+
+       if options.group_collapse:
+               while removeNestedGroups(doc.documentElement) > 0:
+                       pass
+
+       while removeDuplicateGradientStops(doc) > 0:
+               pass
+
+       # remove gradients that are only referenced by one other gradient
+       while collapseSinglyReferencedGradients(doc) > 0:
+               pass
+
+       # remove duplicate gradients
+       while removeDuplicateGradients(doc) > 0:
+               pass
+
+       # clean path data
+       for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') :
+               if elem.getAttribute('d') == '':
+                       elem.parentNode.removeChild(elem)
+               else:
+                       cleanPath(elem)
+
+       # remove unnecessary closing point of polygons and scour points
+       for polygon in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polygon') :
+               cleanPolygon(polygon)
+
+       # scour points of polyline; a polyline is not closed automatically, so
+       # its last point must be kept even when it repeats the first one
+       for polyline in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polyline') :
+               cleanPolyline(polyline)
+
+       # scour lengths (including coordinates)
+       for tagName in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
+               for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tagName):
+                       for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry', 'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset']:
+                               if elem.getAttribute(attr) != '':
+                                       elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))
+
+       # remove default values of attributes
+       numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)
+
+       # convert rasters references to base64-encoded strings
+       if options.embed_rasters:
+               for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') :
+                       embedRasters(elem, options)
+
+       # properly size the SVG document (ideally width/height should be 100% with a viewBox)
+       properlySizeDoc(doc.documentElement)
+
+       # output the document as a string
+       # NOTE: removed pretty printing because of this problem:
+       # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
+       out_string = doc.documentElement.toxml()
+
+       # now strip out empty lines
+       lines = []
+       for line in out_string.splitlines(True):
+               if line.strip():
+                       lines.append(line)
+
+       # return the string stripped of empty lines
+       if options.strip_xml_prolog == False:
+               xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
+       else:
+               xmlprolog = ""
+
+       return xmlprolog + "".join(lines)
+
+# used mostly by unit tests
+# input is a filename
+# returns the minidom doc representation of the SVG
+def scourXmlFile(filename, options=None):
+       """
+               Scour the SVG file called filename and return the result parsed
+               into a minidom document.  options is passed on to scourString.
+       """
+       # close the input file explicitly instead of leaving the handle open
+       infile = open(filename)
+       try:
+               in_string = infile.read()
+       finally:
+               infile.close()
+       out_string = scourString(in_string, options)
+       return xml.dom.minidom.parseString(out_string.encode('utf-8'))
+
+# GZ: Seems most other commandline tools don't do this, is it really wanted?
+class HeaderedFormatter(optparse.IndentedHelpFormatter):
+       """
+               Help formatter that puts the application name, version number and
+               copyright statement ahead of the regular usage text.
+       """
+       def format_usage(self, usage):
+               standardUsage = optparse.IndentedHelpFormatter.format_usage(self, usage)
+               header = (APP, VER, COPYRIGHT)
+               return "%s %s\n%s\n%s" % (header + (standardUsage,))
+
+# GZ: would prefer this to be in a function or class scope, but tests etc need
+#     access to the defaults anyway
+# Module-level option parser used by parse_args().
+_options_parser = optparse.OptionParser(
+       usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
+       # Adjacent literals are concatenated verbatim, so only one side of each
+       # join carries the separating space (a doubled blank would otherwise
+       # show up in the --help description).
+       description=("If the input/output files are specified with a svgz"
+       " extension, then compressed SVG is assumed. If the input file is not"
+       " specified, stdin is used. If the output file is not specified,"
+       " stdout is used."),
+       formatter=HeaderedFormatter(max_help_position=30),
+       version=VER)
+
+# On/off switches, registered in exactly this order:
+# (flag, action, dest, default, help text).
+_switches = [
+       ("--disable-simplify-colors", "store_false", "simple_colors", True,
+               "won't convert all colors to #RRGGBB format"),
+       ("--disable-style-to-xml", "store_false", "style_to_xml", True,
+               "won't convert styles into XML attributes"),
+       ("--disable-group-collapsing", "store_false", "group_collapse", True,
+               "won't collapse <g> elements"),
+       ("--enable-id-stripping", "store_true", "strip_ids", False,
+               "remove all un-referenced ID attributes"),
+       ("--disable-embed-rasters", "store_false", "embed_rasters", True,
+               "won't embed rasters as base64-encoded data"),
+       ("--keep-editor-data", "store_true", "keep_editor_data", False,
+               "won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes"),
+       ("--strip-xml-prolog", "store_true", "strip_xml_prolog", False,
+               "won't output the <?xml ?> prolog"),
+]
+for _flag, _action, _dest, _default, _help in _switches:
+       _options_parser.add_option(_flag,
+               action=_action, dest=_dest, default=_default, help=_help)
+# Leave no helper names behind in the module namespace.
+del _switches, _flag, _action, _dest, _default, _help
+
+# GZ: this is confusing, most people will be thinking in terms of
+#     decimal places, which is not what decimal precision is doing
+_options_parser.add_option("-p", "--set-precision",
+       dest="digits", type=int, default=5, action="store",
+       help="set number of significant digits (default: %default)")
+# -i / -o are accepted but suppressed from the generated option list; the
+# usage line is where they are advertised.
+_options_parser.add_option("-i",
+       dest="infilename", action="store", help=optparse.SUPPRESS_HELP)
+_options_parser.add_option("-o",
+       dest="outfilename", action="store", help=optparse.SUPPRESS_HELP)
+
+def maybe_gziped_file(filename, mode="r"):
+       """
+               Open `filename` and return the resulting file object, going
+               through gzip when the extension (compared case-insensitively)
+               is .svgz or .gz.
+
+               filename -- path of the file to open
+               mode     -- mode string handed to the opener (default "r")
+       """
+       extension = os.path.splitext(filename)[1].lower()
+       if extension in (".svgz", ".gz"):
+               return gzip.GzipFile(filename, mode)
+       # open() is the documented way to obtain a file object; calling the
+       # file() constructor directly is discouraged and it no longer exists
+       # in Python 3.
+       return open(filename, mode)
+
+def parse_args(args=None):
+       """
+               Parse the command line and open the files it names.
+
+               args -- argument list forwarded as-is to the option parser
+
+               Returns (options, [infile, outfile]); sys.stdin / sys.stdout
+               stand in for a file that was not given with -i / -o.  Leftover
+               positional arguments or a negative precision are reported
+               through the parser's error() method.
+       """
+       options, leftover = _options_parser.parse_args(args)
+
+       # Validate before any file gets opened.
+       if leftover:
+               _options_parser.error("Additional arguments not handled: %r, see --help" % leftover)
+       if options.digits < 0:
+               _options_parser.error("Can't have negative significant digits, see --help")
+
+       # Start from the standard streams and replace whichever one has an
+       # explicit filename.
+       # GZ: could sniff for gzip compression on stdin here
+       infile = sys.stdin
+       outfile = sys.stdout
+       if options.infilename:
+               # GZ: could catch a raised IOError here and report
+               infile = maybe_gziped_file(options.infilename)
+       if options.outfilename:
+               outfile = maybe_gziped_file(options.outfilename, "w")
+
+       return options, [infile, outfile]
+
+def getReport():
+       """
+               Build the statistics summary from the module-level counters and
+               return it as a single string without a trailing newline.
+       """
+       # Each entry pairs the literal text emitted in front of a counter with
+       # the counter itself; the leading '\n' on every entry but the first is
+       # what separates the report lines.
+       entries = [
+               (' Number of elements removed: ', numElemsRemoved),
+               ('\n Number of attributes removed: ', numAttrsRemoved),
+               ('\n Number of unreferenced id attributes removed: ', numIDsRemoved),
+               ('\n Number of style properties fixed: ', numStylePropsFixed),
+               ('\n Number of raster images embedded inline: ', numRastersEmbedded),
+               ('\n Number of path segments reduced/removed: ', numPathSegmentsReduced),
+               ('\n Number of bytes saved in path data: ', numBytesSavedInPathData),
+               ('\n Number of bytes saved in colors: ', numBytesSavedInColors),
+               ('\n Number of points removed from polygons: ', numPointsRemovedFromPolygon),
+       ]
+       return ''.join([label + str(count) for label, count in entries])
+
+if __name__ == '__main__':
+       # Command line entry point: scour the input, write the result, then
+       # print a short report to stderr.
+       if sys.platform == "win32":
+               from time import clock as get_tick
+       else:
+               # GZ: is this different from time.time() in any way?
+               # (Yes: os.times()[0] is the user CPU time of this process,
+               # not wall-clock time.)
+               def get_tick():
+                       return os.times()[0]
+
+       start = get_tick()
+
+       options, (input, output) = parse_args()
+
+       print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
+
+       # do the work
+       in_string = input.read()
+       out_string = scourString(in_string, options).encode("UTF-8")
+       output.write(out_string)
+
+       # Close input and output files
+       input.close()
+       output.close()
+
+       end = get_tick()
+
+       # GZ: unless silenced by -q or something?
+       # GZ: not using globals would be good too
+       print >>sys.stderr, ' File:', input.name, \
+               '\n Time taken:', str(end-start) + 's\n', \
+               getReport()
+
+       oldsize = len(in_string)
+       newsize = len(out_string)
+       # Both sizes are ints, and "/" between ints floors on Python 2, which
+       # floored the reported percentage to a multiple of 100 (0% whenever
+       # the output is smaller).  Convert one operand so the real ratio is
+       # shown.
+       sizediff = (float(newsize) / oldsize) * 100
+       print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
+               'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
+
+
diff --git a/share/extensions/svg_regex.py b/share/extensions/svg_regex.py
new file mode 100644 (file)
index 0000000..10fd9c3
--- /dev/null
@@ -0,0 +1,281 @@
+# This software is OSI Certified Open Source Software.
+# OSI Certified is a certification mark of the Open Source Initiative.
+# 
+# Copyright (c) 2006, Enthought, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#  * Neither the name of Enthought, Inc. nor the names of its contributors may
+#    be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+""" Small hand-written recursive descent parser for SVG <path> data.
+
+
+In [1]: from svg_regex import svg_parser
+
+In [3]: svg_parser.parse('M 10,20 30,40V50 60 70')
+Out[3]: [('M', [(10.0, 20.0), (30.0, 40.0)]), ('V', [50.0, 60.0, 70.0])]
+
+In [4]: svg_parser.parse('M 0.6051.5')  # An edge case
+Out[4]: [('M', [(0.60509999999999997, 0.5)])]
+
+In [5]: svg_parser.parse('M 100-200')  # Another edge case
+Out[5]: [('M', [(100.0, -200.0)])]
+"""
+
+import re
+
+
+# Sentinel.
+class _EOF(object):
+    """ Type of the end-of-input marker; only its repr is customised. """
+    def __repr__(self):
+        return 'EOF'
+# Module-wide instance; Lexer.lex() yields it as the type of the final token.
+EOF = _EOF()
+
+# (token name, regex) pairs.  Lexer joins the patterns with '|' in this
+# order, so earlier entries are tried first.
+# NOTE(review): the second alternative of 'float' ([0-9]+\.?) also accepts
+# bare integers, so 'int' looks unreachable with this ordering -- confirm
+# before relying on 'int' tokens.
+lexicon = [
+    ('float', r'[-\+]?(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.?))(?:[Ee][-\+]?[0-9]+)?'),
+    ('int', r'[-\+]?[0-9]+'),
+    ('command', r'[AaCcHhLlMmQqSsTtVvZz]'),
+]
+
+
+class Lexer(object):
+    """ Tokenizer for SVG path data.
+
+    The SVG spec requires that tokens are greedy. This lexer relies on Python's
+    regexes defaulting to greediness.
+
+    Every (name, regex) entry of the lexicon becomes one named group of a
+    single combined pattern; text the pattern does not match is skipped.
+
+    This style of implementation was inspired by this article:
+
+        http://www.gooli.org/blog/a-simple-lexer-in-python/
+    """
+    def __init__(self, lexicon):
+        self.lexicon = lexicon
+        # One named alternative per lexicon entry, kept in lexicon order.
+        alternatives = ['(?P<%s>%s)' % (name, regex) for name, regex in lexicon]
+        self.regex_string = '|'.join(alternatives)
+        self.regex = re.compile(self.regex_string)
+
+    def lex(self, text):
+        """ Generate (token_type, str_data) pairs for `text`.
+
+        The stream always ends with (EOF, None), EOF being the singleton
+        object defined in this module.
+        """
+        for match in self.regex.finditer(text):
+            # Report the first lexicon entry whose group took part in the match.
+            for name, _ in self.lexicon:
+                value = match.group(name)
+                if value is None:
+                    continue
+                yield (name, value)
+                break
+        yield (EOF, None)
+
+# Shared lexer built from the module-level lexicon; SVGPathParser uses it
+# when no lexer is passed in.
+svg_lexer = Lexer(lexicon)
+
+
+class SVGPathParser(object):
+    """ Parse SVG <path> data into a list of commands.
+
+    Each distinct command will take the form of a tuple (command, data). The
+    `command` is just the character string that starts the command group in the
+    <path> data, so 'M' for absolute moveto, 'm' for relative moveto, 'Z' for
+    closepath, etc. The kind of data it carries with it depends on the command.
+    For 'Z' (closepath), it's just None. The others are lists of individual
+    argument groups. Multiple elements in these lists usually mean to repeat the
+    command. The notable exception is 'M' (moveto) where only the first element
+    is truly a moveto. The remainder are implicit linetos.
+
+    See the SVG documentation for the interpretation of the individual elements
+    for each command.
+
+    The main method is `parse(text)`. It can only consume actual strings, not
+    filelike objects or iterators.
+
+    Every `rule_*` method takes `next` (a callable returning the following
+    token) and the current `token`, and returns `(result, token)` where the
+    returned token is the first one the rule did not consume. Malformed input
+    is reported with SyntaxError.
+    """
+
+    def __init__(self, lexer=svg_lexer):
+        """ Store the lexer and build the command -> rule dispatch table.
+        """
+        self.lexer = lexer
+
+        self.command_dispatch = {
+            'Z': self.rule_closepath,
+            'z': self.rule_closepath,
+            'M': self.rule_moveto_or_lineto,
+            'm': self.rule_moveto_or_lineto,
+            'L': self.rule_moveto_or_lineto,
+            'l': self.rule_moveto_or_lineto,
+            'H': self.rule_orthogonal_lineto,
+            'h': self.rule_orthogonal_lineto,
+            'V': self.rule_orthogonal_lineto,
+            'v': self.rule_orthogonal_lineto,
+            'C': self.rule_curveto3,
+            'c': self.rule_curveto3,
+            'S': self.rule_curveto2,
+            's': self.rule_curveto2,
+            'Q': self.rule_curveto2,
+            'q': self.rule_curveto2,
+            'T': self.rule_curveto1,
+            't': self.rule_curveto1,
+            'A': self.rule_elliptical_arc,
+            'a': self.rule_elliptical_arc,
+        }
+
+        # Token types that carry a numeric value.
+        self.number_tokens = ['int', 'float']
+
+    def parse(self, text):
+        """ Parse a string of SVG <path> data.
+
+        Returns the list of (command, data) tuples described in the class
+        docstring; raises SyntaxError on malformed path data.
+        """
+        tokens = self.lexer.lex(text)
+        # The advance method of an iterator is spelled `next` on Python 2 and
+        # `__next__` on Python 3; use whichever this interpreter provides so
+        # the parser runs on both.
+        try:
+            advance = tokens.next
+        except AttributeError:
+            advance = tokens.__next__
+        token = advance()
+        return self.rule_svg_path(advance, token)
+
+    def rule_svg_path(self, next, token):
+        """ Consume command groups until EOF and return them as a list.
+        """
+        commands = []
+        while token[0] is not EOF:
+            if token[0] != 'command':
+                raise SyntaxError("expecting a command; got %r" % (token,))
+            rule = self.command_dispatch[token[1]]
+            command_group, token = rule(next, token)
+            commands.append(command_group)
+        return commands
+
+    def rule_closepath(self, next, token):
+        """ Z/z: no arguments; the data part of the result is None.
+        """
+        command = token[1]
+        token = next()
+        return (command, None), token
+
+    def rule_moveto_or_lineto(self, next, token):
+        """ M/m/L/l: collect (x, y) pairs while numbers follow.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            pair, token = self.rule_coordinate_pair(next, token)
+            coordinates.append(pair)
+        return (command, coordinates), token
+
+    def rule_orthogonal_lineto(self, next, token):
+        """ H/h/V/v: collect single coordinates while numbers follow.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            coord, token = self.rule_coordinate(next, token)
+            coordinates.append(coord)
+        return (command, coordinates), token
+
+    def rule_curveto3(self, next, token):
+        """ C/c: collect groups of three (x, y) pairs.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            pair1, token = self.rule_coordinate_pair(next, token)
+            pair2, token = self.rule_coordinate_pair(next, token)
+            pair3, token = self.rule_coordinate_pair(next, token)
+            coordinates.append((pair1, pair2, pair3))
+        return (command, coordinates), token
+
+    def rule_curveto2(self, next, token):
+        """ S/s/Q/q: collect groups of two (x, y) pairs.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            pair1, token = self.rule_coordinate_pair(next, token)
+            pair2, token = self.rule_coordinate_pair(next, token)
+            coordinates.append((pair1, pair2))
+        return (command, coordinates), token
+
+    def rule_curveto1(self, next, token):
+        """ T/t: collect single (x, y) pairs.
+        """
+        command = token[1]
+        token = next()
+        coordinates = []
+        while token[0] in self.number_tokens:
+            pair1, token = self.rule_coordinate_pair(next, token)
+            coordinates.append(pair1)
+        return (command, coordinates), token
+
+    def rule_elliptical_arc(self, next, token):
+        """ A/a: collect arc argument groups.
+
+        Each group is ((rx, ry), axis_rotation, large_arc_flag, sweep_flag,
+        (x, y)). The radii must be nonnegative and the two flags must be the
+        literal tokens '0' or '1'; anything else raises SyntaxError.
+        """
+        command = token[1]
+        token = next()
+        arguments = []
+        while token[0] in self.number_tokens:
+            # The loop condition already guarantees the rx token is a number.
+            rx = float(token[1])
+            if rx < 0.0:
+                raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
+
+            token = next()
+            if token[0] not in self.number_tokens:
+                raise SyntaxError("expecting a number; got %r" % (token,))
+            ry = float(token[1])
+            if ry < 0.0:
+                raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
+
+            token = next()
+            if token[0] not in self.number_tokens:
+                raise SyntaxError("expecting a number; got %r" % (token,))
+            axis_rotation = float(token[1])
+
+            # Flags are checked on the token text, not on the token type.
+            token = next()
+            if token[1] not in ('0', '1'):
+                raise SyntaxError("expecting a boolean flag; got %r" % (token,))
+            large_arc_flag = bool(int(token[1]))
+
+            token = next()
+            if token[1] not in ('0', '1'):
+                raise SyntaxError("expecting a boolean flag; got %r" % (token,))
+            sweep_flag = bool(int(token[1]))
+
+            token = next()
+            if token[0] not in self.number_tokens:
+                raise SyntaxError("expecting a number; got %r" % (token,))
+            x = float(token[1])
+
+            token = next()
+            if token[0] not in self.number_tokens:
+                raise SyntaxError("expecting a number; got %r" % (token,))
+            y = float(token[1])
+
+            token = next()
+            arguments.append(((rx,ry), axis_rotation, large_arc_flag, sweep_flag, (x,y)))
+
+        return (command, arguments), token
+
+    def rule_coordinate(self, next, token):
+        """ Consume one number and return it as a float.
+        """
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        x = float(token[1])
+        token = next()
+        return x, token
+
+
+    def rule_coordinate_pair(self, next, token):
+        """ Consume two numbers and return them as an (x, y) tuple of floats.
+        """
+        # Inline these since this rule is so common.
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        x = float(token[1])
+        token = next()
+        if token[0] not in self.number_tokens:
+            raise SyntaxError("expecting a number; got %r" % (token,))
+        y = float(token[1])
+        token = next()
+        return (x,y), token
+
+
+# Ready-made parser instance, constructed with the default lexer.
+svg_parser = SVGPathParser()