summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 42a325b)
raw | patch | inline | side by side (parent: 42a325b)
author | jeff_schiller <jeff_schiller@users.sourceforge.net> | |
Tue, 4 Aug 2009 20:15:08 +0000 (20:15 +0000) | ||
committer | jeff_schiller <jeff_schiller@users.sourceforge.net> | |
Tue, 4 Aug 2009 20:15:08 +0000 (20:15 +0000) |
share/extensions/scour.inkscape.py | [new file with mode: 0755] | patch | blob |
share/extensions/scour.inx | [new file with mode: 0644] | patch | blob |
share/extensions/scour.py | [new file with mode: 0755] | patch | blob |
share/extensions/svg_regex.py | [new file with mode: 0644] | patch | blob |
diff --git a/share/extensions/scour.inkscape.py b/share/extensions/scour.inkscape.py
--- /dev/null
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Command-line shim used by the scour.inx output extension.
+
+Reads the SVG file named by the first command-line argument, runs it through
+scour.scourString() and writes the UTF-8 encoded result to standard output.
+"""
+import sys
+from scour import scourString
+
+# 'svg_file' rather than 'input', which would shadow the builtin of that name
+svg_file = open(sys.argv[1], "r")
+try:
+    sys.stdout.write(scourString(svg_file.read()).encode("UTF-8"))
+finally:
+    # release the handle even when scouring raises
+    svg_file.close()
+sys.stdout.close()
diff --git a/share/extensions/scour.inx b/share/extensions/scour.inx
--- /dev/null
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<inkscape-extension xmlns="http://www.inkscape.org/namespace/inkscape/extension">
+ <_name>Scoured SVG Output</_name>
+ <id>org.inkscape.output.scour</id>
+ <dependency type="executable" location="extensions">scour.py</dependency>
+ <dependency type="executable" location="extensions">svg_regex.py</dependency>
+ <output>
+ <extension>.svg</extension>
+ <mimetype>image/svg+xml</mimetype>
+ <_filetypename>Scoured SVG (*.svg)</_filetypename>
+ <_filetypetooltip>Scalable Vector Graphics</_filetypetooltip>
+ </output>
+ <script>
+ <command reldir="extensions" interpreter="python">scour.inkscape.py</command>
+ </script>
+</inkscape-extension>
diff --git a/share/extensions/scour.py b/share/extensions/scour.py
--- /dev/null
@@ -0,0 +1,2091 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Scour
+#
+# Copyright 2009 Jeff Schiller
+#
+# This file is part of Scour, http://www.codedread.com/scour/
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Notes:
+
+# rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
+# (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
+
+# Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
+# TODO: Adapt this script into an Inkscape python plugin
+#
+# * Process Transformations
+# * Process quadratic Bezier curves
+# * Collapse all group based transformations
+
+# Even more ideas here: http://esw.w3.org/topic/SvgTidy
+# * removal of more default attribute values (gradientUnits, spreadMethod, x1, y1, etc)
+# * analysis of path elements to see if rect can be used instead?
+# * removal of unused attributes in groups:
+# <g fill="blue" ...>
+# <rect fill="red" ... />
+# <rect fill="red" ... />
+# <rect fill="red" ... />
+# </g>
+# in this case, fill="blue" should be removed
+# * Move common attributes up to a parent group:
+# <g>
+# <rect fill="white"/>
+# <rect fill="white"/>
+# <rect fill="white"/>
+# </g>
+# becomes:
+# <g fill="white">
+# <rect />
+# <rect />
+# <rect />
+# </g>
+
+# Suggestion from Richard Hutch:
+# * Put id attributes first in the serialization (or make the d attribute last)
+# This would require my own serialization of the DOM objects (not impossible)
+
+# Next Up:
+# - add an option for svgweb compatible markup (no self-closing tags)?
+# - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
+# - remove id if it matches the Inkscape-style of IDs (also provide a switch to disable this)
+# - prevent elements from being stripped if they are referenced in a <style> element
+# (for instance, filter, marker, pattern) - need a crude CSS parser
+# - Remove any unused glyphs from font elements?
+
+# necessary to get true division
+from __future__ import division
+
+import os
+import sys
+import xml.dom.minidom
+import re
+import math
+import base64
+import urllib
+from svg_regex import svg_parser
+import gzip
+import optparse
+
+# Python 2.3- did not have Decimal
+try:
+ from decimal import *
+except ImportError:
+ from fixedpoint import *
+ Decimal = FixedPoint
+
+# Program identity strings: name, version and copyright notice.
+APP = 'scour'
+VER = '0.18'
+COPYRIGHT = 'Copyright Jeff Schiller, 2009'
+
+# XML namespace URIs keyed by a short symbolic name, so the rest of the module
+# can write NS['SVG'] or NS['XLINK'] instead of repeating the full URI.
+NS = { 'SVG': 'http://www.w3.org/2000/svg',
+ 'XLINK': 'http://www.w3.org/1999/xlink',
+ 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
+ 'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
+ 'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
+ 'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
+ 'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
+ 'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
+ 'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
+ 'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
+ 'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
+ 'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
+ 'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
+ 'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
+ }
+
+# Every namespace in NS except SVG and XLINK, i.e. the Sodipodi, Inkscape and
+# Adobe editor namespaces.
+# NOTE(review): presumably the list handed to removeNamespacedElements() and
+# removeNamespacedAttributes(); the call site is not in this part of the file.
+unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
+ NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
+ NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
+ NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]
+
+svgAttributes = [
+ 'clip-rule',
+ 'display',
+ 'fill',
+ 'fill-opacity',
+ 'fill-rule',
+ 'filter',
+ 'font-family',
+ 'font-size',
+ 'font-stretch',
+ 'font-style',
+ 'font-variant',
+ 'font-weight',
+ 'line-height',
+ 'marker',
+ 'opacity',
+ 'overflow',
+ 'stop-color',
+ 'stop-opacity',
+ 'stroke',
+ 'stroke-dashoffset',
+ 'stroke-linecap',
+ 'stroke-linejoin',
+ 'stroke-miterlimit',
+ 'stroke-opacity',
+ 'stroke-width',
+ 'visibility'
+ ]
+
+colors = {
+ 'aliceblue': 'rgb(240, 248, 255)',
+ 'antiquewhite': 'rgb(250, 235, 215)',
+ 'aqua': 'rgb( 0, 255, 255)',
+ 'aquamarine': 'rgb(127, 255, 212)',
+ 'azure': 'rgb(240, 255, 255)',
+ 'beige': 'rgb(245, 245, 220)',
+ 'bisque': 'rgb(255, 228, 196)',
+ 'black': 'rgb( 0, 0, 0)',
+ 'blanchedalmond': 'rgb(255, 235, 205)',
+ 'blue': 'rgb( 0, 0, 255)',
+ 'blueviolet': 'rgb(138, 43, 226)',
+ 'brown': 'rgb(165, 42, 42)',
+ 'burlywood': 'rgb(222, 184, 135)',
+ 'cadetblue': 'rgb( 95, 158, 160)',
+ 'chartreuse': 'rgb(127, 255, 0)',
+ 'chocolate': 'rgb(210, 105, 30)',
+ 'coral': 'rgb(255, 127, 80)',
+ 'cornflowerblue': 'rgb(100, 149, 237)',
+ 'cornsilk': 'rgb(255, 248, 220)',
+ 'crimson': 'rgb(220, 20, 60)',
+ 'cyan': 'rgb( 0, 255, 255)',
+ 'darkblue': 'rgb( 0, 0, 139)',
+ 'darkcyan': 'rgb( 0, 139, 139)',
+ 'darkgoldenrod': 'rgb(184, 134, 11)',
+ 'darkgray': 'rgb(169, 169, 169)',
+ 'darkgreen': 'rgb( 0, 100, 0)',
+ 'darkgrey': 'rgb(169, 169, 169)',
+ 'darkkhaki': 'rgb(189, 183, 107)',
+ 'darkmagenta': 'rgb(139, 0, 139)',
+ 'darkolivegreen': 'rgb( 85, 107, 47)',
+ 'darkorange': 'rgb(255, 140, 0)',
+ 'darkorchid': 'rgb(153, 50, 204)',
+ 'darkred': 'rgb(139, 0, 0)',
+ 'darksalmon': 'rgb(233, 150, 122)',
+ 'darkseagreen': 'rgb(143, 188, 143)',
+ 'darkslateblue': 'rgb( 72, 61, 139)',
+ 'darkslategray': 'rgb( 47, 79, 79)',
+ 'darkslategrey': 'rgb( 47, 79, 79)',
+ 'darkturquoise': 'rgb( 0, 206, 209)',
+ 'darkviolet': 'rgb(148, 0, 211)',
+ 'deeppink': 'rgb(255, 20, 147)',
+ 'deepskyblue': 'rgb( 0, 191, 255)',
+ 'dimgray': 'rgb(105, 105, 105)',
+ 'dimgrey': 'rgb(105, 105, 105)',
+ 'dodgerblue': 'rgb( 30, 144, 255)',
+ 'firebrick': 'rgb(178, 34, 34)',
+ 'floralwhite': 'rgb(255, 250, 240)',
+ 'forestgreen': 'rgb( 34, 139, 34)',
+ 'fuchsia': 'rgb(255, 0, 255)',
+ 'gainsboro': 'rgb(220, 220, 220)',
+ 'ghostwhite': 'rgb(248, 248, 255)',
+ 'gold': 'rgb(255, 215, 0)',
+ 'goldenrod': 'rgb(218, 165, 32)',
+ 'gray': 'rgb(128, 128, 128)',
+ 'grey': 'rgb(128, 128, 128)',
+ 'green': 'rgb( 0, 128, 0)',
+ 'greenyellow': 'rgb(173, 255, 47)',
+ 'honeydew': 'rgb(240, 255, 240)',
+ 'hotpink': 'rgb(255, 105, 180)',
+ 'indianred': 'rgb(205, 92, 92)',
+ 'indigo': 'rgb( 75, 0, 130)',
+ 'ivory': 'rgb(255, 255, 240)',
+ 'khaki': 'rgb(240, 230, 140)',
+ 'lavender': 'rgb(230, 230, 250)',
+ 'lavenderblush': 'rgb(255, 240, 245)',
+ 'lawngreen': 'rgb(124, 252, 0)',
+ 'lemonchiffon': 'rgb(255, 250, 205)',
+ 'lightblue': 'rgb(173, 216, 230)',
+ 'lightcoral': 'rgb(240, 128, 128)',
+ 'lightcyan': 'rgb(224, 255, 255)',
+ 'lightgoldenrodyellow': 'rgb(250, 250, 210)',
+ 'lightgray': 'rgb(211, 211, 211)',
+ 'lightgreen': 'rgb(144, 238, 144)',
+ 'lightgrey': 'rgb(211, 211, 211)',
+ 'lightpink': 'rgb(255, 182, 193)',
+ 'lightsalmon': 'rgb(255, 160, 122)',
+ 'lightseagreen': 'rgb( 32, 178, 170)',
+ 'lightskyblue': 'rgb(135, 206, 250)',
+ 'lightslategray': 'rgb(119, 136, 153)',
+ 'lightslategrey': 'rgb(119, 136, 153)',
+ 'lightsteelblue': 'rgb(176, 196, 222)',
+ 'lightyellow': 'rgb(255, 255, 224)',
+ 'lime': 'rgb( 0, 255, 0)',
+ 'limegreen': 'rgb( 50, 205, 50)',
+ 'linen': 'rgb(250, 240, 230)',
+ 'magenta': 'rgb(255, 0, 255)',
+ 'maroon': 'rgb(128, 0, 0)',
+ 'mediumaquamarine': 'rgb(102, 205, 170)',
+ 'mediumblue': 'rgb( 0, 0, 205)',
+ 'mediumorchid': 'rgb(186, 85, 211)',
+ 'mediumpurple': 'rgb(147, 112, 219)',
+ 'mediumseagreen': 'rgb( 60, 179, 113)',
+ 'mediumslateblue': 'rgb(123, 104, 238)',
+ 'mediumspringgreen': 'rgb( 0, 250, 154)',
+ 'mediumturquoise': 'rgb( 72, 209, 204)',
+ 'mediumvioletred': 'rgb(199, 21, 133)',
+ 'midnightblue': 'rgb( 25, 25, 112)',
+ 'mintcream': 'rgb(245, 255, 250)',
+ 'mistyrose': 'rgb(255, 228, 225)',
+ 'moccasin': 'rgb(255, 228, 181)',
+ 'navajowhite': 'rgb(255, 222, 173)',
+ 'navy': 'rgb( 0, 0, 128)',
+ 'oldlace': 'rgb(253, 245, 230)',
+ 'olive': 'rgb(128, 128, 0)',
+ 'olivedrab': 'rgb(107, 142, 35)',
+ 'orange': 'rgb(255, 165, 0)',
+ 'orangered': 'rgb(255, 69, 0)',
+ 'orchid': 'rgb(218, 112, 214)',
+ 'palegoldenrod': 'rgb(238, 232, 170)',
+ 'palegreen': 'rgb(152, 251, 152)',
+ 'paleturquoise': 'rgb(175, 238, 238)',
+ 'palevioletred': 'rgb(219, 112, 147)',
+ 'papayawhip': 'rgb(255, 239, 213)',
+ 'peachpuff': 'rgb(255, 218, 185)',
+ 'peru': 'rgb(205, 133, 63)',
+ 'pink': 'rgb(255, 192, 203)',
+ 'plum': 'rgb(221, 160, 221)',
+ 'powderblue': 'rgb(176, 224, 230)',
+ 'purple': 'rgb(128, 0, 128)',
+ 'red': 'rgb(255, 0, 0)',
+ 'rosybrown': 'rgb(188, 143, 143)',
+ 'royalblue': 'rgb( 65, 105, 225)',
+ 'saddlebrown': 'rgb(139, 69, 19)',
+ 'salmon': 'rgb(250, 128, 114)',
+ 'sandybrown': 'rgb(244, 164, 96)',
+ 'seagreen': 'rgb( 46, 139, 87)',
+ 'seashell': 'rgb(255, 245, 238)',
+ 'sienna': 'rgb(160, 82, 45)',
+ 'silver': 'rgb(192, 192, 192)',
+ 'skyblue': 'rgb(135, 206, 235)',
+ 'slateblue': 'rgb(106, 90, 205)',
+ 'slategray': 'rgb(112, 128, 144)',
+ 'slategrey': 'rgb(112, 128, 144)',
+ 'snow': 'rgb(255, 250, 250)',
+ 'springgreen': 'rgb( 0, 255, 127)',
+ 'steelblue': 'rgb( 70, 130, 180)',
+ 'tan': 'rgb(210, 180, 140)',
+ 'teal': 'rgb( 0, 128, 128)',
+ 'thistle': 'rgb(216, 191, 216)',
+ 'tomato': 'rgb(255, 99, 71)',
+ 'turquoise': 'rgb( 64, 224, 208)',
+ 'violet': 'rgb(238, 130, 238)',
+ 'wheat': 'rgb(245, 222, 179)',
+ 'white': 'rgb(255, 255, 255)',
+ 'whitesmoke': 'rgb(245, 245, 245)',
+ 'yellow': 'rgb(255, 255, 0)',
+ 'yellowgreen': 'rgb(154, 205, 50)',
+ }
+
+def isSameSign(a,b):
+    """Return True when a and b do not lie on opposite sides of zero.
+
+    Zero is compatible with either sign, so isSameSign(0, x) is always True.
+    """
+    if a <= 0 and b <= 0:
+        return True
+    return a >= 0 and b >= 0
+
+# Pre-compiled patterns used when parsing SVG numbers and lengths.
+# coord: optional '-', one or more digits, an optional '.', optional fraction
+# digits.  A digit must come before the dot, so '.5' does not match.
+coord = re.compile("\\-?\\d+\\.?\\d*")
+# scinumber: an optionally signed number immediately followed by an e/E
+# exponent with an optional sign.
+scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
+# number: an optionally signed number without exponent; the leading-dot form
+# ('.5') is accepted here.
+number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
+# sciExponent: the e/E exponent part; group 1 captures the signed exponent.
+sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
+# unit: exactly one of the listed unit suffixes (or '%'), anchored at the end
+# of the string.
+unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
+
+class Unit(object):
+    """Integer codes for the unit suffix of an SVG length.
+
+    Unit.get() maps a suffix string to its code and Unit.str() maps a code
+    back to its suffix.  Both directions are driven by one table so they
+    cannot drift apart.
+    """
+    INVALID = -1
+    NONE = 0
+    PCT = 1
+    PX = 2
+    PT = 3
+    PC = 4
+    EM = 5
+    EX = 6
+    CM = 7
+    MM = 8
+    IN = 9
+
+    # suffix -> code; the empty suffix means "no unit"
+    _BY_SUFFIX = {
+        '': NONE,
+        '%': PCT,
+        'px': PX,
+        'pt': PT,
+        'pc': PC,
+        'em': EM,
+        'ex': EX,
+        'cm': CM,
+        'mm': MM,
+        'in': IN,
+    }
+    # code -> suffix, derived from the table above
+    _BY_CODE = {}
+    for _suffix, _code in _BY_SUFFIX.items():
+        _BY_CODE[_code] = _suffix
+    # do not leave the loop variables behind as class attributes
+    del _suffix, _code
+
+    # NOTE: staticmethod() is applied by hand at the end of the class instead
+    # of using decorator syntax, presumably so the module still loads on the
+    # old interpreters the Decimal fallback import also caters for.
+    def get(str):
+        """Return the unit code for a suffix string.
+
+        None and '' both mean "no unit" and yield Unit.NONE; an unknown
+        suffix yields Unit.INVALID.
+        """
+        if str is None:
+            return Unit.NONE
+        return Unit._BY_SUFFIX.get(str, Unit.INVALID)
+
+    def str(u):
+        """Return the suffix string for a unit code, or 'INVALID' if unknown."""
+        return Unit._BY_CODE.get(u, 'INVALID')
+
+    get = staticmethod(get)
+    str = staticmethod(str)
+
+class SVGLength(object):
+    """A parsed SVG length: a numeric `value` plus a `units` code from Unit.
+
+    Accepts plain numbers ('12', '-0.5', '1.5e3'), numbers with a unit
+    suffix ('12px', '50%') and scientific notation with a unit ('1e-2mm').
+    Whole numbers are stored as int, everything else as float.  Any string
+    that cannot be parsed, including a number followed by an unknown suffix,
+    yields value 0 and units Unit.INVALID, so both attributes are always set.
+    """
+    def __init__(self, str):
+        try:
+            # fast path: whatever float() accepts is a unitless number
+            self.value = float(str)
+            if int(self.value) == self.value:
+                self.value = int(self.value)
+            self.units = Unit.NONE
+        except ValueError:
+            # the string carries a unit, an exponent plus a unit, or is invalid
+            self.value = 0
+            unitBegin = 0
+            scinum = scinumber.match(str)
+            if scinum is not None:
+                # scinumber starts with the same sub-pattern as number, so
+                # this match and the exponent search below cannot fail
+                numMatch = number.match(str)
+                expMatch = sciExponent.search(str, numMatch.start(0))
+                self.value = (float(numMatch.group(0)) *
+                    10 ** float(expMatch.group(1)))
+                unitBegin = expMatch.end(1)
+            else:
+                # unit or invalid
+                numMatch = number.match(str)
+                if numMatch is not None:
+                    self.value = float(numMatch.group(0))
+                    unitBegin = numMatch.end(0)
+
+            if int(self.value) == self.value:
+                self.value = int(self.value)
+
+            # a number was found: whatever follows it must end in a known unit
+            unitMatch = None
+            if unitBegin != 0:
+                unitMatch = unit.search(str, unitBegin)
+
+            if unitMatch is not None:
+                self.units = Unit.get(unitMatch.group(0))
+            else:
+                # No number at all, or a number followed by something that is
+                # not a unit (e.g. '10foo').  The second case used to leave
+                # self.units unassigned.
+                # TODO: this needs to set the default for the given attribute (how?)
+                self.value = 0
+                self.units = Unit.INVALID
+
+# returns the length of a property
+# TODO: eventually use the above class once it is complete
+def getSVGLength(value):
+    """Return value as a float when it is a plain number, else unchanged.
+
+    Strings that float() rejects, such as '2px' or '', are handed back as
+    they came in; no unit conversion is attempted.  The regular-expression
+    matching that used to sit in the except branch computed results that were
+    never used, so it has been dropped.
+    """
+    try:
+        return float(value)
+    except ValueError:
+        return value
+
+def findElementById(node, id):
+    """Return the first element, in document order, whose id attribute equals id.
+
+    The search covers node itself and its element descendants.  Returns None
+    when nothing matches or when node is None or not an element.
+    """
+    # explicit stack instead of recursion; children are pushed in reverse so
+    # that they are popped, and therefore visited, in document order
+    pending = [node]
+    while pending:
+        current = pending.pop()
+        # only element nodes (nodeType 1) carry attributes
+        if current is None or current.nodeType != 1:
+            continue
+        if current.getAttribute('id') == id:
+            return current
+        kids = list(current.childNodes)
+        kids.reverse()
+        pending.extend(kids)
+    return None
+
+def findElementsWithId(node, elems=None):
+    """
+    Returns a dict mapping every non-empty id attribute found on node or on
+    one of its element descendants to the element carrying it.
+
+    Results are added to elems when one is passed in, and that same dict is
+    returned.  When an id occurs more than once, the element that comes last
+    in document order wins.
+    """
+    if elems is None:
+        elems = {}
+    # explicit stack; children are pushed reversed to keep document order
+    stack = [node]
+    while stack:
+        current = stack.pop()
+        ident = current.getAttribute('id')
+        if ident != '':
+            elems[ident] = current
+        # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
+        # only nodes of type Element (1) can carry an id
+        kids = [kid for kid in current.childNodes if kid.nodeType == 1]
+        kids.reverse()
+        stack.extend(kids)
+    return elems
+
+def _recordReference(ids, id, node):
+    """Note in ids that node holds one more reference to the element named id."""
+    if id in ids:
+        ids[id][0] += 1
+        ids[id][1].append(node)
+    else:
+        ids[id] = [1, [node]]
+
+def _parseUrlReference(val):
+    """Return the id named by a url(#id), url("#id") or url('#id') value.
+
+    Anything after the closing delimiter is ignored, as in
+    'url(#grad) rgb(0, 0, 0)'.  Returns None when val is not a local url()
+    reference, when the id is empty, or when the closing delimiter is missing.
+    """
+    for prefix, suffix in [('url(#', ')'), ('url("#', '")'), ("url('#", "')")]:
+        if val[:len(prefix)] == prefix:
+            end = val.find(suffix, len(prefix))
+            if end > len(prefix):
+                return val[len(prefix):end]
+            return None
+    return None
+
+def findReferencedElements(node, ids=None):
+    """
+    Returns the number of times an ID is referenced as well as all elements
+    that reference it.
+
+    The result maps each referenced id to a two-item list [count, elements].
+    An element is listed once per reference it makes, so it may appear
+    several times.  node and its element descendants are examined.
+
+    Currently looks at the fill, stroke, filter, clip-path, mask,
+    marker-start, marker-mid and marker-end properties, both as attributes
+    and inside the style attribute, and at xlink:href.
+    """
+    if ids is None:
+        ids = {}
+    # TODO: input argument ids is clunky here (see below how it is called)
+    # GZ: alternative to passing dict, use **kwargs
+    href = node.getAttributeNS(NS['XLINK'],'href')
+
+    # a local reference looks like '#someId'; a bare '#' names nothing
+    if len(href) > 1 and href[0] == '#':
+        _recordReference(ids, href[1:], node)
+
+    # collect the style declarations, then add each referencing attribute in
+    # the same 'name:value' form so one loop handles both
+    referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask', 'marker-start',
+                        'marker-end', 'marker-mid']
+    styles = node.getAttribute('style').split(';')
+    for attr in referencingProps:
+        styles.append(':'.join([attr, node.getAttribute(attr)]))
+
+    for style in styles:
+        propval = style.split(':')
+        if len(propval) != 2:
+            continue
+        prop = propval[0].strip()
+        val = propval[1].strip()
+        if prop in referencingProps and val != '':
+            id = _parseUrlReference(val)
+            if id is not None:
+                _recordReference(ids, id, node)
+
+    for child in node.childNodes:
+        if child.nodeType == 1:
+            findReferencedElements(child, ids)
+    return ids
+
+# Module-level statistics counters.  The removal functions in this file declare
+# numIDsRemoved, numElemsRemoved and numAttrsRemoved as global and add one to
+# them for every id, element or attribute they delete.
+# NOTE(review): the remaining counters are not touched by the code visible
+# here; presumably later passes update them -- confirm.
+numIDsRemoved = 0
+numElemsRemoved = 0
+numAttrsRemoved = 0
+numRastersEmbedded = 0
+numPathSegmentsReduced = 0
+numCurvesStraightened = 0
+numBytesSavedInPathData = 0
+numBytesSavedInColors = 0
+numPointsRemovedFromPolygon = 0
+
+def removeUnusedDefs(doc, defElem, elemsToRemove=None):
+    """
+    Collects the children of defElem that nothing in the document references.
+
+    A child element qualifies when it has no id, or an id that is not
+    referenced, and is not a font, style, metadata, script, title or desc
+    element.  SVG <g> children are not candidates themselves; their content
+    is examined by the same rules instead.
+
+    Nothing is removed here.  The candidates are appended to elemsToRemove
+    (a new list when omitted) and that list is returned.
+    """
+    if elemsToRemove is None:
+        elemsToRemove = []
+
+    # built once per call: the document is not modified while collecting, so
+    # nested groups can share the same reference map
+    referencedIDs = findReferencedElements(doc.documentElement)
+    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
+
+    def collect(container):
+        for elem in container.childNodes:
+            if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
+                collect(elem)
+                continue
+            if elem.nodeType != 1 or elem.nodeName in keepTags:
+                continue
+            elemId = elem.getAttribute('id')
+            if elemId == '' or elemId not in referencedIDs:
+                elemsToRemove.append(elem)
+
+    collect(defElem)
+    return elemsToRemove
+
+def removeUnreferencedElements(doc):
+    """
+    Removes unreferenced gradients and patterns and vacuums every <defs>.
+
+    Pass 1 deletes each linearGradient, radialGradient or pattern whose id is
+    not referenced anywhere in the document.  Pass 2 deletes whatever
+    removeUnusedDefs() reports for each <defs> element.
+
+    Returns the number of elements removed by both passes together.  The
+    count used to be reset between the passes, which hid pass-1 removals from
+    the caller.
+    """
+    global numElemsRemoved
+    num = 0
+    removeTags = ['linearGradient', 'radialGradient', 'pattern']
+
+    identifiedElements = findElementsWithId(doc.documentElement)
+    referencedIDs = findReferencedElements(doc.documentElement)
+
+    # pass 1: unreferenced gradients and patterns anywhere in the document
+    for id in identifiedElements:
+        if id in referencedIDs:
+            continue
+        goner = findElementById(doc.documentElement, id)
+        if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
+            goner.parentNode.removeChild(goner)
+            num += 1
+            numElemsRemoved += 1
+
+    # pass 2: vacuum the defs; num keeps accumulating on purpose
+    defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
+    for aDef in defs:
+        for elem in removeUnusedDefs(doc, aDef):
+            elem.parentNode.removeChild(elem)
+            numElemsRemoved += 1
+            num += 1
+    return num
+
+def removeUnreferencedIDs(referencedIDs, identifiedElements):
+    """
+    Removes the unreferenced ID attributes.
+
+    referencedIDs is keyed by the ids that are in use; identifiedElements
+    maps each id to the element carrying it.  The id attribute is removed
+    from every element whose id is not a key of referencedIDs, except for
+    <font> elements, which always keep theirs.
+
+    Returns the number of ID attributes removed
+    """
+    global numIDsRemoved
+    keepTags = ['font']
+    num = 0
+    for id in identifiedElements:
+        node = identifiedElements[id]
+        if id not in referencedIDs and node.nodeName not in keepTags:
+            node.removeAttribute('id')
+            numIDsRemoved += 1
+            num += 1
+    return num
+
+def removeNamespacedAttributes(node, namespaces):
+    """
+    Strips every attribute whose namespace URI is listed in namespaces from
+    node and from all of its descendants.
+
+    Returns the number of attributes removed; numAttrsRemoved grows by the
+    same amount.  A node that is not an element yields 0.
+    """
+    global numAttrsRemoved
+    if node.nodeType != 1:
+        return 0
+
+    # decide first, delete afterwards: the attribute map must not change
+    # while it is being walked by index
+    attrs = node.attributes
+    doomed = []
+    for index in range(attrs.length):
+        attr = attrs.item(index)
+        if attr is None:
+            continue
+        if attr.namespaceURI in namespaces:
+            doomed.append(attr.nodeName)
+
+    removed = 0
+    for name in doomed:
+        removed += 1
+        numAttrsRemoved += 1
+        node.removeAttribute(name)
+
+    # descend; non-element children contribute nothing
+    for kid in node.childNodes:
+        removed += removeNamespacedAttributes(kid, namespaces)
+    return removed
+
+def removeNamespacedElements(node, namespaces):
+    """
+    Deletes every child node whose namespace URI is listed in namespaces,
+    from node and, recursively, from the children that remain.
+
+    Returns the number of nodes removed; numElemsRemoved grows by the same
+    amount.  Content nested inside a removed node goes with it and is not
+    counted separately.  A node that is not an element yields 0.
+    """
+    global numElemsRemoved
+    if node.nodeType != 1:
+        return 0
+
+    # decide first, delete afterwards: childNodes must not shrink while it is
+    # being walked
+    doomed = [kid for kid in node.childNodes
+              if kid != None and kid.namespaceURI in namespaces]
+    removed = 0
+    for kid in doomed:
+        removed += 1
+        numElemsRemoved += 1
+        node.removeChild(kid)
+
+    # descend into whatever is left
+    for kid in node.childNodes:
+        removed += removeNamespacedElements(kid, namespaces)
+    return removed
+
+# this walks further and further down the tree, removing groups
+# which do not have any attributes or a title/desc child and
+# promoting their children up one level
+def removeNestedGroups(node):
+ """
+ Dissolves attribute-less SVG <g> children of node, moving the content of
+ each such group up into node at the position the group occupied, then
+ repeats the process for every element child of node.
+
+ Returns the number of groups removed; numElemsRemoved grows by the same
+ amount.
+ """
+ global numElemsRemoved
+ num = 0
+
+ # first pass: pick the groups to dissolve without modifying the tree yet
+ groupsToRemove = []
+ for child in node.childNodes:
+ if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
+ # only collapse group if it does not have a title or desc as a direct descendant
+ # NOTE(review): indentation is lost in this rendering; the 'else' below
+ # reads as the else-clause of the grandchild 'for' loop, i.e. it runs
+ # only when no title/desc triggered the 'break' -- confirm against the file.
+ for grandchild in child.childNodes:
+ if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
+ grandchild.nodeName in ['title','desc']:
+ break
+ else:
+ groupsToRemove.append(child)
+
+ # second pass: hoist each group's children in front of the group, in order,
+ # then drop the now-empty group
+ for g in groupsToRemove:
+ while g.childNodes.length > 0:
+ g.parentNode.insertBefore(g.firstChild, g)
+ g.parentNode.removeChild(g)
+ numElemsRemoved += 1
+ num += 1
+
+ # now recurse for children
+ # (this includes the children that were just hoisted out of a group)
+ for child in node.childNodes:
+ if child.nodeType == 1:
+ num += removeNestedGroups(child)
+ return num
+
+def removeDuplicateGradientStops(doc):
+    """
+    Normalises the offset of every gradient stop and removes stops that
+    repeat the previous stop recorded at the same offset.
+
+    Percentage offsets are rewritten as plain numbers ('50%' becomes '0.5')
+    and offsets that cannot be interpreted become '0'.  A stop counts as a
+    repeat only when its stop-color, stop-opacity and style attributes all
+    equal those of the last stop seen at that offset.  style is compared
+    because the colour may be given there instead; without it, differently
+    coloured stops styled through style="" would look identical.
+
+    Returns the number of stops removed; numElemsRemoved grows by the same
+    amount.
+    """
+    global numElemsRemoved
+    num = 0
+
+    for gradType in ['linearGradient', 'radialGradient']:
+        for grad in doc.getElementsByTagNameNS(NS['SVG'], gradType):
+            # offset -> [color, opacity, style] of the last stop seen there
+            stops = {}
+            stopsToRemove = []
+            for stop in grad.getElementsByTagNameNS(NS['SVG'], 'stop'):
+                # convert percentages into a floating point number
+                offsetU = SVGLength(stop.getAttribute('offset'))
+                if offsetU.units == Unit.PCT:
+                    offset = offsetU.value / 100.0
+                elif offsetU.units == Unit.NONE:
+                    offset = offsetU.value
+                else:
+                    offset = 0
+                # set the stop offset value to the integer or floating point equivalent
+                if int(offset) == offset:
+                    stop.setAttribute('offset', str(int(offset)))
+                else:
+                    stop.setAttribute('offset', str(offset))
+
+                appearance = [stop.getAttribute('stop-color'),
+                              stop.getAttribute('stop-opacity'),
+                              stop.getAttribute('style')]
+                if offset in stops and stops[offset] == appearance:
+                    stopsToRemove.append(stop)
+                stops[offset] = appearance
+
+            # removal is deferred so the stop list is not modified while walked
+            for stop in stopsToRemove:
+                stop.parentNode.removeChild(stop)
+                num += 1
+                numElemsRemoved += 1
+
+    return num
+
+def collapseSinglyReferencedGradients(doc):
+    """
+    Merges each gradient that is referenced exactly once, through the
+    xlink:href of another gradient, into that referencing gradient.
+
+    The referencing gradient takes over the stops (only when it has none of
+    its own) and any of gradientUnits, spreadMethod, gradientTransform and
+    the type-specific geometry attributes it does not set itself.  The merged
+    gradient is then deleted.
+
+    If the merged gradient itself pointed at a further gradient, the
+    referencing gradient is re-pointed there instead of losing its
+    xlink:href, so inheritance chains survive.  Referrers already deleted
+    earlier in the same call are skipped; the next call handles them with
+    fresh reference data.
+
+    Returns the number of gradients removed; numElemsRemoved grows by the
+    same amount.
+    """
+    global numElemsRemoved
+    num = 0
+    gradientTags = ['linearGradient', 'radialGradient']
+
+    # the reference map is rebuilt on every call, so repeated runs (e.g. from
+    # testscour) never see stale data from a previous document
+    for rid, nodeCount in findReferencedElements(doc.documentElement).items():
+        count = nodeCount[0]
+        nodes = nodeCount[1]
+        if count != 1:
+            continue
+
+        elem = findElementById(doc.documentElement, rid)
+        if elem is None or elem.nodeType != 1 or elem.nodeName not in gradientTags \
+                or elem.namespaceURI != NS['SVG']:
+            continue
+
+        # elem is a gradient referenced by only 1 other element
+        refElem = nodes[0]
+        if refElem.nodeType != 1 or refElem.nodeName not in gradientTags \
+                or refElem.namespaceURI != NS['SVG']:
+            continue
+        # the map was built before this loop started removing gradients; a
+        # referrer without a parent has been deleted in the meantime
+        if refElem.parentNode is None:
+            continue
+        # only an xlink:href reference means "inherit from elem"
+        hrefNode = refElem.getAttributeNodeNS(NS['XLINK'], 'href')
+        if hrefNode is None or hrefNode.value != '#' + rid:
+            continue
+
+        # add the stops to the referencing gradient (this removes them from elem)
+        if len(refElem.getElementsByTagNameNS(NS['SVG'], 'stop')) == 0:
+            stopsToAdd = elem.getElementsByTagNameNS(NS['SVG'], 'stop')
+            for stop in stopsToAdd:
+                refElem.appendChild(stop)
+
+        # adopt the attributes that are unspecified on refElem: the common
+        # ones always, the geometry ones only when both gradients are of the
+        # same kind
+        inherited = ['gradientUnits', 'spreadMethod', 'gradientTransform']
+        if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
+            inherited = inherited + ['fx', 'fy', 'cx', 'cy', 'r']
+        if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
+            inherited = inherited + ['x1', 'y1', 'x2', 'y2']
+        for attr in inherited:
+            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
+                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
+
+        # pass on elem's own xlink:href, or drop the link when elem had none;
+        # the existing qualified name is reused so the prefix is kept
+        targetHref = elem.getAttributeNS(NS['XLINK'], 'href')
+        if targetHref != '':
+            refElem.setAttributeNS(NS['XLINK'], hrefNode.nodeName, targetHref)
+        else:
+            refElem.removeAttributeNS(NS['XLINK'], 'href')
+
+        # now delete elem
+        elem.parentNode.removeChild(elem)
+        numElemsRemoved += 1
+        num += 1
+    return num
+
+def _gradientsMatch(grad, ograd):
+    """Return True when two gradients of the same kind are interchangeable.
+
+    Compared are the gradient's own attributes, its xlink:href and, stop by
+    stop, the offset, stop-color, stop-opacity and style attributes.  style
+    is included because stop colours may be given there.
+    """
+    for attr in ['gradientUnits','spreadMethod','gradientTransform',
+                 'x1','y1','x2','y2','cx','cy','fx','fy','r']:
+        if grad.getAttribute(attr) != ograd.getAttribute(attr):
+            return False
+
+    # compare xlink:href values too
+    if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
+        return False
+
+    # all gradient properties match, now time to compare stops
+    stops = grad.getElementsByTagNameNS(NS['SVG'], 'stop')
+    ostops = ograd.getElementsByTagNameNS(NS['SVG'], 'stop')
+    if stops.length != ostops.length:
+        return False
+    for i in range(stops.length):
+        stop = stops.item(i)
+        ostop = ostops.item(i)
+        for attr in ['offset', 'stop-color', 'stop-opacity', 'style']:
+            if stop.getAttribute(attr) != ostop.getAttribute(attr):
+                return False
+    return True
+
+def removeDuplicateGradients(doc):
+    """
+    Removes gradients that duplicate another gradient of the same kind and
+    re-points every reference to a removed gradient at the one that is kept.
+
+    References are rewritten in the fill and stroke attributes, inside the
+    style attribute and in xlink:href.  A duplicate that nothing references
+    is simply removed.  A referenced duplicate whose master has no id is left
+    alone, because references could not be re-pointed.
+
+    Returns the number of gradients removed; numElemsRemoved grows by the
+    same amount.
+    """
+    global numElemsRemoved
+    num = 0
+
+    gradientsToRemove = {}   # master gradient -> list of its duplicates
+    duplicateToMaster = {}   # duplicate gradient -> its master
+
+    for gradType in ['linearGradient', 'radialGradient']:
+        grads = doc.getElementsByTagNameNS(NS['SVG'], gradType)
+        for grad in grads:
+            # TODO: should slice grads from 'grad' here to optimize
+            for ograd in grads:
+                # do not compare gradient to itself
+                if grad == ograd: continue
+                if not _gradientsMatch(grad, ograd): continue
+
+                # ograd is a duplicate of grad, we schedule it to be removed UNLESS
+                # ograd is ALREADY considered a 'master' element or is already
+                # scheduled as the duplicate of some other master
+                if ograd in gradientsToRemove or ograd in duplicateToMaster:
+                    continue
+                if grad not in gradientsToRemove:
+                    gradientsToRemove[grad] = []
+                gradientsToRemove[grad].append( ograd )
+                duplicateToMaster[ograd] = grad
+
+    # get a collection of all elements that are referenced and their referencing elements
+    referencedIDs = findReferencedElements(doc.documentElement)
+    for masterGrad in gradientsToRemove.keys():
+        master_id = masterGrad.getAttribute('id')
+        masterUrl = 'url(#'+master_id+')'
+        for dupGrad in gradientsToRemove[masterGrad]:
+            # if the duplicate gradient no longer has a parent that means it was
+            # already re-mapped to another master gradient
+            if not dupGrad.parentNode: continue
+            dup_id = dupGrad.getAttribute('id')
+
+            # an id-less or unreferenced duplicate has no entry in the map
+            referrers = []
+            if dup_id != '' and dup_id in referencedIDs:
+                referrers = referencedIDs[dup_id][1]
+            # an anonymous master cannot take over references
+            if referrers and master_id == '':
+                continue
+
+            dupUrls = ['url(#'+dup_id+')', 'url("#'+dup_id+'")', "url('#"+dup_id+"')"]
+            # for each element that referenced the gradient we are going to remove
+            for elem in referrers:
+                for attr in ['fill', 'stroke']:
+                    if elem.getAttribute(attr) in dupUrls:
+                        elem.setAttribute(attr, masterUrl)
+
+                # the same references may be declared in the style attribute
+                style = elem.getAttribute('style')
+                fixedStyle = style
+                for dupUrl in dupUrls:
+                    fixedStyle = fixedStyle.replace(dupUrl, masterUrl)
+                if fixedStyle != style:
+                    elem.setAttribute('style', fixedStyle)
+
+                # reuse the attribute's qualified name so its prefix is kept
+                hrefNode = elem.getAttributeNodeNS(NS['XLINK'], 'href')
+                if hrefNode is not None and hrefNode.value == '#'+dup_id:
+                    elem.setAttributeNS(NS['XLINK'], hrefNode.nodeName, '#'+master_id)
+
+            # now that all referencing elements have been re-mapped to the master
+            # it is safe to remove this gradient from the document
+            dupGrad.parentNode.removeChild(dupGrad)
+            numElemsRemoved += 1
+            num += 1
+    return num
+
+def repairStyle(node, options):
+ num = 0
+ if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
+ # get all style properties and stuff them into a dictionary
+ styleMap = { }
+ rawStyles = node.getAttribute('style').split(';')
+ for style in rawStyles:
+ propval = style.split(':')
+ if len(propval) == 2 :
+ styleMap[propval[0].strip()] = propval[1].strip()
+
+ # I've seen this enough to know that I need to correct it:
+ # fill: url(#linearGradient4918) rgb(0, 0, 0);
+ for prop in ['fill', 'stroke'] :
+ if styleMap.has_key(prop) :
+ chunk = styleMap[prop].split(') ')
+ if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
+ styleMap[prop] = chunk[0] + ')'
+ num += 1
+
+ # Here is where we can weed out unnecessary styles like:
+ # opacity:1
+ if styleMap.has_key('opacity') :
+ opacity = float(styleMap['opacity'])
+ # opacity='1.0' is useless, remove it
+ if opacity == 1.0 :
+ del styleMap['opacity']
+ num += 1
+
+ # if opacity='0' then all fill and stroke properties are useless, remove them
+ elif opacity == 0.0 :
+ for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
+ 'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
+ 'stroke-dashoffset', 'stroke-opacity'] :
+ if styleMap.has_key(uselessStyle):
+ del styleMap[uselessStyle]
+ num += 1
+
+ # if stroke:none, then remove all stroke-related properties (stroke-width, etc)
+ # TODO: should also detect if the computed value of this element is stroke="none"
+ if styleMap.has_key('stroke') and styleMap['stroke'] == 'none' :
+ for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
+ 'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
+ if styleMap.has_key(strokestyle) :
+ del styleMap[strokestyle]
+ num += 1
+ # TODO: This is actually a problem if a parent element has a specified stroke
+ # we need to properly calculate computed values
+ del styleMap['stroke']
+
+ # if fill:none, then remove all fill-related properties (fill-rule, etc)
+ if styleMap.has_key('fill') and styleMap['fill'] == 'none' :
+ for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
+ if styleMap.has_key(fillstyle) :
+ del styleMap[fillstyle]
+ num += 1
+
+ # stop-opacity: 1
+ if styleMap.has_key('stop-opacity') :
+ if float(styleMap['stop-opacity']) == 1.0 :
+ del styleMap['stop-opacity']
+ num += 1
+
+ # fill-opacity: 1 or 0
+ if styleMap.has_key('fill-opacity') :
+ fillOpacity = float(styleMap['fill-opacity'])
+ # TODO: This is actually a problem if the parent element does not have fill-opacity=1
+ if fillOpacity == 1.0 :
+ del styleMap['fill-opacity']
+ num += 1
+ elif fillOpacity == 0.0 :
+ for uselessFillStyle in [ 'fill', 'fill-rule' ] :
+ if styleMap.has_key(uselessFillStyle):
+ del styleMap[uselessFillStyle]
+ num += 1
+
+ # stroke-opacity: 1 or 0
+ if styleMap.has_key('stroke-opacity') :
+ strokeOpacity = float(styleMap['stroke-opacity'])
+ # TODO: This is actually a problem if the parent element does not have stroke-opacity=1
+ if strokeOpacity == 1.0 :
+ del styleMap['stroke-opacity']
+ num += 1
+ elif strokeOpacity == 0.0 :
+ for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
+ 'stroke-dasharray', 'stroke-dashoffset' ] :
+ if styleMap.has_key(uselessStrokeStyle):
+ del styleMap[uselessStrokeStyle]
+ num += 1
+
+ # stroke-width: 0
+ if styleMap.has_key('stroke-width') :
+ strokeWidth = getSVGLength(styleMap['stroke-width'])
+ if strokeWidth == 0.0 :
+ for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
+ 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
+ if styleMap.has_key(uselessStrokeStyle):
+ del styleMap[uselessStrokeStyle]
+ num += 1
+
+ # remove font properties for non-text elements
+ # I've actually observed this in real SVG content
+ if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
+ for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
+ 'font-style', 'font-variant', 'font-weight',
+ 'letter-spacing', 'line-height', 'kerning',
+ 'text-anchor', 'text-decoration', 'text-rendering',
+ 'unicode-bidi', 'word-spacing', 'writing-mode'] :
+ if styleMap.has_key(fontstyle) :
+ del styleMap[fontstyle]
+ num += 1
+
+ # remove inkscape-specific styles
+ # TODO: need to get a full list of these
+ for inkscapeStyle in ['-inkscape-font-specification']:
+ if styleMap.has_key(inkscapeStyle):
+ del styleMap[inkscapeStyle]
+ num += 1
+
+ # visibility: visible
+ if styleMap.has_key('visibility') :
+ if styleMap['visibility'] == 'visible':
+ del styleMap['visibility']
+ num += 1
+
+ # display: inline
+ if styleMap.has_key('display') :
+ if styleMap['display'] == 'inline':
+ del styleMap['display']
+ num += 1
+
+ # overflow: visible or overflow specified on element other than svg, marker, pattern
+ if styleMap.has_key('overflow') :
+ if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
+ del styleMap['overflow']
+ num += 1
+
+ # marker: none
+ if styleMap.has_key('marker') :
+ if styleMap['marker'] == 'none':
+ del styleMap['marker']
+ num += 1
+
+ # now if any of the properties match known SVG attributes we prefer attributes
+ # over style so emit them and remove them from the style map
+ if options.style_to_xml:
+ for propName in styleMap.keys() :
+ if propName in svgAttributes :
+ node.setAttribute(propName, styleMap[propName])
+ del styleMap[propName]
+
+ # sew our remaining style properties back together into a style attribute
+ fixedStyle = ''
+ for prop in styleMap.keys() :
+ fixedStyle += prop + ':' + styleMap[prop] + ';'
+
+ if fixedStyle != '' :
+ node.setAttribute('style', fixedStyle)
+ else:
+ node.removeAttribute('style')
+
+ # recurse for our child elements
+ for child in node.childNodes :
+ num += repairStyle(child,options)
+
+ return num
+
def removeDefaultAttributeValues(node, options):
    """
    Recursively removes attributes whose value equals the SVG default.

    Handles gradientUnits/spreadMethod, zero x1/y1/y2, fx/fy equal to
    cx/cy, and the gradient-specific defaults x2=100% (linearGradient)
    and cx/cy/r=50% (radialGradient).

    The gradient-specific defaults are only removed from gradient
    elements: x2, cx, cy and r also exist on <line>, <circle> and
    <ellipse>, where the default is 0, so removing e.g. r="0.5" from a
    circle would change the rendering.  The unitless forms (1 and 0.5)
    only equal the default when gradientUnits is not userSpaceOnUse.

    node is a DOM node; options is unused here and only passed on to the
    recursive calls.  Returns the number of attributes removed.
    """
    if node.nodeType != 1:
        return 0

    num = 0
    tag = node.nodeName
    # read this before gradientUnits itself may be removed below
    userSpace = (node.getAttribute('gradientUnits') == 'userSpaceOnUse')

    def isZero(attr):
        value = node.getAttribute(attr)
        return value != '' and SVGLength(value).value == 0

    def isFraction(attr, percent, fraction):
        # True if attr is "<percent>%" or, in bounding-box units, the
        # equivalent unitless fraction
        value = node.getAttribute(attr)
        if value == '':
            return False
        length = SVGLength(value)
        if length.value == percent and length.units == Unit.PCT:
            return True
        return (not userSpace and length.value == fraction
                and length.units == Unit.NONE)

    removable = []

    # gradientUnits: objectBoundingBox, spreadMethod: pad
    for (attr, default) in [('gradientUnits', 'objectBoundingBox'),
                            ('spreadMethod', 'pad')]:
        if node.getAttribute(attr) == default:
            removable.append(attr)

    # x1, y1, y2: 0 (the default on both <line> and <linearGradient>)
    for attr in ['x1', 'y1', 'y2']:
        if isZero(attr):
            removable.append(attr)

    # x2: 100% (linearGradient only; on <line> the default is 0)
    if tag == 'linearGradient' and isFraction('x2', 100, 1):
        removable.append('x2')

    # fx: equal to cx, fy: equal to cy
    for (focal, centre) in [('fx', 'cx'), ('fy', 'cy')]:
        value = node.getAttribute(focal)
        if value != '' and value == node.getAttribute(centre):
            removable.append(focal)

    # cx, cy, r: 50% (radialGradient only; on shapes the default is 0)
    if tag == 'radialGradient':
        for attr in ['cx', 'cy', 'r']:
            if isFraction(attr, 50, 0.5):
                removable.append(attr)

    # removal is deferred so that every test above sees the original values
    for attr in removable:
        node.removeAttribute(attr)
        num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
+
# rgb(R, G, B) with integer channels
rgb = re.compile(r"\s*rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)\s*")
# rgb(R%, G%, B%) with percentage channels
rgbp = re.compile(r"\s*rgb\(\s*(\d*\.?\d+)%\s*,\s*(\d*\.?\d+)%\s*,\s*(\d*\.?\d+)%\s*\)\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

    Entries of the module-level colors table, rgb(r,g,b) and
    rgb(r%,g%,b%) are converted.  Channel values above 255 (or 100%) are
    clamped to 255 so that the result is always a valid six-digit hex
    color.  Any other input, including the empty string, is returned
    unchanged.
    """
    s = value

    if s in colors:
        s = colors[s]

    rgbpMatch = rgbp.match(s)
    if rgbpMatch is not None:
        # percentages are truncated (not rounded) to an integer channel
        channels = [int(float(p) * 255.0 / 100.0) for p in rgbpMatch.groups()]
        s = 'rgb(%d,%d,%d)' % tuple(channels)

    rgbMatch = rgb.match(s)
    if rgbMatch is not None:
        # clamp so that each channel always fits into two hex digits
        channels = [min(int(c), 255) for c in rgbMatch.groups()]
        s = '#%02X%02X%02X' % tuple(channels)

    # the length is tested first so that an empty string cannot raise IndexError
    if len(s) == 7 and s[0] == '#' and s[1] == s[2] and s[3] == s[4] and s[5] == s[6]:
        s = s.upper()
        s = '#' + s[1] + s[3] + s[5]

    return s
+
def convertColors(element) :
    """
    Walks the element tree and rewrites color attributes through
    convertColor() wherever the converted value is shorter.

    Returns the total number of bytes saved.
    """
    saved = 0

    if element.nodeType != 1:
        return 0

    # pick the color attributes that apply to this element type
    tag = element.nodeName
    if tag in ['rect', 'circle', 'ellipse', 'polygon',
               'line', 'polyline', 'path', 'g', 'a']:
        colorAttrs = ['fill', 'stroke']
    elif tag in ['stop']:
        colorAttrs = ['stop-color']
    elif tag in ['solidColor']:
        colorAttrs = ['solid-color']
    else:
        colorAttrs = []

    # only replace a value when the replacement is strictly shorter
    for name in colorAttrs:
        current = element.getAttribute(name)
        if current == '':
            continue
        candidate = convertColor(current)
        before = len(current)
        if before > len(candidate):
            element.setAttribute(name, candidate)
            saved += (before - len(element.getAttribute(name)))

    # descend into the children
    for child in element.childNodes :
        saved += convertColors(child)

    return saved
+
def cleanPath(element) :
    """
    Cleans the path string (d attribute) of the element

    The path is parsed, flattened into a list of (cmd, [numbers]) tuples
    and then run through a sequence of passes:
      1. convert absolute commands to relative ones (the first moveto is
         left untouched)
      2. remove zero-length segments
      3. replace cubic curves whose control points lie on the chord
         with straight lines
      4. merge consecutive commands of the same type
      5. turn horizontal/vertical lines into h/v
      6. merge h/v runs that point in the same direction
    The result is serialized back into the d attribute and the
    module-level statistics counters are updated.

    A path that is empty or does not begin with a moveto is left
    untouched.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    # however, this parser object has some ugliness in it (lists of tuples of tuples of
    # numbers and booleans).  we just need a list of (cmd,[numbers]):
    # (multiplying by Decimal(1) rounds to the current context precision)
    path = []
    for (cmd,dataset) in pathObj:
        nums = []
        if cmd in ['M','m','L','l','T','t']:
            # one or more tuples, each containing two numbers
            for t in dataset:
                nums.append(Decimal(str(t[0])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))
            # only create this segment if it is not empty
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['V','v','H','h']:
            # one or more numbers
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['C','c','S','s','Q','q']:
            # one or more tuples, each containing three (C) or two (S,Q)
            # tuples of two numbers each
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['A','a']:
            # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
            # another boolean, and a tuple of two numbers
            for t in dataset:
                nums.append( Decimal(str(t[0][0])) * Decimal(1) )
                nums.append( Decimal(str(t[0][1])) * Decimal(1) )
                nums.append( Decimal(str(t[1])) * Decimal(1) )

                if t[2]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                if t[3]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                nums.append( Decimal(str(t[4][0])) * Decimal(1) )
                nums.append( Decimal(str(t[4][1])) * Decimal(1) )
            path.append( (cmd, nums) )

        elif cmd in ['Z','z']:
            path.append( (cmd, []) )

    # nothing sensible can be done with a path that does not start with a moveto
    if not path or path[0][0] not in ['M','m'] or len(path[0][1]) < 2:
        return

    # calculate the starting x,y coord for the second path command
    (firstCmd, firstData) = path[0]
    # the sub-path start (where 'z' returns to) is always the first pair of
    # the moveto; a leading relative 'm' is treated as absolute
    (startx,starty) = (firstData[0], firstData[1])
    if firstCmd == 'M':
        # any further pairs are absolute linetos: the last pair is the current point
        x = firstData[-2]
        y = firstData[-1]
    else:
        # relative move, accumulate the implicit relative linetos
        (x,y) = (startx,starty)
        n = 2
        while n < len(firstData):
            x += firstData[n]
            y += firstData[n+1]
            n += 2

    # convert absolute coordinates into relative ones (start with the second subcommand
    # and leave the first M as absolute)
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        newCmd = cmd
        newData = data
        # adjust abs to rel
        # only the A command has some values that we don't want to adjust (radii, rotation, flags)
        if cmd == 'A':
            newCmd = 'a'
            newData = []
            for i in range(0, len(data), 7):
                newData.extend(data[i:i+5])
                newData.append(data[i+5]-x)
                newData.append(data[i+6]-y)
                x = data[i+5]
                y = data[i+6]
        elif cmd == 'a':
            for i in range(0, len(data), 7):
                x += data[i+5]
                y += data[i+6]
        elif cmd == 'H':
            newCmd = 'h'
            newData = []
            for coord in data:
                newData.append(coord-x)
                x = coord
        elif cmd == 'h':
            for coord in data:
                x += coord
        elif cmd == 'V':
            newCmd = 'v'
            newData = []
            for coord in data:
                newData.append(coord-y)
                y = coord
        elif cmd == 'v':
            for coord in data:
                y += coord
        elif cmd in ['M','L','T']:
            if cmd == 'M':
                # an absolute move starts a new sub-path at its first pair
                startx = data[0]
                starty = data[1]
            newCmd = cmd.lower()
            newData = []
            for i in range(0, len(data), 2):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                x = data[i]
                y = data[i+1]
        elif cmd in ['m','l','t']:
            if cmd == 'm':
                # a relative move is relative to the current point, not to
                # the start of the previous sub-path
                startx = x + data[0]
                starty = y + data[1]
            for i in range(0, len(data), 2):
                x += data[i]
                y += data[i+1]
        elif cmd in ['S','Q']:
            newCmd = cmd.lower()
            newData = []
            for i in range(0, len(data), 4):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                x = data[i+2]
                y = data[i+3]
        elif cmd in ['s','q']:
            for i in range(0, len(data), 4):
                x += data[i+2]
                y += data[i+3]
        elif cmd == 'C':
            newCmd = 'c'
            newData = []
            for i in range(0, len(data), 6):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                newData.append( data[i+4] - x )
                newData.append( data[i+5] - y )
                x = data[i+4]
                y = data[i+5]
        elif cmd == 'c':
            for i in range(0, len(data), 6):
                x += data[i+4]
                y += data[i+5]
        elif cmd in ['z','Z']:
            x = startx
            y = starty
            newCmd = 'z'
        newPath.append( (newCmd, newData) )
    path = newPath

    # remove empty segments
    # (maps each command to (numbers per segment, index of the end point's x))
    segmentShape = { 'm': (2,0), 'l': (2,0), 't': (2,0),
                     'c': (6,4), 'a': (7,5), 'q': (4,2) }
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in segmentShape:
            (size, end) = segmentShape[cmd]
            newData = []
            for i in range(0, len(data), size):
                if data[i+end] != 0 or data[i+end+1] != 0:
                    newData.extend(data[i:i+size])
                else:
                    numPathSegmentsReduced += 1
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd in ['h','v']:
            newData = []
            for coord in data:
                if coord != 0:
                    newData.append(coord)
                else:
                    numPathSegmentsReduced += 1
            if newData:
                newPath.append( (cmd,newData) )
        else:
            newPath.append( (cmd,data) )
    path = newPath

    # convert straight curves into lines
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        newData = data
        if cmd == 'c':
            newData = []
            for i in range(0, len(data), 6):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x,p1y) = (data[i],data[i+1])
                (p2x,p2y) = (data[i+2],data[i+3])
                dx = data[i+4]
                dy = data[i+5]

                foundStraightCurve = False

                if dx == 0:
                    if p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    # both control points must satisfy y = m*x
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append( (cmd,newData) )
                        newData = []
                    # now create a straight line segment
                    newPath.append( ('l', [dx,dy]) )
                    numCurvesStraightened += 1
                else:
                    newData.extend(data[i:i+6])

        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append( (cmd,newData) )
    path = newPath

    # collapse all consecutive commands of the same type into one command
    prevCmd = ''
    prevData = []
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # flush the previous command if it is not the same type as the current command;
        # movetos are never merged because the extra coordinates of a
        # moveto are implicit linetos
        if prevCmd != '':
            if cmd != prevCmd or cmd == 'm':
                newPath.append( (prevCmd, prevData) )
                prevCmd = ''
                prevData = []

        # if the previous and current commands are the same type, collapse
        if cmd == prevCmd:
            for coord in data:
                prevData.append(coord)

        # save last command and data
        else:
            prevCmd = cmd
            # copy, so that collapsing never mutates a list owned by the old path
            prevData = list(data)
    # flush last command and data
    if prevCmd != '':
        newPath.append( (prevCmd, prevData) )
    path = newPath

    # convert line segments into h,v where possible
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd == 'l':
            lineTuples = []
            for i in range(0, len(data), 2):
                if data[i] == 0:
                    # vertical
                    if lineTuples:
                        # flush the existing line command
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    # append the v and then the remaining line coords
                    newPath.append( ('v', [data[i+1]]) )
                    numPathSegmentsReduced += 1
                elif data[i+1] == 0:
                    # horizontal
                    if lineTuples:
                        # flush the line command, then append the h and then the remaining line coords
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    newPath.append( ('h', [data[i]]) )
                    numPathSegmentsReduced += 1
                else:
                    lineTuples.append(data[i])
                    lineTuples.append(data[i+1])
            if lineTuples:
                newPath.append( ('l', lineTuples) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # for each h or v, collapse unnecessary coordinates that run in the same direction
    # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['h','v'] and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    numPathSegmentsReduced += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append( (cmd, newData) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    newPathStr = serializePath(path)
    numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
    element.setAttribute('d', newPathStr)
+
def parseListOfPoints(s):
    """
    Parse string into a list of points.

    Returns a list containing an even number of coordinate strings, or
    an empty list if the string is empty, holds an odd number of
    coordinates, or contains a coordinate that is not unitless.
    """

    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    # surrounding whitespace is stripped first so it cannot produce empty tokens
    s = s.strip()
    if s == '':
        return []

    # a separator is either a comma with optional whitespace around it or a
    # run of whitespace; this pattern can never match the empty string
    nums = re.split(r"\s*,\s*|\s+", s)

    # if we had an odd number of coordinates, return empty
    if len(nums) % 2 != 0:
        return []

    points = []
    for i in range(0, len(nums), 2):
        x = SVGLength(nums[i])
        y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE:
            return []
        points.append( str(x.value) )
        points.append( str(y.value) )

    return points
+
def cleanPolygon(elem):
    """
    Remove unnecessary closing point of polygon points attribute

    A polygon is closed implicitly, so a last point that repeats the
    first one is dropped.  The remaining points are re-serialized with
    scourCoordinates().  If the points attribute is missing or cannot be
    parsed, the element is left untouched instead of having its points
    replaced by an empty string.
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    if not pts:
        return

    N = len(pts) // 2
    if N >= 2:
        # pts is a flat list: x0, y0, x1, y1, ...
        (startx,starty) = (pts[0],pts[1])
        (endx,endy) = (pts[-2],pts[-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts))
+
def cleanPolyline(elem):
    """
    Scour the polyline points attribute

    The points are parsed and re-serialized with scourCoordinates().  If
    the points attribute is missing or cannot be parsed, the element is
    left untouched instead of having its points replaced by an empty
    string.
    """
    pts = parseListOfPoints(elem.getAttribute('points'))
    if not pts:
        return
    elem.setAttribute('points', scourCoordinates(pts))
+
def serializePath(pathObj):
    """
    Turns a list of (cmd, data) tuples back into a path string: each
    command letter is followed by its scoured coordinates.
    """
    segments = [cmd + scourCoordinates(data) for (cmd, data) in pathObj]
    return "".join(segments)
+
def scourCoordinates(data):
    """
    Builds the compact string form of a list of coordinates:
        - every value is passed through scourLength()
        - a comma separates two values only when the second one is
          non-negative (a minus sign already acts as a separator)
        - no whitespace is emitted
    Returns an empty string when data is None.
    """
    if data is None:
        return ""

    pieces = []
    lastIndex = len(data) - 1
    for (index, coord) in enumerate(data):
        # the scoured coordinate itself
        pieces.append(scourLength(coord))

        # separator, unless the following number brings its own minus sign
        if index < lastIndex and Decimal(data[index+1]) >= 0:
            pieces.append(',')
    return "".join(pieces)
+
def scourLength(str):
    """
    Returns the shortest string form of an SVG length.

    The numeric part is rounded to the current decimal context
    precision, turned into an integer where possible and stripped of
    trailing zeros after the decimal point; scientific notation is used
    when that is shorter.  The unit of the input is appended unchanged.

    NOTE: the parameter name shadows the builtin str(); it is kept for
    backward compatibility.
    """
    length = SVGLength(str)
    coord = length.value

    # reduce to the proper number of digits
    coord = Decimal(unicode(coord)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # Decimal has no trim() method, so trailing zeros are removed on the
    # string form.  A value in exponent notation is left alone: its trailing
    # zeros belong to the exponent ("1.2346E-10") and must not be stripped.
    s = unicode(coord)
    if '.' in s and 'E' not in s:
        s = s.rstrip('0')
    coord = Decimal(s)

    # Decimal.normalize() will use scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord)+Unit.str(length.units)
+
def embedRasters(element, options) :
    """
    Converts raster references to inline images.

    If the xlink:href of the element names a png, jpg or gif that can be
    read from disk (as given, or relative to the directory of
    options.infilename) or fetched from an http:// URL, the href is
    replaced by a base64-encoded data: URI.  Otherwise the element is
    left untouched.

    NOTE: there are size limits to base64-encoding handling in browsers
    NOTE(review): an http:// href makes this function fetch a URL that is
    taken from the input document - confirm this is acceptable for
    untrusted input.
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # nothing to do unless xlink:href is set
    if href == '' or len(href) <= 1:
        return

    # find if href value has filename ext
    ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

    # look for 'png', 'jpg', and 'gif' extensions
    if ext not in ['png', 'jpg', 'gif']:
        return

    # if href is neither an existing file nor a URL, interpret it
    # relative to the input file given on the command line
    if not os.path.isfile(href) and href[:7] != 'http://':
        infilename = '.'
        if options.infilename: infilename = options.infilename
        href = os.path.join(os.path.dirname(infilename), href)

    rasterdata = ''
    # test if file exists locally
    if os.path.isfile(href):
        # open raster file as raw binary and always close it again
        raster = open( href, "rb")
        try:
            rasterdata = raster.read()
        finally:
            raster.close()

    elif href[:7] == 'http://':
        webFile = urllib.urlopen( href )
        try:
            rasterdata = webFile.read()
        finally:
            webFile.close()

    # ... should we remove all images which don't resolve?
    if rasterdata == '':
        return

    # base64-encode raster
    b64eRaster = base64.b64encode( rasterdata )
    if b64eRaster == '':
        return

    # PNG and GIF both have MIME Type 'image/[ext]', but
    # JPEG has MIME Type 'image/jpeg'
    if ext == 'jpg':
        ext = 'jpeg'

    # set href attribute to base64-encoded equivalent
    element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
    numRastersEmbedded += 1
+
def properlySizeDoc(docElement):
    """
    Replaces a fixed width/height on the document element by a viewBox.

    Nothing is changed when width or height uses a unit other than px
    (or no unit), or when a valid viewBox exists that has a non-zero
    origin or a size different from width/height.  A viewBox that does
    not parse is treated as invalid and overwritten.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
        (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute (values separated by whitespace and/or a comma);
    # at most three splits, so surplus values end up in the last item and
    # make the viewBox fail to parse
    vbSep = re.split(r"\s*,\s*|\s+", docElement.getAttribute('viewBox').strip(), 3)
    # if we have a valid viewBox we need to check it
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
+
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Recursively renames the namespace prefix of elements.

    Every element below (and including) node whose prefix is oldprefix
    is replaced by a new element with prefix newprefix (no prefix if
    newprefix is the empty string).  The replacement keeps the namespace
    URI, all attributes (including their own prefixes) and all children
    of the original element.

    Nodes that are None or not elements are ignored.
    """
    if node is None or node.nodeType != 1: return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node; it stays in the same namespace even
        # when it no longer carries a prefix
        if newprefix != '':
            qualifiedName = newprefix + ":" + localName
        else:
            qualifiedName = localName
        newNode = doc.createElementNS(namespace, qualifiedName)

        # add all the attributes, using the qualified name so that
        # prefixed attributes such as xlink:href keep their prefix
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            newNode.setAttributeNS( attr.namespaceURI, attr.nodeName, attr.nodeValue)

        # move (rather than clone) all the child nodes
        while node.firstChild is not None:
            newNode.appendChild(node.firstChild)

        # replace old node with new node and continue with the new one,
        # which is the one that is part of the document now
        parent.replaceChild( newNode, node )
        node = newNode

    # now do all child nodes (iterate over a copy: children may be replaced)
    for child in list(node.childNodes) :
        remapNamespacePrefix(child, oldprefix, newprefix)
+
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
    """
    Scours an SVG document given as a string and returns the result.

    in_string is the XML source; options is an option object as
    produced by the module's option parser (its defaults are used when
    it is None).  The document is parsed, run through all clean-up
    passes and serialized again, with blank lines removed and the XML
    prolog prepended unless options.strip_xml_prolog is set.  The
    module-level statistics counters are updated as a side effect, and
    the decimal context precision is set to options.digits.
    """
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors

    if options is None:
        options = _options_parser.get_default_values()
    getcontext().prec = options.digits
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    # NOTE: doc.documentElement is looked up again on every use because
    # remapping may replace the document element itself
    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # an element is kept as soon as it has an element, text, CDATA or
    # comment child
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    if options.strip_ids:
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # clean path data
    for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    # (a polyline is not closed implicitly, so its closing point must be kept)
    for polyline in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'polyline') :
        cleanPolyline(polyline)

    # scour lengths (including coordinates)
    for tagName in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], tagName):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry', 'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    properlySizeDoc(doc.documentElement)

    # output the document as a string
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    out_string = doc.documentElement.toxml()

    # get rid of empty lines
    lines = []
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string stripped of empty lines
    if options.strip_xml_prolog == False:
        xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    else:
        xmlprolog = ""

    return xmlprolog + "".join(lines)
+
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Reads the file, scours its content with scourString() and returns
    the result parsed into a minidom document.
    """
    infile = open(filename)
    # make sure the file is closed even if reading fails
    try:
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
+
+# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that prints a banner (application name, version and
    copyright line) in front of the regular usage text.
    """
    def format_usage(self, usage):
        banner = "%s %s\n%s\n" % (APP, VER, COPYRIGHT)
        body = optparse.IndentedHelpFormatter.format_usage(self, usage)
        return banner + body
+
+# GZ: would prefer this to be in a function or class scope, but tests etc need
+# access to the defaults anyway
# Command line parser; also the source of the default option values.
_options_parser = optparse.OptionParser(
    version=VER,
    formatter=HeaderedFormatter(max_help_position=30),
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    description=("If the input/output files are specified with a svgz"
        " extension, then compressed SVG is assumed. If the input file is not"
        " specified, stdin is used. If the output file is not specified, "
        " stdout is used."))

# switches that turn a default-on clean-up off
_options_parser.add_option(
    "--disable-simplify-colors",
    dest="simple_colors",
    action="store_false",
    default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option(
    "--disable-style-to-xml",
    dest="style_to_xml",
    action="store_false",
    default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option(
    "--disable-group-collapsing",
    dest="group_collapse",
    action="store_false",
    default=True,
    help="won't collapse <g> elements")
_options_parser.add_option(
    "--enable-id-stripping",
    dest="strip_ids",
    action="store_true",
    default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option(
    "--disable-embed-rasters",
    dest="embed_rasters",
    action="store_false",
    default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option(
    "--keep-editor-data",
    dest="keep_editor_data",
    action="store_true",
    default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option(
    "--strip-xml-prolog",
    dest="strip_xml_prolog",
    action="store_true",
    default=False,
    help="won't output the <?xml ?> prolog")

# GZ: this is confusing, most people will be thinking in terms of
#     decimal places, which is not what decimal precision is doing
_options_parser.add_option(
    "-p", "--set-precision",
    dest="digits",
    action="store",
    type=int,
    default=5,
    help="set number of significant digits (default: %default)")

# input and output file names (hidden from the help text)
_options_parser.add_option(
    "-i",
    dest="infilename",
    action="store",
    help=optparse.SUPPRESS_HELP)
_options_parser.add_option(
    "-o",
    dest="outfilename",
    action="store",
    help=optparse.SUPPRESS_HELP)
+
def maybe_gziped_file(filename, mode="r"):
    """
    Opens filename in the given mode; a name ending in .svgz or .gz
    (case-insensitive) is opened through gzip.
    """
    extension = os.path.splitext(filename)[1].lower()
    if extension not in (".svgz", ".gz"):
        return file(filename, mode)
    return gzip.GzipFile(filename, mode)
+
def parse_args(args=None):
    """
    Parses the command line and opens the input and output streams.

    Returns (options, [infile, outfile]); stdin/stdout are used when no
    file name was given.  Leftover arguments and a negative precision
    are reported through the parser's error().
    """
    options, rargs = _options_parser.parse_args(args)

    if rargs:
        _options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")

    # GZ: could sniff for gzip compression here
    infile = sys.stdin
    if options.infilename:
        # GZ: could catch a raised IOError here and report
        infile = maybe_gziped_file(options.infilename)

    outfile = sys.stdout
    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")

    return options, [infile, outfile]
+
def getReport():
    """
    Returns the statistics gathered in the module-level counters as a
    multi-line string, one " label: value" entry per line.
    """
    entries = [
        (' Number of elements removed: ', numElemsRemoved),
        (' Number of attributes removed: ', numAttrsRemoved),
        (' Number of unreferenced id attributes removed: ', numIDsRemoved),
        (' Number of style properties fixed: ', numStylePropsFixed),
        (' Number of raster images embedded inline: ', numRastersEmbedded),
        (' Number of path segments reduced/removed: ', numPathSegmentsReduced),
        (' Number of bytes saved in path data: ', numBytesSavedInPathData),
        (' Number of bytes saved in colors: ', numBytesSavedInColors),
        (' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    ]
    return '\n'.join([label + str(count) for (label, count) in entries])
+
# Command line entry point: scour the input, write the output and print a
# report (timing, statistics and size comparison) to stderr.
if __name__ == '__main__':
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        def get_tick():
            return os.times()[0]

    start = get_tick()

    options, (input, output) = parse_args()

    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    print >>sys.stderr, ' File:', input.name, \
        '\n Time taken:', str(end-start) + 's\n', \
        getReport()

    oldsize = len(in_string)
    newsize = len(out_string)
    # float division: with integer division the ratio would always be
    # truncated to 0 (or 1) before being scaled to a percentage
    sizediff = (float(newsize) / oldsize) * 100
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
+
+
diff --git a/share/extensions/svg_regex.py b/share/extensions/svg_regex.py
--- /dev/null
@@ -0,0 +1,281 @@
+# This software is OSI Certified Open Source Software.
+# OSI Certified is a certification mark of the Open Source Initiative.
+#
+# Copyright (c) 2006, Enthought, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * Neither the name of Enthought, Inc. nor the names of its contributors may
+# be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+""" Small hand-written recursive descent parser for SVG <path> data.
+
+
+In [1]: from svg_regex import svg_parser
+
+In [3]: svg_parser.parse('M 10,20 30,40V50 60 70')
+Out[3]: [('M', [(10.0, 20.0), (30.0, 40.0)]), ('V', [50.0, 60.0, 70.0])]
+
+In [4]: svg_parser.parse('M 0.6051.5') # An edge case
+Out[4]: [('M', [(0.60509999999999997, 0.5)])]
+
+In [5]: svg_parser.parse('M 100-200') # Another edge case
+Out[5]: [('M', [(100.0, -200.0)])]
+"""
+
+import re
+
+
+# Sentinel.
class _EOF(object):
    """Type of the end-of-input marker; it prints as ``EOF``."""

    def __repr__(self):
        return 'EOF'


# The single shared instance.  Code in this module compares against it by
# identity (``is`` / ``is not``).
EOF = _EOF()
+
# Regular expressions for the three token kinds found in SVG path data.
# Note that the float pattern also accepts plain integers such as "42".
_FLOAT_PATTERN = r'[-\+]?(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.?))(?:[Ee][-\+]?[0-9]+)?'
_INT_PATTERN = r'[-\+]?[0-9]+'
_COMMAND_PATTERN = r'[AaCcHhLlMmQqSsTtVvZz]'

# (token name, pattern) pairs, in the order they are handed to the lexer.
lexicon = [
    ('float', _FLOAT_PATTERN),
    ('int', _INT_PATTERN),
    ('command', _COMMAND_PATTERN),
]
+
+
class Lexer(object):
    """ Break SVG path data into tokens.

    The SVG spec requires that tokens are greedy. This lexer relies on Python's
    regexes defaulting to greediness.

    This style of implementation was inspired by this article:

        http://www.gooli.org/blog/a-simple-lexer-in-python/
    """

    def __init__(self, lexicon):
        """ Compile `lexicon`, a sequence of (name, regex) pairs, into a
        single alternation with one named group per token type.
        """
        self.lexicon = lexicon
        alternatives = ['(?P<%s>%s)' % (name, regex) for name, regex in lexicon]
        self.regex_string = '|'.join(alternatives)
        self.regex = re.compile(self.regex_string)

    def lex(self, text):
        """ Yield (token_type, str_data) tokens.

        The last token will be (EOF, None) where EOF is the singleton object
        defined in this module.
        """
        for found in self.regex.finditer(text):
            # Report the first lexicon entry whose named group took part in
            # this match.
            for token_type, _ in self.lexicon:
                value = found.group(token_type)
                if value is None:
                    continue
                yield (token_type, value)
                break
        yield (EOF, None)
+
# Default lexer, built from the module-level lexicon.
svg_lexer = Lexer(lexicon)


class SVGPathParser(object):
    """ Parse SVG <path> data into a list of commands.

    Each distinct command will take the form of a tuple (command, data). The
    `command` is just the character string that starts the command group in the
    <path> data, so 'M' for absolute moveto, 'm' for relative moveto, 'Z' for
    closepath, etc. The kind of data it carries with it depends on the command.
    For 'Z' (closepath), it's just None. The others are lists of individual
    argument groups. Multiple elements in these lists usually mean to repeat the
    command. The notable exception is 'M' (moveto) where only the first element
    is truly a moveto. The remainder are implicit linetos.

    See the SVG documentation for the interpretation of the individual elements
    for each command.

    The main method is `parse(text)`. It can only consume actual strings, not
    filelike objects or iterators.

    Every `rule_*` method takes `next`, a zero-argument callable returning the
    following token, and `token`, the current (type, text) token. Each returns
    a `(result, token)` pair where `token` is the first token the rule did not
    consume. Malformed input raises SyntaxError.
    """

    def __init__(self, lexer=svg_lexer):
        """ Store `lexer` and build the command-letter dispatch table. """
        self.lexer = lexer

        # Upper- and lower-case forms of a command share one rule.
        self.command_dispatch = {
            'Z': self.rule_closepath,
            'z': self.rule_closepath,
            'M': self.rule_moveto_or_lineto,
            'm': self.rule_moveto_or_lineto,
            'L': self.rule_moveto_or_lineto,
            'l': self.rule_moveto_or_lineto,
            'H': self.rule_orthogonal_lineto,
            'h': self.rule_orthogonal_lineto,
            'V': self.rule_orthogonal_lineto,
            'v': self.rule_orthogonal_lineto,
            'C': self.rule_curveto3,
            'c': self.rule_curveto3,
            'S': self.rule_curveto2,
            's': self.rule_curveto2,
            'Q': self.rule_curveto2,
            'q': self.rule_curveto2,
            'T': self.rule_curveto1,
            't': self.rule_curveto1,
            'A': self.rule_elliptical_arc,
            'a': self.rule_elliptical_arc,
        }

        # Token types that carry a numeric value.
        self.number_tokens = ['int', 'float']

    def parse(self, text):
        """ Parse a string of SVG <path> data.

        Returns the list of (command, data) tuples described in the class
        docstring.
        """
        tokens = iter(self.lexer.lex(text))
        # Python 3 spells the iterator method __next__; Python 2 generators
        # only provide next.  Support both.
        try:
            advance = tokens.__next__
        except AttributeError:
            advance = tokens.next
        token = advance()
        return self.rule_svg_path(advance, token)

    def rule_svg_path(self, next, token):
        """ Parse command groups until EOF and return them as a list. """
        commands = []
        while token[0] is not EOF:
            if token[0] != 'command':
                raise SyntaxError("expecting a command; got %r" % (token,))
            rule = self.command_dispatch[token[1]]
            command_group, token = rule(next, token)
            commands.append(command_group)
        return commands

    def rule_closepath(self, next, token):
        """ Z/z: takes no arguments, so the data is None. """
        command = token[1]
        token = next()
        return (command, None), token

    def rule_moveto_or_lineto(self, next, token):
        """ M/m/L/l: a list of (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair)
        return (command, coordinates), token

    def rule_orthogonal_lineto(self, next, token):
        """ H/h/V/v: a list of single coordinates. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            coord, token = self.rule_coordinate(next, token)
            coordinates.append(coord)
        return (command, coordinates), token

    def rule_curveto3(self, next, token):
        """ C/c: a list of groups of three (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            pair3, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2, pair3))
        return (command, coordinates), token

    def rule_curveto2(self, next, token):
        """ S/s/Q/q: a list of groups of two (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2))
        return (command, coordinates), token

    def rule_curveto1(self, next, token):
        """ T/t: a list of (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair1)
        return (command, coordinates), token

    def rule_elliptical_arc(self, next, token):
        """ A/a: a list of
        ((rx, ry), axis_rotation, large_arc_flag, sweep_flag, (x, y)) tuples.

        Raises SyntaxError if a radius is negative, a flag is not '0' or '1',
        or a number is missing.
        """
        command = token[1]
        token = next()
        arguments = []
        while token[0] in self.number_tokens:
            rx = self._arc_radius(token)
            token = next()
            ry = self._arc_radius(token)
            token = next()
            axis_rotation = self._arc_number(token)
            token = next()
            large_arc_flag = self._arc_flag(token)
            token = next()
            sweep_flag = self._arc_flag(token)
            token = next()
            x = self._arc_number(token)
            token = next()
            y = self._arc_number(token)
            token = next()
            arguments.append(((rx,ry), axis_rotation, large_arc_flag, sweep_flag, (x,y)))

        return (command, arguments), token

    def _arc_number(self, token):
        """ Return the float value of a number token or raise SyntaxError. """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        return float(token[1])

    def _arc_radius(self, token):
        """ Like _arc_number, but also reject negative values. """
        radius = self._arc_number(token)
        if radius < 0.0:
            raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
        return radius

    def _arc_flag(self, token):
        """ Return the bool for a token whose text is exactly '0' or '1'. """
        if token[1] not in ('0', '1'):
            raise SyntaxError("expecting a boolean flag; got %r" % (token,))
        return bool(int(token[1]))

    def rule_coordinate(self, next, token):
        """ Consume one number and return it as a float. """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        return x, token

    def rule_coordinate_pair(self, next, token):
        """ Consume two numbers and return them as an (x, y) tuple. """
        # Inline these since this rule is so common.
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        y = float(token[1])
        token = next()
        return (x,y), token


# Ready-made parser instance using the default lexer.
svg_parser = SVGPathParser()