share/extensions/scour.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 #  Scour
   5 #
   6 #  Copyright 2009 Jeff Schiller
   7 #
   8 #  This file is part of Scour, http://www.codedread.com/scour/
   9 #
  10 #   Licensed under the Apache License, Version 2.0 (the "License");
  11 #   you may not use this file except in compliance with the License.
  12 #   You may obtain a copy of the License at
  13 #
  14 #       http://www.apache.org/licenses/LICENSE-2.0
  15 #
  16 #   Unless required by applicable law or agreed to in writing, software
  17 #   distributed under the License is distributed on an "AS IS" BASIS,
  18 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19 #   See the License for the specific language governing permissions and
  20 #   limitations under the License.
  21
  22 # Notes:
  23
  24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
  25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
  26
  27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
  28 #
  29 # * Process Transformations
  30 #  * Collapse all group based transformations
  31
  32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
  33 #  * analysis of path elements to see if rect can be used instead? (must also need to look
  34 #    at rounded corners)
  35
  36 # Next Up:
  37 # + remove unused attributes in parent elements
  38 # + prevent elements from being stripped if they are referenced in a <style> element
  39 #   (for instance, filter, marker, pattern) - need a crude CSS parser
  40 # - add an option to remove ids if they match the Inkscape-style of IDs
  41 # - investigate point-reducing algorithms
  42 # - parse transform attribute
  43 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
  44 # - option to remove metadata
  45
  46 # necessary to get true division
  47 from __future__ import division
  48
  49 import os
  50 import sys
  51 import xml.dom.minidom
  52 import re
  53 import math
  54 import base64
  55 import urllib
  56 from svg_regex import svg_parser
  57 import gzip
  58 import optparse
  59 from yocto_css import parseCssString
  60
  61 # Python 2.3- did not have Decimal
  62 try:
  63         from decimal import *
  64 except ImportError:
  65         from fixedpoint import *
  66         Decimal = FixedPoint
  67
  68 APP = 'scour'
  69 VER = '0.20'
  70 COPYRIGHT = 'Copyright Jeff Schiller, 2009'
  71
  72 NS = {  'SVG':          'http://www.w3.org/2000/svg',
  73                 'XLINK':        'http://www.w3.org/1999/xlink',
  74                 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
  75                 'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
  76                 'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
  77                 'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
  78                 'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
  79                 'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
  80                 'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
  81                 'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
  82                 'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
  83                 'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
  84                 'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
  85                 'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
  86                 }
  87
  88 unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
  89                                 NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
  90                                 NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
  91                                 NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]
  92
  93 svgAttributes = [
  94                                 'clip-rule',
  95                                 'display',
  96                                 'fill',
  97                                 'fill-opacity',
  98                                 'fill-rule',
  99                                 'filter',
 100                                 'font-family',
 101                                 'font-size',
 102                                 'font-stretch',
 103                                 'font-style',
 104                                 'font-variant',
 105                                 'font-weight',
 106                                 'line-height',
 107                                 'marker',
 108                                 'opacity',
 109                                 'overflow',
 110                                 'stop-color',
 111                                 'stop-opacity',
 112                                 'stroke',
 113                                 'stroke-dashoffset',
 114                                 'stroke-linecap',
 115                                 'stroke-linejoin',
 116                                 'stroke-miterlimit',
 117                                 'stroke-opacity',
 118                                 'stroke-width',
 119                                 'visibility'
 120                                 ]
 121
 122 colors = {
 123         'aliceblue': 'rgb(240, 248, 255)',
 124         'antiquewhite': 'rgb(250, 235, 215)',
 125         'aqua': 'rgb( 0, 255, 255)',
 126         'aquamarine': 'rgb(127, 255, 212)',
 127         'azure': 'rgb(240, 255, 255)',
 128         'beige': 'rgb(245, 245, 220)',
 129         'bisque': 'rgb(255, 228, 196)',
 130         'black': 'rgb( 0, 0, 0)',
 131         'blanchedalmond': 'rgb(255, 235, 205)',
 132         'blue': 'rgb( 0, 0, 255)',
 133         'blueviolet': 'rgb(138, 43, 226)',
 134         'brown': 'rgb(165, 42, 42)',
 135         'burlywood': 'rgb(222, 184, 135)',
 136         'cadetblue': 'rgb( 95, 158, 160)',
 137         'chartreuse': 'rgb(127, 255, 0)',
 138         'chocolate': 'rgb(210, 105, 30)',
 139         'coral': 'rgb(255, 127, 80)',
 140         'cornflowerblue': 'rgb(100, 149, 237)',
 141         'cornsilk': 'rgb(255, 248, 220)',
 142         'crimson': 'rgb(220, 20, 60)',
 143         'cyan': 'rgb( 0, 255, 255)',
 144         'darkblue': 'rgb( 0, 0, 139)',
 145         'darkcyan': 'rgb( 0, 139, 139)',
 146         'darkgoldenrod': 'rgb(184, 134, 11)',
 147         'darkgray': 'rgb(169, 169, 169)',
 148         'darkgreen': 'rgb( 0, 100, 0)',
 149         'darkgrey': 'rgb(169, 169, 169)',
 150         'darkkhaki': 'rgb(189, 183, 107)',
 151         'darkmagenta': 'rgb(139, 0, 139)',
 152         'darkolivegreen': 'rgb( 85, 107, 47)',
 153         'darkorange': 'rgb(255, 140, 0)',
 154         'darkorchid': 'rgb(153, 50, 204)',
 155         'darkred': 'rgb(139, 0, 0)',
 156         'darksalmon': 'rgb(233, 150, 122)',
 157         'darkseagreen': 'rgb(143, 188, 143)',
 158         'darkslateblue': 'rgb( 72, 61, 139)',
 159         'darkslategray': 'rgb( 47, 79, 79)',
 160         'darkslategrey': 'rgb( 47, 79, 79)',
 161         'darkturquoise': 'rgb( 0, 206, 209)',
 162         'darkviolet': 'rgb(148, 0, 211)',
 163         'deeppink': 'rgb(255, 20, 147)',
 164         'deepskyblue': 'rgb( 0, 191, 255)',
 165         'dimgray': 'rgb(105, 105, 105)',
 166         'dimgrey': 'rgb(105, 105, 105)',
 167         'dodgerblue': 'rgb( 30, 144, 255)',
 168         'firebrick': 'rgb(178, 34, 34)',
 169         'floralwhite': 'rgb(255, 250, 240)',
 170         'forestgreen': 'rgb( 34, 139, 34)',
 171         'fuchsia': 'rgb(255, 0, 255)',
 172         'gainsboro': 'rgb(220, 220, 220)',
 173         'ghostwhite': 'rgb(248, 248, 255)',
 174         'gold': 'rgb(255, 215, 0)',
 175         'goldenrod': 'rgb(218, 165, 32)',
 176         'gray': 'rgb(128, 128, 128)',
 177         'grey': 'rgb(128, 128, 128)',
 178         'green': 'rgb( 0, 128, 0)',
 179         'greenyellow': 'rgb(173, 255, 47)',
 180         'honeydew': 'rgb(240, 255, 240)',
 181         'hotpink': 'rgb(255, 105, 180)',
 182         'indianred': 'rgb(205, 92, 92)',
 183         'indigo': 'rgb( 75, 0, 130)',
 184         'ivory': 'rgb(255, 255, 240)',
 185         'khaki': 'rgb(240, 230, 140)',
 186         'lavender': 'rgb(230, 230, 250)',
 187         'lavenderblush': 'rgb(255, 240, 245)',
 188         'lawngreen': 'rgb(124, 252, 0)',
 189         'lemonchiffon': 'rgb(255, 250, 205)',
 190         'lightblue': 'rgb(173, 216, 230)',
 191         'lightcoral': 'rgb(240, 128, 128)',
 192         'lightcyan': 'rgb(224, 255, 255)',
 193         'lightgoldenrodyellow': 'rgb(250, 250, 210)',
 194         'lightgray': 'rgb(211, 211, 211)',
 195         'lightgreen': 'rgb(144, 238, 144)',
 196         'lightgrey': 'rgb(211, 211, 211)',
 197         'lightpink': 'rgb(255, 182, 193)',
 198         'lightsalmon': 'rgb(255, 160, 122)',
 199         'lightseagreen': 'rgb( 32, 178, 170)',
 200         'lightskyblue': 'rgb(135, 206, 250)',
 201         'lightslategray': 'rgb(119, 136, 153)',
 202         'lightslategrey': 'rgb(119, 136, 153)',
 203         'lightsteelblue': 'rgb(176, 196, 222)',
 204         'lightyellow': 'rgb(255, 255, 224)',
 205         'lime': 'rgb( 0, 255, 0)',
 206         'limegreen': 'rgb( 50, 205, 50)',
 207         'linen': 'rgb(250, 240, 230)',
 208         'magenta': 'rgb(255, 0, 255)',
 209         'maroon': 'rgb(128, 0, 0)',
 210         'mediumaquamarine': 'rgb(102, 205, 170)',
 211         'mediumblue': 'rgb( 0, 0, 205)',
 212         'mediumorchid': 'rgb(186, 85, 211)',
 213         'mediumpurple': 'rgb(147, 112, 219)',
 214         'mediumseagreen': 'rgb( 60, 179, 113)',
 215         'mediumslateblue': 'rgb(123, 104, 238)',
 216         'mediumspringgreen': 'rgb( 0, 250, 154)',
 217         'mediumturquoise': 'rgb( 72, 209, 204)',
 218         'mediumvioletred': 'rgb(199, 21, 133)',
 219         'midnightblue': 'rgb( 25, 25, 112)',
 220         'mintcream': 'rgb(245, 255, 250)',
 221         'mistyrose': 'rgb(255, 228, 225)',
 222         'moccasin': 'rgb(255, 228, 181)',
 223         'navajowhite': 'rgb(255, 222, 173)',
 224         'navy': 'rgb( 0, 0, 128)',
 225         'oldlace': 'rgb(253, 245, 230)',
 226         'olive': 'rgb(128, 128, 0)',
 227         'olivedrab': 'rgb(107, 142, 35)',
 228         'orange': 'rgb(255, 165, 0)',
 229         'orangered': 'rgb(255, 69, 0)',
 230         'orchid': 'rgb(218, 112, 214)',
 231         'palegoldenrod': 'rgb(238, 232, 170)',
 232         'palegreen': 'rgb(152, 251, 152)',
 233         'paleturquoise': 'rgb(175, 238, 238)',
 234         'palevioletred': 'rgb(219, 112, 147)',
 235         'papayawhip': 'rgb(255, 239, 213)',
 236         'peachpuff': 'rgb(255, 218, 185)',
 237         'peru': 'rgb(205, 133, 63)',
 238         'pink': 'rgb(255, 192, 203)',
 239         'plum': 'rgb(221, 160, 221)',
 240         'powderblue': 'rgb(176, 224, 230)',
 241         'purple': 'rgb(128, 0, 128)',
 242         'red': 'rgb(255, 0, 0)',
 243         'rosybrown': 'rgb(188, 143, 143)',
 244         'royalblue': 'rgb( 65, 105, 225)',
 245         'saddlebrown': 'rgb(139, 69, 19)',
 246         'salmon': 'rgb(250, 128, 114)',
 247         'sandybrown': 'rgb(244, 164, 96)',
 248         'seagreen': 'rgb( 46, 139, 87)',
 249         'seashell': 'rgb(255, 245, 238)',
 250         'sienna': 'rgb(160, 82, 45)',
 251         'silver': 'rgb(192, 192, 192)',
 252         'skyblue': 'rgb(135, 206, 235)',
 253         'slateblue': 'rgb(106, 90, 205)',
 254         'slategray': 'rgb(112, 128, 144)',
 255         'slategrey': 'rgb(112, 128, 144)',
 256         'snow': 'rgb(255, 250, 250)',
 257         'springgreen': 'rgb( 0, 255, 127)',
 258         'steelblue': 'rgb( 70, 130, 180)',
 259         'tan': 'rgb(210, 180, 140)',
 260         'teal': 'rgb( 0, 128, 128)',
 261         'thistle': 'rgb(216, 191, 216)',
 262         'tomato': 'rgb(255, 99, 71)',
 263         'turquoise': 'rgb( 64, 224, 208)',
 264         'violet': 'rgb(238, 130, 238)',
 265         'wheat': 'rgb(245, 222, 179)',
 266         'white': 'rgb(255, 255, 255)',
 267         'whitesmoke': 'rgb(245, 245, 245)',
 268         'yellow': 'rgb(255, 255, 0)',
 269         'yellowgreen': 'rgb(154, 205, 50)',
 270         }
 271
 272 def isSameSign(a,b): return (a <= 0 and b <= 0) or (a >= 0 and b >= 0)
 273
 274 coord = re.compile("\\-?\\d+\\.?\\d*")
 275 scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
 276 number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
 277 sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
 278 unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
 279
 280 class Unit(object):
 281         INVALID = -1
 282         NONE = 0
 283         PCT = 1
 284         PX = 2
 285         PT = 3
 286         PC = 4
 287         EM = 5
 288         EX = 6
 289         CM = 7
 290         MM = 8
 291         IN = 9
 292
 293 #       @staticmethod
 294         def get(str):
 295                 # GZ: shadowing builtins like 'str' is generally bad form
 296                 # GZ: encoding stuff like this in a dict makes for nicer code
 297                 if str == None or str == '': return Unit.NONE
 298                 elif str == '%': return Unit.PCT
 299                 elif str == 'px': return Unit.PX
 300                 elif str == 'pt': return Unit.PT
 301                 elif str == 'pc': return Unit.PC
 302                 elif str == 'em': return Unit.EM
 303                 elif str == 'ex': return Unit.EX
 304                 elif str == 'cm': return Unit.CM
 305                 elif str == 'mm': return Unit.MM
 306                 elif str == 'in': return Unit.IN
 307                 return Unit.INVALID
 308
 309 #       @staticmethod
 310         def str(u):
 311                 if u == Unit.NONE: return ''
 312                 elif u == Unit.PCT: return '%'
 313                 elif u == Unit.PX: return 'px'
 314                 elif u == Unit.PT: return 'pt'
 315                 elif u == Unit.PC: return 'pc'
 316                 elif u == Unit.EM: return 'em'
 317                 elif u == Unit.EX: return 'ex'
 318                 elif u == Unit.CM: return 'cm'
 319                 elif u == Unit.MM: return 'mm'
 320                 elif u == Unit.IN: return 'in'
 321                 return 'INVALID'
 322
 323         get = staticmethod(get)
 324         str = staticmethod(str)
 325
 326 class SVGLength(object):
 327         def __init__(self, str):
 328                 try: # simple unitless and no scientific notation
 329                         self.value = float(str)
 330                         if int(self.value) == self.value:
 331                                 self.value = int(self.value)
 332                         self.units = Unit.NONE
 333                 except ValueError:
 334                         # we know that the length string has an exponent, a unit, both or is invalid
 335
 336                         # parse out number, exponent and unit
 337                         self.value = 0
 338                         unitBegin = 0
 339                         scinum = scinumber.match(str)
 340                         if scinum != None:
 341                                 # this will always match, no need to check it
 342                                 numMatch = number.match(str)
 343                                 expMatch = sciExponent.search(str, numMatch.start(0))
 344                                 self.value = (float(numMatch.group(0)) *
 345                                         10 ** float(expMatch.group(1)))
 346                                 unitBegin = expMatch.end(1)
 347                         else:
 348                                 # unit or invalid
 349                                 numMatch = number.match(str)
 350                                 if numMatch != None:
 351                                         self.value = float(numMatch.group(0))
 352                                         unitBegin = numMatch.end(0)
 353
 354                         if int(self.value) == self.value:
 355                                 self.value = int(self.value)
 356
 357                         if unitBegin != 0 :
 358                                 unitMatch = unit.search(str, unitBegin)
 359                                 if unitMatch != None :
 360                                         self.units = Unit.get(unitMatch.group(0))
 361
 362                         # invalid
 363                         else:
 364                                 # TODO: this needs to set the default for the given attribute (how?)
 365                                 self.value = 0
 366                                 self.units = Unit.INVALID
 367
 368 # returns the length of a property
 369 # TODO: eventually use the above class once it is complete
 370 def getSVGLength(value):
 371         try:
 372                 v = float(value)
 373         except ValueError:
 374                 coordMatch = coord.match(value)
 375                 if coordMatch != None:
 376                         unitMatch = unit.search(value, coordMatch.start(0))
 377                 v = value
 378         return v
 379
 380 def findElementById(node, id):
 381         if node == None or node.nodeType != 1: return None
 382         if node.getAttribute('id') == id: return node
 383         for child in node.childNodes :
 384                 e = findElementById(child,id)
 385                 if e != None: return e
 386         return None
 387
 388 def findElementsWithId(node, elems=None):
 389         """
 390         Returns all elements with id attributes
 391         """
 392         if elems is None:
 393                 elems = {}
 394         id = node.getAttribute('id')
 395         if id != '' :
 396                 elems[id] = node
 397         if node.hasChildNodes() :
 398                 for child in node.childNodes:
 399                         # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
 400                         # we are only really interested in nodes of type Element (1)
 401                         if child.nodeType == 1 :
 402                                 findElementsWithId(child, elems)
 403         return elems
 404
 405 referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start',
 406                                         'marker-end', 'marker-mid']
 407
 408 def findReferencedElements(node, ids=None):
 409         """
 410         Returns the number of times an ID is referenced as well as all elements
 411         that reference it.
 412
 413         Currently looks at fill, stroke, clip-path, mask, marker, and
 414         xlink:href attributes.
 415         """
 416         global referencingProps
 417         if ids is None:
 418                 ids = {}
 419         # TODO: input argument ids is clunky here (see below how it is called)
 420         # GZ: alternative to passing dict, use **kwargs
 421
 422         # if this node is a style element, parse its text into CSS
 423         if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
 424                 # node.firstChild will be either a CDATA or a Text node
 425                 cssRules = parseCssString(node.firstChild.nodeValue)
 426                 for rule in cssRules:
 427                         for propname in rule['properties']:
 428                                 propval = rule['properties'][propname]
 429                                 findReferencingProperty(node, propname, propval, ids)
 430
 431                 return ids
 432
 433         # else if xlink:href is set, then grab the id
 434         href = node.getAttributeNS(NS['XLINK'],'href')
 435         if href != '' and len(href) > 1 and href[0] == '#':
 436                 # we remove the hash mark from the beginning of the id
 437                 id = href[1:]
 438                 if id in ids:
 439                         ids[id][0] += 1
 440                         ids[id][1].append(node)
 441                 else:
 442                         ids[id] = [1,[node]]
 443
 444         # now get all style properties and the fill, stroke, filter attributes
 445         styles = node.getAttribute('style').split(';')
 446         for attr in referencingProps:
 447                 styles.append(':'.join([attr, node.getAttribute(attr)]))
 448
 449         for style in styles:
 450                 propval = style.split(':')
 451                 if len(propval) == 2 :
 452                         prop = propval[0].strip()
 453                         val = propval[1].strip()
 454                         findReferencingProperty(node, prop, val, ids)
 455
 456         if node.hasChildNodes() :
 457                 for child in node.childNodes:
 458                         if child.nodeType == 1 :
 459                                 findReferencedElements(child, ids)
 460         return ids
 461
 462 def findReferencingProperty(node, prop, val, ids):
 463         global referencingProps
 464         if prop in referencingProps and val != '' :
 465                 if len(val) >= 7 and val[0:5] == 'url(#' :
 466                         id = val[5:val.find(')')]
 467                         if ids.has_key(id) :
 468                                 ids[id][0] += 1
 469                                 ids[id][1].append(node)
 470                         else:
 471                                 ids[id] = [1,[node]]
 472                 # if the url has a quote in it, we need to compensate
 473                 elif len(val) >= 8 :
 474                         id = None
 475                         # double-quote
 476                         if val[0:6] == 'url("#' :
 477                                 id = val[6:val.find('")')]
 478                         # single-quote
 479                         elif val[0:6] == "url('#" :
 480                                 id = val[6:val.find("')")]
 481                         if id != None:
 482                                 if ids.has_key(id) :
 483                                         ids[id][0] += 1
 484                                         ids[id][1].append(node)
 485                                 else:
 486                                         ids[id] = [1,[node]]
 487
 488 numIDsRemoved = 0
 489 numElemsRemoved = 0
 490 numAttrsRemoved = 0
 491 numRastersEmbedded = 0
 492 numPathSegmentsReduced = 0
 493 numCurvesStraightened = 0
 494 numBytesSavedInPathData = 0
 495 numBytesSavedInColors = 0
 496 numPointsRemovedFromPolygon = 0
 497
 498 def removeUnusedDefs(doc, defElem, elemsToRemove=None):
 499         if elemsToRemove is None:
 500                 elemsToRemove = []
 501
 502         identifiedElements = findElementsWithId(doc.documentElement)
 503         referencedIDs = findReferencedElements(doc.documentElement)
 504
 505         keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
 506         for elem in defElem.childNodes:
 507                 if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
 508                         elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
 509                         continue
 510                 if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
 511                                 (not elem.getAttribute('id') in referencedIDs)) and \
 512                                 not elem.nodeName in keepTags:
 513                         elemsToRemove.append(elem)
 514         return elemsToRemove
 515
 516 def removeUnreferencedElements(doc):
 517         """
 518         Removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>.
 519         Also vacuums the defs of any non-referenced renderable elements.
 520
 521         Returns the number of unreferenced elements removed from the document.
 522         """
 523         global numElemsRemoved
 524         num = 0
 525         removeTags = ['linearGradient', 'radialGradient', 'pattern']
 526
 527         identifiedElements = findElementsWithId(doc.documentElement)
 528         referencedIDs = findReferencedElements(doc.documentElement)
 529
 530         for id in identifiedElements:
 531                 if not id in referencedIDs:
 532                         goner = findElementById(doc.documentElement, id)
 533                         if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
 534                                 goner.parentNode.removeChild(goner)
 535                                 num += 1
 536                                 numElemsRemoved += 1
 537
 538         # TODO: should also go through defs and vacuum it
 539         num = 0
 540         defs = doc.documentElement.getElementsByTagName('defs')
 541         for aDef in defs:
 542                 elemsToRemove = removeUnusedDefs(doc, aDef)
 543                 for elem in elemsToRemove:
 544                         elem.parentNode.removeChild(elem)
 545                         numElemsRemoved += 1
 546                         num += 1
 547         return num
 548
 549 def removeUnreferencedIDs(referencedIDs, identifiedElements):
 550         """
 551         Removes the unreferenced ID attributes.
 552
 553         Returns the number of ID attributes removed
 554         """
 555         global numIDsRemoved
 556         keepTags = ['font']
 557         num = 0;
 558         for id in identifiedElements.keys():
 559                 node = identifiedElements[id]
 560                 if referencedIDs.has_key(id) == False and not node.nodeName in keepTags:
 561                         node.removeAttribute('id')
 562                         numIDsRemoved += 1
 563                         num += 1
 564         return num
 565
 566 def removeNamespacedAttributes(node, namespaces):
 567         global numAttrsRemoved
 568         num = 0
 569         if node.nodeType == 1 :
 570                 # remove all namespace'd attributes from this element
 571                 attrList = node.attributes
 572                 attrsToRemove = []
 573                 for attrNum in range(attrList.length):
 574                         attr = attrList.item(attrNum)
 575                         if attr != None and attr.namespaceURI in namespaces:
 576                                 attrsToRemove.append(attr.nodeName)
 577                 for attrName in attrsToRemove :
 578                         num += 1
 579                         numAttrsRemoved += 1
 580                         node.removeAttribute(attrName)
 581
 582                 # now recurse for children
 583                 for child in node.childNodes:
 584                         num += removeNamespacedAttributes(child, namespaces)
 585         return num
 586
 587 def removeNamespacedElements(node, namespaces):
 588         global numElemsRemoved
 589         num = 0
 590         if node.nodeType == 1 :
 591                 # remove all namespace'd child nodes from this element
 592                 childList = node.childNodes
 593                 childrenToRemove = []
 594                 for child in childList:
 595                         if child != None and child.namespaceURI in namespaces:
 596                                 childrenToRemove.append(child)
 597                 for child in childrenToRemove :
 598                         num += 1
 599                         numElemsRemoved += 1
 600                         node.removeChild(child)
 601
 602                 # now recurse for children
 603                 for child in node.childNodes:
 604                         num += removeNamespacedElements(child, namespaces)
 605         return num
 606
 607 def removeNestedGroups(node):
 608         """
 609         This walks further and further down the tree, removing groups
 610         which do not have any attributes or a title/desc child and
 611         promoting their children up one level
 612         """
 613         global numElemsRemoved
 614         num = 0
 615
 616         groupsToRemove = []
 617         for child in node.childNodes:
 618                 if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
 619                         # only collapse group if it does not have a title or desc as a direct descendant
 620                         for grandchild in child.childNodes:
 621                                 if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
 622                                                 grandchild.nodeName in ['title','desc']:
 623                                         break
 624                         else:
 625                                 groupsToRemove.append(child)
 626
 627         for g in groupsToRemove:
 628                 while g.childNodes.length > 0:
 629                         g.parentNode.insertBefore(g.firstChild, g)
 630                 g.parentNode.removeChild(g)
 631                 numElemsRemoved += 1
 632                 num += 1
 633
 634         # now recurse for children
 635         for child in node.childNodes:
 636                 if child.nodeType == 1:
 637                         num += removeNestedGroups(child)
 638         return num
 639
 640 def moveCommonAttributesToParentGroup(elem):
 641         """
 642         This recursively calls this function on all children of the passed in element
 643         and then iterates over all child elements and removes common inheritable attributes
 644         from the children and places them in the parent group.
 645         """
 646         num = 0
 647
 648         childElements = []
 649         # recurse first into the children (depth-first)
 650         for child in elem.childNodes:
 651                 if child.nodeType == 1:
 652                         childElements.append(child)
 653                         num += moveCommonAttributesToParentGroup(child)
 654
 655         # only process the children if there are more than one element
 656         if len(childElements) <= 1: return num
 657
 658         commonAttrs = {}
 659         # add all inheritable properties of the first child element
 660         # FIXME: Note there is a chance that the first child is a set/animate in which case
 661         # its fill attribute is not what we want to look at, we should look for the first
 662         # non-animate/set element
 663         attrList = childElements[0].attributes
 664         for num in range(attrList.length):
 665                 attr = attrList.item(num)
 666                 # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
 667                 # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
 668                 if attr.nodeName in ['clip-rule',
 669                                         'display-align',
 670                                         'fill', 'fill-opacity', 'fill-rule',
 671                                         'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
 672                                         'font-style', 'font-variant', 'font-weight',
 673                                         'letter-spacing',
 674                                         'pointer-events', 'shape-rendering',
 675                                         'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
 676                                         'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
 677                                         'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
 678                                         'word-spacing', 'writing-mode']:
 679                         # we just add all the attributes from the first child
 680                         commonAttrs[attr.nodeName] = attr.nodeValue
 681
 682         # for each subsequent child element
 683         for childNum in range(len(childElements)):
 684                 # skip first child
 685                 if childNum == 0:
 686                         continue
 687
 688                 child = childElements[childNum]
 689                 # if we are on an animateXXX/set element, ignore it (due to the 'fill' attribute)
 690                 if child.localName in ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']:
 691                         continue
 692
 693                 distinctAttrs = []
 694                 # loop through all current 'common' attributes
 695                 for name in commonAttrs.keys():
 696                         # if this child doesn't match that attribute, schedule it for removal
 697                         if child.getAttribute(name) != commonAttrs[name]:
 698                                 distinctAttrs.append(name)
 699                 # remove those attributes which are not common
 700                 for name in distinctAttrs:
 701                         del commonAttrs[name]
 702
 703         # commonAttrs now has all the inheritable attributes which are common among all child elements
 704         for name in commonAttrs.keys():
 705                 for child in childElements:
 706                         child.removeAttribute(name)
 707                 elem.setAttribute(name, commonAttrs[name])
 708
 709         # update our statistic (we remove N*M attributes and add back in M attributes)
 710         num += (len(childElements)-1) * len(commonAttrs)
 711         return num
 712
 713 def removeUnusedAttributesOnParent(elem):
 714         """
 715         This recursively calls this function on all children of the element passed in,
 716         then removes any unused attributes on this elem if none of the children inherit it
 717         """
 718         num = 0
 719
 720         childElements = []
 721         # recurse first into the children (depth-first)
 722         for child in elem.childNodes:
 723                 if child.nodeType == 1:
 724                         childElements.append(child)
 725                         num += removeUnusedAttributesOnParent(child)
 726
 727         # only process the children if there are more than one element
 728         if len(childElements) <= 1: return num
 729
 730         # get all attribute values on this parent
 731         attrList = elem.attributes
 732         unusedAttrs = {}
 733         for num in range(attrList.length):
 734                 attr = attrList.item(num)
 735                 if attr.nodeName in ['clip-rule',
 736                                         'display-align',
 737                                         'fill', 'fill-opacity', 'fill-rule',
 738                                         'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
 739                                         'font-style', 'font-variant', 'font-weight',
 740                                         'letter-spacing',
 741                                         'pointer-events', 'shape-rendering',
 742                                         'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
 743                                         'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
 744                                         'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
 745                                         'word-spacing', 'writing-mode']:
 746                         unusedAttrs[attr.nodeName] = attr.nodeValue
 747
 748         # for each child, if at least one child inherits the parent's attribute, then remove
 749         for childNum in range(len(childElements)):
 750                 child = childElements[childNum]
 751                 inheritedAttrs = []
 752                 for name in unusedAttrs.keys():
 753                         val = child.getAttribute(name)
 754                         if val == '' or val == None or val == 'inherit':
 755                                 inheritedAttrs.append(name)
 756                 for a in inheritedAttrs:
 757                         del unusedAttrs[a]
 758
 759         # unusedAttrs now has all the parent attributes that are unused
 760         for name in unusedAttrs.keys():
 761                 elem.removeAttribute(name)
 762                 num += 1
 763
 764         return num
 765
 766 def removeDuplicateGradientStops(doc):
 767         global numElemsRemoved
 768         num = 0
 769
 770         for gradType in ['linearGradient', 'radialGradient']:
 771                 for grad in doc.getElementsByTagName(gradType):
 772                         stops = {}
 773                         stopsToRemove = []
 774                         for stop in grad.getElementsByTagName('stop'):
 775                                 # convert percentages into a floating point number
 776                                 offsetU = SVGLength(stop.getAttribute('offset'))
 777                                 if offsetU.units == Unit.PCT:
 778                                         offset = offsetU.value / 100.0
 779                                 elif offsetU.units == Unit.NONE:
 780                                         offset = offsetU.value
 781                                 else:
 782                                         offset = 0
 783                                 # set the stop offset value to the integer or floating point equivalent
 784                                 if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
 785                                 else: stop.setAttribute('offset', str(offset))
 786
 787                                 color = stop.getAttribute('stop-color')
 788                                 opacity = stop.getAttribute('stop-opacity')
 789                                 if stops.has_key(offset) :
 790                                         oldStop = stops[offset]
 791                                         if oldStop[0] == color and oldStop[1] == opacity:
 792                                                 stopsToRemove.append(stop)
 793                                 stops[offset] = [color, opacity]
 794
 795                         for stop in stopsToRemove:
 796                                 stop.parentNode.removeChild(stop)
 797                                 num += 1
 798                                 numElemsRemoved += 1
 799
 800         # linear gradients
 801         return num
 802
 803 def collapseSinglyReferencedGradients(doc):
 804         global numElemsRemoved
 805         num = 0
 806
 807         # make sure to reset the ref'ed ids for when we are running this in testscour
 808         for rid,nodeCount in findReferencedElements(doc.documentElement).iteritems():
 809                 count = nodeCount[0]
 810                 nodes = nodeCount[1]
 811                 if count == 1:
 812                         elem = findElementById(doc.documentElement,rid)
 813                         if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
 814                                         and elem.namespaceURI == NS['SVG']:
 815                                 # found a gradient that is referenced by only 1 other element
 816                                 refElem = nodes[0]
 817                                 if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
 818                                                 and refElem.namespaceURI == NS['SVG']:
 819                                         # elem is a gradient referenced by only one other gradient (refElem)
 820
 821                                         # add the stops to the referencing gradient (this removes them from elem)
 822                                         if len(refElem.getElementsByTagName('stop')) == 0:
 823                                                 stopsToAdd = elem.getElementsByTagName('stop')
 824                                                 for stop in stopsToAdd:
 825                                                         refElem.appendChild(stop)
 826
 827                                         # adopt the gradientUnits, spreadMethod,  gradientTransform attributes if
 828                                         # they are unspecified on refElem
 829                                         for attr in ['gradientUnits','spreadMethod','gradientTransform']:
 830                                                 if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 831                                                         refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 832
 833                                         # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
 834                                         # they are unspecified on refElem
 835                                         if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
 836                                                 for attr in ['fx','fy','cx','cy','r']:
 837                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 838                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 839
 840                                         # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if
 841                                         # they are unspecified on refElem
 842                                         if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
 843                                                 for attr in ['x1','y1','x2','y2']:
 844                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 845                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 846
 847                                         # now remove the xlink:href from refElem
 848                                         refElem.removeAttributeNS(NS['XLINK'], 'href')
 849
 850                                         # now delete elem
 851                                         elem.parentNode.removeChild(elem)
 852                                         numElemsRemoved += 1
 853                                         num += 1
 854         return num
 855
 856 def removeDuplicateGradients(doc):
 857         global numElemsRemoved
 858         num = 0
 859
 860         gradientsToRemove = {}
 861         duplicateToMaster = {}
 862
 863         for gradType in ['linearGradient', 'radialGradient']:
 864                 grads = doc.getElementsByTagName(gradType)
 865                 for grad in grads:
 866                         # TODO: should slice grads from 'grad' here to optimize
 867                         for ograd in grads:
 868                                 # do not compare gradient to itself
 869                                 if grad == ograd: continue
 870
 871                                 # compare grad to ograd (all properties, then all stops)
 872                                 # if attributes do not match, go to next gradient
 873                                 someGradAttrsDoNotMatch = False
 874                                 for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
 875                                         if grad.getAttribute(attr) != ograd.getAttribute(attr):
 876                                                 someGradAttrsDoNotMatch = True
 877                                                 break;
 878
 879                                 if someGradAttrsDoNotMatch: continue
 880
 881                                 # compare xlink:href values too
 882                                 if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
 883                                         continue
 884
 885                                 # all gradient properties match, now time to compare stops
 886                                 stops = grad.getElementsByTagName('stop')
 887                                 ostops = ograd.getElementsByTagName('stop')
 888
 889                                 if stops.length != ostops.length: continue
 890
 891                                 # now compare stops
 892                                 stopsNotEqual = False
 893                                 for i in range(stops.length):
 894                                         if stopsNotEqual: break
 895                                         stop = stops.item(i)
 896                                         ostop = ostops.item(i)
 897                                         for attr in ['offset', 'stop-color', 'stop-opacity']:
 898                                                 if stop.getAttribute(attr) != ostop.getAttribute(attr):
 899                                                         stopsNotEqual = True
 900                                                         break
 901                                 if stopsNotEqual: continue
 902
 903                                 # ograd is a duplicate of grad, we schedule it to be removed UNLESS
 904                                 # ograd is ALREADY considered a 'master' element
 905                                 if not gradientsToRemove.has_key(ograd):
 906                                         if not duplicateToMaster.has_key(ograd):
 907                                                 if not gradientsToRemove.has_key(grad):
 908                                                         gradientsToRemove[grad] = []
 909                                                 gradientsToRemove[grad].append( ograd )
 910                                                 duplicateToMaster[ograd] = grad
 911
 912         # get a collection of all elements that are referenced and their referencing elements
 913         referencedIDs = findReferencedElements(doc.documentElement)
 914         for masterGrad in gradientsToRemove.keys():
 915                 master_id = masterGrad.getAttribute('id')
 916                 for dupGrad in gradientsToRemove[masterGrad]:
 917                         # if the duplicate gradient no longer has a parent that means it was
 918                         # already re-mapped to another master gradient
 919                         if not dupGrad.parentNode: continue
 920                         dup_id = dupGrad.getAttribute('id')
 921                         # for each element that referenced the gradient we are going to remove
 922                         for elem in referencedIDs[dup_id][1]:
 923                                 # find out which attribute referenced the duplicate gradient
 924                                 for attr in ['fill', 'stroke']:
 925                                         v = elem.getAttribute(attr)
 926                                         if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
 927                                                 elem.setAttribute(attr, 'url(#'+master_id+')')
 928                                 if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
 929                                         elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)
 930
 931                         # now that all referencing elements have been re-mapped to the master
 932                         # it is safe to remove this gradient from the document
 933                         dupGrad.parentNode.removeChild(dupGrad)
 934                         numElemsRemoved += 1
 935                         num += 1
 936         return num
 937
 938 def repairStyle(node, options):
 939         num = 0
 940         if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
 941                 # get all style properties and stuff them into a dictionary
 942                 styleMap = { }
 943                 rawStyles = node.getAttribute('style').split(';')
 944                 for style in rawStyles:
 945                         propval = style.split(':')
 946                         if len(propval) == 2 :
 947                                 styleMap[propval[0].strip()] = propval[1].strip()
 948
 949                 # I've seen this enough to know that I need to correct it:
 950                 # fill: url(#linearGradient4918) rgb(0, 0, 0);
 951                 for prop in ['fill', 'stroke'] :
 952                         if styleMap.has_key(prop) :
 953                                 chunk = styleMap[prop].split(') ')
 954                                 if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
 955                                         styleMap[prop] = chunk[0] + ')'
 956                                         num += 1
 957
 958                 # Here is where we can weed out unnecessary styles like:
 959                 #  opacity:1
 960                 if styleMap.has_key('opacity') :
 961                         opacity = float(styleMap['opacity'])
 962                         # opacity='1.0' is useless, remove it
 963                         if opacity == 1.0 :
 964                                 del styleMap['opacity']
 965                                 num += 1
 966
 967                         # if opacity='0' then all fill and stroke properties are useless, remove them
 968                         elif opacity == 0.0 :
 969                                 for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
 970                                         'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
 971                                         'stroke-dashoffset', 'stroke-opacity'] :
 972                                         if styleMap.has_key(uselessStyle):
 973                                                 del styleMap[uselessStyle]
 974                                                 num += 1
 975
 976                 #  if stroke:none, then remove all stroke-related properties (stroke-width, etc)
 977                 #  TODO: should also detect if the computed value of this element is stroke="none"
 978                 if styleMap.has_key('stroke') and styleMap['stroke'] == 'none' :
 979                         for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
 980                                         'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
 981                                 if styleMap.has_key(strokestyle) :
 982                                         del styleMap[strokestyle]
 983                                         num += 1
 984                         # TODO: This is actually a problem if a parent element has a specified stroke
 985                         # we need to properly calculate computed values
 986                         del styleMap['stroke']
 987
 988                 #  if fill:none, then remove all fill-related properties (fill-rule, etc)
 989                 if styleMap.has_key('fill') and styleMap['fill'] == 'none' :
 990                         for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
 991                                 if styleMap.has_key(fillstyle) :
 992                                         del styleMap[fillstyle]
 993                                         num += 1
 994
 995                 #  stop-opacity: 1
 996                 if styleMap.has_key('stop-opacity') :
 997                         if float(styleMap['stop-opacity']) == 1.0 :
 998                                 del styleMap['stop-opacity']
 999                                 num += 1
1000
1001                 #  fill-opacity: 1 or 0
1002                 if styleMap.has_key('fill-opacity') :
1003                         fillOpacity = float(styleMap['fill-opacity'])
1004                         #  TODO: This is actually a problem if the parent element does not have fill-opacity=1
1005                         if fillOpacity == 1.0 :
1006                                 del styleMap['fill-opacity']
1007                                 num += 1
1008                         elif fillOpacity == 0.0 :
1009                                 for uselessFillStyle in [ 'fill', 'fill-rule' ] :
1010                                         if styleMap.has_key(uselessFillStyle):
1011                                                 del styleMap[uselessFillStyle]
1012                                                 num += 1
1013
1014                 #  stroke-opacity: 1 or 0
1015                 if styleMap.has_key('stroke-opacity') :
1016                         strokeOpacity = float(styleMap['stroke-opacity'])
1017                         #  TODO: This is actually a problem if the parent element does not have stroke-opacity=1
1018                         if strokeOpacity == 1.0 :
1019                                 del styleMap['stroke-opacity']
1020                                 num += 1
1021                         elif strokeOpacity == 0.0 :
1022                                 for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
1023                                                         'stroke-dasharray', 'stroke-dashoffset' ] :
1024                                         if styleMap.has_key(uselessStrokeStyle):
1025                                                 del styleMap[uselessStrokeStyle]
1026                                                 num += 1
1027
1028                 # stroke-width: 0
1029                 if styleMap.has_key('stroke-width') :
1030                         strokeWidth = getSVGLength(styleMap['stroke-width'])
1031                         if strokeWidth == 0.0 :
1032                                 for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
1033                                                         'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
1034                                         if styleMap.has_key(uselessStrokeStyle):
1035                                                 del styleMap[uselessStrokeStyle]
1036                                                 num += 1
1037
1038                 # remove font properties for non-text elements
1039                 # I've actually observed this in real SVG content
1040                 if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
1041                         for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
1042                                                                 'font-style', 'font-variant', 'font-weight',
1043                                                                 'letter-spacing', 'line-height', 'kerning',
1044                                                                 'text-anchor', 'text-decoration', 'text-rendering',
1045                                                                 'unicode-bidi', 'word-spacing', 'writing-mode'] :
1046                                 if styleMap.has_key(fontstyle) :
1047                                         del styleMap[fontstyle]
1048                                         num += 1
1049
1050                 # remove inkscape-specific styles
1051                 # TODO: need to get a full list of these
1052                 for inkscapeStyle in ['-inkscape-font-specification']:
1053                         if styleMap.has_key(inkscapeStyle):
1054                                 del styleMap[inkscapeStyle]
1055                                 num += 1
1056
1057                 # visibility: visible
1058                 if styleMap.has_key('visibility') :
1059                         if styleMap['visibility'] == 'visible':
1060                                 del styleMap['visibility']
1061                                 num += 1
1062
1063                 # display: inline
1064                 if styleMap.has_key('display') :
1065                         if styleMap['display'] == 'inline':
1066                                 del styleMap['display']
1067                                 num += 1
1068
1069                 # overflow: visible or overflow specified on element other than svg, marker, pattern
1070                 if styleMap.has_key('overflow') :
1071                         if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
1072                                 del styleMap['overflow']
1073                                 num += 1
1074
1075                 # marker: none
1076                 if styleMap.has_key('marker') :
1077                         if styleMap['marker'] == 'none':
1078                                 del styleMap['marker']
1079                                 num += 1
1080
1081                 # now if any of the properties match known SVG attributes we prefer attributes
1082                 # over style so emit them and remove them from the style map
1083                 if options.style_to_xml:
1084                         for propName in styleMap.keys() :
1085                                 if propName in svgAttributes :
1086                                         node.setAttribute(propName, styleMap[propName])
1087                                         del styleMap[propName]
1088
1089                 # sew our remaining style properties back together into a style attribute
1090                 fixedStyle = ''
1091                 for prop in styleMap.keys() :
1092                         fixedStyle += prop + ':' + styleMap[prop] + ';'
1093
1094                 if fixedStyle != '' :
1095                         node.setAttribute('style', fixedStyle)
1096                 else:
1097                         node.removeAttribute('style')
1098
1099         # recurse for our child elements
1100         for child in node.childNodes :
1101                 num += repairStyle(child,options)
1102
1103         return num
1104
def _lengthIsZero(node, attrName):
        """
        Returns True when attrName is present on node and its SVGLength value is 0.
        """
        raw = node.getAttribute(attrName)
        return raw != '' and SVGLength(raw).value == 0

def _lengthIsDefault(node, attrName, percent, fraction, unitlessIsFraction):
        """
        Returns True when attrName is present on node and equals the given percentage,
        or (only if unitlessIsFraction) the given unitless fraction.
        """
        raw = node.getAttribute(attrName)
        if raw == '': return False
        length = SVGLength(raw)
        if length.units == Unit.PCT:
                return length.value == percent
        if length.units == Unit.NONE and unitlessIsFraction:
                return length.value == fraction
        return False

def removeDefaultAttributeValues(node, options):
        """
        Recursively removes gradient attributes whose value equals the default
        (gradientUnits, spreadMethod, x1, y1, x2, y2, fx, fy, cx, cy, r) and zero
        x1/y1/y2 on line elements.

        The gradient defaults are only applied to linearGradient/radialGradient: the
        same attribute names mean something else on shapes (r="50%" on a circle, x2="1"
        on a line) and must not be dropped there.

        options is accepted for interface compatibility and is not used.
        Returns the number of attributes removed.
        """
        num = 0
        if node.nodeType != 1: return 0

        # compare on the name without any namespace prefix
        tagName = node.nodeName.split(':')[-1]
        isLinear = (tagName == 'linearGradient')
        isRadial = (tagName == 'radialGradient')

        if isLinear or isRadial:
                # a unitless 1 / 0.5 only equals 100% / 50% in bounding-box units; this has
                # to be read before gradientUnits itself is possibly removed below
                # NOTE(review): gradientUnits inherited through xlink:href is not considered
                fractions = node.getAttribute('gradientUnits') != 'userSpaceOnUse'

                # gradientUnits: objectBoundingBox
                if node.getAttribute('gradientUnits') == 'objectBoundingBox':
                        node.removeAttribute('gradientUnits')
                        num += 1

                # spreadMethod: pad
                if node.getAttribute('spreadMethod') == 'pad':
                        node.removeAttribute('spreadMethod')
                        num += 1

        # x1, y1, y2: 0 (the default on both linearGradient and line)
        if isLinear or tagName == 'line':
                for attrName in ['x1', 'y1', 'y2']:
                        if _lengthIsZero(node, attrName):
                                node.removeAttribute(attrName)
                                num += 1

        # x2: 100%
        if isLinear:
                if _lengthIsDefault(node, 'x2', 100, 1, fractions):
                        node.removeAttribute('x2')
                        num += 1

        if isRadial:
                # fx: equal to cx, fy: equal to cy
                # (checked before cx/cy are possibly removed below)
                for focus, centre in [('fx', 'cx'), ('fy', 'cy')]:
                        if node.getAttribute(focus) != '':
                                if node.getAttribute(focus) == node.getAttribute(centre):
                                        node.removeAttribute(focus)
                                        num += 1

                # cx, cy, r: 50%
                for attrName in ['cx', 'cy', 'r']:
                        if _lengthIsDefault(node, attrName, 50, 0.5, fractions):
                                node.removeAttribute(attrName)
                                num += 1

        # recurse for our child elements
        for child in node.childNodes :
                num += removeDefaultAttributeValues(child,options)

        return num
1185
# rgb(R, G, B) with integer channels / rgb(R%, G%, B%) with percentage channels
rgb = re.compile(r"\s*rgb\(\s*(\d+)\s*\,\s*(\d+)\s*\,\s*(\d+)\s*\)\s*")
rgbp = re.compile(r"\s*rgb\(\s*(\d*\.?\d+)\%\s*\,\s*(\d*\.?\d+)\%\s*\,\s*(\d*\.?\d+)\%\s*\)\s*")
def convertColor(value):
        """
                Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

                Color names found in 'colors' and both rgb() notations are converted; any
                other input is returned unchanged.
        """
        s = value

        # membership test on the mapping itself, no intermediate key list
        if s in colors:
                s = colors[s]

        rgbpMatch = rgbp.match(s)
        if rgbpMatch is not None :
                # out-of-range percentages are clipped to the largest channel value
                r = min(int(float(rgbpMatch.group(1)) * 255.0 / 100.0), 255)
                g = min(int(float(rgbpMatch.group(2)) * 255.0 / 100.0), 255)
                b = min(int(float(rgbpMatch.group(3)) * 255.0 / 100.0), 255)
                s  = 'rgb(%d,%d,%d)' % (r,g,b)

        rgbMatch = rgb.match(s)
        if rgbMatch is not None :
                # clip each channel to 255 so it always yields exactly two hex digits;
                # a larger value would otherwise produce a malformed color
                channels = [min(int(rgbMatch.group(i)), 255) for i in (1, 2, 3)]
                s = '#%02X%02X%02X' % tuple(channels)

        # length is tested first so that an empty string cannot raise IndexError
        if len(s)==7 and s[0] == '#' and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]:
                s = s.upper()
                s = '#'+s[1]+s[3]+s[5]

        return s
1219
def convertColors(element) :
        """
                Walks element and all of its descendants and rewrites color attributes
                with the result of convertColor() whenever that result is shorter.

                Returns the number of bytes saved.
        """
        # only element nodes carry attributes
        if element.nodeType != 1:
                return 0

        # pick the color attributes that apply to this kind of element
        tagName = element.nodeName
        if tagName in ['rect', 'circle', 'ellipse', 'polygon',
                                        'line', 'polyline', 'path', 'g', 'a']:
                colorAttrs = ['fill', 'stroke']
        elif tagName in ['stop']:
                colorAttrs = ['stop-color']
        elif tagName in ['solidColor']:
                colorAttrs = ['solid-color']
        else:
                colorAttrs = []

        savedBytes = 0

        for attrName in colorAttrs:
                before = element.getAttribute(attrName)
                if before == '':
                        continue
                after = convertColor(before)
                # keep the original spelling unless the converted one is strictly shorter
                if len(before) > len(after):
                        element.setAttribute(attrName, after)
                        savedBytes += len(before) - len(element.getAttribute(attrName))

        # descend into the children
        for child in element.childNodes :
                savedBytes += convertColors(child)

        return savedBytes
1254
1255 # TODO: go over what this method does and see if there is a way to optimize it
1256 # TODO: go over the performance of this method and see if I can save memory/speed by
1257 #       reusing data structures, etc
def cleanPath(element) :
    """
        Cleans the path string (d attribute) of the element

        The path data is parsed with svg_parser and then rewritten in several
        passes:
          1. every command after the leading moveto is made relative
          2. segments that draw nothing are removed
          3. cubic curves whose control points lie on the chord become lines
          4. consecutive commands of the same type are merged
          5. l/c/q segments are replaced by h, v, s or t where possible
          6. consecutive h or v offsets running in the same direction are summed
          7. consecutive commands of the same type are merged once more
        The result is serialized with serializePath() and written back to the
        d attribute.

        Updates the module-level counters numBytesSavedInPathData,
        numPathSegmentsReduced and numCurvesStraightened.

        The attribute is left untouched when the path data is empty or does
        not start with a moveto command.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    path = _flattenParsedPath(pathObj)

    # the passes below rely on a leading moveto to establish the current point
    if not path or path[0][0] not in ('M', 'm'):
        return

    path = _makePathRelative(path)

    (path, removed) = _removeEmptySegments(path)
    numPathSegmentsReduced += removed

    (path, straightened) = _straightenCurves(path)
    numCurvesStraightened += straightened

    path = _collapseConsecutiveCommands(path)

    (path, shortened) = _useShorthandSegments(path)
    numPathSegmentsReduced += shortened

    (path, merged) = _mergeSameDirectionRuns(path)
    numPathSegmentsReduced += merged

    # it is possible that we have consecutive h, v, c, t commands now
    path = _collapseConsecutiveCommands(path)

    newPathStr = serializePath(path)
    numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
    element.setAttribute('d', newPathStr)

# number of values that make up one segment of each relative path command
_PATH_SEGMENT_SIZE = {'m': 2, 'l': 2, 't': 2, 'h': 1, 'v': 1,
                      's': 4, 'q': 4, 'c': 6, 'a': 7}

def _toPathDecimal(number):
    """
        Converts a parsed number to a Decimal; the multiplication by one
        rounds the value to the precision of the current decimal context.
    """
    return Decimal(str(number)) * Decimal(1)

def _flattenParsedPath(pathObj):
    """
        Turns the parser output (lists of tuples of tuples of numbers and
        booleans) into a plain list of (cmd, [Decimal, ...]) tuples.
    """
    path = []
    for (cmd, dataset) in pathObj:
        nums = []
        if cmd in ('M', 'm', 'L', 'l', 'T', 't'):
            # one or more tuples, each containing two numbers
            for t in dataset:
                nums.append(_toPathDecimal(t[0]))
                nums.append(_toPathDecimal(t[1]))
            # only create this segment if it is not empty
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ('V', 'v', 'H', 'h'):
            # one or more numbers
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ('C', 'c', 'S', 's', 'Q', 'q'):
            # one or more tuples, each containing three (C) or two (S, Q)
            # tuples of two numbers each
            for t in dataset:
                for pair in t:
                    nums.append(_toPathDecimal(pair[0]))
                    nums.append(_toPathDecimal(pair[1]))
            path.append( (cmd, nums) )

        elif cmd in ('A', 'a'):
            # one or more tuples, each containing a tuple of two numbers, a
            # number, a boolean, another boolean, and a tuple of two numbers
            for t in dataset:
                nums.append(_toPathDecimal(t[0][0]))
                nums.append(_toPathDecimal(t[0][1]))
                nums.append(_toPathDecimal(t[1]))
                # the two flags are stored as 1 or 0
                for flag in (t[2], t[3]):
                    if flag:
                        nums.append(Decimal(1))
                    else:
                        nums.append(Decimal(0))
                nums.append(_toPathDecimal(t[4][0]))
                nums.append(_toPathDecimal(t[4][1]))
            path.append( (cmd, nums) )

        elif cmd in ('Z', 'z'):
            path.append( (cmd, []) )
    return path

def _makePathRelative(path):
    """
        Converts absolute coordinates into relative ones, starting with the
        second command; the leading moveto is left as it is.

        path must start with an M or m command holding at least one pair.
    """
    (firstCmd, firstData) = path[0]

    # the first pair of a moveto is where its subpath starts, which is the
    # point a later closepath returns to
    (startx, starty) = (firstData[0], firstData[1])

    if firstCmd == 'M':
        # further pairs are absolute implicit linetos, so the last pair is
        # the current point
        (x, y) = (firstData[-2], firstData[-1])
    else:
        # the first pair of a leading relative moveto is absolute, further
        # pairs are relative implicit linetos that have to be accumulated
        (x, y) = (startx, starty)
        for n in range(2, len(firstData), 2):
            x += firstData[n]
            y += firstData[n+1]

    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        newCmd = cmd.lower()
        isAbsolute = (cmd != newCmd)
        # relative commands keep their data, it is only walked to track x,y
        if isAbsolute:
            newData = []
        else:
            newData = data

        if newCmd == 'z':
            (x, y) = (startx, starty)
            newData = data

        elif newCmd == 'h':
            for value in data:
                if isAbsolute:
                    newData.append(value - x)
                    x = value
                else:
                    x += value

        elif newCmd == 'v':
            for value in data:
                if isAbsolute:
                    newData.append(value - y)
                    y = value
                else:
                    y += value

        elif newCmd == 'a':
            # only the end point is adjusted; radii, rotation and flags are
            # copied unchanged
            for i in range(0, len(data), 7):
                if isAbsolute:
                    newData.extend(data[i:i+5])
                    newData.append(data[i+5] - x)
                    newData.append(data[i+6] - y)
                    (x, y) = (data[i+5], data[i+6])
                else:
                    x += data[i+5]
                    y += data[i+6]

        elif newCmd in ('m', 'l', 't', 's', 'q', 'c'):
            size = _PATH_SEGMENT_SIZE[newCmd]
            if newCmd == 'm':
                # a moveto opens a new subpath at its first pair; a relative
                # moveto is relative to the current point
                if isAbsolute:
                    (startx, starty) = (data[0], data[1])
                else:
                    (startx, starty) = (x + data[0], y + data[1])
            for i in range(0, len(data), size):
                if isAbsolute:
                    # every pair of the segment is relative to the point at
                    # which the segment starts
                    for j in range(i, i + size, 2):
                        newData.append(data[j] - x)
                        newData.append(data[j+1] - y)
                    (x, y) = (data[i+size-2], data[i+size-1])
                else:
                    x += data[i+size-2]
                    y += data[i+size-1]

        newPath.append( (newCmd, newData) )
    return newPath

def _removeEmptySegments(path):
    """
        Drops segments that draw nothing.  Returns (newPath, numberRemoved).
    """
    newPath = [path[0]]
    removed = 0
    for (cmd, data) in path[1:]:
        if cmd not in ('m', 'l', 't', 'h', 'v', 'c', 'q', 'a'):
            newPath.append( (cmd, data) )
            continue

        size = _PATH_SEGMENT_SIZE[cmd]
        newData = []
        for i in range(0, len(data), size):
            segment = data[i:i+size]
            if cmd == 'm' and i == 0:
                # the moveto pair itself is kept: removing it would turn the
                # implicit linetos that follow into a move
                isEmpty = False
            elif cmd == 'a':
                # an arc whose end point equals its start point is omitted
                isEmpty = (segment[5] == 0 and segment[6] == 0)
            else:
                # curves ending where they start still draw something unless
                # their control points are zero as well
                isEmpty = all([value == 0 for value in segment])

            if isEmpty:
                removed += 1
            else:
                newData.extend(segment)
        if newData:
            newPath.append( (cmd, newData) )
    return (newPath, removed)

def _straightenCurves(path):
    """
        Converts cubic curve segments whose control points lie on the line
        through their end points into line segments.  Commands left without
        data are dropped, except for closepath.

        Returns (newPath, numberStraightened).
    """
    newPath = [path[0]]
    straightened = 0
    for (cmd, data) in path[1:]:
        newData = data
        if cmd == 'c':
            newData = []
            for i in range(0, len(data), 6):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x, p1y, p2x, p2y, dx, dy) = data[i:i+6]

                foundStraightCurve = False
                if dx == 0:
                    # a curve that ends where it starts is never a line
                    if dy != 0 and p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append( (cmd, newData) )
                        newData = []
                    # now create a straight line segment
                    newPath.append( ('l', [dx, dy]) )
                    straightened += 1
                else:
                    newData.extend(data[i:i+6])
        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append( (cmd, newData) )
    return (newPath, straightened)

def _collapseConsecutiveCommands(path):
    """
        Merges consecutive commands of the same type into one command.  The
        first command is never merged into, and moveto commands are never
        merged because the pairs after the first one of a moveto are implicit
        linetos.
    """
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        (lastCmd, lastData) = newPath[-1]
        if len(newPath) > 1 and cmd == lastCmd and cmd != 'm':
            lastData.extend(data)
        else:
            newPath.append( (cmd, list(data)) )
    return newPath

def _useShorthandSegments(path):
    """
        Converts to shorthand path segments where possible: l becomes h or v,
        c becomes s and q becomes t.  Returns (newPath, numberConverted).
    """
    newPath = [path[0]]
    shortened = 0
    (prevCmd, prevData) = path[0]
    for (cmd, data) in path[1:]:
        # convert line segments into h,v where possible
        if cmd == 'l':
            lineCoords = []
            for i in range(0, len(data), 2):
                (dx, dy) = (data[i], data[i+1])
                if dx == 0 or dy == 0:
                    # flush the existing line command
                    if lineCoords:
                        newPath.append( ('l', lineCoords) )
                        lineCoords = []
                    if dx == 0:
                        newPath.append( ('v', [dy]) )
                    else:
                        newPath.append( ('h', [dx]) )
                    shortened += 1
                else:
                    lineCoords.append(dx)
                    lineCoords.append(dy)
            if lineCoords:
                newPath.append( ('l', lineCoords) )

        # convert Bezier curve segments into s where possible
        elif cmd == 'c':
            # the first control point implied by s is the reflection of the
            # previous second control point, or the current point when the
            # previous command is not a cubic curve
            if prevCmd == 's' and len(prevData) >= 4:
                ctlPt = (prevData[-2] - prevData[-4], prevData[-1] - prevData[-3])
            else:
                ctlPt = (0, 0)
            curveCoords = []
            for i in range(0, len(data), 6):
                if ctlPt[0] == data[i] and ctlPt[1] == data[i+1]:
                    if curveCoords:
                        newPath.append( ('c', curveCoords) )
                        curveCoords = []
                    # append the s command
                    newPath.append( ('s', data[i+2:i+6]) )
                    shortened += 1
                else:
                    curveCoords.extend(data[i:i+6])
                # rotate by 180deg means negate both coordinates: set up the
                # control point for the next curve segment
                ctlPt = (data[i+4] - data[i+2], data[i+5] - data[i+3])
            if curveCoords:
                newPath.append( ('c', curveCoords) )

        # convert quadratic curve segments into t where possible
        elif cmd == 'q':
            # the control point of a preceding t is not tracked here, so no
            # shorthand is attempted for the first segment in that case
            if prevCmd == 't':
                ctlPt = None
            else:
                ctlPt = (0, 0)
            curveCoords = []
            for i in range(0, len(data), 4):
                if ctlPt is not None and ctlPt[0] == data[i] and ctlPt[1] == data[i+1]:
                    if curveCoords:
                        newPath.append( ('q', curveCoords) )
                        curveCoords = []
                    # append the t command
                    newPath.append( ('t', data[i+2:i+4]) )
                    shortened += 1
                else:
                    curveCoords.extend(data[i:i+4])
                ctlPt = (data[i+2] - data[i], data[i+3] - data[i+1])
            if curveCoords:
                newPath.append( ('q', curveCoords) )

        else:
            newPath.append( (cmd, data) )

        (prevCmd, prevData) = (cmd, data)
    return (newPath, shortened)

def _mergeSameDirectionRuns(path):
    """
        For each h or v, collapses consecutive coordinates that run in the
        same direction, i.e. "h-100-100" becomes "h-200" but "h300-100" does
        not change.  Returns (newPath, numberMerged).
    """
    newPath = [path[0]]
    merged = 0
    for (cmd, data) in path[1:]:
        if cmd in ('h', 'v') and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    merged += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append( (cmd, newData) )
        else:
            newPath.append( (cmd, data) )
    return (newPath, merged)
1764
def parseListOfPoints(s):
    """
        Parse string into a list of points.

        Returns a list containing an even number of coordinate strings.  An
        empty list is returned when the number of coordinates is odd or when
        any coordinate carries a unit.
    """
    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    #
    # the separator is either a comma with optional surrounding whitespace or
    # a run of whitespace; unlike "\s*,?\s*" this pattern can never match the
    # empty string, which re.split() treats differently across Python versions
    nums = re.split(r"\s*,\s*|\s+", s.strip())
    points = []
    for i in range(0, len(nums), 2):
        x = SVGLength(nums[i])
        # if we had an odd number of points, return empty
        if i == len(nums) - 1:
            return []
        y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE:
            return []
        points.append( str(x.value) )
        points.append( str(y.value) )

    return points
1790
def cleanPolygon(elem):
    """
        Remove unnecessary closing point of polygon points attribute

        When the last point repeats the first one it is dropped and
        numPointsRemovedFromPolygon is incremented.  The points attribute is
        then rewritten with scourCoordinates().
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    # pts is a flat list x0,y0,x1,y1,... so two points need four entries
    if len(pts) >= 4:
        (startx, starty) = (pts[0], pts[1])
        (endx, endy) = (pts[-2], pts[-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts))
1806
def cleanPolyline(elem):
    """
        Rewrites the points attribute of the polyline: the current value is
        parsed with parseListOfPoints() and serialized again with
        scourCoordinates().
    """
    rawPoints = elem.getAttribute('points')
    scoured = scourCoordinates(parseListOfPoints(rawPoints))
    elem.setAttribute('points', scoured)
1813
def serializePath(pathObj):
    """
        Builds the path data string from a sequence of (cmd, data) tuples:
        each command letter is followed by its coordinates as serialized by
        scourCoordinates().
    """
    chunks = []
    for (command, coords) in pathObj:
        chunks.append(command)
        # elliptical arc commands must have comma/wsp separating the coordinates
        # this fixes an issue outlined in Fix https://bugs.launchpad.net/scour/+bug/412754
        needsSeparators = (command == 'a')
        chunks.append(scourCoordinates(coords, needsSeparators))
    return "".join(chunks)
1825
def scourCoordinates(data, forceCommaWsp = False):
    """
        Serializes coordinate data into one string.

        Every coordinate is passed through scourLength().  A comma is placed
        after a coordinate only when another coordinate follows and that next
        coordinate is non-negative (a minus sign already separates the two
        numbers), or always between coordinates when forceCommaWsp is True.

        Returns an empty string when data is None or empty.
    """
    if data is None:
        return ""

    pieces = []
    lastIndex = len(data) - 1
    for (index, coord) in enumerate(data):
        # add the scoured coordinate to the output
        pieces.append(scourLength(coord))

        if index < lastIndex and (forceCommaWsp or Decimal(data[index+1]) >= 0):
            pieces.append(',')
    return "".join(pieces)
1846
def scourLength(str):
    """
        Serializes a length as compactly as possible.

        The argument is parsed with SVGLength.  Its numeric value is reduced
        to the precision of the current decimal context, turned into an
        integer when that loses nothing, stripped of trailing zeros after the
        decimal point and written in scientific notation when that is
        shorter.  The unit of the length is appended to the result.
    """
    length = SVGLength(str)

    # reduce to the proper number of digits
    coord = Decimal(unicode(length.value)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # trim trailing zeros ourselves (Decimal has no method that does only
    # this).  Only the mantissa may be trimmed: the zeros of an exponent such
    # as the one in "1.5E-10" are significant
    text = unicode(coord)
    expAt = text.upper().find('E')
    if expAt == -1:
        (mantissa, exponent) = (text, '')
    else:
        (mantissa, exponent) = (text[:expAt], text[expAt:])
    if mantissa.find('.') != -1:
        mantissa = mantissa.rstrip('0')
    coord = Decimal(mantissa + exponent)

    # Decimal.normalize() will use scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord)+Unit.str(length.units)
1876
def embedRasters(element, options) :
    """
    Converts a raster reference into an inline base64 data: URI.

    element is the DOM element whose xlink:href is inspected and, on
    success, overwritten; options.infilename is used to resolve hrefs
    that do not point at an existing file. Only hrefs ending in .png,
    .jpg or .gif are considered, read either from the local file system
    or from an http:// address. numRastersEmbedded is incremented for
    every reference that was replaced.

    NOTE: there are size limits to base64-encoding handling in browsers
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # nothing to embed without a usable reference
    if len(href) <= 1:
        return

    # only 'png', 'jpg' and 'gif' extensions are handled
    ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
    if ext not in ('png', 'jpg', 'gif'):
        return

    # an href that is neither an existing file nor a web address is
    # resolved relative to the directory of the input file
    if not os.path.isfile(href) and href[:7] != 'http://':
        infilename = options.infilename or '.'
        href = os.path.join(os.path.dirname(infilename), href)

    rasterdata = ''
    if os.path.isfile(href):
        # read the raster as raw binary and always release the handle
        raster = open(href, "rb")
        try:
            rasterdata = raster.read()
        finally:
            raster.close()
    elif href[:7] == 'http://':
        webFile = urllib.urlopen(href)
        try:
            rasterdata = webFile.read()
        finally:
            webFile.close()

    # ... should we remove all images which don't resolve?
    if rasterdata == '':
        return

    b64eRaster = base64.b64encode(rasterdata)
    if b64eRaster != '':
        # PNG and GIF both have MIME Type 'image/[ext]', but
        # JPEG has MIME Type 'image/jpeg'
        if ext == 'jpg':
            ext = 'jpeg'

        element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
        numRastersEmbedded += 1
1931
def properlySizeDoc(docElement):
    """
    Replaces a static width/height on the document element by a viewBox.

    Nothing is changed when width or height uses a unit other than none
    or px, or when a parseable viewBox exists that does not equal
    "0 0 <width> <height>". Otherwise the viewBox is set to that value
    and the width and height attributes are removed.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite
    # them into a viewBox (both dimensions have to be checked)
    for dimension in (w, h):
        if dimension.units != Unit.NONE and dimension.units != Unit.PX:
            return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute: values are separated by a comma with optional
    # whitespace or by whitespace alone; the pattern cannot match the empty
    # string, so re.split behaves the same on every Python version
    viewBox = docElement.getAttribute('viewBox').strip()
    vbSep = re.split(r"\s*,\s*|\s+", viewBox, 3)

    # if we have a valid viewBox we need to check it
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            if float(vbSep[0]) != 0 or float(vbSep[1]) != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            if float(vbSep[2]) != w.value or float(vbSep[3]) != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
1969
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Recursively renames the namespace prefix of element nodes.

    Every element below (and including) node whose prefix equals
    oldprefix is replaced in its parent by a new element named
    newprefix:localName, or just localName when newprefix is ''. The
    replacement carries over all attributes and deep copies of all
    children. Nodes that are None or not elements are ignored.
    """
    if node is None or node.nodeType != 1: return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node
        if newprefix != '':
            newNode = doc.createElementNS(namespace, newprefix+":"+localName)
        else:
            newNode = doc.createElement(localName)

        # add all the attributes; the qualified name (nodeName) has to be
        # passed so that prefixed attributes such as xlink:href keep their
        # prefix instead of being re-created under their bare local name
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            newNode.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.nodeValue)

        # clone and add all the child nodes
        for child in node.childNodes:
            newNode.appendChild(child.cloneNode(True))

        # replace old node with new node
        parent.replaceChild(newNode, node)
        # continue with the new node in the remapped namespace prefix
        node = newNode

    # now do all child nodes
    for child in node.childNodes:
        remapNamespacePrefix(child, oldprefix, newprefix)
2004
def makeWellFormed(str):
    """
    Escapes the characters &, < and > as XML entities.

    Returns a copy of str in which '&' became '&amp;', '<' became '&lt;'
    and '>' became '&gt;'.
    """
    # '&' must be handled first so that the '&' of the entities written
    # for '<' and '>' is not escaped a second time; every step works on
    # the result of the previous one so that no replacement is lost
    return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
2021
def _quoteAttributeValue(value):
    """
    Returns value escaped by makeWellFormed and wrapped in quotes.

    Double quotes are used unless the value itself contains one, in which
    case single quotes are used. A value containing both kinds of quotes
    is wrapped in double quotes with the embedded ones written as &quot;.
    """
    escaped = makeWellFormed(value)
    if value.find('"') == -1:
        return '"' + escaped + '"'
    if value.find("'") == -1:
        return "'" + escaped + "'"
    return '"' + escaped.replace('"', '&quot;') + '"'

# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0):
    """
    Serializes element and everything below it into a string.

    options.indent_type selects the indentation unit ('tab', 'space', or
    anything else for none); ind is the nesting depth of element. The id
    and xml:id attributes are written before all other attributes. Text
    is stripped of surrounding whitespace except inside text, tspan,
    textPath, tref, title, desc and textArea elements. Node types other
    than element, text, CDATA and comment are not written.
    """
    I=''
    if options.indent_type == 'tab': I='\t'
    elif options.indent_type == 'space': I=' '
    padding = I * ind

    parts = [padding, '<', element.nodeName]

    # always serialize the id or xml:id attributes first
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            parts.append(' ' + idName + '=' + _quoteAttributeValue(idValue))

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length):
        attr = attrList.item(num)
        if attr.nodeName == 'id' or attr.nodeName == 'xml:id': continue

        parts.append(' ')
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
            parts.append('xmlns:')
        parts.append(attr.nodeName + '=' + _quoteAttributeValue(attr.nodeValue))

    # if no children, self-close
    if element.childNodes.length > 0:
        parts.append('>')

        onNewLine = False
        for child in element.childNodes:
            # element node
            if child.nodeType == 1:
                parts.append('\n' + serializeXML(child, options, ind + 1))
                onNewLine = True
            # text node
            elif child.nodeType == 3:
                # trim it only in the case of not being a child of an element
                # where whitespace might be important
                if element.nodeName in ["text", "tspan", "textPath", "tref", "title", "desc", "textArea"]:
                    parts.append(makeWellFormed(child.nodeValue))
                else:
                    parts.append(makeWellFormed(child.nodeValue.strip()))
            # CDATA node
            elif child.nodeType == 4:
                parts.append('<![CDATA[' + child.nodeValue + ']]>')
            # Comment node
            elif child.nodeType == 8:
                parts.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?

        # the closing tag goes on its own indented line after child elements
        if onNewLine: parts.append(padding)
        parts.append('</' + element.nodeName + '>')
    else:
        parts.append('/>')

    # every element except the outermost one ends its line
    if ind > 0: parts.append('\n')

    return ''.join(parts)
2103
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
        """
                Runs all clean-up passes over the SVG document contained in
                in_string and returns the re-serialized document as a string.

                When options is None the default values of _options_parser are used.
                Unless options.strip_xml_prolog is set, the result starts with an
                XML prolog.

                Side effects: the precision of the current decimal context is set to
                options.digits, and the module-level counters numAttrsRemoved,
                numStylePropsFixed, numElemsRemoved and numBytesSavedInColors are
                updated.
        """
        if options is None:
                options = _options_parser.get_default_values()
        # Decimal arithmetic performed while scouring rounds to this precision
        getcontext().prec = options.digits
        global numAttrsRemoved
        global numStylePropsFixed
        global numElemsRemoved
        global numBytesSavedInColors
        doc = xml.dom.minidom.parseString(in_string)

        # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
        # on the first pass, so we do it multiple times
        # does it have to do with removal of children affecting the childlist?
        if options.keep_editor_data == False:
                while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
                        pass
                while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
                        pass

                # remove the xmlns: declarations now
                # (names are collected first, the attributes are removed in a second loop)
                xmlnsDeclsToRemove = []
                attrList = doc.documentElement.attributes
                for num in range(attrList.length) :
                        if attrList.item(num).nodeValue in unwanted_ns :
                                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

                for attr in xmlnsDeclsToRemove :
                        doc.documentElement.removeAttribute(attr)
                        numAttrsRemoved += 1

        # ensure namespace for SVG is declared
        # TODO: what if the default namespace is something else (i.e. some valid namespace)?
        if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
                doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
                # TODO: throw error or warning?

        # check for redundant SVG namespace declaration
        # (an xmlns:prefix declaration bound to the SVG namespace)
        attrList = doc.documentElement.attributes
        xmlnsDeclsToRemove = []
        redundantPrefixes = []
        for i in range(attrList.length):
                attr = attrList.item(i)
                name = attr.nodeName
                val = attr.nodeValue
                if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
                        redundantPrefixes.append(name[6:])
                        xmlnsDeclsToRemove.append(name)

        for attrName in xmlnsDeclsToRemove:
                doc.documentElement.removeAttribute(attrName)

        # elements using a redundant prefix are renamed to their unprefixed form
        for prefix in redundantPrefixes:
                remapNamespacePrefix(doc.documentElement, prefix, '')

        # repair style (remove unnecessary style properties and change them into XML attributes)
        numStylePropsFixed = repairStyle(doc.documentElement, options)

        # convert colors to #RRGGBB format
        if options.simple_colors:
                numBytesSavedInColors = convertColors(doc.documentElement)

        # remove empty defs, metadata, g
        # NOTE: an element is only removed when it has no element (1), text (3),
        # CDATA (4) or comment (8) child; any such child, including a text node,
        # keeps the element in the document
        # NOTE(review): elemsToRemove is never read in this function
        elemsToRemove = []
        for tag in ['defs', 'metadata', 'g'] :
                for elem in doc.documentElement.getElementsByTagName(tag) :
                        removeElem = not elem.hasChildNodes()
                        if removeElem == False :
                                for child in elem.childNodes :
                                        if child.nodeType in [1, 3, 4, 8] :
                                                break
                                else:
                                        # the loop finished without break: no child of a kept type
                                        removeElem = True
                        if removeElem :
                                elem.parentNode.removeChild(elem)
                                numElemsRemoved += 1

        # remove unreferenced gradients/patterns outside of defs
        while removeUnreferencedElements(doc) > 0:
                pass

        # strip ids until a pass removes none (only when requested)
        if options.strip_ids:
                bContinueLooping = True
                while bContinueLooping:
                        identifiedElements = findElementsWithId(doc.documentElement)
                        referencedIDs = findReferencedElements(doc.documentElement)
                        bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

        if options.group_collapse:
                while removeNestedGroups(doc.documentElement) > 0:
                        pass

        # move common attributes to parent group
        numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

        # remove unused attributes from parent
        numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

        while removeDuplicateGradientStops(doc) > 0:
                pass

        # remove gradients that are only referenced by one other gradient
        while collapseSinglyReferencedGradients(doc) > 0:
                pass

        # remove duplicate gradients
        while removeDuplicateGradients(doc) > 0:
                pass

        # clean path data (paths with an empty d attribute are removed altogether)
        for elem in doc.documentElement.getElementsByTagName('path') :
                if elem.getAttribute('d') == '':
                        elem.parentNode.removeChild(elem)
                else:
                        cleanPath(elem)

        # remove unnecessary closing point of polygons and scour points
        for polygon in doc.documentElement.getElementsByTagName('polygon') :
                cleanPolygon(polygon)

        # scour points of polyline
        for polyline in doc.documentElement.getElementsByTagName('polyline') :
                cleanPolygon(polyline)

        # scour lengths (including coordinates)
        for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
                for elem in doc.getElementsByTagName(type):
                        for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                                                'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                                                'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                                if elem.getAttribute(attr) != '':
                                        elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

        # remove default values of attributes
        numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

        # convert rasters references to base64-encoded strings
        if options.embed_rasters:
                for elem in doc.documentElement.getElementsByTagName('image') :
                        embedRasters(elem, options)

        # properly size the SVG document (ideally width/height should be 100% with a viewBox)
        properlySizeDoc(doc.documentElement)

        # output the document as a pretty string with a single space for indent
        # NOTE: removed pretty printing because of this problem:
        # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
        # rolled our own serialize function here to save on space, put id first, customize indentation, etc
#       out_string = doc.documentElement.toprettyxml(' ')
        out_string = serializeXML(doc.documentElement, options)

        # now strip out empty lines
        lines = []
        # Get rid of empty lines
        # (splitlines(True) keeps the line endings, so the kept lines can be joined as they are)
        for line in out_string.splitlines(True):
                if line.strip():
                        lines.append(line)

        # return the string stripped of empty lines
        if options.strip_xml_prolog == False:
                xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
        else:
                xmlprolog = ""

        return xmlprolog + "".join(lines)
2272
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Scours the SVG file named filename and parses the result.

    The file content is passed through scourString with the given
    options; the returned string is encoded as UTF-8 and parsed again, so
    the result is a minidom document of the scoured SVG.
    """
    infile = open(filename)
    try:
        # release the file handle even when reading fails
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2280
# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that puts the application name, the version number
    and the copyright statement in front of the usage information.
    """
    def format_usage(self, usage):
        # usage text exactly as the stock formatter renders it
        usageText = optparse.IndentedHelpFormatter.format_usage(self, usage)
        banner = "%s %s\n%s\n" % (APP, VER, COPYRIGHT)
        return banner + usageText
2290
# GZ: would prefer this to be in a function or class scope, but tests etc need
#     access to the defaults anyway
# Command line definition; scourString and parse_args read their options
# (and the defaults of those options) from this parser.
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    # the text is reproduced verbatim, including the doubled space
    # in front of "stdout"
    description=(
        "If the input/output files are specified with a svgz extension, "
        "then compressed SVG is assumed. "
        "If the input file is not specified, stdin is used. "
        "If the output file is not specified,  stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# switches that turn off passes which run by default
_options_parser.add_option(
    "--disable-simplify-colors",
    dest="simple_colors", action="store_false", default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option(
    "--disable-style-to-xml",
    dest="style_to_xml", action="store_false", default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option(
    "--disable-group-collapsing",
    dest="group_collapse", action="store_false", default=True,
    help="won't collapse <g> elements")

# id stripping is off unless asked for
_options_parser.add_option(
    "--enable-id-stripping",
    dest="strip_ids", action="store_true", default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option(
    "--disable-embed-rasters",
    dest="embed_rasters", action="store_false", default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option(
    "--keep-editor-data",
    dest="keep_editor_data", action="store_true", default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option(
    "--strip-xml-prolog",
    dest="strip_xml_prolog", action="store_true", default=False,
    help="won't output the <?xml ?> prolog")

# GZ: this is confusing, most people will be thinking in terms of
#     decimal places, which is not what decimal precision is doing
_options_parser.add_option(
    "-p", "--set-precision",
    dest="digits", action="store", type=int, default=5,
    help="set number of significant digits (default: %default)")

# input and output file names are accepted but hidden from the help text
_options_parser.add_option(
    "-i",
    dest="infilename", action="store", help=optparse.SUPPRESS_HELP)
_options_parser.add_option(
    "-o",
    dest="outfilename", action="store", help=optparse.SUPPRESS_HELP)
_options_parser.add_option(
    "--indent",
    dest="indent_type", action="store", type="string", default="space",
    help="indentation of the output: none, space, tab (default: %default)")
2336
def maybe_gziped_file(filename, mode="r"):
    """
    Opens filename, transparently handling gzip compression.

    A name ending in .svgz or .gz (case-insensitive) is opened as a
    gzip.GzipFile, anything else as a regular file. mode is passed on
    unchanged to whichever opener is used.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to open a file; the file() builtin it
    # replaces here does not exist on Python 3
    return open(filename, mode)
2341
def parse_args(args=None):
    """
    Parses the command line and opens the input and output streams.

    args is handed to _options_parser.parse_args. Leftover positional
    arguments, a negative number of digits and an unknown indent type are
    reported through _options_parser.error.

    Returns (options, [infile, outfile]); stdin and stdout are used when
    no file name was given for the respective side.
    """
    options, leftovers = _options_parser.parse_args(args)

    # validate what the option parser itself cannot check
    if leftovers:
        _options_parser.error("Additional arguments not handled: %r, see --help" % leftovers)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")
    if options.indent_type not in ["tab", "space", "none"]:
        _options_parser.error("Invalid value for --indent, see --help")

    # GZ: could sniff for gzip compression here
    infile = sys.stdin
    if options.infilename:
        # GZ: could catch a raised IOError here and report
        infile = maybe_gziped_file(options.infilename)

    outfile = sys.stdout
    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")

    return options, [infile, outfile]
2364
def getReport():
    """
    Returns the statistics of the scouring run as a multi-line string,
    one line per module-level counter.
    """
    counters = [
        (' Number of elements removed: ', numElemsRemoved),
        (' Number of attributes removed: ', numAttrsRemoved),
        (' Number of unreferenced id attributes removed: ', numIDsRemoved),
        (' Number of style properties fixed: ', numStylePropsFixed),
        (' Number of raster images embedded inline: ', numRastersEmbedded),
        (' Number of path segments reduced/removed: ', numPathSegmentsReduced),
        (' Number of bytes saved in path data: ', numBytesSavedInPathData),
        (' Number of bytes saved in colors: ', numBytesSavedInColors),
        (' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    ]
    return '\n'.join([label + str(count) for (label, count) in counters])
2375
# Command line entry point: scour the input, write the result and print
# a report to stderr.
if __name__ == '__main__':
        # pick a timer for measuring how long the run takes
        if sys.platform == "win32":
                from time import clock as get_tick
        else:
                # GZ: is this different from time.time() in any way?
                # NOTE: os.times()[0] is the user CPU time of this process,
                # so it is not a wall-clock measurement like time.time()
                def get_tick():
                        return os.times()[0]

        start = get_tick()

        # input and output are the opened streams chosen by parse_args
        options, (input, output) = parse_args()

        # banner goes to stderr so that it never ends up in the SVG output
        print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

        # do the work
        in_string = input.read()
        out_string = scourString(in_string, options).encode("UTF-8")
        output.write(out_string)

        # Close input and output files
        input.close()
        output.close()

        end = get_tick()

        # GZ: unless silenced by -q or something?
        # GZ: not using globals would be good too
        print >>sys.stderr, ' File:', input.name, \
                '\n Time taken:', str(end-start) + 's\n', \
                getReport()

        # size of the output relative to the input, in percent
        # (true division, see the __future__ import at the top of the file)
        oldsize = len(in_string)
        newsize = len(out_string)
        sizediff = (newsize / oldsize) * 100
        # only the first five characters of the percentage are printed
        print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
                'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
2412
2413