share/extensions/scour.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 #  Scour
   5 #
   6 #  Copyright 2009 Jeff Schiller
   7 #
   8 #  This file is part of Scour, http://www.codedread.com/scour/
   9 #
  10 #   Licensed under the Apache License, Version 2.0 (the "License");
  11 #   you may not use this file except in compliance with the License.
  12 #   You may obtain a copy of the License at
  13 #
  14 #       http://www.apache.org/licenses/LICENSE-2.0
  15 #
  16 #   Unless required by applicable law or agreed to in writing, software
  17 #   distributed under the License is distributed on an "AS IS" BASIS,
  18 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19 #   See the License for the specific language governing permissions and
  20 #   limitations under the License.
  21
  22 # Notes:
  23
  24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
  25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
  26
  27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
  28 #
  29 # * Process Transformations
  30 #  * Collapse all group based transformations
  31
  32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
#  * analysis of path elements to see if rect can be used instead? (would also need to look
#    at rounded corners)
  35 #  * removal of unused attributes in groups:
  36 #    <g fill="blue" ...>
  37 #      <rect fill="red" ... />
  38 #      <rect fill="red" ... />
  39 #      <rect fill="red" ... />
  40 #    </g>
  41 #    in this case, fill="blue" should be removed
  42
  43 # Next Up:
  44 # + analyze all children of a group, if they have common inheritable attributes, then move them to the group
  45 # + scour lengths for *opacity, svg:x,y,width,height, stroke-miterlimit, stroke-width
  46 # - analyze a group and its children, if a group's attribute is not being used by any children
  47 #   (or descendants?) then remove it
  48 # - add an option to remove ids if they match the Inkscape-style of IDs
  49 # - investigate point-reducing algorithms
  50 # - parse transform attribute
  51 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
  52 # - option to remove metadata
  53 # - prevent elements from being stripped if they are referenced in a <style> element
  54 #   (for instance, filter, marker, pattern) - need a crude CSS parser
  55
  56 # necessary to get true division
  57 from __future__ import division
  58
  59 import os
  60 import sys
  61 import xml.dom.minidom
  62 import re
  63 import math
  64 import base64
  65 import urllib
  66 from svg_regex import svg_parser
  67 import gzip
  68 import optparse
  69
  70 # Python 2.3- did not have Decimal
  71 try:
  72         from decimal import *
  73 except ImportError:
  74         from fixedpoint import *
  75         Decimal = FixedPoint
  76
  77 APP = 'scour'
  78 VER = '0.19'
  79 COPYRIGHT = 'Copyright Jeff Schiller, 2009'
  80
  81 NS = {  'SVG':          'http://www.w3.org/2000/svg',
  82                 'XLINK':        'http://www.w3.org/1999/xlink',
  83                 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
  84                 'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
  85                 'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
  86                 'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
  87                 'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
  88                 'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
  89                 'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
  90                 'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
  91                 'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
  92                 'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
  93                 'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
  94                 'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
  95                 }
  96
  97 unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
  98                                 NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
  99                                 NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
 100                                 NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]
 101
 102 svgAttributes = [
 103                                 'clip-rule',
 104                                 'display',
 105                                 'fill',
 106                                 'fill-opacity',
 107                                 'fill-rule',
 108                                 'filter',
 109                                 'font-family',
 110                                 'font-size',
 111                                 'font-stretch',
 112                                 'font-style',
 113                                 'font-variant',
 114                                 'font-weight',
 115                                 'line-height',
 116                                 'marker',
 117                                 'opacity',
 118                                 'overflow',
 119                                 'stop-color',
 120                                 'stop-opacity',
 121                                 'stroke',
 122                                 'stroke-dashoffset',
 123                                 'stroke-linecap',
 124                                 'stroke-linejoin',
 125                                 'stroke-miterlimit',
 126                                 'stroke-opacity',
 127                                 'stroke-width',
 128                                 'visibility'
 129                                 ]
 130
 131 colors = {
 132         'aliceblue': 'rgb(240, 248, 255)',
 133         'antiquewhite': 'rgb(250, 235, 215)',
 134         'aqua': 'rgb( 0, 255, 255)',
 135         'aquamarine': 'rgb(127, 255, 212)',
 136         'azure': 'rgb(240, 255, 255)',
 137         'beige': 'rgb(245, 245, 220)',
 138         'bisque': 'rgb(255, 228, 196)',
 139         'black': 'rgb( 0, 0, 0)',
 140         'blanchedalmond': 'rgb(255, 235, 205)',
 141         'blue': 'rgb( 0, 0, 255)',
 142         'blueviolet': 'rgb(138, 43, 226)',
 143         'brown': 'rgb(165, 42, 42)',
 144         'burlywood': 'rgb(222, 184, 135)',
 145         'cadetblue': 'rgb( 95, 158, 160)',
 146         'chartreuse': 'rgb(127, 255, 0)',
 147         'chocolate': 'rgb(210, 105, 30)',
 148         'coral': 'rgb(255, 127, 80)',
 149         'cornflowerblue': 'rgb(100, 149, 237)',
 150         'cornsilk': 'rgb(255, 248, 220)',
 151         'crimson': 'rgb(220, 20, 60)',
 152         'cyan': 'rgb( 0, 255, 255)',
 153         'darkblue': 'rgb( 0, 0, 139)',
 154         'darkcyan': 'rgb( 0, 139, 139)',
 155         'darkgoldenrod': 'rgb(184, 134, 11)',
 156         'darkgray': 'rgb(169, 169, 169)',
 157         'darkgreen': 'rgb( 0, 100, 0)',
 158         'darkgrey': 'rgb(169, 169, 169)',
 159         'darkkhaki': 'rgb(189, 183, 107)',
 160         'darkmagenta': 'rgb(139, 0, 139)',
 161         'darkolivegreen': 'rgb( 85, 107, 47)',
 162         'darkorange': 'rgb(255, 140, 0)',
 163         'darkorchid': 'rgb(153, 50, 204)',
 164         'darkred': 'rgb(139, 0, 0)',
 165         'darksalmon': 'rgb(233, 150, 122)',
 166         'darkseagreen': 'rgb(143, 188, 143)',
 167         'darkslateblue': 'rgb( 72, 61, 139)',
 168         'darkslategray': 'rgb( 47, 79, 79)',
 169         'darkslategrey': 'rgb( 47, 79, 79)',
 170         'darkturquoise': 'rgb( 0, 206, 209)',
 171         'darkviolet': 'rgb(148, 0, 211)',
 172         'deeppink': 'rgb(255, 20, 147)',
 173         'deepskyblue': 'rgb( 0, 191, 255)',
 174         'dimgray': 'rgb(105, 105, 105)',
 175         'dimgrey': 'rgb(105, 105, 105)',
 176         'dodgerblue': 'rgb( 30, 144, 255)',
 177         'firebrick': 'rgb(178, 34, 34)',
 178         'floralwhite': 'rgb(255, 250, 240)',
 179         'forestgreen': 'rgb( 34, 139, 34)',
 180         'fuchsia': 'rgb(255, 0, 255)',
 181         'gainsboro': 'rgb(220, 220, 220)',
 182         'ghostwhite': 'rgb(248, 248, 255)',
 183         'gold': 'rgb(255, 215, 0)',
 184         'goldenrod': 'rgb(218, 165, 32)',
 185         'gray': 'rgb(128, 128, 128)',
 186         'grey': 'rgb(128, 128, 128)',
 187         'green': 'rgb( 0, 128, 0)',
 188         'greenyellow': 'rgb(173, 255, 47)',
 189         'honeydew': 'rgb(240, 255, 240)',
 190         'hotpink': 'rgb(255, 105, 180)',
 191         'indianred': 'rgb(205, 92, 92)',
 192         'indigo': 'rgb( 75, 0, 130)',
 193         'ivory': 'rgb(255, 255, 240)',
 194         'khaki': 'rgb(240, 230, 140)',
 195         'lavender': 'rgb(230, 230, 250)',
 196         'lavenderblush': 'rgb(255, 240, 245)',
 197         'lawngreen': 'rgb(124, 252, 0)',
 198         'lemonchiffon': 'rgb(255, 250, 205)',
 199         'lightblue': 'rgb(173, 216, 230)',
 200         'lightcoral': 'rgb(240, 128, 128)',
 201         'lightcyan': 'rgb(224, 255, 255)',
 202         'lightgoldenrodyellow': 'rgb(250, 250, 210)',
 203         'lightgray': 'rgb(211, 211, 211)',
 204         'lightgreen': 'rgb(144, 238, 144)',
 205         'lightgrey': 'rgb(211, 211, 211)',
 206         'lightpink': 'rgb(255, 182, 193)',
 207         'lightsalmon': 'rgb(255, 160, 122)',
 208         'lightseagreen': 'rgb( 32, 178, 170)',
 209         'lightskyblue': 'rgb(135, 206, 250)',
 210         'lightslategray': 'rgb(119, 136, 153)',
 211         'lightslategrey': 'rgb(119, 136, 153)',
 212         'lightsteelblue': 'rgb(176, 196, 222)',
 213         'lightyellow': 'rgb(255, 255, 224)',
 214         'lime': 'rgb( 0, 255, 0)',
 215         'limegreen': 'rgb( 50, 205, 50)',
 216         'linen': 'rgb(250, 240, 230)',
 217         'magenta': 'rgb(255, 0, 255)',
 218         'maroon': 'rgb(128, 0, 0)',
 219         'mediumaquamarine': 'rgb(102, 205, 170)',
 220         'mediumblue': 'rgb( 0, 0, 205)',
 221         'mediumorchid': 'rgb(186, 85, 211)',
 222         'mediumpurple': 'rgb(147, 112, 219)',
 223         'mediumseagreen': 'rgb( 60, 179, 113)',
 224         'mediumslateblue': 'rgb(123, 104, 238)',
 225         'mediumspringgreen': 'rgb( 0, 250, 154)',
 226         'mediumturquoise': 'rgb( 72, 209, 204)',
 227         'mediumvioletred': 'rgb(199, 21, 133)',
 228         'midnightblue': 'rgb( 25, 25, 112)',
 229         'mintcream': 'rgb(245, 255, 250)',
 230         'mistyrose': 'rgb(255, 228, 225)',
 231         'moccasin': 'rgb(255, 228, 181)',
 232         'navajowhite': 'rgb(255, 222, 173)',
 233         'navy': 'rgb( 0, 0, 128)',
 234         'oldlace': 'rgb(253, 245, 230)',
 235         'olive': 'rgb(128, 128, 0)',
 236         'olivedrab': 'rgb(107, 142, 35)',
 237         'orange': 'rgb(255, 165, 0)',
 238         'orangered': 'rgb(255, 69, 0)',
 239         'orchid': 'rgb(218, 112, 214)',
 240         'palegoldenrod': 'rgb(238, 232, 170)',
 241         'palegreen': 'rgb(152, 251, 152)',
 242         'paleturquoise': 'rgb(175, 238, 238)',
 243         'palevioletred': 'rgb(219, 112, 147)',
 244         'papayawhip': 'rgb(255, 239, 213)',
 245         'peachpuff': 'rgb(255, 218, 185)',
 246         'peru': 'rgb(205, 133, 63)',
 247         'pink': 'rgb(255, 192, 203)',
 248         'plum': 'rgb(221, 160, 221)',
 249         'powderblue': 'rgb(176, 224, 230)',
 250         'purple': 'rgb(128, 0, 128)',
 251         'red': 'rgb(255, 0, 0)',
 252         'rosybrown': 'rgb(188, 143, 143)',
 253         'royalblue': 'rgb( 65, 105, 225)',
 254         'saddlebrown': 'rgb(139, 69, 19)',
 255         'salmon': 'rgb(250, 128, 114)',
 256         'sandybrown': 'rgb(244, 164, 96)',
 257         'seagreen': 'rgb( 46, 139, 87)',
 258         'seashell': 'rgb(255, 245, 238)',
 259         'sienna': 'rgb(160, 82, 45)',
 260         'silver': 'rgb(192, 192, 192)',
 261         'skyblue': 'rgb(135, 206, 235)',
 262         'slateblue': 'rgb(106, 90, 205)',
 263         'slategray': 'rgb(112, 128, 144)',
 264         'slategrey': 'rgb(112, 128, 144)',
 265         'snow': 'rgb(255, 250, 250)',
 266         'springgreen': 'rgb( 0, 255, 127)',
 267         'steelblue': 'rgb( 70, 130, 180)',
 268         'tan': 'rgb(210, 180, 140)',
 269         'teal': 'rgb( 0, 128, 128)',
 270         'thistle': 'rgb(216, 191, 216)',
 271         'tomato': 'rgb(255, 99, 71)',
 272         'turquoise': 'rgb( 64, 224, 208)',
 273         'violet': 'rgb(238, 130, 238)',
 274         'wheat': 'rgb(245, 222, 179)',
 275         'white': 'rgb(255, 255, 255)',
 276         'whitesmoke': 'rgb(245, 245, 245)',
 277         'yellow': 'rgb(255, 255, 0)',
 278         'yellowgreen': 'rgb(154, 205, 50)',
 279         }
 280
 281 def isSameSign(a,b): return (a <= 0 and b <= 0) or (a >= 0 and b >= 0)
 282
 283 coord = re.compile("\\-?\\d+\\.?\\d*")
 284 scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
 285 number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
 286 sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
 287 unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
 288
 289 class Unit(object):
 290         INVALID = -1
 291         NONE = 0
 292         PCT = 1
 293         PX = 2
 294         PT = 3
 295         PC = 4
 296         EM = 5
 297         EX = 6
 298         CM = 7
 299         MM = 8
 300         IN = 9
 301
 302 #       @staticmethod
 303         def get(str):
 304                 # GZ: shadowing builtins like 'str' is generally bad form
 305                 # GZ: encoding stuff like this in a dict makes for nicer code
 306                 if str == None or str == '': return Unit.NONE
 307                 elif str == '%': return Unit.PCT
 308                 elif str == 'px': return Unit.PX
 309                 elif str == 'pt': return Unit.PT
 310                 elif str == 'pc': return Unit.PC
 311                 elif str == 'em': return Unit.EM
 312                 elif str == 'ex': return Unit.EX
 313                 elif str == 'cm': return Unit.CM
 314                 elif str == 'mm': return Unit.MM
 315                 elif str == 'in': return Unit.IN
 316                 return Unit.INVALID
 317
 318 #       @staticmethod
 319         def str(u):
 320                 if u == Unit.NONE: return ''
 321                 elif u == Unit.PCT: return '%'
 322                 elif u == Unit.PX: return 'px'
 323                 elif u == Unit.PT: return 'pt'
 324                 elif u == Unit.PC: return 'pc'
 325                 elif u == Unit.EM: return 'em'
 326                 elif u == Unit.EX: return 'ex'
 327                 elif u == Unit.CM: return 'cm'
 328                 elif u == Unit.MM: return 'mm'
 329                 elif u == Unit.IN: return 'in'
 330                 return 'INVALID'
 331
 332         get = staticmethod(get)
 333         str = staticmethod(str)
 334
 335 class SVGLength(object):
 336         def __init__(self, str):
 337                 try: # simple unitless and no scientific notation
 338                         self.value = float(str)
 339                         if int(self.value) == self.value:
 340                                 self.value = int(self.value)
 341                         self.units = Unit.NONE
 342                 except ValueError:
 343                         # we know that the length string has an exponent, a unit, both or is invalid
 344
 345                         # parse out number, exponent and unit
 346                         self.value = 0
 347                         unitBegin = 0
 348                         scinum = scinumber.match(str)
 349                         if scinum != None:
 350                                 # this will always match, no need to check it
 351                                 numMatch = number.match(str)
 352                                 expMatch = sciExponent.search(str, numMatch.start(0))
 353                                 self.value = (float(numMatch.group(0)) *
 354                                         10 ** float(expMatch.group(1)))
 355                                 unitBegin = expMatch.end(1)
 356                         else:
 357                                 # unit or invalid
 358                                 numMatch = number.match(str)
 359                                 if numMatch != None:
 360                                         self.value = float(numMatch.group(0))
 361                                         unitBegin = numMatch.end(0)
 362
 363                         if int(self.value) == self.value:
 364                                 self.value = int(self.value)
 365
 366                         if unitBegin != 0 :
 367                                 unitMatch = unit.search(str, unitBegin)
 368                                 if unitMatch != None :
 369                                         self.units = Unit.get(unitMatch.group(0))
 370
 371                         # invalid
 372                         else:
 373                                 # TODO: this needs to set the default for the given attribute (how?)
 374                                 self.value = 0
 375                                 self.units = Unit.INVALID
 376
 377 # returns the length of a property
 378 # TODO: eventually use the above class once it is complete
 379 def getSVGLength(value):
 380         try:
 381                 v = float(value)
 382         except ValueError:
 383                 coordMatch = coord.match(value)
 384                 if coordMatch != None:
 385                         unitMatch = unit.search(value, coordMatch.start(0))
 386                 v = value
 387         return v
 388
 389 def findElementById(node, id):
 390         if node == None or node.nodeType != 1: return None
 391         if node.getAttribute('id') == id: return node
 392         for child in node.childNodes :
 393                 e = findElementById(child,id)
 394                 if e != None: return e
 395         return None
 396
 397 def findElementsWithId(node, elems=None):
 398         """
 399         Returns all elements with id attributes
 400         """
 401         if elems is None:
 402                 elems = {}
 403         id = node.getAttribute('id')
 404         if id != '' :
 405                 elems[id] = node
 406         if node.hasChildNodes() :
 407                 for child in node.childNodes:
 408                         # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
 409                         # we are only really interested in nodes of type Element (1)
 410                         if child.nodeType == 1 :
 411                                 findElementsWithId(child, elems)
 412         return elems
 413
 414 def findReferencedElements(node, ids=None):
 415         """
 416         Returns the number of times an ID is referenced as well as all elements
 417         that reference it.
 418
 419         Currently looks at fill, stroke, clip-path, mask, marker, and
 420         xlink:href attributes.
 421         """
 422         if ids is None:
 423                 ids = {}
 424         # TODO: input argument ids is clunky here (see below how it is called)
 425         # GZ: alternative to passing dict, use **kwargs
 426         href = node.getAttributeNS(NS['XLINK'],'href')
 427
 428         # if xlink:href is set, then grab the id
 429         if href != '' and len(href) > 1 and href[0] == '#':
 430                 # we remove the hash mark from the beginning of the id
 431                 id = href[1:]
 432                 if id in ids:
 433                         ids[id][0] += 1
 434                         ids[id][1].append(node)
 435                 else:
 436                         ids[id] = [1,[node]]
 437
 438         # now get all style properties and the fill, stroke, filter attributes
 439         styles = node.getAttribute('style').split(';')
 440         referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start',
 441                                                 'marker-end', 'marker-mid']
 442         for attr in referencingProps:
 443                 styles.append(':'.join([attr, node.getAttribute(attr)]))
 444
 445         for style in styles:
 446                 propval = style.split(':')
 447                 if len(propval) == 2 :
 448                         prop = propval[0].strip()
 449                         val = propval[1].strip()
 450                         if prop in referencingProps and val != '' :
 451                                 if len(val) >= 7 and val[0:5] == 'url(#' :
 452                                         id = val[5:val.find(')')]
 453                                         if ids.has_key(id) :
 454                                                 ids[id][0] += 1
 455                                                 ids[id][1].append(node)
 456                                         else:
 457                                                 ids[id] = [1,[node]]
 458                                 # if the url has a quote in it, we need to compensate
 459                                 elif len(val) >= 8 :
 460                                         id = None
 461                                         # double-quote
 462                                         if val[0:6] == 'url("#' :
 463                                                 id = val[6:val.find('")')]
 464                                         # single-quote
 465                                         elif val[0:6] == "url('#" :
 466                                                 id = val[6:val.find("')")]
 467                                         if id != None:
 468                                                 if ids.has_key(id) :
 469                                                         ids[id][0] += 1
 470                                                         ids[id][1].append(node)
 471                                                 else:
 472                                                         ids[id] = [1,[node]]
 473
 474         if node.hasChildNodes() :
 475                 for child in node.childNodes:
 476                         if child.nodeType == 1 :
 477                                 findReferencedElements(child, ids)
 478         return ids
 479
 480 numIDsRemoved = 0
 481 numElemsRemoved = 0
 482 numAttrsRemoved = 0
 483 numRastersEmbedded = 0
 484 numPathSegmentsReduced = 0
 485 numCurvesStraightened = 0
 486 numBytesSavedInPathData = 0
 487 numBytesSavedInColors = 0
 488 numPointsRemovedFromPolygon = 0
 489
 490 def removeUnusedDefs(doc, defElem, elemsToRemove=None):
 491         if elemsToRemove is None:
 492                 elemsToRemove = []
 493
 494         identifiedElements = findElementsWithId(doc.documentElement)
 495         referencedIDs = findReferencedElements(doc.documentElement)
 496
 497         keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
 498         for elem in defElem.childNodes:
 499                 if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
 500                         elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
 501                         continue
 502                 if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
 503                                 (not elem.getAttribute('id') in referencedIDs)) and \
 504                                 not elem.nodeName in keepTags:
 505                         elemsToRemove.append(elem)
 506         return elemsToRemove
 507
 508 def removeUnreferencedElements(doc):
 509         """
 510         Removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>.
 511         Also vacuums the defs of any non-referenced renderable elements.
 512
 513         Returns the number of unreferenced elements removed from the document.
 514         """
 515         global numElemsRemoved
 516         num = 0
 517         removeTags = ['linearGradient', 'radialGradient', 'pattern']
 518
 519         identifiedElements = findElementsWithId(doc.documentElement)
 520         referencedIDs = findReferencedElements(doc.documentElement)
 521
 522         for id in identifiedElements:
 523                 if not id in referencedIDs:
 524                         goner = findElementById(doc.documentElement, id)
 525                         if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
 526                                 goner.parentNode.removeChild(goner)
 527                                 num += 1
 528                                 numElemsRemoved += 1
 529
 530         # TODO: should also go through defs and vacuum it
 531         num = 0
 532         defs = doc.documentElement.getElementsByTagName('defs')
 533         for aDef in defs:
 534                 elemsToRemove = removeUnusedDefs(doc, aDef)
 535                 for elem in elemsToRemove:
 536                         elem.parentNode.removeChild(elem)
 537                         numElemsRemoved += 1
 538                         num += 1
 539         return num
 540
 541 def removeUnreferencedIDs(referencedIDs, identifiedElements):
 542         """
 543         Removes the unreferenced ID attributes.
 544
 545         Returns the number of ID attributes removed
 546         """
 547         global numIDsRemoved
 548         keepTags = ['font']
 549         num = 0;
 550         for id in identifiedElements.keys():
 551                 node = identifiedElements[id]
 552                 if referencedIDs.has_key(id) == False and not node.nodeName in keepTags:
 553                         node.removeAttribute('id')
 554                         numIDsRemoved += 1
 555                         num += 1
 556         return num
 557
 558 def removeNamespacedAttributes(node, namespaces):
 559         global numAttrsRemoved
 560         num = 0
 561         if node.nodeType == 1 :
 562                 # remove all namespace'd attributes from this element
 563                 attrList = node.attributes
 564                 attrsToRemove = []
 565                 for attrNum in range(attrList.length):
 566                         attr = attrList.item(attrNum)
 567                         if attr != None and attr.namespaceURI in namespaces:
 568                                 attrsToRemove.append(attr.nodeName)
 569                 for attrName in attrsToRemove :
 570                         num += 1
 571                         numAttrsRemoved += 1
 572                         node.removeAttribute(attrName)
 573
 574                 # now recurse for children
 575                 for child in node.childNodes:
 576                         num += removeNamespacedAttributes(child, namespaces)
 577         return num
 578
 579 def removeNamespacedElements(node, namespaces):
 580         global numElemsRemoved
 581         num = 0
 582         if node.nodeType == 1 :
 583                 # remove all namespace'd child nodes from this element
 584                 childList = node.childNodes
 585                 childrenToRemove = []
 586                 for child in childList:
 587                         if child != None and child.namespaceURI in namespaces:
 588                                 childrenToRemove.append(child)
 589                 for child in childrenToRemove :
 590                         num += 1
 591                         numElemsRemoved += 1
 592                         node.removeChild(child)
 593
 594                 # now recurse for children
 595                 for child in node.childNodes:
 596                         num += removeNamespacedElements(child, namespaces)
 597         return num
 598
 599 def removeNestedGroups(node):
 600         """
 601         This walks further and further down the tree, removing groups
 602         which do not have any attributes or a title/desc child and
 603         promoting their children up one level
 604         """
 605         global numElemsRemoved
 606         num = 0
 607
 608         groupsToRemove = []
 609         for child in node.childNodes:
 610                 if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
 611                         # only collapse group if it does not have a title or desc as a direct descendant
 612                         for grandchild in child.childNodes:
 613                                 if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
 614                                                 grandchild.nodeName in ['title','desc']:
 615                                         break
 616                         else:
 617                                 groupsToRemove.append(child)
 618
 619         for g in groupsToRemove:
 620                 while g.childNodes.length > 0:
 621                         g.parentNode.insertBefore(g.firstChild, g)
 622                 g.parentNode.removeChild(g)
 623                 numElemsRemoved += 1
 624                 num += 1
 625
 626         # now recurse for children
 627         for child in node.childNodes:
 628                 if child.nodeType == 1:
 629                         num += removeNestedGroups(child)
 630         return num
 631
 632 def moveCommonAttributesToParentGroup(elem):
 633         """
 634         This recursively calls this function on all children of the passed in element
 635         and then iterates over all child elements and removes common inheritable attributes
 636         from the children and places them in the parent group.
 637         """
 638         num = 0
 639
 640         childElements = []
 641         # recurse first into the children (depth-first)
 642         for child in elem.childNodes:
 643                 if child.nodeType == 1:
 644                         childElements.append(child)
 645                         num += moveCommonAttributesToParentGroup(child)
 646
 647         # only process the children if there are more than one element
 648         if len(childElements) <= 1: return num
 649
 650         commonAttrs = {}
 651         # add all inheritable properties of the first child element
 652         # FIXME: Note there is a chance that the first child is a set/animate in which case
 653         # its fill attribute is not what we want to look at, we should look for the first
 654         # non-animate/set element
 655         attrList = childElements[0].attributes
 656         for num in range(attrList.length):
 657                 attr = attrList.item(num)
 658                 # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
 659                 # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
 660                 if attr.nodeName in ['clip-rule',
 661                                         'display-align',
 662                                         'fill', 'fill-opacity', 'fill-rule',
 663                                         'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
 664                                         'font-style', 'font-variant', 'font-weight',
 665                                         'letter-spacing',
 666                                         'pointer-events', 'shape-rendering',
 667                                         'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
 668                                         'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
 669                                         'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
 670                                         'word-spacing', 'writing-mode']:
 671                         # we just add all the attributes from the first child
 672                         commonAttrs[attr.nodeName] = attr.nodeValue
 673
 674         # for each subsequent child element
 675         for childNum in range(len(childElements)):
 676                 # skip first child
 677                 if childNum == 0:
 678                         continue
 679
 680                 child = childElements[childNum]
 681                 # if we are on an animateXXX/set element, ignore it (due to the 'fill' attribute)
 682                 if child.localName in ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']:
 683                         continue
 684
 685                 distinctAttrs = []
 686                 # loop through all current 'common' attributes
 687                 for name in commonAttrs.keys():
 688                         # if this child doesn't match that attribute, schedule it for removal
 689                         if child.getAttribute(name) != commonAttrs[name]:
 690                                 distinctAttrs.append(name)
 691                 # remove those attributes which are not common
 692                 for name in distinctAttrs:
 693                         del commonAttrs[name]
 694
 695         # commonAttrs now has all the inheritable attributes which are common among all child elements
 696         for name in commonAttrs.keys():
 697                 for child in childElements:
 698                         child.removeAttribute(name)
 699                 elem.setAttribute(name, commonAttrs[name])
 700
 701         # update our statistic (we remove N*M attributes and add back in M attributes)
 702         num += (len(childElements)-1) * len(commonAttrs)
 703         return num
 704
 705 def removeDuplicateGradientStops(doc):
 706         global numElemsRemoved
 707         num = 0
 708
 709         for gradType in ['linearGradient', 'radialGradient']:
 710                 for grad in doc.getElementsByTagName(gradType):
 711                         stops = {}
 712                         stopsToRemove = []
 713                         for stop in grad.getElementsByTagName('stop'):
 714                                 # convert percentages into a floating point number
 715                                 offsetU = SVGLength(stop.getAttribute('offset'))
 716                                 if offsetU.units == Unit.PCT:
 717                                         offset = offsetU.value / 100.0
 718                                 elif offsetU.units == Unit.NONE:
 719                                         offset = offsetU.value
 720                                 else:
 721                                         offset = 0
 722                                 # set the stop offset value to the integer or floating point equivalent
 723                                 if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
 724                                 else: stop.setAttribute('offset', str(offset))
 725
 726                                 color = stop.getAttribute('stop-color')
 727                                 opacity = stop.getAttribute('stop-opacity')
 728                                 if stops.has_key(offset) :
 729                                         oldStop = stops[offset]
 730                                         if oldStop[0] == color and oldStop[1] == opacity:
 731                                                 stopsToRemove.append(stop)
 732                                 stops[offset] = [color, opacity]
 733
 734                         for stop in stopsToRemove:
 735                                 stop.parentNode.removeChild(stop)
 736                                 num += 1
 737                                 numElemsRemoved += 1
 738
 739         # linear gradients
 740         return num
 741
 742 def collapseSinglyReferencedGradients(doc):
 743         global numElemsRemoved
 744         num = 0
 745
 746         # make sure to reset the ref'ed ids for when we are running this in testscour
 747         for rid,nodeCount in findReferencedElements(doc.documentElement).iteritems():
 748                 count = nodeCount[0]
 749                 nodes = nodeCount[1]
 750                 if count == 1:
 751                         elem = findElementById(doc.documentElement,rid)
 752                         if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
 753                                         and elem.namespaceURI == NS['SVG']:
 754                                 # found a gradient that is referenced by only 1 other element
 755                                 refElem = nodes[0]
 756                                 if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
 757                                                 and refElem.namespaceURI == NS['SVG']:
 758                                         # elem is a gradient referenced by only one other gradient (refElem)
 759
 760                                         # add the stops to the referencing gradient (this removes them from elem)
 761                                         if len(refElem.getElementsByTagName('stop')) == 0:
 762                                                 stopsToAdd = elem.getElementsByTagName('stop')
 763                                                 for stop in stopsToAdd:
 764                                                         refElem.appendChild(stop)
 765
 766                                         # adopt the gradientUnits, spreadMethod,  gradientTransform attributes if
 767                                         # they are unspecified on refElem
 768                                         for attr in ['gradientUnits','spreadMethod','gradientTransform']:
 769                                                 if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 770                                                         refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 771
 772                                         # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
 773                                         # they are unspecified on refElem
 774                                         if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
 775                                                 for attr in ['fx','fy','cx','cy','r']:
 776                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 777                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 778
 779                                         # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if
 780                                         # they are unspecified on refElem
 781                                         if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
 782                                                 for attr in ['x1','y1','x2','y2']:
 783                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 784                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 785
 786                                         # now remove the xlink:href from refElem
 787                                         refElem.removeAttributeNS(NS['XLINK'], 'href')
 788
 789                                         # now delete elem
 790                                         elem.parentNode.removeChild(elem)
 791                                         numElemsRemoved += 1
 792                                         num += 1
 793         return num
 794
 795 def removeDuplicateGradients(doc):
 796         global numElemsRemoved
 797         num = 0
 798
 799         gradientsToRemove = {}
 800         duplicateToMaster = {}
 801
 802         for gradType in ['linearGradient', 'radialGradient']:
 803                 grads = doc.getElementsByTagName(gradType)
 804                 for grad in grads:
 805                         # TODO: should slice grads from 'grad' here to optimize
 806                         for ograd in grads:
 807                                 # do not compare gradient to itself
 808                                 if grad == ograd: continue
 809
 810                                 # compare grad to ograd (all properties, then all stops)
 811                                 # if attributes do not match, go to next gradient
 812                                 someGradAttrsDoNotMatch = False
 813                                 for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
 814                                         if grad.getAttribute(attr) != ograd.getAttribute(attr):
 815                                                 someGradAttrsDoNotMatch = True
 816                                                 break;
 817
 818                                 if someGradAttrsDoNotMatch: continue
 819
 820                                 # compare xlink:href values too
 821                                 if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
 822                                         continue
 823
 824                                 # all gradient properties match, now time to compare stops
 825                                 stops = grad.getElementsByTagName('stop')
 826                                 ostops = ograd.getElementsByTagName('stop')
 827
 828                                 if stops.length != ostops.length: continue
 829
 830                                 # now compare stops
 831                                 stopsNotEqual = False
 832                                 for i in range(stops.length):
 833                                         if stopsNotEqual: break
 834                                         stop = stops.item(i)
 835                                         ostop = ostops.item(i)
 836                                         for attr in ['offset', 'stop-color', 'stop-opacity']:
 837                                                 if stop.getAttribute(attr) != ostop.getAttribute(attr):
 838                                                         stopsNotEqual = True
 839                                                         break
 840                                 if stopsNotEqual: continue
 841
 842                                 # ograd is a duplicate of grad, we schedule it to be removed UNLESS
 843                                 # ograd is ALREADY considered a 'master' element
 844                                 if not gradientsToRemove.has_key(ograd):
 845                                         if not duplicateToMaster.has_key(ograd):
 846                                                 if not gradientsToRemove.has_key(grad):
 847                                                         gradientsToRemove[grad] = []
 848                                                 gradientsToRemove[grad].append( ograd )
 849                                                 duplicateToMaster[ograd] = grad
 850
 851         # get a collection of all elements that are referenced and their referencing elements
 852         referencedIDs = findReferencedElements(doc.documentElement)
 853         for masterGrad in gradientsToRemove.keys():
 854                 master_id = masterGrad.getAttribute('id')
 855                 for dupGrad in gradientsToRemove[masterGrad]:
 856                         # if the duplicate gradient no longer has a parent that means it was
 857                         # already re-mapped to another master gradient
 858                         if not dupGrad.parentNode: continue
 859                         dup_id = dupGrad.getAttribute('id')
 860                         # for each element that referenced the gradient we are going to remove
 861                         for elem in referencedIDs[dup_id][1]:
 862                                 # find out which attribute referenced the duplicate gradient
 863                                 for attr in ['fill', 'stroke']:
 864                                         v = elem.getAttribute(attr)
 865                                         if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
 866                                                 elem.setAttribute(attr, 'url(#'+master_id+')')
 867                                 if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
 868                                         elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)
 869
 870                         # now that all referencing elements have been re-mapped to the master
 871                         # it is safe to remove this gradient from the document
 872                         dupGrad.parentNode.removeChild(dupGrad)
 873                         numElemsRemoved += 1
 874                         num += 1
 875         return num
 876
 877 def repairStyle(node, options):
 878         num = 0
 879         if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
 880                 # get all style properties and stuff them into a dictionary
 881                 styleMap = { }
 882                 rawStyles = node.getAttribute('style').split(';')
 883                 for style in rawStyles:
 884                         propval = style.split(':')
 885                         if len(propval) == 2 :
 886                                 styleMap[propval[0].strip()] = propval[1].strip()
 887
 888                 # I've seen this enough to know that I need to correct it:
 889                 # fill: url(#linearGradient4918) rgb(0, 0, 0);
 890                 for prop in ['fill', 'stroke'] :
 891                         if styleMap.has_key(prop) :
 892                                 chunk = styleMap[prop].split(') ')
 893                                 if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
 894                                         styleMap[prop] = chunk[0] + ')'
 895                                         num += 1
 896
 897                 # Here is where we can weed out unnecessary styles like:
 898                 #  opacity:1
 899                 if styleMap.has_key('opacity') :
 900                         opacity = float(styleMap['opacity'])
 901                         # opacity='1.0' is useless, remove it
 902                         if opacity == 1.0 :
 903                                 del styleMap['opacity']
 904                                 num += 1
 905
 906                         # if opacity='0' then all fill and stroke properties are useless, remove them
 907                         elif opacity == 0.0 :
 908                                 for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
 909                                         'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
 910                                         'stroke-dashoffset', 'stroke-opacity'] :
 911                                         if styleMap.has_key(uselessStyle):
 912                                                 del styleMap[uselessStyle]
 913                                                 num += 1
 914
 915                 #  if stroke:none, then remove all stroke-related properties (stroke-width, etc)
 916                 #  TODO: should also detect if the computed value of this element is stroke="none"
 917                 if styleMap.has_key('stroke') and styleMap['stroke'] == 'none' :
 918                         for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
 919                                         'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
 920                                 if styleMap.has_key(strokestyle) :
 921                                         del styleMap[strokestyle]
 922                                         num += 1
 923                         # TODO: This is actually a problem if a parent element has a specified stroke
 924                         # we need to properly calculate computed values
 925                         del styleMap['stroke']
 926
 927                 #  if fill:none, then remove all fill-related properties (fill-rule, etc)
 928                 if styleMap.has_key('fill') and styleMap['fill'] == 'none' :
 929                         for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
 930                                 if styleMap.has_key(fillstyle) :
 931                                         del styleMap[fillstyle]
 932                                         num += 1
 933
 934                 #  stop-opacity: 1
 935                 if styleMap.has_key('stop-opacity') :
 936                         if float(styleMap['stop-opacity']) == 1.0 :
 937                                 del styleMap['stop-opacity']
 938                                 num += 1
 939
 940                 #  fill-opacity: 1 or 0
 941                 if styleMap.has_key('fill-opacity') :
 942                         fillOpacity = float(styleMap['fill-opacity'])
 943                         #  TODO: This is actually a problem if the parent element does not have fill-opacity=1
 944                         if fillOpacity == 1.0 :
 945                                 del styleMap['fill-opacity']
 946                                 num += 1
 947                         elif fillOpacity == 0.0 :
 948                                 for uselessFillStyle in [ 'fill', 'fill-rule' ] :
 949                                         if styleMap.has_key(uselessFillStyle):
 950                                                 del styleMap[uselessFillStyle]
 951                                                 num += 1
 952
 953                 #  stroke-opacity: 1 or 0
 954                 if styleMap.has_key('stroke-opacity') :
 955                         strokeOpacity = float(styleMap['stroke-opacity'])
 956                         #  TODO: This is actually a problem if the parent element does not have stroke-opacity=1
 957                         if strokeOpacity == 1.0 :
 958                                 del styleMap['stroke-opacity']
 959                                 num += 1
 960                         elif strokeOpacity == 0.0 :
 961                                 for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
 962                                                         'stroke-dasharray', 'stroke-dashoffset' ] :
 963                                         if styleMap.has_key(uselessStrokeStyle):
 964                                                 del styleMap[uselessStrokeStyle]
 965                                                 num += 1
 966
 967                 # stroke-width: 0
 968                 if styleMap.has_key('stroke-width') :
 969                         strokeWidth = getSVGLength(styleMap['stroke-width'])
 970                         if strokeWidth == 0.0 :
 971                                 for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
 972                                                         'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
 973                                         if styleMap.has_key(uselessStrokeStyle):
 974                                                 del styleMap[uselessStrokeStyle]
 975                                                 num += 1
 976
 977                 # remove font properties for non-text elements
 978                 # I've actually observed this in real SVG content
 979                 if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
 980                         for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
 981                                                                 'font-style', 'font-variant', 'font-weight',
 982                                                                 'letter-spacing', 'line-height', 'kerning',
 983                                                                 'text-anchor', 'text-decoration', 'text-rendering',
 984                                                                 'unicode-bidi', 'word-spacing', 'writing-mode'] :
 985                                 if styleMap.has_key(fontstyle) :
 986                                         del styleMap[fontstyle]
 987                                         num += 1
 988
 989                 # remove inkscape-specific styles
 990                 # TODO: need to get a full list of these
 991                 for inkscapeStyle in ['-inkscape-font-specification']:
 992                         if styleMap.has_key(inkscapeStyle):
 993                                 del styleMap[inkscapeStyle]
 994                                 num += 1
 995
 996                 # visibility: visible
 997                 if styleMap.has_key('visibility') :
 998                         if styleMap['visibility'] == 'visible':
 999                                 del styleMap['visibility']
1000                                 num += 1
1001
1002                 # display: inline
1003                 if styleMap.has_key('display') :
1004                         if styleMap['display'] == 'inline':
1005                                 del styleMap['display']
1006                                 num += 1
1007
1008                 # overflow: visible or overflow specified on element other than svg, marker, pattern
1009                 if styleMap.has_key('overflow') :
1010                         if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
1011                                 del styleMap['overflow']
1012                                 num += 1
1013
1014                 # marker: none
1015                 if styleMap.has_key('marker') :
1016                         if styleMap['marker'] == 'none':
1017                                 del styleMap['marker']
1018                                 num += 1
1019
1020                 # now if any of the properties match known SVG attributes we prefer attributes
1021                 # over style so emit them and remove them from the style map
1022                 if options.style_to_xml:
1023                         for propName in styleMap.keys() :
1024                                 if propName in svgAttributes :
1025                                         node.setAttribute(propName, styleMap[propName])
1026                                         del styleMap[propName]
1027
1028                 # sew our remaining style properties back together into a style attribute
1029                 fixedStyle = ''
1030                 for prop in styleMap.keys() :
1031                         fixedStyle += prop + ':' + styleMap[prop] + ';'
1032
1033                 if fixedStyle != '' :
1034                         node.setAttribute('style', fixedStyle)
1035                 else:
1036                         node.removeAttribute('style')
1037
1038         # recurse for our child elements
1039         for child in node.childNodes :
1040                 num += repairStyle(child,options)
1041
1042         return num
1043
def _isDefaultGradientLength(length, percent, allowFraction):
    """
    Returns True when length equals the given percentage, either written as a
    percentage or, if allowFraction is set, as the equivalent unitless
    fraction (e.g. 0.5 for 50).
    """
    if length.units == Unit.PCT:
        return length.value == percent
    return allowFraction and length.units == Unit.NONE and length.value == percent / 100.0

def removeDefaultAttributeValues(node, options):
    """
    Removes attributes whose value equals the default, on node and
    recursively on all of its descendants.

    gradientUnits, spreadMethod and zero-valued x1/y1/y2 are removed from any
    element.  The remaining defaults (x2=100%, cx/cy/r=50%, fx equal to cx,
    fy equal to cy) are defaults of gradients only, so they are removed only
    from linearGradient / radialGradient elements; on shapes such as <line>
    or <circle> the same attributes have a default of 0 and must be kept.

    Returns the number of attributes removed.
    """
    num = 0
    if node.nodeType != 1: return 0

    isLinearGradient = node.nodeName == 'linearGradient'
    isRadialGradient = node.nodeName == 'radialGradient'
    # With userSpaceOnUse a unitless 1 or 0.5 is a length in user units and
    # not a fraction of the bounding box, so it does not equal 100% / 50%.
    allowFraction = node.getAttribute('gradientUnits') != 'userSpaceOnUse'

    # NOTE(review): a gradient that inherits through xlink:href may need an
    # explicit default to override the inherited value - confirm the caller
    # collapses such references before this runs.

    # gradientUnits: objectBoundingBox
    if node.getAttribute('gradientUnits') == 'objectBoundingBox':
        node.removeAttribute('gradientUnits')
        num += 1

    # spreadMethod: pad
    if node.getAttribute('spreadMethod') == 'pad':
        node.removeAttribute('spreadMethod')
        num += 1

    # x1: 0%
    if node.getAttribute('x1') != '':
        x1 = SVGLength(node.getAttribute('x1'))
        if x1.value == 0:
            node.removeAttribute('x1')
            num += 1

    # y1: 0%
    if node.getAttribute('y1') != '':
        y1 = SVGLength(node.getAttribute('y1'))
        if y1.value == 0:
            node.removeAttribute('y1')
            num += 1

    # x2: 100% (linear gradients only)
    if isLinearGradient and node.getAttribute('x2') != '':
        x2 = SVGLength(node.getAttribute('x2'))
        if _isDefaultGradientLength(x2, 100, allowFraction):
            node.removeAttribute('x2')
            num += 1

    # y2: 0%
    if node.getAttribute('y2') != '':
        y2 = SVGLength(node.getAttribute('y2'))
        if y2.value == 0:
            node.removeAttribute('y2')
            num += 1

    if isRadialGradient:
        # fx: equal to cx (must be checked before cx itself may be removed)
        if node.getAttribute('fx') != '':
            if node.getAttribute('fx') == node.getAttribute('cx'):
                node.removeAttribute('fx')
                num += 1

        # fy: equal to cy (must be checked before cy itself may be removed)
        if node.getAttribute('fy') != '':
            if node.getAttribute('fy') == node.getAttribute('cy'):
                node.removeAttribute('fy')
                num += 1

        # cx, cy, r: 50%
        for attr in ['cx', 'cy', 'r']:
            if node.getAttribute(attr) != '':
                length = SVGLength(node.getAttribute(attr))
                if _isDefaultGradientLength(length, 50, allowFraction):
                    node.removeAttribute(attr)
                    num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
1124
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string.

    Names found in the colors table are replaced by their mapped value,
    rgb(r%,g%,b%) is converted to integer channels, and rgb(r,g,b) is
    converted to upper-case hex.  Channel values above 255 are clamped to 255
    so that the result is always a well-formed six digit color.  A value
    matching none of these forms is returned unchanged.
    """
    s = value

    if s in colors:
        s = colors[s]

    rgbpMatch = rgbp.match(s)
    if rgbpMatch is not None:
        r = int(float(rgbpMatch.group(1)) * 255.0 / 100.0)
        g = int(float(rgbpMatch.group(2)) * 255.0 / 100.0)
        b = int(float(rgbpMatch.group(3)) * 255.0 / 100.0)
        s = 'rgb(%d,%d,%d)' % (r, g, b)

    rgbMatch = rgb.match(s)
    if rgbMatch is not None:
        # out-of-range channels are clipped; the patterns only admit
        # non-negative numbers so only the upper bound needs enforcing
        r = min(255, int(rgbMatch.group(1)))
        g = min(255, int(rgbMatch.group(2)))
        b = min(255, int(rgbMatch.group(3)))
        s = '#%02X%02X%02X' % (r, g, b)

    # the length is tested first so that an empty string cannot raise an IndexError
    if len(s) == 7 and s[0] == '#' and s[1] == s[2] and s[3] == s[4] and s[5] == s[6]:
        s = s.upper()
        s = '#' + s[1] + s[3] + s[5]

    return s
1158
def convertColors(element):
    """
    Walks element and all of its descendants and rewrites each color
    attribute with the result of convertColor() whenever that result is
    shorter than the current value.

    Returns the total number of bytes saved.
    """
    # only element nodes carry attributes
    if element.nodeType != 1:
        return 0

    # which attributes hold colors depends on the kind of element
    tag = element.nodeName
    if tag in ['rect', 'circle', 'ellipse', 'polygon',
               'line', 'polyline', 'path', 'g', 'a']:
        colorAttrs = ['fill', 'stroke']
    elif tag in ['stop']:
        colorAttrs = ['stop-color']
    elif tag in ['solidColor']:
        colorAttrs = ['solid-color']
    else:
        colorAttrs = []

    saved = 0
    for name in colorAttrs:
        before = element.getAttribute(name)
        if before == '':
            continue
        after = convertColor(before)
        beforeLen = len(before)
        # only rewrite when the converted form is strictly shorter
        if beforeLen > len(after):
            element.setAttribute(name, after)
            saved += beforeLen - len(element.getAttribute(name))

    # add the savings of the whole subtree
    for kid in element.childNodes:
        saved += convertColors(kid)

    return saved
1193
1194 # TODO: go over what this method does and see if there is a way to optimize it
1195 # TODO: go over the performance of this method and see if I can save memory/speed by
1196 #       reusing data structures, etc
def _flattenPathData(pathObj):
        """
                Converts the parser output (lists of tuples of tuples of numbers and
                booleans) into a flat list of (cmd, [Decimal, ...]) tuples.

                Move, line and horizontal/vertical commands that carry no coordinates
                are dropped; commands that are not recognised are ignored.
        """
        # multiplying by Decimal(1) rounds the value to the current context precision
        one = Decimal(1)
        path = []
        for (cmd, dataset) in pathObj:
                nums = []
                if cmd in ['M','m','L','l','T','t']:
                        # one or more tuples, each containing two numbers
                        for t in dataset:
                                nums.append(Decimal(str(t[0])) * one)
                                nums.append(Decimal(str(t[1])) * one)
                        # only create this segment if it is not empty
                        if nums:
                                path.append( (cmd, nums) )

                elif cmd in ['V','v','H','h']:
                        # one or more numbers
                        for n in dataset:
                                nums.append(Decimal(str(n)))
                        if nums:
                                path.append( (cmd, nums) )

                elif cmd in ['C','c','S','s','Q','q']:
                        # one or more tuples, each containing three (C) or two (S, Q)
                        # tuples of two numbers each
                        for t in dataset:
                                for pair in t:
                                        nums.append(Decimal(str(pair[0])) * one)
                                        nums.append(Decimal(str(pair[1])) * one)
                        path.append( (cmd, nums) )

                elif cmd in ['A','a']:
                        # one or more tuples, each containing a tuple of two numbers, a number,
                        # a boolean, another boolean, and a tuple of two numbers
                        for t in dataset:
                                nums.append(Decimal(str(t[0][0])) * one)
                                nums.append(Decimal(str(t[0][1])) * one)
                                nums.append(Decimal(str(t[1])) * one)
                                # the two flags are stored as 1 or 0
                                if t[2]: nums.append(Decimal(1))
                                else: nums.append(Decimal(0))
                                if t[3]: nums.append(Decimal(1))
                                else: nums.append(Decimal(0))
                                nums.append(Decimal(str(t[4][0])) * one)
                                nums.append(Decimal(str(t[4][1])) * one)
                        path.append( (cmd, nums) )

                elif cmd in ['Z','z']:
                        path.append( (cmd, []) )
        return path

def _makePathRelative(path):
        """
                Returns a new path list in which every command after the first one has
                been converted to its relative form.  The first command is kept as is.

                path must be non-empty and its first command must carry at least one
                coordinate pair.
        """
        # number of values per segment for the commands whose arguments are all
        # coordinate pairs; the last pair of a segment is the segment's end point
        pairStride = {'m': 2, 'l': 2, 't': 2, 's': 4, 'q': 4, 'c': 6}

        first = path[0][1]
        # a closepath returns to the point of the moveto that opened the subpath,
        # which is the first pair even if implicit linetos follow it
        (startx, starty) = (first[0], first[1])
        if path[0][0] == 'M':
                # further pairs are absolute linetos, so the last pair is the current point
                (x, y) = (first[-2], first[-1])
        else:
                # further pairs are relative linetos: accumulate them
                (x, y) = (first[0], first[1])
                for n in range(2, len(first), 2):
                        x += first[n]
                        y += first[n+1]

        newPath = [path[0]]
        for (cmd, data) in path[1:]:
                newCmd = cmd
                newData = data
                lower = cmd.lower()
                absolute = (cmd != lower)

                if lower == 'z':
                        (x, y) = (startx, starty)
                        newCmd = 'z'

                elif lower == 'a':
                        # radii, rotation and flags are never adjusted, only the end point
                        if absolute:
                                newCmd = 'a'
                                newData = []
                        for i in range(0, len(data), 7):
                                if absolute:
                                        newData.extend(data[i:i+5])
                                        newData.append(data[i+5] - x)
                                        newData.append(data[i+6] - y)
                                        (x, y) = (data[i+5], data[i+6])
                                else:
                                        x += data[i+5]
                                        y += data[i+6]

                elif lower == 'h':
                        if absolute:
                                newCmd = 'h'
                                newData = []
                                for coord in data:
                                        newData.append(coord - x)
                                        x = coord
                        else:
                                for coord in data:
                                        x += coord

                elif lower == 'v':
                        if absolute:
                                newCmd = 'v'
                                newData = []
                                for coord in data:
                                        newData.append(coord - y)
                                        y = coord
                        else:
                                for coord in data:
                                        y += coord

                elif lower in pairStride:
                        stride = pairStride[lower]
                        if lower == 'm':
                                # a moveto opens a new subpath: remember where z returns to.
                                # A relative moveto is an offset from the current point.
                                if absolute:
                                        (startx, starty) = (data[0], data[1])
                                else:
                                        (startx, starty) = (x + data[0], y + data[1])
                        if absolute:
                                newCmd = lower
                                newData = []
                        for i in range(0, len(data), stride):
                                endx = data[i+stride-2]
                                endy = data[i+stride-1]
                                if absolute:
                                        # every pair of the segment is an offset from the
                                        # point at which the segment starts
                                        for k in range(i, i+stride, 2):
                                                newData.append(data[k] - x)
                                                newData.append(data[k+1] - y)
                                        (x, y) = (endx, endy)
                                else:
                                        x += endx
                                        y += endy

                newPath.append( (newCmd, newData) )
        return newPath

def _collapseConsecutiveCommands(path):
        """
                Returns a new path list in which runs of consecutive commands of the same
                type (after the first command, which is kept as is) are merged into one
                command holding all their coordinates.
        """
        prevCmd = ''
        prevData = []
        newPath = [path[0]]
        for (cmd, data) in path[1:]:
                # flush the previous command if it cannot absorb the current one.
                # movetos are never merged: the extra pairs of a merged moveto would
                # be read back as linetos
                if prevCmd != '' and (cmd != prevCmd or cmd == 'm'):
                        newPath.append( (prevCmd, prevData) )
                        prevCmd = ''
                        prevData = []

                if cmd == prevCmd:
                        prevData.extend(data)
                else:
                        prevCmd = cmd
                        # copy so that merging does not modify the list owned by path
                        prevData = list(data)
        # flush last command and data
        if prevCmd != '':
                newPath.append( (prevCmd, prevData) )
        return newPath

def cleanPath(element) :
        """
                Cleans the path string (d attribute) of the element.

                The path data is parsed, every command after the first is made relative,
                empty segments are removed, straight cubic curves become lines, lines and
                curves are replaced by the h, v, s and t shorthands where possible and
                consecutive commands of the same type are merged.  The result is written
                back to the d attribute.

                Updates the global counters numBytesSavedInPathData,
                numPathSegmentsReduced and numCurvesStraightened.  If no usable path
                data is found the attribute is left untouched.
        """
        global numBytesSavedInPathData
        global numPathSegmentsReduced
        global numCurvesStraightened

        # this gets the parser object from svg_regex.py and reduces it to a list
        # of (cmd,[numbers])
        oldPathStr = element.getAttribute('d')
        path = _flattenPathData(svg_parser.parse(oldPathStr))

        # nothing to work on: no commands at all, or a first command without a point
        if not path or len(path[0][1]) < 2:
                return

        # convert absolute coordinates into relative ones (start with the second
        # subcommand and leave the first one as it is)
        path = _makePathRelative(path)

        # remove empty segments
        # for each command: (values per segment, index of the first value that is
        # tested).  A segment is empty if all tested values are zero.  For curves
        # the control points are tested as well, because a curve that ends where it
        # starts still draws a loop unless its control points are zero too.  For
        # arcs only the end point is tested.
        emptyTest = {
                'm': (2, 0), 'l': (2, 0), 't': (2, 0),
                'h': (1, 0), 'v': (1, 0),
                'c': (6, 0), 'q': (4, 0),
                'a': (7, 5),
        }
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                if cmd not in emptyTest:
                        newPath.append( (cmd,data) )
                        continue
                (stride, firstTested) = emptyTest[cmd]
                newData = []
                for i in range(0, len(data), stride):
                        segment = data[i:i+stride]
                        if any(value != 0 for value in segment[firstTested:]):
                                newData.extend(segment)
                        else:
                                numPathSegmentsReduced += 1
                if newData:
                        newPath.append( (cmd,newData) )
        path = newPath

        # convert straight curves into lines
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                newData = data
                if cmd == 'c':
                        newData = []
                        for i in range(0, len(data), 6):
                                # since all commands are now relative, we can think of previous point
                                # as (0,0) and new point (dx,dy) is (data[i+4],data[i+5])
                                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                                (p1x,p1y) = (data[i],data[i+1])
                                (p2x,p2y) = (data[i+2],data[i+3])
                                dx = data[i+4]
                                dy = data[i+5]

                                if dx == 0:
                                        # a curve that returns to its start point (dy == 0 as
                                        # well) is not a line
                                        foundStraightCurve = (dy != 0 and p1x == 0 and p2x == 0)
                                else:
                                        m = dy/dx
                                        # both control points must lie on the line
                                        foundStraightCurve = (p1y == m*p1x and p2y == m*p2x)

                                if foundStraightCurve:
                                        # flush any existing curve coords first
                                        if newData:
                                                newPath.append( (cmd,newData) )
                                                newData = []
                                        # now create a straight line segment
                                        newPath.append( ('l', [dx,dy]) )
                                        numCurvesStraightened += 1
                                else:
                                        newData.extend(data[i:i+6])
                # commands left without data are dropped, except for closepath
                if newData or cmd == 'z' or cmd == 'Z':
                        newPath.append( (cmd,newData) )
        path = newPath

        # collapse all consecutive commands of the same type into one command
        path = _collapseConsecutiveCommands(path)

        # convert to shorthand path segments where possible
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                # convert line segments into h,v where possible
                if cmd == 'l':
                        lineTuples = []
                        for i in range(0, len(data), 2):
                                if data[i] == 0:
                                        # vertical: flush the pending line coords, then append the v
                                        if lineTuples:
                                                newPath.append( ('l', lineTuples) )
                                                lineTuples = []
                                        newPath.append( ('v', [data[i+1]]) )
                                        numPathSegmentsReduced += 1
                                elif data[i+1] == 0:
                                        # horizontal: flush the pending line coords, then append the h
                                        if lineTuples:
                                                newPath.append( ('l', lineTuples) )
                                                lineTuples = []
                                        newPath.append( ('h', [data[i]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        lineTuples.extend(data[i:i+2])
                        if lineTuples:
                                newPath.append( ('l', lineTuples) )

                # convert Bezier curve segments into s where possible
                elif cmd == 'c':
                        # control point an s command would assume for the next segment
                        bez_ctl_pt = (0,0)
                        curveTuples = []
                        for i in range(0, len(data), 6):
                                # if the first control point equals the assumed one we can
                                # substitute a shorthand bezier command
                                if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
                                        if curveTuples:
                                                newPath.append( ('c', curveTuples) )
                                                curveTuples = []
                                        newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        curveTuples.extend(data[i:i+6])

                                # rotate by 180deg means negate both coordinates: the second control
                                # point mirrored at the end point, relative to the end point
                                bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])

                        if curveTuples:
                                newPath.append( ('c', curveTuples) )

                # convert quadratic curve segments into t where possible
                elif cmd == 'q':
                        quad_ctl_pt = (0,0)
                        curveTuples = []
                        for i in range(0, len(data), 4):
                                if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
                                        if curveTuples:
                                                newPath.append( ('q', curveTuples) )
                                                curveTuples = []
                                        newPath.append( ('t', [data[i+2], data[i+3]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        curveTuples.extend(data[i:i+4])

                                # the control point mirrored at the end point, relative to the end point
                                quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])

                        if curveTuples:
                                newPath.append( ('q', curveTuples) )
                else:
                        newPath.append( (cmd, data) )
        path = newPath

        # for each h or v, collapse unnecessary coordinates that run in the same direction
        # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                if cmd in ['h','v'] and len(data) > 1:
                        newData = []
                        prevCoord = data[0]
                        for coord in data[1:]:
                                if isSameSign(prevCoord, coord):
                                        prevCoord += coord
                                        numPathSegmentsReduced += 1
                                else:
                                        newData.append(prevCoord)
                                        prevCoord = coord
                        newData.append(prevCoord)
                        newPath.append( (cmd, newData) )
                else:
                        newPath.append( (cmd, data) )
        path = newPath

        # it is possible that we have consecutive h, v, c, t commands now
        # so again collapse all consecutive commands of the same type into one command
        path = _collapseConsecutiveCommands(path)

        newPathStr = serializePath(path)
        numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
        element.setAttribute('d', newPathStr)
1703
def parseListOfPoints(s):
        """
                Parse string into a list of points.

                Returns a list containing an even number of coordinate strings.  An empty
                list is returned if s holds no values, holds an odd number of values, or
                if any value is not unitless.
        """

        # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
        # coordinate-pair = coordinate comma-or-wsp coordinate
        # coordinate = sign? integer

        # leading and trailing whitespace is allowed by the grammar above but would
        # produce empty tokens when splitting, so remove it first
        s = s.strip()
        if not s:
                return []

        # a separator is a comma or whitespace, optionally surrounded by more
        # whitespace.  The pattern always consumes at least one character, so it can
        # never split on an empty match.
        nums = re.split(r"\s*[\s,]\s*", s)

        # if we had an odd number of values, return empty
        if len(nums) % 2 != 0:
                return []

        points = []
        for i in range(0, len(nums), 2):
                x = SVGLength(nums[i])
                y = SVGLength(nums[i+1])

                # if the coordinates were not unitless, return empty
                if x.units != Unit.NONE or y.units != Unit.NONE: return []
                points.append( str(x.value) )
                points.append( str(y.value) )

        return points
1730
def cleanPolygon(elem):
        """
                Remove unnecessary closing point of polygon points attribute

                If the last point of the polygon equals its first point, the last point
                is dropped and the global numPointsRemovedFromPolygon is incremented.
                The points attribute is then rewritten in scoured form.
        """
        global numPointsRemovedFromPolygon

        pts = parseListOfPoints(elem.getAttribute('points'))
        # pts is a flat list x0,y0,x1,y1,... so it holds half as many points
        N = len(pts)//2
        if N >= 2:
                # the coordinates are strings, so this compares their textual form
                (startx,starty) = (pts[0],pts[1])
                (endx,endy) = (pts[-2],pts[-1])
                if startx == endx and starty == endy:
                        pts = pts[:-2]
                        numPointsRemovedFromPolygon += 1
        elem.setAttribute('points', scourCoordinates(pts))
1746
def cleanPolyline(elem):
        """
                Rewrites the points attribute of a polyline in scoured form
        """
        scoured = scourCoordinates(parseListOfPoints(elem.getAttribute('points')))
        elem.setAttribute('points', scoured)
1753
def serializePath(pathObj):
        """
                Reserializes the path data with some cleanups.

                pathObj is a list of (cmd, data) tuples.  Returns the path string made
                of each command letter followed by its scoured coordinates.
        """
        pieces = []
        for (cmd,data) in pathObj:
                pieces.append(cmd)
                # elliptical arc commands must have comma/wsp separating the coordinates
                # this fixes an issue outlined in Fix https://bugs.launchpad.net/scour/+bug/412754
                # (absolute arcs take the same arguments as relative ones, so both are covered)
                pieces.append(scourCoordinates(data, cmd in ('a', 'A')))
        return "".join(pieces)
1765
def scourCoordinates(data, forceCommaWsp = False):
        """
                Turns a sequence of coordinates into a compact string:
                        - every value is passed through scourLength
                        - no whitespace is written between values
                        - a comma is written before a value only if that value is not
                          negative, or before every value if forceCommaWsp is True

                Returns an empty string if data is None or empty.
        """
        if data is None:
                return ""

        pieces = []
        lastIndex = len(data) - 1
        for (pos, coord) in enumerate(data):
                # the scoured coordinate itself
                pieces.append(scourLength(coord))

                # the minus sign of a negative follower already separates the two values
                if pos < lastIndex and (forceCommaWsp or Decimal(data[pos+1]) >= 0):
                        pieces.append(',')
        return "".join(pieces)
1786
def scourLength(str):
        """
                Returns the shortest textual form of the length given in str.

                The length is parsed with SVGLength, its value is rounded to the current
                Decimal context precision, written as an integer if possible and stripped
                of trailing zeros.  Scientific notation is used if that is shorter.  The
                unit of the length is appended unchanged.
        """
        length = SVGLength(str)
        coord = length.value

        # reduce to the proper number of digits
        coord = Decimal(unicode(coord)) * Decimal(1)

        # integerize if we can
        if int(coord) == coord: coord = Decimal(unicode(int(coord)))

        # trim trailing zeros after the decimal point.  This must not be done if the
        # text has an exponent: its last characters are exponent digits, and removing
        # a zero there (1.5E-10 -> 1.5E-1) would change the value.  Trailing zeros
        # of such a mantissa are removed by normalize() below.
        s = unicode(coord)
        if '.' in s and 'E' not in s:
                s = s.rstrip('0')
        coord = Decimal(s)

        # Decimal.normalize() will use scientific notation - if that
        # string is smaller, then use it
        normd = coord.normalize()
        if len(unicode(normd)) < len(unicode(coord)):
                coord = normd

        return unicode(coord)+Unit.str(length.units)
1816
def embedRasters(element, options) :
        """
                Converts raster references to inline images.
                NOTE: there are size limits to base64-encoding handling in browsers

                If the xlink:href of element names a png, jpg or gif image that can be
                read from a local file or fetched from an http:// URL, the attribute is
                replaced by a base64 data: URI holding the image and the global
                numRastersEmbedded is incremented.  Otherwise element is left untouched.

                options.infilename, if set, is used to resolve a href that does not name
                an existing file.
        """
        global numRastersEmbedded

        href = element.getAttributeNS(NS['XLINK'],'href')

        # nothing to do without a reference
        if href == '' or len(href) <= 1:
                return

        # look for 'png', 'jpg', and 'gif' extensions
        ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
        if ext not in ('png', 'jpg', 'gif'):
                return

        # if href neither resolves to an existing file nor is an http URL, look for
        # it relative to the directory of the input file
        if not os.path.isfile(href) and href[:7] != 'http://':
                infilename = '.'
                if options.infilename: infilename = options.infilename
                href = os.path.join(os.path.dirname(infilename), href)

        rasterdata = ''
        # test if file exists locally
        if os.path.isfile(href):
                # open raster file as raw binary and make sure it is closed again
                raster = open( href, "rb")
                try:
                        rasterdata = raster.read()
                finally:
                        raster.close()

        elif href[:7] == 'http://':
                # NOTE: this downloads whatever URL the document names
                webFile = urllib.urlopen( href )
                try:
                        rasterdata = webFile.read()
                finally:
                        webFile.close()

        # ... should we remove all images which don't resolve?
        if rasterdata == '':
                return

        # base64-encode raster
        b64eRaster = base64.b64encode( rasterdata )
        if b64eRaster == '':
                return

        # PNG and GIF both have MIME Type 'image/[ext]', but
        # JPEG has MIME Type 'image/jpeg'
        if ext == 'jpg':
                ext = 'jpeg'

        # set href attribute to base64-encoded equivalent
        element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
        numRastersEmbedded += 1
1871
def properlySizeDoc(docElement):
    """
    Replace a static width/height on the given element with a viewBox.

    docElement is the element whose 'width', 'height' and 'viewBox'
    attributes are inspected (the caller passes the document element).

    The element is left untouched when:
      - width or height uses a unit other than none or px,
      - an existing viewBox has a non-zero x or y, or
      - an existing viewBox has a width/height that differs from the
        document width/height.
    Otherwise viewBox is set to '0 0 <width> <height>' and the width and
    height attributes are removed.  Returns None in every case.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
    # (both dimensions have to be checked, not the width twice)
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
            (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute
    vbSep = re.split("\\s*\\,?\\s*", docElement.getAttribute('viewBox'), 3)
    # if we have a valid viewBox we need to check it
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
1909
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Rename a namespace prefix on an element and all of its descendants.

    Every element at or below node whose prefix equals oldprefix is
    replaced in its parent by a newly created element that uses newprefix
    (or no prefix at all when newprefix is ''), carrying over all
    attributes and deep clones of the child nodes.  None and non-element
    nodes are ignored.  Returns None.
    """
    if node is None or node.nodeType != 1: return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node
        if newprefix != '':
            newNode = doc.createElementNS(namespace, newprefix + ":" + localName)
        else:
            newNode = doc.createElement(localName)

        # add all the attributes; setAttributeNS expects the qualified name,
        # so pass nodeName (e.g. 'xlink:href') rather than localName, which
        # would silently drop the attribute's own prefix
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            newNode.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.nodeValue)

        # clone and add all the child nodes
        for child in node.childNodes:
            newNode.appendChild(child.cloneNode(True))

        # replace old node with new node
        parent.replaceChild(newNode, node)
        # continue with the new node in the remapped namespace prefix
        node = newNode

    # now do all child nodes
    for child in node.childNodes:
        remapNamespacePrefix(child, oldprefix, newprefix)
1944
def makeWellFormed(str):
    """
    Return str with the XML special characters escaped.

    '&' becomes '&amp;', '<' becomes '&lt;' and '>' becomes '&gt;'.
    The ampersand is handled first so that the entities produced for
    '<' and '>' do not get escaped a second time.
    """
    # every replacement has to build on the previous result; restarting from
    # the input each time would throw the earlier escapes away
    newstr = str.replace('&', '&amp;')

    # encode < as &lt;
    newstr = newstr.replace('<', '&lt;')

    # encode > as &gt; (TODO: is this necessary?)
    newstr = newstr.replace('>', '&gt;')

    return newstr
1961
1962 # hand-rolled serialization function that has the following benefits:
1963 # - pretty printing
1964 # - somewhat judicious use of whitespace
1965 # - ensure id attributes are first
def serializeXML(element, options, ind = 0):
    """
    Serialize a DOM element and its subtree to a string.

    element is the element to write, options supplies indent_type
    ('tab', 'space', anything else means no indentation) and ind is the
    nesting depth used for indentation.

    The id and xml:id attributes are written first, then the remaining
    attributes in document order.  Child elements each go on their own
    line; text is stripped unless the parent is one of the text-bearing
    SVG elements; CDATA sections and comments are written verbatim; other
    node types are ignored.  Every element except the outermost one
    (ind == 0) is followed by a newline.
    """
    I = ''
    if options.indent_type == 'tab': I = '\t'
    elif options.indent_type == 'space': I = ' '

    # collect the pieces and join once at the end
    pieces = [(I * ind) + '<' + element.nodeName]

    # always serialize the id or xml:id attributes first
    # (an empty id is dropped: it is skipped here and in the loop below)
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            pieces.append(' ' + idName + '=' + _quoteAttrValue(idValue))

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length):
        attr = attrList.item(num)
        if attr.nodeName == 'id' or attr.nodeName == 'xml:id': continue

        pieces.append(' ')
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
            pieces.append('xmlns:')
        pieces.append(attr.nodeName + '=' + _quoteAttrValue(attr.nodeValue))

    # if no children, self-close
    children = element.childNodes
    if children.length > 0:
        pieces.append('>')

        onNewLine = False
        for child in children:
            # element node
            if child.nodeType == 1:
                pieces.append('\n' + serializeXML(child, options, ind + 1))
                onNewLine = True
            # text node
            elif child.nodeType == 3:
                # trim it only in the case of not being a child of an element
                # where whitespace might be important
                if element.nodeName in ["text", "tspan", "textPath", "tref", "title", "desc", "textArea"]:
                    pieces.append(makeWellFormed(child.nodeValue))
                else:
                    pieces.append(makeWellFormed(child.nodeValue.strip()))
            # CDATA node
            elif child.nodeType == 4:
                pieces.append('<![CDATA[' + child.nodeValue + ']]>')
            # Comment node
            elif child.nodeType == 8:
                pieces.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?
            # (every other node type is ignored)

        # the closing tag is only indented when a child element put us on a new line
        if onNewLine: pieces.append(I * ind)
        pieces.append('</' + element.nodeName + '>')
    else:
        pieces.append('/>')

    if ind > 0: pieces.append('\n')

    return ''.join(pieces)

def _quoteAttrValue(value):
    """
    Return value escaped and wrapped in quotes, ready to follow 'name='.

    Double quotes are used unless the value itself contains one, in which
    case single quotes are used.  A value containing both kinds of quote is
    wrapped in double quotes with its own double quotes written as &quot;,
    because neither delimiter could hold it unescaped.
    """
    escaped = makeWellFormed(value)
    if value.find('"') == -1:
        return '"' + escaped + '"'
    if value.find("'") == -1:
        return "'" + escaped + "'"
    return '"' + escaped.replace('"', '&quot;') + '"'
2043
2044 # this is the main method
2045 # input is a string representation of the input XML
2046 # returns a string representation of the output XML
def scourString(in_string, options=None):
    """
    Scour an SVG document given as a string and return the result as a string.

    in_string is the string representation of the input XML.  options is
    the parsed option set; when it is None the defaults of the module's
    option parser are used.  The decimal context precision is set from
    options.digits.  The module-level counters numAttrsRemoved,
    numStylePropsFixed, numElemsRemoved and numBytesSavedInColors are
    updated while the document is processed.
    """
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors

    if options is None:
        options = _options_parser.get_default_values()
    getcontext().prec = options.digits

    svgNamespace = 'http://www.w3.org/2000/svg'
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason a single pass does not always remove all
    # inkscape/sodipodi attributes/elements, so repeat until nothing is left
    # (does it have to do with removal of children affecting the childlist?)
    if options.keep_editor_data == False:
        while removeNamespacedElements(doc.documentElement, unwanted_ns) > 0:
            pass
        while removeNamespacedAttributes(doc.documentElement, unwanted_ns) > 0:
            pass

        # remove the xmlns: declarations now; collect the names first so the
        # attribute map is not modified while it is being walked
        rootAttrs = doc.documentElement.attributes
        editorDecls = []
        for idx in range(rootAttrs.length):
            candidate = rootAttrs.item(idx)
            if candidate.nodeValue in unwanted_ns:
                editorDecls.append(candidate.nodeName)

        for declName in editorDecls:
            doc.documentElement.removeAttribute(declName)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != svgNamespace:
        doc.documentElement.setAttribute('xmlns', svgNamespace)
        # TODO: throw error or warning?

    # check for redundant SVG namespace declarations (xmlns:foo bound to the SVG namespace)
    rootAttrs = doc.documentElement.attributes
    redundantDecls = []
    for idx in range(rootAttrs.length):
        candidate = rootAttrs.item(idx)
        if candidate.nodeName[0:6] == 'xmlns:' and candidate.nodeValue == svgNamespace:
            redundantDecls.append(candidate.nodeName)

    for declName in redundantDecls:
        doc.documentElement.removeAttribute(declName)

    # the prefix is whatever follows 'xmlns:' in the declaration name
    for declName in redundantDecls:
        remapNamespacePrefix(doc.documentElement, declName[6:], '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # NOTE: an element is kept as soon as it has an element (1), text (3),
    # CDATA (4) or comment (8) child; anything else counts as empty
    for tag in ('defs', 'metadata', 'g'):
        for elem in doc.documentElement.getElementsByTagName(tag):
            hasContent = False
            for child in elem.childNodes:
                if child.nodeType in (1, 3, 4, 8):
                    hasContent = True
                    break
            if not hasContent:
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    # strip ids until a pass removes nothing more
    if options.strip_ids:
        while True:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            if not (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0):
                break

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    # move common attributes to parent group
    # TODO: should make sure this is called with most-nested groups first
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # clean path data; paths without any data are dropped
    for pathElem in doc.documentElement.getElementsByTagName('path'):
        if pathElem.getAttribute('d') != '':
            cleanPath(pathElem)
        else:
            pathElem.parentNode.removeChild(pathElem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon'):
        cleanPolygon(polygon)

    # scour points of polyline
    for polyline in doc.documentElement.getElementsByTagName('polyline'):
        cleanPolygon(polyline)

    # scour lengths (including coordinates)
    lengthAttrs = ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                   'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                   'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']
    for tagName in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(tagName):
            for attrName in lengthAttrs:
                rawValue = elem.getAttribute(attrName)
                if rawValue != '':
                    elem.setAttribute(attrName, scourLength(rawValue))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for image in doc.documentElement.getElementsByTagName('image'):
            embedRasters(image, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    properlySizeDoc(doc.documentElement)

    # output the document with our own serializer instead of toprettyxml()
    # NOTE: minidom pretty printing was dropped because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolling our own also saves space, puts id first, customizes indentation, etc
    out_string = serializeXML(doc.documentElement, options)

    # get rid of empty lines (line endings are kept on the surviving lines)
    keptLines = [line for line in out_string.splitlines(True) if line.strip()]

    # prepend the XML prolog unless the caller asked for it to be stripped
    if options.strip_xml_prolog == False:
        xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    else:
        xmlprolog = ""

    return xmlprolog + "".join(keptLines)
2210
2211 # used mostly by unit tests
2212 # input is a filename
2213 # returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Scour the SVG file at filename and return the result as a minidom document.

    filename is the path of the input file; options is passed through to
    scourString unchanged.  The scoured output is encoded as UTF-8 and
    parsed again, so the return value is a fresh xml.dom.minidom document.
    """
    # close the input file explicitly instead of leaving the handle to the
    # garbage collector
    infile = open(filename)
    try:
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2218
2219 # GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
        Help formatter that prints the application name, version number
        and copyright statement before the regular usage text.
    """
    def format_usage(self, usage):
        # let the stock formatter render the usage line, then put the banner on top
        plainUsage = optparse.IndentedHelpFormatter.format_usage(self, usage)
        banner = (APP, VER, COPYRIGHT)
        return "%s %s\n%s\n%s" % (banner + (plainUsage,))
2228
2229 # GZ: would prefer this to be in a function or class scope, but tests etc need
2230 #     access to the defaults anyway
# Module-level option parser: shared by parse_args() and by scourString(),
# which falls back to its default values when no options are passed.
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    # the fragments are concatenated verbatim, so each one carries exactly
    # one separating space at its start
    description=("If the input/output files are specified with a svgz"
    " extension, then compressed SVG is assumed. If the input file is not"
    " specified, stdin is used. If the output file is not specified,"
    " stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# switches that turn off a default-on optimization
_options_parser.add_option("--disable-simplify-colors",
    action="store_false", dest="simple_colors", default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option("--disable-style-to-xml",
    action="store_false", dest="style_to_xml", default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option("--disable-group-collapsing",
    action="store_false", dest="group_collapse", default=True,
    help="won't collapse <g> elements")
# switches that turn on a default-off behavior
_options_parser.add_option("--enable-id-stripping",
    action="store_true", dest="strip_ids", default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option("--disable-embed-rasters",
    action="store_false", dest="embed_rasters", default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option("--keep-editor-data",
    action="store_true", dest="keep_editor_data", default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option("--strip-xml-prolog",
    action="store_true", dest="strip_xml_prolog", default=False,
    help="won't output the <?xml ?> prolog")

# GZ: this is confusing, most people will be thinking in terms of
#     decimal places, which is not what decimal precision is doing
_options_parser.add_option("-p", "--set-precision",
    action="store", type=int, dest="digits", default=5,
    help="set number of significant digits (default: %default)")
# -i and -o appear in the usage string but are hidden from the option list
_options_parser.add_option("-i",
    action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
    action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
# the accepted values are validated in parse_args()
_options_parser.add_option("--indent",
    action="store", type="string", dest="indent_type", default="space",
    help="indentation of the output: none, space, tab (default: %default)")
2274
def maybe_gziped_file(filename, mode="r"):
    """
    Open filename, transparently handling gzip compression.

    A name ending in .svgz or .gz (in any letter case) is opened as a
    gzip.GzipFile; anything else is opened as a regular file.  mode is
    handed to the chosen opener unchanged.  Returns the open file object.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to obtain a file object; the file()
    # constructor does the same on Python 2 but no longer exists on Python 3
    return open(filename, mode)
2279
def parse_args(args=None):
    """
    Parse the command line and open the input and output streams.

    args is handed to the module's option parser as is.  The parser's
    error() is invoked for leftover positional arguments, a negative
    number of significant digits or an unknown --indent value.

    Returns (options, [infile, outfile]); the streams default to
    sys.stdin and sys.stdout when no file name was given.
    """
    options, leftovers = _options_parser.parse_args(args)

    # validate what optparse itself does not check
    if leftovers:
        _options_parser.error("Additional arguments not handled: %r, see --help" % leftovers)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")
    if options.indent_type not in ("tab", "space", "none"):
        _options_parser.error("Invalid value for --indent, see --help")

    # GZ: could sniff for gzip compression on stdin here
    source = sys.stdin
    if options.infilename:
        # GZ: could catch a raised IOError here and report
        source = maybe_gziped_file(options.infilename)

    sink = sys.stdout
    if options.outfilename:
        sink = maybe_gziped_file(options.outfilename, "w")

    return options, [source, sink]
2302
def getReport():
    """
    Return a multi-line summary of the module-level statistics counters.

    One line per counter, each starting with a single space; the lines
    are separated by newlines and there is no trailing newline.
    """
    counters = [
        (' Number of elements removed: ', numElemsRemoved),
        (' Number of attributes removed: ', numAttrsRemoved),
        (' Number of unreferenced id attributes removed: ', numIDsRemoved),
        (' Number of style properties fixed: ', numStylePropsFixed),
        (' Number of raster images embedded inline: ', numRastersEmbedded),
        (' Number of path segments reduced/removed: ', numPathSegmentsReduced),
        (' Number of bytes saved in path data: ', numBytesSavedInPathData),
        (' Number of bytes saved in colors: ', numBytesSavedInColors),
        (' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    ]
    return '\n'.join([label + str(count) for label, count in counters])
2313
# Command-line entry point: scour the input, write the output and print a
# report (timing, statistics, size comparison) to stderr.
if __name__ == '__main__':
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        # (os.times()[0] is the user CPU time of the process, not wall-clock time)
        def get_tick():
            return os.times()[0]

    start = get_tick()

    options, (input, output) = parse_args()

    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    print >>sys.stderr, ' File:', input.name, \
        '\n Time taken:', str(end-start) + 's\n', \
        getReport()

    oldsize = len(in_string)
    newsize = len(out_string)
    # force float division: dividing two ints floors the ratio on Python 2,
    # which made the reported percentage 0 or 100
    sizediff = (float(newsize) / oldsize) * 100
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
2350
2351