share/extensions/scour.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 #  Scour
   5 #
   6 #  Copyright 2009 Jeff Schiller
   7 #
   8 #  This file is part of Scour, http://www.codedread.com/scour/
   9 #
  10 #   Licensed under the Apache License, Version 2.0 (the "License");
  11 #   you may not use this file except in compliance with the License.
  12 #   You may obtain a copy of the License at
  13 #
  14 #       http://www.apache.org/licenses/LICENSE-2.0
  15 #
  16 #   Unless required by applicable law or agreed to in writing, software
  17 #   distributed under the License is distributed on an "AS IS" BASIS,
  18 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19 #   See the License for the specific language governing permissions and
  20 #   limitations under the License.
  21
  22 # Notes:
  23
  24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
  25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
  26
  27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
  28 #
  29 # * Process Transformations
  30 #  * Collapse all group based transformations
  31
  32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
  33 #  * analysis of path elements to see if rect can be used instead? (must also need to look
  34 #    at rounded corners)
  35
  36 # Next Up:
  37 # - TODO: fix the removal of comment elements (between <?xml?> and <svg>)
  38 # - add an option to remove ids if they match the Inkscape-style of IDs
  39 # - investigate point-reducing algorithms
  40 # - parse transform attribute
  41 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
  42 # - option to remove metadata
  43
  44 # necessary to get true division
  45 from __future__ import division
  46
  47 import os
  48 import sys
  49 import xml.dom.minidom
  50 import re
  51 import math
  52 import base64
  53 import urllib
  54 from svg_regex import svg_parser
  55 import gzip
  56 import optparse
  57 from yocto_css import parseCssString
  58
  59 # Python 2.3- did not have Decimal
  60 try:
  61         from decimal import *
  62 except ImportError:
  63         from fixedpoint import *
  64         Decimal = FixedPoint
  65
# Program identification strings: short application name, version number
# and copyright notice.
# NOTE(review): presumably used for banner/usage output elsewhere in the
# file -- confirm against the callers.
APP = 'scour'
VER = '0.22'
COPYRIGHT = 'Copyright Jeff Schiller, 2009'
  69
  70 NS = {  'SVG':          'http://www.w3.org/2000/svg',
  71                 'XLINK':        'http://www.w3.org/1999/xlink',
  72                 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
  73                 'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
  74                 'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
  75                 'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
  76                 'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
  77                 'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
  78                 'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
  79                 'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
  80                 'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
  81                 'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
  82                 'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
  83                 'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
  84                 }
  85
  86 unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
  87                                 NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
  88                                 NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
  89                                 NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]
  90
  91 svgAttributes = [
  92                                 'clip-rule',
  93                                 'display',
  94                                 'fill',
  95                                 'fill-opacity',
  96                                 'fill-rule',
  97                                 'filter',
  98                                 'font-family',
  99                                 'font-size',
 100                                 'font-stretch',
 101                                 'font-style',
 102                                 'font-variant',
 103                                 'font-weight',
 104                                 'line-height',
 105                                 'marker',
 106                                 'opacity',
 107                                 'overflow',
 108                                 'stop-color',
 109                                 'stop-opacity',
 110                                 'stroke',
 111                                 'stroke-dashoffset',
 112                                 'stroke-linecap',
 113                                 'stroke-linejoin',
 114                                 'stroke-miterlimit',
 115                                 'stroke-opacity',
 116                                 'stroke-width',
 117                                 'visibility'
 118                                 ]
 119
# Maps a lower-case named colour keyword to its 'rgb(r, g, b)' string form.
# Note that several values carry a space right after the opening parenthesis
# (e.g. 'rgb( 0, 255, 255)'); any code parsing these strings has to tolerate
# that.
colors = {
    'aliceblue': 'rgb(240, 248, 255)',
    'antiquewhite': 'rgb(250, 235, 215)',
    'aqua': 'rgb( 0, 255, 255)',
    'aquamarine': 'rgb(127, 255, 212)',
    'azure': 'rgb(240, 255, 255)',
    'beige': 'rgb(245, 245, 220)',
    'bisque': 'rgb(255, 228, 196)',
    'black': 'rgb( 0, 0, 0)',
    'blanchedalmond': 'rgb(255, 235, 205)',
    'blue': 'rgb( 0, 0, 255)',
    'blueviolet': 'rgb(138, 43, 226)',
    'brown': 'rgb(165, 42, 42)',
    'burlywood': 'rgb(222, 184, 135)',
    'cadetblue': 'rgb( 95, 158, 160)',
    'chartreuse': 'rgb(127, 255, 0)',
    'chocolate': 'rgb(210, 105, 30)',
    'coral': 'rgb(255, 127, 80)',
    'cornflowerblue': 'rgb(100, 149, 237)',
    'cornsilk': 'rgb(255, 248, 220)',
    'crimson': 'rgb(220, 20, 60)',
    'cyan': 'rgb( 0, 255, 255)',
    'darkblue': 'rgb( 0, 0, 139)',
    'darkcyan': 'rgb( 0, 139, 139)',
    'darkgoldenrod': 'rgb(184, 134, 11)',
    'darkgray': 'rgb(169, 169, 169)',
    'darkgreen': 'rgb( 0, 100, 0)',
    'darkgrey': 'rgb(169, 169, 169)',
    'darkkhaki': 'rgb(189, 183, 107)',
    'darkmagenta': 'rgb(139, 0, 139)',
    'darkolivegreen': 'rgb( 85, 107, 47)',
    'darkorange': 'rgb(255, 140, 0)',
    'darkorchid': 'rgb(153, 50, 204)',
    'darkred': 'rgb(139, 0, 0)',
    'darksalmon': 'rgb(233, 150, 122)',
    'darkseagreen': 'rgb(143, 188, 143)',
    'darkslateblue': 'rgb( 72, 61, 139)',
    'darkslategray': 'rgb( 47, 79, 79)',
    'darkslategrey': 'rgb( 47, 79, 79)',
    'darkturquoise': 'rgb( 0, 206, 209)',
    'darkviolet': 'rgb(148, 0, 211)',
    'deeppink': 'rgb(255, 20, 147)',
    'deepskyblue': 'rgb( 0, 191, 255)',
    'dimgray': 'rgb(105, 105, 105)',
    'dimgrey': 'rgb(105, 105, 105)',
    'dodgerblue': 'rgb( 30, 144, 255)',
    'firebrick': 'rgb(178, 34, 34)',
    'floralwhite': 'rgb(255, 250, 240)',
    'forestgreen': 'rgb( 34, 139, 34)',
    'fuchsia': 'rgb(255, 0, 255)',
    'gainsboro': 'rgb(220, 220, 220)',
    'ghostwhite': 'rgb(248, 248, 255)',
    'gold': 'rgb(255, 215, 0)',
    'goldenrod': 'rgb(218, 165, 32)',
    'gray': 'rgb(128, 128, 128)',
    'grey': 'rgb(128, 128, 128)',
    'green': 'rgb( 0, 128, 0)',
    'greenyellow': 'rgb(173, 255, 47)',
    'honeydew': 'rgb(240, 255, 240)',
    'hotpink': 'rgb(255, 105, 180)',
    'indianred': 'rgb(205, 92, 92)',
    'indigo': 'rgb( 75, 0, 130)',
    'ivory': 'rgb(255, 255, 240)',
    'khaki': 'rgb(240, 230, 140)',
    'lavender': 'rgb(230, 230, 250)',
    'lavenderblush': 'rgb(255, 240, 245)',
    'lawngreen': 'rgb(124, 252, 0)',
    'lemonchiffon': 'rgb(255, 250, 205)',
    'lightblue': 'rgb(173, 216, 230)',
    'lightcoral': 'rgb(240, 128, 128)',
    'lightcyan': 'rgb(224, 255, 255)',
    'lightgoldenrodyellow': 'rgb(250, 250, 210)',
    'lightgray': 'rgb(211, 211, 211)',
    'lightgreen': 'rgb(144, 238, 144)',
    'lightgrey': 'rgb(211, 211, 211)',
    'lightpink': 'rgb(255, 182, 193)',
    'lightsalmon': 'rgb(255, 160, 122)',
    'lightseagreen': 'rgb( 32, 178, 170)',
    'lightskyblue': 'rgb(135, 206, 250)',
    'lightslategray': 'rgb(119, 136, 153)',
    'lightslategrey': 'rgb(119, 136, 153)',
    'lightsteelblue': 'rgb(176, 196, 222)',
    'lightyellow': 'rgb(255, 255, 224)',
    'lime': 'rgb( 0, 255, 0)',
    'limegreen': 'rgb( 50, 205, 50)',
    'linen': 'rgb(250, 240, 230)',
    'magenta': 'rgb(255, 0, 255)',
    'maroon': 'rgb(128, 0, 0)',
    'mediumaquamarine': 'rgb(102, 205, 170)',
    'mediumblue': 'rgb( 0, 0, 205)',
    'mediumorchid': 'rgb(186, 85, 211)',
    'mediumpurple': 'rgb(147, 112, 219)',
    'mediumseagreen': 'rgb( 60, 179, 113)',
    'mediumslateblue': 'rgb(123, 104, 238)',
    'mediumspringgreen': 'rgb( 0, 250, 154)',
    'mediumturquoise': 'rgb( 72, 209, 204)',
    'mediumvioletred': 'rgb(199, 21, 133)',
    'midnightblue': 'rgb( 25, 25, 112)',
    'mintcream': 'rgb(245, 255, 250)',
    'mistyrose': 'rgb(255, 228, 225)',
    'moccasin': 'rgb(255, 228, 181)',
    'navajowhite': 'rgb(255, 222, 173)',
    'navy': 'rgb( 0, 0, 128)',
    'oldlace': 'rgb(253, 245, 230)',
    'olive': 'rgb(128, 128, 0)',
    'olivedrab': 'rgb(107, 142, 35)',
    'orange': 'rgb(255, 165, 0)',
    'orangered': 'rgb(255, 69, 0)',
    'orchid': 'rgb(218, 112, 214)',
    'palegoldenrod': 'rgb(238, 232, 170)',
    'palegreen': 'rgb(152, 251, 152)',
    'paleturquoise': 'rgb(175, 238, 238)',
    'palevioletred': 'rgb(219, 112, 147)',
    'papayawhip': 'rgb(255, 239, 213)',
    'peachpuff': 'rgb(255, 218, 185)',
    'peru': 'rgb(205, 133, 63)',
    'pink': 'rgb(255, 192, 203)',
    'plum': 'rgb(221, 160, 221)',
    'powderblue': 'rgb(176, 224, 230)',
    'purple': 'rgb(128, 0, 128)',
    'red': 'rgb(255, 0, 0)',
    'rosybrown': 'rgb(188, 143, 143)',
    'royalblue': 'rgb( 65, 105, 225)',
    'saddlebrown': 'rgb(139, 69, 19)',
    'salmon': 'rgb(250, 128, 114)',
    'sandybrown': 'rgb(244, 164, 96)',
    'seagreen': 'rgb( 46, 139, 87)',
    'seashell': 'rgb(255, 245, 238)',
    'sienna': 'rgb(160, 82, 45)',
    'silver': 'rgb(192, 192, 192)',
    'skyblue': 'rgb(135, 206, 235)',
    'slateblue': 'rgb(106, 90, 205)',
    'slategray': 'rgb(112, 128, 144)',
    'slategrey': 'rgb(112, 128, 144)',
    'snow': 'rgb(255, 250, 250)',
    'springgreen': 'rgb( 0, 255, 127)',
    'steelblue': 'rgb( 70, 130, 180)',
    'tan': 'rgb(210, 180, 140)',
    'teal': 'rgb( 0, 128, 128)',
    'thistle': 'rgb(216, 191, 216)',
    'tomato': 'rgb(255, 99, 71)',
    'turquoise': 'rgb( 64, 224, 208)',
    'violet': 'rgb(238, 130, 238)',
    'wheat': 'rgb(245, 222, 179)',
    'white': 'rgb(255, 255, 255)',
    'whitesmoke': 'rgb(245, 245, 245)',
    'yellow': 'rgb(255, 255, 0)',
    'yellowgreen': 'rgb(154, 205, 50)',
    }
 269
 270 def isSameSign(a,b): return (a <= 0 and b <= 0) or (a >= 0 and b >= 0)
 271
 272 coord = re.compile("\\-?\\d+\\.?\\d*")
 273 scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
 274 number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
 275 sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
 276 unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
 277
 278 class Unit(object):
 279         INVALID = -1
 280         NONE = 0
 281         PCT = 1
 282         PX = 2
 283         PT = 3
 284         PC = 4
 285         EM = 5
 286         EX = 6
 287         CM = 7
 288         MM = 8
 289         IN = 9
 290
 291 #       @staticmethod
 292         def get(str):
 293                 # GZ: shadowing builtins like 'str' is generally bad form
 294                 # GZ: encoding stuff like this in a dict makes for nicer code
 295                 if str == None or str == '': return Unit.NONE
 296                 elif str == '%': return Unit.PCT
 297                 elif str == 'px': return Unit.PX
 298                 elif str == 'pt': return Unit.PT
 299                 elif str == 'pc': return Unit.PC
 300                 elif str == 'em': return Unit.EM
 301                 elif str == 'ex': return Unit.EX
 302                 elif str == 'cm': return Unit.CM
 303                 elif str == 'mm': return Unit.MM
 304                 elif str == 'in': return Unit.IN
 305                 return Unit.INVALID
 306
 307 #       @staticmethod
 308         def str(u):
 309                 if u == Unit.NONE: return ''
 310                 elif u == Unit.PCT: return '%'
 311                 elif u == Unit.PX: return 'px'
 312                 elif u == Unit.PT: return 'pt'
 313                 elif u == Unit.PC: return 'pc'
 314                 elif u == Unit.EM: return 'em'
 315                 elif u == Unit.EX: return 'ex'
 316                 elif u == Unit.CM: return 'cm'
 317                 elif u == Unit.MM: return 'mm'
 318                 elif u == Unit.IN: return 'in'
 319                 return 'INVALID'
 320
 321         get = staticmethod(get)
 322         str = staticmethod(str)
 323
 324 class SVGLength(object):
 325         def __init__(self, str):
 326                 try: # simple unitless and no scientific notation
 327                         self.value = float(str)
 328                         if int(self.value) == self.value:
 329                                 self.value = int(self.value)
 330                         self.units = Unit.NONE
 331                 except ValueError:
 332                         # we know that the length string has an exponent, a unit, both or is invalid
 333
 334                         # parse out number, exponent and unit
 335                         self.value = 0
 336                         unitBegin = 0
 337                         scinum = scinumber.match(str)
 338                         if scinum != None:
 339                                 # this will always match, no need to check it
 340                                 numMatch = number.match(str)
 341                                 expMatch = sciExponent.search(str, numMatch.start(0))
 342                                 self.value = (float(numMatch.group(0)) *
 343                                         10 ** float(expMatch.group(1)))
 344                                 unitBegin = expMatch.end(1)
 345                         else:
 346                                 # unit or invalid
 347                                 numMatch = number.match(str)
 348                                 if numMatch != None:
 349                                         self.value = float(numMatch.group(0))
 350                                         unitBegin = numMatch.end(0)
 351
 352                         if int(self.value) == self.value:
 353                                 self.value = int(self.value)
 354
 355                         if unitBegin != 0 :
 356                                 unitMatch = unit.search(str, unitBegin)
 357                                 if unitMatch != None :
 358                                         self.units = Unit.get(unitMatch.group(0))
 359
 360                         # invalid
 361                         else:
 362                                 # TODO: this needs to set the default for the given attribute (how?)
 363                                 self.value = 0
 364                                 self.units = Unit.INVALID
 365
 366 # returns the length of a property
 367 # TODO: eventually use the above class once it is complete
 368 def getSVGLength(value):
 369         try:
 370                 v = float(value)
 371         except ValueError:
 372                 coordMatch = coord.match(value)
 373                 if coordMatch != None:
 374                         unitMatch = unit.search(value, coordMatch.start(0))
 375                 v = value
 376         return v
 377
 378 def findElementById(node, id):
 379         if node == None or node.nodeType != 1: return None
 380         if node.getAttribute('id') == id: return node
 381         for child in node.childNodes :
 382                 e = findElementById(child,id)
 383                 if e != None: return e
 384         return None
 385
 386 def findElementsWithId(node, elems=None):
 387         """
 388         Returns all elements with id attributes
 389         """
 390         if elems is None:
 391                 elems = {}
 392         id = node.getAttribute('id')
 393         if id != '' :
 394                 elems[id] = node
 395         if node.hasChildNodes() :
 396                 for child in node.childNodes:
 397                         # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
 398                         # we are only really interested in nodes of type Element (1)
 399                         if child.nodeType == 1 :
 400                                 findElementsWithId(child, elems)
 401         return elems
 402
 403 referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',  'marker-start',
 404                                         'marker-end', 'marker-mid']
 405
 406 def findReferencedElements(node, ids=None):
 407         """
 408         Returns the number of times an ID is referenced as well as all elements
 409         that reference it.
 410
 411         Currently looks at fill, stroke, clip-path, mask, marker, and
 412         xlink:href attributes.
 413         """
 414         global referencingProps
 415         if ids is None:
 416                 ids = {}
 417         # TODO: input argument ids is clunky here (see below how it is called)
 418         # GZ: alternative to passing dict, use **kwargs
 419
 420         # if this node is a style element, parse its text into CSS
 421         if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
 422                 # node.firstChild will be either a CDATA or a Text node
 423                 if node.firstChild != None:
 424                         cssRules = parseCssString(node.firstChild.nodeValue)
 425                         for rule in cssRules:
 426                                 for propname in rule['properties']:
 427                                         propval = rule['properties'][propname]
 428                                         findReferencingProperty(node, propname, propval, ids)
 429                 return ids
 430
 431         # else if xlink:href is set, then grab the id
 432         href = node.getAttributeNS(NS['XLINK'],'href')
 433         if href != '' and len(href) > 1 and href[0] == '#':
 434                 # we remove the hash mark from the beginning of the id
 435                 id = href[1:]
 436                 if id in ids:
 437                         ids[id][0] += 1
 438                         ids[id][1].append(node)
 439                 else:
 440                         ids[id] = [1,[node]]
 441
 442         # now get all style properties and the fill, stroke, filter attributes
 443         styles = node.getAttribute('style').split(';')
 444         for attr in referencingProps:
 445                 styles.append(':'.join([attr, node.getAttribute(attr)]))
 446
 447         for style in styles:
 448                 propval = style.split(':')
 449                 if len(propval) == 2 :
 450                         prop = propval[0].strip()
 451                         val = propval[1].strip()
 452                         findReferencingProperty(node, prop, val, ids)
 453
 454         if node.hasChildNodes() :
 455                 for child in node.childNodes:
 456                         if child.nodeType == 1 :
 457                                 findReferencedElements(child, ids)
 458         return ids
 459
 460 def findReferencingProperty(node, prop, val, ids):
 461         global referencingProps
 462         if prop in referencingProps and val != '' :
 463                 if len(val) >= 7 and val[0:5] == 'url(#' :
 464                         id = val[5:val.find(')')]
 465                         if ids.has_key(id) :
 466                                 ids[id][0] += 1
 467                                 ids[id][1].append(node)
 468                         else:
 469                                 ids[id] = [1,[node]]
 470                 # if the url has a quote in it, we need to compensate
 471                 elif len(val) >= 8 :
 472                         id = None
 473                         # double-quote
 474                         if val[0:6] == 'url("#' :
 475                                 id = val[6:val.find('")')]
 476                         # single-quote
 477                         elif val[0:6] == "url('#" :
 478                                 id = val[6:val.find("')")]
 479                         if id != None:
 480                                 if ids.has_key(id) :
 481                                         ids[id][0] += 1
 482                                         ids[id][1].append(node)
 483                                 else:
 484                                         ids[id] = [1,[node]]
 485
# Module-wide statistic counters.  Functions that update one of these declare
# it with 'global' and increment it as they change the document.

# incremented by removeUnreferencedIDs
numIDsRemoved = 0
# incremented by removeUnreferencedElements, removeNamespacedElements and
# removeNestedGroups
numElemsRemoved = 0
# incremented by removeNamespacedAttributes
numAttrsRemoved = 0
# NOTE(review): the counters below are not updated by any code visible in
# this part of the file -- confirm where they are maintained.
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0
 495
 496 def removeUnusedDefs(doc, defElem, elemsToRemove=None):
 497         if elemsToRemove is None:
 498                 elemsToRemove = []
 499
 500         identifiedElements = findElementsWithId(doc.documentElement)
 501         referencedIDs = findReferencedElements(doc.documentElement)
 502
 503         keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
 504         for elem in defElem.childNodes:
 505                 if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
 506                         elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
 507                         continue
 508                 if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
 509                                 (not elem.getAttribute('id') in referencedIDs)) and \
 510                                 not elem.nodeName in keepTags:
 511                         elemsToRemove.append(elem)
 512         return elemsToRemove
 513
 514 def removeUnreferencedElements(doc):
 515         """
 516         Removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>.
 517         Also vacuums the defs of any non-referenced renderable elements.
 518
 519         Returns the number of unreferenced elements removed from the document.
 520         """
 521         global numElemsRemoved
 522         num = 0
 523         removeTags = ['linearGradient', 'radialGradient', 'pattern']
 524
 525         identifiedElements = findElementsWithId(doc.documentElement)
 526         referencedIDs = findReferencedElements(doc.documentElement)
 527
 528         for id in identifiedElements:
 529                 if not id in referencedIDs:
 530                         goner = findElementById(doc.documentElement, id)
 531                         if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
 532                                 goner.parentNode.removeChild(goner)
 533                                 num += 1
 534                                 numElemsRemoved += 1
 535
 536         # TODO: should also go through defs and vacuum it
 537         num = 0
 538         defs = doc.documentElement.getElementsByTagName('defs')
 539         for aDef in defs:
 540                 elemsToRemove = removeUnusedDefs(doc, aDef)
 541                 for elem in elemsToRemove:
 542                         elem.parentNode.removeChild(elem)
 543                         numElemsRemoved += 1
 544                         num += 1
 545         return num
 546
 547 def removeUnreferencedIDs(referencedIDs, identifiedElements):
 548         """
 549         Removes the unreferenced ID attributes.
 550
 551         Returns the number of ID attributes removed
 552         """
 553         global numIDsRemoved
 554         keepTags = ['font']
 555         num = 0;
 556         for id in identifiedElements.keys():
 557                 node = identifiedElements[id]
 558                 if referencedIDs.has_key(id) == False and not node.nodeName in keepTags:
 559                         node.removeAttribute('id')
 560                         numIDsRemoved += 1
 561                         num += 1
 562         return num
 563
 564 def removeNamespacedAttributes(node, namespaces):
 565         global numAttrsRemoved
 566         num = 0
 567         if node.nodeType == 1 :
 568                 # remove all namespace'd attributes from this element
 569                 attrList = node.attributes
 570                 attrsToRemove = []
 571                 for attrNum in range(attrList.length):
 572                         attr = attrList.item(attrNum)
 573                         if attr != None and attr.namespaceURI in namespaces:
 574                                 attrsToRemove.append(attr.nodeName)
 575                 for attrName in attrsToRemove :
 576                         num += 1
 577                         numAttrsRemoved += 1
 578                         node.removeAttribute(attrName)
 579
 580                 # now recurse for children
 581                 for child in node.childNodes:
 582                         num += removeNamespacedAttributes(child, namespaces)
 583         return num
 584
 585 def removeNamespacedElements(node, namespaces):
 586         global numElemsRemoved
 587         num = 0
 588         if node.nodeType == 1 :
 589                 # remove all namespace'd child nodes from this element
 590                 childList = node.childNodes
 591                 childrenToRemove = []
 592                 for child in childList:
 593                         if child != None and child.namespaceURI in namespaces:
 594                                 childrenToRemove.append(child)
 595                 for child in childrenToRemove :
 596                         num += 1
 597                         numElemsRemoved += 1
 598                         node.removeChild(child)
 599
 600                 # now recurse for children
 601                 for child in node.childNodes:
 602                         num += removeNamespacedElements(child, namespaces)
 603         return num
 604
 605 def removeNestedGroups(node):
 606         """
 607         This walks further and further down the tree, removing groups
 608         which do not have any attributes or a title/desc child and
 609         promoting their children up one level
 610         """
 611         global numElemsRemoved
 612         num = 0
 613
 614         groupsToRemove = []
 615         for child in node.childNodes:
 616                 if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
 617                         # only collapse group if it does not have a title or desc as a direct descendant
 618                         for grandchild in child.childNodes:
 619                                 if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
 620                                                 grandchild.nodeName in ['title','desc']:
 621                                         break
 622                         else:
 623                                 groupsToRemove.append(child)
 624
 625         for g in groupsToRemove:
 626                 while g.childNodes.length > 0:
 627                         g.parentNode.insertBefore(g.firstChild, g)
 628                 g.parentNode.removeChild(g)
 629                 numElemsRemoved += 1
 630                 num += 1
 631
 632         # now recurse for children
 633         for child in node.childNodes:
 634                 if child.nodeType == 1:
 635                         num += removeNestedGroups(child)
 636         return num
 637
 638 def moveCommonAttributesToParentGroup(elem):
 639         """
 640         This recursively calls this function on all children of the passed in element
 641         and then iterates over all child elements and removes common inheritable attributes
 642         from the children and places them in the parent group.  But only if the parent contains
 643         nothing but element children and whitespace.
 644         """
 645         num = 0
 646
 647         childElements = []
 648         # recurse first into the children (depth-first)
 649         for child in elem.childNodes:
 650                 if child.nodeType == 1:
 651                         childElements.append(child)
 652                         num += moveCommonAttributesToParentGroup(child)
 653                 # else if the parent has non-whitespace text children, do not
 654                 # try to move common attributes
 655                 elif child.nodeType == 3 and child.nodeValue.strip():
 656                         return num
 657
 658         # only process the children if there are more than one element
 659         if len(childElements) <= 1: return num
 660
 661         commonAttrs = {}
 662         # add all inheritable properties of the first child element
 663         # FIXME: Note there is a chance that the first child is a set/animate in which case
 664         # its fill attribute is not what we want to look at, we should look for the first
 665         # non-animate/set element
 666         attrList = childElements[0].attributes
 667         for num in range(attrList.length):
 668                 attr = attrList.item(num)
 669                 # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
 670                 # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
 671                 if attr.nodeName in ['clip-rule',
 672                                         'display-align',
 673                                         'fill', 'fill-opacity', 'fill-rule',
 674                                         'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
 675                                         'font-style', 'font-variant', 'font-weight',
 676                                         'letter-spacing',
 677                                         'pointer-events', 'shape-rendering',
 678                                         'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
 679                                         'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
 680                                         'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
 681                                         'word-spacing', 'writing-mode']:
 682                         # we just add all the attributes from the first child
 683                         commonAttrs[attr.nodeName] = attr.nodeValue
 684
 685         # for each subsequent child element
 686         for childNum in range(len(childElements)):
 687                 # skip first child
 688                 if childNum == 0:
 689                         continue
 690
 691                 child = childElements[childNum]
 692                 # if we are on an animateXXX/set element, ignore it (due to the 'fill' attribute)
 693                 if child.localName in ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']:
 694                         continue
 695
 696                 distinctAttrs = []
 697                 # loop through all current 'common' attributes
 698                 for name in commonAttrs.keys():
 699                         # if this child doesn't match that attribute, schedule it for removal
 700                         if child.getAttribute(name) != commonAttrs[name]:
 701                                 distinctAttrs.append(name)
 702                 # remove those attributes which are not common
 703                 for name in distinctAttrs:
 704                         del commonAttrs[name]
 705
 706         # commonAttrs now has all the inheritable attributes which are common among all child elements
 707         for name in commonAttrs.keys():
 708                 for child in childElements:
 709                         child.removeAttribute(name)
 710                 elem.setAttribute(name, commonAttrs[name])
 711
 712         # update our statistic (we remove N*M attributes and add back in M attributes)
 713         num += (len(childElements)-1) * len(commonAttrs)
 714         return num
 715
 716 def removeUnusedAttributesOnParent(elem):
 717         """
 718         This recursively calls this function on all children of the element passed in,
 719         then removes any unused attributes on this elem if none of the children inherit it
 720         """
 721         num = 0
 722
 723         childElements = []
 724         # recurse first into the children (depth-first)
 725         for child in elem.childNodes:
 726                 if child.nodeType == 1:
 727                         childElements.append(child)
 728                         num += removeUnusedAttributesOnParent(child)
 729
 730         # only process the children if there are more than one element
 731         if len(childElements) <= 1: return num
 732
 733         # get all attribute values on this parent
 734         attrList = elem.attributes
 735         unusedAttrs = {}
 736         for num in range(attrList.length):
 737                 attr = attrList.item(num)
 738                 if attr.nodeName in ['clip-rule',
 739                                         'display-align',
 740                                         'fill', 'fill-opacity', 'fill-rule',
 741                                         'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
 742                                         'font-style', 'font-variant', 'font-weight',
 743                                         'letter-spacing',
 744                                         'pointer-events', 'shape-rendering',
 745                                         'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
 746                                         'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
 747                                         'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
 748                                         'word-spacing', 'writing-mode']:
 749                         unusedAttrs[attr.nodeName] = attr.nodeValue
 750
 751         # for each child, if at least one child inherits the parent's attribute, then remove
 752         for childNum in range(len(childElements)):
 753                 child = childElements[childNum]
 754                 inheritedAttrs = []
 755                 for name in unusedAttrs.keys():
 756                         val = child.getAttribute(name)
 757                         if val == '' or val == None or val == 'inherit':
 758                                 inheritedAttrs.append(name)
 759                 for a in inheritedAttrs:
 760                         del unusedAttrs[a]
 761
 762         # unusedAttrs now has all the parent attributes that are unused
 763         for name in unusedAttrs.keys():
 764                 elem.removeAttribute(name)
 765                 num += 1
 766
 767         return num
 768
 769 def removeDuplicateGradientStops(doc):
 770         global numElemsRemoved
 771         num = 0
 772
 773         for gradType in ['linearGradient', 'radialGradient']:
 774                 for grad in doc.getElementsByTagName(gradType):
 775                         stops = {}
 776                         stopsToRemove = []
 777                         for stop in grad.getElementsByTagName('stop'):
 778                                 # convert percentages into a floating point number
 779                                 offsetU = SVGLength(stop.getAttribute('offset'))
 780                                 if offsetU.units == Unit.PCT:
 781                                         offset = offsetU.value / 100.0
 782                                 elif offsetU.units == Unit.NONE:
 783                                         offset = offsetU.value
 784                                 else:
 785                                         offset = 0
 786                                 # set the stop offset value to the integer or floating point equivalent
 787                                 if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
 788                                 else: stop.setAttribute('offset', str(offset))
 789
 790                                 color = stop.getAttribute('stop-color')
 791                                 opacity = stop.getAttribute('stop-opacity')
 792                                 if stops.has_key(offset) :
 793                                         oldStop = stops[offset]
 794                                         if oldStop[0] == color and oldStop[1] == opacity:
 795                                                 stopsToRemove.append(stop)
 796                                 stops[offset] = [color, opacity]
 797
 798                         for stop in stopsToRemove:
 799                                 stop.parentNode.removeChild(stop)
 800                                 num += 1
 801                                 numElemsRemoved += 1
 802
 803         # linear gradients
 804         return num
 805
 806 def collapseSinglyReferencedGradients(doc):
 807         global numElemsRemoved
 808         num = 0
 809
 810         # make sure to reset the ref'ed ids for when we are running this in testscour
 811         for rid,nodeCount in findReferencedElements(doc.documentElement).iteritems():
 812                 count = nodeCount[0]
 813                 nodes = nodeCount[1]
 814                 if count == 1:
 815                         elem = findElementById(doc.documentElement,rid)
 816                         if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
 817                                         and elem.namespaceURI == NS['SVG']:
 818                                 # found a gradient that is referenced by only 1 other element
 819                                 refElem = nodes[0]
 820                                 if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
 821                                                 and refElem.namespaceURI == NS['SVG']:
 822                                         # elem is a gradient referenced by only one other gradient (refElem)
 823
 824                                         # add the stops to the referencing gradient (this removes them from elem)
 825                                         if len(refElem.getElementsByTagName('stop')) == 0:
 826                                                 stopsToAdd = elem.getElementsByTagName('stop')
 827                                                 for stop in stopsToAdd:
 828                                                         refElem.appendChild(stop)
 829
 830                                         # adopt the gradientUnits, spreadMethod,  gradientTransform attributes if
 831                                         # they are unspecified on refElem
 832                                         for attr in ['gradientUnits','spreadMethod','gradientTransform']:
 833                                                 if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 834                                                         refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 835
 836                                         # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
 837                                         # they are unspecified on refElem
 838                                         if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
 839                                                 for attr in ['fx','fy','cx','cy','r']:
 840                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 841                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 842
 843                                         # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if
 844                                         # they are unspecified on refElem
 845                                         if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
 846                                                 for attr in ['x1','y1','x2','y2']:
 847                                                         if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
 848                                                                 refElem.setAttributeNS(None, attr, elem.getAttribute(attr))
 849
 850                                         # now remove the xlink:href from refElem
 851                                         refElem.removeAttributeNS(NS['XLINK'], 'href')
 852
 853                                         # now delete elem
 854                                         elem.parentNode.removeChild(elem)
 855                                         numElemsRemoved += 1
 856                                         num += 1
 857         return num
 858
 859 def removeDuplicateGradients(doc):
 860         global numElemsRemoved
 861         num = 0
 862
 863         gradientsToRemove = {}
 864         duplicateToMaster = {}
 865
 866         for gradType in ['linearGradient', 'radialGradient']:
 867                 grads = doc.getElementsByTagName(gradType)
 868                 for grad in grads:
 869                         # TODO: should slice grads from 'grad' here to optimize
 870                         for ograd in grads:
 871                                 # do not compare gradient to itself
 872                                 if grad == ograd: continue
 873
 874                                 # compare grad to ograd (all properties, then all stops)
 875                                 # if attributes do not match, go to next gradient
 876                                 someGradAttrsDoNotMatch = False
 877                                 for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
 878                                         if grad.getAttribute(attr) != ograd.getAttribute(attr):
 879                                                 someGradAttrsDoNotMatch = True
 880                                                 break;
 881
 882                                 if someGradAttrsDoNotMatch: continue
 883
 884                                 # compare xlink:href values too
 885                                 if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
 886                                         continue
 887
 888                                 # all gradient properties match, now time to compare stops
 889                                 stops = grad.getElementsByTagName('stop')
 890                                 ostops = ograd.getElementsByTagName('stop')
 891
 892                                 if stops.length != ostops.length: continue
 893
 894                                 # now compare stops
 895                                 stopsNotEqual = False
 896                                 for i in range(stops.length):
 897                                         if stopsNotEqual: break
 898                                         stop = stops.item(i)
 899                                         ostop = ostops.item(i)
 900                                         for attr in ['offset', 'stop-color', 'stop-opacity']:
 901                                                 if stop.getAttribute(attr) != ostop.getAttribute(attr):
 902                                                         stopsNotEqual = True
 903                                                         break
 904                                 if stopsNotEqual: continue
 905
 906                                 # ograd is a duplicate of grad, we schedule it to be removed UNLESS
 907                                 # ograd is ALREADY considered a 'master' element
 908                                 if not gradientsToRemove.has_key(ograd):
 909                                         if not duplicateToMaster.has_key(ograd):
 910                                                 if not gradientsToRemove.has_key(grad):
 911                                                         gradientsToRemove[grad] = []
 912                                                 gradientsToRemove[grad].append( ograd )
 913                                                 duplicateToMaster[ograd] = grad
 914
 915         # get a collection of all elements that are referenced and their referencing elements
 916         referencedIDs = findReferencedElements(doc.documentElement)
 917         for masterGrad in gradientsToRemove.keys():
 918                 master_id = masterGrad.getAttribute('id')
 919 #               print 'master='+master_id
 920                 for dupGrad in gradientsToRemove[masterGrad]:
 921                         # if the duplicate gradient no longer has a parent that means it was
 922                         # already re-mapped to another master gradient
 923                         if not dupGrad.parentNode: continue
 924                         dup_id = dupGrad.getAttribute('id')
 925 #                       print 'dup='+dup_id
 926 #                       print referencedIDs[dup_id]
 927                         # for each element that referenced the gradient we are going to remove
 928                         for elem in referencedIDs[dup_id][1]:
 929                                 # find out which attribute referenced the duplicate gradient
 930                                 for attr in ['fill', 'stroke']:
 931                                         v = elem.getAttribute(attr)
 932                                         if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
 933                                                 elem.setAttribute(attr, 'url(#'+master_id+')')
 934                                 if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
 935                                         elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)
 936
 937                         # now that all referencing elements have been re-mapped to the master
 938                         # it is safe to remove this gradient from the document
 939                         dupGrad.parentNode.removeChild(dupGrad)
 940                         numElemsRemoved += 1
 941                         num += 1
 942         return num
 943
 944 def repairStyle(node, options):
 945         num = 0
 946         if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
 947                 # get all style properties and stuff them into a dictionary
 948                 styleMap = { }
 949                 rawStyles = node.getAttribute('style').split(';')
 950                 for style in rawStyles:
 951                         propval = style.split(':')
 952                         if len(propval) == 2 :
 953                                 styleMap[propval[0].strip()] = propval[1].strip()
 954
 955                 # I've seen this enough to know that I need to correct it:
 956                 # fill: url(#linearGradient4918) rgb(0, 0, 0);
 957                 for prop in ['fill', 'stroke'] :
 958                         if styleMap.has_key(prop) :
 959                                 chunk = styleMap[prop].split(') ')
 960                                 if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
 961                                         styleMap[prop] = chunk[0] + ')'
 962                                         num += 1
 963
 964                 # Here is where we can weed out unnecessary styles like:
 965                 #  opacity:1
 966                 if styleMap.has_key('opacity') :
 967                         opacity = float(styleMap['opacity'])
 968                         # opacity='1.0' is useless, remove it
 969                         if opacity == 1.0 :
 970                                 del styleMap['opacity']
 971                                 num += 1
 972
 973                         # if opacity='0' then all fill and stroke properties are useless, remove them
 974                         elif opacity == 0.0 :
 975                                 for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
 976                                         'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap', 'stroke-dasharray',
 977                                         'stroke-dashoffset', 'stroke-opacity'] :
 978                                         if styleMap.has_key(uselessStyle):
 979                                                 del styleMap[uselessStyle]
 980                                                 num += 1
 981
 982                 #  if stroke:none, then remove all stroke-related properties (stroke-width, etc)
 983                 #  TODO: should also detect if the computed value of this element is stroke="none"
 984                 if styleMap.has_key('stroke') and styleMap['stroke'] == 'none' :
 985                         for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
 986                                         'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
 987                                 if styleMap.has_key(strokestyle) :
 988                                         del styleMap[strokestyle]
 989                                         num += 1
 990                         # TODO: This is actually a problem if a parent element has a specified stroke
 991                         # we need to properly calculate computed values
 992                         del styleMap['stroke']
 993
 994                 #  if fill:none, then remove all fill-related properties (fill-rule, etc)
 995                 if styleMap.has_key('fill') and styleMap['fill'] == 'none' :
 996                         for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
 997                                 if styleMap.has_key(fillstyle) :
 998                                         del styleMap[fillstyle]
 999                                         num += 1
1000
1001                 #  stop-opacity: 1
1002                 if styleMap.has_key('stop-opacity') :
1003                         if float(styleMap['stop-opacity']) == 1.0 :
1004                                 del styleMap['stop-opacity']
1005                                 num += 1
1006
1007                 #  fill-opacity: 1 or 0
1008                 if styleMap.has_key('fill-opacity') :
1009                         fillOpacity = float(styleMap['fill-opacity'])
1010                         #  TODO: This is actually a problem if the parent element does not have fill-opacity=1
1011                         if fillOpacity == 1.0 :
1012                                 del styleMap['fill-opacity']
1013                                 num += 1
1014                         elif fillOpacity == 0.0 :
1015                                 for uselessFillStyle in [ 'fill', 'fill-rule' ] :
1016                                         if styleMap.has_key(uselessFillStyle):
1017                                                 del styleMap[uselessFillStyle]
1018                                                 num += 1
1019
1020                 #  stroke-opacity: 1 or 0
1021                 if styleMap.has_key('stroke-opacity') :
1022                         strokeOpacity = float(styleMap['stroke-opacity'])
1023                         #  TODO: This is actually a problem if the parent element does not have stroke-opacity=1
1024                         if strokeOpacity == 1.0 :
1025                                 del styleMap['stroke-opacity']
1026                                 num += 1
1027                         elif strokeOpacity == 0.0 :
1028                                 for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
1029                                                         'stroke-dasharray', 'stroke-dashoffset' ] :
1030                                         if styleMap.has_key(uselessStrokeStyle):
1031                                                 del styleMap[uselessStrokeStyle]
1032                                                 num += 1
1033
1034                 # stroke-width: 0
1035                 if styleMap.has_key('stroke-width') :
1036                         strokeWidth = getSVGLength(styleMap['stroke-width'])
1037                         if strokeWidth == 0.0 :
1038                                 for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
1039                                                         'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
1040                                         if styleMap.has_key(uselessStrokeStyle):
1041                                                 del styleMap[uselessStrokeStyle]
1042                                                 num += 1
1043
1044                 # remove font properties for non-text elements
1045                 # I've actually observed this in real SVG content
1046                 if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
1047                         for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
1048                                                                 'font-style', 'font-variant', 'font-weight',
1049                                                                 'letter-spacing', 'line-height', 'kerning',
1050                                                                 'text-anchor', 'text-decoration', 'text-rendering',
1051                                                                 'unicode-bidi', 'word-spacing', 'writing-mode'] :
1052                                 if styleMap.has_key(fontstyle) :
1053                                         del styleMap[fontstyle]
1054                                         num += 1
1055
1056                 # remove inkscape-specific styles
1057                 # TODO: need to get a full list of these
1058                 for inkscapeStyle in ['-inkscape-font-specification']:
1059                         if styleMap.has_key(inkscapeStyle):
1060                                 del styleMap[inkscapeStyle]
1061                                 num += 1
1062
1063                 # visibility: visible
1064                 if styleMap.has_key('visibility') :
1065                         if styleMap['visibility'] == 'visible':
1066                                 del styleMap['visibility']
1067                                 num += 1
1068
1069                 # display: inline
1070                 if styleMap.has_key('display') :
1071                         if styleMap['display'] == 'inline':
1072                                 del styleMap['display']
1073                                 num += 1
1074
1075                 # overflow: visible or overflow specified on element other than svg, marker, pattern
1076                 if styleMap.has_key('overflow') :
1077                         if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
1078                                 del styleMap['overflow']
1079                                 num += 1
1080
1081                 # marker: none
1082                 if styleMap.has_key('marker') :
1083                         if styleMap['marker'] == 'none':
1084                                 del styleMap['marker']
1085                                 num += 1
1086
1087                 # now if any of the properties match known SVG attributes we prefer attributes
1088                 # over style so emit them and remove them from the style map
1089                 if options.style_to_xml:
1090                         for propName in styleMap.keys() :
1091                                 if propName in svgAttributes :
1092                                         node.setAttribute(propName, styleMap[propName])
1093                                         del styleMap[propName]
1094
1095                 # sew our remaining style properties back together into a style attribute
1096                 fixedStyle = ''
1097                 for prop in styleMap.keys() :
1098                         fixedStyle += prop + ':' + styleMap[prop] + ';'
1099
1100                 if fixedStyle != '' :
1101                         node.setAttribute('style', fixedStyle)
1102                 else:
1103                         node.removeAttribute('style')
1104
1105         # recurse for our child elements
1106         for child in node.childNodes :
1107                 num += repairStyle(child,options)
1108
1109         return num
1110
def removeDefaultAttributeValues(node, options):
    """
    Removes attributes whose value equals the SVG default from node and,
    recursively, from all its descendants.

    Handled defaults:
      gradientUnits="objectBoundingBox", spreadMethod="pad"
      x1, y1, y2 equal to 0
      x2="100%" on linearGradient
      fx equal to cx / fy equal to cy on radialGradient
      cx, cy, r equal to "50%" on radialGradient
    The unitless spellings of the percentage defaults (x2="1", cx/cy/r="0.5")
    are only treated as defaults when gradientUnits is not 'userSpaceOnUse',
    because in user space they are absolute coordinates.

    The gradient-only defaults are restricted to gradient elements: on other
    elements the same attribute names have different defaults (e.g. 'r' on
    circle or 'x2' on line), so removing them there would change the image.

    NOTE(review): gradientUnits inherited through xlink:href is not taken
    into account here.

    The options argument is not read by this function.

    Returns the number of attributes removed.
    """
    if node.nodeType != 1:
        return 0
    num = 0

    # gradientUnits: objectBoundingBox / spreadMethod: pad
    for name, default in (('gradientUnits', 'objectBoundingBox'), ('spreadMethod', 'pad')):
        if node.getAttribute(name) == default:
            node.removeAttribute(name)
            num += 1

    isLinearGradient = node.nodeName == 'linearGradient'
    isRadialGradient = node.nodeName == 'radialGradient'
    # unitless fractions only equal the percentage defaults in bounding-box units
    fractionsAllowed = node.getAttribute('gradientUnits') != 'userSpaceOnUse'

    def isDefaultLength(text, percentage, fraction):
        length = SVGLength(text)
        if length.units == Unit.PCT:
            return length.value == percentage
        return fractionsAllowed and length.units == Unit.NONE and length.value == fraction

    # x1, y1, y2: 0 (in whatever unit)
    for name in ('x1', 'y1', 'y2'):
        text = node.getAttribute(name)
        if text != '' and SVGLength(text).value == 0:
            node.removeAttribute(name)
            num += 1

    # x2: 100%
    if isLinearGradient:
        text = node.getAttribute('x2')
        if text != '' and isDefaultLength(text, 100, 1):
            node.removeAttribute('x2')
            num += 1

    if isRadialGradient:
        # fx: equal to cx, fy: equal to cy
        # (checked before cx/cy are possibly removed below)
        for focus, centre in (('fx', 'cx'), ('fy', 'cy')):
            text = node.getAttribute(focus)
            if text != '' and text == node.getAttribute(centre):
                node.removeAttribute(focus)
                num += 1

        # cx, cy, r: 50%
        for name in ('cx', 'cy', 'r'):
            text = node.getAttribute(name)
            if text != '' and isDefaultLength(text, 50, 0.5):
                node.removeAttribute(name)
                num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
1191
# rgb(R, G, B) with integer channels
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
# rgb(R%, G%, B%) with percentage channels
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
    """
        Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

        Values found as keys in the colors table are first replaced by their
        table entry; rgb(...) notations (integer or percentage channels) are
        converted to hexadecimal.  Channels above the valid range are clipped
        to 255.  Any other value, including the empty string, is returned
        unchanged.
    """
    s = value

    if s in colors:
        s = colors[s]

    rgbpMatch = rgbp.match(s)
    if rgbpMatch is not None:
        # percentages are scaled to 0..255 and truncated
        channels = [int(float(pct) * 255.0 / 100.0) for pct in rgbpMatch.groups()]
        s = 'rgb(%d,%d,%d)' % tuple(channels)

    rgbMatch = rgb.match(s)
    if rgbMatch is not None:
        # clip out-of-range channels so each one always fits two hex digits
        channels = [min(255, int(channel)) for channel in rgbMatch.groups()]
        s = '#%02X%02X%02X' % tuple(channels)

    # #RRGGBB collapses to #RGB when both digits of every channel are equal
    if len(s) == 7 and s[0] == '#' and s[1] == s[2] and s[3] == s[4] and s[5] == s[6]:
        s = '#' + (s[1] + s[3] + s[5]).upper()

    return s
1225
def convertColors(element):
    """
        Walks element and all of its descendants and rewrites color-valued
        attributes with the result of convertColor whenever that result is
        shorter than the current value.

        Which attributes are looked at depends on the element name:
        fill/stroke on basic shapes, path, g and a; stop-color on stop;
        solid-color on solidColor.

        Returns the total number of characters saved.
    """
    # only element nodes carry attributes
    if element.nodeType != 1:
        return 0

    tagName = element.nodeName
    if tagName in ['rect', 'circle', 'ellipse', 'polygon',
                   'line', 'polyline', 'path', 'g', 'a']:
        colorAttrs = ['fill', 'stroke']
    elif tagName in ['stop']:
        colorAttrs = ['stop-color']
    elif tagName in ['solidColor']:
        colorAttrs = ['solid-color']
    else:
        colorAttrs = []

    saved = 0
    for attrName in colorAttrs:
        before = element.getAttribute(attrName)
        if before == '':
            continue
        after = convertColor(before)
        # keep the original spelling unless the conversion is strictly shorter
        if len(before) > len(after):
            element.setAttribute(attrName, after)
            saved += len(before) - len(element.getAttribute(attrName))

    # descend into the children and add up their savings
    for childNode in element.childNodes:
        saved += convertColors(childNode)

    return saved
1260
1261 # TODO: go over what this method does and see if there is a way to optimize it
1262 # TODO: go over the performance of this method and see if I can save memory/speed by
1263 #       reusing data structures, etc
def _collapseConsecutiveCommands(path):
        """
                Merges runs of consecutive commands of the same type into one command.

                path is a list of (cmd, [numbers]) tuples.  The first entry is copied
                through untouched and is never merged with its successor.
                Returns a new list; the data lists of the input are not modified.
        """
        collapsed = [path[0]]
        prevCmd = ''
        prevData = []
        for (cmd,data) in path[1:]:
                if cmd == prevCmd:
                        # same type as the pending command: tack the coordinates on
                        prevData.extend(data)
                else:
                        # different type: flush the pending command and start a new one
                        if prevCmd != '':
                                collapsed.append( (prevCmd, prevData) )
                        prevCmd = cmd
                        prevData = list(data)
        # flush last command and data
        if prevCmd != '':
                collapsed.append( (prevCmd, prevData) )
        return collapsed

def cleanPath(element) :
        """
                Cleans the path string (d attribute) of the element

                The path data is parsed, every command after the first one is made
                relative, and then these passes are applied in order:
                        - segments that do not move the current point are dropped
                        - cubic curves whose control points lie on the chord become lines
                        - consecutive commands of the same type are merged
                        - l/c/q segments become h/v/s/t shorthands where possible
                        - h/v coordinates running in the same direction are summed
                The result is written back to the 'd' attribute and the module-level
                counters numBytesSavedInPathData, numPathSegmentsReduced and
                numCurvesStraightened are updated.

                A path without any commands is left untouched.
        """
        global numBytesSavedInPathData
        global numPathSegmentsReduced
        global numCurvesStraightened

        # this gets the parser object from svg_regex.py
        oldPathStr = element.getAttribute('d')
        pathObj = svg_parser.parse(oldPathStr)

        # however, this parser object has some ugliness in it (lists of tuples of tuples of
        # numbers and booleans).  we just need a list of (cmd,[numbers]):
        # (multiplying by Decimal(1) rounds the value to the current decimal context precision)
        path = []
        for (cmd,dataset) in pathObj:
                if cmd in ['M','m','L','l','T','t']:
                        # one or more tuples, each containing two numbers
                        nums = []
                        for t in dataset:
                                nums.append(Decimal(str(t[0])) * Decimal(1))
                                nums.append(Decimal(str(t[1])) * Decimal(1))

                        # only create this segment if it is not empty
                        if nums:
                                path.append( (cmd, nums) )

                elif cmd in ['V','v','H','h']:
                        # one or more numbers
                        nums = [Decimal(str(n)) for n in dataset]
                        if nums:
                                path.append( (cmd, nums) )

                elif cmd in ['C','c','S','s','Q','q']:
                        # one or more tuples, each containing tuples of two numbers each
                        # (three pairs for C/c, two pairs for S/s/Q/q)
                        nums = []
                        for t in dataset:
                                for pair in t:
                                        nums.append(Decimal(str(pair[0])) * Decimal(1))
                                        nums.append(Decimal(str(pair[1])) * Decimal(1))
                        path.append( (cmd, nums) )

                elif cmd in ['A','a']:
                        # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
                        # another boolean, and a tuple of two numbers
                        nums = []
                        for t in dataset:
                                nums.append( Decimal(str(t[0][0])) * Decimal(1) )
                                nums.append( Decimal(str(t[0][1])) * Decimal(1) )
                                nums.append( Decimal(str(t[1])) * Decimal(1))

                                # the two flags are stored as 1/0
                                if t[2]: nums.append( Decimal(1) )
                                else: nums.append( Decimal(0) )

                                if t[3]: nums.append( Decimal(1) )
                                else: nums.append( Decimal(0) )

                                nums.append( Decimal(str(t[4][0])) * Decimal(1) )
                                nums.append( Decimal(str(t[4][1])) * Decimal(1) )
                        path.append( (cmd, nums) )

                elif cmd in ['Z','z']:
                        path.append( (cmd, []) )

        # nothing to clean (and path[0] below would not exist)
        if not path:
                return

        # calculate the starting x,y coord for the second path command
        (firstCmd, firstData) = path[0]
        N = len(firstData)
        if N == 2:
                (x,y) = firstData
        elif firstCmd == 'M':
                # absolute move followed by absolute line coords:
                # take the last pair of coordinates for the starting point
                x = firstData[N-2]
                y = firstData[N-1]
        else:
                # relative move, accumulate coordinates for the starting point
                (x,y) = firstData[0],firstData[1]
                n = 2
                while n < N:
                        x += firstData[n]
                        y += firstData[n+1]
                        n += 2

        # now we have the starting point at x,y so let's save it
        # (startx,starty) is where a closepath returns to
        (startx,starty) = (x,y)

        # convert absolute coordinates into relative ones (start with the second subcommand
        # and leave the first M as absolute).  (x,y) tracks the current point throughout.
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                i = 0
                newCmd = cmd
                newData = data
                # adjust abs to rel
                # only the A command has some values that we don't want to adjust (radii, rotation, flags)
                if cmd == 'A':
                        newCmd = 'a'
                        newData = []
                        while i < len(data):
                                newData.extend(data[i:i+5])
                                newData.append(data[i+5]-x)
                                newData.append(data[i+6]-y)
                                x = data[i+5]
                                y = data[i+6]
                                i += 7
                elif cmd == 'a':
                        while i < len(data):
                                x += data[i+5]
                                y += data[i+6]
                                i += 7
                elif cmd == 'H':
                        newCmd = 'h'
                        newData = []
                        while i < len(data):
                                newData.append(data[i]-x)
                                x = data[i]
                                i += 1
                elif cmd == 'h':
                        while i < len(data):
                                x += data[i]
                                i += 1
                elif cmd == 'V':
                        newCmd = 'v'
                        newData = []
                        while i < len(data):
                                newData.append(data[i] - y)
                                y = data[i]
                                i += 1
                elif cmd == 'v':
                        while i < len(data):
                                y += data[i]
                                i += 1
                elif cmd in ['M','L','T']:
                        newCmd = cmd.lower()
                        newData = []
                        if cmd == 'M':
                                # a move starts a new subpath at its first point
                                startx = data[0]
                                starty = data[1]
                        while i < len(data):
                                newData.append( data[i] - x )
                                newData.append( data[i+1] - y )
                                x = data[i]
                                y = data[i+1]
                                i += 2
                elif cmd in ['m','l','t']:
                        if cmd == 'm':
                                # a relative move is an offset from the current point,
                                # not from the start of the previous subpath
                                startx = x + data[0]
                                starty = y + data[1]
                        while i < len(data):
                                x += data[i]
                                y += data[i+1]
                                i += 2
                elif cmd in ['S','Q']:
                        newCmd = cmd.lower()
                        newData = []
                        while i < len(data):
                                newData.append( data[i] - x )
                                newData.append( data[i+1] - y )
                                newData.append( data[i+2] - x )
                                newData.append( data[i+3] - y )
                                x = data[i+2]
                                y = data[i+3]
                                i += 4
                elif cmd in ['s','q']:
                        while i < len(data):
                                x += data[i+2]
                                y += data[i+3]
                                i += 4
                elif cmd == 'C':
                        newCmd = 'c'
                        newData = []
                        while i < len(data):
                                newData.append( data[i] - x )
                                newData.append( data[i+1] - y )
                                newData.append( data[i+2] - x )
                                newData.append( data[i+3] - y )
                                newData.append( data[i+4] - x )
                                newData.append( data[i+5] - y )
                                x = data[i+4]
                                y = data[i+5]
                                i += 6
                elif cmd == 'c':
                        while i < len(data):
                                x += data[i+4]
                                y += data[i+5]
                                i += 6
                elif cmd in ['z','Z']:
                        # closing the subpath moves the current point back to its start
                        x = startx
                        y = starty
                        newCmd = 'z'
                newPath.append( (newCmd, newData) )
        path = newPath

        # remove empty segments, i.e. segments whose end point equals their start point
        # maps command -> (values per segment, index of the end point's dx within a segment)
        endPointLayout = {'m': (2,0), 'l': (2,0), 't': (2,0), 'q': (4,2), 'c': (6,4), 'a': (7,5)}
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                if cmd in endPointLayout:
                        (stride, end) = endPointLayout[cmd]
                        newData = []
                        i = 0
                        while i < len(data):
                                if data[i+end] != 0 or data[i+end+1] != 0:
                                        newData.extend(data[i:i+stride])
                                else:
                                        numPathSegmentsReduced += 1
                                i += stride
                        if newData:
                                newPath.append( (cmd,newData) )
                elif cmd in ['h','v']:
                        newData = [coord for coord in data if coord != 0]
                        numPathSegmentsReduced += len(data) - len(newData)
                        if newData:
                                newPath.append( (cmd,newData) )
                else:
                        newPath.append( (cmd,data) )
        path = newPath

        # convert straight curves into lines
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                i = 0
                newData = data
                if cmd == 'c':
                        newData = []
                        while i < len(data):
                                # since all commands are now relative, we can think of previous point as (0,0)
                                # and new point (dx,dy) is (data[i+4],data[i+5])
                                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                                (p1x,p1y) = (data[i],data[i+1])
                                (p2x,p2y) = (data[i+2],data[i+3])
                                dx = data[i+4]
                                dy = data[i+5]

                                foundStraightCurve = False

                                if dx == 0:
                                        if p1x == 0 and p2x == 0:
                                                foundStraightCurve = True
                                else:
                                        m = dy/dx
                                        # both control points must satisfy y = m*x
                                        if p1y == m*p1x and p2y == m*p2x:
                                                foundStraightCurve = True

                                if foundStraightCurve:
                                        # flush any existing curve coords first
                                        if newData:
                                                newPath.append( (cmd,newData) )
                                                newData = []
                                        # now create a straight line segment
                                        newPath.append( ('l', [dx,dy]) )
                                        numCurvesStraightened += 1
                                else:
                                        newData.extend(data[i:i+6])

                                i += 6
                if newData or cmd == 'z' or cmd == 'Z':
                        newPath.append( (cmd,newData) )
        path = newPath

        # collapse all consecutive commands of the same type into one command
        path = _collapseConsecutiveCommands(path)

        # convert to shorthand path segments where possible
        # NOTE(review): the control point used for the s/t test below is reset to (0,0) at
        # the start of every c/q command, so a preceding s/t command's reflected control
        # point is not taken into account - confirm whether that case needs handling
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                # convert line segments into h,v where possible
                if cmd == 'l':
                        i = 0
                        lineTuples = []
                        while i < len(data):
                                if data[i] == 0:
                                        # vertical
                                        if lineTuples:
                                                # flush the existing line command
                                                newPath.append( ('l', lineTuples) )
                                                lineTuples = []
                                        # append the v and then the remaining line coords
                                        newPath.append( ('v', [data[i+1]]) )
                                        numPathSegmentsReduced += 1
                                elif data[i+1] == 0:
                                        # horizontal
                                        if lineTuples:
                                                # flush the line command, then append the h and then the remaining line coords
                                                newPath.append( ('l', lineTuples) )
                                                lineTuples = []
                                        newPath.append( ('h', [data[i]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        lineTuples.append(data[i])
                                        lineTuples.append(data[i+1])
                                i += 2
                        if lineTuples:
                                newPath.append( ('l', lineTuples) )
                # convert Bezier curve segments into s where possible
                elif cmd == 'c':
                        bez_ctl_pt = (0,0)
                        i = 0
                        curveTuples = []
                        while i < len(data):
                                # rotate by 180deg means negate both coordinates
                                # if the previous control point is equal then we can substitute a
                                # shorthand bezier command
                                if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
                                        if curveTuples:
                                                newPath.append( ('c', curveTuples) )
                                                curveTuples = []
                                        # append the s command
                                        newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        curveTuples.extend(data[i:i+6])

                                # set up control point for next curve segment: the second control
                                # point reflected about the end point, relative to that end point
                                bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])
                                i += 6

                        if curveTuples:
                                newPath.append( ('c', curveTuples) )
                # convert quadratic curve segments into t where possible
                elif cmd == 'q':
                        quad_ctl_pt = (0,0)
                        i = 0
                        curveTuples = []
                        while i < len(data):
                                if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
                                        if curveTuples:
                                                newPath.append( ('q', curveTuples) )
                                                curveTuples = []
                                        # append the t command
                                        newPath.append( ('t', [data[i+2], data[i+3]]) )
                                        numPathSegmentsReduced += 1
                                else:
                                        curveTuples.extend(data[i:i+4])

                                # the control point reflected about the end point, relative to it
                                quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])
                                i += 4

                        if curveTuples:
                                newPath.append( ('q', curveTuples) )
                else:
                        newPath.append( (cmd, data) )
        path = newPath

        # for each h or v, collapse unnecessary coordinates that run in the same direction
        # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
        newPath = [path[0]]
        for (cmd,data) in path[1:]:
                if cmd in ['h','v'] and len(data) > 1:
                        newData = []
                        prevCoord = data[0]
                        for coord in data[1:]:
                                if isSameSign(prevCoord, coord):
                                        prevCoord += coord
                                        numPathSegmentsReduced += 1
                                else:
                                        newData.append(prevCoord)
                                        prevCoord = coord
                        newData.append(prevCoord)
                        newPath.append( (cmd, newData) )
                else:
                        newPath.append( (cmd, data) )
        path = newPath

        # it is possible that we have consecutive h, v, c, t commands now
        # so again collapse all consecutive commands of the same type into one command
        path = _collapseConsecutiveCommands(path)

        newPathStr = serializePath(path)
        numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
        element.setAttribute('d', newPathStr)
1770
def parseListOfPoints(s):
        """
                Parse string into a list of points.

                Returns a list containing an even number of coordinate strings.
                An empty list is returned when the number of values is odd or when
                any coordinate carries a unit.
        """
        # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
        # coordinate-pair = coordinate comma-or-wsp coordinate
        # comma-or-wsp = (wsp* comma wsp*) | wsp+
        # The separator pattern must consume at least one character: a pattern that
        # can match the empty string makes re.split() cut between every character
        # on Python 3.7+ (older versions silently ignore empty matches).
        nums = re.split(r"\s*,\s*|\s+", s.strip())

        # if we had an odd number of values they cannot be paired up, return empty
        if len(nums) % 2 != 0:
                return []

        points = []
        for i in range(0, len(nums), 2):
                x = SVGLength(nums[i])
                y = SVGLength(nums[i+1])

                # if the coordinates were not unitless, return empty
                if x.units != Unit.NONE or y.units != Unit.NONE: return []
                points.append( str(x.value) )
                points.append( str(y.value) )

        return points
1796
def cleanPolygon(elem):
        """
                Remove unnecessary closing point of polygon points attribute

                If the last point repeats the first one it is dropped and
                numPointsRemovedFromPolygon is incremented.  The points attribute is
                then rewritten through scourCoordinates().
        """
        global numPointsRemovedFromPolygon

        pts = parseListOfPoints(elem.getAttribute('points'))
        # pts is a flat [x0, y0, x1, y1, ...] list; we need at least two points
        if len(pts) >= 4:
                (startx,starty) = (pts[0],pts[1])
                (endx,endy) = (pts[-2],pts[-1])
                if startx == endx and starty == endy:
                        pts = pts[:-2]
                        numPointsRemovedFromPolygon += 1
        elem.setAttribute('points', scourCoordinates(pts))
1812
def cleanPolyline(elem):
        """
                Rewrites the polyline's points attribute: the value is parsed with
                parseListOfPoints() and re-serialized with scourCoordinates()
        """
        rawPoints = elem.getAttribute('points')
        elem.setAttribute('points', scourCoordinates(parseListOfPoints(rawPoints)))
1819
def serializePath(pathObj):
        """
                Turns a list of (cmd, [numbers]) tuples back into a path data string,
                scouring the coordinates of every command on the way.
        """
        # elliptical arc commands must have comma/wsp separating the coordinates
        # this fixes an issue outlined in Fix https://bugs.launchpad.net/scour/+bug/412754
        segments = [cmd + scourCoordinates(data, (cmd == 'a')) for (cmd,data) in pathObj]
        return "".join(segments)
1831
def scourCoordinates(data, forceCommaWsp = False):
        """
                Builds the string form of a list of coordinates:
                        - each value is passed through scourLength()
                        - a comma is put between two values only when the second one is
                          non-negative (a minus sign already separates them) or when
                          forceCommaWsp is True
                Returns the empty string when data is None or empty.
        """
        if data is None:
                return ""

        pieces = []
        lastIndex = len(data) - 1
        for (index, coord) in enumerate(data):
                # add the scoured coordinate to the output
                pieces.append(scourLength(coord))

                # decide on the separator by peeking at the value that follows
                if index < lastIndex and (forceCommaWsp or Decimal(data[index+1]) >= 0):
                        pieces.append(',')
        return "".join(pieces)
1852
def scourLength(str):
    """
    Scour a single length or coordinate and return it as a unicode string.

    The input is parsed with SVGLength. Its value is rounded to the precision
    of the current decimal context, converted to an integer when that loses
    nothing, stripped of trailing zeros in the fractional part, and written
    in normalized (scientific) notation when that form is shorter. The unit
    of the parsed length is appended using Unit.str().

    The parameter name shadows the builtin 'str'; it is kept so that the
    signature stays unchanged for existing callers.
    """
    length = SVGLength(str)
    coord = length.value

    # multiplying by one rounds to the precision of the decimal context
    coord = Decimal(unicode(coord)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # Use Decimal.trim() if the running interpreter offers it.
    # NOTE(review): the standard decimal documentation does not list a trim()
    # method, so the fallback below is presumably what always runs - confirm.
    try:
        coord = coord.trim()
    except AttributeError:
        # trim it ourselves
        s = unicode(coord)

        # Split off the exponent first: only the mantissa may lose trailing
        # zeros. Trimming the whole string would turn '1.5E-10' into '1.5E-1'.
        expPos = s.upper().find('E')
        if expPos == -1:
            mantissa, exponent = s, ''
        else:
            mantissa, exponent = s[:expPos], s[expPos:]

        if mantissa.find('.') != -1:
            mantissa = mantissa.rstrip('0')
        coord = Decimal(mantissa + exponent)

        # Decimal.normalize() will use scientific notation - if that
        # string is smaller, then use it
        normd = coord.normalize()
        if len(unicode(normd)) < len(unicode(coord)):
            coord = normd

    return unicode(coord)+Unit.str(length.units)
1882
def embedRasters(element, options) :
    """
    Convert a raster reference into an inline base64 'data:' URI.

    element is expected to carry an xlink:href attribute. Only references
    whose file extension is png, jpg or gif are handled. A reference that
    is not an existing file and does not start with 'http://' is resolved
    relative to the directory of options.infilename (or '.' when that is
    not set). When the raster data can be read, the xlink:href attribute is
    replaced in place and the global numRastersEmbedded is incremented.

    NOTE: there are size limits to base64-encoding handling in browsers
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # nothing to embed without a usable reference
    if href == '' or len(href) <= 1:
        return

    # extension of the referenced file, without the leading dot
    ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
    if ext not in ('png', 'jpg', 'gif'):
        return

    # if the reference is neither an existing file nor a web address,
    # interpret it relative to the location of the input file
    if not os.path.isfile(href) and href[:7] != 'http://':
        infilename = '.'
        if options.infilename: infilename = options.infilename
        href = os.path.join(os.path.dirname(infilename), href)

    rasterdata = ''
    if os.path.isfile(href):
        # read the raster as raw binary and make sure the handle is released
        raster = open(href, "rb")
        try:
            rasterdata = raster.read()
        finally:
            raster.close()
    elif href[:7] == 'http://':
        webFile = urllib.urlopen( href )
        try:
            rasterdata = webFile.read()
        finally:
            webFile.close()

    # ... should we remove all images which don't resolve?
    if rasterdata != '' :
        b64eRaster = base64.b64encode( rasterdata )

        if b64eRaster != '':
            # PNG and GIF both have MIME Type 'image/[ext]', but
            # JPEG has MIME Type 'image/jpeg'
            if ext == 'jpg':
                ext = 'jpeg'

            element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
            numRastersEmbedded += 1
1937
def properlySizeDoc(docElement):
    """
    Replace a fixed width/height on the document element by a viewBox.

    Nothing is changed when width or height carries a unit other than none
    or px, when an existing viewBox has a non-zero origin, or when its size
    differs from the width/height values. Otherwise the viewBox attribute is
    set to '0 0 <width> <height>' and the width and height attributes are
    removed. An existing viewBox that does not parse as four numbers is
    overwritten.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
    # (both dimensions have to be checked, not the width twice)
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
        (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute; the separator is a comma with optional
    # surrounding whitespace, or a run of whitespace. The pattern cannot
    # match an empty string, so the result does not depend on how re.split
    # treats empty matches.
    vbSep = re.split(r"\s*,\s*|\s+", docElement.getAttribute('viewBox'), 3)
    # if we have a valid viewBox we need to check it
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
1975
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Recursively give elements with prefix oldprefix the prefix newprefix.

    A matching element is replaced in its parent by a newly created element
    (unprefixed when newprefix is the empty string) that receives copies of
    the attributes and deep clones of the children. Non-element nodes and
    None are ignored. Nothing is returned; the tree is changed in place.
    """
    # only element nodes are of interest
    if node is None or node.nodeType != 1:
        return

    if node.prefix == oldprefix:
        ownerDoc = node.ownerDocument

        # build the element that takes the place of the old one
        if newprefix != '':
            replacement = ownerDoc.createElementNS(node.namespaceURI, newprefix + ":" + node.localName)
        else:
            replacement = ownerDoc.createElement(node.localName)

        # carry over every attribute
        attributes = node.attributes
        for index in range(attributes.length):
            attribute = attributes.item(index)
            replacement.setAttributeNS(attribute.namespaceURI, attribute.localName, attribute.nodeValue)

        # carry over deep copies of the children
        for original in node.childNodes:
            replacement.appendChild(original.cloneNode(True))

        # swap the elements and continue with the new one
        node.parentNode.replaceChild(replacement, node)
        node = replacement

    # descend into the children (of the replacement, if one was made)
    for child in node.childNodes:
        remapNamespacePrefix(child, oldprefix, newprefix)
2010
def makeWellFormed(str):
    """
    Escape the characters &, < and > so the text can be written into XML.

    Returns a new string in which every '&' is replaced by '&amp;', every
    '<' by '&lt;' and every '>' by '&gt;'. The parameter name shadows the
    builtin 'str'; it is kept so that the signature stays unchanged.
    """
    # encode & as &amp; first so that the ampersands introduced by the
    # other entities are not escaped a second time
    newstr = str.replace('&', '&amp;')

    # each following step works on the result of the previous one, so that
    # no earlier replacement is lost
    newstr = newstr.replace('<', '&lt;')

    # encode > as &gt; (TODO: is this necessary?)
    newstr = newstr.replace('>', '&gt;')

    return newstr
2027
# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0, preserveWhitespace = False):
    """
    Serialize element and its subtree into a string.

    options.indent_type selects the indentation unit ('tab', 'space', any
    other value means no indentation) and ind is the nesting depth of the
    element. The id and xml:id attributes are written before all others.
    preserveWhitespace tells whether text nodes are written untouched; an
    xml:space attribute on the element overrides it for the subtree.
    """
    indentUnit = ''
    if options.indent_type == 'tab': indentUnit = '\t'
    elif options.indent_type == 'space': indentUnit = ' '
    margin = indentUnit * ind

    def quoteFor(value):
        # double quotes by default, single quotes if the value contains a double quote
        if value.find('"') != -1:
            return "'"
        return '"'

    parts = [margin, '<', element.nodeName]

    # always serialize the id or xml:id attributes first
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            q = quoteFor(idValue)
            parts.append(' ' + idName + '=' + q + idValue + q)

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length):
        attr = attrList.item(num)
        if attr.nodeName in ('id', 'xml:id'):
            continue

        q = quoteFor(attr.nodeValue)
        attrValue = makeWellFormed(attr.nodeValue)

        # preserve xmlns: if it is a namespace prefix declaration
        qualifier = ''
        if attr.prefix is not None:
            qualifier = attr.prefix + ':'
        elif attr.namespaceURI is not None:
            if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
                qualifier = 'xmlns:'
            elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
                qualifier = 'xlink:'
        parts.append(' ' + qualifier + attr.localName + '=' + q + attrValue + q)

        # xml:space switches whitespace handling for this element's content
        if attr.nodeName == 'xml:space':
            if attrValue == 'preserve':
                preserveWhitespace = True
            elif attrValue == 'default':
                preserveWhitespace = False

    if element.childNodes.length > 0:
        parts.append('>')

        onNewLine = False
        for child in element.childNodes:
            kind = child.nodeType
            if kind == 1:
                # element node
                if preserveWhitespace:
                    parts.append(serializeXML(child, options, 0, preserveWhitespace))
                else:
                    parts.append('\n' + serializeXML(child, options, ind + 1, preserveWhitespace))
                    onNewLine = True
            elif kind == 3:
                # text node: trimmed unless whitespace might be important
                text = child.nodeValue
                if not preserveWhitespace:
                    text = text.strip()
                parts.append(makeWellFormed(text))
            elif kind == 4:
                # CDATA node
                parts.append('<![CDATA[' + child.nodeValue + ']]>')
            elif kind == 8:
                # comment node
                parts.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?
            # every other node type is ignored

        if onNewLine:
            parts.append(margin)
        parts.append('</' + element.nodeName + '>')
    else:
        # no children: self-close
        parts.append('/>')

    # nested elements end their line, the outermost one does not
    if ind > 0:
        parts.append('\n')

    return ''.join(parts)
2123
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
    """
    Scour an SVG document given as a string and return the result.

    in_string is parsed with xml.dom.minidom. options holds the parsed
    command line options; when it is None the defaults of _options_parser
    are used. The cleaning passes are run in a fixed order and the document
    is serialized with serializeXML(). Unless options.strip_xml_prolog is
    set, an XML prolog is put in front of the returned string.

    The module level counters numAttrsRemoved, numStylePropsFixed,
    numElemsRemoved and numBytesSavedInColors are updated, and the precision
    of the decimal context is set to options.digits.
    """
    if options is None:
        options = _options_parser.get_default_values()
    getcontext().prec = options.digits
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # An element counts as empty when it has no child nodes at all, or when
    # none of its children is an element, text, CDATA or comment node.
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagName(tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    if options.strip_ids:
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # move common attributes to parent group
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    # remove unused attributes from parent
    numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

    # clean path data
    for elem in doc.documentElement.getElementsByTagName('path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    # A polyline is an open shape: its last point must be kept even when it
    # equals the first one, so cleanPolygon() must not be used here.
    for polyline in doc.documentElement.getElementsByTagName('polyline') :
        cleanPolyline(polyline)

    # scour lengths (including coordinates)
    for tagName in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(tagName):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                         'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                         'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagName('image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    properlySizeDoc(doc.documentElement)

    # output the document as a pretty string with a single space for indent
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolled our own serialize function here to save on space, put id first, customize indentation, etc
#   out_string = doc.documentElement.toprettyxml(' ')
    out_string = serializeXML(doc.documentElement, options)

    # now strip out empty lines
    lines = []
    # Get rid of empty lines
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string stripped of empty lines
    if options.strip_xml_prolog == False:
        xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    else:
        xmlprolog = ""

    return xmlprolog + "".join(lines)
2292
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Scour the SVG file called filename and return the result as a DOM.

    The file content is passed to scourString() together with options; the
    returned string is encoded as UTF-8 and parsed with xml.dom.minidom.
    The input file is closed again before the content is processed.
    """
    infile = open(filename)
    try:
        in_string = infile.read()
    finally:
        # do not rely on garbage collection to release the handle
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2300
# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that puts the application name, the version number and
    the copyright statement in front of the usage information.
    """
    def format_usage(self, usage):
        # the usage text as the base class would print it
        plainUsage = optparse.IndentedHelpFormatter.format_usage(self, usage)
        banner = "%s %s" % (APP, VER)
        return "%s\n%s\n%s" % (banner, COPYRIGHT, plainUsage)
2310
# GZ: would prefer this to be in a function or class scope, but tests etc need
#     access to the defaults anyway
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    description=(
        "If the input/output files are specified with a svgz extension, then compressed SVG is assumed. "
        "If the input file is not specified, stdin is used. "
        "If the output file is not specified,  stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# Every entry is (option strings, keyword arguments for add_option); the
# options are registered in the order in which they are listed here.
for _flags, _settings in (
    (("--disable-simplify-colors",),
        dict(action="store_false", dest="simple_colors", default=True,
            help="won't convert all colors to #RRGGBB format")),
    (("--disable-style-to-xml",),
        dict(action="store_false", dest="style_to_xml", default=True,
            help="won't convert styles into XML attributes")),
    (("--disable-group-collapsing",),
        dict(action="store_false", dest="group_collapse", default=True,
            help="won't collapse <g> elements")),
    (("--enable-id-stripping",),
        dict(action="store_true", dest="strip_ids", default=False,
            help="remove all un-referenced ID attributes")),
    (("--disable-embed-rasters",),
        dict(action="store_false", dest="embed_rasters", default=True,
            help="won't embed rasters as base64-encoded data")),
    (("--keep-editor-data",),
        dict(action="store_true", dest="keep_editor_data", default=False,
            help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")),
    (("--strip-xml-prolog",),
        dict(action="store_true", dest="strip_xml_prolog", default=False,
            help="won't output the <?xml ?> prolog")),
    # GZ: this is confusing, most people will be thinking in terms of
    #     decimal places, which is not what decimal precision is doing
    (("-p", "--set-precision"),
        dict(action="store", type=int, dest="digits", default=5,
            help="set number of significant digits (default: %default)")),
    (("-i",),
        dict(action="store", dest="infilename", help=optparse.SUPPRESS_HELP)),
    (("-o",),
        dict(action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)),
    (("--indent",),
        dict(action="store", type="string", dest="indent_type", default="space",
            help="indentation of the output: none, space, tab (default: %default)")),
):
    _options_parser.add_option(*_flags, **_settings)
# the loop variables are not meant to stay in the module namespace
del _flags, _settings

2356
def maybe_gziped_file(filename, mode="r"):
    """
    Open filename and return the file object.

    A name ending in '.svgz' or '.gz' (compared case-insensitively) is
    opened with gzip.GzipFile, every other name with the builtin open().
    mode is passed on unchanged in both cases.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to open a file; calling the file type
    # directly is discouraged
    return open(filename, mode)
2361
def parse_args(args=None):
    """
    Parse the command line and open the input and output streams.

    args is handed to _options_parser.parse_args(). Invalid combinations
    are reported through _options_parser.error(). Returns a pair of the
    parsed options and the list [infile, outfile]; sys.stdin and sys.stdout
    are used when no file name was given.
    """
    options, leftovers = _options_parser.parse_args(args)

    # validation
    if leftovers:
        _options_parser.error("Additional arguments not handled: %r, see --help" % leftovers)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")
    if options.indent_type not in ["tab", "space", "none"]:
        _options_parser.error("Invalid value for --indent, see --help")
    if options.infilename and options.infilename == options.outfilename:
        _options_parser.error("Input filename is the same as output filename")

    # GZ: could sniff for gzip compression on stdin here
    infile = sys.stdin
    if options.infilename:
        # GZ: could catch a raised IOError here and report
        infile = maybe_gziped_file(options.infilename)

    outfile = sys.stdout
    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")

    return options, [infile, outfile]
2386
def getReport():
    """
    Return a multi-line summary of the module level statistics counters,
    one line for each counter.
    """
    entries = (
        (' Number of elements removed: ', numElemsRemoved),
        (' Number of attributes removed: ', numAttrsRemoved),
        (' Number of unreferenced id attributes removed: ', numIDsRemoved),
        (' Number of style properties fixed: ', numStylePropsFixed),
        (' Number of raster images embedded inline: ', numRastersEmbedded),
        (' Number of path segments reduced/removed: ', numPathSegmentsReduced),
        (' Number of bytes saved in path data: ', numBytesSavedInPathData),
        (' Number of bytes saved in colors: ', numBytesSavedInColors),
        (' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    )
    return '\n'.join([label + str(count) for (label, count) in entries])
2397
if __name__ == '__main__':
    # pick the timer used for the 'Time taken' line of the report
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        get_tick = lambda: os.times()[0]

    started = get_tick()

    options, (infile, outfile) = parse_args()

    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = infile.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    outfile.write(out_string)

    # Close input and output files
    infile.close()
    outfile.close()

    elapsed = get_tick() - started

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    print >>sys.stderr, ' File:', infile.name, \
        '\n Time taken:', str(elapsed) + 's\n', \
        getReport()

    # size of the result relative to the input, in percent
    # (true division, see the __future__ import at the top of the file)
    oldsize = len(in_string)
    newsize = len(out_string)
    percentage = str((newsize / oldsize) * 100)[:5]
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + percentage + '%)'

2434
2435