1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Scour
5 #
6 # Copyright 2010 Jeff Schiller
7 #
8 # This file is part of Scour, http://www.codedread.com/scour/
9 #
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
13 #
14 # http://www.apache.org/licenses/LICENSE-2.0
15 #
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
22 # Notes:
24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
28 #
29 # * Process Transformations
30 # * Collapse all group based transformations
32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
33 # * analysis of path elements to see if rect can be used instead? (must also need to look
34 # at rounded corners)
36 # Next Up:
37 # - only remove unreferenced elements if they are not children of a referenced element
38 # - add an option to remove ids if they match the Inkscape-style of IDs
39 # - investigate point-reducing algorithms
40 # - parse transform attribute
41 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
42 # - option to remove metadata
44 # necessary to get true division
45 from __future__ import division
47 import os
48 import sys
49 import xml.dom.minidom
50 import re
51 import math
52 import base64
53 import urllib
54 from svg_regex import svg_parser
55 import gzip
56 import optparse
57 from yocto_css import parseCssString
59 # Python 2.3- did not have Decimal
60 try:
61 from decimal import *
62 except ImportError:
63 from fixedpoint import *
64 Decimal = FixedPoint
66 # Import Psyco if available
67 try:
68 import psyco
69 psyco.full()
70 except ImportError:
71 pass
# program identification, used for help/version output and generated metadata
APP = 'scour'
VER = '0.25r171'
COPYRIGHT = 'Copyright Jeff Schiller, 2010'

# XML namespace URIs this tool knows about; SVG and XLINK content is kept,
# the editor-specific namespaces below are candidates for stripping
NS = { 'SVG': 'http://www.w3.org/2000/svg',
    'XLINK': 'http://www.w3.org/1999/xlink',
    'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
    'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
    'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
    'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
    'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
    'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
    'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
    'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
    'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
    'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
    'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
    'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
    }

# namespaces whose elements and attributes are removed from the document:
# all of these hold editor-private data that has no effect on rendering
unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
    NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
    NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
    NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]

# SVG presentation attributes (a subset of the properties from the SVG spec)
svgAttributes = [
    'clip-rule',
    'display',
    'fill',
    'fill-opacity',
    'fill-rule',
    'filter',
    'font-family',
    'font-size',
    'font-stretch',
    'font-style',
    'font-variant',
    'font-weight',
    'line-height',
    'marker',
    'opacity',
    'overflow',
    'stop-color',
    'stop-opacity',
    'stroke',
    'stroke-dasharray',
    'stroke-dashoffset',
    'stroke-linecap',
    'stroke-linejoin',
    'stroke-miterlimit',
    'stroke-opacity',
    'stroke-width',
    'visibility'
    ]
# map of SVG 1.1 color keyword names to their rgb() functional equivalents
# (used when rewriting color values into their shortest serialization)
colors = {
    'aliceblue': 'rgb(240, 248, 255)',
    'antiquewhite': 'rgb(250, 235, 215)',
    'aqua': 'rgb( 0, 255, 255)',
    'aquamarine': 'rgb(127, 255, 212)',
    'azure': 'rgb(240, 255, 255)',
    'beige': 'rgb(245, 245, 220)',
    'bisque': 'rgb(255, 228, 196)',
    'black': 'rgb( 0, 0, 0)',
    'blanchedalmond': 'rgb(255, 235, 205)',
    'blue': 'rgb( 0, 0, 255)',
    'blueviolet': 'rgb(138, 43, 226)',
    'brown': 'rgb(165, 42, 42)',
    'burlywood': 'rgb(222, 184, 135)',
    'cadetblue': 'rgb( 95, 158, 160)',
    'chartreuse': 'rgb(127, 255, 0)',
    'chocolate': 'rgb(210, 105, 30)',
    'coral': 'rgb(255, 127, 80)',
    'cornflowerblue': 'rgb(100, 149, 237)',
    'cornsilk': 'rgb(255, 248, 220)',
    'crimson': 'rgb(220, 20, 60)',
    'cyan': 'rgb( 0, 255, 255)',
    'darkblue': 'rgb( 0, 0, 139)',
    'darkcyan': 'rgb( 0, 139, 139)',
    'darkgoldenrod': 'rgb(184, 134, 11)',
    'darkgray': 'rgb(169, 169, 169)',
    'darkgreen': 'rgb( 0, 100, 0)',
    'darkgrey': 'rgb(169, 169, 169)',
    'darkkhaki': 'rgb(189, 183, 107)',
    'darkmagenta': 'rgb(139, 0, 139)',
    'darkolivegreen': 'rgb( 85, 107, 47)',
    'darkorange': 'rgb(255, 140, 0)',
    'darkorchid': 'rgb(153, 50, 204)',
    'darkred': 'rgb(139, 0, 0)',
    'darksalmon': 'rgb(233, 150, 122)',
    'darkseagreen': 'rgb(143, 188, 143)',
    'darkslateblue': 'rgb( 72, 61, 139)',
    'darkslategray': 'rgb( 47, 79, 79)',
    'darkslategrey': 'rgb( 47, 79, 79)',
    'darkturquoise': 'rgb( 0, 206, 209)',
    'darkviolet': 'rgb(148, 0, 211)',
    'deeppink': 'rgb(255, 20, 147)',
    'deepskyblue': 'rgb( 0, 191, 255)',
    'dimgray': 'rgb(105, 105, 105)',
    'dimgrey': 'rgb(105, 105, 105)',
    'dodgerblue': 'rgb( 30, 144, 255)',
    'firebrick': 'rgb(178, 34, 34)',
    'floralwhite': 'rgb(255, 250, 240)',
    'forestgreen': 'rgb( 34, 139, 34)',
    'fuchsia': 'rgb(255, 0, 255)',
    'gainsboro': 'rgb(220, 220, 220)',
    'ghostwhite': 'rgb(248, 248, 255)',
    'gold': 'rgb(255, 215, 0)',
    'goldenrod': 'rgb(218, 165, 32)',
    'gray': 'rgb(128, 128, 128)',
    'grey': 'rgb(128, 128, 128)',
    'green': 'rgb( 0, 128, 0)',
    'greenyellow': 'rgb(173, 255, 47)',
    'honeydew': 'rgb(240, 255, 240)',
    'hotpink': 'rgb(255, 105, 180)',
    'indianred': 'rgb(205, 92, 92)',
    'indigo': 'rgb( 75, 0, 130)',
    'ivory': 'rgb(255, 255, 240)',
    'khaki': 'rgb(240, 230, 140)',
    'lavender': 'rgb(230, 230, 250)',
    'lavenderblush': 'rgb(255, 240, 245)',
    'lawngreen': 'rgb(124, 252, 0)',
    'lemonchiffon': 'rgb(255, 250, 205)',
    'lightblue': 'rgb(173, 216, 230)',
    'lightcoral': 'rgb(240, 128, 128)',
    'lightcyan': 'rgb(224, 255, 255)',
    'lightgoldenrodyellow': 'rgb(250, 250, 210)',
    'lightgray': 'rgb(211, 211, 211)',
    'lightgreen': 'rgb(144, 238, 144)',
    'lightgrey': 'rgb(211, 211, 211)',
    'lightpink': 'rgb(255, 182, 193)',
    'lightsalmon': 'rgb(255, 160, 122)',
    'lightseagreen': 'rgb( 32, 178, 170)',
    'lightskyblue': 'rgb(135, 206, 250)',
    'lightslategray': 'rgb(119, 136, 153)',
    'lightslategrey': 'rgb(119, 136, 153)',
    'lightsteelblue': 'rgb(176, 196, 222)',
    'lightyellow': 'rgb(255, 255, 224)',
    'lime': 'rgb( 0, 255, 0)',
    'limegreen': 'rgb( 50, 205, 50)',
    'linen': 'rgb(250, 240, 230)',
    'magenta': 'rgb(255, 0, 255)',
    'maroon': 'rgb(128, 0, 0)',
    'mediumaquamarine': 'rgb(102, 205, 170)',
    'mediumblue': 'rgb( 0, 0, 205)',
    'mediumorchid': 'rgb(186, 85, 211)',
    'mediumpurple': 'rgb(147, 112, 219)',
    'mediumseagreen': 'rgb( 60, 179, 113)',
    'mediumslateblue': 'rgb(123, 104, 238)',
    'mediumspringgreen': 'rgb( 0, 250, 154)',
    'mediumturquoise': 'rgb( 72, 209, 204)',
    'mediumvioletred': 'rgb(199, 21, 133)',
    'midnightblue': 'rgb( 25, 25, 112)',
    'mintcream': 'rgb(245, 255, 250)',
    'mistyrose': 'rgb(255, 228, 225)',
    'moccasin': 'rgb(255, 228, 181)',
    'navajowhite': 'rgb(255, 222, 173)',
    'navy': 'rgb( 0, 0, 128)',
    'oldlace': 'rgb(253, 245, 230)',
    'olive': 'rgb(128, 128, 0)',
    'olivedrab': 'rgb(107, 142, 35)',
    'orange': 'rgb(255, 165, 0)',
    'orangered': 'rgb(255, 69, 0)',
    'orchid': 'rgb(218, 112, 214)',
    'palegoldenrod': 'rgb(238, 232, 170)',
    'palegreen': 'rgb(152, 251, 152)',
    'paleturquoise': 'rgb(175, 238, 238)',
    'palevioletred': 'rgb(219, 112, 147)',
    'papayawhip': 'rgb(255, 239, 213)',
    'peachpuff': 'rgb(255, 218, 185)',
    'peru': 'rgb(205, 133, 63)',
    'pink': 'rgb(255, 192, 203)',
    'plum': 'rgb(221, 160, 221)',
    'powderblue': 'rgb(176, 224, 230)',
    'purple': 'rgb(128, 0, 128)',
    'red': 'rgb(255, 0, 0)',
    'rosybrown': 'rgb(188, 143, 143)',
    'royalblue': 'rgb( 65, 105, 225)',
    'saddlebrown': 'rgb(139, 69, 19)',
    'salmon': 'rgb(250, 128, 114)',
    'sandybrown': 'rgb(244, 164, 96)',
    'seagreen': 'rgb( 46, 139, 87)',
    'seashell': 'rgb(255, 245, 238)',
    'sienna': 'rgb(160, 82, 45)',
    'silver': 'rgb(192, 192, 192)',
    'skyblue': 'rgb(135, 206, 235)',
    'slateblue': 'rgb(106, 90, 205)',
    'slategray': 'rgb(112, 128, 144)',
    'slategrey': 'rgb(112, 128, 144)',
    'snow': 'rgb(255, 250, 250)',
    'springgreen': 'rgb( 0, 255, 127)',
    'steelblue': 'rgb( 70, 130, 180)',
    'tan': 'rgb(210, 180, 140)',
    'teal': 'rgb( 0, 128, 128)',
    'thistle': 'rgb(216, 191, 216)',
    'tomato': 'rgb(255, 99, 71)',
    'turquoise': 'rgb( 64, 224, 208)',
    'violet': 'rgb(238, 130, 238)',
    'wheat': 'rgb(245, 222, 179)',
    'white': 'rgb(255, 255, 255)',
    'whitesmoke': 'rgb(245, 245, 245)',
    'yellow': 'rgb(255, 255, 0)',
    'yellowgreen': 'rgb(154, 205, 50)',
    }
def isSameSign(a,b):
    """Return True when a and b do not have opposite signs (zero matches either sign)."""
    bothNonNegative = (a >= 0 and b >= 0)
    bothNonPositive = (a <= 0 and b <= 0)
    return bothNonNegative or bothNonPositive
# a signed decimal coordinate (no exponent, no leading '+')
coord = re.compile("\\-?\\d+\\.?\\d*")
# a number in scientific notation (mantissa plus a required exponent)
scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
# a plain signed number without an exponent
number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
# captures just the exponent digits of a scientific-notation number
sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
# a CSS/SVG length unit suffix anchored at the end of the string
unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
class Unit(object):
    """Enumeration of SVG length units with string <-> constant conversion."""
    INVALID = -1
    NONE = 0
    PCT = 1
    PX = 2
    PT = 3
    PC = 4
    EM = 5
    EX = 6
    CM = 7
    MM = 8
    IN = 9

    # suffix -> constant lookup table (replaces the old if/elif ladder,
    # as the review note in the original suggested)
    s2u = {
        '%': PCT,
        'px': PX,
        'pt': PT,
        'pc': PC,
        'em': EM,
        'ex': EX,
        'cm': CM,
        'mm': MM,
        'in': IN,
    }
    # constant -> suffix lookup table
    u2s = {
        NONE: '',
        PCT: '%',
        PX: 'px',
        PT: 'pt',
        PC: 'pc',
        EM: 'em',
        EX: 'ex',
        CM: 'cm',
        MM: 'mm',
        IN: 'in',
    }

    # @staticmethod
    def get(unitstr):
        """Map a unit suffix string to a Unit constant.

        None or the empty string yields Unit.NONE; any unrecognized
        suffix yields Unit.INVALID.
        """
        if unitstr is None or unitstr == '':
            return Unit.NONE
        return Unit.s2u.get(unitstr, Unit.INVALID)

    # @staticmethod
    def str(u):
        """Map a Unit constant back to its suffix ('' for NONE, 'INVALID' otherwise unknown)."""
        return Unit.u2s.get(u, 'INVALID')

    # staticmethod() calls instead of decorators keep compatibility with
    # the very old Pythons this file still tries to support
    get = staticmethod(get)
    str = staticmethod(str)
class SVGLength(object):
    """Parses an SVG length string into a numeric value plus a Unit constant.

    After construction, self.value is an int or float and self.units is one
    of the Unit constants (Unit.INVALID with value 0 when the string could
    not be parsed).
    """
    def __init__(self, lengthstr):
        try:
            # simple unitless value; float() also accepts plain scientific
            # notation such as '1e5'
            self.value = float(lengthstr)
            if int(self.value) == self.value:
                self.value = int(self.value)
            self.units = Unit.NONE
        except ValueError:
            # the string has an exponent plus a unit, a unit, or is invalid

            # parse out number, exponent and unit
            self.value = 0
            unitBegin = 0
            scinum = scinumber.match(lengthstr)
            if scinum != None:
                # 'number' will always match here, no need to check it
                numMatch = number.match(lengthstr)
                expMatch = sciExponent.search(lengthstr, numMatch.start(0))
                self.value = (float(numMatch.group(0)) *
                    10 ** float(expMatch.group(1)))
                unitBegin = expMatch.end(1)
            else:
                # unit or invalid
                numMatch = number.match(lengthstr)
                if numMatch != None:
                    self.value = float(numMatch.group(0))
                    unitBegin = numMatch.end(0)

            if int(self.value) == self.value:
                self.value = int(self.value)

            if unitBegin != 0:
                unitMatch = unit.search(lengthstr, unitBegin)
                if unitMatch != None:
                    self.units = Unit.get(unitMatch.group(0))
                else:
                    # trailing text that is not a recognized unit (e.g. '5foo');
                    # previously self.units was left unassigned here, which made
                    # later attribute access raise AttributeError
                    self.value = 0
                    self.units = Unit.INVALID
            # invalid
            else:
                # TODO: this needs to set the default for the given attribute (how?)
                self.value = 0
                self.units = Unit.INVALID
# returns the length of a property
# TODO: eventually use the SVGLength class above once it is complete
def getSVGLength(value):
    """Parse value as a plain number.

    Returns a float when value is purely numeric; otherwise (a unit suffix
    such as '10px', or a non-numeric string) the original string is returned
    unchanged, so callers must accept either a float or a string.
    """
    try:
        v = float(value)
    except ValueError:
        # not a plain number: hand back the raw string
        # (the previous code also ran the coord/unit regexes here but
        # never used their results, so that dead work has been dropped)
        v = value
    return v
def findElementById(node, id):
    """Return the first element (preorder) whose 'id' attribute equals id, or None."""
    if node == None or node.nodeType != 1: return None
    # iterative preorder traversal in place of the original recursion
    pending = [node]
    while pending:
        current = pending.pop()
        # non-element nodes carry no id and are not descended into
        if current.nodeType != 1:
            continue
        if current.getAttribute('id') == id:
            return current
        # push children reversed so they are visited left-to-right
        pending.extend(reversed(current.childNodes))
    return None
def findElementsWithId(node, elems=None):
    """
    Returns a dictionary mapping each id attribute value to its element,
    gathered from node and all of its element descendants.
    """
    if elems is None:
        elems = {}
    nodeId = node.getAttribute('id')
    if nodeId:
        elems[nodeId] = node
    # only Element nodes (nodeType 1, per DOM Level 2 Core) can carry an
    # id attribute, so descend into those alone
    for child in node.childNodes:
        if child.nodeType == 1:
            findElementsWithId(child, elems)
    return elems
# attributes (and style property names) whose values may reference another
# element through a url(#id) function value
referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask', 'marker-start',
    'marker-end', 'marker-mid']
def findReferencedElements(node, ids=None):
    """
    Returns the number of times an ID is referenced as well as all elements
    that reference it.

    Currently looks at fill, stroke, clip-path, mask, marker, and
    xlink:href attributes.

    The returned dictionary maps id -> [reference count, list of referencing nodes].
    """
    global referencingProps
    if ids is None:
        ids = {}
    # TODO: input argument ids is clunky here (see below how it is called)
    # GZ: alternative to passing dict, use **kwargs

    # if this node is a style element, parse its text into CSS
    if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
        # node.firstChild will be either a CDATA or a Text node
        if node.firstChild != None:
            cssRules = parseCssString(node.firstChild.nodeValue)
            for rule in cssRules:
                for propname in rule['properties']:
                    propval = rule['properties'][propname]
                    findReferencingProperty(node, propname, propval, ids)
        # a style element holds only CSS text, nothing below it to examine
        return ids

    # else if xlink:href is set, then grab the id
    href = node.getAttributeNS(NS['XLINK'],'href')
    if href != '' and len(href) > 1 and href[0] == '#':
        # we remove the hash mark from the beginning of the id
        id = href[1:]
        if id in ids:
            ids[id][0] += 1
            ids[id][1].append(node)
        else:
            ids[id] = [1,[node]]

    # now get all style properties and the fill, stroke, filter attributes
    styles = node.getAttribute('style').split(';')
    for attr in referencingProps:
        styles.append(':'.join([attr, node.getAttribute(attr)]))

    for style in styles:
        propval = style.split(':')
        # NOTE: values containing an extra ':' (e.g. a full URL) split into
        # more than two pieces and are skipped by this check
        if len(propval) == 2 :
            prop = propval[0].strip()
            val = propval[1].strip()
            findReferencingProperty(node, prop, val, ids)

    # recurse into element children only (nodeType 1)
    if node.hasChildNodes() :
        for child in node.childNodes:
            if child.nodeType == 1 :
                findReferencedElements(child, ids)
    return ids
def findReferencingProperty(node, prop, val, ids):
    """Record in ids a reference from node when prop's value is a url(#...) form.

    ids maps id -> [reference count, list of referencing nodes] and is
    updated in place.  Only properties listed in referencingProps count.
    """
    global referencingProps
    if prop in referencingProps and val != '' :
        id = None
        if len(val) >= 7 and val[0:5] == 'url(#' :
            id = val[5:val.find(')')]
        # if the url has a quote in it, we need to compensate
        elif len(val) >= 8 :
            # double-quote
            if val[0:6] == 'url("#' :
                id = val[6:val.find('")')]
            # single-quote
            elif val[0:6] == "url('#" :
                id = val[6:val.find("')")]
        if id != None:
            # dict.has_key() was removed in Python 3; 'in' works everywhere
            if id in ids :
                ids[id][0] += 1
                ids[id][1].append(node)
            else:
                ids[id] = [1,[node]]
# running statistics, accumulated across all cleaning passes and reported
# when scour finishes
numIDsRemoved = 0
numElemsRemoved = 0
numAttrsRemoved = 0
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0
def removeUnusedDefs(doc, defElem, elemsToRemove=None):
    """Collect the children of defElem that can safely be deleted.

    An element is removable when it has no id or its id is unreferenced,
    unless it is one of the always-kept tags.  Groups are recursed into
    (their children considered individually) when the group itself is
    unreferenced.  Returns the accumulated elemsToRemove list.
    """
    if elemsToRemove is None:
        elemsToRemove = []

    # (the previous version also computed findElementsWithId() here but
    # never used the result)
    referencedIDs = findReferencedElements(doc.documentElement)

    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    for elem in defElem.childNodes:
        # only look at it if an element and not referenced anywhere else
        if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
                elem.getAttribute('id') not in referencedIDs):

            # we only inspect the children of a group in a defs if the group
            # is not referenced anywhere else
            if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
                elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
            # we only remove if it is not one of our tags we always keep (see above)
            elif elem.nodeName not in keepTags:
                elemsToRemove.append(elem)
    return elemsToRemove
def removeUnreferencedElements(doc):
    """
    Removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>.
    Also vacuums the defs of any non-referenced renderable elements.

    Returns the number of unreferenced elements removed from the document.
    """
    global numElemsRemoved
    num = 0
    removeTags = ['linearGradient', 'radialGradient', 'pattern']

    identifiedElements = findElementsWithId(doc.documentElement)
    referencedIDs = findReferencedElements(doc.documentElement)

    for id in identifiedElements:
        if not id in referencedIDs:
            goner = findElementById(doc.documentElement, id)
            if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
                goner.parentNode.removeChild(goner)
                num += 1
                numElemsRemoved += 1

    # also vacuum the defs of any unreferenced elements
    # (BUGFIX: 'num' was previously reset to 0 here, which discarded the
    # count from the loop above and made the return value under-report)
    defs = doc.documentElement.getElementsByTagName('defs')
    for aDef in defs:
        elemsToRemove = removeUnusedDefs(doc, aDef)
        for elem in elemsToRemove:
            elem.parentNode.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """
    Removes the unreferenced ID attributes.

    referencedIDs maps id -> reference info; identifiedElements maps
    id -> element.  Returns the number of ID attributes removed.
    """
    global numIDsRemoved
    # elements whose ids we keep even when unreferenced
    keepTags = ['font']
    num = 0
    for id in list(identifiedElements.keys()):
        node = identifiedElements[id]
        # dict.has_key() was removed in Python 3; 'in' works everywhere
        if id not in referencedIDs and not node.nodeName in keepTags:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num
def removeNamespacedAttributes(node, namespaces):
    """Recursively strip every attribute whose namespace is in namespaces.

    Returns the number of attributes removed from node and its subtree.
    """
    global numAttrsRemoved
    num = 0
    if node.nodeType == 1 :
        # collect the names first: mutating the attribute map while
        # walking it would skip entries
        doomed = []
        attrs = node.attributes
        for i in range(attrs.length):
            candidate = attrs.item(i)
            if candidate != None and candidate.namespaceURI in namespaces:
                doomed.append(candidate.nodeName)
        for attrName in doomed:
            node.removeAttribute(attrName)
            numAttrsRemoved += 1
            num += 1

    # now recurse for children
    for child in node.childNodes:
        num += removeNamespacedAttributes(child, namespaces)
    return num
def removeNamespacedElements(node, namespaces):
    """Recursively remove every child element whose namespace is in namespaces.

    Returns the number of elements removed from node's subtree.
    """
    global numElemsRemoved
    num = 0
    if node.nodeType == 1 :
        # gather first, then remove: deleting while iterating childNodes
        # would skip siblings
        doomed = [c for c in node.childNodes
                  if c != None and c.namespaceURI in namespaces]
        for victim in doomed:
            node.removeChild(victim)
            numElemsRemoved += 1
            num += 1

    # now recurse for the children that survived
    for child in node.childNodes:
        num += removeNamespacedElements(child, namespaces)
    return num
def removeNestedGroups(node):
    """
    This walks further and further down the tree, removing groups
    which do not have any attributes or a title/desc child and
    promoting their children up one level
    """
    global numElemsRemoved
    num = 0

    collapsible = []
    for child in node.childNodes:
        if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
            # a group survives when a title or desc is a direct child
            # (explicit flag instead of the original for/else idiom)
            keepGroup = False
            for grandchild in child.childNodes:
                if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
                        grandchild.nodeName in ['title','desc']:
                    keepGroup = True
                    break
            if not keepGroup:
                collapsible.append(child)

    for g in collapsible:
        # hoist each child in front of the group, then drop the empty group
        while g.childNodes.length > 0:
            g.parentNode.insertBefore(g.firstChild, g)
        g.parentNode.removeChild(g)
        numElemsRemoved += 1
        num += 1

    # now recurse for children
    for child in node.childNodes:
        if child.nodeType == 1:
            num += removeNestedGroups(child)
    return num
def moveCommonAttributesToParentGroup(elem):
    """
    This recursively calls this function on all children of the passed in element
    and then iterates over all child elements and removes common inheritable attributes
    from the children and places them in the parent group. But only if the parent contains
    nothing but element children and whitespace.

    Returns the net number of attributes removed from the subtree.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += moveCommonAttributesToParentGroup(child)
        # else if the parent has non-whitespace text children, do not
        # try to move common attributes
        elif child.nodeType == 3 and child.nodeValue.strip():
            return num

    # only process the children if there are more than one element
    if len(childElements) <= 1: return num

    commonAttrs = {}
    # add all inheritable properties of the first child element
    # FIXME: Note there is a chance that the first child is a set/animate in which case
    # its fill attribute is not what we want to look at, we should look for the first
    # non-animate/set element
    attrList = childElements[0].attributes
    # BUGFIX: the loop variable was previously 'num', which clobbered the
    # running count accumulated above and corrupted the return value
    for attrIndex in range(attrList.length):
        attr = attrList.item(attrIndex)
        # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
        # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
        if attr.nodeName in ['clip-rule',
                'display-align',
                'fill', 'fill-opacity', 'fill-rule',
                'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                'font-style', 'font-variant', 'font-weight',
                'letter-spacing',
                'pointer-events', 'shape-rendering',
                'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                'word-spacing', 'writing-mode']:
            # we just add all the attributes from the first child
            commonAttrs[attr.nodeName] = attr.nodeValue

    # for each subsequent child element
    for childNum in range(len(childElements)):
        # skip first child
        if childNum == 0:
            continue

        child = childElements[childNum]
        # if we are on an animateXXX/set element, ignore it (due to the 'fill' attribute)
        if child.localName in ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']:
            continue

        distinctAttrs = []
        # loop through all current 'common' attributes
        for name in commonAttrs.keys():
            # if this child doesn't match that attribute, schedule it for removal
            if child.getAttribute(name) != commonAttrs[name]:
                distinctAttrs.append(name)
        # remove those attributes which are not common
        for name in distinctAttrs:
            del commonAttrs[name]

    # commonAttrs now has all the inheritable attributes which are common among all child elements
    for name in list(commonAttrs.keys()):
        for child in childElements:
            child.removeAttribute(name)
        elem.setAttribute(name, commonAttrs[name])

    # update our statistic (we remove N*M attributes and add back in M attributes)
    num += (len(childElements)-1) * len(commonAttrs)
    return num
def removeUnusedAttributesOnParent(elem):
    """
    This recursively calls this function on all children of the element passed in,
    then removes any unused attributes on this elem if none of the children inherit it

    Returns the number of attributes removed from the subtree.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += removeUnusedAttributesOnParent(child)

    # only process the children if there are more than one element
    if len(childElements) <= 1: return num

    # get all inheritable attribute values on this parent
    attrList = elem.attributes
    unusedAttrs = {}
    # BUGFIX: the loop variable was previously 'num', which clobbered the
    # running count accumulated above and corrupted the return value
    for attrIndex in range(attrList.length):
        attr = attrList.item(attrIndex)
        if attr.nodeName in ['clip-rule',
                'display-align',
                'fill', 'fill-opacity', 'fill-rule',
                'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                'font-style', 'font-variant', 'font-weight',
                'letter-spacing',
                'pointer-events', 'shape-rendering',
                'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                'word-spacing', 'writing-mode']:
            unusedAttrs[attr.nodeName] = attr.nodeValue

    # if at least one child inherits an attribute (empty/'inherit' value),
    # the parent's attribute is still used and must be kept
    for childNum in range(len(childElements)):
        child = childElements[childNum]
        inheritedAttrs = []
        for name in unusedAttrs.keys():
            val = child.getAttribute(name)
            if val == '' or val == None or val == 'inherit':
                inheritedAttrs.append(name)
        for a in inheritedAttrs:
            del unusedAttrs[a]

    # unusedAttrs now has all the parent attributes that every child overrides
    for name in list(unusedAttrs.keys()):
        elem.removeAttribute(name)
        num += 1

    return num
def removeDuplicateGradientStops(doc):
    """Collapse consecutive stops that share offset, color and opacity.

    Offsets are normalized (percentages to fractions, numbers to their
    shortest int/float form).  Returns the number of stops removed.
    """
    global numElemsRemoved
    num = 0

    for gradType in ['linearGradient', 'radialGradient']:
        for grad in doc.getElementsByTagName(gradType):
            stops = {}
            stopsToRemove = []
            for stop in grad.getElementsByTagName('stop'):
                # convert percentages into a floating point number
                offsetU = SVGLength(stop.getAttribute('offset'))
                if offsetU.units == Unit.PCT:
                    offset = offsetU.value / 100.0
                elif offsetU.units == Unit.NONE:
                    offset = offsetU.value
                else:
                    offset = 0
                # set the stop offset value to the integer or floating point equivalent
                if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
                else: stop.setAttribute('offset', str(offset))

                color = stop.getAttribute('stop-color')
                opacity = stop.getAttribute('stop-opacity')
                # dict.has_key() was removed in Python 3; 'in' works everywhere
                if offset in stops :
                    oldStop = stops[offset]
                    if oldStop[0] == color and oldStop[1] == opacity:
                        stopsToRemove.append(stop)
                stops[offset] = [color, opacity]

            for stop in stopsToRemove:
                stop.parentNode.removeChild(stop)
                num += 1
                numElemsRemoved += 1

    return num
def collapseSinglyReferencedGradients(doc):
    """Merge a gradient referenced by exactly one other gradient into it.

    The referencing gradient adopts the stops and any unspecified geometry
    attributes of the referenced one, the xlink:href is dropped, and the
    now-unneeded gradient is deleted.  Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0

    # make sure to reset the ref'ed ids for when we are running this in testscour
    # (iteritems() is Python 2-only; items() behaves the same here)
    for rid, nodeCount in findReferencedElements(doc.documentElement).items():
        count = nodeCount[0]
        nodes = nodeCount[1]
        if count == 1:
            elem = findElementById(doc.documentElement, rid)
            if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
                    and elem.namespaceURI == NS['SVG']:
                # found a gradient that is referenced by only 1 other element
                refElem = nodes[0]
                if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
                        and refElem.namespaceURI == NS['SVG']:
                    # elem is a gradient referenced by only one other gradient (refElem)

                    # add the stops to the referencing gradient (this removes them from elem)
                    if len(refElem.getElementsByTagName('stop')) == 0:
                        stopsToAdd = elem.getElementsByTagName('stop')
                        for stop in stopsToAdd:
                            refElem.appendChild(stop)

                    # adopt the gradientUnits, spreadMethod, gradientTransform attributes if
                    # they are unspecified on refElem
                    for attr in ['gradientUnits','spreadMethod','gradientTransform']:
                        if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                            refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
                    # they are unspecified on refElem
                    if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
                        for attr in ['fx','fy','cx','cy','r']:
                            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if
                    # they are unspecified on refElem
                    if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
                        for attr in ['x1','y1','x2','y2']:
                            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # now remove the xlink:href from refElem
                    refElem.removeAttributeNS(NS['XLINK'], 'href')

                    # now delete elem
                    elem.parentNode.removeChild(elem)
                    numElemsRemoved += 1
                    num += 1
    return num
def removeDuplicateGradients(doc):
    """Delete gradients that exactly duplicate another one.

    Two gradients are duplicates when all geometry attributes, the
    xlink:href, and every stop match.  Every element referencing a
    duplicate is re-pointed at the surviving 'master' gradient before the
    duplicate is removed.  Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0

    gradientsToRemove = {}
    duplicateToMaster = {}

    for gradType in ['linearGradient', 'radialGradient']:
        grads = doc.getElementsByTagName(gradType)
        for grad in grads:
            # TODO: should slice grads from 'grad' here to optimize
            for ograd in grads:
                # do not compare gradient to itself
                if grad == ograd: continue

                # compare grad to ograd (all properties, then all stops)
                # if attributes do not match, go to next gradient
                someGradAttrsDoNotMatch = False
                for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
                    if grad.getAttribute(attr) != ograd.getAttribute(attr):
                        someGradAttrsDoNotMatch = True
                        break

                if someGradAttrsDoNotMatch: continue

                # compare xlink:href values too
                if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
                    continue

                # all gradient properties match, now time to compare stops
                stops = grad.getElementsByTagName('stop')
                ostops = ograd.getElementsByTagName('stop')

                if stops.length != ostops.length: continue

                # now compare stops
                stopsNotEqual = False
                for i in range(stops.length):
                    if stopsNotEqual: break
                    stop = stops.item(i)
                    ostop = ostops.item(i)
                    for attr in ['offset', 'stop-color', 'stop-opacity']:
                        if stop.getAttribute(attr) != ostop.getAttribute(attr):
                            stopsNotEqual = True
                            break
                if stopsNotEqual: continue

                # ograd is a duplicate of grad, we schedule it to be removed UNLESS
                # ograd is ALREADY considered a 'master' element
                # (dict.has_key() was removed in Python 3; 'in' works everywhere)
                if ograd not in gradientsToRemove:
                    if ograd not in duplicateToMaster:
                        if grad not in gradientsToRemove:
                            gradientsToRemove[grad] = []
                        gradientsToRemove[grad].append( ograd )
                        duplicateToMaster[ograd] = grad

    # get a collection of all elements that are referenced and their referencing elements
    referencedIDs = findReferencedElements(doc.documentElement)
    for masterGrad in gradientsToRemove.keys():
        master_id = masterGrad.getAttribute('id')
        for dupGrad in gradientsToRemove[masterGrad]:
            # if the duplicate gradient no longer has a parent that means it was
            # already re-mapped to another master gradient
            if not dupGrad.parentNode: continue
            dup_id = dupGrad.getAttribute('id')
            # for each element that referenced the gradient we are going to remove
            # (guard: an unreferenced duplicate has no entry in referencedIDs,
            # which previously raised KeyError here)
            for elem in referencedIDs.get(dup_id, (0, []))[1]:
                # find out which attribute referenced the duplicate gradient
                for attr in ['fill', 'stroke']:
                    v = elem.getAttribute(attr)
                    if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
                        elem.setAttribute(attr, 'url(#'+master_id+')')
                if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
                    elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)

            # now that all referencing elements have been re-mapped to the master
            # it is safe to remove this gradient from the document
            dupGrad.parentNode.removeChild(dupGrad)
            numElemsRemoved += 1
            num += 1
    return num
def repairStyle(node, options):
    """
    Normalizes and prunes the 'style' attribute of node, then recurses into
    its child nodes.

    Removes properties that can have no rendering effect (opacity:1, all
    stroke properties when the stroke is 'none'/invisible, font properties on
    shape elements, Inkscape-private properties, ...) and, when
    options.style_to_xml is set, converts remaining known properties into
    XML presentation attributes.  Returns the number of properties repaired
    or removed in this subtree.
    """
    num = 0
    if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
        # get all style properties and stuff them into a dictionary
        styleMap = { }
        rawStyles = node.getAttribute('style').split(';')
        for style in rawStyles:
            propval = style.split(':')
            if len(propval) == 2 :
                styleMap[propval[0].strip()] = propval[1].strip()

        # I've seen this enough to know that I need to correct it:
        # fill: url(#linearGradient4918) rgb(0, 0, 0);
        for prop in ['fill', 'stroke'] :
            if prop in styleMap :
                chunk = styleMap[prop].split(') ')
                if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
                    styleMap[prop] = chunk[0] + ')'
                    num += 1

        # Here is where we can weed out unnecessary styles like:
        #  opacity:1
        if 'opacity' in styleMap :
            opacity = float(styleMap['opacity'])
            # opacity='1.0' is useless, remove it
            if opacity == 1.0 :
                del styleMap['opacity']
                num += 1

            # if opacity='0' then all fill and stroke properties are useless, remove them
            # (fix: 'stroke-opacity' used to appear twice in this list)
            elif opacity == 0.0 :
                for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
                                     'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap',
                                     'stroke-dasharray', 'stroke-dashoffset'] :
                    if uselessStyle in styleMap:
                        del styleMap[uselessStyle]
                        num += 1

        # if stroke:none, then remove all stroke-related properties (stroke-width, etc)
        # TODO: should also detect if the computed value of this element is stroke="none"
        if 'stroke' in styleMap and styleMap['stroke'] == 'none' :
            for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
                                 'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
                if strokestyle in styleMap :
                    del styleMap[strokestyle]
                    num += 1
            # TODO: This is actually a problem if a parent element has a specified stroke;
            # we need to properly calculate computed values
            del styleMap['stroke']

        # if fill:none, then remove all fill-related properties (fill-rule, etc)
        if 'fill' in styleMap and styleMap['fill'] == 'none' :
            for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
                if fillstyle in styleMap :
                    del styleMap[fillstyle]
                    num += 1

        # stop-opacity: 1
        if 'stop-opacity' in styleMap :
            if float(styleMap['stop-opacity']) == 1.0 :
                del styleMap['stop-opacity']
                num += 1

        # fill-opacity: 1 or 0
        if 'fill-opacity' in styleMap :
            fillOpacity = float(styleMap['fill-opacity'])
            # TODO: This is actually a problem if the parent element does not have fill-opacity=1
            if fillOpacity == 1.0 :
                del styleMap['fill-opacity']
                num += 1
            elif fillOpacity == 0.0 :
                for uselessFillStyle in [ 'fill', 'fill-rule' ] :
                    if uselessFillStyle in styleMap:
                        del styleMap[uselessFillStyle]
                        num += 1

        # stroke-opacity: 1 or 0
        if 'stroke-opacity' in styleMap :
            strokeOpacity = float(styleMap['stroke-opacity'])
            # TODO: This is actually a problem if the parent element does not have stroke-opacity=1
            if strokeOpacity == 1.0 :
                del styleMap['stroke-opacity']
                num += 1
            elif strokeOpacity == 0.0 :
                for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
                                            'stroke-dasharray', 'stroke-dashoffset' ] :
                    if uselessStrokeStyle in styleMap:
                        del styleMap[uselessStrokeStyle]
                        num += 1

        # stroke-width: 0
        if 'stroke-width' in styleMap :
            strokeWidth = getSVGLength(styleMap['stroke-width'])
            if strokeWidth == 0.0 :
                for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
                                            'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
                    if uselessStrokeStyle in styleMap:
                        del styleMap[uselessStrokeStyle]
                        num += 1

        # remove font properties for non-text elements
        # I've actually observed this in real SVG content
        if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
            for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
                               'font-style', 'font-variant', 'font-weight',
                               'letter-spacing', 'line-height', 'kerning',
                               'text-anchor', 'text-decoration', 'text-rendering',
                               'unicode-bidi', 'word-spacing', 'writing-mode'] :
                if fontstyle in styleMap :
                    del styleMap[fontstyle]
                    num += 1

        # remove inkscape-specific styles
        # TODO: need to get a full list of these
        for inkscapeStyle in ['-inkscape-font-specification']:
            if inkscapeStyle in styleMap:
                del styleMap[inkscapeStyle]
                num += 1

        # visibility: visible
        if 'visibility' in styleMap :
            if styleMap['visibility'] == 'visible':
                del styleMap['visibility']
                num += 1

        # display: inline
        if 'display' in styleMap :
            if styleMap['display'] == 'inline':
                del styleMap['display']
                num += 1

        # overflow: visible or overflow specified on element other than svg, marker, pattern
        if 'overflow' in styleMap :
            if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
                del styleMap['overflow']
                num += 1

        # marker: none
        if 'marker' in styleMap :
            if styleMap['marker'] == 'none':
                del styleMap['marker']
                num += 1

        # now if any of the properties match known SVG attributes we prefer attributes
        # over style so emit them and remove them from the style map
        # (iterate over a copy of the keys since we delete while walking)
        if options.style_to_xml:
            for propName in list(styleMap.keys()) :
                if propName in svgAttributes :
                    node.setAttribute(propName, styleMap[propName])
                    del styleMap[propName]

        # sew our remaining style properties back together into a style attribute
        fixedStyle = ''
        for prop in styleMap.keys() :
            fixedStyle += prop + ':' + styleMap[prop] + ';'

        if fixedStyle != '' :
            node.setAttribute('style', fixedStyle)
        else:
            node.removeAttribute('style')

    # recurse for our child elements
    for child in node.childNodes :
        num += repairStyle(child,options)

    return num
def removeDefaultAttributeValues(node, options):
    """
    Strips gradient attributes whose value equals the SVG default
    (gradientUnits, spreadMethod, the x1/y1/x2/y2 and cx/cy/r/fx/fy
    geometry attributes), then recurses into child nodes.
    Returns the number of attributes removed.
    """
    if node.nodeType != 1:
        return 0

    num = 0

    # string-valued defaults
    if node.getAttribute('gradientUnits') == 'objectBoundingBox':
        node.removeAttribute('gradientUnits')
        num += 1
    if node.getAttribute('spreadMethod') == 'pad':
        node.removeAttribute('spreadMethod')
        num += 1

    # linear gradient coordinates whose default is 0%
    for zeroAttr in ['x1', 'y1', 'y2']:
        if node.getAttribute(zeroAttr) != '':
            if SVGLength(node.getAttribute(zeroAttr)).value == 0:
                node.removeAttribute(zeroAttr)
                num += 1

    # x2 defaults to 100% (equivalently 1 when unitless)
    if node.getAttribute('x2') != '':
        x2 = SVGLength(node.getAttribute('x2'))
        if (x2.value == 100 and x2.units == Unit.PCT) or (x2.value == 1 and x2.units == Unit.NONE):
            node.removeAttribute('x2')
            num += 1

    # the focal point fx/fy defaults to the center cx/cy when omitted
    for (focal, center) in [('fx', 'cx'), ('fy', 'cy')]:
        if node.getAttribute(focal) != '' and node.getAttribute(focal) == node.getAttribute(center):
            node.removeAttribute(focal)
            num += 1

    # radial gradient center/radius whose default is 50%
    for halfAttr in ['cx', 'cy', 'r']:
        if node.getAttribute(halfAttr) != '':
            length = SVGLength(node.getAttribute(halfAttr))
            if (length.value == 50 and length.units == Unit.PCT) or (length.value == 0.5 and length.units == Unit.NONE):
                node.removeAttribute(halfAttr)
                num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
# Matches 'rgb(int, int, int)' with optional surrounding whitespace; groups 1-3
# capture the integer components (values above 255 are not rejected here).
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
# Matches 'rgb(float%, float%, float%)'; groups 1-3 capture the percentages.
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string
    """
    s = value

    # map recognized color keywords onto their hex equivalents
    if s in colors:
        s = colors[s]

    # rgb(R%,G%,B%) -> rgb(r,g,b), truncating each component to an integer
    pctMatch = rgbp.match(s)
    if pctMatch is not None:
        comps = tuple(int(float(pctMatch.group(n)) * 255.0 / 100.0) for n in (1, 2, 3))
        s = 'rgb(%d,%d,%d)' % comps

    # rgb(r,g,b) -> #RRGGBB (zero-padded uppercase hex)
    intMatch = rgb.match(s)
    if intMatch is not None:
        s = '#%02X%02X%02X' % (int(intMatch.group(1)),
                               int(intMatch.group(2)),
                               int(intMatch.group(3)))

    # #RRGGBB -> #RGB when every channel repeats its hex digit
    if s[0] == '#' and len(s) == 7 and s[1] == s[2] and s[3] == s[4] and s[5] == s[6]:
        s = s.upper()
        s = '#' + s[1] + s[3] + s[5]

    return s
def convertColors(element) :
    """
    Recursively converts all color properties into #RRGGBB format if shorter
    """
    if element.nodeType != 1:
        return 0

    numBytes = 0

    # pick the color attributes relevant to this element type
    name = element.nodeName
    if name in ['rect', 'circle', 'ellipse', 'polygon',
                'line', 'polyline', 'path', 'g', 'a']:
        attrsToConvert = ['fill', 'stroke']
    elif name == 'stop':
        attrsToConvert = ['stop-color']
    elif name == 'solidColor':
        attrsToConvert = ['solid-color']
    else:
        attrsToConvert = []

    # rewrite each attribute only when the converted form is strictly shorter
    for attr in attrsToConvert:
        oldColorValue = element.getAttribute(attr)
        if oldColorValue:
            newColorValue = convertColor(oldColorValue)
            if len(oldColorValue) > len(newColorValue):
                element.setAttribute(attr, newColorValue)
                numBytes += (len(oldColorValue) - len(element.getAttribute(attr)))

    # now recurse for our child elements
    for child in element.childNodes:
        numBytes += convertColors(child)

    return numBytes
1273 # TODO: go over what this method does and see if there is a way to optimize it
1274 # TODO: go over the performance of this method and see if I can save memory/speed by
1275 # reusing data structures, etc
def cleanPath(element) :
    """
    Cleans the path string (d attribute) of the element:
    - converts all coordinates to Decimal and all commands to relative form
    - removes empty segments
    - converts straight cubic curves into lines
    - collapses runs of the same command, uses h/v/s/t shorthands
    Updates the module-level statistics counters as a side effect.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    # however, this parser object has some ugliness in it (lists of tuples of tuples of
    # numbers and booleans).  we just need a list of (cmd,[numbers]):
    path = []
    for (cmd,dataset) in pathObj:
        if cmd in ['M','m','L','l','T','t']:
            # one or more tuples, each containing two numbers
            nums = []
            for t in dataset:
                # convert to a Decimal
                nums.append(Decimal(str(t[0])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))

            # only create this segment if it is not empty
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['V','v','H','h']:
            # one or more numbers
            nums = []
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['C','c']:
            # one or more tuples, each containing three tuples of two numbers each
            nums = []
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['S','s','Q','q']:
            # one or more tuples, each containing two tuples of two numbers each
            nums = []
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['A','a']:
            # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
            # another boolean, and a tuple of two numbers
            nums = []
            for t in dataset:
                nums.append( Decimal(str(t[0][0])) * Decimal(1) )
                nums.append( Decimal(str(t[0][1])) * Decimal(1) )
                nums.append( Decimal(str(t[1])) * Decimal(1))

                if t[2]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                if t[3]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                nums.append( Decimal(str(t[4][0])) * Decimal(1) )
                nums.append( Decimal(str(t[4][1])) * Decimal(1) )
            path.append( (cmd, nums) )

        elif cmd in ['Z','z']:
            path.append( (cmd, []) )

    # calculate the starting x,y coord for the second path command
    if len(path[0][1]) == 2:
        (x,y) = path[0][1]
    else:
        # we have a move and then 1 or more coords for lines
        N = len(path[0][1])
        if path[0][0] == 'M':
            # take the last pair of coordinates for the starting point
            x = path[0][1][N-2]
            y = path[0][1][N-1]
        else: # relative move, accumulate coordinates for the starting point
            (x,y) = path[0][1][0],path[0][1][1]
            n = 2
            while n < N:
                x += path[0][1][n]
                y += path[0][1][n+1]
                n += 2

    # now we have the starting point at x,y so let's save it
    # NOTE(review): when the first moveto carries implicit linetos this records
    # the current point *after* those linetos, not the subpath's initial point,
    # so a following 'z' may not return to the true start -- confirm against
    # the SVG path spec before changing.
    (startx,starty) = (x,y)

    # convert absolute coordinates into relative ones (start with the second subcommand
    # and leave the first M as absolute)
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newCmd = cmd
        newData = data
        # adjust abs to rel
        # only the A command has some values that we don't want to adjust (radii, rotation, flags)
        if cmd == 'A':
            newCmd = 'a'
            newData = []
            while i < len(data):
                newData.append(data[i])
                newData.append(data[i+1])
                newData.append(data[i+2])
                newData.append(data[i+3])
                newData.append(data[i+4])
                newData.append(data[i+5]-x)
                newData.append(data[i+6]-y)
                x = data[i+5]
                y = data[i+6]
                i += 7
        elif cmd == 'a':
            while i < len(data):
                x += data[i+5]
                y += data[i+6]
                i += 7
        elif cmd == 'H':
            newCmd = 'h'
            newData = []
            while i < len(data):
                newData.append(data[i]-x)
                x = data[i]
                i += 1
        elif cmd == 'h':
            while i < len(data):
                x += data[i]
                i += 1
        elif cmd == 'V':
            newCmd = 'v'
            newData = []
            while i < len(data):
                newData.append(data[i] - y)
                y = data[i]
                i += 1
        elif cmd == 'v':
            while i < len(data):
                y += data[i]
                i += 1
        elif cmd in ['M']:
            newCmd = cmd.lower()
            newData = []
            startx = data[0]
            starty = data[1]
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['L','T']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['m']:
            startx += data[0]
            starty += data[1]
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['l','t']:
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['S','Q']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                x = data[i+2]
                y = data[i+3]
                i += 4
        elif cmd in ['s','q']:
            while i < len(data):
                x += data[i+2]
                y += data[i+3]
                i += 4
        elif cmd == 'C':
            newCmd = 'c'
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                newData.append( data[i+4] - x )
                newData.append( data[i+5] - y )
                x = data[i+4]
                y = data[i+5]
                i += 6
        elif cmd == 'c':
            while i < len(data):
                x += data[i+4]
                y += data[i+5]
                i += 6
        elif cmd in ['z','Z']:
            x = startx
            y = starty
            newCmd = 'z'
        newPath.append( (newCmd, newData) )
    path = newPath

    # remove empty segments
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['m','l','t']:
            newData = []
            i = 0
            while i < len(data):
                if data[i] != 0 or data[i+1] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                else:
                    numPathSegmentsReduced += 1
                i += 2
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'c':
            newData = []
            i = 0
            while i < len(data):
                if data[i+4] != 0 or data[i+5] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])
                else:
                    numPathSegmentsReduced += 1
                i += 6
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'a':
            newData = []
            i = 0
            while i < len(data):
                if data[i+5] != 0 or data[i+6] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])
                    newData.append(data[i+6])
                else:
                    numPathSegmentsReduced += 1
                i += 7
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'q':
            newData = []
            i = 0
            while i < len(data):
                if data[i+2] != 0 or data[i+3] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                else:
                    numPathSegmentsReduced += 1
                i += 4
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd in ['h','v']:
            newData = []
            i = 0
            while i < len(data):
                if data[i] != 0:
                    newData.append(data[i])
                else:
                    numPathSegmentsReduced += 1
                i += 1
            if newData:
                newPath.append( (cmd,newData) )
        else:
            newPath.append( (cmd,data) )
    path = newPath

    # convert straight curves into lines
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newData = data
        if cmd == 'c':
            newData = []
            while i < len(data):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x,p1y) = (data[i],data[i+1])
                (p2x,p2y) = (data[i+2],data[i+3])
                dx = data[i+4]
                dy = data[i+5]

                foundStraightCurve = False

                if dx == 0:
                    if p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    # BUG FIX: the second control point must satisfy p2y == m*p2x;
                    # this previously compared p2y against m*p2y, which is only
                    # true when p2y == 0 or m == 1
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append( (cmd,newData) )
                        newData = []
                    # now create a straight line segment
                    newPath.append( ('l', [dx,dy]) )
                    numCurvesStraightened += 1
                else:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])

                i += 6
        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append( (cmd,newData) )
    path = newPath

    # collapse all consecutive commands of the same type into one command
    prevCmd = ''
    prevData = []
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # flush the previous command if it is not the same type as the current command
        if prevCmd != '':
            if cmd != prevCmd or cmd == 'm':
                newPath.append( (prevCmd, prevData) )
                prevCmd = ''
                prevData = []

        # if the previous and current commands are the same type, collapse
        # but only if they are not move commands (since move can contain implicit lineto commands)
        if cmd == prevCmd and cmd != 'm':
            for coord in data:
                prevData.append(coord)

        # save last command and data
        else:
            prevCmd = cmd
            prevData = data
    # flush last command and data
    if prevCmd != '':
        newPath.append( (prevCmd, prevData) )
    path = newPath

    # convert to shorthand path segments where possible
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # convert line segments into h,v where possible
        if cmd == 'l':
            i = 0
            lineTuples = []
            while i < len(data):
                if data[i] == 0:
                    # vertical
                    if lineTuples:
                        # flush the existing line command
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    # append the v and then the remaining line coords
                    newPath.append( ('v', [data[i+1]]) )
                    numPathSegmentsReduced += 1
                elif data[i+1] == 0:
                    if lineTuples:
                        # flush the line command, then append the h and then the remaining line coords
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    newPath.append( ('h', [data[i]]) )
                    numPathSegmentsReduced += 1
                else:
                    lineTuples.append(data[i])
                    lineTuples.append(data[i+1])
                i += 2
            if lineTuples:
                newPath.append( ('l', lineTuples) )
        # convert Bezier curve segments into s where possible
        elif cmd == 'c':
            bez_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                # rotate by 180deg means negate both coordinates
                # if the previous control point is equal then we can substitute a
                # shorthand bezier command
                if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('c', curveTuples) )
                        curveTuples = []
                    # append the s command
                    newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
                    numPathSegmentsReduced += 1
                else:
                    j = 0
                    while j <= 5:
                        curveTuples.append(data[i+j])
                        j += 1

                # set up control point for next curve segment
                bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])
                i += 6

            if curveTuples:
                newPath.append( ('c', curveTuples) )
        # convert quadratic curve segments into t where possible
        elif cmd == 'q':
            quad_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('q', curveTuples) )
                        curveTuples = []
                    # append the t command
                    newPath.append( ('t', [data[i+2], data[i+3]]) )
                    numPathSegmentsReduced += 1
                else:
                    j = 0
                    while j <= 3:
                        curveTuples.append(data[i+j])
                        j += 1

                quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])
                i += 4

            if curveTuples:
                newPath.append( ('q', curveTuples) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # for each h or v, collapse unnecessary coordinates that run in the same direction
    # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['h','v'] and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    numPathSegmentsReduced += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append( (cmd, newData) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # it is possible that we have consecutive h, v, c, t commands now
    # so again collapse all consecutive commands of the same type into one command
    prevCmd = ''
    prevData = []
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # flush the previous command if it is not the same type as the current command
        if prevCmd != '':
            if cmd != prevCmd or cmd == 'm':
                newPath.append( (prevCmd, prevData) )
                prevCmd = ''
                prevData = []

        # if the previous and current commands are the same type, collapse
        if cmd == prevCmd and cmd != 'm':
            for coord in data:
                prevData.append(coord)

        # save last command and data
        else:
            prevCmd = cmd
            prevData = data
    # flush last command and data
    if prevCmd != '':
        newPath.append( (prevCmd, prevData) )
    path = newPath

    newPathStr = serializePath(path)
    numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
    element.setAttribute('d', newPathStr)
def parseListOfPoints(s):
    """
    Parse string into a list of points.

    Returns a list containing an even number of coordinate strings
    (or an empty list if the input is malformed: an odd number of
    coordinates, or coordinates carrying units).
    """
    points = []

    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    # comma-wsp: (wsp+ comma? wsp*) | (comma wsp*)
    ws_nums = re.split("\\s*\\,?\\s*", s.strip())
    nums = []

    # also, if 100-100 is found, split it into two also
    #  <polygon points="100,-100,100-100,100-100-100,-100-100" />
    for i in range(len(ws_nums)):
        negcoords = re.split("\\-", ws_nums[i])

        # this string didn't have any negative coordinates
        if len(negcoords) == 1:
            nums.append(negcoords[0])
        # we got negative coords
        else:
            for j in range(len(negcoords)):
                # first number could be positive
                if j == 0:
                    if negcoords[0] != '':
                        nums.append(negcoords[0])
                # otherwise all other strings will be negative
                else:
                    # unless we accidentally split a number that was in scientific notation
                    # and had a negative exponent (500.00e-1)
                    # BUG FIX: guard against nums being empty -- a leading negative
                    # coordinate (e.g. points="-100,100") used to raise IndexError here
                    if nums and (nums[-1][-1] == 'e' or nums[-1][-1] == 'E'):
                        nums[-1] = nums[-1] + '-' + negcoords[j]
                    else:
                        nums.append( '-'+negcoords[j] )

    # now resolve into SVGLength values
    i = 0
    while i < len(nums):
        x = SVGLength(nums[i])
        # if we had an odd number of points, return empty
        if i == len(nums)-1: return []
        else: y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE: return []
        points.append( str(x.value) )
        points.append( str(y.value) )
        i += 2

    return points
def cleanPolygon(elem):
    """
    Remove unnecessary closing point of polygon points attribute
    (polygons close themselves, so a final point equal to the first is redundant)
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    N = len(pts)/2
    if N >= 2:
        # BUG FIX: starty was previously read from pts[0] (the x coordinate),
        # so the closing point was only detected when its x equaled its y
        (startx,starty) = (pts[0],pts[1])
        (endx,endy) = (pts[len(pts)-2],pts[len(pts)-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts,True))
def cleanPolyline(elem):
    """
    Scour the polyline points attribute
    """
    cleaned = scourCoordinates(parseListOfPoints(elem.getAttribute('points')), True)
    elem.setAttribute('points', cleaned)
def serializePath(pathObj):
    """
    Reserializes the path data with some cleanups.
    """
    # elliptical arc commands must have comma/wsp separating the coordinates;
    # this fixes an issue outlined in https://bugs.launchpad.net/scour/+bug/412754
    return ''.join([cmd + scourCoordinates(data, (cmd == 'a'))
                    for (cmd, data) in pathObj])
def scourCoordinates(data, forceCommaWsp = False):
    """
    Serializes coordinate data with some cleanups:
    - removes all trailing zeros after the decimal
    - integerize coordinates if possible
    - removes extraneous whitespace
    - adds commas between values in a subcommand if required (or if forceCommaWsp is True)
    """
    if data is None:
        return ""

    parts = []
    last = len(data) - 1
    for idx, coord in enumerate(data):
        # add the scoured coordinate to the path string
        parts.append(scourLength(coord))

        # only need the comma if the next number is non-negative or if forceCommaWsp is True
        if idx < last and (forceCommaWsp or Decimal(data[idx + 1]) >= 0):
            parts.append(',')
    return ''.join(parts)
def scourLength(str):
    """
    Scours one SVG length: renders its numeric value with the fewest
    characters (integerized if possible, trailing zeros trimmed, scientific
    notation when shorter) and re-appends its unit string.
    """
    # NOTE(review): the parameter shadows the builtin `str`; renaming it would
    # alter the public signature for keyword callers, so it is left as-is.
    length = SVGLength(str)
    coord = length.value

    # reduce to the proper number of digits
    coord = Decimal(unicode(coord)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # Decimal.trim() is available in Python 2.6+ to trim trailing zeros
    # NOTE(review): CPython's decimal module does not appear to expose trim(),
    # so the AttributeError fallback below is the path normally taken -- verify.
    try:
        coord = coord.trim()
    except AttributeError:
        # trim it ourselves: strip trailing zeros after the decimal point
        s = unicode(coord)
        dec = s.find('.')
        if dec != -1:
            while s[-1] == '0':
                s = s[:-1]
        coord = Decimal(s)

    # Decimal.normalize() will uses scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord)+Unit.str(length.units)
def embedRasters(element, options) :
    """
    Converts raster references to inline images (base64-encoded data: URIs).
    Handles local png/jpg/gif files and http:// URLs.
    NOTE: there are size limits to base64-encoding handling in browsers
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # if xlink:href is set, then grab the id
    if href != '' and len(href) > 1:
        # find if href value has filename ext
        ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

        # look for 'png', 'jpg', and 'gif' extensions
        if ext in ('png', 'jpg', 'gif'):
            # if the referenced file does not resolve and is not a URL, treat it
            # as relative to the input file's directory
            # (fix: the redundant double isfile() check was collapsed)
            if not os.path.isfile(href) and href[:7] != 'http://':
                infilename = '.'
                if options.infilename: infilename = options.infilename
                href = os.path.join(os.path.dirname(infilename), href)

            rasterdata = ''
            # test if file exists locally
            if os.path.isfile(href):
                # open raster file as raw binary
                # (fix: the handle was previously never closed)
                raster = open(href, "rb")
                try:
                    rasterdata = raster.read()
                finally:
                    raster.close()
            elif href[:7] == 'http://':
                webFile = urllib.urlopen(href)
                try:
                    rasterdata = webFile.read()
                finally:
                    webFile.close()

            # ... should we remove all images which don't resolve?
            if rasterdata != '' :
                # base64-encode raster
                b64eRaster = base64.b64encode( rasterdata )

                # set href attribute to base64-encoded equivalent
                if b64eRaster != '':
                    # PNG and GIF both have MIME Type 'image/[ext]', but
                    # JPEG has MIME Type 'image/jpeg'
                    if ext == 'jpg':
                        ext = 'jpeg'

                    element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
                    numRastersEmbedded += 1
                    del b64eRaster
def properlySizeDoc(docElement):
    """Replace a statically sized root <svg> (width/height in px or
    unitless) with an equivalent viewBox, removing width/height.

    Leaves the document untouched when the size uses physical units,
    or when an existing viewBox has a non-zero origin or a size that
    disagrees with width/height.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite
    # them into a viewBox
    # BUG FIX: the second clause previously re-tested w.units instead of
    # h.units, so e.g. height="10cm" could be incorrectly rewritten
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
        (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute (numbers separated by whitespace and/or a comma)
    vbSep = re.split("\\s*\\,?\\s*", docElement.getAttribute('viewBox'), 3)
    # if we have a valid viewBox we need to check it
    vbWidth, vbHeight = 0, 0
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it
            # is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
def remapNamespacePrefix(node, oldprefix, newprefix):
    """Recursively rewrite every element whose prefix is *oldprefix* to use
    *newprefix* instead (or no prefix at all when newprefix is '').

    The element is rebuilt (minidom cannot rename in place): attributes are
    copied, children are deep-cloned, and the new element replaces the old
    one in its parent.  NOTE: attributes that themselves carry the old
    prefix are copied by localName only, unchanged from original behavior.
    """
    # only element nodes (nodeType 1) can carry a namespace prefix
    if node is None or node.nodeType != 1:
        return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node in the new prefix (or unprefixed)
        if newprefix != '':
            newNode = doc.createElementNS(namespace, newprefix + ":" + localName)
        else:
            newNode = doc.createElement(localName)

        # add all the attributes
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            newNode.setAttributeNS(attr.namespaceURI, attr.localName, attr.nodeValue)

        # clone and add all the child nodes
        for child in node.childNodes:
            newNode.appendChild(child.cloneNode(True))

        # replace old node with new node and continue the walk from it
        parent.replaceChild(newNode, node)
        node = newNode

    # now do all child nodes; iterate over a snapshot because the recursive
    # call may replaceChild() entries of this very list while we walk it
    for child in list(node.childNodes):
        remapNamespacePrefix(child, oldprefix, newprefix)
def makeWellFormed(str):
    """Escape the five reserved XML characters in *str* so the result can
    be emitted inside attribute values or text content.

    Maps < > & ' " to their predefined XML entities; all other characters
    pass through unchanged.
    """
    # NOTE: restored the entity replacement table; it had been corrupted
    # into an identity mapping (each character mapped to itself)
    xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'}

    # substitute each reserved character with its entity, keep the rest
    return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0, preserveWhitespace = False):
    """Serialize *element* (and its subtree) to a string.

    ind is the current indentation depth; one indent character
    (per options.indent_type: 'tab', 'space', or anything else for none)
    is emitted per depth level.  preserveWhitespace is inherited from an
    ancestor's xml:space="preserve" and suppresses pretty-printing.
    Returns the serialized markup as a string.
    """
    indent = ind
    I=''
    if options.indent_type == 'tab': I='\t'
    elif options.indent_type == 'space': I=' '

    outString = (I * ind) + '<' + element.nodeName

    # always serialize the id or xml:id attributes first
    if element.getAttribute('id') != '':
        id = element.getAttribute('id')
        # use single-quotes when the value itself contains a double-quote
        quot = '"'
        if id.find('"') != -1:
            quot = "'"
        outString += ' ' + 'id=' + quot + id + quot
    if element.getAttribute('xml:id') != '':
        id = element.getAttribute('xml:id')
        quot = '"'
        if id.find('"') != -1:
            quot = "'"
        outString += ' ' + 'xml:id=' + quot + id + quot

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length) :
        attr = attrList.item(num)
        # id/xml:id were already emitted above
        if attr.nodeName == 'id' or attr.nodeName == 'xml:id': continue
        # if the attribute value contains a double-quote, use single-quotes
        quot = '"'
        if attr.nodeValue.find('"') != -1:
            quot = "'"

        attrValue = makeWellFormed( attr.nodeValue )

        outString += ' '
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.prefix != None:
            outString += attr.prefix + ':'
        elif attr.namespaceURI != None:
            # namespace declarations serialized via localName need their
            # xmlns:/xlink: qualifier re-attached explicitly
            if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
                outString += 'xmlns:'
            elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
                outString += 'xlink:'
        outString += attr.localName + '=' + quot + attrValue + quot

        # xml:space toggles whitespace handling for this subtree
        if attr.nodeName == 'xml:space':
            if attrValue == 'preserve':
                preserveWhitespace = True
            elif attrValue == 'default':
                preserveWhitespace = False

    # if no children, self-close
    children = element.childNodes
    if children.length > 0:
        outString += '>'

        onNewLine = False
        for child in element.childNodes:
            # element node
            if child.nodeType == 1:
                if preserveWhitespace:
                    # no indentation or line breaks inside xml:space="preserve"
                    outString += serializeXML(child, options, 0, preserveWhitespace)
                else:
                    outString += os.linesep + serializeXML(child, options, indent + 1, preserveWhitespace)
                    onNewLine = True
            # text node
            elif child.nodeType == 3:
                # trim it only in the case of not being a child of an element
                # where whitespace might be important
                if preserveWhitespace:
                    outString += makeWellFormed(child.nodeValue)
                else:
                    outString += makeWellFormed(child.nodeValue.strip())
            # CDATA node
            elif child.nodeType == 4:
                outString += '<![CDATA[' + child.nodeValue + ']]>'
            # Comment node
            elif child.nodeType == 8:
                outString += '<!--' + child.nodeValue + '-->'
            # TODO: entities, processing instructions, what else?
            else: # ignore the rest
                pass

        # re-indent the closing tag only if a child element started a new line
        if onNewLine: outString += (I * ind)
        outString += '</' + element.nodeName + '>'
        if indent > 0: outString += os.linesep
    else:
        outString += '/>'
        if indent > 0: outString += os.linesep

    return outString
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
    """Scour an SVG document given as a string and return the cleaned
    SVG as a string.

    options is an optparse Values object; when None, the defaults from
    _options_parser are used.  Updates the module-level statistics
    counters as a side effect.
    """
    if options is None:
        options = _options_parser.get_default_values()
    # the decimal context precision drives all numeric rounding below
    getcontext().prec = options.digits
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    # rewrite prefixed SVG elements (e.g. svg:rect) into the default namespace
    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # NOTE: these elements will be removed even if they have (invalid) text nodes
    elemsToRemove = []
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagName(tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                # for/else: runs only when no significant child node was found
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    if options.strip_ids:
        # loop until an iteration removes nothing more
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # move common attributes to parent group
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    # remove unused attributes from parent
    numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

    # clean path data
    for elem in doc.documentElement.getElementsByTagName('path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    for polyline in doc.documentElement.getElementsByTagName('polyline') :
        cleanPolygon(polyline)

    # scour lengths (including coordinates)
    for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(type):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                    'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                    'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagName('image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    if options.enable_viewboxing:
        properlySizeDoc(doc.documentElement)

    # output the document as a pretty string with a single space for indent
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolled our own serialize function here to save on space, put id first, customize indentation, etc
    # out_string = doc.documentElement.toprettyxml(' ')
    out_string = serializeXML(doc.documentElement, options) + os.linesep

    # now strip out empty lines
    lines = []
    # Get rid of empty lines
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string with its XML prolog and surrounding comments
    if options.strip_xml_prolog == False:
        total_output = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' + os.linesep
    else:
        total_output = ""

    # re-emit top-level siblings of the root element in document order
    for child in doc.childNodes:
        if child.nodeType == 1:
            total_output += "".join(lines)
        else: # doctypes, entities, comments
            total_output += child.toxml() + os.linesep

    return total_output
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """Scour the SVG file *filename* and return the result as a parsed
    xml.dom.minidom Document.
    """
    # read the whole file, making sure the handle is closed deterministically
    # (the original open() left the file object to be reclaimed by GC)
    infile = open(filename)
    try:
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2348 # GZ: Seems most other commandline tools don't do this, is it really wanted?
2349 class HeaderedFormatter(optparse.IndentedHelpFormatter):
2350 """
2351 Show application name, version number, and copyright statement
2352 above usage information.
2353 """
2354 def format_usage(self, usage):
2355 return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
2356 optparse.IndentedHelpFormatter.format_usage(self, usage))
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
# Module-level option parser: defines the command-line interface and, via
# get_default_values(), the default options used by scourString().
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    description=("If the input/output files are specified with a svgz"
    " extension, then compressed SVG is assumed. If the input file is not"
    " specified, stdin is used. If the output file is not specified, "
    " stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# feature toggles: each --disable-* flag turns off a scouring pass that is
# on by default; each --enable-* flag turns on an off-by-default pass
_options_parser.add_option("--disable-simplify-colors",
    action="store_false", dest="simple_colors", default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option("--disable-style-to-xml",
    action="store_false", dest="style_to_xml", default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option("--disable-group-collapsing",
    action="store_false", dest="group_collapse", default=True,
    help="won't collapse <g> elements")
_options_parser.add_option("--enable-id-stripping",
    action="store_true", dest="strip_ids", default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option("--disable-embed-rasters",
    action="store_false", dest="embed_rasters", default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option("--keep-editor-data",
    action="store_true", dest="keep_editor_data", default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option("--strip-xml-prolog",
    action="store_true", dest="strip_xml_prolog", default=False,
    help="won't output the <?xml ?> prolog")
_options_parser.add_option("--enable-viewboxing",
    action="store_true", dest="enable_viewboxing", default=False,
    help="changes document width/height to 100%/100% and creates viewbox coordinates")

# GZ: this is confusing, most people will be thinking in terms of
# decimal places, which is not what decimal precision is doing
_options_parser.add_option("-p", "--set-precision",
    action="store", type=int, dest="digits", default=5,
    help="set number of significant digits (default: %default)")
# input/output filenames (hidden from --help; positional stdin/stdout are
# the documented interface)
_options_parser.add_option("-i",
    action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
    action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("--indent",
    action="store", type="string", dest="indent_type", default="space",
    help="indentation of the output: none, space, tab (default: %default)")
def maybe_gziped_file(filename, mode="r"):
    """Open *filename*, transparently using gzip for .svgz/.gz files.

    Returns a file-like object opened in *mode*.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() instead of the Python-2-only file() builtin (identical in Py2)
    return open(filename, mode)
def parse_args(args=None):
    """Parse command-line arguments (sys.argv when args is None).

    Validates option values, then opens the input and output streams,
    falling back to stdin/stdout when no filenames were given.
    Returns (options, [infile, outfile]).
    """
    options, rargs = _options_parser.parse_args(args)

    # reject leftover positional arguments and nonsensical option values;
    # _options_parser.error() prints the message and exits
    if rargs:
        _options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")
    if options.indent_type not in ("tab", "space", "none"):
        _options_parser.error("Invalid value for --indent, see --help")
    if options.infilename and options.outfilename and options.infilename == options.outfilename:
        _options_parser.error("Input filename is the same as output filename")

    if options.infilename:
        # GZ: could catch a raised IOError here and report
        infile = maybe_gziped_file(options.infilename)
    else:
        # GZ: could sniff for gzip compression here
        infile = sys.stdin

    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")
    else:
        outfile = sys.stdout

    return options, [infile, outfile]
def getReport():
    """Assemble the module-level scouring statistics into a multi-line,
    human-readable report string (one counter per line)."""
    stats = [
        ('Number of elements removed', numElemsRemoved),
        ('Number of attributes removed', numAttrsRemoved),
        ('Number of unreferenced id attributes removed', numIDsRemoved),
        ('Number of style properties fixed', numStylePropsFixed),
        ('Number of raster images embedded inline', numRastersEmbedded),
        ('Number of path segments reduced/removed', numPathSegmentsReduced),
        ('Number of bytes saved in path data', numBytesSavedInPathData),
        ('Number of bytes saved in colors', numBytesSavedInColors),
        ('Number of points removed from polygons', numPointsRemovedFromPolygon),
    ]
    return os.linesep.join([' %s: %s' % (label, value) for label, value in stats])
if __name__ == '__main__':
    # pick a process-time source: time.clock on Windows, os.times elsewhere
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        def get_tick():
            return os.times()[0]

    start = get_tick()

    options, (input, output) = parse_args()

    # banner goes to stderr so stdout stays clean for piped SVG output
    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    # NOTE(review): input.name is read after input.close() — works for
    # regular files and for sys.stdin ('<stdin>'); confirm for gzip streams
    print >>sys.stderr, ' File:', input.name, \
        os.linesep + ' Time taken:', str(end-start) + 's' + os.linesep, \
        getReport()

    oldsize = len(in_string)
    newsize = len(out_string)
    # true division is active (module imports division from __future__)
    # NOTE(review): raises ZeroDivisionError on empty input — acceptable?
    sizediff = (newsize / oldsize) * 100
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'