1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Scour
5 #
6 # Copyright 2010 Jeff Schiller
7 #
8 # This file is part of Scour, http://www.codedread.com/scour/
9 #
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
13 #
14 # http://www.apache.org/licenses/LICENSE-2.0
15 #
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
22 # Notes:
24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
28 #
29 # * Process Transformations
30 # * Collapse all group based transformations
32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
33 # * analysis of path elements to see if rect can be used instead? (must also need to look
34 # at rounded corners)
36 # Next Up:
37 # - only remove unreferenced elements if they are not children of a referenced element
38 # - add an option to remove ids if they match the Inkscape-style of IDs
39 # - investigate point-reducing algorithms
40 # - parse transform attribute
41 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
42 # - option to remove metadata
44 # necessary to get true division
45 from __future__ import division
47 import os
48 import sys
49 import xml.dom.minidom
50 import re
51 import math
52 import base64
53 import urllib
54 from svg_regex import svg_parser
55 import gzip
56 import optparse
57 from yocto_css import parseCssString
59 # Python 2.3- did not have Decimal
60 try:
61 from decimal import *
62 except ImportError:
63 from fixedpoint import *
64 Decimal = FixedPoint
66 # Import Psyco if available
67 try:
68 import psyco
69 psyco.full()
70 except ImportError:
71 pass
# program identification, used for help/version output and generated metadata
APP = 'scour'
VER = '0.25r171'
COPYRIGHT = 'Copyright Jeff Schiller, 2010'

# XML namespace URIs this tool knows about; SVG and XLINK content is kept,
# the editor-specific namespaces below are candidates for stripping
NS = { 'SVG': 'http://www.w3.org/2000/svg',
    'XLINK': 'http://www.w3.org/1999/xlink',
    'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
    'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
    'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
    'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
    'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
    'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
    'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
    'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
    'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
    'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
    'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
    'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/'
    }

# namespaces whose elements and attributes are removed from the document:
# all of these hold editor-private data that has no effect on rendering
unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'],
    NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'],
    NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'],
    NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ]

# SVG presentation attributes (a subset of the properties from the SVG spec)
svgAttributes = [
    'clip-rule',
    'display',
    'fill',
    'fill-opacity',
    'fill-rule',
    'filter',
    'font-family',
    'font-size',
    'font-stretch',
    'font-style',
    'font-variant',
    'font-weight',
    'line-height',
    'marker',
    'opacity',
    'overflow',
    'stop-color',
    'stop-opacity',
    'stroke',
    'stroke-dasharray',
    'stroke-dashoffset',
    'stroke-linecap',
    'stroke-linejoin',
    'stroke-miterlimit',
    'stroke-opacity',
    'stroke-width',
    'visibility'
    ]
# map of SVG 1.1 color keyword names to their rgb() functional equivalents
# (used when rewriting color values into their shortest serialization)
colors = {
    'aliceblue': 'rgb(240, 248, 255)',
    'antiquewhite': 'rgb(250, 235, 215)',
    'aqua': 'rgb( 0, 255, 255)',
    'aquamarine': 'rgb(127, 255, 212)',
    'azure': 'rgb(240, 255, 255)',
    'beige': 'rgb(245, 245, 220)',
    'bisque': 'rgb(255, 228, 196)',
    'black': 'rgb( 0, 0, 0)',
    'blanchedalmond': 'rgb(255, 235, 205)',
    'blue': 'rgb( 0, 0, 255)',
    'blueviolet': 'rgb(138, 43, 226)',
    'brown': 'rgb(165, 42, 42)',
    'burlywood': 'rgb(222, 184, 135)',
    'cadetblue': 'rgb( 95, 158, 160)',
    'chartreuse': 'rgb(127, 255, 0)',
    'chocolate': 'rgb(210, 105, 30)',
    'coral': 'rgb(255, 127, 80)',
    'cornflowerblue': 'rgb(100, 149, 237)',
    'cornsilk': 'rgb(255, 248, 220)',
    'crimson': 'rgb(220, 20, 60)',
    'cyan': 'rgb( 0, 255, 255)',
    'darkblue': 'rgb( 0, 0, 139)',
    'darkcyan': 'rgb( 0, 139, 139)',
    'darkgoldenrod': 'rgb(184, 134, 11)',
    'darkgray': 'rgb(169, 169, 169)',
    'darkgreen': 'rgb( 0, 100, 0)',
    'darkgrey': 'rgb(169, 169, 169)',
    'darkkhaki': 'rgb(189, 183, 107)',
    'darkmagenta': 'rgb(139, 0, 139)',
    'darkolivegreen': 'rgb( 85, 107, 47)',
    'darkorange': 'rgb(255, 140, 0)',
    'darkorchid': 'rgb(153, 50, 204)',
    'darkred': 'rgb(139, 0, 0)',
    'darksalmon': 'rgb(233, 150, 122)',
    'darkseagreen': 'rgb(143, 188, 143)',
    'darkslateblue': 'rgb( 72, 61, 139)',
    'darkslategray': 'rgb( 47, 79, 79)',
    'darkslategrey': 'rgb( 47, 79, 79)',
    'darkturquoise': 'rgb( 0, 206, 209)',
    'darkviolet': 'rgb(148, 0, 211)',
    'deeppink': 'rgb(255, 20, 147)',
    'deepskyblue': 'rgb( 0, 191, 255)',
    'dimgray': 'rgb(105, 105, 105)',
    'dimgrey': 'rgb(105, 105, 105)',
    'dodgerblue': 'rgb( 30, 144, 255)',
    'firebrick': 'rgb(178, 34, 34)',
    'floralwhite': 'rgb(255, 250, 240)',
    'forestgreen': 'rgb( 34, 139, 34)',
    'fuchsia': 'rgb(255, 0, 255)',
    'gainsboro': 'rgb(220, 220, 220)',
    'ghostwhite': 'rgb(248, 248, 255)',
    'gold': 'rgb(255, 215, 0)',
    'goldenrod': 'rgb(218, 165, 32)',
    'gray': 'rgb(128, 128, 128)',
    'grey': 'rgb(128, 128, 128)',
    'green': 'rgb( 0, 128, 0)',
    'greenyellow': 'rgb(173, 255, 47)',
    'honeydew': 'rgb(240, 255, 240)',
    'hotpink': 'rgb(255, 105, 180)',
    'indianred': 'rgb(205, 92, 92)',
    'indigo': 'rgb( 75, 0, 130)',
    'ivory': 'rgb(255, 255, 240)',
    'khaki': 'rgb(240, 230, 140)',
    'lavender': 'rgb(230, 230, 250)',
    'lavenderblush': 'rgb(255, 240, 245)',
    'lawngreen': 'rgb(124, 252, 0)',
    'lemonchiffon': 'rgb(255, 250, 205)',
    'lightblue': 'rgb(173, 216, 230)',
    'lightcoral': 'rgb(240, 128, 128)',
    'lightcyan': 'rgb(224, 255, 255)',
    'lightgoldenrodyellow': 'rgb(250, 250, 210)',
    'lightgray': 'rgb(211, 211, 211)',
    'lightgreen': 'rgb(144, 238, 144)',
    'lightgrey': 'rgb(211, 211, 211)',
    'lightpink': 'rgb(255, 182, 193)',
    'lightsalmon': 'rgb(255, 160, 122)',
    'lightseagreen': 'rgb( 32, 178, 170)',
    'lightskyblue': 'rgb(135, 206, 250)',
    'lightslategray': 'rgb(119, 136, 153)',
    'lightslategrey': 'rgb(119, 136, 153)',
    'lightsteelblue': 'rgb(176, 196, 222)',
    'lightyellow': 'rgb(255, 255, 224)',
    'lime': 'rgb( 0, 255, 0)',
    'limegreen': 'rgb( 50, 205, 50)',
    'linen': 'rgb(250, 240, 230)',
    'magenta': 'rgb(255, 0, 255)',
    'maroon': 'rgb(128, 0, 0)',
    'mediumaquamarine': 'rgb(102, 205, 170)',
    'mediumblue': 'rgb( 0, 0, 205)',
    'mediumorchid': 'rgb(186, 85, 211)',
    'mediumpurple': 'rgb(147, 112, 219)',
    'mediumseagreen': 'rgb( 60, 179, 113)',
    'mediumslateblue': 'rgb(123, 104, 238)',
    'mediumspringgreen': 'rgb( 0, 250, 154)',
    'mediumturquoise': 'rgb( 72, 209, 204)',
    'mediumvioletred': 'rgb(199, 21, 133)',
    'midnightblue': 'rgb( 25, 25, 112)',
    'mintcream': 'rgb(245, 255, 250)',
    'mistyrose': 'rgb(255, 228, 225)',
    'moccasin': 'rgb(255, 228, 181)',
    'navajowhite': 'rgb(255, 222, 173)',
    'navy': 'rgb( 0, 0, 128)',
    'oldlace': 'rgb(253, 245, 230)',
    'olive': 'rgb(128, 128, 0)',
    'olivedrab': 'rgb(107, 142, 35)',
    'orange': 'rgb(255, 165, 0)',
    'orangered': 'rgb(255, 69, 0)',
    'orchid': 'rgb(218, 112, 214)',
    'palegoldenrod': 'rgb(238, 232, 170)',
    'palegreen': 'rgb(152, 251, 152)',
    'paleturquoise': 'rgb(175, 238, 238)',
    'palevioletred': 'rgb(219, 112, 147)',
    'papayawhip': 'rgb(255, 239, 213)',
    'peachpuff': 'rgb(255, 218, 185)',
    'peru': 'rgb(205, 133, 63)',
    'pink': 'rgb(255, 192, 203)',
    'plum': 'rgb(221, 160, 221)',
    'powderblue': 'rgb(176, 224, 230)',
    'purple': 'rgb(128, 0, 128)',
    'red': 'rgb(255, 0, 0)',
    'rosybrown': 'rgb(188, 143, 143)',
    'royalblue': 'rgb( 65, 105, 225)',
    'saddlebrown': 'rgb(139, 69, 19)',
    'salmon': 'rgb(250, 128, 114)',
    'sandybrown': 'rgb(244, 164, 96)',
    'seagreen': 'rgb( 46, 139, 87)',
    'seashell': 'rgb(255, 245, 238)',
    'sienna': 'rgb(160, 82, 45)',
    'silver': 'rgb(192, 192, 192)',
    'skyblue': 'rgb(135, 206, 235)',
    'slateblue': 'rgb(106, 90, 205)',
    'slategray': 'rgb(112, 128, 144)',
    'slategrey': 'rgb(112, 128, 144)',
    'snow': 'rgb(255, 250, 250)',
    'springgreen': 'rgb( 0, 255, 127)',
    'steelblue': 'rgb( 70, 130, 180)',
    'tan': 'rgb(210, 180, 140)',
    'teal': 'rgb( 0, 128, 128)',
    'thistle': 'rgb(216, 191, 216)',
    'tomato': 'rgb(255, 99, 71)',
    'turquoise': 'rgb( 64, 224, 208)',
    'violet': 'rgb(238, 130, 238)',
    'wheat': 'rgb(245, 222, 179)',
    'white': 'rgb(255, 255, 255)',
    'whitesmoke': 'rgb(245, 245, 245)',
    'yellow': 'rgb(255, 255, 0)',
    'yellowgreen': 'rgb(154, 205, 50)',
    }
def isSameSign(a,b):
    """Return True when a and b do not have opposite signs (zero matches either sign)."""
    bothNonNegative = (a >= 0 and b >= 0)
    bothNonPositive = (a <= 0 and b <= 0)
    return bothNonNegative or bothNonPositive
# a signed decimal coordinate (no exponent, no leading '+')
coord = re.compile("\\-?\\d+\\.?\\d*")
# a number in scientific notation (mantissa plus a required exponent)
scinumber = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+[eE][\\-\\+]?\\d+")
# a plain signed number without an exponent
number = re.compile("[\\-\\+]?(\\d*\\.?)?\\d+")
# captures just the exponent digits of a scientific-notation number
sciExponent = re.compile("[eE]([\\-\\+]?\\d+)")
# a CSS/SVG length unit suffix anchored at the end of the string
unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|\\%){1,1}$")
class Unit(object):
    """Enumeration of SVG length units with string <-> constant conversion."""
    INVALID = -1
    NONE = 0
    PCT = 1
    PX = 2
    PT = 3
    PC = 4
    EM = 5
    EX = 6
    CM = 7
    MM = 8
    IN = 9

    # suffix -> constant lookup table (replaces the old if/elif ladder,
    # as the review note in the original suggested)
    s2u = {
        '%': PCT,
        'px': PX,
        'pt': PT,
        'pc': PC,
        'em': EM,
        'ex': EX,
        'cm': CM,
        'mm': MM,
        'in': IN,
    }
    # constant -> suffix lookup table
    u2s = {
        NONE: '',
        PCT: '%',
        PX: 'px',
        PT: 'pt',
        PC: 'pc',
        EM: 'em',
        EX: 'ex',
        CM: 'cm',
        MM: 'mm',
        IN: 'in',
    }

    # @staticmethod
    def get(unitstr):
        """Map a unit suffix string to a Unit constant.

        None or the empty string yields Unit.NONE; any unrecognized
        suffix yields Unit.INVALID.
        """
        if unitstr is None or unitstr == '':
            return Unit.NONE
        return Unit.s2u.get(unitstr, Unit.INVALID)

    # @staticmethod
    def str(u):
        """Map a Unit constant back to its suffix ('' for NONE, 'INVALID' otherwise unknown)."""
        return Unit.u2s.get(u, 'INVALID')

    # staticmethod() calls instead of decorators keep compatibility with
    # the very old Pythons this file still tries to support
    get = staticmethod(get)
    str = staticmethod(str)
class SVGLength(object):
    """Parses an SVG length string into a numeric value plus a Unit constant.

    After construction, self.value is an int or float and self.units is one
    of the Unit constants (Unit.INVALID with value 0 when the string could
    not be parsed).
    """
    def __init__(self, lengthstr):
        try:
            # simple unitless value; float() also accepts plain scientific
            # notation such as '1e5'
            self.value = float(lengthstr)
            if int(self.value) == self.value:
                self.value = int(self.value)
            self.units = Unit.NONE
        except ValueError:
            # the string has an exponent plus a unit, a unit, or is invalid

            # parse out number, exponent and unit
            self.value = 0
            unitBegin = 0
            scinum = scinumber.match(lengthstr)
            if scinum != None:
                # 'number' will always match here, no need to check it
                numMatch = number.match(lengthstr)
                expMatch = sciExponent.search(lengthstr, numMatch.start(0))
                self.value = (float(numMatch.group(0)) *
                    10 ** float(expMatch.group(1)))
                unitBegin = expMatch.end(1)
            else:
                # unit or invalid
                numMatch = number.match(lengthstr)
                if numMatch != None:
                    self.value = float(numMatch.group(0))
                    unitBegin = numMatch.end(0)

            if int(self.value) == self.value:
                self.value = int(self.value)

            if unitBegin != 0:
                unitMatch = unit.search(lengthstr, unitBegin)
                if unitMatch != None:
                    self.units = Unit.get(unitMatch.group(0))
                else:
                    # trailing text that is not a recognized unit (e.g. '5foo');
                    # previously self.units was left unassigned here, which made
                    # later attribute access raise AttributeError
                    self.value = 0
                    self.units = Unit.INVALID
            # invalid
            else:
                # TODO: this needs to set the default for the given attribute (how?)
                self.value = 0
                self.units = Unit.INVALID
# returns the length of a property
# TODO: eventually use the SVGLength class above once it is complete
def getSVGLength(value):
    """Parse value as a plain number.

    Returns a float when value is purely numeric; otherwise (a unit suffix
    such as '10px', or a non-numeric string) the original string is returned
    unchanged, so callers must accept either a float or a string.
    """
    try:
        v = float(value)
    except ValueError:
        # not a plain number: hand back the raw string
        # (the previous code also ran the coord/unit regexes here but
        # never used their results, so that dead work has been dropped)
        v = value
    return v
def findElementById(node, id):
    """Return the first element (preorder) whose 'id' attribute equals id, or None."""
    if node == None or node.nodeType != 1: return None
    # iterative preorder traversal in place of the original recursion
    pending = [node]
    while pending:
        current = pending.pop()
        # non-element nodes carry no id and are not descended into
        if current.nodeType != 1:
            continue
        if current.getAttribute('id') == id:
            return current
        # push children reversed so they are visited left-to-right
        pending.extend(reversed(current.childNodes))
    return None
def findElementsWithId(node, elems=None):
    """
    Returns a dictionary mapping each id attribute value to its element,
    gathered from node and all of its element descendants.
    """
    if elems is None:
        elems = {}
    nodeId = node.getAttribute('id')
    if nodeId:
        elems[nodeId] = node
    # only Element nodes (nodeType 1, per DOM Level 2 Core) can carry an
    # id attribute, so descend into those alone
    for child in node.childNodes:
        if child.nodeType == 1:
            findElementsWithId(child, elems)
    return elems
# attributes (and style property names) whose values may reference another
# element through a url(#id) function value
referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask', 'marker-start',
    'marker-end', 'marker-mid']
def findReferencedElements(node, ids=None):
    """
    Returns the number of times an ID is referenced as well as all elements
    that reference it.

    Currently looks at fill, stroke, clip-path, mask, marker, and
    xlink:href attributes.

    The returned dictionary maps id -> [reference count, list of referencing nodes].
    """
    global referencingProps
    if ids is None:
        ids = {}
    # TODO: input argument ids is clunky here (see below how it is called)
    # GZ: alternative to passing dict, use **kwargs

    # if this node is a style element, parse its text into CSS
    if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
        # node.firstChild will be either a CDATA or a Text node
        if node.firstChild != None:
            cssRules = parseCssString(node.firstChild.nodeValue)
            for rule in cssRules:
                for propname in rule['properties']:
                    propval = rule['properties'][propname]
                    findReferencingProperty(node, propname, propval, ids)
        # a style element holds only CSS text, nothing below it to examine
        return ids

    # else if xlink:href is set, then grab the id
    href = node.getAttributeNS(NS['XLINK'],'href')
    if href != '' and len(href) > 1 and href[0] == '#':
        # we remove the hash mark from the beginning of the id
        id = href[1:]
        if id in ids:
            ids[id][0] += 1
            ids[id][1].append(node)
        else:
            ids[id] = [1,[node]]

    # now get all style properties and the fill, stroke, filter attributes
    styles = node.getAttribute('style').split(';')
    for attr in referencingProps:
        styles.append(':'.join([attr, node.getAttribute(attr)]))

    for style in styles:
        propval = style.split(':')
        # NOTE: values containing an extra ':' (e.g. a full URL) split into
        # more than two pieces and are skipped by this check
        if len(propval) == 2 :
            prop = propval[0].strip()
            val = propval[1].strip()
            findReferencingProperty(node, prop, val, ids)

    # recurse into element children only (nodeType 1)
    if node.hasChildNodes() :
        for child in node.childNodes:
            if child.nodeType == 1 :
                findReferencedElements(child, ids)
    return ids
def findReferencingProperty(node, prop, val, ids):
    """Record in ids a reference from node when prop's value is a url(#...) form.

    ids maps id -> [reference count, list of referencing nodes] and is
    updated in place.  Only properties listed in referencingProps count.
    """
    global referencingProps
    if prop in referencingProps and val != '' :
        id = None
        if len(val) >= 7 and val[0:5] == 'url(#' :
            id = val[5:val.find(')')]
        # if the url has a quote in it, we need to compensate
        elif len(val) >= 8 :
            # double-quote
            if val[0:6] == 'url("#' :
                id = val[6:val.find('")')]
            # single-quote
            elif val[0:6] == "url('#" :
                id = val[6:val.find("')")]
        if id != None:
            # dict.has_key() was removed in Python 3; 'in' works everywhere
            if id in ids :
                ids[id][0] += 1
                ids[id][1].append(node)
            else:
                ids[id] = [1,[node]]
# running statistics, accumulated across all cleaning passes and reported
# when scour finishes
numIDsRemoved = 0
numElemsRemoved = 0
numAttrsRemoved = 0
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0
def removeUnusedDefs(doc, defElem, elemsToRemove=None):
    """Collect the children of defElem that can safely be deleted.

    An element is removable when it has no id or its id is unreferenced,
    unless it is one of the always-kept tags.  Groups are recursed into
    (their children considered individually) when the group itself is
    unreferenced.  Returns the accumulated elemsToRemove list.
    """
    if elemsToRemove is None:
        elemsToRemove = []

    # (the previous version also computed findElementsWithId() here but
    # never used the result)
    referencedIDs = findReferencedElements(doc.documentElement)

    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    for elem in defElem.childNodes:
        # only look at it if an element and not referenced anywhere else
        if elem.nodeType == 1 and (elem.getAttribute('id') == '' or \
                elem.getAttribute('id') not in referencedIDs):

            # we only inspect the children of a group in a defs if the group
            # is not referenced anywhere else
            if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
                elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove)
            # we only remove if it is not one of our tags we always keep (see above)
            elif elem.nodeName not in keepTags:
                elemsToRemove.append(elem)
    return elemsToRemove
def removeUnreferencedElements(doc):
    """
    Removes all unreferenced elements except for <svg>, <font>, <metadata>, <title>, and <desc>.
    Also vacuums the defs of any non-referenced renderable elements.

    Returns the number of unreferenced elements removed from the document.
    """
    global numElemsRemoved
    num = 0
    removeTags = ['linearGradient', 'radialGradient', 'pattern']

    identifiedElements = findElementsWithId(doc.documentElement)
    referencedIDs = findReferencedElements(doc.documentElement)

    for id in identifiedElements:
        if not id in referencedIDs:
            goner = findElementById(doc.documentElement, id)
            if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
                goner.parentNode.removeChild(goner)
                num += 1
                numElemsRemoved += 1

    # also vacuum the defs of any unreferenced elements
    # (BUGFIX: 'num' was previously reset to 0 here, which discarded the
    # count from the loop above and made the return value under-report)
    defs = doc.documentElement.getElementsByTagName('defs')
    for aDef in defs:
        elemsToRemove = removeUnusedDefs(doc, aDef)
        for elem in elemsToRemove:
            elem.parentNode.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """
    Removes the unreferenced ID attributes.

    referencedIDs maps id -> reference info; identifiedElements maps
    id -> element.  Returns the number of ID attributes removed.
    """
    global numIDsRemoved
    # elements whose ids we keep even when unreferenced
    keepTags = ['font']
    num = 0
    for id in list(identifiedElements.keys()):
        node = identifiedElements[id]
        # dict.has_key() was removed in Python 3; 'in' works everywhere
        if id not in referencedIDs and not node.nodeName in keepTags:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num
def removeNamespacedAttributes(node, namespaces):
    """Recursively strip every attribute whose namespace is in namespaces.

    Returns the number of attributes removed from node and its subtree.
    """
    global numAttrsRemoved
    num = 0
    if node.nodeType == 1 :
        # collect the names first: mutating the attribute map while
        # walking it would skip entries
        doomed = []
        attrs = node.attributes
        for i in range(attrs.length):
            candidate = attrs.item(i)
            if candidate != None and candidate.namespaceURI in namespaces:
                doomed.append(candidate.nodeName)
        for attrName in doomed:
            node.removeAttribute(attrName)
            numAttrsRemoved += 1
            num += 1

    # now recurse for children
    for child in node.childNodes:
        num += removeNamespacedAttributes(child, namespaces)
    return num
def removeNamespacedElements(node, namespaces):
    """Recursively remove every child element whose namespace is in namespaces.

    Returns the number of elements removed from node's subtree.
    """
    global numElemsRemoved
    num = 0
    if node.nodeType == 1 :
        # gather first, then remove: deleting while iterating childNodes
        # would skip siblings
        doomed = [c for c in node.childNodes
                  if c != None and c.namespaceURI in namespaces]
        for victim in doomed:
            node.removeChild(victim)
            numElemsRemoved += 1
            num += 1

    # now recurse for the children that survived
    for child in node.childNodes:
        num += removeNamespacedElements(child, namespaces)
    return num
def removeNestedGroups(node):
    """
    This walks further and further down the tree, removing groups
    which do not have any attributes or a title/desc child and
    promoting their children up one level
    """
    global numElemsRemoved
    num = 0

    collapsible = []
    for child in node.childNodes:
        if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0:
            # a group survives when a title or desc is a direct child
            # (explicit flag instead of the original for/else idiom)
            keepGroup = False
            for grandchild in child.childNodes:
                if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \
                        grandchild.nodeName in ['title','desc']:
                    keepGroup = True
                    break
            if not keepGroup:
                collapsible.append(child)

    for g in collapsible:
        # hoist each child in front of the group, then drop the empty group
        while g.childNodes.length > 0:
            g.parentNode.insertBefore(g.firstChild, g)
        g.parentNode.removeChild(g)
        numElemsRemoved += 1
        num += 1

    # now recurse for children
    for child in node.childNodes:
        if child.nodeType == 1:
            num += removeNestedGroups(child)
    return num
def moveCommonAttributesToParentGroup(elem):
    """
    This recursively calls this function on all children of the passed in element
    and then iterates over all child elements and removes common inheritable attributes
    from the children and places them in the parent group. But only if the parent contains
    nothing but element children and whitespace.

    Returns the net number of attributes removed from the subtree.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += moveCommonAttributesToParentGroup(child)
        # else if the parent has non-whitespace text children, do not
        # try to move common attributes
        elif child.nodeType == 3 and child.nodeValue.strip():
            return num

    # only process the children if there are more than one element
    if len(childElements) <= 1: return num

    commonAttrs = {}
    # add all inheritable properties of the first child element
    # FIXME: Note there is a chance that the first child is a set/animate in which case
    # its fill attribute is not what we want to look at, we should look for the first
    # non-animate/set element
    attrList = childElements[0].attributes
    # BUGFIX: the loop variable was previously 'num', which clobbered the
    # running count accumulated above and corrupted the return value
    for attrIndex in range(attrList.length):
        attr = attrList.item(attrIndex)
        # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
        # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
        if attr.nodeName in ['clip-rule',
                'display-align',
                'fill', 'fill-opacity', 'fill-rule',
                'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                'font-style', 'font-variant', 'font-weight',
                'letter-spacing',
                'pointer-events', 'shape-rendering',
                'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                'word-spacing', 'writing-mode']:
            # we just add all the attributes from the first child
            commonAttrs[attr.nodeName] = attr.nodeValue

    # for each subsequent child element
    for childNum in range(len(childElements)):
        # skip first child
        if childNum == 0:
            continue

        child = childElements[childNum]
        # if we are on an animateXXX/set element, ignore it (due to the 'fill' attribute)
        if child.localName in ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']:
            continue

        distinctAttrs = []
        # loop through all current 'common' attributes
        for name in commonAttrs.keys():
            # if this child doesn't match that attribute, schedule it for removal
            if child.getAttribute(name) != commonAttrs[name]:
                distinctAttrs.append(name)
        # remove those attributes which are not common
        for name in distinctAttrs:
            del commonAttrs[name]

    # commonAttrs now has all the inheritable attributes which are common among all child elements
    for name in list(commonAttrs.keys()):
        for child in childElements:
            child.removeAttribute(name)
        elem.setAttribute(name, commonAttrs[name])

    # update our statistic (we remove N*M attributes and add back in M attributes)
    num += (len(childElements)-1) * len(commonAttrs)
    return num
def removeUnusedAttributesOnParent(elem):
    """
    This recursively calls this function on all children of the element passed in,
    then removes any unused attributes on this elem if none of the children inherit it

    Returns the number of attributes removed from the subtree.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += removeUnusedAttributesOnParent(child)

    # only process the children if there are more than one element
    if len(childElements) <= 1: return num

    # get all inheritable attribute values on this parent
    attrList = elem.attributes
    unusedAttrs = {}
    # BUGFIX: the loop variable was previously 'num', which clobbered the
    # running count accumulated above and corrupted the return value
    for attrIndex in range(attrList.length):
        attr = attrList.item(attrIndex)
        if attr.nodeName in ['clip-rule',
                'display-align',
                'fill', 'fill-opacity', 'fill-rule',
                'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                'font-style', 'font-variant', 'font-weight',
                'letter-spacing',
                'pointer-events', 'shape-rendering',
                'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                'word-spacing', 'writing-mode']:
            unusedAttrs[attr.nodeName] = attr.nodeValue

    # if at least one child inherits an attribute (empty/'inherit' value),
    # the parent's attribute is still used and must be kept
    for childNum in range(len(childElements)):
        child = childElements[childNum]
        inheritedAttrs = []
        for name in unusedAttrs.keys():
            val = child.getAttribute(name)
            if val == '' or val == None or val == 'inherit':
                inheritedAttrs.append(name)
        for a in inheritedAttrs:
            del unusedAttrs[a]

    # unusedAttrs now has all the parent attributes that every child overrides
    for name in list(unusedAttrs.keys()):
        elem.removeAttribute(name)
        num += 1

    return num
def removeDuplicateGradientStops(doc):
    """Collapse consecutive stops that share offset, color and opacity.

    Offsets are normalized (percentages to fractions, numbers to their
    shortest int/float form).  Returns the number of stops removed.
    """
    global numElemsRemoved
    num = 0

    for gradType in ['linearGradient', 'radialGradient']:
        for grad in doc.getElementsByTagName(gradType):
            stops = {}
            stopsToRemove = []
            for stop in grad.getElementsByTagName('stop'):
                # convert percentages into a floating point number
                offsetU = SVGLength(stop.getAttribute('offset'))
                if offsetU.units == Unit.PCT:
                    offset = offsetU.value / 100.0
                elif offsetU.units == Unit.NONE:
                    offset = offsetU.value
                else:
                    offset = 0
                # set the stop offset value to the integer or floating point equivalent
                if int(offset) == offset: stop.setAttribute('offset', str(int(offset)))
                else: stop.setAttribute('offset', str(offset))

                color = stop.getAttribute('stop-color')
                opacity = stop.getAttribute('stop-opacity')
                # dict.has_key() was removed in Python 3; 'in' works everywhere
                if offset in stops :
                    oldStop = stops[offset]
                    if oldStop[0] == color and oldStop[1] == opacity:
                        stopsToRemove.append(stop)
                stops[offset] = [color, opacity]

            for stop in stopsToRemove:
                stop.parentNode.removeChild(stop)
                num += 1
                numElemsRemoved += 1

    return num
def collapseSinglyReferencedGradients(doc):
    """Merge a gradient referenced by exactly one other gradient into it.

    The referencing gradient adopts the stops and any unspecified geometry
    attributes of the referenced one, the xlink:href is dropped, and the
    now-unneeded gradient is deleted.  Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0

    # make sure to reset the ref'ed ids for when we are running this in testscour
    # (iteritems() is Python 2-only; items() behaves the same here)
    for rid, nodeCount in findReferencedElements(doc.documentElement).items():
        count = nodeCount[0]
        nodes = nodeCount[1]
        if count == 1:
            elem = findElementById(doc.documentElement, rid)
            if elem != None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \
                    and elem.namespaceURI == NS['SVG']:
                # found a gradient that is referenced by only 1 other element
                refElem = nodes[0]
                if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \
                        and refElem.namespaceURI == NS['SVG']:
                    # elem is a gradient referenced by only one other gradient (refElem)

                    # add the stops to the referencing gradient (this removes them from elem)
                    if len(refElem.getElementsByTagName('stop')) == 0:
                        stopsToAdd = elem.getElementsByTagName('stop')
                        for stop in stopsToAdd:
                            refElem.appendChild(stop)

                    # adopt the gradientUnits, spreadMethod, gradientTransform attributes if
                    # they are unspecified on refElem
                    for attr in ['gradientUnits','spreadMethod','gradientTransform']:
                        if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                            refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # if both are radialGradients, adopt elem's fx,fy,cx,cy,r attributes if
                    # they are unspecified on refElem
                    if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
                        for attr in ['fx','fy','cx','cy','r']:
                            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # if both are linearGradients, adopt elem's x1,y1,x2,y2 attributes if
                    # they are unspecified on refElem
                    if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
                        for attr in ['x1','y1','x2','y2']:
                            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

                    # now remove the xlink:href from refElem
                    refElem.removeAttributeNS(NS['XLINK'], 'href')

                    # now delete elem
                    elem.parentNode.removeChild(elem)
                    numElemsRemoved += 1
                    num += 1
    return num
def removeDuplicateGradients(doc):
    """Delete gradients that exactly duplicate another one.

    Two gradients are duplicates when all geometry attributes, the
    xlink:href, and every stop match.  Every element referencing a
    duplicate is re-pointed at the surviving 'master' gradient before the
    duplicate is removed.  Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0

    gradientsToRemove = {}
    duplicateToMaster = {}

    for gradType in ['linearGradient', 'radialGradient']:
        grads = doc.getElementsByTagName(gradType)
        for grad in grads:
            # TODO: should slice grads from 'grad' here to optimize
            for ograd in grads:
                # do not compare gradient to itself
                if grad == ograd: continue

                # compare grad to ograd (all properties, then all stops)
                # if attributes do not match, go to next gradient
                someGradAttrsDoNotMatch = False
                for attr in ['gradientUnits','spreadMethod','gradientTransform','x1','y1','x2','y2','cx','cy','fx','fy','r']:
                    if grad.getAttribute(attr) != ograd.getAttribute(attr):
                        someGradAttrsDoNotMatch = True
                        break

                if someGradAttrsDoNotMatch: continue

                # compare xlink:href values too
                if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
                    continue

                # all gradient properties match, now time to compare stops
                stops = grad.getElementsByTagName('stop')
                ostops = ograd.getElementsByTagName('stop')

                if stops.length != ostops.length: continue

                # now compare stops
                stopsNotEqual = False
                for i in range(stops.length):
                    if stopsNotEqual: break
                    stop = stops.item(i)
                    ostop = ostops.item(i)
                    for attr in ['offset', 'stop-color', 'stop-opacity']:
                        if stop.getAttribute(attr) != ostop.getAttribute(attr):
                            stopsNotEqual = True
                            break
                if stopsNotEqual: continue

                # ograd is a duplicate of grad, we schedule it to be removed UNLESS
                # ograd is ALREADY considered a 'master' element
                # (dict.has_key() was removed in Python 3; 'in' works everywhere)
                if ograd not in gradientsToRemove:
                    if ograd not in duplicateToMaster:
                        if grad not in gradientsToRemove:
                            gradientsToRemove[grad] = []
                        gradientsToRemove[grad].append( ograd )
                        duplicateToMaster[ograd] = grad

    # get a collection of all elements that are referenced and their referencing elements
    referencedIDs = findReferencedElements(doc.documentElement)
    for masterGrad in gradientsToRemove.keys():
        master_id = masterGrad.getAttribute('id')
        for dupGrad in gradientsToRemove[masterGrad]:
            # if the duplicate gradient no longer has a parent that means it was
            # already re-mapped to another master gradient
            if not dupGrad.parentNode: continue
            dup_id = dupGrad.getAttribute('id')
            # for each element that referenced the gradient we are going to remove
            # (guard: an unreferenced duplicate has no entry in referencedIDs,
            # which previously raised KeyError here)
            for elem in referencedIDs.get(dup_id, (0, []))[1]:
                # find out which attribute referenced the duplicate gradient
                for attr in ['fill', 'stroke']:
                    v = elem.getAttribute(attr)
                    if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
                        elem.setAttribute(attr, 'url(#'+master_id+')')
                if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
                    elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)

            # now that all referencing elements have been re-mapped to the master
            # it is safe to remove this gradient from the document
            dupGrad.parentNode.removeChild(dupGrad)
            numElemsRemoved += 1
            num += 1
    return num
def repairStyle(node, options):
    """
    Normalizes and prunes the 'style' attribute of node, then recurses into
    its child nodes.

    Removes properties that can have no rendering effect (opacity:1, all
    stroke properties when the stroke is 'none'/invisible, font properties on
    shape elements, Inkscape-private properties, ...) and, when
    options.style_to_xml is set, converts remaining known properties into
    XML presentation attributes.  Returns the number of properties repaired
    or removed in this subtree.
    """
    num = 0
    if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
        # get all style properties and stuff them into a dictionary
        styleMap = { }
        rawStyles = node.getAttribute('style').split(';')
        for style in rawStyles:
            propval = style.split(':')
            if len(propval) == 2 :
                styleMap[propval[0].strip()] = propval[1].strip()

        # I've seen this enough to know that I need to correct it:
        # fill: url(#linearGradient4918) rgb(0, 0, 0);
        for prop in ['fill', 'stroke'] :
            if prop in styleMap :
                chunk = styleMap[prop].split(') ')
                if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
                    styleMap[prop] = chunk[0] + ')'
                    num += 1

        # Here is where we can weed out unnecessary styles like:
        #  opacity:1
        if 'opacity' in styleMap :
            opacity = float(styleMap['opacity'])
            # opacity='1.0' is useless, remove it
            if opacity == 1.0 :
                del styleMap['opacity']
                num += 1

            # if opacity='0' then all fill and stroke properties are useless, remove them
            # (fix: 'stroke-opacity' used to appear twice in this list)
            elif opacity == 0.0 :
                for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
                                     'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap',
                                     'stroke-dasharray', 'stroke-dashoffset'] :
                    if uselessStyle in styleMap:
                        del styleMap[uselessStyle]
                        num += 1

        # if stroke:none, then remove all stroke-related properties (stroke-width, etc)
        # TODO: should also detect if the computed value of this element is stroke="none"
        if 'stroke' in styleMap and styleMap['stroke'] == 'none' :
            for strokestyle in [ 'stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
                                 'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'] :
                if strokestyle in styleMap :
                    del styleMap[strokestyle]
                    num += 1
            # TODO: This is actually a problem if a parent element has a specified stroke;
            # we need to properly calculate computed values
            del styleMap['stroke']

        # if fill:none, then remove all fill-related properties (fill-rule, etc)
        if 'fill' in styleMap and styleMap['fill'] == 'none' :
            for fillstyle in [ 'fill-rule', 'fill-opacity' ] :
                if fillstyle in styleMap :
                    del styleMap[fillstyle]
                    num += 1

        # stop-opacity: 1
        if 'stop-opacity' in styleMap :
            if float(styleMap['stop-opacity']) == 1.0 :
                del styleMap['stop-opacity']
                num += 1

        # fill-opacity: 1 or 0
        if 'fill-opacity' in styleMap :
            fillOpacity = float(styleMap['fill-opacity'])
            # TODO: This is actually a problem if the parent element does not have fill-opacity=1
            if fillOpacity == 1.0 :
                del styleMap['fill-opacity']
                num += 1
            elif fillOpacity == 0.0 :
                for uselessFillStyle in [ 'fill', 'fill-rule' ] :
                    if uselessFillStyle in styleMap:
                        del styleMap[uselessFillStyle]
                        num += 1

        # stroke-opacity: 1 or 0
        if 'stroke-opacity' in styleMap :
            strokeOpacity = float(styleMap['stroke-opacity'])
            # TODO: This is actually a problem if the parent element does not have stroke-opacity=1
            if strokeOpacity == 1.0 :
                del styleMap['stroke-opacity']
                num += 1
            elif strokeOpacity == 0.0 :
                for uselessStrokeStyle in [ 'stroke', 'stroke-width', 'stroke-linejoin', 'stroke-linecap',
                                            'stroke-dasharray', 'stroke-dashoffset' ] :
                    if uselessStrokeStyle in styleMap:
                        del styleMap[uselessStrokeStyle]
                        num += 1

        # stroke-width: 0
        if 'stroke-width' in styleMap :
            strokeWidth = getSVGLength(styleMap['stroke-width'])
            if strokeWidth == 0.0 :
                for uselessStrokeStyle in [ 'stroke', 'stroke-linejoin', 'stroke-linecap',
                                            'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity' ] :
                    if uselessStrokeStyle in styleMap:
                        del styleMap[uselessStrokeStyle]
                        num += 1

        # remove font properties for non-text elements
        # I've actually observed this in real SVG content
        if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
            for fontstyle in [ 'font-family', 'font-size', 'font-stretch', 'font-size-adjust',
                               'font-style', 'font-variant', 'font-weight',
                               'letter-spacing', 'line-height', 'kerning',
                               'text-anchor', 'text-decoration', 'text-rendering',
                               'unicode-bidi', 'word-spacing', 'writing-mode'] :
                if fontstyle in styleMap :
                    del styleMap[fontstyle]
                    num += 1

        # remove inkscape-specific styles
        # TODO: need to get a full list of these
        for inkscapeStyle in ['-inkscape-font-specification']:
            if inkscapeStyle in styleMap:
                del styleMap[inkscapeStyle]
                num += 1

        # visibility: visible
        if 'visibility' in styleMap :
            if styleMap['visibility'] == 'visible':
                del styleMap['visibility']
                num += 1

        # display: inline
        if 'display' in styleMap :
            if styleMap['display'] == 'inline':
                del styleMap['display']
                num += 1

        # overflow: visible or overflow specified on element other than svg, marker, pattern
        if 'overflow' in styleMap :
            if styleMap['overflow'] == 'visible' or node.nodeName in ['svg','marker','pattern']:
                del styleMap['overflow']
                num += 1

        # marker: none
        if 'marker' in styleMap :
            if styleMap['marker'] == 'none':
                del styleMap['marker']
                num += 1

        # now if any of the properties match known SVG attributes we prefer attributes
        # over style so emit them and remove them from the style map
        # (iterate over a copy of the keys since we delete while walking)
        if options.style_to_xml:
            for propName in list(styleMap.keys()) :
                if propName in svgAttributes :
                    node.setAttribute(propName, styleMap[propName])
                    del styleMap[propName]

        # sew our remaining style properties back together into a style attribute
        fixedStyle = ''
        for prop in styleMap.keys() :
            fixedStyle += prop + ':' + styleMap[prop] + ';'

        if fixedStyle != '' :
            node.setAttribute('style', fixedStyle)
        else:
            node.removeAttribute('style')

    # recurse for our child elements
    for child in node.childNodes :
        num += repairStyle(child,options)

    return num
def removeDefaultAttributeValues(node, options):
    """
    Strips gradient attributes whose value equals the SVG default
    (gradientUnits, spreadMethod, the x1/y1/x2/y2 and cx/cy/r/fx/fy
    geometry attributes), then recurses into child nodes.
    Returns the number of attributes removed.
    """
    if node.nodeType != 1:
        return 0

    num = 0

    # string-valued defaults
    if node.getAttribute('gradientUnits') == 'objectBoundingBox':
        node.removeAttribute('gradientUnits')
        num += 1
    if node.getAttribute('spreadMethod') == 'pad':
        node.removeAttribute('spreadMethod')
        num += 1

    # linear gradient coordinates whose default is 0%
    for zeroAttr in ['x1', 'y1', 'y2']:
        if node.getAttribute(zeroAttr) != '':
            if SVGLength(node.getAttribute(zeroAttr)).value == 0:
                node.removeAttribute(zeroAttr)
                num += 1

    # x2 defaults to 100% (equivalently 1 when unitless)
    if node.getAttribute('x2') != '':
        x2 = SVGLength(node.getAttribute('x2'))
        if (x2.value == 100 and x2.units == Unit.PCT) or (x2.value == 1 and x2.units == Unit.NONE):
            node.removeAttribute('x2')
            num += 1

    # the focal point fx/fy defaults to the center cx/cy when omitted
    for (focal, center) in [('fx', 'cx'), ('fy', 'cy')]:
        if node.getAttribute(focal) != '' and node.getAttribute(focal) == node.getAttribute(center):
            node.removeAttribute(focal)
            num += 1

    # radial gradient center/radius whose default is 50%
    for halfAttr in ['cx', 'cy', 'r']:
        if node.getAttribute(halfAttr) != '':
            length = SVGLength(node.getAttribute(halfAttr))
            if (length.value == 50 and length.units == Unit.PCT) or (length.value == 0.5 and length.units == Unit.NONE):
                node.removeAttribute(halfAttr)
                num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
# Matches 'rgb(int, int, int)' with optional surrounding whitespace; groups 1-3
# capture the integer components (values above 255 are not rejected here).
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
# Matches 'rgb(float%, float%, float%)'; groups 1-3 capture the percentages.
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string
    """
    s = value

    # map recognized color keywords onto their hex equivalents
    if s in colors:
        s = colors[s]

    # rgb(R%,G%,B%) -> rgb(r,g,b), truncating each component to an integer
    pctMatch = rgbp.match(s)
    if pctMatch is not None:
        comps = tuple(int(float(pctMatch.group(n)) * 255.0 / 100.0) for n in (1, 2, 3))
        s = 'rgb(%d,%d,%d)' % comps

    # rgb(r,g,b) -> #RRGGBB (zero-padded uppercase hex)
    intMatch = rgb.match(s)
    if intMatch is not None:
        s = '#%02X%02X%02X' % (int(intMatch.group(1)),
                               int(intMatch.group(2)),
                               int(intMatch.group(3)))

    # #RRGGBB -> #RGB when every channel repeats its hex digit
    if s[0] == '#' and len(s) == 7 and s[1] == s[2] and s[3] == s[4] and s[5] == s[6]:
        s = s.upper()
        s = '#' + s[1] + s[3] + s[5]

    return s
def convertColors(element) :
    """
    Recursively converts all color properties into #RRGGBB format if shorter
    """
    if element.nodeType != 1:
        return 0

    numBytes = 0

    # pick the color attributes relevant to this element type
    name = element.nodeName
    if name in ['rect', 'circle', 'ellipse', 'polygon',
                'line', 'polyline', 'path', 'g', 'a']:
        attrsToConvert = ['fill', 'stroke']
    elif name == 'stop':
        attrsToConvert = ['stop-color']
    elif name == 'solidColor':
        attrsToConvert = ['solid-color']
    else:
        attrsToConvert = []

    # rewrite each attribute only when the converted form is strictly shorter
    for attr in attrsToConvert:
        oldColorValue = element.getAttribute(attr)
        if oldColorValue:
            newColorValue = convertColor(oldColorValue)
            if len(oldColorValue) > len(newColorValue):
                element.setAttribute(attr, newColorValue)
                numBytes += (len(oldColorValue) - len(element.getAttribute(attr)))

    # now recurse for our child elements
    for child in element.childNodes:
        numBytes += convertColors(child)

    return numBytes
1273 # TODO: go over what this method does and see if there is a way to optimize it
1274 # TODO: go over the performance of this method and see if I can save memory/speed by
1275 # reusing data structures, etc
def cleanPath(element) :
    """
    Cleans the path string (d attribute) of the element:
    - converts all coordinates to Decimal and all commands to relative form
    - removes empty segments
    - converts straight cubic curves into lines
    - collapses runs of the same command, uses h/v/s/t shorthands
    Updates the module-level statistics counters as a side effect.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    # however, this parser object has some ugliness in it (lists of tuples of tuples of
    # numbers and booleans).  we just need a list of (cmd,[numbers]):
    path = []
    for (cmd,dataset) in pathObj:
        if cmd in ['M','m','L','l','T','t']:
            # one or more tuples, each containing two numbers
            nums = []
            for t in dataset:
                # convert to a Decimal
                nums.append(Decimal(str(t[0])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))

            # only create this segment if it is not empty
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['V','v','H','h']:
            # one or more numbers
            nums = []
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['C','c']:
            # one or more tuples, each containing three tuples of two numbers each
            nums = []
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['S','s','Q','q']:
            # one or more tuples, each containing two tuples of two numbers each
            nums = []
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['A','a']:
            # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
            # another boolean, and a tuple of two numbers
            nums = []
            for t in dataset:
                nums.append( Decimal(str(t[0][0])) * Decimal(1) )
                nums.append( Decimal(str(t[0][1])) * Decimal(1) )
                nums.append( Decimal(str(t[1])) * Decimal(1))

                if t[2]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                if t[3]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                nums.append( Decimal(str(t[4][0])) * Decimal(1) )
                nums.append( Decimal(str(t[4][1])) * Decimal(1) )
            path.append( (cmd, nums) )

        elif cmd in ['Z','z']:
            path.append( (cmd, []) )

    # calculate the starting x,y coord for the second path command
    if len(path[0][1]) == 2:
        (x,y) = path[0][1]
    else:
        # we have a move and then 1 or more coords for lines
        N = len(path[0][1])
        if path[0][0] == 'M':
            # take the last pair of coordinates for the starting point
            x = path[0][1][N-2]
            y = path[0][1][N-1]
        else: # relative move, accumulate coordinates for the starting point
            (x,y) = path[0][1][0],path[0][1][1]
            n = 2
            while n < N:
                x += path[0][1][n]
                y += path[0][1][n+1]
                n += 2

    # now we have the starting point at x,y so let's save it
    # NOTE(review): when the first moveto carries implicit linetos this records
    # the current point *after* those linetos, not the subpath's initial point,
    # so a following 'z' may not return to the true start -- confirm against
    # the SVG path spec before changing.
    (startx,starty) = (x,y)

    # convert absolute coordinates into relative ones (start with the second subcommand
    # and leave the first M as absolute)
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newCmd = cmd
        newData = data
        # adjust abs to rel
        # only the A command has some values that we don't want to adjust (radii, rotation, flags)
        if cmd == 'A':
            newCmd = 'a'
            newData = []
            while i < len(data):
                newData.append(data[i])
                newData.append(data[i+1])
                newData.append(data[i+2])
                newData.append(data[i+3])
                newData.append(data[i+4])
                newData.append(data[i+5]-x)
                newData.append(data[i+6]-y)
                x = data[i+5]
                y = data[i+6]
                i += 7
        elif cmd == 'a':
            while i < len(data):
                x += data[i+5]
                y += data[i+6]
                i += 7
        elif cmd == 'H':
            newCmd = 'h'
            newData = []
            while i < len(data):
                newData.append(data[i]-x)
                x = data[i]
                i += 1
        elif cmd == 'h':
            while i < len(data):
                x += data[i]
                i += 1
        elif cmd == 'V':
            newCmd = 'v'
            newData = []
            while i < len(data):
                newData.append(data[i] - y)
                y = data[i]
                i += 1
        elif cmd == 'v':
            while i < len(data):
                y += data[i]
                i += 1
        elif cmd in ['M']:
            newCmd = cmd.lower()
            newData = []
            startx = data[0]
            starty = data[1]
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['L','T']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['m']:
            startx += data[0]
            starty += data[1]
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['l','t']:
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['S','Q']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                x = data[i+2]
                y = data[i+3]
                i += 4
        elif cmd in ['s','q']:
            while i < len(data):
                x += data[i+2]
                y += data[i+3]
                i += 4
        elif cmd == 'C':
            newCmd = 'c'
            newData = []
            while i < len(data):
                newData.append( data[i] - x )
                newData.append( data[i+1] - y )
                newData.append( data[i+2] - x )
                newData.append( data[i+3] - y )
                newData.append( data[i+4] - x )
                newData.append( data[i+5] - y )
                x = data[i+4]
                y = data[i+5]
                i += 6
        elif cmd == 'c':
            while i < len(data):
                x += data[i+4]
                y += data[i+5]
                i += 6
        elif cmd in ['z','Z']:
            x = startx
            y = starty
            newCmd = 'z'
        newPath.append( (newCmd, newData) )
    path = newPath

    # remove empty segments
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['m','l','t']:
            newData = []
            i = 0
            while i < len(data):
                if data[i] != 0 or data[i+1] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                else:
                    numPathSegmentsReduced += 1
                i += 2
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'c':
            newData = []
            i = 0
            while i < len(data):
                if data[i+4] != 0 or data[i+5] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])
                else:
                    numPathSegmentsReduced += 1
                i += 6
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'a':
            newData = []
            i = 0
            while i < len(data):
                if data[i+5] != 0 or data[i+6] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])
                    newData.append(data[i+6])
                else:
                    numPathSegmentsReduced += 1
                i += 7
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd == 'q':
            newData = []
            i = 0
            while i < len(data):
                if data[i+2] != 0 or data[i+3] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                else:
                    numPathSegmentsReduced += 1
                i += 4
            if newData:
                newPath.append( (cmd,newData) )
        elif cmd in ['h','v']:
            newData = []
            i = 0
            while i < len(data):
                if data[i] != 0:
                    newData.append(data[i])
                else:
                    numPathSegmentsReduced += 1
                i += 1
            if newData:
                newPath.append( (cmd,newData) )
        else:
            newPath.append( (cmd,data) )
    path = newPath

    # convert straight curves into lines
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newData = data
        if cmd == 'c':
            newData = []
            while i < len(data):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x,p1y) = (data[i],data[i+1])
                (p2x,p2y) = (data[i+2],data[i+3])
                dx = data[i+4]
                dy = data[i+5]

                foundStraightCurve = False

                if dx == 0:
                    if p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    # BUG FIX: the second control point must satisfy p2y == m*p2x;
                    # this previously compared p2y against m*p2y, which is only
                    # true when p2y == 0 or m == 1
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append( (cmd,newData) )
                        newData = []
                    # now create a straight line segment
                    newPath.append( ('l', [dx,dy]) )
                    numCurvesStraightened += 1
                else:
                    newData.append(data[i])
                    newData.append(data[i+1])
                    newData.append(data[i+2])
                    newData.append(data[i+3])
                    newData.append(data[i+4])
                    newData.append(data[i+5])

                i += 6
        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append( (cmd,newData) )
    path = newPath

    # collapse all consecutive commands of the same type into one command
    prevCmd = ''
    prevData = []
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # flush the previous command if it is not the same type as the current command
        if prevCmd != '':
            if cmd != prevCmd or cmd == 'm':
                newPath.append( (prevCmd, prevData) )
                prevCmd = ''
                prevData = []

        # if the previous and current commands are the same type, collapse
        # but only if they are not move commands (since move can contain implicit lineto commands)
        if cmd == prevCmd and cmd != 'm':
            for coord in data:
                prevData.append(coord)

        # save last command and data
        else:
            prevCmd = cmd
            prevData = data
    # flush last command and data
    if prevCmd != '':
        newPath.append( (prevCmd, prevData) )
    path = newPath

    # convert to shorthand path segments where possible
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # convert line segments into h,v where possible
        if cmd == 'l':
            i = 0
            lineTuples = []
            while i < len(data):
                if data[i] == 0:
                    # vertical
                    if lineTuples:
                        # flush the existing line command
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    # append the v and then the remaining line coords
                    newPath.append( ('v', [data[i+1]]) )
                    numPathSegmentsReduced += 1
                elif data[i+1] == 0:
                    if lineTuples:
                        # flush the line command, then append the h and then the remaining line coords
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    newPath.append( ('h', [data[i]]) )
                    numPathSegmentsReduced += 1
                else:
                    lineTuples.append(data[i])
                    lineTuples.append(data[i+1])
                i += 2
            if lineTuples:
                newPath.append( ('l', lineTuples) )
        # convert Bezier curve segments into s where possible
        elif cmd == 'c':
            bez_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                # rotate by 180deg means negate both coordinates
                # if the previous control point is equal then we can substitute a
                # shorthand bezier command
                if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('c', curveTuples) )
                        curveTuples = []
                    # append the s command
                    newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
                    numPathSegmentsReduced += 1
                else:
                    j = 0
                    while j <= 5:
                        curveTuples.append(data[i+j])
                        j += 1

                # set up control point for next curve segment
                bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])
                i += 6

            if curveTuples:
                newPath.append( ('c', curveTuples) )
        # convert quadratic curve segments into t where possible
        elif cmd == 'q':
            quad_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('q', curveTuples) )
                        curveTuples = []
                    # append the t command
                    newPath.append( ('t', [data[i+2], data[i+3]]) )
                    numPathSegmentsReduced += 1
                else:
                    j = 0
                    while j <= 3:
                        curveTuples.append(data[i+j])
                        j += 1

                quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])
                i += 4

            if curveTuples:
                newPath.append( ('q', curveTuples) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # for each h or v, collapse unnecessary coordinates that run in the same direction
    # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['h','v'] and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    numPathSegmentsReduced += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append( (cmd, newData) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # it is possible that we have consecutive h, v, c, t commands now
    # so again collapse all consecutive commands of the same type into one command
    prevCmd = ''
    prevData = []
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # flush the previous command if it is not the same type as the current command
        if prevCmd != '':
            if cmd != prevCmd or cmd == 'm':
                newPath.append( (prevCmd, prevData) )
                prevCmd = ''
                prevData = []

        # if the previous and current commands are the same type, collapse
        if cmd == prevCmd and cmd != 'm':
            for coord in data:
                prevData.append(coord)

        # save last command and data
        else:
            prevCmd = cmd
            prevData = data
    # flush last command and data
    if prevCmd != '':
        newPath.append( (prevCmd, prevData) )
    path = newPath

    newPathStr = serializePath(path)
    numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
    element.setAttribute('d', newPathStr)
def parseListOfPoints(s):
    """
    Parse string into a list of points.

    Returns a list containing an even number of coordinate strings
    (or an empty list if the input is malformed: an odd number of
    coordinates, or coordinates carrying units).
    """
    points = []

    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    # comma-wsp: (wsp+ comma? wsp*) | (comma wsp*)
    ws_nums = re.split("\\s*\\,?\\s*", s.strip())
    nums = []

    # also, if 100-100 is found, split it into two also
    #  <polygon points="100,-100,100-100,100-100-100,-100-100" />
    for i in range(len(ws_nums)):
        negcoords = re.split("\\-", ws_nums[i])

        # this string didn't have any negative coordinates
        if len(negcoords) == 1:
            nums.append(negcoords[0])
        # we got negative coords
        else:
            for j in range(len(negcoords)):
                # first number could be positive
                if j == 0:
                    if negcoords[0] != '':
                        nums.append(negcoords[0])
                # otherwise all other strings will be negative
                else:
                    # unless we accidentally split a number that was in scientific notation
                    # and had a negative exponent (500.00e-1)
                    # BUG FIX: guard against nums being empty -- a leading negative
                    # coordinate (e.g. points="-100,100") used to raise IndexError here
                    if nums and (nums[-1][-1] == 'e' or nums[-1][-1] == 'E'):
                        nums[-1] = nums[-1] + '-' + negcoords[j]
                    else:
                        nums.append( '-'+negcoords[j] )

    # now resolve into SVGLength values
    i = 0
    while i < len(nums):
        x = SVGLength(nums[i])
        # if we had an odd number of points, return empty
        if i == len(nums)-1: return []
        else: y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE: return []
        points.append( str(x.value) )
        points.append( str(y.value) )
        i += 2

    return points
def cleanPolygon(elem):
    """
    Remove unnecessary closing point of polygon points attribute
    (polygons close themselves, so a final point equal to the first is redundant)
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    N = len(pts)/2
    if N >= 2:
        # BUG FIX: starty was previously read from pts[0] (the x coordinate),
        # so the closing point was only detected when its x equaled its y
        (startx,starty) = (pts[0],pts[1])
        (endx,endy) = (pts[len(pts)-2],pts[len(pts)-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts,True))
def cleanPolyline(elem):
    """
    Scour the polyline points attribute
    """
    cleaned = scourCoordinates(parseListOfPoints(elem.getAttribute('points')), True)
    elem.setAttribute('points', cleaned)
def serializePath(pathObj):
    """
    Reserializes the path data with some cleanups.
    """
    # elliptical arc commands must have comma/wsp separating the coordinates;
    # this fixes an issue outlined in https://bugs.launchpad.net/scour/+bug/412754
    return ''.join([cmd + scourCoordinates(data, (cmd == 'a'))
                    for (cmd, data) in pathObj])
def scourCoordinates(data, forceCommaWsp = False):
    """
    Serializes coordinate data with some cleanups:
    - removes all trailing zeros after the decimal
    - integerize coordinates if possible
    - removes extraneous whitespace
    - adds commas between values in a subcommand if required (or if forceCommaWsp is True)
    """
    if data is None:
        return ""

    parts = []
    last = len(data) - 1
    for idx, coord in enumerate(data):
        # add the scoured coordinate to the path string
        parts.append(scourLength(coord))

        # only need the comma if the next number is non-negative or if forceCommaWsp is True
        if idx < last and (forceCommaWsp or Decimal(data[idx + 1]) >= 0):
            parts.append(',')
    return ''.join(parts)
def scourLength(str):
    """
    Scours one SVG length: renders its numeric value with the fewest
    characters (integerized if possible, trailing zeros trimmed, scientific
    notation when shorter) and re-appends its unit string.
    """
    # NOTE(review): the parameter shadows the builtin `str`; renaming it would
    # alter the public signature for keyword callers, so it is left as-is.
    length = SVGLength(str)
    coord = length.value

    # reduce to the proper number of digits
    coord = Decimal(unicode(coord)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # Decimal.trim() is available in Python 2.6+ to trim trailing zeros
    # NOTE(review): CPython's decimal module does not appear to expose trim(),
    # so the AttributeError fallback below is the path normally taken -- verify.
    try:
        coord = coord.trim()
    except AttributeError:
        # trim it ourselves: strip trailing zeros after the decimal point
        s = unicode(coord)
        dec = s.find('.')
        if dec != -1:
            while s[-1] == '0':
                s = s[:-1]
        coord = Decimal(s)

    # Decimal.normalize() will uses scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord)+Unit.str(length.units)
def embedRasters(element, options) :
    """
    Converts raster references to inline images (base64-encoded data: URIs).
    Handles local png/jpg/gif files and http:// URLs.
    NOTE: there are size limits to base64-encoding handling in browsers
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # if xlink:href is set, then grab the id
    if href != '' and len(href) > 1:
        # find if href value has filename ext
        ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

        # look for 'png', 'jpg', and 'gif' extensions
        if ext in ('png', 'jpg', 'gif'):
            # if the referenced file does not resolve and is not a URL, treat it
            # as relative to the input file's directory
            # (fix: the redundant double isfile() check was collapsed)
            if not os.path.isfile(href) and href[:7] != 'http://':
                infilename = '.'
                if options.infilename: infilename = options.infilename
                href = os.path.join(os.path.dirname(infilename), href)

            rasterdata = ''
            # test if file exists locally
            if os.path.isfile(href):
                # open raster file as raw binary
                # (fix: the handle was previously never closed)
                raster = open(href, "rb")
                try:
                    rasterdata = raster.read()
                finally:
                    raster.close()
            elif href[:7] == 'http://':
                webFile = urllib.urlopen(href)
                try:
                    rasterdata = webFile.read()
                finally:
                    webFile.close()

            # ... should we remove all images which don't resolve?
            if rasterdata != '' :
                # base64-encode raster
                b64eRaster = base64.b64encode( rasterdata )

                # set href attribute to base64-encoded equivalent
                if b64eRaster != '':
                    # PNG and GIF both have MIME Type 'image/[ext]', but
                    # JPEG has MIME Type 'image/jpeg'
                    if ext == 'jpg':
                        ext = 'jpeg'

                    element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
                    numRastersEmbedded += 1
                    del b64eRaster
def properlySizeDoc(docElement):
    """Replace a statically sized root <svg> (width/height in px or
    unitless) with an equivalent viewBox, removing width/height.

    Leaves the document untouched when the size uses physical units,
    or when an existing viewBox has a non-zero origin or a size that
    disagrees with width/height.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite
    # them into a viewBox
    # BUG FIX: the second clause previously re-tested w.units instead of
    # h.units, so e.g. height="10cm" could be incorrectly rewritten
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
        (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute (numbers separated by whitespace and/or a comma)
    vbSep = re.split("\\s*\\,?\\s*", docElement.getAttribute('viewBox'), 3)
    # if we have a valid viewBox we need to check it
    vbWidth, vbHeight = 0, 0
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it
            # is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
def remapNamespacePrefix(node, oldprefix, newprefix):
    """Recursively rewrite every element whose prefix is *oldprefix* to use
    *newprefix* instead (or no prefix at all when newprefix is '').

    The element is rebuilt (minidom cannot rename in place): attributes are
    copied, children are deep-cloned, and the new element replaces the old
    one in its parent.  NOTE: attributes that themselves carry the old
    prefix are copied by localName only, unchanged from original behavior.
    """
    # only element nodes (nodeType 1) can carry a namespace prefix
    if node is None or node.nodeType != 1:
        return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node in the new prefix (or unprefixed)
        if newprefix != '':
            newNode = doc.createElementNS(namespace, newprefix + ":" + localName)
        else:
            newNode = doc.createElement(localName)

        # add all the attributes
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            newNode.setAttributeNS(attr.namespaceURI, attr.localName, attr.nodeValue)

        # clone and add all the child nodes
        for child in node.childNodes:
            newNode.appendChild(child.cloneNode(True))

        # replace old node with new node and continue the walk from it
        parent.replaceChild(newNode, node)
        node = newNode

    # now do all child nodes; iterate over a snapshot because the recursive
    # call may replaceChild() entries of this very list while we walk it
    for child in list(node.childNodes):
        remapNamespacePrefix(child, oldprefix, newprefix)
def makeWellFormed(str):
    """Escape the five reserved XML characters in *str* so the result can
    be emitted inside attribute values or text content.

    Maps < > & ' " to their predefined XML entities; all other characters
    pass through unchanged.
    """
    # NOTE: restored the entity replacement table; it had been corrupted
    # into an identity mapping (each character mapped to itself)
    xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'}

    # substitute each reserved character with its entity, keep the rest
    return ''.join([xml_ents[c] if c in xml_ents else c for c in str])
# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0, preserveWhitespace = False):
    """Serialize *element* (and its subtree) to a string.

    ind is the current indentation depth; one indent character
    (per options.indent_type: 'tab', 'space', or anything else for none)
    is emitted per depth level.  preserveWhitespace is inherited from an
    ancestor's xml:space="preserve" and suppresses pretty-printing.
    Returns the serialized markup as a string.
    """
    indent = ind
    I=''
    if options.indent_type == 'tab': I='\t'
    elif options.indent_type == 'space': I=' '

    outString = (I * ind) + '<' + element.nodeName

    # always serialize the id or xml:id attributes first
    if element.getAttribute('id') != '':
        id = element.getAttribute('id')
        # use single-quotes when the value itself contains a double-quote
        quot = '"'
        if id.find('"') != -1:
            quot = "'"
        outString += ' ' + 'id=' + quot + id + quot
    if element.getAttribute('xml:id') != '':
        id = element.getAttribute('xml:id')
        quot = '"'
        if id.find('"') != -1:
            quot = "'"
        outString += ' ' + 'xml:id=' + quot + id + quot

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length) :
        attr = attrList.item(num)
        # id/xml:id were already emitted above
        if attr.nodeName == 'id' or attr.nodeName == 'xml:id': continue
        # if the attribute value contains a double-quote, use single-quotes
        quot = '"'
        if attr.nodeValue.find('"') != -1:
            quot = "'"

        attrValue = makeWellFormed( attr.nodeValue )

        outString += ' '
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.prefix != None:
            outString += attr.prefix + ':'
        elif attr.namespaceURI != None:
            # namespace declarations serialized via localName need their
            # xmlns:/xlink: qualifier re-attached explicitly
            if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
                outString += 'xmlns:'
            elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
                outString += 'xlink:'
        outString += attr.localName + '=' + quot + attrValue + quot

        # xml:space toggles whitespace handling for this subtree
        if attr.nodeName == 'xml:space':
            if attrValue == 'preserve':
                preserveWhitespace = True
            elif attrValue == 'default':
                preserveWhitespace = False

    # if no children, self-close
    children = element.childNodes
    if children.length > 0:
        outString += '>'

        onNewLine = False
        for child in element.childNodes:
            # element node
            if child.nodeType == 1:
                if preserveWhitespace:
                    # no indentation or line breaks inside xml:space="preserve"
                    outString += serializeXML(child, options, 0, preserveWhitespace)
                else:
                    outString += os.linesep + serializeXML(child, options, indent + 1, preserveWhitespace)
                    onNewLine = True
            # text node
            elif child.nodeType == 3:
                # trim it only in the case of not being a child of an element
                # where whitespace might be important
                if preserveWhitespace:
                    outString += makeWellFormed(child.nodeValue)
                else:
                    outString += makeWellFormed(child.nodeValue.strip())
            # CDATA node
            elif child.nodeType == 4:
                outString += '<![CDATA[' + child.nodeValue + ']]>'
            # Comment node
            elif child.nodeType == 8:
                outString += '<!--' + child.nodeValue + '-->'
            # TODO: entities, processing instructions, what else?
            else: # ignore the rest
                pass

        # re-indent the closing tag only if a child element started a new line
        if onNewLine: outString += (I * ind)
        outString += '</' + element.nodeName + '>'
        if indent > 0: outString += os.linesep
    else:
        outString += '/>'
        if indent > 0: outString += os.linesep

    return outString
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
    """Scour an SVG document given as a string and return the cleaned
    SVG as a string.

    options is an optparse Values object; when None, the defaults from
    _options_parser are used.  Updates the module-level statistics
    counters as a side effect.
    """
    if options is None:
        options = _options_parser.get_default_values()
    # the decimal context precision drives all numeric rounding below
    getcontext().prec = options.digits
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    # rewrite prefixed SVG elements (e.g. svg:rect) into the default namespace
    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # NOTE: these elements will be removed even if they have (invalid) text nodes
    elemsToRemove = []
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagName(tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                # for/else: runs only when no significant child node was found
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    if options.strip_ids:
        # loop until an iteration removes nothing more
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # move common attributes to parent group
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    # remove unused attributes from parent
    numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

    # clean path data
    for elem in doc.documentElement.getElementsByTagName('path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    for polyline in doc.documentElement.getElementsByTagName('polyline') :
        cleanPolygon(polyline)

    # scour lengths (including coordinates)
    for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(type):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                    'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                    'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagName('image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    if options.enable_viewboxing:
        properlySizeDoc(doc.documentElement)

    # output the document as a pretty string with a single space for indent
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolled our own serialize function here to save on space, put id first, customize indentation, etc
    # out_string = doc.documentElement.toprettyxml(' ')
    out_string = serializeXML(doc.documentElement, options) + os.linesep

    # now strip out empty lines
    lines = []
    # Get rid of empty lines
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string with its XML prolog and surrounding comments
    if options.strip_xml_prolog == False:
        total_output = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' + os.linesep
    else:
        total_output = ""

    # re-emit top-level siblings of the root element in document order
    for child in doc.childNodes:
        if child.nodeType == 1:
            total_output += "".join(lines)
        else: # doctypes, entities, comments
            total_output += child.toxml() + os.linesep

    return total_output
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """Scour the SVG file *filename* and return the result as a parsed
    xml.dom.minidom Document.
    """
    # read the whole file, making sure the handle is closed deterministically
    # (the original open() left the file object to be reclaimed by GC)
    infile = open(filename)
    try:
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2348 # GZ: Seems most other commandline tools don't do this, is it really wanted?
2349 class HeaderedFormatter(optparse.IndentedHelpFormatter):
2350 """
2351 Show application name, version number, and copyright statement
2352 above usage information.
2353 """
2354 def format_usage(self, usage):
2355 return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
2356 optparse.IndentedHelpFormatter.format_usage(self, usage))
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
# Module-level option parser: defines the command-line interface and, via
# get_default_values(), the default options used by scourString().
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    description=("If the input/output files are specified with a svgz"
    " extension, then compressed SVG is assumed. If the input file is not"
    " specified, stdin is used. If the output file is not specified, "
    " stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# feature toggles: each --disable-* flag turns off a scouring pass that is
# on by default; each --enable-* flag turns on an off-by-default pass
_options_parser.add_option("--disable-simplify-colors",
    action="store_false", dest="simple_colors", default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option("--disable-style-to-xml",
    action="store_false", dest="style_to_xml", default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option("--disable-group-collapsing",
    action="store_false", dest="group_collapse", default=True,
    help="won't collapse <g> elements")
_options_parser.add_option("--enable-id-stripping",
    action="store_true", dest="strip_ids", default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option("--disable-embed-rasters",
    action="store_false", dest="embed_rasters", default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option("--keep-editor-data",
    action="store_true", dest="keep_editor_data", default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option("--strip-xml-prolog",
    action="store_true", dest="strip_xml_prolog", default=False,
    help="won't output the <?xml ?> prolog")
_options_parser.add_option("--enable-viewboxing",
    action="store_true", dest="enable_viewboxing", default=False,
    help="changes document width/height to 100%/100% and creates viewbox coordinates")

# GZ: this is confusing, most people will be thinking in terms of
# decimal places, which is not what decimal precision is doing
_options_parser.add_option("-p", "--set-precision",
    action="store", type=int, dest="digits", default=5,
    help="set number of significant digits (default: %default)")
# input/output filenames (hidden from --help; positional stdin/stdout are
# the documented interface)
_options_parser.add_option("-i",
    action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
    action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("--indent",
    action="store", type="string", dest="indent_type", default="space",
    help="indentation of the output: none, space, tab (default: %default)")
def maybe_gziped_file(filename, mode="r"):
    """Open *filename*, transparently using gzip for .svgz/.gz files.

    Returns a file-like object opened in *mode*.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() instead of the Python-2-only file() builtin (identical in Py2)
    return open(filename, mode)
def parse_args(args=None):
    """Parse command-line arguments (sys.argv when args is None).

    Validates option values, then opens the input and output streams,
    falling back to stdin/stdout when no filenames were given.
    Returns (options, [infile, outfile]).
    """
    options, rargs = _options_parser.parse_args(args)

    # reject leftover positional arguments and nonsensical option values;
    # _options_parser.error() prints the message and exits
    if rargs:
        _options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
    if options.digits < 0:
        _options_parser.error("Can't have negative significant digits, see --help")
    if options.indent_type not in ("tab", "space", "none"):
        _options_parser.error("Invalid value for --indent, see --help")
    if options.infilename and options.outfilename and options.infilename == options.outfilename:
        _options_parser.error("Input filename is the same as output filename")

    if options.infilename:
        # GZ: could catch a raised IOError here and report
        infile = maybe_gziped_file(options.infilename)
    else:
        # GZ: could sniff for gzip compression here
        infile = sys.stdin

    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")
    else:
        outfile = sys.stdout

    return options, [infile, outfile]
def getReport():
    """Assemble the module-level scouring statistics into a multi-line,
    human-readable report string (one counter per line)."""
    stats = [
        ('Number of elements removed', numElemsRemoved),
        ('Number of attributes removed', numAttrsRemoved),
        ('Number of unreferenced id attributes removed', numIDsRemoved),
        ('Number of style properties fixed', numStylePropsFixed),
        ('Number of raster images embedded inline', numRastersEmbedded),
        ('Number of path segments reduced/removed', numPathSegmentsReduced),
        ('Number of bytes saved in path data', numBytesSavedInPathData),
        ('Number of bytes saved in colors', numBytesSavedInColors),
        ('Number of points removed from polygons', numPointsRemovedFromPolygon),
    ]
    return os.linesep.join([' %s: %s' % (label, value) for label, value in stats])
if __name__ == '__main__':
    # pick a process-time source: time.clock on Windows, os.times elsewhere
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        def get_tick():
            return os.times()[0]

    start = get_tick()

    options, (input, output) = parse_args()

    # banner goes to stderr so stdout stays clean for piped SVG output
    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    # NOTE(review): input.name is read after input.close() — works for
    # regular files and for sys.stdin ('<stdin>'); confirm for gzip streams
    print >>sys.stderr, ' File:', input.name, \
        os.linesep + ' Time taken:', str(end-start) + 's' + os.linesep, \
        getReport()

    oldsize = len(in_string)
    newsize = len(out_string)
    # true division is active (module imports division from __future__)
    # NOTE(review): raises ZeroDivisionError on empty input — acceptable?
    sizediff = (newsize / oldsize) * 100
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'