1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Scour
5 #
6 # Copyright 2009 Jeff Schiller
7 #
8 # This file is part of Scour, http://www.codedread.com/scour/
9 #
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
13 #
14 # http://www.apache.org/licenses/LICENSE-2.0
15 #
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
22 # Notes:
24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
28 #
29 # * Process Transformations
30 # * Collapse all group based transformations
32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
# * analyse path elements to see whether a rect can be used instead (would also
# need to handle rounded corners)
36 # Next Up:
37 # + remove unused attributes in parent elements
38 # + prevent elements from being stripped if they are referenced in a <style> element
39 # (for instance, filter, marker, pattern) - need a crude CSS parser
40 # - add an option to remove ids if they match the Inkscape-style of IDs
41 # - investigate point-reducing algorithms
42 # - parse transform attribute
43 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
44 # - option to remove metadata
46 # necessary to get true division
47 from __future__ import division
49 import os
50 import sys
51 import xml.dom.minidom
52 import re
53 import math
54 import base64
55 import urllib
56 from svg_regex import svg_parser
57 import gzip
58 import optparse
59 from yocto_css import parseCssString
61 # Python 2.3- did not have Decimal
62 try:
63 from decimal import *
64 except ImportError:
65 from fixedpoint import *
66 Decimal = FixedPoint
# Program identity: short name, release version and copyright notice.
APP, VER, COPYRIGHT = 'scour', '0.20', 'Copyright Jeff Schiller, 2009'
# Namespace URIs keyed by a short symbolic name: SVG and XLink first, then the
# editor-specific namespaces (Sodipodi, Inkscape and the Adobe family).
NS = dict([
    ('SVG', 'http://www.w3.org/2000/svg'),
    ('XLINK', 'http://www.w3.org/1999/xlink'),
    ('SODIPODI', 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd'),
    ('INKSCAPE', 'http://www.inkscape.org/namespaces/inkscape'),
    ('ADOBE_ILLUSTRATOR', 'http://ns.adobe.com/AdobeIllustrator/10.0/'),
    ('ADOBE_GRAPHS', 'http://ns.adobe.com/Graphs/1.0/'),
    ('ADOBE_SVG_VIEWER', 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/'),
    ('ADOBE_VARIABLES', 'http://ns.adobe.com/Variables/1.0/'),
    ('ADOBE_SFW', 'http://ns.adobe.com/SaveForWeb/1.0/'),
    ('ADOBE_EXTENSIBILITY', 'http://ns.adobe.com/Extensibility/1.0/'),
    ('ADOBE_FLOWS', 'http://ns.adobe.com/Flows/1.0/'),
    ('ADOBE_IMAGE_REPLACEMENT', 'http://ns.adobe.com/ImageReplacement/1.0/'),
    ('ADOBE_CUSTOM', 'http://ns.adobe.com/GenericCustomNamespace/1.0/'),
    ('ADOBE_XPATH', 'http://ns.adobe.com/XPath/1.0/'),
])
# URIs of the editor-specific namespaces, looked up from NS in a fixed order.
# NOTE(review): presumably the list handed to the removeNamespaced*() helpers
# -- confirm against the caller.
unwanted_ns = list(map(NS.__getitem__, (
    'SODIPODI',
    'INKSCAPE',
    'ADOBE_ILLUSTRATOR',
    'ADOBE_GRAPHS',
    'ADOBE_SVG_VIEWER',
    'ADOBE_VARIABLES',
    'ADOBE_SFW',
    'ADOBE_EXTENSIBILITY',
    'ADOBE_FLOWS',
    'ADOBE_IMAGE_REPLACEMENT',
    'ADOBE_CUSTOM',
    'ADOBE_XPATH',
)))
# Names of SVG styling properties, grouped by family.
# NOTE(review): how this list is consumed is not visible in this part of the file.
svgAttributes = [
    'clip-rule', 'display',
    'fill', 'fill-opacity', 'fill-rule',
    'filter',
    'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant',
    'font-weight',
    'line-height', 'marker', 'opacity', 'overflow',
    'stop-color', 'stop-opacity',
    'stroke', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
    'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
    'visibility',
]
# Lookup table from a lower-case colour keyword to its 'rgb(r, g, b)' string.
# Both spellings of the grey/gray family are listed as separate keys.
# NOTE(review): presumably the SVG named-colour keywords; the code that
# consumes this table is not visible in this part of the file.
colors = {
    'aliceblue': 'rgb(240, 248, 255)',
    'antiquewhite': 'rgb(250, 235, 215)',
    'aqua': 'rgb( 0, 255, 255)',
    'aquamarine': 'rgb(127, 255, 212)',
    'azure': 'rgb(240, 255, 255)',
    'beige': 'rgb(245, 245, 220)',
    'bisque': 'rgb(255, 228, 196)',
    'black': 'rgb( 0, 0, 0)',
    'blanchedalmond': 'rgb(255, 235, 205)',
    'blue': 'rgb( 0, 0, 255)',
    'blueviolet': 'rgb(138, 43, 226)',
    'brown': 'rgb(165, 42, 42)',
    'burlywood': 'rgb(222, 184, 135)',
    'cadetblue': 'rgb( 95, 158, 160)',
    'chartreuse': 'rgb(127, 255, 0)',
    'chocolate': 'rgb(210, 105, 30)',
    'coral': 'rgb(255, 127, 80)',
    'cornflowerblue': 'rgb(100, 149, 237)',
    'cornsilk': 'rgb(255, 248, 220)',
    'crimson': 'rgb(220, 20, 60)',
    'cyan': 'rgb( 0, 255, 255)',
    'darkblue': 'rgb( 0, 0, 139)',
    'darkcyan': 'rgb( 0, 139, 139)',
    'darkgoldenrod': 'rgb(184, 134, 11)',
    'darkgray': 'rgb(169, 169, 169)',
    'darkgreen': 'rgb( 0, 100, 0)',
    'darkgrey': 'rgb(169, 169, 169)',
    'darkkhaki': 'rgb(189, 183, 107)',
    'darkmagenta': 'rgb(139, 0, 139)',
    'darkolivegreen': 'rgb( 85, 107, 47)',
    'darkorange': 'rgb(255, 140, 0)',
    'darkorchid': 'rgb(153, 50, 204)',
    'darkred': 'rgb(139, 0, 0)',
    'darksalmon': 'rgb(233, 150, 122)',
    'darkseagreen': 'rgb(143, 188, 143)',
    'darkslateblue': 'rgb( 72, 61, 139)',
    'darkslategray': 'rgb( 47, 79, 79)',
    'darkslategrey': 'rgb( 47, 79, 79)',
    'darkturquoise': 'rgb( 0, 206, 209)',
    'darkviolet': 'rgb(148, 0, 211)',
    'deeppink': 'rgb(255, 20, 147)',
    'deepskyblue': 'rgb( 0, 191, 255)',
    'dimgray': 'rgb(105, 105, 105)',
    'dimgrey': 'rgb(105, 105, 105)',
    'dodgerblue': 'rgb( 30, 144, 255)',
    'firebrick': 'rgb(178, 34, 34)',
    'floralwhite': 'rgb(255, 250, 240)',
    'forestgreen': 'rgb( 34, 139, 34)',
    'fuchsia': 'rgb(255, 0, 255)',
    'gainsboro': 'rgb(220, 220, 220)',
    'ghostwhite': 'rgb(248, 248, 255)',
    'gold': 'rgb(255, 215, 0)',
    'goldenrod': 'rgb(218, 165, 32)',
    'gray': 'rgb(128, 128, 128)',
    'grey': 'rgb(128, 128, 128)',
    'green': 'rgb( 0, 128, 0)',
    'greenyellow': 'rgb(173, 255, 47)',
    'honeydew': 'rgb(240, 255, 240)',
    'hotpink': 'rgb(255, 105, 180)',
    'indianred': 'rgb(205, 92, 92)',
    'indigo': 'rgb( 75, 0, 130)',
    'ivory': 'rgb(255, 255, 240)',
    'khaki': 'rgb(240, 230, 140)',
    'lavender': 'rgb(230, 230, 250)',
    'lavenderblush': 'rgb(255, 240, 245)',
    'lawngreen': 'rgb(124, 252, 0)',
    'lemonchiffon': 'rgb(255, 250, 205)',
    'lightblue': 'rgb(173, 216, 230)',
    'lightcoral': 'rgb(240, 128, 128)',
    'lightcyan': 'rgb(224, 255, 255)',
    'lightgoldenrodyellow': 'rgb(250, 250, 210)',
    'lightgray': 'rgb(211, 211, 211)',
    'lightgreen': 'rgb(144, 238, 144)',
    'lightgrey': 'rgb(211, 211, 211)',
    'lightpink': 'rgb(255, 182, 193)',
    'lightsalmon': 'rgb(255, 160, 122)',
    'lightseagreen': 'rgb( 32, 178, 170)',
    'lightskyblue': 'rgb(135, 206, 250)',
    'lightslategray': 'rgb(119, 136, 153)',
    'lightslategrey': 'rgb(119, 136, 153)',
    'lightsteelblue': 'rgb(176, 196, 222)',
    'lightyellow': 'rgb(255, 255, 224)',
    'lime': 'rgb( 0, 255, 0)',
    'limegreen': 'rgb( 50, 205, 50)',
    'linen': 'rgb(250, 240, 230)',
    'magenta': 'rgb(255, 0, 255)',
    'maroon': 'rgb(128, 0, 0)',
    'mediumaquamarine': 'rgb(102, 205, 170)',
    'mediumblue': 'rgb( 0, 0, 205)',
    'mediumorchid': 'rgb(186, 85, 211)',
    'mediumpurple': 'rgb(147, 112, 219)',
    'mediumseagreen': 'rgb( 60, 179, 113)',
    'mediumslateblue': 'rgb(123, 104, 238)',
    'mediumspringgreen': 'rgb( 0, 250, 154)',
    'mediumturquoise': 'rgb( 72, 209, 204)',
    'mediumvioletred': 'rgb(199, 21, 133)',
    'midnightblue': 'rgb( 25, 25, 112)',
    'mintcream': 'rgb(245, 255, 250)',
    'mistyrose': 'rgb(255, 228, 225)',
    'moccasin': 'rgb(255, 228, 181)',
    'navajowhite': 'rgb(255, 222, 173)',
    'navy': 'rgb( 0, 0, 128)',
    'oldlace': 'rgb(253, 245, 230)',
    'olive': 'rgb(128, 128, 0)',
    'olivedrab': 'rgb(107, 142, 35)',
    'orange': 'rgb(255, 165, 0)',
    'orangered': 'rgb(255, 69, 0)',
    'orchid': 'rgb(218, 112, 214)',
    'palegoldenrod': 'rgb(238, 232, 170)',
    'palegreen': 'rgb(152, 251, 152)',
    'paleturquoise': 'rgb(175, 238, 238)',
    'palevioletred': 'rgb(219, 112, 147)',
    'papayawhip': 'rgb(255, 239, 213)',
    'peachpuff': 'rgb(255, 218, 185)',
    'peru': 'rgb(205, 133, 63)',
    'pink': 'rgb(255, 192, 203)',
    'plum': 'rgb(221, 160, 221)',
    'powderblue': 'rgb(176, 224, 230)',
    'purple': 'rgb(128, 0, 128)',
    'red': 'rgb(255, 0, 0)',
    'rosybrown': 'rgb(188, 143, 143)',
    'royalblue': 'rgb( 65, 105, 225)',
    'saddlebrown': 'rgb(139, 69, 19)',
    'salmon': 'rgb(250, 128, 114)',
    'sandybrown': 'rgb(244, 164, 96)',
    'seagreen': 'rgb( 46, 139, 87)',
    'seashell': 'rgb(255, 245, 238)',
    'sienna': 'rgb(160, 82, 45)',
    'silver': 'rgb(192, 192, 192)',
    'skyblue': 'rgb(135, 206, 235)',
    'slateblue': 'rgb(106, 90, 205)',
    'slategray': 'rgb(112, 128, 144)',
    'slategrey': 'rgb(112, 128, 144)',
    'snow': 'rgb(255, 250, 250)',
    'springgreen': 'rgb( 0, 255, 127)',
    'steelblue': 'rgb( 70, 130, 180)',
    'tan': 'rgb(210, 180, 140)',
    'teal': 'rgb( 0, 128, 128)',
    'thistle': 'rgb(216, 191, 216)',
    'tomato': 'rgb(255, 99, 71)',
    'turquoise': 'rgb( 64, 224, 208)',
    'violet': 'rgb(238, 130, 238)',
    'wheat': 'rgb(245, 222, 179)',
    'white': 'rgb(255, 255, 255)',
    'whitesmoke': 'rgb(245, 245, 245)',
    'yellow': 'rgb(255, 255, 0)',
    'yellowgreen': 'rgb(154, 205, 50)',
    }
def isSameSign(a, b):
    """
    Tells whether a and b lie on the same side of zero.

    Zero counts as belonging to both sides, so it matches any other value.
    """
    if a <= 0 and b <= 0:
        return True
    return a >= 0 and b >= 0
# Pre-compiled patterns used when parsing numbers and lengths.
# optionally negative decimal coordinate, e.g. "-12.5"
coord = re.compile(r"\-?\d+\.?\d*")
# signed number carrying an exponent, e.g. "1.5e-3"
scinumber = re.compile(r"[\-\+]?(\d*\.?)?\d+[eE][\-\+]?\d+")
# signed number without exponent, e.g. "+.75"
number = re.compile(r"[\-\+]?(\d*\.?)?\d+")
# the exponent part alone; group 1 is the signed exponent digits
sciExponent = re.compile(r"[eE]([\-\+]?\d+)")
# a unit suffix anchored at the end of the string
unit = re.compile(r"(em|ex|px|pt|pc|cm|mm|in|\%){1,1}$")
class Unit(object):
    """
    Integer constants for the unit suffix of a length, plus conversions
    between a suffix string and its constant.
    """
    INVALID = -1
    NONE = 0
    PCT = 1
    PX = 2
    PT = 3
    PC = 4
    EM = 5
    EX = 6
    CM = 7
    MM = 8
    IN = 9

    # suffix text -> constant (a missing suffix, None or '', means "no unit")
    _SUFFIX_TO_UNIT = {
        None: NONE, '': NONE, '%': PCT,
        'px': PX, 'pt': PT, 'pc': PC,
        'em': EM, 'ex': EX,
        'cm': CM, 'mm': MM, 'in': IN,
    }
    # constant -> suffix text
    _UNIT_TO_SUFFIX = {
        NONE: '', PCT: '%',
        PX: 'px', PT: 'pt', PC: 'pc',
        EM: 'em', EX: 'ex',
        CM: 'cm', MM: 'mm', IN: 'in',
    }

    def get(str):
        """
        Returns the Unit constant for a suffix string, Unit.NONE for None or
        the empty string, and Unit.INVALID for anything unrecognised.
        """
        try:
            return Unit._SUFFIX_TO_UNIT.get(str, Unit.INVALID)
        except TypeError:
            # unhashable argument: it cannot equal any known suffix
            return Unit.INVALID

    def str(u):
        """
        Returns the suffix string for a Unit constant, or the literal text
        'INVALID' when the constant is not a known unit.
        """
        try:
            return Unit._UNIT_TO_SUFFIX.get(u, 'INVALID')
        except TypeError:
            # unhashable argument: it cannot equal any known constant
            return 'INVALID'

    # wrapped by assignment rather than with decorator syntax
    get = staticmethod(get)
    str = staticmethod(str)
class SVGLength(object):
    """
    Parses a length string into a number and a Unit constant.

    After construction the instance always has two attributes:
      value - the parsed number (an int when it is whole, otherwise a float),
              or 0 when no number could be parsed
      units - a Unit constant; Unit.NONE for a bare number, Unit.INVALID when
              the string has no leading number or its suffix is not a
              recognised unit
    """
    def __init__(self, str):
        # Pessimistic defaults so that both attributes exist on every path.
        # Previously 'units' was left unset when a number was followed by an
        # unrecognised suffix (e.g. "5foo"), which surfaced as an
        # AttributeError in callers.
        self.value = 0
        self.units = Unit.INVALID

        try:
            # simple unitless number, with or without scientific notation
            parsed = float(str)
            if int(parsed) == parsed:
                parsed = int(parsed)
            self.value = parsed
            self.units = Unit.NONE
            return
        except (ValueError, OverflowError):
            # not a bare finite number: the string has a unit or is invalid
            # (int() raises for 'nan' and 'inf', which lands here as well)
            pass

        # Prefer the scientific-notation pattern; it is a superset of the
        # plain number pattern at the start of the string.
        numMatch = scinumber.match(str)
        if numMatch == None:
            numMatch = number.match(str)
        if numMatch == None or numMatch.end(0) == 0:
            # TODO: this needs to set the default for the given attribute (how?)
            return

        try:
            # float() understands the exponent itself; this avoids the
            # rounding error of multiplying the mantissa by 10 ** exponent
            parsed = float(numMatch.group(0))
            if int(parsed) == parsed:
                parsed = int(parsed)
        except OverflowError:
            # exponent too large to represent: treat the length as invalid
            return
        self.value = parsed

        # the unit pattern is anchored at the end of the string
        unitMatch = unit.search(str, numMatch.end(0))
        if unitMatch != None:
            self.units = Unit.get(unitMatch.group(0))
# TODO: eventually use the SVGLength class once it is complete
def getSVGLength(value):
    """
    Returns the length of a property.

    When value parses as a plain number the float is returned; otherwise
    (for instance when it carries a unit suffix) value is returned unchanged.
    """
    try:
        return float(value)
    except ValueError:
        # The regex matches that used to run here had their results thrown
        # away, so they have been dropped; the outcome is the same.
        return value
def findElementById(node, id):
    """
    Returns the first element, in document order starting at node itself,
    whose 'id' attribute equals id, or None when there is none.

    Nodes that are not elements (nodeType other than 1) are never searched.
    """
    pending = [node]
    while len(pending) > 0:
        current = pending.pop()
        if current is None or current.nodeType != 1:
            continue
        if current.getAttribute('id') == id:
            return current
        # push children back-to-front so the first child is visited next
        children = list(current.childNodes)
        children.reverse()
        pending.extend(children)
    return None
def findElementsWithId(node, elems=None):
    """
    Collects every element at or below node that has a non-empty id attribute.

    Returns a dictionary mapping id -> element.  When elems is supplied it is
    filled in place and returned.
    """
    found = elems
    if found is None:
        found = {}
    ident = node.getAttribute('id')
    if ident != '':
        found[ident] = node
    # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
    # only nodes of type Element (1) can carry an id
    for sub in [kid for kid in node.childNodes if kid.nodeType == 1]:
        findElementsWithId(sub, found)
    return found
# Properties whose value may be a url(#id) reference to another element.
referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask', 'marker-start',
    'marker-end', 'marker-mid']

def _recordReference(ids, refId, node):
    """
    Notes in ids that node references the element with ID refId.

    Each entry of ids has the form [reference count, [referencing nodes]].
    """
    if refId in ids:
        ids[refId][0] += 1
        ids[refId][1].append(node)
    else:
        ids[refId] = [1, [node]]

def findReferencedElements(node, ids=None):
    """
    Returns the number of times an ID is referenced as well as all elements
    that reference it, as a dictionary of id -> [count, [nodes]].

    Looks at xlink:href, at the properties listed in referencingProps (both as
    attributes and inside the style attribute) and at the CSS rules of SVG
    <style> elements.  When ids is supplied it is filled in place and returned.
    """
    if ids is None:
        ids = {}
    # TODO: input argument ids is clunky here (see below how it is called)
    # GZ: alternative to passing dict, use **kwargs

    # if this node is a style element, parse its text into CSS
    if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
        # The stylesheet may be split over several Text (3) / CDATA (4)
        # children, or be absent altogether; gather whatever text there is
        # instead of assuming it all lives in firstChild.
        cssText = ''.join([child.nodeValue for child in node.childNodes
                           if child.nodeType in (3, 4)])
        if cssText != '':
            for rule in parseCssString(cssText):
                for propname in rule['properties']:
                    propval = rule['properties'][propname]
                    findReferencingProperty(node, propname, propval, ids)
        return ids

    # else if xlink:href is set, then grab the id
    href = node.getAttributeNS(NS['XLINK'], 'href')
    if len(href) > 1 and href[0] == '#':
        # we remove the hash mark from the beginning of the id
        _recordReference(ids, href[1:], node)

    # now get all style properties and the fill, stroke, filter attributes
    styles = node.getAttribute('style').split(';')
    for attr in referencingProps:
        styles.append(':'.join([attr, node.getAttribute(attr)]))

    for style in styles:
        propval = style.split(':')
        if len(propval) == 2:
            findReferencingProperty(node, propval[0].strip(), propval[1].strip(), ids)

    for child in node.childNodes:
        if child.nodeType == 1:
            findReferencedElements(child, ids)
    return ids

def findReferencingProperty(node, prop, val, ids):
    """
    If prop is one of referencingProps and val is a url(#id) reference
    (unquoted, double-quoted or single-quoted), records node in ids as a
    referrer of that id.  Anything else is ignored.
    """
    if prop not in referencingProps or val == '':
        return

    refId = None
    if len(val) >= 7 and val[0:5] == 'url(#':
        refId = val[5:val.find(')')]
    # if the url has a quote in it, we need to compensate
    elif len(val) >= 8:
        # double-quote
        if val[0:6] == 'url("#':
            refId = val[6:val.find('")')]
        # single-quote
        elif val[0:6] == "url('#":
            refId = val[6:val.find("')")]

    if refId is not None:
        _recordReference(ids, refId, node)
# Module-wide statistics counters, all starting at zero.  The functions in
# this file that update them do so through a 'global' declaration.
numIDsRemoved = numElemsRemoved = numAttrsRemoved = 0
numRastersEmbedded = 0
numPathSegmentsReduced = numCurvesStraightened = 0
numBytesSavedInPathData = numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0
def _collectUnusedDefs(container, referencedIDs, elemsToRemove):
    """
    Appends to elemsToRemove the unreferenced children of container,
    descending into SVG <g> children.  Returns elemsToRemove.
    """
    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    for elem in container.childNodes:
        # groups are never removed themselves, only searched
        if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
            _collectUnusedDefs(elem, referencedIDs, elemsToRemove)
            continue
        if elem.nodeType != 1 or elem.nodeName in keepTags:
            continue
        elemId = elem.getAttribute('id')
        if elemId == '' or elemId not in referencedIDs:
            elemsToRemove.append(elem)
    return elemsToRemove

def removeUnusedDefs(doc, defElem, elemsToRemove=None):
    """
    Lists the elements under defElem that nothing in doc references.

    An element qualifies when it has no id, or its id is not referenced
    anywhere in the document, and its tag is not one of font, style,
    metadata, script, title or desc.  SVG <g> children are searched
    recursively.  Nothing is removed here; the (possibly supplied) list
    elemsToRemove is extended and returned.
    """
    if elemsToRemove is None:
        elemsToRemove = []

    # Scan the document for references once; the previous code repeated this
    # scan for every nested group and also built an id map it never used.
    referencedIDs = findReferencedElements(doc.documentElement)
    return _collectUnusedDefs(defElem, referencedIDs, elemsToRemove)
def removeUnreferencedElements(doc):
    """
    Removes unreferenced <linearGradient>, <radialGradient> and <pattern>
    elements that carry an id, then vacuums every <defs> of the elements
    reported by removeUnusedDefs().

    Returns the number of elements removed from the document by both passes.
    """
    global numElemsRemoved
    num = 0
    removeTags = ['linearGradient', 'radialGradient', 'pattern']

    identifiedElements = findElementsWithId(doc.documentElement)
    referencedIDs = findReferencedElements(doc.documentElement)

    for id in identifiedElements:
        if id in referencedIDs:
            continue
        goner = findElementById(doc.documentElement, id)
        if goner != None and goner.parentNode != None and goner.nodeName in removeTags:
            goner.parentNode.removeChild(goner)
            num += 1
            numElemsRemoved += 1

    # Vacuum the defs.  The count is deliberately NOT reset here: it used to
    # be, which made the return value ignore everything removed above.
    defs = doc.documentElement.getElementsByTagName('defs')
    for aDef in defs:
        elemsToRemove = removeUnusedDefs(doc, aDef)
        for elem in elemsToRemove:
            elem.parentNode.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """
    Removes the unreferenced ID attributes.

    referencedIDs is a mapping keyed by the ids that are referenced;
    identifiedElements maps every id to the element carrying it.  The id
    attribute is stripped from each element whose id is not referenced,
    except for <font> elements.

    Returns the number of ID attributes removed
    """
    global numIDsRemoved
    keepTags = ['font']
    num = 0
    for elemId in identifiedElements.keys():
        node = identifiedElements[elemId]
        # 'in' works on every Python version; dict.has_key() is gone in 3.x
        if elemId not in referencedIDs and node.nodeName not in keepTags:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num
def removeNamespacedAttributes(node, namespaces):
    """
    Strips, from node and all of its descendants, every attribute whose
    namespace URI is listed in namespaces.

    Returns the number of attributes removed (0 for non-element nodes).
    """
    global numAttrsRemoved
    if node.nodeType != 1:
        return 0

    # collect the names first, then remove, so the attribute map is not
    # modified while it is being walked
    attrs = node.attributes
    doomed = []
    for index in range(attrs.length):
        candidate = attrs.item(index)
        if candidate != None and candidate.namespaceURI in namespaces:
            doomed.append(candidate.nodeName)

    removed = 0
    for qualifiedName in doomed:
        removed += 1
        numAttrsRemoved += 1
        node.removeAttribute(qualifiedName)

    # now do the same for the children
    for child in node.childNodes:
        removed += removeNamespacedAttributes(child, namespaces)
    return removed
def removeNamespacedElements(node, namespaces):
    """
    Removes, from node and all of its remaining descendants, every child node
    whose namespace URI is listed in namespaces.

    Returns the number of nodes removed (0 for non-element nodes).
    """
    global numElemsRemoved
    if node.nodeType != 1:
        return 0

    # snapshot the matching children before touching the child list
    doomed = [kid for kid in node.childNodes
              if kid != None and kid.namespaceURI in namespaces]

    removed = 0
    for kid in doomed:
        removed += 1
        numElemsRemoved += 1
        node.removeChild(kid)

    # now do the same for the children that are left
    for kid in node.childNodes:
        removed += removeNamespacedElements(kid, namespaces)
    return removed
def removeNestedGroups(node):
    """
    Walks down the tree from node and dissolves every SVG <g> child that has
    no attributes and no <title>/<desc> as a direct child: the group's
    children are moved up to take its place and the group is removed.

    Returns the number of groups removed.
    """
    global numElemsRemoved
    removed = 0

    collapsible = []
    for candidate in node.childNodes:
        if candidate.nodeName != 'g' or candidate.namespaceURI != NS['SVG'] \
                or len(candidate.attributes) != 0:
            continue
        # a title or desc directly inside the group pins it in place
        described = False
        for inner in candidate.childNodes:
            if inner.nodeType == 1 and inner.namespaceURI == NS['SVG'] and \
                    inner.nodeName in ['title', 'desc']:
                described = True
                break
        if not described:
            collapsible.append(candidate)

    for group in collapsible:
        # hoist the children, in order, to just before the group
        while group.childNodes.length > 0:
            group.parentNode.insertBefore(group.firstChild, group)
        group.parentNode.removeChild(group)
        numElemsRemoved += 1
        removed += 1

    # now recurse for children
    for kid in node.childNodes:
        if kid.nodeType == 1:
            removed += removeNestedGroups(kid)
    return removed
def moveCommonAttributesToParentGroup(elem):
    """
    This recursively calls this function on all children of the passed in element
    and then iterates over all child elements and removes common inheritable attributes
    from the children and places them in the parent group.

    Animation children (set, animate, animateColor, animateTransform,
    animateMotion) are left out entirely, because their 'fill' attribute is
    not the paint property: they are neither compared nor modified.

    Returns the net number of attributes saved, including those saved in the
    descendants.
    """
    num = 0

    animationTags = ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']
    # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
    # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
    inheritable = ['clip-rule',
        'display-align',
        'fill', 'fill-opacity', 'fill-rule',
        'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
        'font-style', 'font-variant', 'font-weight',
        'letter-spacing',
        'pointer-events', 'shape-rendering',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
        'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
        'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
        'word-spacing', 'writing-mode']

    candidates = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            num += moveCommonAttributesToParentGroup(child)
            if child.localName not in animationTags:
                candidates.append(child)

    # only process the children if there are more than one element
    if len(candidates) <= 1: return num

    # start from all inheritable attributes of the first candidate ...
    commonAttrs = {}
    attrList = candidates[0].attributes
    # (a dedicated index variable: reusing 'num' here used to wipe the count
    # accumulated from the recursion above)
    for index in range(attrList.length):
        attr = attrList.item(index)
        if attr.nodeName in inheritable:
            commonAttrs[attr.nodeName] = attr.nodeValue

    # ... and drop each one that any later candidate does not share
    for child in candidates[1:]:
        for name in list(commonAttrs.keys()):
            if child.getAttribute(name) != commonAttrs[name]:
                del commonAttrs[name]

    # commonAttrs now has all the inheritable attributes which are common among all candidates
    for name in commonAttrs.keys():
        for child in candidates:
            # removeAttribute() raises NotFoundErr for a missing attribute
            if child.hasAttribute(name):
                child.removeAttribute(name)
        elem.setAttribute(name, commonAttrs[name])

    # update our statistic (we remove N*M attributes and add back in M attributes)
    num += (len(candidates) - 1) * len(commonAttrs)
    return num
def removeUnusedAttributesOnParent(elem):
    """
    This recursively calls this function on all children of the element passed in,
    then removes any unused attributes on this elem if none of the children inherit it

    An attribute counts as inherited by a child when the child does not set it
    or sets it to 'inherit'.  Elements with fewer than two child elements are
    left untouched.

    Returns the number of attributes removed, including those removed from
    the descendants.
    """
    num = 0

    inheritable = ['clip-rule',
        'display-align',
        'fill', 'fill-opacity', 'fill-rule',
        'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
        'font-style', 'font-variant', 'font-weight',
        'letter-spacing',
        'pointer-events', 'shape-rendering',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
        'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
        'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
        'word-spacing', 'writing-mode']

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += removeUnusedAttributesOnParent(child)

    # only process the children if there are more than one element
    if len(childElements) <= 1: return num

    # get all inheritable attribute values on this parent
    attrList = elem.attributes
    unusedAttrs = {}
    # (a dedicated index variable: reusing 'num' here used to wipe the count
    # accumulated from the recursion above)
    for index in range(attrList.length):
        attr = attrList.item(index)
        if attr.nodeName in inheritable:
            unusedAttrs[attr.nodeName] = attr.nodeValue

    # an attribute stops being "unused" as soon as one child inherits it
    for child in childElements:
        for name in list(unusedAttrs.keys()):
            val = child.getAttribute(name)
            if val == '' or val == None or val == 'inherit':
                del unusedAttrs[name]

    # unusedAttrs now has all the parent attributes that are unused
    for name in unusedAttrs.keys():
        elem.removeAttribute(name)
        num += 1

    return num
def removeDuplicateGradientStops(doc):
    """
    For every linearGradient and radialGradient in doc, rewrites each stop's
    offset attribute as a plain number (percentages are divided by 100,
    unusable offsets become 0) and removes a stop when the previous stop seen
    at the same offset has the same stop-color and stop-opacity.

    Returns the number of stops removed.
    """
    global numElemsRemoved
    num = 0

    for gradType in ['linearGradient', 'radialGradient']:
        for grad in doc.getElementsByTagName(gradType):
            # offset -> [color, opacity] of the latest stop at that offset
            stops = {}
            stopsToRemove = []
            for stop in grad.getElementsByTagName('stop'):
                # convert percentages into a floating point number
                offsetU = SVGLength(stop.getAttribute('offset'))
                # tolerate a length object that never got its units assigned
                offsetUnits = getattr(offsetU, 'units', Unit.INVALID)
                if offsetUnits == Unit.PCT:
                    offset = offsetU.value / 100.0
                elif offsetUnits == Unit.NONE:
                    offset = offsetU.value
                else:
                    offset = 0
                # set the stop offset value to the integer or floating point equivalent
                if int(offset) == offset:
                    stop.setAttribute('offset', str(int(offset)))
                else:
                    stop.setAttribute('offset', str(offset))

                color = stop.getAttribute('stop-color')
                opacity = stop.getAttribute('stop-opacity')
                # 'in' works on every Python version; dict.has_key() is gone in 3.x
                if offset in stops:
                    oldStop = stops[offset]
                    if oldStop[0] == color and oldStop[1] == opacity:
                        stopsToRemove.append(stop)
                stops[offset] = [color, opacity]

            for stop in stopsToRemove:
                stop.parentNode.removeChild(stop)
                num += 1
                numElemsRemoved += 1

    return num
def collapseSinglyReferencedGradients(doc):
    """
    Merges a gradient into the gradient that references it when that
    reference, made through xlink:href, is the only reference to it in doc.

    The referencing gradient takes over the stops (if it has none of its own)
    and any of gradientUnits, spreadMethod, gradientTransform and the
    geometry attributes it does not specify itself; the merged gradient is
    then removed from the document.

    Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0
    gradientTags = ['linearGradient', 'radialGradient']

    # make sure to reset the ref'ed ids for when we are running this in testscour
    # (items() exists on both Python 2 and 3 dictionaries; iteritems() does not)
    for rid, nodeCount in findReferencedElements(doc.documentElement).items():
        count = nodeCount[0]
        nodes = nodeCount[1]
        if count != 1:
            continue

        elem = findElementById(doc.documentElement, rid)
        if elem == None or elem.nodeType != 1 or elem.nodeName not in gradientTags \
                or elem.namespaceURI != NS['SVG']:
            continue

        # found a gradient that is referenced by only 1 other element
        refElem = nodes[0]
        if refElem.nodeType != 1 or refElem.nodeName not in gradientTags \
                or refElem.namespaceURI != NS['SVG']:
            continue

        # The reference map was built before this loop started, so refElem may
        # already have been merged away and detached; merging into it would
        # lose elem's content.  Leave elem alone in that case.
        if refElem is elem or refElem.parentNode is None:
            continue

        # Only a reference made through xlink:href means "inherit from".
        hrefNode = refElem.getAttributeNodeNS(NS['XLINK'], 'href')
        if hrefNode is None or hrefNode.value != '#' + rid:
            continue

        # elem is a gradient referenced by only one other gradient (refElem)

        # add the stops to the referencing gradient (this removes them from elem)
        if len(refElem.getElementsByTagName('stop')) == 0:
            stopsToAdd = elem.getElementsByTagName('stop')
            for stop in stopsToAdd:
                refElem.appendChild(stop)

        # adopt the gradientUnits, spreadMethod, gradientTransform attributes if
        # they are unspecified on refElem
        adopted = ['gradientUnits', 'spreadMethod', 'gradientTransform']
        # if both are radialGradients, also adopt elem's fx,fy,cx,cy,r attributes
        if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
            adopted = adopted + ['fx', 'fy', 'cx', 'cy', 'r']
        # if both are linearGradients, also adopt elem's x1,y1,x2,y2 attributes
        if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
            adopted = adopted + ['x1', 'y1', 'x2', 'y2']
        for attr in adopted:
            if refElem.getAttribute(attr) == '' and not elem.getAttribute(attr) == '':
                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

        # If elem itself pointed at a further gradient, refElem has to point
        # there now to keep inheriting from it; otherwise the xlink:href on
        # refElem is simply no longer needed.
        targetHref = elem.getAttributeNS(NS['XLINK'], 'href')
        if targetHref != '':
            hrefNode.value = targetHref
        else:
            refElem.removeAttributeNS(NS['XLINK'], 'href')

        # now delete elem
        elem.parentNode.removeChild(elem)
        numElemsRemoved += 1
        num += 1
    return num
def _gradientsAreDuplicates(grad, ograd):
    """
    Tells whether two gradient elements have identical gradient attributes,
    identical xlink:href and identical stops.
    """
    for attr in ['gradientUnits', 'spreadMethod', 'gradientTransform',
                 'x1', 'y1', 'x2', 'y2', 'cx', 'cy', 'fx', 'fy', 'r']:
        if grad.getAttribute(attr) != ograd.getAttribute(attr):
            return False

    # compare xlink:href values too
    if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
        return False

    # all gradient properties match, now time to compare stops
    stops = grad.getElementsByTagName('stop')
    ostops = ograd.getElementsByTagName('stop')
    if stops.length != ostops.length:
        return False
    for i in range(stops.length):
        stop = stops.item(i)
        ostop = ostops.item(i)
        for attr in ['offset', 'stop-color', 'stop-opacity']:
            if stop.getAttribute(attr) != ostop.getAttribute(attr):
                return False
    return True

def removeDuplicateGradients(doc):
    """
    Finds gradients of the same type that are exact duplicates of one another,
    keeps the first of each set as the master, re-points fill, stroke and
    xlink:href references from the duplicates to the master and removes the
    duplicates from the document.

    Returns the number of gradients removed.
    """
    global numElemsRemoved
    num = 0

    gradientsToRemove = {}   # master gradient -> list of its duplicates
    duplicateToMaster = {}   # duplicate gradient -> its master

    for gradType in ['linearGradient', 'radialGradient']:
        grads = doc.getElementsByTagName(gradType)
        for grad in grads:
            # TODO: should slice grads from 'grad' here to optimize
            for ograd in grads:
                # do not compare gradient to itself
                if grad is ograd: continue
                # a gradient that is already a master or already scheduled
                # for removal cannot become a duplicate (again)
                # ('in' works on every Python version; has_key() is gone in 3.x)
                if ograd in gradientsToRemove or ograd in duplicateToMaster:
                    continue
                if not _gradientsAreDuplicates(grad, ograd):
                    continue

                # ograd is a duplicate of grad, we schedule it to be removed
                if grad not in gradientsToRemove:
                    gradientsToRemove[grad] = []
                gradientsToRemove[grad].append(ograd)
                duplicateToMaster[ograd] = grad

    # get a collection of all elements that are referenced and their referencing elements
    referencedIDs = findReferencedElements(doc.documentElement)
    for masterGrad in gradientsToRemove.keys():
        master_id = masterGrad.getAttribute('id')
        for dupGrad in gradientsToRemove[masterGrad]:
            # if the duplicate gradient no longer has a parent that means it was
            # already re-mapped to another master gradient
            if not dupGrad.parentNode: continue
            dup_id = dupGrad.getAttribute('id')

            # a duplicate nobody references has no entry in referencedIDs
            referrers = []
            if dup_id in referencedIDs:
                referrers = referencedIDs[dup_id][1]

            # references cannot be re-pointed at a master without an id, so
            # the duplicate has to stay
            if len(referrers) > 0 and master_id == '':
                continue

            # for each element that referenced the gradient we are going to remove
            for elem in referrers:
                # find out which attribute referenced the duplicate gradient
                for attr in ['fill', 'stroke']:
                    v = elem.getAttribute(attr)
                    if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
                        elem.setAttribute(attr, 'url(#'+master_id+')')
                if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
                    elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)

            # now that all referencing elements have been re-mapped to the master
            # it is safe to remove this gradient from the document
            dupGrad.parentNode.removeChild(dupGrad)
            numElemsRemoved += 1
            num += 1
    return num
def _styleNumber(styleMap, prop):
    """
    Returns the value of style property `prop` as a float, or None when the
    property is absent or is not a plain number (for example 'inherit').
    """
    try:
        return float(styleMap[prop])
    except (KeyError, ValueError):
        return None


def repairStyle(node, options):
    """
    Recursively cleans up the 'style' attribute of `node` and its children.

    The style string is parsed into a property map, redundant or useless
    properties are dropped, and (when options.style_to_xml is set) properties
    that are known SVG attributes are moved out of the style into attributes.
    The remaining properties are written back, or the style attribute is
    removed if nothing is left.

    node    -- a DOM node; only element nodes (nodeType 1) are rewritten, but
               the children of any node are visited
    options -- object providing a boolean 'style_to_xml' attribute

    Returns the number of style fixes/removals that were made.
    """
    num = 0
    if node.nodeType == 1 and len(node.getAttribute('style')) > 0:
        # get all style properties and stuff them into a dictionary
        styleMap = {}
        for style in node.getAttribute('style').split(';'):
            # split on the first colon only, so that values which themselves
            # contain a colon (e.g. url(http://...)) are not thrown away
            propval = style.split(':', 1)
            if len(propval) == 2:
                styleMap[propval[0].strip()] = propval[1].strip()

        # I've seen this enough to know that I need to correct it:
        # fill: url(#linearGradient4918) rgb(0, 0, 0);
        for prop in ['fill', 'stroke']:
            if prop in styleMap:
                chunk = styleMap[prop].split(') ')
                isUrl = (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or
                         chunk[0][:6] == "url('#")
                if len(chunk) == 2 and isUrl and chunk[1] == 'rgb(0, 0, 0)':
                    styleMap[prop] = chunk[0] + ')'
                    num += 1

        # Here is where we can weed out unnecessary styles like:
        #  opacity:1
        # (non-numeric values such as 'inherit' are left untouched)
        opacity = _styleNumber(styleMap, 'opacity')
        if opacity == 1.0:
            # opacity='1.0' is useless, remove it
            del styleMap['opacity']
            num += 1
        elif opacity == 0.0:
            # if opacity='0' then all fill and stroke properties are useless, remove them
            for uselessStyle in ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-linejoin',
                                 'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap',
                                 'stroke-dasharray', 'stroke-dashoffset']:
                if uselessStyle in styleMap:
                    del styleMap[uselessStyle]
                    num += 1

        # if stroke:none, then remove all stroke-related properties (stroke-width, etc)
        # TODO: should also detect if the computed value of this element is stroke="none"
        if 'stroke' in styleMap and styleMap['stroke'] == 'none':
            for strokestyle in ['stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
                                'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset',
                                'stroke-opacity']:
                if strokestyle in styleMap:
                    del styleMap[strokestyle]
                    num += 1
            # TODO: This is actually a problem if a parent element has a specified stroke
            # we need to properly calculate computed values
            del styleMap['stroke']

        # if fill:none, then remove all fill-related properties (fill-rule, etc)
        if 'fill' in styleMap and styleMap['fill'] == 'none':
            for fillstyle in ['fill-rule', 'fill-opacity']:
                if fillstyle in styleMap:
                    del styleMap[fillstyle]
                    num += 1

        # stop-opacity: 1
        if _styleNumber(styleMap, 'stop-opacity') == 1.0:
            del styleMap['stop-opacity']
            num += 1

        # fill-opacity: 1 or 0
        # TODO: This is actually a problem if the parent element does not have fill-opacity=1
        fillOpacity = _styleNumber(styleMap, 'fill-opacity')
        if fillOpacity == 1.0:
            del styleMap['fill-opacity']
            num += 1
        elif fillOpacity == 0.0:
            for uselessFillStyle in ['fill', 'fill-rule']:
                if uselessFillStyle in styleMap:
                    del styleMap[uselessFillStyle]
                    num += 1

        # stroke-opacity: 1 or 0
        # TODO: This is actually a problem if the parent element does not have stroke-opacity=1
        strokeOpacity = _styleNumber(styleMap, 'stroke-opacity')
        if strokeOpacity == 1.0:
            del styleMap['stroke-opacity']
            num += 1
        elif strokeOpacity == 0.0:
            for uselessStrokeStyle in ['stroke', 'stroke-width', 'stroke-linejoin',
                                       'stroke-linecap', 'stroke-dasharray',
                                       'stroke-dashoffset']:
                if uselessStrokeStyle in styleMap:
                    del styleMap[uselessStrokeStyle]
                    num += 1

        # stroke-width: 0
        if 'stroke-width' in styleMap:
            strokeWidth = getSVGLength(styleMap['stroke-width'])
            if strokeWidth == 0.0:
                for uselessStrokeStyle in ['stroke', 'stroke-linejoin', 'stroke-linecap',
                                           'stroke-dasharray', 'stroke-dashoffset',
                                           'stroke-opacity']:
                    if uselessStrokeStyle in styleMap:
                        del styleMap[uselessStrokeStyle]
                        num += 1

        # remove font properties for non-text elements
        # I've actually observed this in real SVG content
        if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
            for fontstyle in ['font-family', 'font-size', 'font-stretch', 'font-size-adjust',
                              'font-style', 'font-variant', 'font-weight',
                              'letter-spacing', 'line-height', 'kerning',
                              'text-anchor', 'text-decoration', 'text-rendering',
                              'unicode-bidi', 'word-spacing', 'writing-mode']:
                if fontstyle in styleMap:
                    del styleMap[fontstyle]
                    num += 1

        # remove inkscape-specific styles
        # TODO: need to get a full list of these
        for inkscapeStyle in ['-inkscape-font-specification']:
            if inkscapeStyle in styleMap:
                del styleMap[inkscapeStyle]
                num += 1

        # visibility: visible
        if styleMap.get('visibility') == 'visible':
            del styleMap['visibility']
            num += 1

        # display: inline
        if styleMap.get('display') == 'inline':
            del styleMap['display']
            num += 1

        # overflow: visible or overflow specified on element other than svg, marker, pattern
        # (the element test used to be inverted, which stripped overflow from exactly
        # the elements where it matters and kept it everywhere else)
        if 'overflow' in styleMap:
            if (styleMap['overflow'] == 'visible' or
                    node.nodeName not in ['svg', 'marker', 'pattern']):
                del styleMap['overflow']
                num += 1

        # marker: none
        if styleMap.get('marker') == 'none':
            del styleMap['marker']
            num += 1

        # now if any of the properties match known SVG attributes we prefer attributes
        # over style so emit them and remove them from the style map
        if options.style_to_xml:
            # iterate over a copy of the keys because entries are deleted in the loop
            for propName in list(styleMap.keys()):
                if propName in svgAttributes:
                    node.setAttribute(propName, styleMap[propName])
                    del styleMap[propName]

        # sew our remaining style properties back together into a style attribute
        fixedStyle = ''.join([prop + ':' + styleMap[prop] + ';' for prop in styleMap])

        if fixedStyle != '':
            node.setAttribute('style', fixedStyle)
        else:
            node.removeAttribute('style')

    # recurse for our child elements
    for child in node.childNodes:
        num += repairStyle(child, options)

    return num
def _isDefaultGradientLength(length, percent, fraction, fractionAllowed):
    """
    Returns True if `length` equals the default given either as `percent`
    with percentage units or, when `fractionAllowed` is True, as the unitless
    `fraction`.
    """
    if length.value == percent and length.units == Unit.PCT:
        return True
    return fractionAllowed and length.value == fraction and length.units == Unit.NONE


def removeDefaultAttributeValues(node, options):
    """
    Recursively removes attributes whose value equals the default value.

    Handles gradientUnits, spreadMethod, x1/y1/y2 (zero), and, on gradient
    elements only, x2 (linearGradient) and fx/fy/cx/cy/r (radialGradient).
    The gradient-only checks are restricted by element name because the same
    attribute names have different defaults elsewhere (e.g. 'r' on <circle>).

    node    -- a DOM node; non-element nodes are ignored
    options -- unused here, passed through to the recursive calls

    Returns the number of attributes removed.

    NOTE(review): attributes inherited through xlink:href from another
    gradient are not considered here - confirm whether that matters.
    """
    num = 0
    if node.nodeType != 1:
        return 0

    # gradientUnits: objectBoundingBox
    if node.getAttribute('gradientUnits') == 'objectBoundingBox':
        node.removeAttribute('gradientUnits')
        num += 1

    # spreadMethod: pad
    if node.getAttribute('spreadMethod') == 'pad':
        node.removeAttribute('spreadMethod')
        num += 1

    # x1, y1, y2: a zero value (in any unit) is the default
    for attr in ['x1', 'y1', 'y2']:
        if node.getAttribute(attr) != '':
            if SVGLength(node.getAttribute(attr)).value == 0:
                node.removeAttribute(attr)
                num += 1

    # a unitless 1 or 0.5 only means 100% or 50% in bounding-box units;
    # with userSpaceOnUse it is a length in user units and must be kept
    fractionAllowed = node.getAttribute('gradientUnits') != 'userSpaceOnUse'

    if node.nodeName == 'linearGradient':
        # x2: 100%
        if node.getAttribute('x2') != '':
            x2 = SVGLength(node.getAttribute('x2'))
            if _isDefaultGradientLength(x2, 100, 1, fractionAllowed):
                node.removeAttribute('x2')
                num += 1

    elif node.nodeName == 'radialGradient':
        # fx: equal to cx, fy: equal to cy
        # (checked before cx/cy are possibly removed below)
        for (focus, center) in [('fx', 'cx'), ('fy', 'cy')]:
            if node.getAttribute(focus) != '':
                if node.getAttribute(focus) == node.getAttribute(center):
                    node.removeAttribute(focus)
                    num += 1

        # cx, cy, r: 50%
        for attr in ['cx', 'cy', 'r']:
            if node.getAttribute(attr) != '':
                length = SVGLength(node.getAttribute(attr))
                if _isDefaultGradientLength(length, 50, 0.5, fractionAllowed):
                    node.removeAttribute(attr)
                    num += 1

    # recurse for our child elements
    for child in node.childNodes:
        num += removeDefaultAttributeValues(child, options)

    return num
# rgb(R, G, B) with integer channels
rgb = re.compile(r"\s*rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)\s*")
# rgb(R%, G%, B%) with percentage channels
rgbp = re.compile(r"\s*rgb\(\s*(\d*\.?\d+)%\s*,\s*(\d*\.?\d+)%\s*,\s*(\d*\.?\d+)%\s*\)\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

    Named colors found in the module-level 'colors' table and the rgb(...)
    notations (integer or percentage channels) are converted; any other value
    is returned unchanged, apart from the #RRGGBB -> #RGB shortening.
    """
    s = value

    if s in colors:
        s = colors[s]

    channels = None
    rgbpMatch = rgbp.match(s)
    if rgbpMatch is not None:
        channels = [int(float(pct) * 255.0 / 100.0) for pct in rgbpMatch.groups()]
    else:
        rgbMatch = rgb.match(s)
        if rgbMatch is not None:
            channels = [int(c) for c in rgbMatch.groups()]

    if channels is not None:
        # clamp to the valid range so that out-of-range input such as
        # rgb(300,0,0) cannot produce a malformed string like '#12C0000'
        s = '#%02X%02X%02X' % tuple([min(c, 255) for c in channels])

    # #RRGGBB -> #RGB when both digits of every channel are equal
    # (length is checked first so that an empty string is not indexed)
    if len(s) == 7 and s[0] == '#':
        upper = s.upper()
        if upper[1] == upper[2] and upper[3] == upper[4] and upper[5] == upper[6]:
            s = '#' + upper[1] + upper[3] + upper[5]

    return s
def convertColors(element):
    """
    Walks the tree rooted at `element` and rewrites color attributes with the
    result of convertColor() whenever that result is shorter.

    Returns the total number of bytes saved.
    """
    # only element nodes carry attributes
    if element.nodeType != 1:
        return 0

    # which color attributes to look at depends on the kind of element
    tag = element.nodeName
    if tag in ['rect', 'circle', 'ellipse', 'polygon',
               'line', 'polyline', 'path', 'g', 'a']:
        colorAttrs = ['fill', 'stroke']
    elif tag in ['stop']:
        colorAttrs = ['stop-color']
    elif tag in ['solidColor']:
        colorAttrs = ['solid-color']
    else:
        colorAttrs = []

    saved = 0
    for name in colorAttrs:
        before = element.getAttribute(name)
        if before == '':
            continue
        after = convertColor(before)
        # keep the original spelling unless the converted one is strictly shorter
        if len(before) > len(after):
            element.setAttribute(name, after)
            saved += len(before) - len(element.getAttribute(name))

    # descend into the children and add up their savings
    for kid in element.childNodes:
        saved += convertColors(kid)

    return saved
1255 # TODO: go over what this method does and see if there is a way to optimize it
1256 # TODO: go over the performance of this method and see if I can save memory/speed by
1257 # reusing data structures, etc
def _pathAllZero(values):
    """Returns True when every number in `values` equals zero."""
    for value in values:
        if value != 0:
            return False
    return True


def _pathCollapseSameCommands(path):
    """
    Merges runs of consecutive identical commands into one command whose
    coordinate list is the concatenation of the run.

    The first entry of `path` is kept as-is and is never merged into.
    'm' commands are never merged either, because additional coordinate pairs
    of a moveto are implicit linetos and would draw lines that were not there.
    Returns a new list; the coordinate lists in `path` are not modified.
    """
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        if len(newPath) > 1 and cmd != 'm' and newPath[-1][0] == cmd:
            newPath[-1][1].extend(data)
        else:
            newPath.append((cmd, list(data)))
    return newPath


def _pathImpliedControlPoint(prevSegment, curveCmds):
    """
    Returns the first control point, relative to the current point, that a
    shorthand curve command would assume if it directly followed
    `prevSegment`, or None if it cannot be determined here.

    prevSegment -- (cmd, data) tuple of the preceding relative command
    curveCmds   -- the commands of the same curve family (['c','s'] or ['q','t'])
    """
    (prevCmd, prevData) = prevSegment
    if prevCmd not in curveCmds:
        # after any other command the control point coincides with the current point
        return (0, 0)
    if prevCmd != 't' and len(prevData) >= 4:
        # reflection of the last explicit control point about the end point
        return (prevData[-2] - prevData[-4], prevData[-1] - prevData[-3])
    # the control point of a preceding 't' is itself implied; do not guess
    return None


def cleanPath(element):
    """
    Cleans the path string (d attribute) of the element

    The path data is parsed, converted to relative commands (the first moveto
    is left untouched), degenerate segments are removed, straight cubic curves
    become lines, lines/curves are replaced by their h/v/s/t shorthands where
    possible, and runs of identical commands are collapsed. The result is
    serialized with serializePath() and written back to the 'd' attribute.

    Updates the module-level counters numBytesSavedInPathData,
    numPathSegmentsReduced and numCurvesStraightened. Path data that yields no
    commands, or whose first command has fewer than two numbers, is left as is.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    # however, this parser object has some ugliness in it (lists of tuples of tuples of
    # numbers and booleans).  we just need a list of (cmd,[numbers]):
    path = []
    for (cmd, dataset) in pathObj:
        if cmd in ['M', 'm', 'L', 'l', 'T', 't']:
            # one or more tuples, each containing two numbers
            nums = []
            for t in dataset:
                # convert to a Decimal
                nums.append(Decimal(str(t[0])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))

            # only create this segment if it is not empty
            if nums:
                path.append((cmd, nums))

        elif cmd in ['V', 'v', 'H', 'h']:
            # one or more numbers
            nums = []
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append((cmd, nums))

        elif cmd in ['C', 'c', 'S', 's', 'Q', 'q']:
            # one or more tuples, each containing three (C) or two (S, Q)
            # tuples of two numbers each
            nums = []
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append((cmd, nums))

        elif cmd in ['A', 'a']:
            # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
            # another boolean, and a tuple of two numbers
            nums = []
            for t in dataset:
                nums.append(Decimal(str(t[0][0])) * Decimal(1))
                nums.append(Decimal(str(t[0][1])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))

                if t[2]:
                    nums.append(Decimal(1))
                else:
                    nums.append(Decimal(0))

                if t[3]:
                    nums.append(Decimal(1))
                else:
                    nums.append(Decimal(0))

                nums.append(Decimal(str(t[4][0])) * Decimal(1))
                nums.append(Decimal(str(t[4][1])) * Decimal(1))
            path.append((cmd, nums))

        elif cmd in ['Z', 'z']:
            path.append((cmd, []))

    # nothing usable was parsed (e.g. an empty d attribute): leave the element alone
    if not path or len(path[0][1]) < 2:
        return

    # calculate the starting x,y coord for the second path command
    (firstCmd, firstData) = path[0]
    if len(firstData) == 2:
        (x, y) = firstData
    else:
        # we have a move and then 1 or more coords for lines
        N = len(firstData)
        if firstCmd == 'M':
            # take the last pair of coordinates for the starting point
            x = firstData[N-2]
            y = firstData[N-1]
        else:
            # relative move, accumulate coordinates for the starting point
            (x, y) = firstData[0], firstData[1]
            n = 2
            while n < N:
                x += firstData[n]
                y += firstData[n+1]
                n += 2

    # the sub-path starts at the moveto point itself (the first pair); any
    # further pairs are implicit linetos and do not change where 'z' returns to
    (startx, starty) = (firstData[0], firstData[1])

    # convert absolute coordinates into relative ones (start with the second subcommand
    # and leave the first M as absolute)
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        i = 0
        newCmd = cmd
        newData = data
        # adjust abs to rel
        # only the A command has some values that we don't want to adjust (radii, rotation, flags)
        if cmd == 'A':
            newCmd = 'a'
            newData = []
            while i < len(data):
                newData.extend(data[i:i+5])
                newData.append(data[i+5] - x)
                newData.append(data[i+6] - y)
                x = data[i+5]
                y = data[i+6]
                i += 7
        elif cmd == 'a':
            while i < len(data):
                x += data[i+5]
                y += data[i+6]
                i += 7
        elif cmd == 'H':
            newCmd = 'h'
            newData = []
            while i < len(data):
                newData.append(data[i] - x)
                x = data[i]
                i += 1
        elif cmd == 'h':
            while i < len(data):
                x += data[i]
                i += 1
        elif cmd == 'V':
            newCmd = 'v'
            newData = []
            while i < len(data):
                newData.append(data[i] - y)
                y = data[i]
                i += 1
        elif cmd == 'v':
            while i < len(data):
                y += data[i]
                i += 1
        elif cmd in ['M']:
            newCmd = cmd.lower()
            newData = []
            startx = data[0]
            starty = data[1]
            while i < len(data):
                newData.append(data[i] - x)
                newData.append(data[i+1] - y)
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['L', 'T']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append(data[i] - x)
                newData.append(data[i+1] - y)
                x = data[i]
                y = data[i+1]
                i += 2
        elif cmd in ['m']:
            # a relative moveto is relative to the current point, not to the
            # start of the previous sub-path
            startx = x + data[0]
            starty = y + data[1]
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['l', 't']:
            while i < len(data):
                x += data[i]
                y += data[i+1]
                i += 2
        elif cmd in ['S', 'Q']:
            newCmd = cmd.lower()
            newData = []
            while i < len(data):
                newData.append(data[i] - x)
                newData.append(data[i+1] - y)
                newData.append(data[i+2] - x)
                newData.append(data[i+3] - y)
                x = data[i+2]
                y = data[i+3]
                i += 4
        elif cmd in ['s', 'q']:
            while i < len(data):
                x += data[i+2]
                y += data[i+3]
                i += 4
        elif cmd == 'C':
            newCmd = 'c'
            newData = []
            while i < len(data):
                newData.append(data[i] - x)
                newData.append(data[i+1] - y)
                newData.append(data[i+2] - x)
                newData.append(data[i+3] - y)
                newData.append(data[i+4] - x)
                newData.append(data[i+5] - y)
                x = data[i+4]
                y = data[i+5]
                i += 6
        elif cmd == 'c':
            while i < len(data):
                x += data[i+4]
                y += data[i+5]
                i += 6
        elif cmd in ['z', 'Z']:
            x = startx
            y = starty
            newCmd = 'z'
        newPath.append((newCmd, newData))
    path = newPath

    # remove empty segments
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        if cmd in ['m', 'l', 't']:
            # NOTE(review): a zero-length 't' may still bulge through its
            # reflected control point; it is dropped here as before - confirm
            newData = []
            i = 0
            while i < len(data):
                # the first pair of an 'm' is the moveto itself: even when it
                # is 0,0 it starts a new sub-path, so it must be kept
                isMoveTo = (cmd == 'm' and i == 0)
                if isMoveTo or data[i] != 0 or data[i+1] != 0:
                    newData.append(data[i])
                    newData.append(data[i+1])
                else:
                    numPathSegmentsReduced += 1
                i += 2
            if newData:
                newPath.append((cmd, newData))
        elif cmd == 'c':
            newData = []
            i = 0
            while i < len(data):
                # a curve that ends where it starts can still draw a loop, so
                # it is only empty when its control points are zero as well
                if not _pathAllZero(data[i:i+6]):
                    newData.extend(data[i:i+6])
                else:
                    numPathSegmentsReduced += 1
                i += 6
            if newData:
                newPath.append((cmd, newData))
        elif cmd == 'a':
            newData = []
            i = 0
            while i < len(data):
                if data[i+5] != 0 or data[i+6] != 0:
                    newData.extend(data[i:i+7])
                else:
                    numPathSegmentsReduced += 1
                i += 7
            if newData:
                newPath.append((cmd, newData))
        elif cmd == 'q':
            newData = []
            i = 0
            while i < len(data):
                # same reasoning as for 'c': the control point must be zero too
                if not _pathAllZero(data[i:i+4]):
                    newData.extend(data[i:i+4])
                else:
                    numPathSegmentsReduced += 1
                i += 4
            if newData:
                newPath.append((cmd, newData))
        elif cmd in ['h', 'v']:
            newData = []
            i = 0
            while i < len(data):
                if data[i] != 0:
                    newData.append(data[i])
                else:
                    numPathSegmentsReduced += 1
                i += 1
            if newData:
                newPath.append((cmd, newData))
        else:
            newPath.append((cmd, data))
    path = newPath

    # convert straight curves into lines
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        i = 0
        newData = data
        if cmd == 'c':
            newData = []
            while i < len(data):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x, p1y) = (data[i], data[i+1])
                (p2x, p2y) = (data[i+2], data[i+3])
                dx = data[i+4]
                dy = data[i+5]

                foundStraightCurve = False

                if dx == 0:
                    # dy == 0 as well would be a closed curve, never a line
                    if dy != 0 and p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    # both control points must lie on the line through the end points
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append((cmd, newData))
                        newData = []
                    # now create a straight line segment
                    newPath.append(('l', [dx, dy]))
                    numCurvesStraightened += 1
                else:
                    newData.extend(data[i:i+6])

                i += 6
        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append((cmd, newData))
    path = newPath

    # collapse all consecutive commands of the same type into one command
    path = _pathCollapseSameCommands(path)

    # convert to shorthand path segments where possible
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        # convert line segments into h,v where possible
        if cmd == 'l':
            i = 0
            lineTuples = []
            while i < len(data):
                if data[i] == 0:
                    # vertical
                    if lineTuples:
                        # flush the existing line command
                        newPath.append(('l', lineTuples))
                        lineTuples = []
                    # append the v and then the remaining line coords
                    newPath.append(('v', [data[i+1]]))
                    numPathSegmentsReduced += 1
                elif data[i+1] == 0:
                    if lineTuples:
                        # flush the line command, then append the h and then the remaining line coords
                        newPath.append(('l', lineTuples))
                        lineTuples = []
                    newPath.append(('h', [data[i]]))
                    numPathSegmentsReduced += 1
                else:
                    lineTuples.append(data[i])
                    lineTuples.append(data[i+1])
                i += 2
            if lineTuples:
                newPath.append(('l', lineTuples))
        # convert Bezier curve segments into s where possible
        elif cmd == 'c':
            # the control point an 's' would assume depends on what precedes it
            bez_ctl_pt = _pathImpliedControlPoint(newPath[-1], ['c', 's'])
            i = 0
            curveTuples = []
            while i < len(data):
                # rotate by 180deg means negate both coordinates
                # if the previous control point is equal then we can substitute a
                # shorthand bezier command
                if (bez_ctl_pt is not None and
                        bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]):
                    if curveTuples:
                        newPath.append(('c', curveTuples))
                        curveTuples = []
                    # append the s command
                    newPath.append(('s', [data[i+2], data[i+3], data[i+4], data[i+5]]))
                    numPathSegmentsReduced += 1
                else:
                    curveTuples.extend(data[i:i+6])

                # set up control point for next curve segment
                bez_ctl_pt = (data[i+4] - data[i+2], data[i+5] - data[i+3])
                i += 6

            if curveTuples:
                newPath.append(('c', curveTuples))
        # convert quadratic curve segments into t where possible
        elif cmd == 'q':
            quad_ctl_pt = _pathImpliedControlPoint(newPath[-1], ['q', 't'])
            i = 0
            curveTuples = []
            while i < len(data):
                if (quad_ctl_pt is not None and
                        quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]):
                    if curveTuples:
                        newPath.append(('q', curveTuples))
                        curveTuples = []
                    # append the t command
                    newPath.append(('t', [data[i+2], data[i+3]]))
                    numPathSegmentsReduced += 1
                else:
                    curveTuples.extend(data[i:i+4])

                quad_ctl_pt = (data[i+2] - data[i], data[i+3] - data[i+1])
                i += 4

            if curveTuples:
                newPath.append(('q', curveTuples))
        else:
            newPath.append((cmd, data))
    path = newPath

    # for each h or v, collapse unnecessary coordinates that run in the same direction
    # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        if cmd in ['h', 'v'] and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    numPathSegmentsReduced += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append((cmd, newData))
        else:
            newPath.append((cmd, data))
    path = newPath

    # it is possible that we have consecutive h, v, c, t commands now
    # so again collapse all consecutive commands of the same type into one command
    path = _pathCollapseSameCommands(path)

    newPathStr = serializePath(path)
    numBytesSavedInPathData += (len(oldPathStr) - len(newPathStr))
    element.setAttribute('d', newPathStr)
def parseListOfPoints(s):
    """
    Parse string into a list of points.

    Returns a list containing an even number of coordinate strings, or an
    empty list if the input is empty, has an odd number of coordinates, or
    contains a coordinate that is not unitless.
    """
    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    s = s.strip()
    if s == '':
        return []

    # the separator pattern must not be able to match the empty string:
    # newer Python versions split on empty matches as well
    nums = re.split(r"\s*,\s*|\s+", s)

    # if we had an odd number of points, return empty
    if len(nums) % 2 != 0:
        return []

    points = []
    for i in range(0, len(nums), 2):
        x = SVGLength(nums[i])
        y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE:
            return []
        points.append(str(x.value))
        points.append(str(y.value))

    return points
def cleanPolygon(elem):
    """
    Remove unnecessary closing point of polygon points attribute

    If the last point repeats the first one it is dropped (a polygon is
    closed implicitly) and numPointsRemovedFromPolygon is incremented. The
    points attribute is then rewritten through scourCoordinates(). When the
    attribute yields no points from parseListOfPoints() it is left untouched
    instead of being overwritten with an empty string.
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    if not pts:
        return

    # need at least two points before the last one can duplicate the first
    if len(pts) >= 4:
        (startx, starty) = (pts[0], pts[1])
        (endx, endy) = (pts[-2], pts[-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts))
def cleanPolyline(elem):
    """
    Scour the polyline points attribute

    The points are parsed with parseListOfPoints() and written back through
    scourCoordinates(). When parsing yields no points the attribute is left
    untouched instead of being overwritten with an empty string.
    """
    pts = parseListOfPoints(elem.getAttribute('points'))
    if not pts:
        return
    elem.setAttribute('points', scourCoordinates(pts))
def serializePath(pathObj):
    """
    Turns a list of (command, numbers) tuples back into a path data string,
    scouring the numbers of every command on the way.
    """
    pieces = []
    for segment in pathObj:
        (command, numbers) = segment
        pieces.append(command)
        # elliptical arc commands must have comma/wsp separating the coordinates
        # this fixes an issue outlined in Fix https://bugs.launchpad.net/scour/+bug/412754
        pieces.append(scourCoordinates(numbers, (command == 'a')))
    return "".join(pieces)
def scourCoordinates(data, forceCommaWsp = False):
    """
    Builds the string form of a list of coordinates, cleaning up on the way:
        - each value is passed through scourLength()
        - no whitespace is emitted between values
        - a comma is emitted between two values only when the second one is
          not negative, or always when forceCommaWsp is True

    Returns an empty string when data is None.
    """
    if data is None:
        return ""

    pieces = []
    lastIndex = len(data) - 1
    for (index, coord) in enumerate(data):
        # the scoured coordinate itself
        pieces.append(scourLength(coord))

        # a leading minus sign of the following number already separates the
        # two values, so the comma is only needed in the other cases
        if index < lastIndex and (forceCommaWsp or Decimal(data[index+1]) >= 0):
            pieces.append(',')
    return "".join(pieces)
def scourLength(str):
    """
    Returns the shortest string found for the given length: the numeric part
    is reduced to the current Decimal precision, integerized when possible,
    stripped of trailing zeros and written in scientific notation when that
    is shorter. The unit of the length is appended unchanged.

    str -- the length to scour (anything SVGLength accepts)
    """
    length = SVGLength(str)

    # reduce to the proper number of digits
    coord = Decimal(unicode(length.value)) * Decimal(1)

    if int(coord) == coord:
        # integerize if we can
        coord = Decimal(unicode(int(coord)))
    else:
        # trim trailing zeros after the decimal point, but never when the
        # string carries an exponent: stripping zeros from '1.5E-10' would
        # change the value
        s = unicode(coord)
        if s.find('.') != -1 and s.upper().find('E') == -1:
            coord = Decimal(s.rstrip('0'))

    # Decimal.normalize() will uses scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord) + Unit.str(length.units)
def embedRasters(element, options):
    """
    Converts raster references to inline images.
    NOTE: there are size limits to base64-encoding handling in browsers

    If the xlink:href of `element` points to a png, jpg/jpeg or gif that can
    be read from disk (directly, or relative to the directory of
    options.infilename) or fetched from an http:// URL, the href is replaced
    by a base64 data: URI and numRastersEmbedded is incremented. Otherwise
    the element is left untouched.

    NOTE: http:// URLs found in the document are fetched as-is; only run this
    on documents you trust.
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'], 'href')

    # a usable reference needs more than a single character
    if len(href) <= 1:
        return

    # find if href value has filename ext
    ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

    # look for 'png', 'jpg'/'jpeg', and 'gif' extensions
    if ext not in ['png', 'jpg', 'jpeg', 'gif']:
        return

    isRemote = (href[:7] == 'http://')

    # check if href resolves to an existing file
    if not isRemote and not os.path.isfile(href):
        # if this is not an absolute path, set path relative
        # to script file based on input arg
        infilename = '.'
        if options.infilename:
            infilename = options.infilename
        href = os.path.join(os.path.dirname(infilename), href)

    rasterdata = ''
    # test if file exists locally
    if os.path.isfile(href):
        # open raster file as raw binary; always release the handle
        raster = open(href, "rb")
        try:
            rasterdata = raster.read()
        finally:
            raster.close()
    elif isRemote:
        webFile = urllib.urlopen(href)
        try:
            rasterdata = webFile.read()
        finally:
            webFile.close()

    # ... should we remove all images which don't resolve?
    if rasterdata:
        # base64-encode raster
        b64eRaster = base64.b64encode(rasterdata)

        # set href attribute to base64-encoded equivalent
        if b64eRaster:
            # PNG and GIF both have MIME Type 'image/[ext]', but
            # JPEG has MIME Type 'image/jpeg'
            if ext == 'jpg':
                ext = 'jpeg'

            element.setAttributeNS(NS['XLINK'], 'href',
                                   'data:image/' + ext + ';base64,' + b64eRaster)
            numRastersEmbedded += 1
            del b64eRaster
def properlySizeDoc(docElement):
    """
    Replaces the width/height attributes of the document element by an
    equivalent viewBox ('0 0 width height') where that is safe.

    Nothing is changed when width or height use a unit other than px (or no
    unit), or when a valid viewBox exists that has a non-zero origin or a
    size different from width/height. A viewBox that does not parse as four
    numbers is considered invalid and is overwritten.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
            (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute; surrounding whitespace is stripped first so it
    # does not produce an empty leading field, and the separator pattern
    # cannot match the empty string
    vbSep = re.split(r"\s*,\s*|\s+", docElement.getAttribute('viewBox').strip(), 3)
    # if we have a valid viewBox we need to check it
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Recursively change the namespace prefix of elements in a subtree.

    Every element at or below node whose prefix equals oldprefix is
    replaced (in its parent) by a newly created element that uses
    newprefix instead; an empty newprefix produces an unprefixed element.
    Attributes and (deep clones of) child nodes are carried over to the
    replacement.

    node      -- root of the subtree; None and non-element nodes are ignored
    oldprefix -- prefix to look for
    newprefix -- prefix to use instead, '' for no prefix
    """
    if node is None or node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
        return

    if node.prefix == oldprefix:
        localName = node.localName
        namespace = node.namespaceURI
        doc = node.ownerDocument
        parent = node.parentNode

        # create a replacement node
        if newprefix != '':
            newNode = doc.createElementNS(namespace, newprefix + ":" + localName)
        else:
            newNode = doc.createElement(localName)

        # add all the attributes
        attrList = node.attributes
        for i in range(attrList.length):
            attr = attrList.item(i)
            # setAttributeNS() expects the *qualified* name: passing only the
            # local name would silently drop the attribute's own prefix
            # (e.g. 'xlink:href' would turn into 'href')
            newNode.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.nodeValue)

        # clone and add all the child nodes
        for child in node.childNodes:
            newNode.appendChild(child.cloneNode(True))

        # replace old node with new node
        parent.replaceChild(newNode, node)
        # set the node to the new node in the remapped namespace prefix
        node = newNode

    # now do all child nodes
    for child in node.childNodes:
        remapNamespacePrefix(child, oldprefix, newprefix)
def makeWellFormed(str):
    """
    Escape the XML markup characters in a string.

    Returns a copy of str in which '&', '<' and '>' are replaced by the
    entity references '&amp;', '&lt;' and '&gt;' so that the result can be
    embedded in character data or in an attribute value.
    """
    # '&' must be encoded first, otherwise the ampersands introduced by the
    # other two replacements would be encoded a second time.  The calls are
    # chained so that each step works on the result of the previous one
    # instead of starting over from the unescaped input.
    # (TODO: is encoding '>' necessary?)
    return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
# hand-rolled serialization function that has the following benefits:
# - pretty printing
# - somewhat judicious use of whitespace
# - ensure id attributes are first
def serializeXML(element, options, ind = 0):
    """
    Serialize a DOM element and everything below it into a string.

    element -- the element node to serialize
    options -- object whose 'indent_type' attribute selects the indentation
               unit: 'tab' for a tab, 'space' for one space, anything else
               for no indentation
    ind     -- nesting depth of element (0 for the outermost element)

    The 'id' and 'xml:id' attributes are written before all other
    attributes.  Text is stripped of surrounding whitespace except inside
    text-content elements (text, tspan, title, ...).  Every element nested
    below depth 0 is terminated by a newline.  Node types other than
    element, text, CDATA and comment are not written.
    """
    if options.indent_type == 'tab':
        unit = '\t'
    elif options.indent_type == 'space':
        unit = ' '
    else:
        unit = ''
    pad = unit * ind

    def quoted(value):
        # Escape markup characters and wrap the value in quotes.  Double
        # quotes are preferred; single quotes are used when the value itself
        # contains a double quote.  A value containing both kinds cannot be
        # delimited safely by either, so its double quotes are escaped.
        escaped = makeWellFormed(value)
        if '"' not in value:
            return '"' + escaped + '"'
        if "'" not in value:
            return "'" + escaped + "'"
        return '"' + escaped.replace('"', '&quot;') + '"'

    # collect the pieces and join once instead of growing a string
    parts = [pad, '<', element.nodeName]

    # always serialize the id or xml:id attributes first
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            parts.append(' ' + idName + '=' + quoted(idValue))

    # now serialize the other attributes
    attrList = element.attributes
    for num in range(attrList.length):
        attr = attrList.item(num)
        if attr.nodeName in ('id', 'xml:id'):
            continue
        parts.append(' ')
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
            parts.append('xmlns:')
        parts.append(attr.nodeName + '=' + quoted(attr.nodeValue))

    # if no children, self-close
    children = element.childNodes
    if children.length > 0:
        parts.append('>')

        # whitespace may be significant inside these elements, so their
        # text is written untrimmed
        keepWhitespace = element.nodeName in ["text", "tspan", "textPath", "tref",
                                              "title", "desc", "textArea"]
        onNewLine = False
        for child in children:
            # element node
            if child.nodeType == 1:
                parts.append('\n' + serializeXML(child, options, ind + 1))
                onNewLine = True
            # text node
            elif child.nodeType == 3:
                text = child.nodeValue
                if not keepWhitespace:
                    text = text.strip()
                parts.append(makeWellFormed(text))
            # CDATA node
            elif child.nodeType == 4:
                parts.append('<![CDATA[' + child.nodeValue + ']]>')
            # Comment node
            elif child.nodeType == 8:
                parts.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?
            # (all remaining node types are ignored)

        # a child element ends with a newline, so the closing tag has to be
        # indented again
        if onNewLine:
            parts.append(pad)
        parts.append('</' + element.nodeName + '>')
    else:
        parts.append('/>')

    if ind > 0:
        parts.append('\n')

    return ''.join(parts)
# this is the main method
# input is a string representation of the input XML
# returns a string representation of the output XML
def scourString(in_string, options=None):
    """
    Scour an SVG document given as a string and return the result as a string.

    in_string -- the XML source; it is handed to xml.dom.minidom.parseString()
    options   -- options object as produced by the module's option parser;
                 when None the parser's default values are used

    The document is parsed, run through the clean-up passes below in a fixed
    order and serialized with serializeXML().  Unless options.strip_xml_prolog
    is set, an XML prolog line is put in front of the result.

    Side effects: sets getcontext().prec to options.digits and updates the
    module-level counters numAttrsRemoved, numStylePropsFixed,
    numElemsRemoved and numBytesSavedInColors.
    """
    if options is None:
        options = _options_parser.get_default_values()
    # precision used for numbers (presumably the decimal module's context;
    # getcontext is imported outside of this section)
    getcontext().prec = options.digits
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        # (names are collected first and removed afterwards so that the
        # attribute list is not modified while it is being indexed)
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    # (a prefix bound to the SVG namespace is redundant because the SVG
    # namespace is the default namespace at this point)
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    # elements that used one of the removed prefixes become unprefixed
    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # NOTE: these elements will be removed even if they have (invalid) text nodes
    # NOTE(review): the check below treats text nodes (type 3) as content,
    # which appears to contradict the note above -- confirm the intent
    elemsToRemove = []
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagName(tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                # an element, text, CDATA or comment child (node types
                # 1, 3, 4, 8) keeps the element alive
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    # the loop finished without a break: no such child found
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    # strip ids until a pass removes nothing more
    if options.strip_ids:
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    # move common attributes to parent group
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    # remove unused attributes from parent
    numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # clean path data
    # (a path without any path data is removed altogether)
    for elem in doc.documentElement.getElementsByTagName('path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    for polyline in doc.documentElement.getElementsByTagName('polyline') :
        cleanPolygon(polyline)

    # scour lengths (including coordinates)
    # (searched from doc rather than doc.documentElement, presumably so that
    # the root <svg> element itself is included)
    for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(type):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                         'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                         'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagName('image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    properlySizeDoc(doc.documentElement)

    # output the document as a pretty string with a single space for indent
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolled our own serialize function here to save on space, put id first, customize indentation, etc
    # out_string = doc.documentElement.toprettyxml(' ')
    out_string = serializeXML(doc.documentElement, options)

    # now strip out empty lines
    lines = []
    # Get rid of empty lines
    # (splitlines(True) keeps the line endings, so the kept lines can be
    # joined back together unchanged)
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string stripped of empty lines
    if options.strip_xml_prolog == False:
        xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    else:
        xmlprolog = ""

    return xmlprolog + "".join(lines)
# used mostly by unit tests
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Scour the SVG file 'filename' and return the result as a minidom document.

    filename -- path of the SVG file to read
    options  -- passed on unchanged to scourString()
    """
    infile = open(filename)
    # make sure the file handle is released even if reading fails
    try:
        in_string = infile.read()
    finally:
        infile.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
# GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that puts a banner in front of the usage text.

    The banner is made of the application name and version number on one
    line and the copyright statement on the next line.
    """
    def format_usage(self, usage):
        # let the stock formatter render the usage text, then prepend the banner
        usage_text = optparse.IndentedHelpFormatter.format_usage(self, usage)
        banner = "%s %s\n%s\n" % (APP, VER, COPYRIGHT)
        return banner + usage_text
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
#
# Module-level command line parser; its default values are also used by
# scourString() when no options object is passed in.
_options_parser = optparse.OptionParser(
    usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
    description=(
        "If the input/output files are specified with a svgz extension, "
        "then compressed SVG is assumed. "
        "If the input file is not specified, stdin is used. "
        "If the output file is not specified,  stdout is used."),
    formatter=HeaderedFormatter(max_help_position=30),
    version=VER)

# -- switches that turn a default-on optimisation off ------------------------
_options_parser.add_option(
    "--disable-simplify-colors",
    dest="simple_colors",
    action="store_false",
    default=True,
    help="won't convert all colors to #RRGGBB format")
_options_parser.add_option(
    "--disable-style-to-xml",
    dest="style_to_xml",
    action="store_false",
    default=True,
    help="won't convert styles into XML attributes")
_options_parser.add_option(
    "--disable-group-collapsing",
    dest="group_collapse",
    action="store_false",
    default=True,
    help="won't collapse <g> elements")

# -- remaining boolean switches ----------------------------------------------
_options_parser.add_option(
    "--enable-id-stripping",
    dest="strip_ids",
    action="store_true",
    default=False,
    help="remove all un-referenced ID attributes")
_options_parser.add_option(
    "--disable-embed-rasters",
    dest="embed_rasters",
    action="store_false",
    default=True,
    help="won't embed rasters as base64-encoded data")
_options_parser.add_option(
    "--keep-editor-data",
    dest="keep_editor_data",
    action="store_true",
    default=False,
    help="won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes")
_options_parser.add_option(
    "--strip-xml-prolog",
    dest="strip_xml_prolog",
    action="store_true",
    default=False,
    help="won't output the <?xml ?> prolog")

# -- options that take a value -----------------------------------------------
# GZ: this is confusing, most people will be thinking in terms of
# decimal places, which is not what decimal precision is doing
_options_parser.add_option(
    "-p", "--set-precision",
    dest="digits",
    action="store",
    type=int,
    default=5,
    help="set number of significant digits (default: %default)")
# input and output file names are kept out of the generated help text
_options_parser.add_option(
    "-i",
    dest="infilename",
    action="store",
    help=optparse.SUPPRESS_HELP)
_options_parser.add_option(
    "-o",
    dest="outfilename",
    action="store",
    help=optparse.SUPPRESS_HELP)
_options_parser.add_option(
    "--indent",
    dest="indent_type",
    action="store",
    type="string",
    default="space",
    help="indentation of the output: none, space, tab (default: %default)")
def maybe_gziped_file(filename, mode="r"):
    """
    Open 'filename', transparently handling gzip-compressed files.

    filename -- path of the file to open
    mode     -- mode passed on to the file constructor (default "r")

    Returns a gzip.GzipFile when the file extension is '.svgz' or '.gz'
    (compared case-insensitively) and a regular file object otherwise.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to open a file and, unlike the file()
    # constructor, is available on every Python version
    return open(filename, mode)
def parse_args(args=None):
    """
    Parse and validate the command line and open the input/output streams.

    args -- list of argument strings; None makes the option parser fall
            back to its default (the process's command line)

    Returns (options, [infile, outfile]).  infile/outfile are opened with
    maybe_gziped_file() when a file name was given with -i/-o, otherwise
    they are sys.stdin/sys.stdout.

    Invalid input is reported through the option parser's error() method.
    """
    options, rargs = _options_parser.parse_args(args)

    if rargs:
        _options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
    # the value ends up as the precision of the number context, which has to
    # be a positive integer -- zero significant digits is as unusable as a
    # negative amount
    if options.digits < 1:
        _options_parser.error("Number of significant digits must be at least 1, see --help")
    if options.indent_type not in ["tab", "space", "none"]:
        _options_parser.error("Invalid value for --indent, see --help")

    if options.infilename:
        infile = maybe_gziped_file(options.infilename)
        # GZ: could catch a raised IOError here and report
    else:
        # GZ: could sniff for gzip compression here
        infile = sys.stdin
    if options.outfilename:
        outfile = maybe_gziped_file(options.outfilename, "w")
    else:
        outfile = sys.stdout

    return options, [infile, outfile]
def getReport():
    """
    Return a human-readable summary of the module-level statistics counters.

    The result has one ' <label>: <value>' line per counter, separated by
    newlines and without a trailing newline.
    """
    stats = [
        (' Number of elements removed: ', numElemsRemoved),
        (' Number of attributes removed: ', numAttrsRemoved),
        (' Number of unreferenced id attributes removed: ', numIDsRemoved),
        (' Number of style properties fixed: ', numStylePropsFixed),
        (' Number of raster images embedded inline: ', numRastersEmbedded),
        (' Number of path segments reduced/removed: ', numPathSegmentsReduced),
        (' Number of bytes saved in path data: ', numBytesSavedInPathData),
        (' Number of bytes saved in colors: ', numBytesSavedInColors),
        (' Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    ]
    return '\n'.join([label + str(count) for label, count in stats])
# Command line entry point: read the input, scour it, write the output and
# print a report (timing, statistics, size comparison) to stderr.
if __name__ == '__main__':
    # pick a timer function for measuring how long the run takes
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        # (os.times()[0] is the user CPU time of the process, whereas
        # time.time() is wall-clock time)
        def get_tick():
            return os.times()[0]

    start = get_tick()

    # NOTE: 'input' shadows the builtin of the same name in this script
    options, (input, output) = parse_args()

    # banner goes to stderr so that it never mixes with output written to stdout
    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    print >>sys.stderr, ' File:', input.name, \
        '\n Time taken:', str(end-start) + 's\n', \
        getReport()

    # size of the output relative to the input, in percent; '/' is true
    # division here because of 'from __future__ import division'
    oldsize = len(in_string)
    newsize = len(out_string)
    sizediff = (newsize / oldsize) * 100
    # only the first five characters of the percentage are shown
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'