1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Scour
5 #
6 # Copyright 2009 Jeff Schiller
7 #
8 # This file is part of Scour, http://www.codedread.com/scour/
9 #
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
13 #
14 # http://www.apache.org/licenses/LICENSE-2.0
15 #
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
22 # Notes:
24 # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb
25 # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb )
27 # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG
28 #
29 # * Process Transformations
30 # * Collapse all group based transformations
32 # Even more ideas here: http://esw.w3.org/topic/SvgTidy
#  * analysis of path elements to see if rect can be used instead? (would also need to
#    look at rounded corners)
36 # Next Up:
37 # - TODO: fix the removal of comment elements (between <?xml?> and <svg>)
38 # - add an option to remove ids if they match the Inkscape-style of IDs
39 # - investigate point-reducing algorithms
40 # - parse transform attribute
41 # - if a <g> has only one element in it, collapse the <g> (ensure transform, etc are carried down)
42 # - option to remove metadata
44 # necessary to get true division
45 from __future__ import division
47 import os
48 import sys
49 import xml.dom.minidom
50 import re
51 import math
52 import base64
53 import urllib
54 from svg_regex import svg_parser
55 import gzip
56 import optparse
57 from yocto_css import parseCssString
59 # Python 2.3- did not have Decimal
60 try:
61 from decimal import *
62 except ImportError:
63 from fixedpoint import *
64 Decimal = FixedPoint
# Program name, version string and copyright notice for this module.
APP = "scour"
VER = "0.22"
COPYRIGHT = "Copyright Jeff Schiller, 2009"
# Namespace URIs this module knows about, keyed by a short symbolic name.
NS = {
    'SVG': 'http://www.w3.org/2000/svg',
    'XLINK': 'http://www.w3.org/1999/xlink',
    'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
    'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
    'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/',
    'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/',
    'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/',
    'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/',
    'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/',
    'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/',
    'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/',
    'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/',
    'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/',
    'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/',
}

# Every namespace from NS except SVG and XLINK, i.e. the Sodipodi, Inkscape
# and Adobe ones, collected in a list.
unwanted_ns = [
    NS['SODIPODI'],
    NS['INKSCAPE'],
    NS['ADOBE_ILLUSTRATOR'],
    NS['ADOBE_GRAPHS'],
    NS['ADOBE_SVG_VIEWER'],
    NS['ADOBE_VARIABLES'],
    NS['ADOBE_SFW'],
    NS['ADOBE_EXTENSIBILITY'],
    NS['ADOBE_FLOWS'],
    NS['ADOBE_IMAGE_REPLACEMENT'],
    NS['ADOBE_CUSTOM'],
    NS['ADOBE_XPATH'],
]
# Names of SVG presentation properties, grouped here by family.
svgAttributes = [
    'clip-rule', 'display',
    'fill', 'fill-opacity', 'fill-rule',
    'filter',
    'font-family', 'font-size', 'font-stretch', 'font-style',
    'font-variant', 'font-weight',
    'line-height', 'marker', 'opacity', 'overflow',
    'stop-color', 'stop-opacity',
    'stroke', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
    'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
    'visibility',
]
# Maps each lower-case colour keyword to its 'rgb(r, g, b)' string.
# Both the "gray" and "grey" spellings are present and map to the same value.
colors = {
    'aliceblue': 'rgb(240, 248, 255)',
    'antiquewhite': 'rgb(250, 235, 215)',
    'aqua': 'rgb( 0, 255, 255)',
    'aquamarine': 'rgb(127, 255, 212)',
    'azure': 'rgb(240, 255, 255)',
    'beige': 'rgb(245, 245, 220)',
    'bisque': 'rgb(255, 228, 196)',
    'black': 'rgb( 0, 0, 0)',
    'blanchedalmond': 'rgb(255, 235, 205)',
    'blue': 'rgb( 0, 0, 255)',
    'blueviolet': 'rgb(138, 43, 226)',
    'brown': 'rgb(165, 42, 42)',
    'burlywood': 'rgb(222, 184, 135)',
    'cadetblue': 'rgb( 95, 158, 160)',
    'chartreuse': 'rgb(127, 255, 0)',
    'chocolate': 'rgb(210, 105, 30)',
    'coral': 'rgb(255, 127, 80)',
    'cornflowerblue': 'rgb(100, 149, 237)',
    'cornsilk': 'rgb(255, 248, 220)',
    'crimson': 'rgb(220, 20, 60)',
    'cyan': 'rgb( 0, 255, 255)',
    'darkblue': 'rgb( 0, 0, 139)',
    'darkcyan': 'rgb( 0, 139, 139)',
    'darkgoldenrod': 'rgb(184, 134, 11)',
    'darkgray': 'rgb(169, 169, 169)',
    'darkgreen': 'rgb( 0, 100, 0)',
    'darkgrey': 'rgb(169, 169, 169)',
    'darkkhaki': 'rgb(189, 183, 107)',
    'darkmagenta': 'rgb(139, 0, 139)',
    'darkolivegreen': 'rgb( 85, 107, 47)',
    'darkorange': 'rgb(255, 140, 0)',
    'darkorchid': 'rgb(153, 50, 204)',
    'darkred': 'rgb(139, 0, 0)',
    'darksalmon': 'rgb(233, 150, 122)',
    'darkseagreen': 'rgb(143, 188, 143)',
    'darkslateblue': 'rgb( 72, 61, 139)',
    'darkslategray': 'rgb( 47, 79, 79)',
    'darkslategrey': 'rgb( 47, 79, 79)',
    'darkturquoise': 'rgb( 0, 206, 209)',
    'darkviolet': 'rgb(148, 0, 211)',
    'deeppink': 'rgb(255, 20, 147)',
    'deepskyblue': 'rgb( 0, 191, 255)',
    'dimgray': 'rgb(105, 105, 105)',
    'dimgrey': 'rgb(105, 105, 105)',
    'dodgerblue': 'rgb( 30, 144, 255)',
    'firebrick': 'rgb(178, 34, 34)',
    'floralwhite': 'rgb(255, 250, 240)',
    'forestgreen': 'rgb( 34, 139, 34)',
    'fuchsia': 'rgb(255, 0, 255)',
    'gainsboro': 'rgb(220, 220, 220)',
    'ghostwhite': 'rgb(248, 248, 255)',
    'gold': 'rgb(255, 215, 0)',
    'goldenrod': 'rgb(218, 165, 32)',
    'gray': 'rgb(128, 128, 128)',
    'grey': 'rgb(128, 128, 128)',
    'green': 'rgb( 0, 128, 0)',
    'greenyellow': 'rgb(173, 255, 47)',
    'honeydew': 'rgb(240, 255, 240)',
    'hotpink': 'rgb(255, 105, 180)',
    'indianred': 'rgb(205, 92, 92)',
    'indigo': 'rgb( 75, 0, 130)',
    'ivory': 'rgb(255, 255, 240)',
    'khaki': 'rgb(240, 230, 140)',
    'lavender': 'rgb(230, 230, 250)',
    'lavenderblush': 'rgb(255, 240, 245)',
    'lawngreen': 'rgb(124, 252, 0)',
    'lemonchiffon': 'rgb(255, 250, 205)',
    'lightblue': 'rgb(173, 216, 230)',
    'lightcoral': 'rgb(240, 128, 128)',
    'lightcyan': 'rgb(224, 255, 255)',
    'lightgoldenrodyellow': 'rgb(250, 250, 210)',
    'lightgray': 'rgb(211, 211, 211)',
    'lightgreen': 'rgb(144, 238, 144)',
    'lightgrey': 'rgb(211, 211, 211)',
    'lightpink': 'rgb(255, 182, 193)',
    'lightsalmon': 'rgb(255, 160, 122)',
    'lightseagreen': 'rgb( 32, 178, 170)',
    'lightskyblue': 'rgb(135, 206, 250)',
    'lightslategray': 'rgb(119, 136, 153)',
    'lightslategrey': 'rgb(119, 136, 153)',
    'lightsteelblue': 'rgb(176, 196, 222)',
    'lightyellow': 'rgb(255, 255, 224)',
    'lime': 'rgb( 0, 255, 0)',
    'limegreen': 'rgb( 50, 205, 50)',
    'linen': 'rgb(250, 240, 230)',
    'magenta': 'rgb(255, 0, 255)',
    'maroon': 'rgb(128, 0, 0)',
    'mediumaquamarine': 'rgb(102, 205, 170)',
    'mediumblue': 'rgb( 0, 0, 205)',
    'mediumorchid': 'rgb(186, 85, 211)',
    'mediumpurple': 'rgb(147, 112, 219)',
    'mediumseagreen': 'rgb( 60, 179, 113)',
    'mediumslateblue': 'rgb(123, 104, 238)',
    'mediumspringgreen': 'rgb( 0, 250, 154)',
    'mediumturquoise': 'rgb( 72, 209, 204)',
    'mediumvioletred': 'rgb(199, 21, 133)',
    'midnightblue': 'rgb( 25, 25, 112)',
    'mintcream': 'rgb(245, 255, 250)',
    'mistyrose': 'rgb(255, 228, 225)',
    'moccasin': 'rgb(255, 228, 181)',
    'navajowhite': 'rgb(255, 222, 173)',
    'navy': 'rgb( 0, 0, 128)',
    'oldlace': 'rgb(253, 245, 230)',
    'olive': 'rgb(128, 128, 0)',
    'olivedrab': 'rgb(107, 142, 35)',
    'orange': 'rgb(255, 165, 0)',
    'orangered': 'rgb(255, 69, 0)',
    'orchid': 'rgb(218, 112, 214)',
    'palegoldenrod': 'rgb(238, 232, 170)',
    'palegreen': 'rgb(152, 251, 152)',
    'paleturquoise': 'rgb(175, 238, 238)',
    'palevioletred': 'rgb(219, 112, 147)',
    'papayawhip': 'rgb(255, 239, 213)',
    'peachpuff': 'rgb(255, 218, 185)',
    'peru': 'rgb(205, 133, 63)',
    'pink': 'rgb(255, 192, 203)',
    'plum': 'rgb(221, 160, 221)',
    'powderblue': 'rgb(176, 224, 230)',
    'purple': 'rgb(128, 0, 128)',
    'red': 'rgb(255, 0, 0)',
    'rosybrown': 'rgb(188, 143, 143)',
    'royalblue': 'rgb( 65, 105, 225)',
    'saddlebrown': 'rgb(139, 69, 19)',
    'salmon': 'rgb(250, 128, 114)',
    'sandybrown': 'rgb(244, 164, 96)',
    'seagreen': 'rgb( 46, 139, 87)',
    'seashell': 'rgb(255, 245, 238)',
    'sienna': 'rgb(160, 82, 45)',
    'silver': 'rgb(192, 192, 192)',
    'skyblue': 'rgb(135, 206, 235)',
    'slateblue': 'rgb(106, 90, 205)',
    'slategray': 'rgb(112, 128, 144)',
    'slategrey': 'rgb(112, 128, 144)',
    'snow': 'rgb(255, 250, 250)',
    'springgreen': 'rgb( 0, 255, 127)',
    'steelblue': 'rgb( 70, 130, 180)',
    'tan': 'rgb(210, 180, 140)',
    'teal': 'rgb( 0, 128, 128)',
    'thistle': 'rgb(216, 191, 216)',
    'tomato': 'rgb(255, 99, 71)',
    'turquoise': 'rgb( 64, 224, 208)',
    'violet': 'rgb(238, 130, 238)',
    'wheat': 'rgb(245, 222, 179)',
    'white': 'rgb(255, 255, 255)',
    'whitesmoke': 'rgb(245, 245, 245)',
    'yellow': 'rgb(255, 255, 0)',
    'yellowgreen': 'rgb(154, 205, 50)',
}
def isSameSign(a, b):
    """
    Returns True when a and b lie on the same side of zero.

    Zero counts as both non-positive and non-negative, so it has the
    same sign as any other number.
    """
    if a <= 0 and b <= 0:
        return True
    return a >= 0 and b >= 0
# Pre-compiled patterns for picking numbers and units out of attribute text.
# Raw strings are used so each backslash is written only once; the compiled
# patterns are unchanged.

# optional minus sign, digits, optional fractional part
coord = re.compile(r"\-?\d+\.?\d*")
# a number followed by an exponent, e.g. "1.5e-3"
scinumber = re.compile(r"[\-\+]?(\d*\.?)?\d+[eE][\-\+]?\d+")
# a signed integer or decimal number (no exponent)
number = re.compile(r"[\-\+]?(\d*\.?)?\d+")
# the exponent part on its own; group 1 holds the signed digits
sciExponent = re.compile(r"[eE]([\-\+]?\d+)")
# one of the recognised unit suffixes, anchored at the end of the string
unit = re.compile(r"(em|ex|px|pt|pc|cm|mm|in|\%){1,1}$")
class Unit(object):
    """
    Integer constants for the unit suffix of a length value, plus
    conversions between a suffix string and its constant.

    Unit.get(suffix) returns the constant for a suffix string and
    Unit.str(constant) returns the suffix string for a constant.
    """
    INVALID = -1
    NONE = 0
    PCT = 1
    PX = 2
    PT = 3
    PC = 4
    EM = 5
    EX = 6
    CM = 7
    MM = 8
    IN = 9

    # (suffix, constant) pairs shared by get() and str() so that the two
    # directions of the mapping cannot drift apart.
    _SUFFIXES = (
        ('%', PCT),
        ('px', PX),
        ('pt', PT),
        ('pc', PC),
        ('em', EM),
        ('ex', EX),
        ('cm', CM),
        ('mm', MM),
        ('in', IN),
    )

    def get(str):
        """
        Returns the Unit constant for the suffix string passed in.

        None and the empty string give Unit.NONE; anything that is not a
        known suffix gives Unit.INVALID.
        """
        # The parameter keeps its original name (which shadows the builtin
        # inside this function) so that existing callers are unaffected.
        if str is None or str == '':
            return Unit.NONE
        for suffix, code in Unit._SUFFIXES:
            if str == suffix:
                return code
        return Unit.INVALID

    def str(u):
        """
        Returns the suffix string for the Unit constant passed in.

        Unit.NONE gives the empty string; any value that is not a known
        constant (including Unit.INVALID) gives 'INVALID'.
        """
        if u == Unit.NONE:
            return ''
        for suffix, code in Unit._SUFFIXES:
            if u == code:
                return suffix
        return 'INVALID'

    # staticmethod() is applied by hand rather than with decorator syntax,
    # as in the original code.
    get = staticmethod(get)
    str = staticmethod(str)
class SVGLength(object):
    """
    Parses a length string into a number and a unit.

    After construction:
      value - the numeric part, stored as an int when it is a whole number
              and as a float otherwise
      units - one of the Unit constants

    A string that cannot be understood (no leading number, an unknown
    unit suffix, or a number too large to represent) always ends up with
    value 0 and units Unit.INVALID, so both attributes are set on every path.
    """
    def __init__(self, str):
        # The parameter keeps its original name (shadowing the builtin
        # inside this method) so that existing callers are unaffected.

        # fast path: a plain number that float() understands by itself
        try:
            self.value = float(str)
            if int(self.value) == self.value:
                self.value = int(self.value)
            self.units = Unit.NONE
            return
        except (ValueError, OverflowError):
            # ValueError: there is a unit suffix or the text is not a number
            # OverflowError: float() produced an infinity that int() rejects
            pass

        # from here on the string has a unit, an exponent plus a unit,
        # or is invalid; start from the invalid state
        self.value = 0
        self.units = Unit.INVALID

        value = 0
        unitBegin = 0
        if scinumber.match(str) is not None:
            # number is a prefix of scinumber, so this always matches
            numMatch = number.match(str)
            expMatch = sciExponent.search(str, numMatch.start(0))
            try:
                value = (float(numMatch.group(0)) *
                         10 ** float(expMatch.group(1)))
            except OverflowError:
                # exponent too large for a float: leave the length invalid
                return
            unitBegin = expMatch.end(1)
        else:
            numMatch = number.match(str)
            if numMatch is not None:
                value = float(numMatch.group(0))
                unitBegin = numMatch.end(0)

        # no leading number at all
        if unitBegin == 0:
            return

        # a number followed by something that is not a known unit used to
        # leave self.units unset; it is now reported as invalid
        unitMatch = unit.search(str, unitBegin)
        if unitMatch is None:
            # TODO: this needs to set the default for the given attribute (how?)
            return

        try:
            if int(value) == value:
                value = int(value)
        except (ValueError, OverflowError):
            # the product overflowed to infinity
            return

        self.value = value
        self.units = Unit.get(unitMatch.group(0))
# TODO: eventually use the SVGLength class once it is complete
def getSVGLength(value):
    """
    Returns value converted to a float when it is a plain number.

    When float() rejects the text with ValueError (for example because it
    carries a unit suffix such as '12px') the value is returned unchanged.
    """
    # The original except branch also ran two regular-expression matches
    # whose results were never used; they have been dropped.
    try:
        return float(value)
    except ValueError:
        return value
def findElementById(node, id):
    """
    Searches node and its descendants, in document order, for the first
    element whose id attribute equals id.

    Returns that element, or None when there is no match or when node is
    None or is not an element.
    """
    # nodeType 1 is an Element; other node types carry no attributes
    if node is None or node.nodeType != 1:
        return None
    if node.getAttribute('id') == id:
        return node
    for kid in node.childNodes:
        found = findElementById(kid, id)
        if found is not None:
            return found
    return None
def findElementsWithId(node, elems=None):
    """
    Collects every element in the subtree rooted at node that has a
    non-empty id attribute.

    Returns a dictionary mapping id to element.  When elems is given, the
    entries are added to it and that same dictionary is returned.
    """
    if elems is None:
        elems = {}

    identifier = node.getAttribute('id')
    if identifier != '':
        elems[identifier] = node

    # only Element children (nodeType 1) can carry an id
    for kid in node.childNodes:
        if kid.nodeType == 1:
            findElementsWithId(kid, elems)
    return elems
# Properties whose value is checked for a url(#id) reference.
referencingProps = [
    'fill', 'stroke', 'filter', 'clip-path', 'mask',
    'marker-start', 'marker-end', 'marker-mid',
]
def findReferencedElements(node, ids=None):
    """
    Walks the subtree rooted at node and records every reference to an id.

    Returns a dictionary that maps each referenced id to a two-item list
    [count, nodes]: how many times the id is referenced and the nodes that
    reference it.  When ids is given, the results are added to it and that
    same dictionary is returned.

    References are looked for in xlink:href, in the style attribute, in the
    attributes listed in referencingProps and in the CSS text of <style>
    elements.
    """
    if ids is None:
        ids = {}
    # TODO: input argument ids is clunky here (see below how it is called)

    # a <style> element: its text (a CDATA or Text node) is parsed as CSS
    # and nothing else about the element is inspected
    if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
        cssNode = node.firstChild
        if cssNode != None:
            for rule in parseCssString(cssNode.nodeValue):
                properties = rule['properties']
                for propname in properties:
                    findReferencingProperty(node, propname, properties[propname], ids)
        return ids

    # xlink:href="#someid": strip the hash mark to get the id
    href = node.getAttributeNS(NS['XLINK'], 'href')
    if len(href) > 1 and href[0] == '#':
        target = href[1:]
        if target in ids:
            ids[target][0] += 1
            ids[target][1].append(node)
        else:
            ids[target] = [1, [node]]

    # gather "name:value" declarations from the style attribute and from
    # each referencing attribute, then examine them all the same way
    declarations = node.getAttribute('style').split(';')
    for attr in referencingProps:
        declarations.append(':'.join([attr, node.getAttribute(attr)]))

    for declaration in declarations:
        pieces = declaration.split(':')
        if len(pieces) != 2:
            continue
        findReferencingProperty(node, pieces[0].strip(), pieces[1].strip(), ids)

    # recurse into child elements only (nodeType 1)
    for child in node.childNodes:
        if child.nodeType == 1:
            findReferencedElements(child, ids)
    return ids
def findReferencingProperty(node, prop, val, ids):
    """
    Records in ids the id that the property value val refers to, if any.

    Nothing happens unless prop is one of referencingProps and val has the
    form url(#id), url("#id") or url('#id').  Text after the closing
    parenthesis is ignored.  ids maps an id to [count, nodes]; the count is
    incremented and node appended, or a new entry is created.

    A url( that is never closed is ignored.  Previously the failed search
    for the closing delimiter silently chopped the last character off the
    recorded id.
    """
    if prop not in referencingProps or val == '':
        return

    # work out which of the three spellings is in use
    opener = None
    closer = None
    if len(val) >= 7 and val[0:5] == 'url(#':
        opener, closer = 'url(#', ')'
    elif len(val) >= 8:
        # the url has a quote in it, so the delimiters are one longer
        if val[0:6] == 'url("#':
            opener, closer = 'url("#', '")'
        elif val[0:6] == "url('#":
            opener, closer = "url('#", "')"
    if opener is None:
        return

    end = val.find(closer, len(opener))
    if end == -1:
        return
    refId = val[len(opener):end]

    if refId in ids:
        ids[refId][0] += 1
        ids[refId][1].append(node)
    else:
        ids[refId] = [1, [node]]
# Running statistics kept as module-level counters; the clean-up functions
# update them through `global` statements.

# elements, attributes and id attributes stripped from the document
numElemsRemoved = 0
numAttrsRemoved = 0
numIDsRemoved = 0

# NOTE(review): nothing in this part of the file updates the counters
# below; going by their names they track raster, path, colour and polygon
# statistics -- confirm against the code that increments them.
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numPointsRemovedFromPolygon = 0
def _collectUnusedDefs(container, referencedIDs, elemsToRemove):
    # Appends to elemsToRemove the unreferenced, removable elements below container.
    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    for elem in container.childNodes:
        # only elements (nodeType 1) are candidates
        if elem.nodeType != 1:
            continue
        elemId = elem.getAttribute('id')
        if elemId != '' and elemId in referencedIDs:
            # referenced: keep it together with everything inside it.  This
            # also covers a referenced <g>, whose children used to be
            # examined (and removed) even though the group is in use.
            continue
        if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
            # an unreferenced group is never removed itself, but its
            # children are examined
            _collectUnusedDefs(elem, referencedIDs, elemsToRemove)
        elif elem.nodeName not in keepTags:
            elemsToRemove.append(elem)
    return elemsToRemove

def removeUnusedDefs(doc, defElem, elemsToRemove=None):
    """
    Finds the elements below defElem that nothing in the document references.

    An element qualifies when it has no id, or an id that is not referenced,
    and is not a font, style, metadata, script, title or desc element.
    Unreferenced <g> elements are searched recursively but never listed
    themselves; a referenced <g> is left alone along with its content.

    Nothing is removed here.  Returns the list of candidate elements; when
    elemsToRemove is given the candidates are appended to it and that same
    list is returned.
    """
    if elemsToRemove is None:
        elemsToRemove = []

    # built once for the whole search rather than once per nested group
    referencedIDs = findReferencedElements(doc.documentElement)
    return _collectUnusedDefs(defElem, referencedIDs, elemsToRemove)
def removeUnreferencedElements(doc):
    """
    Removes unreferenced linearGradient, radialGradient and pattern elements
    that have an id, then vacuums every <defs> of the elements that
    removeUnusedDefs reports as unused.

    Returns the number of unreferenced elements removed from the document.
    """
    global numElemsRemoved
    num = 0
    removeTags = ['linearGradient', 'radialGradient', 'pattern']

    identifiedElements = findElementsWithId(doc.documentElement)
    referencedIDs = findReferencedElements(doc.documentElement)

    for elemId in identifiedElements:
        if elemId in referencedIDs:
            continue
        # looked up again in the live tree, so an element that has already
        # left the document is not found and is skipped
        goner = findElementById(doc.documentElement, elemId)
        if goner is not None and goner.parentNode is not None and goner.nodeName in removeTags:
            goner.parentNode.removeChild(goner)
            num += 1
            numElemsRemoved += 1

    # num is deliberately not reset here: the return value has to include
    # the removals made above as well as the ones made in the defs
    for aDef in doc.documentElement.getElementsByTagName('defs'):
        for elem in removeUnusedDefs(doc, aDef):
            elem.parentNode.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """
    Removes the unreferenced ID attributes.

    referencedIDs holds the ids that are in use (only membership is tested)
    and identifiedElements maps each id to the element carrying it.  The id
    attribute of <font> elements is always kept.

    Returns the number of ID attributes removed
    """
    global numIDsRemoved
    keepTags = ['font']
    num = 0
    for elemId, node in identifiedElements.items():
        # `in` replaces dict.has_key(), which no longer exists in Python 3
        if elemId not in referencedIDs and node.nodeName not in keepTags:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num
def removeNamespacedAttributes(node, namespaces):
    """
    Strips from node and all of its descendants every attribute whose
    namespace URI is listed in namespaces.

    Returns the number of attributes removed; numAttrsRemoved is increased
    by the same amount.  A node that is not an element yields 0.
    """
    global numAttrsRemoved
    if node.nodeType != 1:
        return 0

    # collect the names first so the attribute map is not modified while
    # it is being walked by index
    attrs = node.attributes
    doomed = []
    for index in range(attrs.length):
        attr = attrs.item(index)
        if attr != None and attr.namespaceURI in namespaces:
            doomed.append(attr.nodeName)

    removed = 0
    for name in doomed:
        removed += 1
        numAttrsRemoved += 1
        node.removeAttribute(name)

    # then do the same for each child
    for child in node.childNodes:
        removed += removeNamespacedAttributes(child, namespaces)
    return removed
def removeNamespacedElements(node, namespaces):
    """
    Removes from node and all of its descendants every child node whose
    namespace URI is listed in namespaces.

    Returns the number of nodes removed; numElemsRemoved is increased by
    the same amount.  A node that is not an element yields 0.
    """
    global numElemsRemoved
    if node.nodeType != 1:
        return 0

    # decide what goes before touching the child list
    doomed = [kid for kid in node.childNodes
              if kid != None and kid.namespaceURI in namespaces]

    removed = 0
    for kid in doomed:
        removed += 1
        numElemsRemoved += 1
        node.removeChild(kid)

    # then do the same for each child that is left
    for child in node.childNodes:
        removed += removeNamespacedElements(child, namespaces)
    return removed
def removeNestedGroups(node):
    """
    Collapses the attribute-less <g> children of node and then repeats the
    process further down the tree.

    A group is collapsed only when it has no attributes and no <title> or
    <desc> element as a direct child; its children are moved up in front of
    it and the empty group is removed.

    Returns the number of groups removed; numElemsRemoved is increased by
    the same amount.
    """
    global numElemsRemoved

    collapsible = []
    for child in node.childNodes:
        if child.nodeName != 'g' or child.namespaceURI != NS['SVG']:
            continue
        if len(child.attributes) != 0:
            continue
        # a title or desc directly inside the group describes the group,
        # so such a group has to stay
        described = False
        for grandchild in child.childNodes:
            if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] \
                    and grandchild.nodeName in ['title', 'desc']:
                described = True
                break
        if not described:
            collapsible.append(child)

    removed = 0
    for group in collapsible:
        # inserting a node that is already in the tree moves it, so the
        # group empties itself one child at a time
        while group.childNodes.length > 0:
            group.parentNode.insertBefore(group.firstChild, group)
        group.parentNode.removeChild(group)
        numElemsRemoved += 1
        removed += 1

    # now recurse for children
    for child in node.childNodes:
        if child.nodeType == 1:
            removed += removeNestedGroups(child)
    return removed
def moveCommonAttributesToParentGroup(elem):
    """
    This recursively calls this function on all children of the passed in element
    and then iterates over all child elements and removes common inheritable attributes
    from the children and places them in the parent group.  But only if the parent contains
    nothing but element children and whitespace.

    Animation elements (set, animate, animateColor, animateTransform,
    animateMotion) take no part in this: their 'fill' attribute means
    something else, so they are neither compared nor modified.

    Returns the net number of attributes saved in elem's subtree.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += moveCommonAttributesToParentGroup(child)
        # else if the parent has non-whitespace text children, do not
        # try to move common attributes
        elif child.nodeType == 3 and child.nodeValue.strip():
            return num

    # only process the children if there are more than one element
    if len(childElements) <= 1:
        return num

    # this is most of the inheritable properties from http://www.w3.org/TR/SVG11/propidx.html
    # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
    inheritable = ['clip-rule',
                   'display-align',
                   'fill', 'fill-opacity', 'fill-rule',
                   'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                   'font-style', 'font-variant', 'font-weight',
                   'letter-spacing',
                   'pointer-events', 'shape-rendering',
                   'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                   'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                   'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                   'word-spacing', 'writing-mode']
    animationTags = ['set', 'animate', 'animateColor', 'animateTransform', 'animateMotion']

    # Leave animation elements out altogether.  They used to be skipped
    # during the comparison only, which let the first child seed the common
    # set with an animation 'fill' and let the removal loop below strip (or
    # fail to find) attributes on them.
    candidates = [child for child in childElements
                  if child.localName not in animationTags]
    if len(candidates) <= 1:
        return num

    # start with all the inheritable attributes of the first candidate
    commonAttrs = {}
    attrList = candidates[0].attributes
    # a separate index variable: reusing num here used to wipe out the
    # count accumulated from the recursive calls above
    for index in range(attrList.length):
        attr = attrList.item(index)
        if attr.nodeName in inheritable:
            commonAttrs[attr.nodeName] = attr.nodeValue

    # drop every attribute that some other candidate does not share
    for child in candidates[1:]:
        distinctAttrs = []
        for name in commonAttrs:
            if child.getAttribute(name) != commonAttrs[name]:
                distinctAttrs.append(name)
        for name in distinctAttrs:
            del commonAttrs[name]

    # commonAttrs now has all the inheritable attributes which are common among all candidates
    for name in commonAttrs:
        for child in candidates:
            # getAttribute() gives '' for a missing attribute, so an empty
            # common value can match a child that lacks the attribute
            if child.hasAttribute(name):
                child.removeAttribute(name)
        elem.setAttribute(name, commonAttrs[name])

    # update our statistic (we remove N*M attributes and add back in M attributes)
    num += (len(candidates) - 1) * len(commonAttrs)
    return num
def removeUnusedAttributesOnParent(elem):
    """
    This recursively calls this function on all children of the element passed in,
    then removes any unused attributes on this elem if none of the children inherit it

    An attribute counts as inherited by a child when the child does not set
    it, or sets it to 'inherit'.  Nothing is removed from an element that
    has fewer than two child elements or that contains non-whitespace text,
    because that text is rendered with the element's own attributes.

    Returns the number of attributes removed in elem's subtree.
    """
    num = 0

    childElements = []
    hasTextContent = False
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            childElements.append(child)
            num += removeUnusedAttributesOnParent(child)
        # Text (3) or CDATA (4) with visible content
        elif child.nodeType in (3, 4) and child.nodeValue.strip():
            hasTextContent = True

    # only process the children if there are more than one element
    if len(childElements) <= 1 or hasTextContent:
        return num

    inheritable = ['clip-rule',
                   'display-align',
                   'fill', 'fill-opacity', 'fill-rule',
                   'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch',
                   'font-style', 'font-variant', 'font-weight',
                   'letter-spacing',
                   'pointer-events', 'shape-rendering',
                   'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
                   'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
                   'text-anchor', 'text-decoration', 'text-rendering', 'visibility',
                   'word-spacing', 'writing-mode']

    # get all inheritable attribute values on this parent
    attrList = elem.attributes
    unusedAttrs = {}
    # a separate index variable: reusing num here used to wipe out the
    # count accumulated from the recursive calls above
    for index in range(attrList.length):
        attr = attrList.item(index)
        if attr.nodeName in inheritable:
            unusedAttrs[attr.nodeName] = attr.nodeValue

    # an attribute stops being "unused" as soon as one child inherits it
    for child in childElements:
        inheritedAttrs = []
        for name in unusedAttrs:
            val = child.getAttribute(name)
            if val == '' or val is None or val == 'inherit':
                inheritedAttrs.append(name)
        for name in inheritedAttrs:
            del unusedAttrs[name]

    # unusedAttrs now has all the parent attributes that are unused
    for name in unusedAttrs:
        elem.removeAttribute(name)
        num += 1

    return num
def removeDuplicateGradientStops(doc):
    """
    Normalises the offset attribute of every <stop> in every linear and
    radial gradient and removes stops that repeat an earlier one.

    Each offset is rewritten as a plain number: percentages are divided by
    100, and an offset with any other unit becomes 0.  A stop is removed
    when the stop most recently seen at the same offset in the same
    gradient has the same stop-color and stop-opacity attributes.

    Returns the number of stops removed; numElemsRemoved is increased by
    the same amount.
    """
    global numElemsRemoved
    num = 0

    for gradType in ['linearGradient', 'radialGradient']:
        for grad in doc.getElementsByTagName(gradType):
            # offset -> [color, opacity] of the last stop seen at that offset
            stops = {}
            stopsToRemove = []
            for stop in grad.getElementsByTagName('stop'):
                # convert percentages into a floating point number
                offsetU = SVGLength(stop.getAttribute('offset'))
                if offsetU.units == Unit.PCT:
                    offset = offsetU.value / 100.0
                elif offsetU.units == Unit.NONE:
                    offset = offsetU.value
                else:
                    offset = 0
                # set the stop offset value to the integer or floating point equivalent
                if int(offset) == offset:
                    stop.setAttribute('offset', str(int(offset)))
                else:
                    stop.setAttribute('offset', str(offset))

                color = stop.getAttribute('stop-color')
                opacity = stop.getAttribute('stop-opacity')
                # `in` replaces dict.has_key(), which no longer exists in Python 3
                if offset in stops:
                    oldStop = stops[offset]
                    if oldStop[0] == color and oldStop[1] == opacity:
                        stopsToRemove.append(stop)
                stops[offset] = [color, opacity]

            # removal is deferred until the gradient has been fully scanned
            for stop in stopsToRemove:
                stop.parentNode.removeChild(stop)
                num += 1
                numElemsRemoved += 1

    return num
def collapseSinglyReferencedGradients(doc):
    """
    Merges a gradient into the gradient that references it, when that is
    the only reference to it in the document, and removes the merged one.

    The referencing gradient takes over the stops (if it has none of its
    own) and every gradient attribute it does not specify itself.  If the
    merged gradient in turn referenced another gradient through xlink:href,
    the referencing gradient is pointed at that one, so that a chain of
    references is not broken.

    Returns the number of gradients removed; numElemsRemoved is increased
    by the same amount.
    """
    global numElemsRemoved
    num = 0
    gradientTags = ['linearGradient', 'radialGradient']

    # the references are collected once, before anything is changed;
    # items() is used because iteritems() no longer exists in Python 3
    for rid, nodeCount in findReferencedElements(doc.documentElement).items():
        count = nodeCount[0]
        nodes = nodeCount[1]
        if count != 1:
            continue

        elem = findElementById(doc.documentElement, rid)
        if elem is None or elem.nodeType != 1 or elem.nodeName not in gradientTags \
                or elem.namespaceURI != NS['SVG']:
            continue

        # found a gradient that is referenced by only 1 other element
        refElem = nodes[0]
        if refElem.nodeType != 1 or refElem.nodeName not in gradientTags \
                or refElem.namespaceURI != NS['SVG']:
            continue

        # refElem may itself have been merged away earlier in this loop;
        # merging into a detached node would delete a gradient that the
        # document still uses
        if refElem.parentNode is None:
            continue

        # elem is a gradient referenced by only one other gradient (refElem)

        # add the stops to the referencing gradient (this removes them from elem)
        if len(refElem.getElementsByTagName('stop')) == 0:
            for stop in elem.getElementsByTagName('stop'):
                refElem.appendChild(stop)

        # attributes refElem adopts from elem when it leaves them
        # unspecified; the geometry ones only apply between gradients of
        # the same kind
        adoptable = ['gradientUnits', 'spreadMethod', 'gradientTransform']
        if elem.nodeName == 'radialGradient' and refElem.nodeName == 'radialGradient':
            adoptable = adoptable + ['fx', 'fy', 'cx', 'cy', 'r']
        if elem.nodeName == 'linearGradient' and refElem.nodeName == 'linearGradient':
            adoptable = adoptable + ['x1', 'y1', 'x2', 'y2']
        for attr in adoptable:
            if refElem.getAttribute(attr) == '' and elem.getAttribute(attr) != '':
                refElem.setAttributeNS(None, attr, elem.getAttribute(attr))

        # refElem's xlink:href either moves on to whatever elem referenced
        # or, if elem referenced nothing, is removed
        hrefNode = refElem.getAttributeNodeNS(NS['XLINK'], 'href')
        if hrefNode is not None:
            inheritedHref = elem.getAttributeNS(NS['XLINK'], 'href')
            if inheritedHref != '':
                # reuse the existing qualified name so the prefix is kept
                refElem.setAttributeNS(NS['XLINK'], hrefNode.nodeName, inheritedHref)
            else:
                refElem.removeAttributeNS(NS['XLINK'], 'href')

        # now delete elem
        elem.parentNode.removeChild(elem)
        numElemsRemoved += 1
        num += 1
    return num
def _gradientsMatch(grad, ograd):
    # True when both gradients have equal attributes, xlink:href and stops.
    for attr in ['gradientUnits', 'spreadMethod', 'gradientTransform',
                 'x1', 'y1', 'x2', 'y2', 'cx', 'cy', 'fx', 'fy', 'r']:
        if grad.getAttribute(attr) != ograd.getAttribute(attr):
            return False

    # compare xlink:href values too
    if grad.getAttributeNS(NS['XLINK'], 'href') != ograd.getAttributeNS(NS['XLINK'], 'href'):
        return False

    # all gradient properties match, now time to compare stops
    stops = grad.getElementsByTagName('stop')
    ostops = ograd.getElementsByTagName('stop')
    if stops.length != ostops.length:
        return False
    for i in range(stops.length):
        stop = stops.item(i)
        ostop = ostops.item(i)
        for attr in ['offset', 'stop-color', 'stop-opacity']:
            if stop.getAttribute(attr) != ostop.getAttribute(attr):
                return False
    return True

def removeDuplicateGradients(doc):
    """
    Removes gradients that are exact copies of another gradient of the same
    kind and points the elements that used a copy at the surviving "master".

    Two gradients are copies when their gradient attributes, xlink:href and
    stops (offset, stop-color, stop-opacity) are all equal.  Only the fill
    and stroke attributes and xlink:href of referencing elements are
    rewritten.

    Returns the number of gradients removed; numElemsRemoved is increased
    by the same amount.
    """
    global numElemsRemoved
    num = 0

    # master gradient -> list of its duplicates
    gradientsToRemove = {}
    # duplicate gradient -> its master
    duplicateToMaster = {}

    for gradType in ['linearGradient', 'radialGradient']:
        grads = doc.getElementsByTagName(gradType)
        for grad in grads:
            # TODO: should slice grads from 'grad' here to optimize
            for ograd in grads:
                # do not compare gradient to itself
                if grad == ograd:
                    continue
                if not _gradientsMatch(grad, ograd):
                    continue

                # ograd is a duplicate of grad, we schedule it to be removed UNLESS
                # ograd is ALREADY considered a 'master' element or is already
                # scheduled as somebody's duplicate
                if ograd not in gradientsToRemove and ograd not in duplicateToMaster:
                    if grad not in gradientsToRemove:
                        gradientsToRemove[grad] = []
                    gradientsToRemove[grad].append(ograd)
                    duplicateToMaster[ograd] = grad

    # get a collection of all elements that are referenced and their referencing elements
    referencedIDs = findReferencedElements(doc.documentElement)
    for masterGrad in gradientsToRemove:
        master_id = masterGrad.getAttribute('id')
        for dupGrad in gradientsToRemove[masterGrad]:
            # if the duplicate gradient no longer has a parent that means it was
            # already re-mapped to another master gradient
            if not dupGrad.parentNode:
                continue

            dup_id = dupGrad.getAttribute('id')
            # a duplicate that nothing references has no entry at all;
            # indexing referencedIDs directly used to raise KeyError here
            referrers = []
            if dup_id != '' and dup_id in referencedIDs:
                referrers = referencedIDs[dup_id][1]

            # without an id on the master there is nothing valid to point
            # the referrers at, so such a duplicate has to stay
            if referrers and master_id == '':
                continue

            # for each element that referenced the gradient we are going to remove
            for elem in referrers:
                # find out which attribute referenced the duplicate gradient
                for attr in ['fill', 'stroke']:
                    v = elem.getAttribute(attr)
                    if v == 'url(#'+dup_id+')' or v == 'url("#'+dup_id+'")' or v == "url('#"+dup_id+"')":
                        elem.setAttribute(attr, 'url(#'+master_id+')')
                if elem.getAttributeNS(NS['XLINK'], 'href') == '#'+dup_id:
                    elem.setAttributeNS(NS['XLINK'], 'href', '#'+master_id)

            # now that all referencing elements have been re-mapped to the master
            # it is safe to remove this gradient from the document
            dupGrad.parentNode.removeChild(dupGrad)
            numElemsRemoved += 1
            num += 1
    return num
def _styleNumber(value):
    """
    Returns value converted to a float, or None when value is not a plain
    number (for example the keyword 'inherit').
    """
    try:
        return float(value)
    except ValueError:
        return None

def _dropStyles(styleMap, names):
    """
    Deletes every property listed in names that is present in styleMap.

    Returns the number of properties that were actually deleted.
    """
    removed = 0
    for name in names:
        if name in styleMap:
            del styleMap[name]
            removed += 1
    return removed

def repairStyle(node, options):
    """
    Cleans up the style attribute of node and, recursively, of its children.

    The style declarations are parsed into a property map, redundant or
    useless properties are dropped, and the remaining ones are written back
    (or the style attribute is removed when nothing is left).  When
    options.style_to_xml is set, properties that are also known SVG
    attributes are emitted as XML attributes instead.

    Returns the number of style properties that were removed or repaired.
    """
    num = 0
    if node.nodeType == 1 and len(node.getAttribute('style')) > 0 :
        # get all style properties and stuff them into a dictionary
        styleMap = { }
        for style in node.getAttribute('style').split(';'):
            # split on the first colon only, otherwise a declaration whose
            # value contains a colon (e.g. an absolute URL) would be dropped
            propval = style.split(':', 1)
            if len(propval) == 2 :
                styleMap[propval[0].strip()] = propval[1].strip()

        # I've seen this enough to know that I need to correct it:
        # fill: url(#linearGradient4918) rgb(0, 0, 0);
        for prop in ['fill', 'stroke'] :
            if prop in styleMap :
                chunk = styleMap[prop].split(') ')
                if len(chunk) == 2 and (chunk[0][:5] == 'url(#' or chunk[0][:6] == 'url("#' or chunk[0][:6] == "url('#") and chunk[1] == 'rgb(0, 0, 0)' :
                    styleMap[prop] = chunk[0] + ')'
                    num += 1

        # Here is where we can weed out unnecessary styles like:
        #  opacity:1
        if 'opacity' in styleMap :
            # a non-numeric value yields None and matches neither branch
            opacity = _styleNumber(styleMap['opacity'])
            # opacity='1.0' is useless, remove it
            if opacity == 1.0 :
                del styleMap['opacity']
                num += 1

            # if opacity='0' then all fill and stroke properties are useless, remove them
            elif opacity == 0.0 :
                num += _dropStyles(styleMap, ['fill', 'fill-opacity', 'fill-rule', 'stroke',
                    'stroke-linejoin', 'stroke-opacity', 'stroke-miterlimit', 'stroke-linecap',
                    'stroke-dasharray', 'stroke-dashoffset'])

        # if stroke:none, then remove all stroke-related properties (stroke-width, etc)
        # TODO: should also detect if the computed value of this element is stroke="none"
        if 'stroke' in styleMap and styleMap['stroke'] == 'none' :
            num += _dropStyles(styleMap, ['stroke-width', 'stroke-linejoin', 'stroke-miterlimit',
                'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'])
            # TODO: This is actually a problem if a parent element has a specified stroke
            # we need to properly calculate computed values
            # (this removal is deliberately not counted, as before)
            del styleMap['stroke']

        # if fill:none, then remove all fill-related properties (fill-rule, etc)
        if 'fill' in styleMap and styleMap['fill'] == 'none' :
            num += _dropStyles(styleMap, ['fill-rule', 'fill-opacity'])

        # stop-opacity: 1
        if 'stop-opacity' in styleMap :
            if _styleNumber(styleMap['stop-opacity']) == 1.0 :
                del styleMap['stop-opacity']
                num += 1

        # fill-opacity: 1 or 0
        if 'fill-opacity' in styleMap :
            fillOpacity = _styleNumber(styleMap['fill-opacity'])
            # TODO: This is actually a problem if the parent element does not have fill-opacity=1
            if fillOpacity == 1.0 :
                del styleMap['fill-opacity']
                num += 1
            elif fillOpacity == 0.0 :
                num += _dropStyles(styleMap, ['fill', 'fill-rule'])

        # stroke-opacity: 1 or 0
        if 'stroke-opacity' in styleMap :
            strokeOpacity = _styleNumber(styleMap['stroke-opacity'])
            # TODO: This is actually a problem if the parent element does not have stroke-opacity=1
            if strokeOpacity == 1.0 :
                del styleMap['stroke-opacity']
                num += 1
            elif strokeOpacity == 0.0 :
                num += _dropStyles(styleMap, ['stroke', 'stroke-width', 'stroke-linejoin',
                    'stroke-linecap', 'stroke-dasharray', 'stroke-dashoffset'])

        # stroke-width: 0
        if 'stroke-width' in styleMap :
            strokeWidth = getSVGLength(styleMap['stroke-width'])
            if strokeWidth == 0.0 :
                num += _dropStyles(styleMap, ['stroke', 'stroke-linejoin', 'stroke-linecap',
                    'stroke-dasharray', 'stroke-dashoffset', 'stroke-opacity'])

        # remove font properties for non-text elements
        # I've actually observed this in real SVG content
        if node.nodeName in ['rect', 'circle', 'ellipse', 'line', 'polyline', 'polygon', 'path']:
            num += _dropStyles(styleMap, ['font-family', 'font-size', 'font-stretch',
                'font-size-adjust', 'font-style', 'font-variant', 'font-weight',
                'letter-spacing', 'line-height', 'kerning',
                'text-anchor', 'text-decoration', 'text-rendering',
                'unicode-bidi', 'word-spacing', 'writing-mode'])

        # remove inkscape-specific styles
        # TODO: need to get a full list of these
        num += _dropStyles(styleMap, ['-inkscape-font-specification'])

        # visibility: visible
        if 'visibility' in styleMap and styleMap['visibility'] == 'visible':
            del styleMap['visibility']
            num += 1

        # display: inline
        if 'display' in styleMap and styleMap['display'] == 'inline':
            del styleMap['display']
            num += 1

        # overflow: visible or overflow specified on element other than svg, marker, pattern
        # (the element test used to be inverted: it stripped overflow from exactly the
        # elements where it has an effect and kept it everywhere else)
        if 'overflow' in styleMap :
            if styleMap['overflow'] == 'visible' or node.nodeName not in ['svg','marker','pattern']:
                del styleMap['overflow']
                num += 1

        # marker: none
        if 'marker' in styleMap and styleMap['marker'] == 'none':
            del styleMap['marker']
            num += 1

        # now if any of the properties match known SVG attributes we prefer attributes
        # over style so emit them and remove them from the style map
        if options.style_to_xml:
            # iterate over a snapshot of the keys because entries are deleted in the loop
            for propName in list(styleMap.keys()) :
                if propName in svgAttributes :
                    node.setAttribute(propName, styleMap[propName])
                    del styleMap[propName]

        # sew our remaining style properties back together into a style attribute
        fixedStyle = ''.join([prop + ':' + styleMap[prop] + ';' for prop in styleMap])

        if fixedStyle != '' :
            node.setAttribute('style', fixedStyle)
        else:
            node.removeAttribute('style')

    # recurse for our child elements
    for child in node.childNodes :
        num += repairStyle(child,options)

    return num
def removeDefaultAttributeValues(node, options):
    """
    Removes gradient attributes whose value equals the default, on node and,
    recursively, on its children.

    The x2 default is only applied to linearGradient elements and the
    fx/fy/cx/cy/r defaults only to radialGradient elements; on shapes such
    as line or circle the same attribute names have different defaults and
    must be left alone.

    options is accepted for interface compatibility and only passed on to
    the recursive calls.

    Returns the number of attributes removed.
    """
    num = 0
    if node.nodeType != 1: return 0

    # a unitless 1 / 0.5 only equals 100% / 50% in objectBoundingBox units
    # NOTE(review): gradientUnits (and any other attribute removed here) may
    # also be inherited through xlink:href; that case is not detected
    userSpace = (node.getAttribute('gradientUnits') == 'userSpaceOnUse')

    # gradientUnits: objectBoundingBox
    if node.getAttribute('gradientUnits') == 'objectBoundingBox':
        node.removeAttribute('gradientUnits')
        num += 1

    # spreadMethod: pad
    if node.getAttribute('spreadMethod') == 'pad':
        node.removeAttribute('spreadMethod')
        num += 1

    # x1, y1, y2: 0% (zero is the default both for gradients and for lines)
    for attr in ['x1', 'y1']:
        if node.getAttribute(attr) != '':
            if SVGLength(node.getAttribute(attr)).value == 0:
                node.removeAttribute(attr)
                num += 1

    # x2: 100%
    if node.nodeName == 'linearGradient' and node.getAttribute('x2') != '':
        x2 = SVGLength(node.getAttribute('x2'))
        if (x2.value == 100 and x2.units == Unit.PCT) or \
           (not userSpace and x2.value == 1 and x2.units == Unit.NONE):
            node.removeAttribute('x2')
            num += 1

    # y2: 0%
    if node.getAttribute('y2') != '':
        if SVGLength(node.getAttribute('y2')).value == 0:
            node.removeAttribute('y2')
            num += 1

    if node.nodeName == 'radialGradient':
        # fx: equal to cx, fy: equal to cy
        # (compared before cx/cy are themselves considered for removal)
        for (focus, centre) in [('fx', 'cx'), ('fy', 'cy')]:
            if node.getAttribute(focus) != '':
                if node.getAttribute(focus) == node.getAttribute(centre):
                    node.removeAttribute(focus)
                    num += 1

        # cx, cy, r: 50%
        for attr in ['cx', 'cy', 'r']:
            if node.getAttribute(attr) != '':
                length = SVGLength(node.getAttribute(attr))
                if (length.value == 50 and length.units == Unit.PCT) or \
                   (not userSpace and length.value == 0.5 and length.units == Unit.NONE):
                    node.removeAttribute(attr)
                    num += 1

    # recurse for our child elements
    for child in node.childNodes :
        num += removeDefaultAttributeValues(child,options)

    return num
# rgb(R, G, B) with integer channels, and rgb(R%, G%, B%) with percentages
rgb = re.compile("\\s*rgb\\(\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\,\\s*(\\d+)\\s*\\)\\s*")
rgbp = re.compile("\\s*rgb\\(\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\,\\s*(\\d*\\.?\\d+)\\%\\s*\\)\\s*")
def convertColor(value):
    """
    Converts the input color string and returns a #RRGGBB (or #RGB if possible) string

    Named colors are looked up in the module-level colors table, rgb()
    percentages are converted to 0-255 integers and integer rgb() values
    are written as hexadecimal.  Anything that is not recognised is
    returned unchanged.
    """
    s = value

    if s in colors:
        s = colors[s]

    rgbpMatch = rgbp.match(s)
    if rgbpMatch != None :
        r = int(float(rgbpMatch.group(1)) * 255.0 / 100.0)
        g = int(float(rgbpMatch.group(2)) * 255.0 / 100.0)
        b = int(float(rgbpMatch.group(3)) * 255.0 / 100.0)
        s = 'rgb(%d,%d,%d)' % (r,g,b)

    rgbMatch = rgb.match(s)
    if rgbMatch != None :
        channels = []
        for group in (1, 2, 3):
            # out-of-range channels are clipped to 255 so that the result
            # is always exactly two hex digits per channel
            channels.append('%02X' % min(int(rgbMatch.group(group)), 255))
        s = '#' + ''.join(channels)

    # s[:1] instead of s[0] so that an empty string does not raise
    if s[:1] == '#' and len(s)==7 and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]:
        s = s.upper()
        s = '#'+s[1]+s[3]+s[5]

    return s
def convertColors(element) :
    """
    Walks element and all of its descendants and rewrites color attributes
    with the result of convertColor() whenever that result is shorter.

    Returns the total number of bytes saved.
    """
    # only element nodes carry attributes
    if element.nodeType != 1:
        return 0

    # pick the color attributes that apply to this kind of element
    tag = element.nodeName
    if tag in ['rect', 'circle', 'ellipse', 'polygon',
               'line', 'polyline', 'path', 'g', 'a']:
        candidates = ['fill', 'stroke']
    elif tag in ['stop']:
        candidates = ['stop-color']
    elif tag in ['solidColor']:
        candidates = ['solid-color']
    else:
        candidates = []

    saved = 0
    for name in candidates:
        before = element.getAttribute(name)
        if before == '':
            continue
        after = convertColor(before)
        # keep the original spelling unless the converted one is shorter
        if len(before) > len(after):
            element.setAttribute(name, after)
            saved += len(before) - len(element.getAttribute(name))

    # descend into the children
    for kid in element.childNodes :
        saved += convertColors(kid)

    return saved
1261 # TODO: go over what this method does and see if there is a way to optimize it
1262 # TODO: go over the performance of this method and see if I can save memory/speed by
1263 # reusing data structures, etc
def _collapseConsecutivePathCommands(path):
    """
    Merges runs of identical path commands into one command.

    path is a list of (cmd, [numbers]) tuples.  The first entry (the initial
    moveto) is kept untouched and nothing is merged into it.  Returns a new
    list; the coordinate lists of the input are not modified.
    """
    newPath = [path[0]]
    for (cmd, data) in path[1:]:
        # 'm' is never merged: additional coordinate pairs after a moveto
        # are implicit linetos, so merging would draw a line
        if len(newPath) > 1 and newPath[-1][0] == cmd and cmd != 'm':
            newPath[-1][1].extend(data)
        else:
            newPath.append( (cmd, list(data)) )
    return newPath

def cleanPath(element) :
    """
    Cleans the path string (d attribute) of the element

    The path data is parsed, everything after the initial moveto is made
    relative, empty segments are dropped, straight curves become lines,
    shorthand commands (h, v, s, t) are used where possible, runs of the
    same command are merged and the result is written back to the 'd'
    attribute.  The module-level counters numBytesSavedInPathData,
    numPathSegmentsReduced and numCurvesStraightened are updated.
    """
    global numBytesSavedInPathData
    global numPathSegmentsReduced
    global numCurvesStraightened

    # this gets the parser object from svg_regex.py
    oldPathStr = element.getAttribute('d')
    pathObj = svg_parser.parse(oldPathStr)

    # however, this parser object has some ugliness in it (lists of tuples of tuples of
    # numbers and booleans).  we just need a list of (cmd,[numbers]):
    path = []
    for (cmd,dataset) in pathObj:
        nums = []
        if cmd in ['M','m','L','l','T','t']:
            # one or more tuples, each containing two numbers
            for t in dataset:
                # convert to a Decimal
                nums.append(Decimal(str(t[0])) * Decimal(1))
                nums.append(Decimal(str(t[1])) * Decimal(1))
            # only create this segment if it is not empty
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['V','v','H','h']:
            # one or more numbers
            for n in dataset:
                nums.append(Decimal(str(n)))
            if nums:
                path.append( (cmd, nums) )

        elif cmd in ['C','c','S','s','Q','q']:
            # one or more tuples, each containing three (C) or two (S, Q)
            # tuples of two numbers each
            for t in dataset:
                for pair in t:
                    nums.append(Decimal(str(pair[0])) * Decimal(1))
                    nums.append(Decimal(str(pair[1])) * Decimal(1))
            path.append( (cmd, nums) )

        elif cmd in ['A','a']:
            # one or more tuples, each containing a tuple of two numbers, a number, a boolean,
            # another boolean, and a tuple of two numbers
            for t in dataset:
                nums.append( Decimal(str(t[0][0])) * Decimal(1) )
                nums.append( Decimal(str(t[0][1])) * Decimal(1) )
                nums.append( Decimal(str(t[1])) * Decimal(1))

                if t[2]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                if t[3]: nums.append( Decimal(1) )
                else: nums.append( Decimal(0) )

                nums.append( Decimal(str(t[4][0])) * Decimal(1) )
                nums.append( Decimal(str(t[4][1])) * Decimal(1) )
            path.append( (cmd, nums) )

        elif cmd in ['Z','z']:
            path.append( (cmd, []) )

    # nothing to clean if no command could be extracted
    if not path:
        return

    # calculate the starting x,y coord for the second path command
    firstData = path[0][1]
    if len(firstData) == 2:
        (x,y) = firstData
    else:
        # we have a move and then 1 or more coords for lines
        N = len(firstData)
        # compare the command letter, not the whole (cmd, data) tuple
        if path[0][0] == 'M':
            # take the last pair of coordinates for the starting point
            x = firstData[N-2]
            y = firstData[N-1]
        else: # relative move, accumulate coordinates for the starting point
            (x,y) = firstData[0],firstData[1]
            n = 2
            while n < N:
                x += firstData[n]
                y += firstData[n+1]
                n += 2

    # a closepath returns to the point of the moveto itself (its first pair),
    # not to the end of the implicit linetos that may follow it
    (startx,starty) = (firstData[0],firstData[1])

    # number of values per segment for the commands that consist purely of
    # coordinate pairs; the last pair of every segment is its end point
    pairStride = {'m': 2, 'l': 2, 't': 2, 's': 4, 'q': 4, 'c': 6}

    # convert absolute coordinates into relative ones (start with the second subcommand
    # and leave the first M as absolute)
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newCmd = cmd
        newData = data
        lower = cmd.lower()
        # adjust abs to rel
        # only the A command has some values that we don't want to adjust (radii, rotation, flags)
        if cmd == 'A':
            newCmd = 'a'
            newData = []
            while i < len(data):
                newData.extend(data[i:i+5])
                newData.append(data[i+5]-x)
                newData.append(data[i+6]-y)
                x = data[i+5]
                y = data[i+6]
                i += 7
        elif cmd == 'a':
            while i < len(data):
                x += data[i+5]
                y += data[i+6]
                i += 7
        elif cmd == 'H':
            newCmd = 'h'
            newData = []
            while i < len(data):
                newData.append(data[i]-x)
                x = data[i]
                i += 1
        elif cmd == 'h':
            while i < len(data):
                x += data[i]
                i += 1
        elif cmd == 'V':
            newCmd = 'v'
            newData = []
            while i < len(data):
                newData.append(data[i] - y)
                y = data[i]
                i += 1
        elif cmd == 'v':
            while i < len(data):
                y += data[i]
                i += 1
        elif lower in pairStride:
            stride = pairStride[lower]
            # a moveto starts a new subpath; remember where it begins
            if cmd == 'M':
                (startx,starty) = (data[0],data[1])
            elif cmd == 'm':
                # relative to the current point, not to the previous start
                (startx,starty) = (x + data[0], y + data[1])

            if cmd == lower:
                # already relative, just track the current point
                while i < len(data):
                    x += data[i+stride-2]
                    y += data[i+stride-1]
                    i += stride
            else:
                newCmd = lower
                newData = []
                while i < len(data):
                    # every pair of one segment is relative to the same point
                    j = 0
                    while j < stride:
                        newData.append( data[i+j] - x )
                        newData.append( data[i+j+1] - y )
                        j += 2
                    x = data[i+stride-2]
                    y = data[i+stride-1]
                    i += stride
        elif cmd in ['z','Z']:
            x = startx
            y = starty
            newCmd = 'z'
        newPath.append( (newCmd, newData) )
    path = newPath

    # remove empty segments
    # cmd -> (values per segment, index of first value to test, number of values to test)
    # a segment is empty when all tested values are zero.  For curves the
    # control points are tested too: a curve that returns to its origin with
    # non-zero control points draws a loop and is not empty.
    emptyTest = {'m': (2,0,2), 'l': (2,0,2), 't': (2,0,2),
                 'c': (6,0,6), 'q': (4,0,4), 'a': (7,5,2),
                 'h': (1,0,1), 'v': (1,0,1)}
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in emptyTest:
            (stride, first, count) = emptyTest[cmd]
            newData = []
            i = 0
            while i < len(data):
                isEmpty = True
                for value in data[i+first:i+first+count]:
                    if value != 0:
                        isEmpty = False
                if isEmpty:
                    numPathSegmentsReduced += 1
                else:
                    newData.extend(data[i:i+stride])
                i += stride
            if newData:
                newPath.append( (cmd,newData) )
        else:
            newPath.append( (cmd,data) )
    path = newPath

    # convert straight curves into lines
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        i = 0
        newData = data
        if cmd == 'c':
            newData = []
            while i < len(data):
                # since all commands are now relative, we can think of previous point as (0,0)
                # and new point (dx,dy) is (data[i+4],data[i+5])
                # eqn of line will be y = (dy/dx)*x or if dx=0 then eqn of line is x=0
                (p1x,p1y) = (data[i],data[i+1])
                (p2x,p2y) = (data[i+2],data[i+3])
                dx = data[i+4]
                dy = data[i+5]

                foundStraightCurve = False

                if dx == 0:
                    # dy == 0 means the curve returns to its origin; that is
                    # a loop, not a line, so it is left alone
                    if dy != 0 and p1x == 0 and p2x == 0:
                        foundStraightCurve = True
                else:
                    m = dy/dx
                    # both control points must lie on the line y = m*x
                    if p1y == m*p1x and p2y == m*p2x:
                        foundStraightCurve = True

                if foundStraightCurve:
                    # flush any existing curve coords first
                    if newData:
                        newPath.append( (cmd,newData) )
                        newData = []
                    # now create a straight line segment
                    newPath.append( ('l', [dx,dy]) )
                    numCurvesStraightened += 1
                else:
                    newData.extend(data[i:i+6])

                i += 6
        if newData or cmd == 'z' or cmd == 'Z':
            newPath.append( (cmd,newData) )
    path = newPath

    # collapse all consecutive commands of the same type into one command
    path = _collapseConsecutivePathCommands(path)

    # convert to shorthand path segments where possible
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        # convert line segments into h,v where possible
        if cmd == 'l':
            i = 0
            lineTuples = []
            while i < len(data):
                if data[i] == 0:
                    # vertical
                    if lineTuples:
                        # flush the existing line command
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    # append the v and then the remaining line coords
                    newPath.append( ('v', [data[i+1]]) )
                    numPathSegmentsReduced += 1
                elif data[i+1] == 0:
                    # horizontal
                    if lineTuples:
                        # flush the line command, then append the h and then the remaining line coords
                        newPath.append( ('l', lineTuples) )
                        lineTuples = []
                    newPath.append( ('h', [data[i]]) )
                    numPathSegmentsReduced += 1
                else:
                    lineTuples.append(data[i])
                    lineTuples.append(data[i+1])
                i += 2
            if lineTuples:
                newPath.append( ('l', lineTuples) )
        # convert Bezier curve segments into s where possible
        elif cmd == 'c':
            # expected first control point of the next segment if it were an 's'
            bez_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                # rotate by 180deg means negate both coordinates
                # if the previous control point is equal then we can substitute a
                # shorthand bezier command
                if bez_ctl_pt[0] == data[i] and bez_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('c', curveTuples) )
                        curveTuples = []
                    # append the s command
                    newPath.append( ('s', [data[i+2], data[i+3], data[i+4], data[i+5]]) )
                    numPathSegmentsReduced += 1
                else:
                    curveTuples.extend(data[i:i+6])

                # set up control point for next curve segment
                bez_ctl_pt = (data[i+4]-data[i+2], data[i+5]-data[i+3])
                i += 6

            if curveTuples:
                newPath.append( ('c', curveTuples) )
        # convert quadratic curve segments into t where possible
        elif cmd == 'q':
            # expected control point of the next segment if it were a 't'
            quad_ctl_pt = (0,0)
            i = 0
            curveTuples = []
            while i < len(data):
                if quad_ctl_pt[0] == data[i] and quad_ctl_pt[1] == data[i+1]:
                    if curveTuples:
                        newPath.append( ('q', curveTuples) )
                        curveTuples = []
                    # append the t command
                    newPath.append( ('t', [data[i+2], data[i+3]]) )
                    numPathSegmentsReduced += 1
                else:
                    curveTuples.extend(data[i:i+4])

                quad_ctl_pt = (data[i+2]-data[i], data[i+3]-data[i+1])
                i += 4

            if curveTuples:
                newPath.append( ('q', curveTuples) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # for each h or v, collapse unnecessary coordinates that run in the same direction
    # i.e. "h-100-100" becomes "h-200" but "h300-100" does not change
    newPath = [path[0]]
    for (cmd,data) in path[1:]:
        if cmd in ['h','v'] and len(data) > 1:
            newData = []
            prevCoord = data[0]
            for coord in data[1:]:
                if isSameSign(prevCoord, coord):
                    prevCoord += coord
                    numPathSegmentsReduced += 1
                else:
                    newData.append(prevCoord)
                    prevCoord = coord
            newData.append(prevCoord)
            newPath.append( (cmd, newData) )
        else:
            newPath.append( (cmd, data) )
    path = newPath

    # it is possible that we have consecutive h, v, c, t commands now
    # so again collapse all consecutive commands of the same type into one command
    path = _collapseConsecutivePathCommands(path)

    newPathStr = serializePath(path)
    numBytesSavedInPathData += ( len(oldPathStr) - len(newPathStr) )
    element.setAttribute('d', newPathStr)
def parseListOfPoints(s):
    """
    Parse string into a list of points.

    Returns a list containing an even number of coordinate strings.  An
    empty list is returned when the number of coordinates is odd or when
    any coordinate carries a unit.
    """
    # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)?
    # coordinate-pair = coordinate comma-or-wsp coordinate
    # coordinate = sign? integer
    #
    # the separator is either a comma with optional surrounding whitespace
    # or a run of whitespace; unlike "\s*,?\s*" this pattern can never match
    # the empty string, so the result does not depend on how the running
    # Python version treats empty matches in re.split()
    nums = re.split("\\s*,\\s*|\\s+", s.strip())
    i = 0
    points = []
    while i < len(nums):
        x = SVGLength(nums[i])
        # if we had an odd number of points, return empty
        if i == len(nums)-1: return []
        else: y = SVGLength(nums[i+1])

        # if the coordinates were not unitless, return empty
        if x.units != Unit.NONE or y.units != Unit.NONE: return []
        points.append( str(x.value) )
        points.append( str(y.value) )
        i += 2

    return points
def cleanPolygon(elem):
    """
    Remove unnecessary closing point of polygon points attribute

    A polygon is closed implicitly, so a last point equal to the first one
    is redundant.  The points attribute is rewritten with the scoured
    coordinates and numPointsRemovedFromPolygon is incremented when a
    point was dropped.
    """
    global numPointsRemovedFromPolygon

    pts = parseListOfPoints(elem.getAttribute('points'))
    N = len(pts) // 2
    if N >= 2:
        # the first point is (pts[0], pts[1]); y used to be read from pts[0]
        (startx,starty) = (pts[0],pts[1])
        (endx,endy) = (pts[-2],pts[-1])
        if startx == endx and starty == endy:
            pts = pts[:-2]
            numPointsRemovedFromPolygon += 1
    elem.setAttribute('points', scourCoordinates(pts))
def cleanPolyline(elem):
    """
    Rewrites the points attribute of a polyline with scoured coordinates.
    """
    rawPoints = elem.getAttribute('points')
    scoured = scourCoordinates(parseListOfPoints(rawPoints))
    elem.setAttribute('points', scoured)
def serializePath(pathObj):
    """
    Turns a list of (cmd, [numbers]) tuples back into a path data string,
    scouring the coordinates on the way.
    """
    pieces = []
    for (cmd,data) in pathObj:
        # elliptical arc commands must have comma/wsp separating the coordinates
        # this fixes an issue outlined in Fix https://bugs.launchpad.net/scour/+bug/412754
        needsSeparators = (cmd == 'a')
        pieces.append(cmd)
        pieces.append(scourCoordinates(data, needsSeparators))
    return "".join(pieces)
def scourCoordinates(data, forceCommaWsp = False):
    """
    Builds the textual form of a sequence of coordinates:
        - every value is passed through scourLength() (trailing zeros
          removed, integerized where possible)
        - no whitespace is emitted
        - a comma is placed between two values only when the second one is
          non-negative, or always when forceCommaWsp is True
    Returns an empty string when data is None or empty.
    """
    if data is None:
        return ""

    pieces = []
    lastIndex = len(data) - 1
    for (index, coord) in enumerate(data):
        # add the scoured coordinate to the output
        pieces.append(scourLength(coord))

        # a leading minus sign of the next value already acts as separator
        if index < lastIndex and (forceCommaWsp or Decimal(data[index+1]) >= 0):
            pieces.append(',')
    return "".join(pieces)
def scourLength(str):
    """
    Returns the shortest textual form found for the given length.

    The value is parsed with SVGLength, reduced to the current Decimal
    precision, integerized when possible, stripped of trailing fractional
    zeros and written in scientific notation when that is shorter.  The
    unit of the input is appended unchanged.
    """
    length = SVGLength(str)
    coord = length.value

    # reduce to the proper number of digits
    coord = Decimal(unicode(coord)) * Decimal(1)

    # integerize if we can
    if int(coord) == coord: coord = Decimal(unicode(int(coord)))

    # trim trailing zeros of the fraction ourselves (Decimal has no trim()).
    # This must not be done when the text is in scientific notation: there
    # the trailing digits belong to the exponent, and stripping them would
    # turn e.g. 1.5E-10 into 1.5E-1.  normalize() below handles that case.
    s = unicode(coord)
    if s.find('.') != -1 and s.upper().find('E') == -1:
        coord = Decimal(s.rstrip('0'))

    # Decimal.normalize() will uses scientific notation - if that
    # string is smaller, then use it
    normd = coord.normalize()
    if len(unicode(normd)) < len(unicode(coord)):
        coord = normd

    return unicode(coord)+Unit.str(length.units)
def embedRasters(element, options) :
    """
    Converts raster references to inline images.
    NOTE: there are size limits to base64-encoding handling in browsers

    If the xlink:href of element points to a png, jpg or gif that can be
    read from disk (directly or relative to options.infilename) or fetched
    over http, the href is replaced by a base64 data: URI and
    numRastersEmbedded is incremented.  References that cannot be resolved
    are left untouched.

    NOTE(review): the href comes from the document being processed, so this
    reads arbitrary local files and fetches arbitrary http URLs named by the
    input; only enable it for trusted documents.
    """
    global numRastersEmbedded

    href = element.getAttributeNS(NS['XLINK'],'href')

    # if xlink:href is set, then grab the id
    if href != '' and len(href) > 1:
        # find if href value has filename ext
        ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]

        # look for 'png', 'jpg', and 'gif' extensions
        if ext == 'png' or ext == 'jpg' or ext == 'gif':

            # check if href resolves to an existing file
            if not os.path.isfile(href) and href[:7] != 'http://':
                # if this is not an absolute path, set path relative
                # to script file based on input arg
                infilename = '.'
                if options.infilename: infilename = options.infilename
                href = os.path.join(os.path.dirname(infilename), href)

            rasterdata = ''
            # test if file exists locally
            if os.path.isfile(href):
                # open raster file as raw binary and always close it again
                raster = open( href, "rb")
                try:
                    rasterdata = raster.read()
                finally:
                    raster.close()

            elif href[:7] == 'http://':
                webFile = urllib.urlopen( href )
                try:
                    rasterdata = webFile.read()
                finally:
                    webFile.close()

            # ... should we remove all images which don't resolve?
            if rasterdata != '' :
                # base64-encode raster
                b64eRaster = base64.b64encode( rasterdata )

                # set href attribute to base64-encoded equivalent
                if b64eRaster != '':
                    # PNG and GIF both have MIME Type 'image/[ext]', but
                    # JPEG has MIME Type 'image/jpeg'
                    if ext == 'jpg':
                        ext = 'jpeg'

                    element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
                    numRastersEmbedded += 1
                del b64eRaster
def properlySizeDoc(docElement):
    """
    Replaces the width/height attributes of docElement by an equivalent
    viewBox ('0 0 width height') so that the image scales.

    Nothing is changed when width or height use a unit other than px, or
    when a valid viewBox exists that has a non-zero origin or a size that
    differs from width/height.
    """
    # get doc width and height
    w = SVGLength(docElement.getAttribute('width'))
    h = SVGLength(docElement.getAttribute('height'))

    # if width/height are not unitless or px then it is not ok to rewrite them into a viewBox
    # (the second test used to check the width a second time instead of the height)
    if ((w.units != Unit.NONE and w.units != Unit.PX) or
        (h.units != Unit.NONE and h.units != Unit.PX)):
        return

    # else we have a statically sized image and we should try to remedy that

    # parse viewBox attribute; the separator pattern is a comma with optional
    # whitespace or a whitespace run and can never match the empty string
    vbSep = re.split("\\s*,\\s*|\\s+", docElement.getAttribute('viewBox').strip(), 3)
    # if we have a valid viewBox we need to check it
    vbWidth,vbHeight = 0,0
    if len(vbSep) == 4:
        try:
            # if x or y are specified and non-zero then it is not ok to overwrite it
            vbX = float(vbSep[0])
            vbY = float(vbSep[1])
            if vbX != 0 or vbY != 0:
                return

            # if width or height are not equal to doc width/height then it is not ok to overwrite it
            vbWidth = float(vbSep[2])
            vbHeight = float(vbSep[3])
            if vbWidth != w.value or vbHeight != h.value:
                return
        # if the viewBox did not parse properly it is invalid and ok to overwrite it
        except ValueError:
            pass

    # at this point it's safe to set the viewBox and remove width/height
    docElement.setAttribute('viewBox', '0 0 %s %s' % (w.value, h.value))
    docElement.removeAttribute('width')
    docElement.removeAttribute('height')
def remapNamespacePrefix(node, oldprefix, newprefix):
    """
    Recursively rename the namespace prefix of elements below (and
    including) node.

    Every element whose prefix equals oldprefix is replaced in its parent by
    a freshly created element named newprefix:localName (or just localName
    when newprefix is the empty string).  Attributes are copied using their
    namespace URI and local name, and children are deep-cloned into the
    replacement.  Nodes that are None or are not element nodes are ignored.
    """
    # nodeType 1 is an element node; nothing else carries a prefix to remap
    if node is None or node.nodeType != 1:
        return

    if node.prefix == oldprefix:
        owner = node.ownerDocument
        local = node.localName

        # build the replacement element under the new name
        if newprefix != '':
            replacement = owner.createElementNS(node.namespaceURI, newprefix + ":" + local)
        else:
            replacement = owner.createElement(local)

        # carry every attribute over
        attributes = node.attributes
        for index in range(attributes.length):
            source = attributes.item(index)
            replacement.setAttributeNS(source.namespaceURI, source.localName, source.nodeValue)

        # carry deep copies of the children over
        for kid in node.childNodes:
            replacement.appendChild(kid.cloneNode(True))

        # swap the replacement in and keep walking from it
        node.parentNode.replaceChild(replacement, node)
        node = replacement

    # descend into the (possibly freshly cloned) children
    for kid in node.childNodes:
        remapNamespacePrefix(kid, oldprefix, newprefix)
def makeWellFormed(str):
    """
    Return str with the XML markup characters '&', '<' and '>' replaced by
    their entity references so it can be written into XML output.

    The replacements are applied cumulatively, so a string containing
    several different special characters has all of them escaped.  Quote
    characters are not escaped here.

    The parameter name shadows the builtin but is kept unchanged so the
    function's interface stays the same.
    """
    newstr = str

    # '&' must be encoded first so that the ampersands introduced by the
    # '&lt;' / '&gt;' replacements are not themselves re-encoded
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        newstr = newstr.replace(char, entity)

    return newstr
2028 # hand-rolled serialization function that has the following benefits:
2029 # - pretty printing
2030 # - somewhat judicious use of whitespace
2031 # - ensure id attributes are first
def serializeXML(element, options, ind = 0, preserveWhitespace = False):
    """
    Serialize a DOM element (and, recursively, its children) to a string.

    element            -- the DOM element to write out
    options            -- object whose indent_type attribute selects the
                          indentation unit: 'tab', 'space', anything else
                          means no indentation
    ind                -- nesting depth used for the indentation of this element
    preserveWhitespace -- when true, child elements are written inline and
                          text nodes are not stripped; an xml:space attribute
                          on the element switches this on ('preserve') or
                          off ('default')

    The id and xml:id attributes are always written first (their values are
    written as-is); all other attribute values and text nodes are passed
    through makeWellFormed.  Elements without child nodes are self-closed.
    Node types other than element, text, CDATA and comment are dropped.
    """
    if options.indent_type == 'tab':
        unit = '\t'
    elif options.indent_type == 'space':
        unit = ' '
    else:
        unit = ''
    pad = unit * ind

    pieces = [pad + '<' + element.nodeName]

    # the id and xml:id attributes always come first
    for idName in ('id', 'xml:id'):
        idValue = element.getAttribute(idName)
        if idValue != '':
            # use single quotes when the value itself contains a double quote
            quot = '"'
            if '"' in idValue:
                quot = "'"
            pieces.append(' ' + idName + '=' + quot + idValue + quot)

    # then every remaining attribute, in DOM order
    attrs = element.attributes
    for position in range(attrs.length):
        attr = attrs.item(position)
        if attr.nodeName in ('id', 'xml:id'):
            continue

        # the quote style is decided on the raw (unescaped) value
        rawValue = attr.nodeValue
        quot = '"'
        if '"' in rawValue:
            quot = "'"
        attrValue = makeWellFormed(rawValue)

        pieces.append(' ')
        # preserve xmlns: if it is a namespace prefix declaration
        if attr.prefix is not None:
            pieces.append(attr.prefix + ':')
        elif attr.namespaceURI is not None:
            if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/' and attr.nodeName.find('xmlns') == -1:
                pieces.append('xmlns:')
            elif attr.namespaceURI == 'http://www.w3.org/1999/xlink':
                pieces.append('xlink:')
        pieces.append(attr.localName + '=' + quot + attrValue + quot)

        # xml:space changes whitespace handling for this element's content
        if attr.nodeName == 'xml:space':
            if attrValue == 'preserve':
                preserveWhitespace = True
            elif attrValue == 'default':
                preserveWhitespace = False

    if element.childNodes.length > 0:
        pieces.append('>')

        onNewLine = False
        for child in element.childNodes:
            kind = child.nodeType
            if kind == 1:
                # element node
                if preserveWhitespace:
                    pieces.append(serializeXML(child, options, 0, preserveWhitespace))
                else:
                    pieces.append('\n' + serializeXML(child, options, ind + 1, preserveWhitespace))
                    onNewLine = True
            elif kind == 3:
                # text node: trimmed unless whitespace might be significant
                text = child.nodeValue
                if not preserveWhitespace:
                    text = text.strip()
                pieces.append(makeWellFormed(text))
            elif kind == 4:
                # CDATA node
                pieces.append('<![CDATA[' + child.nodeValue + ']]>')
            elif kind == 8:
                # comment node
                pieces.append('<!--' + child.nodeValue + '-->')
            # TODO: entities, processing instructions, what else?
            # (every other node type is ignored)

        # the closing tag is indented only if a child element started a new line
        if onNewLine:
            pieces.append(pad)
        pieces.append('</' + element.nodeName + '>')
    else:
        # no children: self-close
        pieces.append('/>')

    # nested elements end their own line; the outermost one does not
    if ind > 0:
        pieces.append('\n')

    return ''.join(pieces)
2124 # this is the main method
2125 # input is a string representation of the input XML
2126 # returns a string representation of the output XML
def scourString(in_string, options=None):
    """
    Scour an SVG document given as a string and return the result as a string.

    in_string -- the input XML; it is handed to xml.dom.minidom.parseString
    options   -- option values (as produced by _options_parser); when None,
                 the parser's default values are used

    Returns the serialized document with blank lines removed, preceded by an
    XML prolog unless options.strip_xml_prolog is set.

    Side effects: sets the precision of the current decimal context to
    options.digits and updates the module-level counters numAttrsRemoved,
    numStylePropsFixed, numElemsRemoved and numBytesSavedInColors.
    """
    if options is None:
        options = _options_parser.get_default_values()
    getcontext().prec = options.digits
    global numAttrsRemoved
    global numStylePropsFixed
    global numElemsRemoved
    global numBytesSavedInColors
    doc = xml.dom.minidom.parseString(in_string)

    # for whatever reason this does not always remove all inkscape/sodipodi attributes/elements
    # on the first pass, so we do it multiple times
    # does it have to do with removal of children affecting the childlist?
    if options.keep_editor_data == False:
        while removeNamespacedElements( doc.documentElement, unwanted_ns ) > 0 :
            pass
        while removeNamespacedAttributes( doc.documentElement, unwanted_ns ) > 0 :
            pass

        # remove the xmlns: declarations now
        # (names are collected first and removed afterwards, so the attribute
        # list is not modified while it is being indexed)
        xmlnsDeclsToRemove = []
        attrList = doc.documentElement.attributes
        for num in range(attrList.length) :
            if attrList.item(num).nodeValue in unwanted_ns :
                xmlnsDeclsToRemove.append(attrList.item(num).nodeName)

        for attr in xmlnsDeclsToRemove :
            doc.documentElement.removeAttribute(attr)
            numAttrsRemoved += 1

    # ensure namespace for SVG is declared
    # TODO: what if the default namespace is something else (i.e. some valid namespace)?
    if doc.documentElement.getAttribute('xmlns') != 'http://www.w3.org/2000/svg':
        doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/2000/svg')
        # TODO: throw error or warning?

    # check for redundant SVG namespace declaration
    # (an xmlns:<prefix> bound to the SVG namespace; the prefix is remembered
    # so elements using it can be remapped to the default namespace below)
    attrList = doc.documentElement.attributes
    xmlnsDeclsToRemove = []
    redundantPrefixes = []
    for i in range(attrList.length):
        attr = attrList.item(i)
        name = attr.nodeName
        val = attr.nodeValue
        if name[0:6] == 'xmlns:' and val == 'http://www.w3.org/2000/svg':
            redundantPrefixes.append(name[6:])
            xmlnsDeclsToRemove.append(name)

    for attrName in xmlnsDeclsToRemove:
        doc.documentElement.removeAttribute(attrName)

    for prefix in redundantPrefixes:
        remapNamespacePrefix(doc.documentElement, prefix, '')

    # repair style (remove unnecessary style properties and change them into XML attributes)
    numStylePropsFixed = repairStyle(doc.documentElement, options)

    # convert colors to #RRGGBB format
    if options.simple_colors:
        numBytesSavedInColors = convertColors(doc.documentElement)

    # remove empty defs, metadata, g
    # NOTE: these elements will be removed even if they have (invalid) text nodes
    # NOTE(review): as written, a child text node (nodeType 3) makes the inner
    # loop break and so also prevents removal, which seems to contradict the
    # NOTE above -- confirm the intent
    # NOTE(review): elemsToRemove is never used below
    elemsToRemove = []
    for tag in ['defs', 'metadata', 'g'] :
        for elem in doc.documentElement.getElementsByTagName(tag) :
            removeElem = not elem.hasChildNodes()
            if removeElem == False :
                # for/else: the else branch runs only when no child of type
                # element (1), text (3), CDATA (4) or comment (8) was found
                for child in elem.childNodes :
                    if child.nodeType in [1, 3, 4, 8] :
                        break
                else:
                    removeElem = True
            if removeElem :
                elem.parentNode.removeChild(elem)
                numElemsRemoved += 1

    # remove unreferenced gradients/patterns outside of defs
    while removeUnreferencedElements(doc) > 0:
        pass

    # strip ids that nothing references; repeated until a pass removes none
    if options.strip_ids:
        bContinueLooping = True
        while bContinueLooping:
            identifiedElements = findElementsWithId(doc.documentElement)
            referencedIDs = findReferencedElements(doc.documentElement)
            bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)

    if options.group_collapse:
        while removeNestedGroups(doc.documentElement) > 0:
            pass

    while removeDuplicateGradientStops(doc) > 0:
        pass

    # remove gradients that are only referenced by one other gradient
    while collapseSinglyReferencedGradients(doc) > 0:
        pass

    # remove duplicate gradients
    while removeDuplicateGradients(doc) > 0:
        pass

    # move common attributes to parent group
    numAttrsRemoved += moveCommonAttributesToParentGroup(doc.documentElement)

    # remove unused attributes from parent
    numAttrsRemoved += removeUnusedAttributesOnParent(doc.documentElement)

    # clean path data
    # (paths with an empty or missing 'd' attribute are dropped entirely)
    for elem in doc.documentElement.getElementsByTagName('path') :
        if elem.getAttribute('d') == '':
            elem.parentNode.removeChild(elem)
        else:
            cleanPath(elem)

    # remove unnecessary closing point of polygons and scour points
    for polygon in doc.documentElement.getElementsByTagName('polygon') :
        cleanPolygon(polygon)

    # scour points of polyline
    for polyline in doc.documentElement.getElementsByTagName('polyline') :
        cleanPolygon(polyline)

    # scour lengths (including coordinates)
    for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', 'linearGradient', 'radialGradient', 'stop']:
        for elem in doc.getElementsByTagName(type):
            for attr in ['x', 'y', 'width', 'height', 'cx', 'cy', 'r', 'rx', 'ry',
                         'x1', 'y1', 'x2', 'y2', 'fx', 'fy', 'offset', 'opacity',
                         'fill-opacity', 'stroke-opacity', 'stroke-width', 'stroke-miterlimit']:
                if elem.getAttribute(attr) != '':
                    elem.setAttribute(attr, scourLength(elem.getAttribute(attr)))

    # remove default values of attributes
    numAttrsRemoved += removeDefaultAttributeValues(doc.documentElement, options)

    # convert rasters references to base64-encoded strings
    if options.embed_rasters:
        for elem in doc.documentElement.getElementsByTagName('image') :
            embedRasters(elem, options)

    # properly size the SVG document (ideally width/height should be 100% with a viewBox)
    properlySizeDoc(doc.documentElement)

    # output the document as a pretty string with a single space for indent
    # NOTE: removed pretty printing because of this problem:
    # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
    # rolled our own serialize function here to save on space, put id first, customize indentation, etc
    # out_string = doc.documentElement.toprettyxml(' ')
    out_string = serializeXML(doc.documentElement, options)

    # now strip out empty lines
    lines = []
    # Get rid of empty lines
    # (splitlines(True) keeps the line endings, so the kept lines can simply
    # be concatenated again)
    for line in out_string.splitlines(True):
        if line.strip():
            lines.append(line)

    # return the string stripped of empty lines
    if options.strip_xml_prolog == False:
        xmlprolog = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
    else:
        xmlprolog = ""

    return xmlprolog + "".join(lines)
2293 # used mostly by unit tests
2294 # input is a filename
2295 # returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
    """
    Scour the SVG file at filename and return the result as a minidom document.

    filename -- path of the file to read
    options  -- passed through unchanged to scourString (None selects its
                defaults)

    The scoured output string is encoded as UTF-8 and re-parsed with
    xml.dom.minidom.parseString; that parsed document is returned.
    """
    in_file = open(filename)
    # try/finally (rather than a with-statement) keeps this usable on the old
    # interpreters the rest of the file still caters for, while making sure
    # the file handle is released even if reading fails
    try:
        in_string = in_file.read()
    finally:
        in_file.close()
    out_string = scourString(in_string, options)
    return xml.dom.minidom.parseString(out_string.encode('utf-8'))
2301 # GZ: Seems most other commandline tools don't do this, is it really wanted?
class HeaderedFormatter(optparse.IndentedHelpFormatter):
    """
    Help formatter that puts a header in front of the usage text: the
    application name and version on one line, then the copyright statement.
    """
    def format_usage(self, usage):
        # let the stock formatter render the usage text, then prepend the header
        standard_usage = optparse.IndentedHelpFormatter.format_usage(self, usage)
        header = "%s %s\n%s" % (APP, VER, COPYRIGHT)
        return "%s\n%s" % (header, standard_usage)
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
def _build_options_parser():
    """Create the optparse parser that declares every command-line option."""
    parser = optparse.OptionParser(
        usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
        description=("If the input/output files are specified with a svgz"
                     " extension, then compressed SVG is assumed. If the input file is not"
                     " specified, stdin is used. If the output file is not specified, "
                     " stdout is used."),
        formatter=HeaderedFormatter(max_help_position=30),
        version=VER)

    # on/off switches, in the order they appear in --help:
    # (flag, action, destination, default, help text)
    switches = (
        ("--disable-simplify-colors", "store_false", "simple_colors", True,
         "won't convert all colors to #RRGGBB format"),
        ("--disable-style-to-xml", "store_false", "style_to_xml", True,
         "won't convert styles into XML attributes"),
        ("--disable-group-collapsing", "store_false", "group_collapse", True,
         "won't collapse <g> elements"),
        ("--enable-id-stripping", "store_true", "strip_ids", False,
         "remove all un-referenced ID attributes"),
        ("--disable-embed-rasters", "store_false", "embed_rasters", True,
         "won't embed rasters as base64-encoded data"),
        ("--keep-editor-data", "store_true", "keep_editor_data", False,
         "won't remove Inkscape, Sodipodi or Adobe Illustrator elements and attributes"),
        ("--strip-xml-prolog", "store_true", "strip_xml_prolog", False,
         "won't output the <?xml ?> prolog"),
    )
    for flag, action, dest, default, help_text in switches:
        parser.add_option(flag,
            action=action, dest=dest, default=default,
            help=help_text)

    # GZ: this is confusing, most people will be thinking in terms of
    # decimal places, which is not what decimal precision is doing
    parser.add_option("-p", "--set-precision",
        action="store", type=int, dest="digits", default=5,
        help="set number of significant digits (default: %default)")

    # input/output file names are documented in the usage line, not in the
    # option list, hence the suppressed help
    for flag, dest in (("-i", "infilename"), ("-o", "outfilename")):
        parser.add_option(flag,
            action="store", dest=dest, help=optparse.SUPPRESS_HELP)

    parser.add_option("--indent",
        action="store", type="string", dest="indent_type", default="space",
        help="indentation of the output: none, space, tab (default: %default)")

    return parser

_options_parser = _build_options_parser()
def maybe_gziped_file(filename, mode="r"):
    """
    Open filename, transparently using gzip for compressed SVG files.

    filename -- path of the file to open
    mode     -- file mode, passed on unchanged (default "r")

    Returns a gzip.GzipFile when the file extension is .svgz or .gz (compared
    case-insensitively), otherwise a regular file object.
    """
    if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
        return gzip.GzipFile(filename, mode)
    # open() is the documented way to create file objects; the file()
    # constructor used previously only exists on Python 2
    return open(filename, mode)
def parse_args(args=None):
    """
    Parse and validate the command line, then open the input and output streams.

    args -- argument list handed to the option parser (None lets optparse
            fall back to its default)

    Returns a tuple (options, [infile, outfile]).  infile/outfile are opened
    through maybe_gziped_file when a file name was given, and are
    sys.stdin/sys.stdout otherwise.  Invalid input is reported through the
    option parser's error() method.
    """
    options, rargs = _options_parser.parse_args(args)
    fail = _options_parser.error

    # validation, in the same order the problems are reported to the user
    if rargs:
        fail("Additional arguments not handled: %r, see --help" % rargs)
    if options.digits < 0:
        fail("Can't have negative significant digits, see --help")
    if options.indent_type not in ["tab", "space", "none"]:
        fail("Invalid value for --indent, see --help")
    if options.infilename and options.outfilename and options.infilename == options.outfilename:
        fail("Input filename is the same as output filename")

    # GZ: could sniff stdin for gzip compression here
    source = sys.stdin
    if options.infilename:
        # GZ: could catch a raised IOError here and report
        source = maybe_gziped_file(options.infilename)

    sink = sys.stdout
    if options.outfilename:
        sink = maybe_gziped_file(options.outfilename, "w")

    return options, [source, sink]
def getReport():
    """
    Return a multi-line, human-readable summary of the module-level
    statistics counters, one 'label: value' entry per line.
    """
    # each label carries its own leading newline (all but the first), so the
    # formatted entries can simply be concatenated
    entries = (
        (' Number of elements removed: ', numElemsRemoved),
        ('\n Number of attributes removed: ', numAttrsRemoved),
        ('\n Number of unreferenced id attributes removed: ', numIDsRemoved),
        ('\n Number of style properties fixed: ', numStylePropsFixed),
        ('\n Number of raster images embedded inline: ', numRastersEmbedded),
        ('\n Number of path segments reduced/removed: ', numPathSegmentsReduced),
        ('\n Number of bytes saved in path data: ', numBytesSavedInPathData),
        ('\n Number of bytes saved in colors: ', numBytesSavedInColors),
        ('\n Number of points removed from polygons: ', numPointsRemovedFromPolygon),
    )
    return ''.join([label + str(count) for label, count in entries])
# Command-line entry point: read the input, scour it, write the result, and
# print a banner plus statistics to stderr.
if __name__ == '__main__':
    # pick a timer; on non-Windows platforms the first field of os.times()
    # is used
    if sys.platform == "win32":
        from time import clock as get_tick
    else:
        # GZ: is this different from time.time() in any way?
        def get_tick():
            return os.times()[0]

    start = get_tick()

    # NOTE(review): 'input' shadows the builtin of the same name within this script
    options, (input, output) = parse_args()

    print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)

    # do the work
    in_string = input.read()
    out_string = scourString(in_string, options).encode("UTF-8")
    output.write(out_string)

    # Close input and output files
    # NOTE(review): when no file names were given these are sys.stdin/sys.stdout
    input.close()
    output.close()

    end = get_tick()

    # GZ: unless silenced by -q or something?
    # GZ: not using globals would be good too
    print >>sys.stderr, ' File:', input.name, \
        '\n Time taken:', str(end-start) + 's\n', \
        getReport()

    # size of the output as a percentage of the input size; '/' is true
    # division here because of the file's 'from __future__ import division'
    oldsize = len(in_string)
    newsize = len(out_string)
    sizediff = (newsize / oldsize) * 100
    print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
        'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'