1 #!/usr/bin/env python
2 # This software is OSI Certified Open Source Software.
3 # OSI Certified is a certification mark of the Open Source Initiative.
4 #
5 # Copyright (c) 2006, Enthought, Inc.
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
10 #
11 # * Redistributions of source code must retain the above copyright notice, this
12 # list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 # * Neither the name of Enthought, Inc. nor the names of its contributors may
17 # be used to endorse or promote products derived from this software without
18 # specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 """ Small hand-written recursive descent parser for SVG <path> data.
34 In [1]: from svg_regex import svg_parser
36 In [3]: svg_parser.parse('M 10,20 30,40V50 60 70')
37 Out[3]: [('M', [(10.0, 20.0), (30.0, 40.0)]), ('V', [50.0, 60.0, 70.0])]
    In [4]: svg_parser.parse('M 0.6051.5')  # Edge case: lexed greedily as "0.6051" then ".5"
    Out[4]: [('M', [(0.6051, 0.5)])]
42 In [5]: svg_parser.parse('M 100-200') # Another edge case
43 Out[5]: [('M', [(100.0, -200.0)])]
44 """
46 import re
# End-of-input sentinel.
class _EOF(object):
    """ Type of the end-of-input marker.

    Only its `repr` is customised, so that the marker prints as ``EOF`` when
    it shows up inside a token tuple (for example in an error message).
    """

    def __repr__(self):
        return 'EOF'


# The one shared instance; this module compares against it with `is`.
EOF = _EOF()
# Token table as (token_name, regex) pairs.  The order is significant when
# the patterns are combined into one alternation: earlier entries win.  As
# written, the 'float' pattern also accepts bare integers (its "[0-9]+\.?"
# branch), so plain integers are reported as 'float' tokens.
lexicon = [
    (
        'float',
        r'[-\+]?'                                # optional sign
        r'(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.?))'  # ".5", "0.5", "5." or "5"
        r'(?:[Ee][-\+]?[0-9]+)?',                # optional exponent
    ),
    ('int', r'[-\+]?[0-9]+'),
    ('command', r'[AaCcHhLlMmQqSsTtVvZz]'),
]
class Lexer(object):
    """ Split SVG path data into (token_type, text) tokens.

    The lexicon is a sequence of (name, regex) pairs.  Each regex is wrapped
    in a named group and the groups are OR-ed together into one compiled
    pattern, so earlier lexicon entries take precedence over later ones.

    The SVG spec requires greedy tokens; this relies on Python's regexes
    being greedy by default.  Text that matches no lexicon entry (whitespace,
    commas, anything else) is skipped without error.

    The implementation style was inspired by this article:

        http://www.gooli.org/blog/a-simple-lexer-in-python/
    """

    def __init__(self, lexicon):
        """ Compile `lexicon`, a sequence of (name, regex) pairs.

        Sets `self.lexicon`, `self.regex_string` (the combined pattern
        source) and `self.regex` (the compiled pattern).
        """
        self.lexicon = lexicon
        self.regex_string = '|'.join(
            '(?P<%s>%s)' % (name, regex) for name, regex in lexicon
        )
        self.regex = re.compile(self.regex_string)

    def lex(self, text):
        """ Yield (token_type, str_data) tokens found in `text`.

        The final token is always (EOF, None), where EOF is the singleton
        object defined in this module.
        """
        for found in self.regex.finditer(text):
            # Report the first lexicon entry whose named group took part in
            # this match.
            for kind, _pattern in self.lexicon:
                value = found.group(kind)
                if value is None:
                    continue
                yield (kind, value)
                break
        yield (EOF, None)
# Shared module-level lexer built from the default `lexicon`; it is the
# default value of SVGPathParser's `lexer` argument.
svg_lexer = Lexer(lexicon)
class SVGPathParser(object):
    """ Parse SVG <path> data into a list of commands.

    Each distinct command will take the form of a tuple (command, data). The
    `command` is just the character string that starts the command group in the
    <path> data, so 'M' for absolute moveto, 'm' for relative moveto, 'Z' for
    closepath, etc. The kind of data it carries with it depends on the command.
    For 'Z' (closepath), it's just None. The others are lists of individual
    argument groups. Multiple elements in these lists usually mean to repeat the
    command. The notable exception is 'M' (moveto) where only the first element
    is truly a moveto. The remainder are implicit linetos.

    Shape of one argument group per command (case-insensitive):

        M, L, T : (x, y)
        H, V    : x
        C       : ((x1, y1), (x2, y2), (x, y))
        S, Q    : ((x1, y1), (x, y))
        A       : ((rx, ry), axis_rotation, large_arc_flag, sweep_flag, (x, y))

    See the SVG documentation for the interpretation of the individual elements
    for each command.

    The main method is `parse(text)`. It can only consume actual strings, not
    filelike objects or iterators.

    All `rule_*` methods share one calling convention: they receive `next`, a
    zero-argument callable returning the following token, and `token`, the
    current (not yet consumed) token. They return `(result, token)` where
    `token` is the first token they did not consume. They raise SyntaxError
    when the token stream does not fit the rule.
    """

    def __init__(self, lexer=svg_lexer):
        """ Create a parser that tokenizes with `lexer`.

        `lexer` must provide `lex(text)` yielding (token_type, str_data)
        tuples terminated by an (EOF, None) token.
        """
        self.lexer = lexer

        # Maps each command letter to the rule that parses its arguments.
        self.command_dispatch = {
            'Z': self.rule_closepath,
            'z': self.rule_closepath,
            'M': self.rule_moveto_or_lineto,
            'm': self.rule_moveto_or_lineto,
            'L': self.rule_moveto_or_lineto,
            'l': self.rule_moveto_or_lineto,
            'H': self.rule_orthogonal_lineto,
            'h': self.rule_orthogonal_lineto,
            'V': self.rule_orthogonal_lineto,
            'v': self.rule_orthogonal_lineto,
            'C': self.rule_curveto3,
            'c': self.rule_curveto3,
            'S': self.rule_curveto2,
            's': self.rule_curveto2,
            'Q': self.rule_curveto2,
            'q': self.rule_curveto2,
            'T': self.rule_curveto1,
            't': self.rule_curveto1,
            'A': self.rule_elliptical_arc,
            'a': self.rule_elliptical_arc,
        }

        # Token types that count as numbers.
        self.number_tokens = ['int', 'float']

    def parse(self, text):
        """ Parse a string of SVG <path> data.

        Returns a list of (command, data) tuples as described in the class
        docstring. Raises SyntaxError if the data is malformed.
        """
        tokens = iter(self.lexer.lex(text))

        # The builtin next() works on Python 2.6+ and 3.x alike, whereas the
        # generator's own `.next` method only exists on Python 2.
        def advance():
            return next(tokens)

        return self.rule_svg_path(advance, advance())

    def rule_svg_path(self, next, token):
        """ Parse a whole path: a sequence of command groups up to EOF.

        Returns the list of (command, data) tuples. Raises SyntaxError if a
        group does not start with a command token.
        """
        commands = []
        while token[0] is not EOF:
            if token[0] != 'command':
                raise SyntaxError("expecting a command; got %r" % (token,))
            rule = self.command_dispatch[token[1]]
            command_group, token = rule(next, token)
            commands.append(command_group)
        return commands

    def rule_closepath(self, next, token):
        """ Parse 'Z'/'z'; the data part of the result is None. """
        command = token[1]
        token = next()
        return (command, None), token

    def rule_moveto_or_lineto(self, next, token):
        """ Parse 'M'/'m'/'L'/'l' followed by zero or more (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair)
        return (command, coordinates), token

    def rule_orthogonal_lineto(self, next, token):
        """ Parse 'H'/'h'/'V'/'v' followed by zero or more single numbers. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            coord, token = self.rule_coordinate(next, token)
            coordinates.append(coord)
        return (command, coordinates), token

    def rule_curveto3(self, next, token):
        """ Parse 'C'/'c' followed by zero or more groups of three pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            pair3, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2, pair3))
        return (command, coordinates), token

    def rule_curveto2(self, next, token):
        """ Parse 'S'/'s'/'Q'/'q' followed by zero or more groups of two pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            pair2, token = self.rule_coordinate_pair(next, token)
            coordinates.append((pair1, pair2))
        return (command, coordinates), token

    def rule_curveto1(self, next, token):
        """ Parse 'T'/'t' followed by zero or more (x, y) pairs. """
        command = token[1]
        token = next()
        coordinates = []
        while token[0] in self.number_tokens:
            pair1, token = self.rule_coordinate_pair(next, token)
            coordinates.append(pair1)
        return (command, coordinates), token

    def rule_elliptical_arc(self, next, token):
        """ Parse 'A'/'a' followed by zero or more arc argument groups.

        Each group becomes
        ((rx, ry), axis_rotation, large_arc_flag, sweep_flag, (x, y)) with the
        radii required to be nonnegative and the flags converted to bools.
        Flags written without separators (e.g. "0 01 10,10" or "0 0110 10")
        are accepted, as the SVG path grammar allows.
        """
        command = token[1]
        token = next()
        arguments = []
        while token[0] in self.number_tokens:
            rx, token = self._rule_nonnegative_number(next, token)
            ry, token = self._rule_nonnegative_number(next, token)
            axis_rotation, token = self.rule_coordinate(next, token)
            large_arc_flag, token = self._rule_flag(next, token)
            sweep_flag, token = self._rule_flag(next, token)
            end_point, token = self.rule_coordinate_pair(next, token)
            arguments.append(
                ((rx, ry), axis_rotation, large_arc_flag, sweep_flag, end_point)
            )

        return (command, arguments), token

    def _rule_nonnegative_number(self, next, token):
        """ Parse one number and reject negative values (arc radii). """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        value = float(token[1])
        if value < 0.0:
            raise SyntaxError("expecting a nonnegative number; got %r" % (token,))
        return value, next()

    def _rule_flag(self, next, token):
        """ Parse one arc flag ('0' or '1') and return it as a bool.

        The grammar lets a flag abut whatever follows it, but the greedy
        lexer then fuses them into a single number token ('01', '110', ...).
        In that case the leading digit is taken as the flag and the rest of
        the text is handed back as the current token.
        """
        kind, text = token
        if text in ('0', '1'):
            return text == '1', next()
        if kind in self.number_tokens and text[:1] in ('0', '1'):
            remainder = text[1:]
            try:
                # What is left must still be a number on its own ('1e5'
                # would leave 'e5', which is not).
                float(remainder)
            except ValueError:
                pass
            else:
                return text[0] == '1', (kind, remainder)
        raise SyntaxError("expecting a boolean flag; got %r" % (token,))

    def rule_coordinate(self, next, token):
        """ Parse a single number and return it as a float. """
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        return x, token

    def rule_coordinate_pair(self, next, token):
        """ Parse two consecutive numbers and return them as an (x, y) tuple. """
        # Inline these since this rule is so common.
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        x = float(token[1])
        token = next()
        if token[0] not in self.number_tokens:
            raise SyntaxError("expecting a number; got %r" % (token,))
        y = float(token[1])
        token = next()
        return (x, y), token
# Ready-to-use parser instance built with the default lexer, e.g.
# `svg_parser.parse('M 10,20 30,40')`.
svg_parser = SVGPathParser()