1 <?php
/**
 * Regex-driven tokenizer.
 *
 * A script is split into a flat list of tokens, each an array with the keys
 * 'class' (lexical class name from $token_match_), 'text' (matched text with
 * trailing whitespace removed) and 'line' (1-based line on which the token
 * starts). Whitespace is discarded; comments are kept in the list but are
 * skipped by nextToken() and nextTokenIs(). A synthetic 'script-end' token is
 * appended after the last real token.
 *
 * The class deliberately sticks to `var` and visibility-less methods so it
 * keeps loading on the old PHP versions the original code targeted.
 */
class Scanner
{
    /** @var callable|null Callback invoked with every comment token skipped by nextToken(). */
    var $commentFn_ = null;

    /** @var int Index into $tokens_ of the next token to hand out. */
    var $tokenPos_ = 0;

    /** @var array List of scanned tokens (see class description for the shape). */
    var $tokens_ = array();

    /**
     * Lexical classes and their PCRE bodies (no delimiters, no anchor).
     *
     * Order matters: tokenize() tries the patterns top to bottom and takes the
     * first that matches, so 'multi-line' must precede 'identifier' and the
     * catch-all 'unknown token' must stay last.
     *
     * @var array
     */
    var $token_match_ = array (
        'left-bracket'  => '\[',
        'right-bracket' => '\]',
        'block-start'   => '\{',
        'block-end'     => '\}',
        'left-parant'   => '\(',
        'right-parant'  => '\)',
        'comma'         => ',',
        'semicolon'     => ';',
        'whitespace'    => '[ \r\n\t]+',
        'tag'           => ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        // The four backslashes are required: inside a single-quoted PHP string
        // '\\\\' yields the two-character regex '\\' (one literal backslash).
        // The resulting regex consumes \\ and \" as pairs, so an escaped quote
        // no longer terminates the string.
        'quoted-string' => '"(?:\\\\[\\\\"]|[^\x00"])*"',
        'number'        => '[[:digit:]]+(?:[KMG])?(?=\b)',
        'comment'       => '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?\n)',
        // Disabled alternative pattern for 'multi-line', kept for reference:
#       'multi-line'    => 'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.]*\r?\n)*\.\r?\n',
        'multi-line'    => 'text:[^;]*',
        'identifier'    => '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        'unknown token' => '[^ \r\n\t]+'
    );

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after the class as the
     * constructor, so without this method `new Scanner($script)` would
     * silently skip tokenizing. It is declared before Scanner() on purpose.
     *
     * @param string|null $script Script text; null creates an empty scanner.
     */
    function __construct(&$script)
    {
        $this->_construct($script);
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so existing explicit calls keep
     * working. Since PHP 8 it is an ordinary method.
     *
     * @param string|null $script Script text; null leaves the scanner unchanged.
     */
    function Scanner(&$script)
    {
        $this->_construct($script);
    }

    /**
     * Shared constructor body: tokenizes $script unless it is null.
     *
     * @param string|null $script Script text.
     */
    function _construct(&$script)
    {
        if ($script === null)
        {
            return;
        }

        $this->tokenize($script);
    }

    /**
     * Sets the callback that nextToken() invokes for each comment it skips.
     *
     * Values that are neither callable nor loosely equal to null are ignored
     * and the previously registered callback stays in place.
     *
     * @param callable|null $callback Receives the comment token array; a
     *                                null-like value disables the callback.
     */
    function setCommentFunc($callback)
    {
        // Loose comparison on purpose: '' / false / 0 also disable the callback,
        // matching the loose `!= null` test in nextToken().
        if ($callback == null || is_callable($callback))
        {
            $this->commentFn_ = $callback;
        }
    }

    /**
     * Scans $script and appends its tokens, followed by a 'script-end' token,
     * to $tokens_.
     *
     * Text that matches no specific pattern is recorded as an 'unknown token'
     * and scanning continues with the text after it. Existing tokens are not
     * cleared; the new ones are appended.
     *
     * @param string $script Script text to scan.
     */
    function tokenize(&$script)
    {
        $pos = 0;
        $line = 1;
        $script_length = mb_strlen($script);

        while ($pos < $script_length)
        {
            // The remaining text is the same for every pattern tried at this
            // position, so cut it once instead of once per pattern.
            $rest = mb_substr($script, $pos);
            $advanced = false;

            foreach ($this->token_match_ as $class => $regex)
            {
                if (!preg_match('/^'. $regex .'/', $rest, $match))
                {
                    continue;
                }

                $length = mb_strlen($match[0]);
                if ($length < 1)
                {
                    // An empty match cannot move the scan forward; try the next pattern.
                    continue;
                }

                if ($class != 'whitespace')
                {
                    array_push($this->tokens_, array(
                        'class' => $class,
                        'text'  => rtrim(mb_substr($rest, 0, $length)),
                        'line'  => $line,
                    ));
                }

                $pos += $length;
                // The token itself is stamped with the line it starts on;
                // newlines inside it only affect the tokens that follow.
                $line += mb_substr_count($match[0], "\n");
                $advanced = true;
                break;
            }

            if (!$advanced)
            {
                // No pattern consumed anything (only possible when the pattern
                // table has no catch-all entry); stop instead of looping forever.
                break;
            }
        }

        array_push($this->tokens_, array(
            'class' => 'script-end',
            'text'  => 'script-end',
            'line'  => $line,
        ));
    }

    /**
     * Tells whether the next non-comment token has the given class, without
     * consuming anything.
     *
     * @param string|array $class One class name or a list of acceptable names.
     * @return bool False when no non-comment token is left or $class is
     *              neither a string nor an array.
     */
    function nextTokenIs($class)
    {
        $next = null;
        $count = count($this->tokens_);

        for ($i = $this->tokenPos_; $i < $count; $i++)
        {
            if ($this->tokens_[$i]['class'] != 'comment')
            {
                $next = $this->tokens_[$i]['class'];
                break;
            }
        }

        if ($next === null)
        {
            return false;
        }

        if (is_array($class))
        {
            return in_array($next, $class, true);
        }
        else if (is_string($class))
        {
            return $next === $class;
        }
        return false;
    }

    /**
     * Returns the token at the current position without consuming it.
     *
     * Unlike nextToken() and nextTokenIs() this does not skip comments.
     *
     * @return array|null The token, or null when the list is exhausted.
     */
    function peekNextToken()
    {
        if (isset($this->tokens_[$this->tokenPos_]))
        {
            return $this->tokens_[$this->tokenPos_];
        }
        return null;
    }

    /**
     * Consumes and returns the next non-comment token.
     *
     * Every comment token skipped on the way is passed to the callback set
     * via setCommentFunc(), if any.
     *
     * @return array|null The token, or null when the list is exhausted.
     */
    function nextToken()
    {
        $count = count($this->tokens_);

        while ($this->tokenPos_ < $count)
        {
            $token = $this->tokens_[$this->tokenPos_++];
            if ($token['class'] != 'comment')
            {
                return $token;
            }

            if ($this->commentFn_ != null)
            {
                call_user_func($this->commentFn_, $token);
            }
        }

        return null;
    }

    /**
     * Builds a synthetic 'script-start' token located on line 1.
     *
     * @return array Token array with 'class', 'text' and 'line'.
     */
    function scriptStart()
    {
        return array(
            'class' => 'script-start',
            'text'  => 'script-start',
            'line'  => 1,
        );
    }
}
140 ?>