1 <?php
/**
 * Regex-driven tokenizer.
 *
 * Splits a script string into a flat list of tokens and lets a consumer walk
 * that list. Every token is an array with three keys:
 *   'class' - the key of the pattern in $token_match_ that matched,
 *   'text'  - the matched text with trailing whitespace removed,
 *   'line'  - the 1-based line on which the token starts.
 *
 * NOTE(review): the token table (tags like ":name", numbers with K/M/G
 * suffixes, "text:" multi-line blocks) looks like the Sieve mail filter
 * grammar - confirm against the parser that consumes these tokens.
 */
class Scanner
{
    /**
     * Tokenizes $script immediately unless it is null.
     *
     * Needed because PHP 8 no longer treats a method named after the class
     * as a constructor.
     *
     * @param string|null $script Script source, taken by reference.
     */
    function __construct(&$script)
    {
        $this->_construct($script);
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so existing explicit calls such
     * as parent::Scanner($script) keep working.
     *
     * @param string|null $script Script source, taken by reference.
     */
    function Scanner(&$script)
    {
        $this->_construct($script);
    }

    /**
     * Shared constructor body: tokenizes $script unless it is null.
     *
     * @param string|null $script Script source, taken by reference.
     */
    function _construct(&$script)
    {
        if ($script === null)
        {
            return;
        }

        $this->tokenize($script);
    }

    /**
     * Registers the callback that nextToken() invokes for each comment token
     * it skips. A value that is neither loosely equal to null nor callable
     * is ignored and the previous setting is kept.
     *
     * @param callable|null $callback Receives the comment token array.
     */
    function setCommentFunc($callback)
    {
        if ($callback == null || is_callable($callback))
        {
            $this->commentFn_ = $callback;
        }
    }

    /**
     * Appends the tokens found in $script to the token list, followed by a
     * final 'script-end' token.
     *
     * At each position the patterns in $token_match_ are tried in
     * declaration order and the first one that matches wins. Whitespace
     * advances the position but is not recorded as a token.
     *
     * @param string $script Script source, taken by reference.
     */
    function tokenize(&$script)
    {
        $pos = 0;
        $line = 1;
        $script_length = mb_strlen($script);

        while ($pos < $script_length)
        {
            // The remaining input is identical for every pattern tried at
            // this position, so slice it once instead of once per pattern.
            $rest = mb_substr($script, $pos);
            $consumed = 0;

            foreach ($this->token_match_ as $class => $regex)
            {
                if (!preg_match('/^'. $regex .'/', $rest, $match))
                {
                    continue;
                }

                $consumed = mb_strlen($match[0]);

                if ($class !== 'whitespace')
                {
                    array_push($this->tokens_, array(
                        'class' => $class,
                        'text'  => chop(mb_substr($script, $pos, $consumed)),
                        'line'  => $line,
                    ));
                }

                // NOTE(review): the default table has no class named
                // 'unknown' (its catch-all is 'unknown token'), so this
                // early return never triggers with it and 'script-end' is
                // always appended. Left untouched because callers may rely
                // on that; confirm the intended behavior.
                if ($class === 'unknown')
                {
                    return;
                }

                $pos += $consumed;
                $line += mb_substr_count($match[0], "\n");
                break;
            }

            // Nothing matched, or the match was empty: the position cannot
            // advance, so stop rather than loop forever.
            if ($consumed < 1)
            {
                break;
            }
        }

        array_push($this->tokens_, array(
            'class' => 'script-end',
            'text'  => 'script-end',
            'line'  => $line,
        ));
    }

    /**
     * Tells whether the next non-comment token has the given class, without
     * consuming anything.
     *
     * @param string|array $class One class name or a list of accepted names.
     * @return bool False when no token is left or $class is neither a
     *              string nor an array.
     */
    function nextTokenIs($class)
    {
        $offset = 0;
        do
        {
            $index = $this->tokenPos_ + $offset++;
            if (!isset($this->tokens_[$index]))
            {
                // Ran off the end of the token list: nothing to compare.
                return false;
            }
            $next = $this->tokens_[$index]['class'];
        }
        while ($next === 'comment');

        if (is_array($class))
        {
            return in_array($next, $class, true);
        }
        else if (is_string($class))
        {
            return (strcmp($next, $class) == 0);
        }
        return false;
    }

    /**
     * Returns the token at the current position without consuming it.
     * Unlike nextToken(), comment tokens are not skipped here.
     *
     * @return array|null The token, or null when no token is left.
     */
    function peekNextToken()
    {
        if (!isset($this->tokens_[$this->tokenPos_]))
        {
            return null;
        }
        return $this->tokens_[$this->tokenPos_];
    }

    /**
     * Consumes and returns the next non-comment token. Each comment token
     * skipped on the way is passed to the comment callback, if one is set.
     *
     * @return array|null The token, or null when no token is left.
     */
    function nextToken()
    {
        while (isset($this->tokens_[$this->tokenPos_]))
        {
            $token = $this->tokens_[$this->tokenPos_++];
            if ($token['class'] !== 'comment')
            {
                return $token;
            }

            if ($this->commentFn_ != null)
            {
                call_user_func($this->commentFn_, $token);
            }
        }
        return null;
    }

    /**
     * Builds a synthetic 'script-start' token located on line 1. It is not
     * stored in the token list.
     *
     * @return array
     */
    function scriptStart()
    {
        return array(
            'class' => 'script-start',
            'text'  => 'script-start',
            'line'  => 1,
        );
    }

    // Callback invoked for skipped comment tokens, or null for none.
    var $commentFn_ = null;

    // Index into $tokens_ of the next token to hand out.
    var $tokenPos_ = 0;

    // Tokens produced by tokenize(), in source order.
    var $tokens_ = array();

    // Token class => PCRE fragment. Each fragment is anchored with '^' and
    // wrapped in '/' delimiters by tokenize(). Order matters: the first
    // matching entry wins, so the catch-all must stay last.
    var $token_match_ = array (
        'left-bracket'  => '\[',
        'right-bracket' => '\]',
        'block-start'   => '\{',
        'block-end'     => '\}',
        'left-parant'   => '\(',
        'right-parant'  => '\)',
        'comma'         => ',',
        'semicolon'     => ';',
        'whitespace'    => '[ \r\n\t]+',
        'tag'           => ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        // Four backslashes in this single-quoted literal give the regex
        // two, i.e. one literal backslash; the first alternative therefore
        // matches an escaped backslash or an escaped double quote.
        'quoted-string' => '"(?:\\\\[\\\\"]|[^\x00"])*"',
        'number'        => '[[:digit:]]+(?:[KMG])?(?=\b)',
        'comment'       => '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?\n)',
        'multi-line'    => 'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.]*\r?\n)*\.\r?\n',
        'identifier'    => '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        'unknown token' => '[^ \r\n\t]+'
    );
}
140 ?>