1 <?php
/**
 * Lexical scanner that splits a script into a flat list of tokens.
 *
 * tokenize() appends one entry per token to $tokens_. Every entry is an array
 * with three keys:
 *   'class' - the key of the $token_match_ pattern that matched, or
 *             'script-end' for the terminating entry,
 *   'text'  - the matched text with trailing whitespace removed,
 *   'line'  - the 1-based line number on which the token starts.
 */
class My_Scanner extends Scanner
{
    // NOTE(review): $commentFn_ and $tokenPos_ are not referenced by any code
    // in this class; presumably they are used by the parent Scanner - confirm.
    var $commentFn_ = null;
    var $tokenPos_ = 0;

    // Tokens collected by tokenize(), in script order.
    var $tokens_ = array();

    // Token class => PCRE fragment. tokenize() anchors each fragment at the
    // current position and tries them in this order; the first one that
    // matches wins. The order therefore matters: 'multi-line' has to precede
    // 'identifier' (otherwise "text:" would be scanned as the identifier
    // "text"), and 'unknown token' has to stay last because it accepts any
    // run of non-whitespace.
    var $token_match_ = array(
        'left-bracket'  => '\[',
        'right-bracket' => '\]',
        'block-start'   => '\{',
        'block-end'     => '\}',
        'left-parant'   => '\(',
        'right-parant'  => '\)',
        'comma'         => ',',
        'semicolon'     => ';',
        'whitespace'    => '[ \r\n\t]+',
        'tag'           => ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        // Backslashes are doubled twice here: once for the single-quoted PHP
        // literal and once for PCRE. The first alternative thus matches a
        // backslash followed by a backslash or a double quote, so an escaped
        // quote no longer terminates the string.
        'quoted-string' => '"(?:\\\\[\\\\"]|[^\x00"])*"',
        'number'        => '[[:digit:]]+(?:[KMG])?(?=\b)',
        'comment'       => '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?\n)',
        // Disabled stricter variant of the multi-line pattern, kept for reference:
#       'multi-line'    => 'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.]*\r?\n)*\.\r?\n',
        'multi-line'    => 'text:[^;]*',
        'identifier'    => '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        'unknown token' => '[^ \r\n\t]+'
    );

    /**
     * Split $script into tokens and append them to $this->tokens_.
     *
     * Whitespace is consumed but not recorded; every other match, including
     * 'comment' and 'unknown token', is recorded. A final 'script-end' entry
     * carrying the last line number reached is always appended.
     *
     * @param string $script Script text. Taken by reference, but only read.
     * @return void
     */
    function tokenize(&$script)
    {
        $pos = 0;
        $line = 1;
        $script_length = mb_strlen($script);

        while ($pos < $script_length)
        {
            // The unscanned tail is identical for every pattern, so slice it
            // once per token instead of once per pattern.
            $rest = mb_substr($script, $pos);
            $matched = false;

            foreach ($this->token_match_ as $class => $regex)
            {
                if (!preg_match('/^' . $regex . '/', $rest, $match))
                {
                    continue;
                }

                $length = mb_strlen($match[0]);
                if ($length < 1)
                {
                    // A zero-width match cannot advance the scan position;
                    // treat it as "no match" and try the next pattern.
                    continue;
                }

                if ($class != 'whitespace')
                {
                    // The pattern is anchored at the start of $rest, so
                    // $match[0] is exactly the token text at $pos.
                    array_push($this->tokens_, array(
                        'class' => $class,
                        'text'  => rtrim($match[0]),
                        'line'  => $line,
                    ));
                }

                // NOTE(review): an 'unknown token' does not stop the scan.
                // The previous code compared the class with 'unknown', which
                // is not a key of $token_match_, so that early return never
                // ran. The effective behaviour (keep scanning, always append
                // 'script-end') is preserved here; confirm whether aborting
                // at the first unknown token is actually wanted.

                $pos += $length;
                $line += mb_substr_count($match[0], "\n");
                $matched = true;
                break;
            }

            if (!$matched)
            {
                // No pattern consumed any input. This can only happen when
                // the pattern table has been altered; stop instead of
                // looping forever on the same position.
                break;
            }
        }

        array_push($this->tokens_, array(
            'class' => 'script-end',
            'text'  => 'script-end',
            'line'  => $line,
        ));
    }
}
69 ?>