Code

r11451@tres: ted | 2006-04-17 22:21:33 -0700
[inkscape.git] / src / dom / js / jsregexp.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  *
3  * ***** BEGIN LICENSE BLOCK *****
4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * The Original Code is Mozilla Communicator client code, released
17  * March 31, 1998.
18  *
19  * The Initial Developer of the Original Code is
20  * Netscape Communications Corporation.
21  * Portions created by the Initial Developer are Copyright (C) 1998
22  * the Initial Developer. All Rights Reserved.
23  *
24  * Contributor(s):
25  *
26  * Alternatively, the contents of this file may be used under the terms of
27  * either of the GNU General Public License Version 2 or later (the "GPL"),
28  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29  * in which case the provisions of the GPL or the LGPL are applicable instead
30  * of those above. If you wish to allow use of your version of this file only
31  * under the terms of either the GPL or the LGPL, and not to allow others to
32  * use your version of this file under the terms of the MPL, indicate your
33  * decision by deleting the provisions above and replace them with the notice
34  * and other provisions required by the GPL or the LGPL. If you do not delete
35  * the provisions above, a recipient may use your version of this file under
36  * the terms of any one of the MPL, the GPL or the LGPL.
37  *
38  * ***** END LICENSE BLOCK ***** */
40 /*
41  * JS regular expressions, after Perl.
42  */
43 #include "jsstddef.h"
44 #include <stdlib.h>
45 #include <string.h>
46 #include "jstypes.h"
47 #include "jsarena.h" /* Added by JSIFY */
48 #include "jsutil.h" /* Added by JSIFY */
49 #include "jsapi.h"
50 #include "jsarray.h"
51 #include "jsatom.h"
52 #include "jscntxt.h"
53 #include "jsconfig.h"
54 #include "jsfun.h"
55 #include "jsgc.h"
56 #include "jsinterp.h"
57 #include "jslock.h"
58 #include "jsnum.h"
59 #include "jsobj.h"
60 #include "jsopcode.h"
61 #include "jsregexp.h"
62 #include "jsscan.h"
63 #include "jsstr.h"
65 #ifdef XP_MAC
66 #include <MacMemory.h>
67 #endif
69 #if JS_HAS_REGEXPS
71 /* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */
72 typedef enum REOp {
73     REOP_EMPTY         = 0,  /* match rest of input against rest of r.e. */
74     REOP_ALT           = 1,  /* alternative subexpressions in kid and next */
75     REOP_SIMPLE_START  = 2,  /* start of 'simple opcodes' */
76     REOP_BOL           = 2,  /* beginning of input (or line if multiline) */
77     REOP_EOL           = 3,  /* end of input (or line if multiline) */
78     REOP_WBDRY         = 4,  /* match "" at word boundary */
79     REOP_WNONBDRY      = 5,  /* match "" at word non-boundary */
80     REOP_DOT           = 6,  /* stands for any character */
81     REOP_DIGIT         = 7,  /* match a digit char: [0-9] */
82     REOP_NONDIGIT      = 8,  /* match a non-digit char: [^0-9] */
83     REOP_ALNUM         = 9,  /* match an alphanumeric char: [0-9a-z_A-Z] */
84     REOP_NONALNUM      = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
85     REOP_SPACE         = 11, /* match a whitespace char */
86     REOP_NONSPACE      = 12, /* match a non-whitespace char */
87     REOP_BACKREF       = 13, /* back-reference (e.g., \1) to a parenthetical */
88     REOP_FLAT          = 14, /* match a flat string */
89     REOP_FLAT1         = 15, /* match a single char */
90     REOP_FLATi         = 16, /* case-independent REOP_FLAT */
91     REOP_FLAT1i        = 17, /* case-independent REOP_FLAT1 */
92     REOP_UCFLAT1       = 18, /* single Unicode char */
93     REOP_UCFLAT1i      = 19, /* case-independent REOP_UCFLAT1 */
94     REOP_UCFLAT        = 20, /* flat Unicode string; len immediate counts chars */
95     REOP_UCFLATi       = 21, /* case-independent REOP_UCFLAT */
96     REOP_CLASS         = 22, /* character class with index */
97     REOP_NCLASS        = 23, /* negated character class with index */
98     REOP_SIMPLE_END    = 23, /* end of 'simple opcodes' */
99     REOP_QUANT         = 25, /* quantified atom: atom{1,2} */
100     REOP_STAR          = 26, /* zero or more occurrences of kid */
101     REOP_PLUS          = 27, /* one or more occurrences of kid */
102     REOP_OPT           = 28, /* optional subexpression in kid */
103     REOP_LPAREN        = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
104     REOP_RPAREN        = 30, /* right paren bytecode */
105     REOP_JUMP          = 31, /* for deoptimized closure loops */
106     REOP_DOTSTAR       = 32, /* optimize .* to use a single opcode */
107     REOP_ANCHOR        = 33, /* like .* but skips left context to unanchored r.e. */
108     REOP_EOLONLY       = 34, /* $ not preceded by any pattern */
109     REOP_BACKREFi      = 37, /* case-independent REOP_BACKREF */
110     REOP_LPARENNON     = 41, /* non-capturing version of REOP_LPAREN */
111     REOP_ASSERT        = 43, /* zero width positive lookahead assertion */
112     REOP_ASSERT_NOT    = 44, /* zero width negative lookahead assertion */
113     REOP_ASSERTTEST    = 45, /* sentinel at end of assertion child */
114     REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
115     REOP_MINIMALSTAR   = 47, /* non-greedy version of * */
116     REOP_MINIMALPLUS   = 48, /* non-greedy version of + */
117     REOP_MINIMALOPT    = 49, /* non-greedy version of ? */
118     REOP_MINIMALQUANT  = 50, /* non-greedy version of {} */
119     REOP_ENDCHILD      = 51, /* sentinel at end of quantifier child */
120     REOP_REPEAT        = 52, /* directs execution of greedy quantifier */
121     REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
122     REOP_ALTPREREQ     = 54, /* prerequisite for ALT, either of two chars */
123     REOP_ALTPREREQ2    = 55, /* prerequisite for ALT, a char or a class */
124     REOP_ENDALT        = 56, /* end of final alternate */
125     REOP_CONCAT        = 57, /* concatenation of terms (parse time only) */
127     REOP_END
128 } REOp;
130 #define REOP_IS_SIMPLE(op)  ((unsigned)((op) - REOP_SIMPLE_START) <           \
131                              (unsigned)REOP_SIMPLE_END)
133 struct RENode {
134     REOp            op;         /* r.e. op bytecode */
135     RENode          *next;      /* next in concatenation order */
136     void            *kid;       /* first operand */
137     union {
138         void        *kid2;      /* second operand */
139         jsint       num;        /* could be a number */
140         uint16      parenIndex; /* or a parenthesis index */
141         struct {                /* or a quantifier range */
142             uint16  min;
143             uint16  max;
144             JSBool  greedy;
145         } range;
146         struct {                /* or a character class */
147             uint16  startIndex;
148             uint16  kidlen;     /* length of string at kid, in jschars */
149             uint16  bmsize;     /* bitmap size, based on max char code */
150             uint16  index;      /* index into class list */
151             JSBool  sense;
152         } ucclass;
153         struct {                /* or a literal sequence */
154             jschar  chr;        /* of one character */
155             uint16  length;     /* or many (via the kid) */
156         } flat;
157         struct {
158             RENode  *kid2;      /* second operand from ALT */
159             jschar  ch1;        /* match char for ALTPREREQ */
160             jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
161         } altprereq;
162     } u;
163 };
165 #define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \
166                              ((c >= 'a') && (c <= 'z')) )
167 #define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \
168                              (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
170 #define CLASS_CACHE_SIZE    4
172 typedef struct CompilerState {
173     JSContext       *context;
174     JSTokenStream   *tokenStream; /* For reporting errors */
175     const jschar    *cpbegin;
176     const jschar    *cpend;
177     const jschar    *cp;
178     uint16          flags;
179     uint16          parenCount;
180     uint16          classCount;   /* number of [] encountered */
181     uint16          treeDepth;    /* maximum depth of parse tree */
182     size_t          progLength;   /* estimated bytecode length */
183     RENode          *result;
184     struct {
185         const jschar *start;        /* small cache of class strings */
186         uint16 length;              /* since they're often the same */
187         uint16 index;
188     } classCache[CLASS_CACHE_SIZE];
189 } CompilerState;
191 typedef struct RECapture {
192     int32 index;               /* start of contents, -1 for empty  */
193     uint16 length;             /* length of capture */
194 } RECapture;
196 typedef struct REMatchState {
197     const jschar *cp;
198     RECapture parens[1];      /* first of 're->parenCount' captures,
199                                * allocated at end of this struct.
200                                */
201 } REMatchState;
203 struct REBackTrackData;
205 typedef struct REProgState {
206     jsbytecode *continue_pc;        /* current continuation data */
207     jsbytecode continue_op;
208     uint16 index;                   /* progress in text */
209     uintN parenSoFar;               /* highest indexed paren started */
210     union {
211         struct {
212             uint16 min;             /* current quantifier limits */
213             uint16 max;
214         } quantifier;
215         struct {
216             size_t top;             /* backtrack stack state */
217             size_t sz;
218         } assertion;
219     } u;
220 } REProgState;
222 typedef struct REBackTrackData {
223     size_t sz;                      /* size of previous stack entry */
224     jsbytecode *backtrack_pc;       /* where to backtrack to */
225     jsbytecode backtrack_op;
226     const jschar *cp;               /* index in text of match at backtrack */
227     uint16 parenIndex;              /* start index of saved paren contents */
228     uint16 parenCount;              /* # of saved paren contents */
229     uint16 saveStateStackTop;       /* number of parent states */
230     /* saved parent states follow */
231     /* saved paren contents follow */
232 } REBackTrackData;
234 #define INITIAL_STATESTACK (100)
235 #define INITIAL_BACKTRACK (8000)
237 typedef struct REGlobalData {
238     JSContext *cx;
239     JSRegExp *regexp;               /* the RE in execution */
240     JSBool ok;                      /* runtime error (out_of_memory only?) */
241     size_t start;                   /* offset to start at */
242     ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
243     const jschar *cpbegin, *cpend;  /* text base address and limit */
245     REProgState *stateStack;        /* stack of state of current parents */
246     uint16 stateStackTop;
247     uint16 stateStackLimit;
249     REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
250     REBackTrackData *backTrackSP;
251     size_t backTrackStackSize;
252     size_t cursz;                   /* size of current stack entry */
254     JSArenaPool pool;               /* I don't understand but it's faster to
255                                      * use this than to malloc/free the three
256                                      * items that are allocated from this pool
257                                      */
259 } REGlobalData;
262 /*
263  * 1. If IgnoreCase is false, return ch.
264  * 2. Let u be ch converted to upper case as if by calling
265  *    String.prototype.toUpperCase on the one-character string ch.
266  * 3. If u does not consist of a single character, return ch.
267  * 4. Let cu be u's character.
268  * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
269  *    code point value is less than decimal 128, then return ch.
270  * 6. Return cu.
271  */
272 static jschar
273 upcase(jschar ch)
275     jschar cu = JS_TOUPPER(ch);
276     if (ch >= 128 && cu < 128)
277         return ch;
278     return cu;
281 static jschar
282 downcase(jschar ch)
284     jschar cl = JS_TOLOWER(ch);
285     if (cl >= 128 && ch < 128)
286         return ch;
287     return cl;
290 /* Construct and initialize an RENode, returning NULL for out-of-memory */
291 static RENode *
292 NewRENode(CompilerState *state, REOp op)
294     JSContext *cx;
295     RENode *ren;
297     cx = state->context;
298     JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
299     if (!ren) {
300         JS_ReportOutOfMemory(cx);
301         return NULL;
302     }
303     ren->op = op;
304     ren->next = NULL;
305     ren->kid = NULL;
306     return ren;
309 /*
310  * Validates and converts hex ascii value.
311  */
312 static JSBool
313 isASCIIHexDigit(jschar c, uintN *digit)
315     uintN cv = c;
317     if (cv < '0')
318         return JS_FALSE;
319     if (cv <= '9') {
320         *digit = cv - '0';
321         return JS_TRUE;
322     }
323     cv |= 0x20;
324     if (cv >= 'a' && cv <= 'f') {
325         *digit = cv - 'a' + 10;
326         return JS_TRUE;
327     }
328     return JS_FALSE;
332 typedef struct {
333     REOp op;
334     const jschar *errPos;
335     uint16 parenIndex;
336 } REOpData;
339 /*
340  * Process the op against the two top operands, reducing them to a single
341  * operand in the penultimate slot. Update progLength and treeDepth.
342  */
343 static JSBool
344 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, intN operandSP)
346     RENode *result;
348     switch (opData->op) {
349     case REOP_ALT:
350         result = NewRENode(state, REOP_ALT);
351         if (!result)
352             return JS_FALSE;
353         result->kid = operandStack[operandSP - 2];
354         result->u.kid2 = operandStack[operandSP - 1];
355         operandStack[operandSP - 2] = result;
356         /*
357          * look at both alternates to see if there's a FLAT or a CLASS at
358          * the start of each. If so, use a prerequisite match
359          */
360         ++state->treeDepth;
361         if (((RENode *) result->kid)->op == REOP_FLAT &&
362             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
363             (state->flags & JSREG_FOLD) == 0) {
364             result->op = REOP_ALTPREREQ;
365             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
366             result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
367             /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
368                                             JUMP, <end> ... ENDALT */
369             state->progLength += 13;
370         }
371         else
372         if (((RENode *) result->kid)->op == REOP_CLASS &&
373             ((RENode *) result->kid)->u.ucclass.index < 256 &&
374             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
375             (state->flags & JSREG_FOLD) == 0) {
376             result->op = REOP_ALTPREREQ2;
377             result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
378             result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
379             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
380                                             JUMP, <end> ... ENDALT */
381             state->progLength += 13;
382         }
383         else
384         if (((RENode *) result->kid)->op == REOP_FLAT &&
385             ((RENode *) result->u.kid2)->op == REOP_CLASS &&
386             ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
387             (state->flags & JSREG_FOLD) == 0) {
388             result->op = REOP_ALTPREREQ2;
389             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
390             result->u.altprereq.ch2 =
391                 ((RENode *) result->u.kid2)->u.ucclass.index;
392             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
393                                           JUMP, <end> ... ENDALT */
394             state->progLength += 13;
395         }
396         else
397             /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
398             state->progLength += 7;
399         break;
400     case REOP_CONCAT:
401         result = operandStack[operandSP - 2];
402         while (result->next)
403             result = result->next;
404         result->next = operandStack[operandSP - 1];
405         break;
406     case REOP_ASSERT:
407     case REOP_ASSERT_NOT:
408     case REOP_LPARENNON:
409     case REOP_LPAREN:
410         /* These should have been processed by a close paren. */
411         js_ReportCompileErrorNumber(state->context, state->tokenStream,
412                                     NULL, JSREPORT_ERROR,
413                                     JSMSG_MISSING_PAREN, opData->errPos);
414         return JS_FALSE;
415     default:;
416     }
417     return JS_TRUE;
420 /*
421  * Parser forward declarations.
422  */
423 static JSBool ParseTerm(CompilerState *state);
424 static JSBool ParseQuantifier(CompilerState *state);
426 /*
427  * Top-down regular expression grammar, based closely on Perl4.
428  *
429  *  regexp:     altern                  A regular expression is one or more
430  *              altern '|' regexp       alternatives separated by vertical bar.
431  */
432 #define INITIAL_STACK_SIZE  128
434 static JSBool
435 ParseRegExp(CompilerState *state)
437     uint16 parenIndex;
438     RENode *operand;
439     REOpData *operatorStack;
440     RENode **operandStack;
441     REOp op;
442     intN i;
443     JSBool result = JS_FALSE;
445     intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
446     intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
448     /* Watch out for empty regexp */
449     if (state->cp == state->cpend) {
450         state->result = NewRENode(state, REOP_EMPTY);
451         return (state->result != NULL);
452     }
454     operatorStack = (REOpData *)
455         JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
456     if (!operatorStack)
457         return JS_FALSE;
459     operandStack = (RENode **)
460         JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
461     if (!operandStack)
462         goto out;
464     while (JS_TRUE) {
465         parenIndex = state->parenCount;
466         if (state->cp == state->cpend) {
467             /*
468              * If we are at the end of the regexp and we're short one or more
469              * operands, the regexp must have the form /x|/ or some such, with
470              * left parentheses making us short more than one operand.
471              */
472             if (operatorSP >= operandSP) {
473                 operand = NewRENode(state, REOP_EMPTY);
474                 if (!operand)
475                     goto out;
476                 goto pushOperand;
477             }
478         } else {
479             switch (*state->cp) {
480                 /* balance '(' */
481             case '(':           /* balance ')' */
482                 ++state->cp;
483                 if (state->cp + 1 < state->cpend &&
484                     *state->cp == '?' &&
485                     (state->cp[1] == '=' ||
486                      state->cp[1] == '!' ||
487                      state->cp[1] == ':')) {
488                     switch (state->cp[1]) {
489                     case '=':
490                         op = REOP_ASSERT;
491                         /* ASSERT, <next>, ... ASSERTTEST */
492                         state->progLength += 4;
493                         break;
494                     case '!':
495                         op = REOP_ASSERT_NOT;
496                         /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
497                         state->progLength += 4;
498                         break;
499                     default:
500                         op = REOP_LPARENNON;
501                         break;
502                     }
503                     state->cp += 2;
504                 } else {
505                     op = REOP_LPAREN;
506                     /* LPAREN, <index>, ... RPAREN, <index> */
507                     state->progLength += 6;
508                     state->parenCount++;
509                     if (state->parenCount == 65535) {
510                         js_ReportCompileErrorNumber(state->context,
511                                                     state->tokenStream,
512                                                     NULL, JSREPORT_ERROR,
513                                                     JSMSG_TOO_MANY_PARENS);
514                         goto out;
515                     }
516                 }
517                 goto pushOperator;
518             case ')':
519                 /* If there's not a stacked open parenthesis, throw
520                  * a syntax error.
521                  */
522                 for (i = operatorSP - 1; ; i--) {
523                     if (i < 0) {
524                         js_ReportCompileErrorNumber(state->context,
525                                                     state->tokenStream,
526                                                     NULL, JSREPORT_ERROR,
527                                                     JSMSG_UNMATCHED_RIGHT_PAREN);
528                         goto out;
529                     }
530                     if (operatorStack[i].op == REOP_ASSERT ||
531                         operatorStack[i].op == REOP_ASSERT_NOT ||
532                         operatorStack[i].op == REOP_LPARENNON ||
533                         operatorStack[i].op == REOP_LPAREN) {
534                         break;
535                     }
536                 }
537                 /* fall thru... */
538             case '|':
539                 /* Expected an operand before these, so make an empty one */
540                 operand = NewRENode(state, REOP_EMPTY);
541                 if (!operand)
542                     goto out;
543                 goto pushOperand;
544             default:
545                 if (!ParseTerm(state))
546                     goto out;
547                 operand = state->result;
548 pushOperand:
549                 if (operandSP == operandStackSize) {
550                     operandStackSize += operandStackSize;
551                     operandStack =
552                       (RENode **)JS_realloc(state->context, operandStack,
553                                             sizeof(RENode *) * operandStackSize);
554                     if (!operandStack)
555                         goto out;
556                 }
557                 operandStack[operandSP++] = operand;
558                 break;
559             }
560         }
561             /* At the end; process remaining operators */
562 restartOperator:
563         if (state->cp == state->cpend) {
564             while (operatorSP) {
565                 --operatorSP;
566                 if (!ProcessOp(state, &operatorStack[operatorSP],
567                                operandStack, operandSP))
568                     goto out;
569                 --operandSP;
570             }
571             JS_ASSERT(operandSP == 1);
572             state->result = operandStack[0];
573             result = JS_TRUE;
574             goto out;
575         }
576         switch (*state->cp) {
577         case '|':
578             /* Process any stacked 'concat' operators */
579             ++state->cp;
580             while (operatorSP &&
581                    operatorStack[operatorSP - 1].op == REOP_CONCAT) {
582                 --operatorSP;
583                 if (!ProcessOp(state, &operatorStack[operatorSP],
584                                operandStack, operandSP))
585                     goto out;
586                 --operandSP;
587             }
588             op = REOP_ALT;
589             goto pushOperator;
591         case ')':
592             /* If there's not a stacked open parenthesis,we
593              * accept the close as a flat.
594              */
595             for (i = operatorSP - 1; ; i--) {
596                 if (i < 0) {
597                     js_ReportCompileErrorNumber(state->context,
598                                                 state->tokenStream,
599                                                 NULL, JSREPORT_ERROR,
600                                                 JSMSG_UNMATCHED_RIGHT_PAREN);
601                     goto out;
602                 }
603                 if (operatorStack[i].op == REOP_ASSERT ||
604                     operatorStack[i].op == REOP_ASSERT_NOT ||
605                     operatorStack[i].op == REOP_LPARENNON ||
606                     operatorStack[i].op == REOP_LPAREN) {
607                     break;
608                 }
609             }
610             ++state->cp;
611             /* process everything on the stack until the open */
612             while (JS_TRUE) {
613                 JS_ASSERT(operatorSP);
614                 --operatorSP;
615                 switch (operatorStack[operatorSP].op) {
616                 case REOP_ASSERT:
617                 case REOP_ASSERT_NOT:
618                 case REOP_LPAREN:
619                     operand = NewRENode(state, operatorStack[operatorSP].op);
620                     if (!operand)
621                         goto out;
622                     operand->u.parenIndex =
623                         operatorStack[operatorSP].parenIndex;
624                     JS_ASSERT(operandSP);
625                     operand->kid = operandStack[operandSP - 1];
626                     operandStack[operandSP - 1] = operand;
627                     ++state->treeDepth;
628                     /* fall thru... */
629                 case REOP_LPARENNON:
630                     state->result = operandStack[operandSP - 1];
631                     if (!ParseQuantifier(state))
632                         goto out;
633                     operandStack[operandSP - 1] = state->result;
634                     goto restartOperator;
635                 default:
636                     if (!ProcessOp(state, &operatorStack[operatorSP],
637                                    operandStack, operandSP))
638                         goto out;
639                     --operandSP;
640                     break;
641                 }
642             }
643             break;
644         case '+':
645         case '*':
646         case '?':
647         case '{':
648             js_ReportCompileErrorNumber(state->context, state->tokenStream,
649                                         NULL, JSREPORT_ERROR,
650                                         JSMSG_BAD_QUANTIFIER, state->cp);
651             result = JS_FALSE;
652             goto out;
653         default:
654             /* Anything else is the start of the next term */
655             op = REOP_CONCAT;
656 pushOperator:
657             if (operatorSP == operatorStackSize) {
658                 operatorStackSize += operatorStackSize;
659                 operatorStack =
660                     (REOpData *)JS_realloc(state->context, operatorStack,
661                                            sizeof(REOpData) * operatorStackSize);
662                 if (!operatorStack)
663                     goto out;
664             }
665             operatorStack[operatorSP].op = op;
666             operatorStack[operatorSP].errPos = state->cp;
667             operatorStack[operatorSP++].parenIndex = parenIndex;
668             break;
669         }
670     }
671 out:
672     if (operatorStack)
673         JS_free(state->context, operatorStack);
674     if (operandStack)
675         JS_free(state->context, operandStack);
676     return result;
679 /*
680  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
681  * its being on the stack, and to propagate errors to its callers.
682  */
683 #define JSREG_FIND_PAREN_COUNT  0x8000
684 #define JSREG_FIND_PAREN_ERROR  0x4000
686 /*
687  * Magic return value from FindParenCount and GetDecimalValue, to indicate
688  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
689  * its findMax parameter is non-null.
690  */
691 #define OVERFLOW_VALUE          ((uintN)-1)
693 static uintN
694 FindParenCount(CompilerState *state)
696     CompilerState temp;
697     int i;
699     if (state->flags & JSREG_FIND_PAREN_COUNT)
700         return OVERFLOW_VALUE;
702     /*
703      * Copy state into temp, flag it so we never report an invalid backref,
704      * and reset its members to parse the entire regexp.  This is obviously
705      * suboptimal, but GetDecimalValue calls us only if a backref appears to
706      * refer to a forward parenthetical, which is rare.
707      */
708     temp = *state;
709     temp.flags |= JSREG_FIND_PAREN_COUNT;
710     temp.cp = temp.cpbegin;
711     temp.parenCount = 0;
712     temp.classCount = 0;
713     temp.progLength = 0;
714     temp.treeDepth = 0;
715     for (i = 0; i < CLASS_CACHE_SIZE; i++)
716         temp.classCache[i].start = NULL;
718     if (!ParseRegExp(&temp)) {
719         state->flags |= JSREG_FIND_PAREN_ERROR;
720         return OVERFLOW_VALUE;
721     }
722     return temp.parenCount;
725 /*
726  * Extract and return a decimal value at state->cp.  The initial character c
727  * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
728  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
729  * state->flags to discover whether an error occurred under findMax.
730  */
731 static uintN
732 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
733                 CompilerState *state)
735     uintN value = JS7_UNDEC(c);
736     JSBool overflow = (value > max && (!findMax || value > findMax(state)));
738     /* The following restriction allows simpler overflow checks. */
739     JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
740     while (state->cp < state->cpend) {
741         c = *state->cp;
742         if (!JS7_ISDEC(c))
743             break;
744         value = 10 * value + JS7_UNDEC(c);
745         if (!overflow && value > max && (!findMax || value > findMax(state)))
746             overflow = JS_TRUE;
747         ++state->cp;
748     }
749     return overflow ? OVERFLOW_VALUE : value;
752 /*
753  * Calculate the total size of the bitmap required for a class expression.
754  */
755 static JSBool
756 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
757                     const jschar *end)
759     uintN max = 0;
760     JSBool inRange = JS_FALSE;
761     jschar c, rangeStart = 0;
762     uintN n, digit, nDigits, i;
764     target->u.ucclass.bmsize = 0;
765     target->u.ucclass.sense = JS_TRUE;
767     if (src == end)
768         return JS_TRUE;
770     if (*src == '^') {
771         ++src;
772         target->u.ucclass.sense = JS_FALSE;
773     }
775     while (src != end) {
776         uintN localMax = 0;
777         switch (*src) {
778         case '\\':
779             ++src;
780             c = *src++;
781             switch (c) {
782             case 'b':
783                 localMax = 0x8;
784                 break;
785             case 'f':
786                 localMax = 0xC;
787                 break;
788             case 'n':
789                 localMax = 0xA;
790                 break;
791             case 'r':
792                 localMax = 0xD;
793                 break;
794             case 't':
795                 localMax = 0x9;
796                 break;
797             case 'v':
798                 localMax = 0xB;
799                 break;
800             case 'c':
801                 if (src + 1 < end && RE_IS_LETTER(src[1]))
802                     localMax = (jschar) (*src++ & 0x1F);
803                 else
804                     localMax = '\\';
805                 break;
806             case 'x':
807                 nDigits = 2;
808                 goto lexHex;
809             case 'u':
810                 nDigits = 4;
811 lexHex:
812                 n = 0;
813                 for (i = 0; (i < nDigits) && (src < end); i++) {
814                     c = *src++;
815                     if (!isASCIIHexDigit(c, &digit)) {
816                         /*
817                          * Back off to accepting the original
818                          *'\' as a literal.
819                          */
820                         src -= i + 1;
821                         n = '\\';
822                         break;
823                     }
824                     n = (n << 4) | digit;
825                 }
826                 localMax = n;
827                 break;
828             case 'd':
829                 if (inRange) {
830                     JS_ReportErrorNumber(state->context,
831                                          js_GetErrorMessage, NULL,
832                                          JSMSG_BAD_CLASS_RANGE);
833                     return JS_FALSE;
834                 }
835                 localMax = '9';
836                 break;
837             case 'D':
838             case 's':
839             case 'S':
840             case 'w':
841             case 'W':
842                 if (inRange) {
843                     JS_ReportErrorNumber(state->context,
844                                          js_GetErrorMessage, NULL,
845                                          JSMSG_BAD_CLASS_RANGE);
846                     return JS_FALSE;
847                 }
848                 target->u.ucclass.bmsize = 65535;
849                 return JS_TRUE;
850             case '0':
851             case '1':
852             case '2':
853             case '3':
854             case '4':
855             case '5':
856             case '6':
857             case '7':
858                 /*
859                  *  This is a non-ECMA extension - decimal escapes (in this
860                  *  case, octal!) are supposed to be an error inside class
861                  *  ranges, but supported here for backwards compatibility.
862                  *
863                  */
864                 n = JS7_UNDEC(c);
865                 c = *src;
866                 if ('0' <= c && c <= '7') {
867                     src++;
868                     n = 8 * n + JS7_UNDEC(c);
869                     c = *src;
870                     if ('0' <= c && c <= '7') {
871                         src++;
872                         i = 8 * n + JS7_UNDEC(c);
873                         if (i <= 0377)
874                             n = i;
875                         else
876                             src--;
877                     }
878                 }
879                 localMax = n;
880                 break;
882             default:
883                 localMax = c;
884                 break;
885             }
886             break;
887         default:
888             localMax = *src++;
889             break;
890         }
891         if (inRange) {
892             if (rangeStart > localMax) {
893                 JS_ReportErrorNumber(state->context,
894                                      js_GetErrorMessage, NULL,
895                                      JSMSG_BAD_CLASS_RANGE);
896                 return JS_FALSE;
897             }
898             inRange = JS_FALSE;
899         } else {
900             if (src < end - 1) {
901                 if (*src == '-') {
902                     ++src;
903                     inRange = JS_TRUE;
904                     rangeStart = (jschar)localMax;
905                     continue;
906                 }
907             }
908         }
909         if (state->flags & JSREG_FOLD) {
910             c = JS_MAX(upcase((jschar)localMax), downcase((jschar)localMax));
911             if (c > localMax)
912                 localMax = c;
913         }
914         if (localMax > max)
915             max = localMax;
916     }
917     target->u.ucclass.bmsize = max;
918     return JS_TRUE;
921 /*
922  *  item:       assertion               An item is either an assertion or
923  *              quantatom               a quantified atom.
924  *
925  *  assertion:  '^'                     Assertions match beginning of string
926  *                                      (or line if the class static property
927  *                                      RegExp.multiline is true).
928  *              '$'                     End of string (or line if the class
929  *                                      static property RegExp.multiline is
930  *                                      true).
931  *              '\b'                    Word boundary (between \w and \W).
932  *              '\B'                    Word non-boundary.
933  *
934  *  quantatom:  atom                    An unquantified atom.
935  *              quantatom '{' n ',' m '}'
936  *                                      Atom must occur between n and m times.
937  *              quantatom '{' n ',' '}' Atom must occur at least n times.
938  *              quantatom '{' n '}'     Atom must occur exactly n times.
939  *              quantatom '*'           Zero or more times (same as {0,}).
940  *              quantatom '+'           One or more times (same as {1,}).
941  *              quantatom '?'           Zero or one time (same as {0,1}).
942  *
943  *              any of which can be optionally followed by '?' for ungreedy
944  *
945  *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
946  *                                      can be addressed using a backreference,
947  *                                      see '\' n below).
948  *              '.'                     Matches any char except '\n'.
949  *              '[' classlist ']'       A character class.
950  *              '[' '^' classlist ']'   A negated character class.
951  *              '\f'                    Form Feed.
952  *              '\n'                    Newline (Line Feed).
953  *              '\r'                    Carriage Return.
954  *              '\t'                    Horizontal Tab.
955  *              '\v'                    Vertical Tab.
956  *              '\d'                    A digit (same as [0-9]).
957  *              '\D'                    A non-digit.
958  *              '\w'                    A word character, [0-9a-z_A-Z].
959  *              '\W'                    A non-word character.
960  *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
961  *              '\S'                    A non-whitespace character.
962  *              '\' n                   A backreference to the nth (n decimal
963  *                                      and positive) parenthesized expression.
964  *              '\' octal               An octal escape sequence (octal must be
965  *                                      two or three digits long, unless it is
966  *                                      0 for the null character).
967  *              '\x' hex                A hex escape (hex must be two digits).
968  *              '\u' unicode            A unicode escape (must be four digits).
969  *              '\c' ctrl               A control character, ctrl is a letter.
970  *              '\' literalatomchar     Any character except one of the above
971  *                                      that follow '\' in an atom.
972  *              otheratomchar           Any character not first among the other
973  *                                      atom right-hand sides.
974  */
975 static JSBool
976 ParseTerm(CompilerState *state)
978     jschar c = *state->cp++;
979     uintN nDigits;
980     uintN num, tmp, n, i;
981     const jschar *termStart;
983     switch (c) {
984     /* assertions and atoms */
985     case '^':
986         state->result = NewRENode(state, REOP_BOL);
987         if (!state->result)
988             return JS_FALSE;
989         state->progLength++;
990         return JS_TRUE;
991     case '$':
992         state->result = NewRENode(state, REOP_EOL);
993         if (!state->result)
994             return JS_FALSE;
995         state->progLength++;
996         return JS_TRUE;
997     case '\\':
998         if (state->cp >= state->cpend) {
999             /* a trailing '\' is an error */
1000             js_ReportCompileErrorNumber(state->context, state->tokenStream,
1001                                         NULL, JSREPORT_ERROR,
1002                                         JSMSG_TRAILING_SLASH);
1003             return JS_FALSE;
1004         }
1005         c = *state->cp++;
1006         switch (c) {
1007         /* assertion escapes */
1008         case 'b' :
1009             state->result = NewRENode(state, REOP_WBDRY);
1010             if (!state->result)
1011                 return JS_FALSE;
1012             state->progLength++;
1013             return JS_TRUE;
1014         case 'B':
1015             state->result = NewRENode(state, REOP_WNONBDRY);
1016             if (!state->result)
1017                 return JS_FALSE;
1018             state->progLength++;
1019             return JS_TRUE;
1020         /* Decimal escape */
1021         case '0':
1022             if (JS_HAS_STRICT_OPTION(state->context)) {
1023                 if (!js_ReportCompileErrorNumber(state->context,
1024                                                  state->tokenStream,
1025                                                  NULL,
1026                                                  JSREPORT_WARNING |
1027                                                  JSREPORT_STRICT,
1028                                                  JSMSG_INVALID_BACKREF)) {
1029                     return JS_FALSE;
1030                 }
1031                 c = 0;
1032             } else {
1033      doOctal:
1034                 num = 0;
1035                 while (state->cp < state->cpend) {
1036                     c = *state->cp;
1037                     if (c < '0' || '7' < c)
1038                         break;
1039                     state->cp++;
1040                     tmp = 8 * num + (uintN)JS7_UNDEC(c);
1041                     if (tmp > 0377)
1042                         break;
1043                     num = tmp;
1044                 }
1045                 c = (jschar)num;
1046             }
1047     doFlat:
1048             state->result = NewRENode(state, REOP_FLAT);
1049             if (!state->result)
1050                 return JS_FALSE;
1051             state->result->u.flat.chr = c;
1052             state->result->u.flat.length = 1;
1053             state->progLength += 3;
1054             break;
1055         case '1':
1056         case '2':
1057         case '3':
1058         case '4':
1059         case '5':
1060         case '6':
1061         case '7':
1062         case '8':
1063         case '9':
1064             termStart = state->cp - 1;
1065             num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1066             if (state->flags & JSREG_FIND_PAREN_ERROR)
1067                 return JS_FALSE;
1068             if (num == OVERFLOW_VALUE) {
1069                 if (!JS_HAS_STRICT_OPTION(state->context)) {
1070                     state->cp = termStart;
1071                     goto doOctal;
1072                 }
1073                 if (!js_ReportCompileErrorNumber(state->context,
1074                                                  state->tokenStream,
1075                                                  NULL,
1076                                                  JSREPORT_WARNING |
1077                                                  JSREPORT_STRICT,
1078                                                  (c >= '8')
1079                                                  ? JSMSG_INVALID_BACKREF
1080                                                  : JSMSG_BAD_BACKREF)) {
1081                     return JS_FALSE;
1082                 }
1083                 num = 0x10000;
1084             }
1085             JS_ASSERT(1 <= num && num <= 0x10000);
1086             state->result = NewRENode(state, REOP_BACKREF);
1087             if (!state->result)
1088                 return JS_FALSE;
1089             state->result->u.parenIndex = num - 1;
1090             state->progLength += 3;
1091             break;
1092         /* Control escape */
1093         case 'f':
1094             c = 0xC;
1095             goto doFlat;
1096         case 'n':
1097             c = 0xA;
1098             goto doFlat;
1099         case 'r':
1100             c = 0xD;
1101             goto doFlat;
1102         case 't':
1103             c = 0x9;
1104             goto doFlat;
1105         case 'v':
1106             c = 0xB;
1107             goto doFlat;
1108         /* Control letter */
1109         case 'c':
1110             if (state->cp + 1 < state->cpend && RE_IS_LETTER(state->cp[1])) {
1111                 c = (jschar) (*state->cp++ & 0x1F);
1112             } else {
1113                 /* back off to accepting the original '\' as a literal */
1114                 --state->cp;
1115                 c = '\\';
1116             }
1117             goto doFlat;
1118         /* HexEscapeSequence */
1119         case 'x':
1120             nDigits = 2;
1121             goto lexHex;
1122         /* UnicodeEscapeSequence */
1123         case 'u':
1124             nDigits = 4;
1125 lexHex:
1126             n = 0;
1127             for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1128                 uintN digit;
1129                 c = *state->cp++;
1130                 if (!isASCIIHexDigit(c, &digit)) {
1131                     /*
1132                      *  back off to accepting the original
1133                      *  'u' or 'x' as a literal
1134                      */
1135                     state->cp -= i + 2;
1136                     n = *state->cp++;
1137                     break;
1138                 }
1139                 n = (n << 4) | digit;
1140             }
1141             c = (jschar) n;
1142             goto doFlat;
1143         /* Character class escapes */
1144         case 'd':
1145             state->result = NewRENode(state, REOP_DIGIT);
1146 doSimple:
1147             if (!state->result)
1148                 return JS_FALSE;
1149             state->progLength++;
1150             break;
1151         case 'D':
1152             state->result = NewRENode(state, REOP_NONDIGIT);
1153             goto doSimple;
1154         case 's':
1155             state->result = NewRENode(state, REOP_SPACE);
1156             goto doSimple;
1157         case 'S':
1158             state->result = NewRENode(state, REOP_NONSPACE);
1159             goto doSimple;
1160         case 'w':
1161             state->result = NewRENode(state, REOP_ALNUM);
1162             goto doSimple;
1163         case 'W':
1164             state->result = NewRENode(state, REOP_NONALNUM);
1165             goto doSimple;
1166         /* IdentityEscape */
1167         default:
1168             state->result = NewRENode(state, REOP_FLAT);
1169             if (!state->result)
1170                 return JS_FALSE;
1171             state->result->u.flat.chr = c;
1172             state->result->u.flat.length = 1;
1173             state->result->kid = (void *) (state->cp - 1);
1174             state->progLength += 3;
1175             break;
1176         }
1177         break;
1178     case '[':
1179         state->result = NewRENode(state, REOP_CLASS);
1180         if (!state->result)
1181             return JS_FALSE;
1182         termStart = state->cp;
1183         state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1184         while (JS_TRUE) {
1185             if (state->cp == state->cpend) {
1186                 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1187                                             NULL, JSREPORT_ERROR,
1188                                             JSMSG_UNTERM_CLASS, termStart);
1189                 return JS_FALSE;
1190             }
1191             if (*state->cp == '\\') {
1192                 state->cp++;
1193             } else {
1194                 if (*state->cp == ']') {
1195                     state->result->u.ucclass.kidlen = state->cp - termStart;
1196                     break;
1197                 }
1198             }
1199             state->cp++;
1200         }
1201         for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1202             if (!state->classCache[i].start) {
1203                 state->classCache[i].start = termStart;
1204                 state->classCache[i].length = state->result->u.ucclass.kidlen;
1205                 state->classCache[i].index = state->classCount;
1206                 break;
1207             }
1208             if (state->classCache[i].length ==
1209                 state->result->u.ucclass.kidlen) {
1210                 for (n = 0; ; n++) {
1211                     if (n == state->classCache[i].length) {
1212                         state->result->u.ucclass.index =
1213                             state->classCache[i].index;
1214                         goto claim;
1215                     }
1216                     if (state->classCache[i].start[n] != termStart[n])
1217                         break;
1218                 }
1219             }
1220         }
1221         state->result->u.ucclass.index = state->classCount++;
1222     claim:
1223         /*
1224          * Call CalculateBitmapSize now as we want any errors it finds
1225          * to be reported during the parse phase, not at execution.
1226          */
1227         if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1228             return JS_FALSE;
1229         state->progLength += 3; /* CLASS, <index> */
1230         break;
1232     case '.':
1233         state->result = NewRENode(state, REOP_DOT);
1234         goto doSimple;
1235     case '*':
1236     case '+':
1237     case '?':
1238         js_ReportCompileErrorNumber(state->context, state->tokenStream,
1239                                     NULL, JSREPORT_ERROR,
1240                                     JSMSG_BAD_QUANTIFIER, state->cp - 1);
1241         return JS_FALSE;
1242     default:
1243         state->result = NewRENode(state, REOP_FLAT);
1244         if (!state->result)
1245             return JS_FALSE;
1246         state->result->u.flat.chr = c;
1247         state->result->u.flat.length = 1;
1248         state->result->kid = (void *) (state->cp - 1);
1249         state->progLength += 3;
1250         break;
1251     }
1252     return ParseQuantifier(state);
1255 static JSBool
1256 ParseQuantifier(CompilerState *state)
1258     RENode *term;
1259     term = state->result;
1260     if (state->cp < state->cpend) {
1261         switch (*state->cp) {
1262         case '+':
1263             state->result = NewRENode(state, REOP_QUANT);
1264             if (!state->result)
1265                 return JS_FALSE;
1266             state->result->u.range.min = 1;
1267             state->result->u.range.max = (uint16)-1;
1268             /* <PLUS>, <next> ... <ENDCHILD> */
1269             state->progLength += 4;
1270             goto quantifier;
1271         case '*':
1272             state->result = NewRENode(state, REOP_QUANT);
1273             if (!state->result)
1274                 return JS_FALSE;
1275             state->result->u.range.min = 0;
1276             state->result->u.range.max = (uint16)-1;
1277             /* <STAR>, <next> ... <ENDCHILD> */
1278             state->progLength += 4;
1279             goto quantifier;
1280         case '?':
1281             state->result = NewRENode(state, REOP_QUANT);
1282             if (!state->result)
1283                 return JS_FALSE;
1284             state->result->u.range.min = 0;
1285             state->result->u.range.max = 1;
1286             /* <OPT>, <next> ... <ENDCHILD> */
1287             state->progLength += 4;
1288             goto quantifier;
1289         case '{':       /* balance '}' */
1290             {
1291                 intN err;
1292                 uintN min, max;
1293                 jschar c;
1294                 const jschar *errp = state->cp++;
1296                 c = *state->cp;
1297                 if (JS7_ISDEC(c)) {
1298                     ++state->cp;
1299                     min = GetDecimalValue(c, 0xFFFF, NULL, state);
1300                     c = *state->cp;
1302                     if (min == OVERFLOW_VALUE) {
1303                         err = JSMSG_MIN_TOO_BIG;
1304                         goto quantError;
1305                     }
1306                     if (c == ',') {
1307                         c = *++state->cp;
1308                         if (JS7_ISDEC(c)) {
1309                             ++state->cp;
1310                             max = GetDecimalValue(c, 0xFFFF, NULL, state);
1311                             c = *state->cp;
1312                             if (max == OVERFLOW_VALUE) {
1313                                 err = JSMSG_MAX_TOO_BIG;
1314                                 goto quantError;
1315                             }
1316                             if (min > max) {
1317                                 err = JSMSG_OUT_OF_ORDER;
1318                                 goto quantError;
1319                             }
1320                         } else {
1321                             max = (uintN)-1;
1322                         }
1323                     } else {
1324                         max = min;
1325                     }
1326                     if (c == '}') {
1327                         state->result = NewRENode(state, REOP_QUANT);
1328                         if (!state->result)
1329                             return JS_FALSE;
1330                         state->result->u.range.min = min;
1331                         state->result->u.range.max = max;
1332                         /* QUANT, <min>, <max>, <next> ... <ENDCHILD> */
1333                         state->progLength += 8;
1334                         goto quantifier;
1335                     }
1336                 }
1337                 state->cp = errp;
1338                 return JS_TRUE;
1339 quantError:
1340                 js_ReportCompileErrorNumber(state->context,
1341                                             state->tokenStream,
1342                                             NULL, JSREPORT_ERROR,
1343                                             err, errp);
1344                 return JS_FALSE;
1345             }
1346         }
1347     }
1348     return JS_TRUE;
1350 quantifier:
1351     ++state->treeDepth;
1352     ++state->cp;
1353     state->result->kid = term;
1354     if (state->cp < state->cpend && *state->cp == '?') {
1355         ++state->cp;
1356         state->result->u.range.greedy = JS_FALSE;
1357     }
1358     else
1359         state->result->u.range.greedy = JS_TRUE;
1360     return JS_TRUE;
1363 #define CHECK_OFFSET(diff) (JS_ASSERT(((diff) >= -32768) && ((diff) <= 32767)))
1364 #define SET_OFFSET(pc,off) ((pc)[0] = JUMP_OFFSET_HI(off),                     \
1365                                  (pc)[1] = JUMP_OFFSET_LO(off))
1366 #define GET_OFFSET(pc)     ((int16)(((pc)[0] << 8) | (pc)[1]))
1367 #define OFFSET_LEN         (2)
1368 #define GET_ARG(pc)        GET_OFFSET(pc)
1369 #define SET_ARG(pc,arg)    SET_OFFSET(pc,arg)
1370 #define ARG_LEN            OFFSET_LEN
1372 /*
1373  * Recursively generate bytecode for the tree rooted at t. Iteratively.
1374  */
1376 typedef struct {
1377     RENode *nextAlt;
1378     jsbytecode *nextAltFixup, *nextTermFixup, *endTermFixup;
1379     RENode *continueNode;
1380     REOp continueOp;
1381 } EmitStateStackEntry;
1383 static jsbytecode *
1384 EmitREBytecode(CompilerState *state, JSRegExp *re, intN treeDepth,
1385                jsbytecode *pc, RENode *t)
1387     ptrdiff_t diff;
1388     RECharSet *charSet;
1389     EmitStateStackEntry *emitStateSP, *emitStateStack = NULL;
1390     REOp op;
1392     if (treeDepth) {
1393         emitStateStack =
1394             (EmitStateStackEntry *)JS_malloc(state->context,
1395                                              sizeof(EmitStateStackEntry) *
1396                                              treeDepth);
1397         if (!emitStateStack)
1398             return NULL;
1399     }
1400     emitStateSP = emitStateStack;
1401     op = t->op;
1403     while (JS_TRUE) {
1404         *pc++ = op;
1405         switch (op) {
1406         case REOP_EMPTY:
1407             --pc;
1408             break;
1410         case REOP_ALTPREREQ2:
1411         case REOP_ALTPREREQ:
1412             JS_ASSERT(emitStateSP);
1413             emitStateSP->endTermFixup = pc;
1414             pc += OFFSET_LEN;
1415             SET_ARG(pc, t->u.altprereq.ch1);
1416             pc += ARG_LEN;
1417             SET_ARG(pc, t->u.altprereq.ch2);
1418             pc += ARG_LEN;
1420             emitStateSP->nextAltFixup = pc;    /* address of next alternate */
1421             pc += OFFSET_LEN;
1423             emitStateSP->continueNode = t;
1424             emitStateSP->continueOp = REOP_JUMP;
1425             ++emitStateSP;
1426             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1427             t = (RENode *) t->kid;
1428             op = t->op;
1429             continue;
1431         case REOP_JUMP:
1432             emitStateSP->nextTermFixup = pc;    /* address of following term */
1433             pc += OFFSET_LEN;
1434             diff = pc - emitStateSP->nextAltFixup;
1435             CHECK_OFFSET(diff);
1436             SET_OFFSET(emitStateSP->nextAltFixup, diff);
1437             emitStateSP->continueOp = REOP_ENDALT;
1438             ++emitStateSP;
1439             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1440             t = (RENode *) t->u.kid2;
1441             op = t->op;
1442             continue;
1444         case REOP_ENDALT:
1445             diff = pc - emitStateSP->nextTermFixup;
1446             CHECK_OFFSET(diff);
1447             SET_OFFSET(emitStateSP->nextTermFixup, diff);
1448             if (t->op != REOP_ALT) {
1449                 diff = pc - emitStateSP->endTermFixup;
1450                 CHECK_OFFSET(diff);
1451                 SET_OFFSET(emitStateSP->endTermFixup, diff);
1452             }
1453             break;
1455         case REOP_ALT:
1456             JS_ASSERT(emitStateSP);
1457             emitStateSP->nextAltFixup = pc; /* address of pointer to next alternate */
1458             pc += OFFSET_LEN;
1459             emitStateSP->continueNode = t;
1460             emitStateSP->continueOp = REOP_JUMP;
1461             ++emitStateSP;
1462             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1463             t = (RENode *) t->kid;
1464             op = t->op;
1465             continue;
1467         case REOP_FLAT:
1468             /*
1469              * Consecutize FLAT's if possible.
1470              */
1471             if (t->kid) {
1472                 while (t->next &&
1473                        t->next->op == REOP_FLAT &&
1474                        (jschar*)t->kid + t->u.flat.length ==
1475                        (jschar*)t->next->kid) {
1476                     t->u.flat.length += t->next->u.flat.length;
1477                     t->next = t->next->next;
1478                 }
1479             }
1480             if (t->kid && (t->u.flat.length > 1)) {
1481                 if (state->flags & JSREG_FOLD)
1482                     pc[-1] = REOP_FLATi;
1483                 else
1484                     pc[-1] = REOP_FLAT;
1485                 SET_ARG(pc, (jschar *)t->kid - state->cpbegin);
1486                 pc += ARG_LEN;
1487                 SET_ARG(pc, t->u.flat.length);
1488                 pc += ARG_LEN;
1489             } else if (t->u.flat.chr < 256) {
1490                 if (state->flags & JSREG_FOLD)
1491                     pc[-1] = REOP_FLAT1i;
1492                 else
1493                     pc[-1] = REOP_FLAT1;
1494                 *pc++ = (jsbytecode) t->u.flat.chr;
1495             } else {
1496                 if (state->flags & JSREG_FOLD)
1497                     pc[-1] = REOP_UCFLAT1i;
1498                 else
1499                     pc[-1] = REOP_UCFLAT1;
1500                 SET_ARG(pc, t->u.flat.chr);
1501                 pc += ARG_LEN;
1502             }
1503             break;
1505         case REOP_LPAREN:
1506             JS_ASSERT(emitStateSP);
1507             SET_ARG(pc, t->u.parenIndex);
1508             pc += ARG_LEN;
1509             emitStateSP->continueNode = t;
1510             emitStateSP->continueOp = REOP_RPAREN;
1511             ++emitStateSP;
1512             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1513             t = (RENode *) t->kid;
1514             op = t->op;
1515             continue;
1516         case REOP_RPAREN:
1517             SET_ARG(pc, t->u.parenIndex);
1518             pc += ARG_LEN;
1519             break;
1521         case REOP_BACKREF:
1522             SET_ARG(pc, t->u.parenIndex);
1523             pc += ARG_LEN;
1524             break;
1525         case REOP_ASSERT:
1526             JS_ASSERT(emitStateSP);
1527             emitStateSP->nextTermFixup = pc;
1528             pc += OFFSET_LEN;
1529             emitStateSP->continueNode = t;
1530             emitStateSP->continueOp = REOP_ASSERTTEST;
1531             ++emitStateSP;
1532             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1533             t = (RENode *) t->kid;
1534             op = t->op;
1535             continue;
1536         case REOP_ASSERTTEST:
1537         case REOP_ASSERTNOTTEST:
1538             diff = pc - emitStateSP->nextTermFixup;
1539             CHECK_OFFSET(diff);
1540             SET_OFFSET(emitStateSP->nextTermFixup, diff);
1541             break;
1542         case REOP_ASSERT_NOT:
1543             JS_ASSERT(emitStateSP);
1544             emitStateSP->nextTermFixup = pc;
1545             pc += OFFSET_LEN;
1546             emitStateSP->continueNode = t;
1547             emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1548             ++emitStateSP;
1549             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1550             t = (RENode *) t->kid;
1551             op = t->op;
1552             continue;
1553         case REOP_QUANT:
1554             JS_ASSERT(emitStateSP);
1555             if (t->u.range.min == 0 && t->u.range.max == (uint16)-1) {
1556                 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1557             } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1558                 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1559             } else if (t->u.range.min == 1 && t->u.range.max == (uint16) -1) {
1560                 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1561             } else {
1562                 if (!t->u.range.greedy)
1563                     pc[-1] = REOP_MINIMALQUANT;
1564                 SET_ARG(pc, t->u.range.min);
1565                 pc += ARG_LEN;
1566                 SET_ARG(pc, t->u.range.max);
1567                 pc += ARG_LEN;
1568             }
1569             emitStateSP->nextTermFixup = pc;
1570             pc += OFFSET_LEN;
1571             emitStateSP->continueNode = t;
1572             emitStateSP->continueOp = REOP_ENDCHILD;
1573             ++emitStateSP;
1574             JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
1575             t = (RENode *) t->kid;
1576             op = t->op;
1577             continue;
1578         case REOP_ENDCHILD:
1579             diff = pc - emitStateSP->nextTermFixup;
1580             CHECK_OFFSET(diff);
1581             SET_OFFSET(emitStateSP->nextTermFixup, diff);
1582             break;
1583         case REOP_CLASS:
1584             if (!t->u.ucclass.sense)
1585                 pc[-1] = REOP_NCLASS;
1586             SET_ARG(pc, t->u.ucclass.index);
1587             pc += ARG_LEN;
1588             charSet = &re->classList[t->u.ucclass.index];
1589             charSet->converted = JS_FALSE;
1590             charSet->length = t->u.ucclass.bmsize;
1591             charSet->u.src.startIndex = t->u.ucclass.startIndex;
1592             charSet->u.src.length = t->u.ucclass.kidlen;
1593             charSet->sense = t->u.ucclass.sense;
1594             break;
1595         default:
1596             break;
1597         }
1598         t = t->next;
1599         if (!t) {
1600             if (emitStateSP == emitStateStack)
1601                 break;
1602             --emitStateSP;
1603             t = emitStateSP->continueNode;
1604             op = emitStateSP->continueOp;
1605         }
1606         else
1607             op = t->op;
1608     }
1609     if (emitStateStack)
1610         JS_free(state->context, emitStateStack);
1611     return pc;
1615 JSRegExp *
1616 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1617              JSString *str, uintN flags, JSBool flat)
1619     JSRegExp *re;
1620     void *mark;
1621     CompilerState state;
1622     size_t resize;
1623     jsbytecode *endPC;
1624     uintN i;
1625     size_t len;
1627     re = NULL;
1628     mark = JS_ARENA_MARK(&cx->tempPool);
1630     state.context = cx;
1631     state.tokenStream = ts;
1632     state.cpbegin = state.cp = JSSTRING_CHARS(str);
1633     state.cpend = state.cp + JSSTRING_LENGTH(str);
1634     state.flags = flags;
1635     state.parenCount = 0;
1636     state.classCount = 0;
1637     state.progLength = 0;
1638     state.treeDepth = 0;
1639     for (i = 0; i < CLASS_CACHE_SIZE; i++)
1640         state.classCache[i].start = NULL;
1642     len = JSSTRING_LENGTH(str);
1644     if (len != 0 && flat) {
1645         state.result = NewRENode(&state, REOP_FLAT);
1646         state.result->u.flat.chr = *state.cpbegin;
1647         state.result->u.flat.length = JSSTRING_LENGTH(str);
1648         state.result->kid = (void *) state.cpbegin;
1649         state.progLength += 5;
1650     } else {
1651         if (!ParseRegExp(&state))
1652             goto out;
1653     }
1654     resize = sizeof *re + state.progLength + 1;
1655     re = (JSRegExp *) JS_malloc(cx, JS_ROUNDUP(resize, sizeof(jsword)));
1656     if (!re)
1657         goto out;
1659     re->nrefs = 1;
1660     re->classCount = state.classCount;
1661     if (re->classCount) {
1662         re->classList = (RECharSet *)
1663             JS_malloc(cx, re->classCount * sizeof(RECharSet));
1664         if (!re->classList) {
1665             js_DestroyRegExp(cx, re);
1666             re = NULL;
1667             goto out;
1668         }
1669     } else {
1670         re->classList = NULL;
1671     }
1672     endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
1673     if (!endPC) {
1674         js_DestroyRegExp(cx, re);
1675         re = NULL;
1676         goto out;
1677     }
1678     *endPC++ = REOP_END;
1679     JS_ASSERT(endPC <= (re->program + (state.progLength + 1)));
1681     re->flags = flags;
1682     re->cloneIndex = 0;
1683     re->parenCount = state.parenCount;
1684     re->source = str;
1686 out:
1687     JS_ARENA_RELEASE(&cx->tempPool, mark);
1688     return re;
1691 JSRegExp *
1692 js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
1693                 JSString *str, JSString *opt, JSBool flat)
1695     uintN flags;
1696     jschar *s;
1697     size_t i, n;
1698     char charBuf[2];
1700     flags = 0;
1701     if (opt) {
1702         s = JSSTRING_CHARS(opt);
1703         for (i = 0, n = JSSTRING_LENGTH(opt); i < n; i++) {
1704             switch (s[i]) {
1705             case 'g':
1706                 flags |= JSREG_GLOB;
1707                 break;
1708             case 'i':
1709                 flags |= JSREG_FOLD;
1710                 break;
1711             case 'm':
1712                 flags |= JSREG_MULTILINE;
1713                 break;
1714             default:
1715                 charBuf[0] = (char)s[i];
1716                 charBuf[1] = '\0';
1717                 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1718                                             JSMSG_BAD_FLAG, charBuf);
1719                 return NULL;
1720             }
1721         }
1722     }
1723     return js_NewRegExp(cx, ts, str, flags, flat);
1727 #define HOLD_REGEXP(cx, re) JS_ATOMIC_INCREMENT(&(re)->nrefs)
1728 #define DROP_REGEXP(cx, re) js_DestroyRegExp(cx, re)
1730 /*
1731  * Save the current state of the match - the position in the input
1732  * text as well as the position in the bytecode. The state of any
1733  * parent expressions is also saved (preceding state).
1734  * Contents of parenCount parentheses from parenIndex are also saved.
1735  */
1736 static REBackTrackData *
1737 PushBackTrackState(REGlobalData *gData, REOp op,
1738                    jsbytecode *target, REMatchState *x, const jschar *cp,
1739                    uintN parenIndex, intN parenCount)
1741     intN i;
1742     REBackTrackData *result =
1743         (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1745     size_t sz = sizeof(REBackTrackData) +
1746                 gData->stateStackTop * sizeof(REProgState) +
1747                 parenCount * sizeof(RECapture);
1749     ptrdiff_t btsize = gData->backTrackStackSize;
1750     ptrdiff_t btincr = ((char *)result + sz) -
1751                        ((char *)gData->backTrackStack + btsize);
1753     if (btincr > 0) {
1754         ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1756         btincr = JS_ROUNDUP(btincr, btsize);
1757         JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
1758                            &gData->pool, btsize, btincr);
1759         if (!gData->backTrackStack)
1760             return NULL;
1761         gData->backTrackStackSize = btsize + btincr;
1762         result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1763     }
1764     gData->backTrackSP = result;
1765     result->sz = gData->cursz;
1766     gData->cursz = sz;
1768     result->backtrack_op = op;
1769     result->backtrack_pc = target;
1770     result->cp = cp;
1771     result->parenCount = parenCount;
1773     result->saveStateStackTop = gData->stateStackTop;
1774     JS_ASSERT(gData->stateStackTop);
1775     memcpy(result + 1, gData->stateStack,
1776            sizeof(REProgState) * result->saveStateStackTop);
1778     /* FIXME: parenCount should be uintN */
1779     JS_ASSERT(parenCount >= 0);
1780     if (parenCount > 0) {
1781         result->parenIndex = parenIndex;
1782         memcpy((char *)(result + 1) +
1783                sizeof(REProgState) * result->saveStateStackTop,
1784                &x->parens[parenIndex],
1785                sizeof(RECapture) * parenCount);
1786         for (i = 0; i < parenCount; i++)
1787             x->parens[parenIndex + i].index = -1;
1788     }
1790     return result;
1794 /*
1795  *   Consecutive literal characters.
1796  */
1797 #if 0
1798 static REMatchState *
1799 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
1800              intN length)
1802     intN i;
1803     if (x->cp + length > gData->cpend)
1804         return NULL;
1805     for (i = 0; i < length; i++) {
1806         if (matchChars[i] != x->cp[i])
1807             return NULL;
1808     }
1809     x->cp += length;
1810     return x;
1812 #endif
1814 static REMatchState *
1815 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
1816               intN length)
1818     intN i;
1819     if (x->cp + length > gData->cpend)
1820         return NULL;
1821     for (i = 0; i < length; i++) {
1822         if (upcase(matchChars[i]) != upcase(x->cp[i]))
1823             return NULL;
1824     }
1825     x->cp += length;
1826     return x;
1829 /*
1830  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
1831  * 2. If E is not a character then go to step 6.
1832  * 3. Let ch be E's character.
1833  * 4. Let A be a one-element RECharSet containing the character ch.
1834  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
1835  * 6. E must be an integer. Let n be that integer.
1836  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
1837  * 8. Return an internal Matcher closure that takes two arguments, a State x
1838  *    and a Continuation c, and performs the following:
1839  *     1. Let cap be x's captures internal array.
1840  *     2. Let s be cap[n].
1841  *     3. If s is undefined, then call c(x) and return its result.
1842  *     4. Let e be x's endIndex.
1843  *     5. Let len be s's length.
1844  *     6. Let f be e+len.
1845  *     7. If f>InputLength, return failure.
1846  *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
1847  *        such that Canonicalize(s[i]) is not the same character as
1848  *        Canonicalize(Input [e+i]), then return failure.
1849  *     9. Let y be the State (f, cap).
1850  *     10. Call c(y) and return its result.
1851  */
1852 static REMatchState *
1853 BackrefMatcher(REGlobalData *gData, REMatchState *x, uintN parenIndex)
1855     uintN len;
1856     uintN i;
1857     const jschar *parenContent;
1858     RECapture *s = &x->parens[parenIndex];
1859     if (s->index == -1)
1860         return x;
1862     len = s->length;
1863     if (x->cp + len > gData->cpend)
1864         return NULL;
1866     parenContent = &gData->cpbegin[s->index];
1867     if (gData->regexp->flags & JSREG_FOLD) {
1868         for (i = 0; i < len; i++) {
1869             if (upcase(parenContent[i]) != upcase(x->cp[i]))
1870                 return NULL;
1871         }
1872     } else {
1873         for (i = 0; i < len; i++) {
1874             if (parenContent[i] != x->cp[i])
1875                 return NULL;
1876         }
1877     }
1878     x->cp += len;
1879     return x;
1883 /* Add a single character to the RECharSet */
1884 static void
1885 AddCharacterToCharSet(RECharSet *cs, jschar c)
1887     uintN byteIndex = (uintN)(c >> 3);
1888     JS_ASSERT(c <= cs->length);
1889     cs->u.bits[byteIndex] |= 1 << (c & 0x7);
1893 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
1894 static void
1895 AddCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
1897     uintN i;
1899     uintN byteIndex1 = (uintN)(c1 >> 3);
1900     uintN byteIndex2 = (uintN)(c2 >> 3);
1902     JS_ASSERT((c2 <= cs->length) && (c1 <= c2));
1904     c1 &= 0x7;
1905     c2 &= 0x7;
1907     if (byteIndex1 == byteIndex2) {
1908         cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
1909     } else {
1910         cs->u.bits[byteIndex1] |= 0xFF << c1;
1911         for (i = byteIndex1 + 1; i < byteIndex2; i++)
1912             cs->u.bits[i] = 0xFF;
1913         cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
1914     }
1917 /* Compile the source of the class into a RECharSet */
1918 static JSBool
1919 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
1921     const jschar *src = JSSTRING_CHARS(gData->regexp->source) +
1922                         charSet->u.src.startIndex;
1923     const jschar *end = src + charSet->u.src.length;
1925     JSBool inRange = JS_FALSE;
1926     jschar rangeStart = 0;
1927     uintN byteLength, n;
1928     jschar c, thisCh;
1929     intN nDigits, i;
1931     JS_ASSERT(!charSet->converted);
1932     charSet->converted = JS_TRUE;
1934     byteLength = (charSet->length >> 3) + 1;
1935     charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
1936     if (!charSet->u.bits)
1937         return JS_FALSE;
1938     memset(charSet->u.bits, 0, byteLength);
1940     if (src == end)
1941         return JS_TRUE;
1943     if (*src == '^') {
1944         JS_ASSERT(charSet->sense == JS_FALSE);
1945         ++src;
1946     }
1947     else
1948         JS_ASSERT(charSet->sense == JS_TRUE);
1951     while (src != end) {
1952         switch (*src) {
1953         case '\\':
1954             ++src;
1955             c = *src++;
1956             switch (c) {
1957             case 'b':
1958                 thisCh = 0x8;
1959                 break;
1960             case 'f':
1961                 thisCh = 0xC;
1962                 break;
1963             case 'n':
1964                 thisCh = 0xA;
1965                 break;
1966             case 'r':
1967                 thisCh = 0xD;
1968                 break;
1969             case 't':
1970                 thisCh = 0x9;
1971                 break;
1972             case 'v':
1973                 thisCh = 0xB;
1974                 break;
1975             case 'c':
1976                 if (src + 1 < end && JS_ISWORD(src[1])) {
1977                     thisCh = (jschar)(*src++ & 0x1F);
1978                 } else {
1979                     --src;
1980                     thisCh = '\\';
1981                 }
1982                 break;
1983             case 'x':
1984                 nDigits = 2;
1985                 goto lexHex;
1986             case 'u':
1987                 nDigits = 4;
1988             lexHex:
1989                 n = 0;
1990                 for (i = 0; (i < nDigits) && (src < end); i++) {
1991                     uintN digit;
1992                     c = *src++;
1993                     if (!isASCIIHexDigit(c, &digit)) {
1994                         /*
1995                          * Back off to accepting the original '\'
1996                          * as a literal
1997                          */
1998                         src -= i + 1;
1999                         n = '\\';
2000                         break;
2001                     }
2002                     n = (n << 4) | digit;
2003                 }
2004                 thisCh = (jschar)n;
2005                 break;
2006             case '0':
2007             case '1':
2008             case '2':
2009             case '3':
2010             case '4':
2011             case '5':
2012             case '6':
2013             case '7':
2014                 /*
2015                  *  This is a non-ECMA extension - decimal escapes (in this
2016                  *  case, octal!) are supposed to be an error inside class
2017                  *  ranges, but supported here for backwards compatibility.
2018                  *
2019                  */
2020                 n = JS7_UNDEC(c);
2021                 c = *src;
2022                 if ('0' <= c && c <= '7') {
2023                     src++;
2024                     n = 8 * n + JS7_UNDEC(c);
2025                     c = *src;
2026                     if ('0' <= c && c <= '7') {
2027                         src++;
2028                         i = 8 * n + JS7_UNDEC(c);
2029                         if (i <= 0377)
2030                             n = i;
2031                         else
2032                             src--;
2033                     }
2034                 }
2035                 thisCh = (jschar)n;
2036                 break;
2038             case 'd':
2039                 AddCharacterRangeToCharSet(charSet, '0', '9');
2040                 continue;   /* don't need range processing */
2041             case 'D':
2042                 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2043                 AddCharacterRangeToCharSet(charSet,
2044                                            (jschar)('9' + 1),
2045                                            (jschar)charSet->length);
2046                 continue;
2047             case 's':
2048                 for (i = (intN)charSet->length; i >= 0; i--)
2049                     if (JS_ISSPACE(i))
2050                         AddCharacterToCharSet(charSet, (jschar)i);
2051                 continue;
2052             case 'S':
2053                 for (i = (intN)charSet->length; i >= 0; i--)
2054                     if (!JS_ISSPACE(i))
2055                         AddCharacterToCharSet(charSet, (jschar)i);
2056                 continue;
2057             case 'w':
2058                 for (i = (intN)charSet->length; i >= 0; i--)
2059                     if (JS_ISWORD(i))
2060                         AddCharacterToCharSet(charSet, (jschar)i);
2061                 continue;
2062             case 'W':
2063                 for (i = (intN)charSet->length; i >= 0; i--)
2064                     if (!JS_ISWORD(i))
2065                         AddCharacterToCharSet(charSet, (jschar)i);
2066                 continue;
2067             default:
2068                 thisCh = c;
2069                 break;
2071             }
2072             break;
2074         default:
2075             thisCh = *src++;
2076             break;
2078         }
2079         if (inRange) {
2080             if (gData->regexp->flags & JSREG_FOLD) {
2081                 AddCharacterRangeToCharSet(charSet, upcase(rangeStart),
2082                                                     upcase(thisCh));
2083                 AddCharacterRangeToCharSet(charSet, downcase(rangeStart),
2084                                                     downcase(thisCh));
2085             } else {
2086                 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2087             }
2088             inRange = JS_FALSE;
2089         } else {
2090             if (gData->regexp->flags & JSREG_FOLD) {
2091                 AddCharacterToCharSet(charSet, upcase(thisCh));
2092                 AddCharacterToCharSet(charSet, downcase(thisCh));
2093             } else {
2094                 AddCharacterToCharSet(charSet, thisCh);
2095             }
2096             if (src < end - 1) {
2097                 if (*src == '-') {
2098                     ++src;
2099                     inRange = JS_TRUE;
2100                     rangeStart = thisCh;
2101                 }
2102             }
2103         }
2104     }
2105     return JS_TRUE;
2108 void
2109 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2111     if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2112         if (re->classList) {
2113             uintN i;
2114             for (i = 0; i < re->classCount; i++) {
2115                 if (re->classList[i].converted)
2116                     JS_free(cx, re->classList[i].u.bits);
2117                 re->classList[i].u.bits = NULL;
2118             }
2119             JS_free(cx, re->classList);
2120         }
2121         JS_free(cx, re);
2122     }
2125 static JSBool
2126 ReallocStateStack(REGlobalData *gData)
2128     uint16 limit = gData->stateStackLimit;
2129     size_t sz = sizeof(REProgState) * limit;
2131     JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2132     if (!gData->stateStack) {
2133         gData->ok = JS_FALSE;
2134         return JS_FALSE;
2135     }
2136     gData->stateStackLimit = limit + limit;
2137     return JS_TRUE;
2140 #define PUSH_STATE_STACK(data)                                                \
2141     JS_BEGIN_MACRO                                                            \
2142         ++(data)->stateStackTop;                                              \
2143         if ((data)->stateStackTop == (data)->stateStackLimit &&               \
2144             !ReallocStateStack((data))) {                                     \
2145             return NULL;                                                      \
2146         }                                                                     \
2147     JS_END_MACRO
2149 /*
2150  * Apply the current op against the given input to see if it's going to match
2151  * or fail. Return false if we don't get a match, true if we do and update the
2152  * state of the input and pc if the update flag is true.
2153  */
2154 static REMatchState *
2155 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2156             jsbytecode **startpc, JSBool update)
2158     REMatchState *result = NULL;
2159     jschar matchCh;
2160     uintN parenIndex;
2161     intN offset, length, index;
2162     jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
2163     jschar *source;
2164     const jschar *startcp = x->cp;
2165     jschar ch;
2166     RECharSet *charSet;
2168     switch (op) {
2169     case REOP_BOL:
2170         if (x->cp != gData->cpbegin) {
2171             if (!gData->cx->regExpStatics.multiline &&
2172                 !(gData->regexp->flags & JSREG_MULTILINE)) {
2173                 break;
2174             }
2175             if (!RE_IS_LINE_TERM(x->cp[-1]))
2176                 break;
2177         }
2178         result = x;
2179         break;
2180     case REOP_EOL:
2181         if (x->cp != gData->cpend) {
2182             if (!gData->cx->regExpStatics.multiline &&
2183                 !(gData->regexp->flags & JSREG_MULTILINE)) {
2184                 break;
2185             }
2186             if (!RE_IS_LINE_TERM(*x->cp))
2187                 break;
2188         }
2189         result = x;
2190         break;
2191     case REOP_WBDRY:
2192         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2193             !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2194             result = x;
2195         }
2196         break;
2197     case REOP_WNONBDRY:
2198         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2199             (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2200             result = x;
2201         }
2202         break;
2203     case REOP_DOT:
2204         if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2205             result = x;
2206             result->cp++;
2207         }
2208         break;
2209     case REOP_DIGIT:
2210         if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
2211             result = x;
2212             result->cp++;
2213         }
2214         break;
2215     case REOP_NONDIGIT:
2216         if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
2217             result = x;
2218             result->cp++;
2219         }
2220         break;
2221     case REOP_ALNUM:
2222         if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2223             result = x;
2224             result->cp++;
2225         }
2226         break;
2227     case REOP_NONALNUM:
2228         if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2229             result = x;
2230             result->cp++;
2231         }
2232         break;
2233     case REOP_SPACE:
2234         if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2235             result = x;
2236             result->cp++;
2237         }
2238         break;
2239     case REOP_NONSPACE:
2240         if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2241             result = x;
2242             result->cp++;
2243         }
2244         break;
2245     case REOP_BACKREF:
2246         parenIndex = GET_ARG(pc);
2247         pc += ARG_LEN;
2248         result = BackrefMatcher(gData, x, parenIndex);
2249         break;
2250     case REOP_FLAT:
2251         offset = GET_ARG(pc);
2252         pc += ARG_LEN;
2253         length = GET_ARG(pc);
2254         pc += ARG_LEN;
2255         source = JSSTRING_CHARS(gData->regexp->source) + offset;
2256         if (x->cp + length <= gData->cpend) {
2257             for (index = 0; index < length; index++) {
2258                 if (source[index] != x->cp[index])
2259                     return NULL;
2260             }
2261             x->cp += length;
2262             result = x;
2263         }
2264         break;
2265     case REOP_FLAT1:
2266         matchCh = *pc++;
2267         if (x->cp != gData->cpend && *x->cp == matchCh) {
2268             result = x;
2269             result->cp++;
2270         }
2271         break;
2272     case REOP_FLATi:
2273         offset = GET_ARG(pc);
2274         pc += ARG_LEN;
2275         length = GET_ARG(pc);
2276         pc += ARG_LEN;
2277         source = JSSTRING_CHARS(gData->regexp->source);
2278         result = FlatNIMatcher(gData, x, source + offset, length);
2279         break;
2280     case REOP_FLAT1i:
2281         matchCh = *pc++;
2282         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2283             result = x;
2284             result->cp++;
2285         }
2286         break;
2287     case REOP_UCFLAT1:
2288         matchCh = GET_ARG(pc);
2289         pc += ARG_LEN;
2290         if (x->cp != gData->cpend && *x->cp == matchCh) {
2291             result = x;
2292             result->cp++;
2293         }
2294         break;
2295     case REOP_UCFLAT1i:
2296         matchCh = GET_ARG(pc);
2297         pc += ARG_LEN;
2298         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2299             result = x;
2300             result->cp++;
2301         }
2302         break;
2303     case REOP_CLASS:
2304         index = GET_ARG(pc);
2305         pc += ARG_LEN;
2306         if (x->cp != gData->cpend) {
2307             charSet = &gData->regexp->classList[index];
2308             JS_ASSERT(charSet->converted);
2309             ch = *x->cp;
2310             index = ch >> 3;
2311             if (charSet->length != 0 &&
2312                 ch <= charSet->length &&
2313                 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2314                 result = x;
2315                 result->cp++;
2316             }
2317         }
2318         break;
2319     case REOP_NCLASS:
2320         index = GET_ARG(pc);
2321         pc += ARG_LEN;
2322         if (x->cp != gData->cpend) {
2323             charSet = &gData->regexp->classList[index];
2324             JS_ASSERT(charSet->converted);
2325             ch = *x->cp;
2326             index = ch >> 3;
2327             if (charSet->length == 0 ||
2328                 ch > charSet->length ||
2329                 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2330                 result = x;
2331                 result->cp++;
2332             }
2333         }
2334         break;
2335     default:
2336         JS_ASSERT(JS_FALSE);
2337     }
2338     if (result) {
2339         if (update)
2340             *startpc = pc;
2341         else
2342             x->cp = startcp;
2343         return result;
2344     }
2345     x->cp = startcp;
2346     return NULL;
2349 static REMatchState *
2350 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2352     REMatchState *result = NULL;
2353     REBackTrackData *backTrackData;
2354     intN offset;
2355     jsbytecode *nextpc;
2356     REOp nextop;
2357     RECapture *cap;
2358     REProgState *curState;
2359     const jschar *startcp;
2360     uintN parenIndex, k;
2361     uintN parenSoFar = 0;
2363     jschar matchCh1, matchCh2;
2364     RECharSet *charSet;
2366     JSBool anchor;
2367     jsbytecode *pc = gData->regexp->program;
2368     REOp op = (REOp) *pc++;
2370     /*
2371      * If the first node is a simple match, step the index into the string
2372      * until that match is made, or fail if it can't be found at all.
2373      */
2374     if (REOP_IS_SIMPLE(op)) {
2375         anchor = JS_FALSE;
2376         while (x->cp <= gData->cpend) {
2377             nextpc = pc;    /* reset back to start each time */
2378             result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2379             if (result) {
2380                 anchor = JS_TRUE;
2381                 x = result;
2382                 pc = nextpc;    /* accept skip to next opcode */
2383                 op = (REOp) *pc++;
2384                 break;
2385             }
2386             gData->skipped++;
2387             x->cp++;
2388         }
2389         if (!anchor)
2390             return NULL;
2391     }
2393     while (JS_TRUE) {
2394         if (REOP_IS_SIMPLE(op)) {
2395             result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2396         } else {
2397             curState = &gData->stateStack[gData->stateStackTop];
2398             switch (op) {
2399             case REOP_EMPTY:
2400                 result = x;
2401                 break;
2403             case REOP_ALTPREREQ2:
2404                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
2405                 pc += ARG_LEN;
2406                 matchCh2 = GET_ARG(pc);
2407                 pc += ARG_LEN;
2408                 k = GET_ARG(pc);
2409                 pc += ARG_LEN;
2411                 if (x->cp != gData->cpend) {
2412                     if (*x->cp == matchCh2)
2413                         goto doAlt;
2415                     charSet = &gData->regexp->classList[k];
2416                     if (!charSet->converted)
2417                         if (!ProcessCharSet(gData, charSet))
2418                             return NULL;
2419                     matchCh1 = *x->cp;
2420                     k = matchCh1 >> 3;
2421                     if ((charSet->length == 0 ||
2422                          matchCh1 > charSet->length ||
2423                          !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2424                         charSet->sense) {
2425                         goto doAlt;
2426                     }
2427                 }
2428                 result = NULL;
2429                 break;
2431             case REOP_ALTPREREQ:
2432                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
2433                 pc += ARG_LEN;
2434                 matchCh1 = GET_ARG(pc);
2435                 pc += ARG_LEN;
2436                 matchCh2 = GET_ARG(pc);
2437                 pc += ARG_LEN;
2438                 if (x->cp == gData->cpend ||
2439                     (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2440                     result = NULL;
2441                     break;
2442                 }
2443                 /* else false thru... */
2445             case REOP_ALT:
2446             doAlt:
2447                 nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
2448                 pc += ARG_LEN;                  /* start of this alternate */
2449                 curState->parenSoFar = parenSoFar;
2450                 PUSH_STATE_STACK(gData);
2451                 op = (REOp) *pc++;
2452                 startcp = x->cp;
2453                 if (REOP_IS_SIMPLE(op)) {
2454                     if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2455                         op = (REOp) *nextpc++;
2456                         pc = nextpc;
2457                         continue;
2458                     }
2459                     result = x;
2460                     op = (REOp) *pc++;
2461                 }
2462                 nextop = (REOp) *nextpc++;
2463                 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2464                     return NULL;
2465                 continue;
2467             /*
2468              * Occurs at (succesful) end of REOP_ALT,
2469              */
2470             case REOP_JUMP:
2471                 --gData->stateStackTop;
2472                 offset = GET_OFFSET(pc);
2473                 pc += offset;
2474                 op = (REOp) *pc++;
2475                 continue;
2477             /*
2478              * Occurs at last (succesful) end of REOP_ALT,
2479              */
2480             case REOP_ENDALT:
2481                 --gData->stateStackTop;
2482                 op = (REOp) *pc++;
2483                 continue;
2485             case REOP_LPAREN:
2486                 parenIndex = GET_ARG(pc);
2487                 if (parenIndex + 1 > parenSoFar)
2488                     parenSoFar = parenIndex + 1;
2489                 pc += ARG_LEN;
2490                 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2491                 x->parens[parenIndex].length = 0;
2492                 op = (REOp) *pc++;
2493                 continue;
2494             case REOP_RPAREN:
2495                 parenIndex = GET_ARG(pc);
2496                 pc += ARG_LEN;
2497                 cap = &x->parens[parenIndex];
2498                 cap->length = x->cp - (gData->cpbegin + cap->index);
2499                 op = (REOp) *pc++;
2500                 continue;
2502             case REOP_ASSERT:
2503                 nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
2504                 pc += ARG_LEN;                 /* start of ASSERT child */
2505                 op = (REOp) *pc++;
2506                 if (REOP_IS_SIMPLE(op) &&
2507                     !SimpleMatch(gData, x, op, &pc, JS_FALSE)) {
2508                     result = NULL;
2509                     break;
2510                 }
2511                 curState->u.assertion.top =
2512                     (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2513                 curState->u.assertion.sz = gData->cursz;
2514                 curState->index = x->cp - gData->cpbegin;
2515                 curState->parenSoFar = parenSoFar;
2516                 PUSH_STATE_STACK(gData);
2517                 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2518                                         nextpc, x, x->cp, 0, 0)) {
2519                     return NULL;
2520                 }
2521                 continue;
2522             case REOP_ASSERT_NOT:
2523                 nextpc = pc + GET_OFFSET(pc);
2524                 pc += ARG_LEN;
2525                 op = (REOp) *pc++;
2526                 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2527                     SimpleMatch(gData, x, op, &pc, JS_FALSE) &&
2528                     pc == nextpc) {
2529                     result = NULL;
2530                     break;
2531                 }
2532                 curState->u.assertion.top
2533                     = (char *)gData->backTrackSP -
2534                       (char *)gData->backTrackStack;
2535                 curState->u.assertion.sz = gData->cursz;
2536                 curState->index = x->cp - gData->cpbegin;
2537                 curState->parenSoFar = parenSoFar;
2538                 PUSH_STATE_STACK(gData);
2539                 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2540                                         nextpc, x, x->cp, 0, 0))
2541                     return NULL;
2542                 continue;
2543             case REOP_ASSERTTEST:
2544                 --gData->stateStackTop;
2545                 --curState;
2546                 x->cp = gData->cpbegin + curState->index;
2547                 gData->backTrackSP =
2548                     (REBackTrackData *) ((char *)gData->backTrackStack +
2549                                          curState->u.assertion.top);
2550                 gData->cursz = curState->u.assertion.sz;
2551                 if (result)
2552                     result = x;
2553                 break;
2554             case REOP_ASSERTNOTTEST:
2555                 --gData->stateStackTop;
2556                 --curState;
2557                 x->cp = gData->cpbegin + curState->index;
2558                 gData->backTrackSP =
2559                     (REBackTrackData *) ((char *)gData->backTrackStack +
2560                                          curState->u.assertion.top);
2561                 gData->cursz = curState->u.assertion.sz;
2562                 result = (!result) ? x : NULL;
2563                 break;
2565             case REOP_END:
2566                 if (x)
2567                     return x;
2568                 break;
2570             case REOP_STAR:
2571                 curState->u.quantifier.min = 0;
2572                 curState->u.quantifier.max = (uint16)-1;
2573                 goto quantcommon;
2574             case REOP_PLUS:
2575                 curState->u.quantifier.min = 1;
2576                 curState->u.quantifier.max = (uint16)-1;
2577                 goto quantcommon;
2578             case REOP_OPT:
2579                 curState->u.quantifier.min = 0;
2580                 curState->u.quantifier.max = 1;
2581                 goto quantcommon;
2582             case REOP_QUANT:
2583                 curState->u.quantifier.min = GET_ARG(pc);
2584                 pc += ARG_LEN;
2585                 curState->u.quantifier.max = GET_ARG(pc);
2586                 pc += ARG_LEN;
2587             quantcommon:
2588                 if (curState->u.quantifier.max == 0) {
2589                     pc = pc + GET_OFFSET(pc);
2590                     op = (REOp) *pc++;
2591                     result = x;
2592                     continue;
2593                 }
2594                 /* Step over <next> */
2595                 nextpc = pc + ARG_LEN;
2596                 op = (REOp) *nextpc++;
2597                 startcp = x->cp;
2598                 if (REOP_IS_SIMPLE(op)) {
2599                     if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
2600                         if (curState->u.quantifier.min == 0)
2601                             result = x;
2602                         else
2603                             result = NULL;
2604                         pc = pc + GET_OFFSET(pc);
2605                         break;
2606                     }
2607                     op = (REOp) *nextpc++;
2608                     result = x;
2609                 }
2610                 curState->index = startcp - gData->cpbegin;
2611                 curState->continue_op = REOP_REPEAT;
2612                 curState->continue_pc = pc;
2613                 curState->parenSoFar = parenSoFar;
2614                 PUSH_STATE_STACK(gData);
2615                 if (curState->u.quantifier.min == 0 &&
2616                     !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2617                                         0, 0)) {
2618                     return NULL;
2619                 }
2620                 pc = nextpc;
2621                 continue;
2623             case REOP_ENDCHILD: /* marks the end of a quantifier child */
2624                 pc = curState[-1].continue_pc;
2625                 op = curState[-1].continue_op;
2626                 continue;
2628             case REOP_REPEAT:
2629                 --curState;
2630                 do {
2631                     --gData->stateStackTop;
2632                     if (!result) {
2633                         /* Failed, see if we have enough children. */
2634                         if (curState->u.quantifier.min == 0)
2635                             goto repeatDone;
2636                         goto break_switch;
2637                     }
2638                     if (curState->u.quantifier.min == 0 &&
2639                         x->cp == gData->cpbegin + curState->index) {
2640                         /* matched an empty string, that'll get us nowhere */
2641                         result = NULL;
2642                         goto break_switch;
2643                     }
2644                     if (curState->u.quantifier.min != 0)
2645                         curState->u.quantifier.min--;
2646                     if (curState->u.quantifier.max != (uint16) -1)
2647                         curState->u.quantifier.max--;
2648                     if (curState->u.quantifier.max == 0)
2649                         goto repeatDone;
2650                     nextpc = pc + ARG_LEN;
2651                     nextop = (REOp) *nextpc;
2652                     startcp = x->cp;
2653                     if (REOP_IS_SIMPLE(nextop)) {
2654                         nextpc++;
2655                         if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
2656                             if (curState->u.quantifier.min == 0)
2657                                 goto repeatDone;
2658                             result = NULL;
2659                             goto break_switch;
2660                         }
2661                         result = x;
2662                     }
2663                     curState->index = startcp - gData->cpbegin;
2664                     PUSH_STATE_STACK(gData);
2665                     if (curState->u.quantifier.min == 0 &&
2666                         !PushBackTrackState(gData, REOP_REPEAT,
2667                                             pc, x, startcp,
2668                                             curState->parenSoFar,
2669                                             parenSoFar -
2670                                             curState->parenSoFar)) {
2671                         return NULL;
2672                     }
2673                 } while (*nextpc == REOP_ENDCHILD);
2674                 pc = nextpc;
2675                 op = (REOp) *pc++;
2676                 parenSoFar = curState->parenSoFar;
2677                 continue;
2679             repeatDone:
2680                 result = x;
2681                 pc += GET_OFFSET(pc);
2682                 goto break_switch;
2684             case REOP_MINIMALSTAR:
2685                 curState->u.quantifier.min = 0;
2686                 curState->u.quantifier.max = (uint16)-1;
2687                 goto minimalquantcommon;
2688             case REOP_MINIMALPLUS:
2689                 curState->u.quantifier.min = 1;
2690                 curState->u.quantifier.max = (uint16)-1;
2691                 goto minimalquantcommon;
2692             case REOP_MINIMALOPT:
2693                 curState->u.quantifier.min = 0;
2694                 curState->u.quantifier.max = 1;
2695                 goto minimalquantcommon;
2696             case REOP_MINIMALQUANT:
2697                 curState->u.quantifier.min = GET_ARG(pc);
2698                 pc += ARG_LEN;
2699                 curState->u.quantifier.max = GET_ARG(pc);
2700                 pc += ARG_LEN;
2701             minimalquantcommon:
2702                 curState->index = x->cp - gData->cpbegin;
2703                 curState->parenSoFar = parenSoFar;
2704                 PUSH_STATE_STACK(gData);
2705                 if (curState->u.quantifier.min != 0) {
2706                     curState->continue_op = REOP_MINIMALREPEAT;
2707                     curState->continue_pc = pc;
2708                     /* step over <next> */
2709                     pc += ARG_LEN;
2710                     op = (REOp) *pc++;
2711                 } else {
2712                     if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2713                                             pc, x, x->cp, 0, 0)) {
2714                         return NULL;
2715                     }
2716                     --gData->stateStackTop;
2717                     pc = pc + GET_OFFSET(pc);
2718                     op = (REOp) *pc++;
2719                 }
2720                 continue;
2722             case REOP_MINIMALREPEAT:
2723                 --gData->stateStackTop;
2724                 --curState;
2726                 if (!result) {
2727                     /*
2728                      * Non-greedy failure - try to consume another child.
2729                      */
2730                     if (curState->u.quantifier.max == (uint16) -1 ||
2731                         curState->u.quantifier.max > 0) {
2732                         curState->index = x->cp - gData->cpbegin;
2733                         curState->continue_op = REOP_MINIMALREPEAT;
2734                         curState->continue_pc = pc;
2735                         pc += ARG_LEN;
2736                         for (k = curState->parenSoFar; k < parenSoFar; k++)
2737                             x->parens[k].index = -1;
2738                         PUSH_STATE_STACK(gData);
2739                         op = (REOp) *pc++;
2740                         continue;
2741                     }
2742                     /* Don't need to adjust pc since we're going to pop. */
2743                     break;
2744                 }
2745                 if (curState->u.quantifier.min == 0 &&
2746                     x->cp == gData->cpbegin + curState->index) {
2747                     /* Matched an empty string, that'll get us nowhere. */
2748                     result = NULL;
2749                     break;
2750                 }
2751                 if (curState->u.quantifier.min != 0)
2752                     curState->u.quantifier.min--;
2753                 if (curState->u.quantifier.max != (uint16) -1)
2754                     curState->u.quantifier.max--;
2755                 if (curState->u.quantifier.min != 0) {
2756                     curState->continue_op = REOP_MINIMALREPEAT;
2757                     curState->continue_pc = pc;
2758                     pc += ARG_LEN;
2759                     for (k = curState->parenSoFar; k < parenSoFar; k++)
2760                         x->parens[k].index = -1;
2761                     curState->index = x->cp - gData->cpbegin;
2762                     PUSH_STATE_STACK(gData);
2763                     op = (REOp) *pc++;
2764                     continue;
2765                 }
2766                 curState->index = x->cp - gData->cpbegin;
2767                 curState->parenSoFar = parenSoFar;
2768                 PUSH_STATE_STACK(gData);
2769                 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2770                                         pc, x, x->cp,
2771                                         curState->parenSoFar,
2772                                         parenSoFar - curState->parenSoFar)) {
2773                     return NULL;
2774                 }
2775                 --gData->stateStackTop;
2776                 pc = pc + GET_OFFSET(pc);
2777                 op = (REOp) *pc++;
2778                 continue;
2780             default:
2781                 JS_ASSERT(JS_FALSE);
2782                 result = NULL;
2783             }
2784         break_switch:;
2785         }
2786         /*
2787          *  If the match failed and there's a backtrack option, take it.
2788          *  Otherwise this is a complete and utter failure.
2789          */
2790         if (!result) {
2791             if (gData->cursz == 0)
2792                 return NULL;
2793             backTrackData = gData->backTrackSP;
2794             gData->cursz = backTrackData->sz;
2795             gData->backTrackSP =
2796                 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
2797             x->cp = backTrackData->cp;
2798             pc = backTrackData->backtrack_pc;
2799             op = backTrackData->backtrack_op;
2800             gData->stateStackTop = backTrackData->saveStateStackTop;
2801             JS_ASSERT(gData->stateStackTop);
2803             memcpy(gData->stateStack, backTrackData + 1,
2804                    sizeof(REProgState) * backTrackData->saveStateStackTop);
2805             curState = &gData->stateStack[gData->stateStackTop - 1];
2807             if (backTrackData->parenCount) {
2808                 memcpy(&x->parens[backTrackData->parenIndex],
2809                        (char *)(backTrackData + 1) +
2810                        sizeof(REProgState) * backTrackData->saveStateStackTop,
2811                        sizeof(RECapture) * backTrackData->parenCount);
2812                 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
2813             } else {
2814                 for (k = curState->parenSoFar; k < parenSoFar; k++)
2815                     x->parens[k].index = -1;
2816                 parenSoFar = curState->parenSoFar;
2817             }
2818             continue;
2819         }
2820         x = result;
2822         /*
2823          *  Continue with the expression.
2824          */
2825         op = (REOp)*pc++;
2826     }
2827     return NULL;
2830 static REMatchState *
2831 MatchRegExp(REGlobalData *gData, REMatchState *x)
2833     REMatchState *result;
2834     const jschar *cp = x->cp;
2835     const jschar *cp2;
2836     uintN j;
2838     /*
2839      * Have to include the position beyond the last character
2840      * in order to detect end-of-input/line condition.
2841      */
2842     for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
2843         gData->skipped = cp2 - cp;
2844         x->cp = cp2;
2845         for (j = 0; j < gData->regexp->parenCount; j++)
2846             x->parens[j].index = -1;
2847         result = ExecuteREBytecode(gData, x);
2848         if (!gData->ok || result)
2849             return result;
2850         gData->backTrackSP = gData->backTrackStack;
2851         gData->cursz = 0;
2852         gData->stateStackTop = 0;
2853         cp2 = cp + gData->skipped;
2854     }
2855     return NULL;
2859 static REMatchState *
2860 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re)
2862     REMatchState *result;
2863     uintN i;
2865     gData->backTrackStackSize = INITIAL_BACKTRACK;
2866     JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
2867                            &gData->pool,
2868                            INITIAL_BACKTRACK);
2869     if (!gData->backTrackStack)
2870         return NULL;
2871     gData->backTrackSP = gData->backTrackStack;
2872     gData->cursz = 0;
2875     gData->stateStackLimit = INITIAL_STATESTACK;
2876     JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
2877                            &gData->pool,
2878                            sizeof(REProgState) * INITIAL_STATESTACK);
2879     if (!gData->stateStack)
2880         return NULL;
2881     gData->stateStackTop = 0;
2883     gData->cx = cx;
2884     gData->regexp = re;
2885     gData->ok = JS_TRUE;
2887     JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
2888                            &gData->pool,
2889                            sizeof(REMatchState)
2890                                + (re->parenCount - 1) * sizeof(RECapture));
2891     if (!result)
2892         return NULL;
2894     for (i = 0; i < re->classCount; i++)
2895         if (!re->classList[i].converted)
2896             if (!ProcessCharSet(gData, &re->classList[i]))
2897                 return NULL;
2899     return result;
2902 JSBool
2903 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
2904                  JSBool test, jsval *rval)
2906     REGlobalData gData;
2907     REMatchState *x, *result;
2909     const jschar *cp, *ep;
2910     size_t i, length, start;
2911     JSSubString *morepar;
2912     JSBool ok;
2913     JSRegExpStatics *res;
2914     ptrdiff_t matchlen;
2915     uintN num, morenum;
2916     JSString *parstr, *matchstr;
2917     JSObject *obj;
2919     RECapture *parsub = NULL;
2921     /*
2922      * It's safe to load from cp because JSStrings have a zero at the end,
2923      * and we never let cp get beyond cpend.
2924      */
2925     start = *indexp;
2926     length = JSSTRING_LENGTH(str);
2927     if (start > length)
2928         start = length;
2929     cp = JSSTRING_CHARS(str);
2930     gData.cpbegin = cp;
2931     gData.cpend = cp + length;
2932     cp += start;
2933     gData.start = start;
2934     gData.skipped = 0;
2936     JS_InitArenaPool(&gData.pool, "RegExpPool", 8096, 4);
2937     x = InitMatch(cx, &gData, re);
2938     if (!x)
2939         return JS_FALSE;
2940     x->cp = cp;
2942     /*
2943      * Call the recursive matcher to do the real work.  Return null on mismatch
2944      * whether testing or not.  On match, return an extended Array object.
2945      */
2946     result = MatchRegExp(&gData, x);
2947     ok = gData.ok;
2948     if (!ok)
2949         goto out;
2950     if (!result) {
2951         *rval = JSVAL_NULL;
2952         goto out;
2953     }
2954     cp = result->cp;
2955     i = PTRDIFF(cp, gData.cpbegin, jschar);
2956     *indexp = i;
2957     matchlen = i - (start + gData.skipped);
2958     ep = cp;
2959     cp -= matchlen;
2961     if (test) {
2962         /*
2963          * Testing for a match and updating cx->regExpStatics: don't allocate
2964          * an array object, do return true.
2965          */
2966         *rval = JSVAL_TRUE;
2968         /* Avoid warning.  (gcc doesn't detect that obj is needed iff !test); */
2969         obj = NULL;
2970     } else {
2971         /*
2972          * The array returned on match has element 0 bound to the matched
2973          * string, elements 1 through state.parenCount bound to the paren
2974          * matches, an index property telling the length of the left context,
2975          * and an input property referring to the input string.
2976          */
2977         obj = js_NewArrayObject(cx, 0, NULL);
2978         if (!obj) {
2979             ok = JS_FALSE;
2980             goto out;
2981         }
2982         *rval = OBJECT_TO_JSVAL(obj);
2984 #define DEFVAL(val, id) {                                                     \
2985     ok = js_DefineProperty(cx, obj, id, val,                                  \
2986                            JS_PropertyStub, JS_PropertyStub,                  \
2987                            JSPROP_ENUMERATE, NULL);                           \
2988     if (!ok) {                                                                \
2989         cx->newborn[GCX_OBJECT] = NULL;                                       \
2990         cx->newborn[GCX_STRING] = NULL;                                       \
2991         goto out;                                                             \
2992     }                                                                         \
2995         matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
2996         if (!matchstr) {
2997             cx->newborn[GCX_OBJECT] = NULL;
2998             ok = JS_FALSE;
2999             goto out;
3000         }
3001         DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSVAL(0));
3002     }
3004     res = &cx->regExpStatics;
3005     res->input = str;
3006     res->parenCount = re->parenCount;
3007     if (re->parenCount == 0) {
3008         res->lastParen = js_EmptySubString;
3009     } else {
3010         for (num = 0; num < re->parenCount; num++) {
3011             parsub = &result->parens[num];
3012             if (num < 9) {
3013                 if (parsub->index == -1) {
3014                     res->parens[num].chars = NULL;
3015                     res->parens[num].length = 0;
3016                 } else {
3017                     res->parens[num].chars = gData.cpbegin + parsub->index;
3018                     res->parens[num].length = parsub->length;
3019                 }
3020             } else {
3021                 morenum = num - 9;
3022                 morepar = res->moreParens;
3023                 if (!morepar) {
3024                     res->moreLength = 10;
3025                     morepar = (JSSubString*)
3026                         JS_malloc(cx, 10 * sizeof(JSSubString));
3027                 } else if (morenum >= res->moreLength) {
3028                     res->moreLength += 10;
3029                     morepar = (JSSubString*)
3030                         JS_realloc(cx, morepar, res->moreLength *
3031                                                 sizeof(JSSubString));
3032                 }
3033                 if (!morepar) {
3034                     cx->newborn[GCX_OBJECT] = NULL;
3035                     cx->newborn[GCX_STRING] = NULL;
3036                     ok = JS_FALSE;
3037                     goto out;
3038                 }
3039                 res->moreParens = morepar;
3040                 if (parsub->index == -1) {
3041                     morepar[morenum].chars = NULL;
3042                     morepar[morenum].length = 0;
3043                 } else {
3044                     morepar[morenum].chars = gData.cpbegin + parsub->index;
3045                     morepar[morenum].length = parsub->length;
3046                 }
3047             }
3048             if (test)
3049                 continue;
3050             if (parsub->index == -1) {
3051                 ok = js_DefineProperty(cx, obj, INT_TO_JSVAL(num + 1),
3052                                        JSVAL_VOID, NULL, NULL,
3053                                        JSPROP_ENUMERATE, NULL);
3054             } else {
3055                 parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3056                                            parsub->length, 0);
3057                 if (!parstr) {
3058                     cx->newborn[GCX_OBJECT] = NULL;
3059                     cx->newborn[GCX_STRING] = NULL;
3060                     ok = JS_FALSE;
3061                     goto out;
3062                 }
3063                 ok = js_DefineProperty(cx, obj, INT_TO_JSVAL(num + 1),
3064                                        STRING_TO_JSVAL(parstr), NULL, NULL,
3065                                        JSPROP_ENUMERATE, NULL);
3066             }
3067             if (!ok) {
3068                 cx->newborn[GCX_OBJECT] = NULL;
3069                 cx->newborn[GCX_STRING] = NULL;
3070                 goto out;
3071             }
3072         }
3073         if (parsub->index == -1) {
3074             res->lastParen = js_EmptySubString;
3075         } else {
3076             res->lastParen.chars = gData.cpbegin + parsub->index;
3077             res->lastParen.length = parsub->length;
3078         }
3079     }
3081     if (!test) {
3082         /*
3083          * Define the index and input properties last for better for/in loop
3084          * order (so they come after the elements).
3085          */
3086         DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3087                (jsid)cx->runtime->atomState.indexAtom);
3088         DEFVAL(STRING_TO_JSVAL(str),
3089                (jsid)cx->runtime->atomState.inputAtom);
3090     }
3092 #undef DEFVAL
3094     res->lastMatch.chars = cp;
3095     res->lastMatch.length = matchlen;
3096     if (cx->version == JSVERSION_1_2) {
3097         /*
3098          * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
3099          * in scalar contexts, and unintentionally for the string.match "list"
3100          * psuedo-context.  On "hi there bye", the following would result:
3101          *
3102          * Language     while(/ /g){print("$`");}   s/ /$`/g
3103          * perl4.036    "hi", "there"               "hihitherehi therebye"
3104          * perl5        "hi", "hi there"            "hihitherehi therebye"
3105          * js1.2        "hi", "there"               "hihitheretherebye"
3106          */
3107         res->leftContext.chars = JSSTRING_CHARS(str) + start;
3108         res->leftContext.length = gData.skipped;
3109     } else {
3110         /*
3111          * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3112          *
3113          * js1.3        "hi", "hi there"            "hihitherehi therebye"
3114          */
3115         res->leftContext.chars = JSSTRING_CHARS(str);
3116         res->leftContext.length = start + gData.skipped;
3117     }
3118     res->rightContext.chars = ep;
3119     res->rightContext.length = gData.cpend - ep;
3121 out:
3122     JS_FinishArenaPool(&gData.pool);
3123     return ok;
3126 /************************************************************************/
3128 enum regexp_tinyid {
3129     REGEXP_SOURCE       = -1,
3130     REGEXP_GLOBAL       = -2,
3131     REGEXP_IGNORE_CASE  = -3,
3132     REGEXP_LAST_INDEX   = -4,
3133     REGEXP_MULTILINE    = -5
3134 };
3136 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT|JSPROP_SHARED)
3138 static JSPropertySpec regexp_props[] = {
3139     {"source",     REGEXP_SOURCE,      REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3140     {"global",     REGEXP_GLOBAL,      REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3141     {"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3142     {"lastIndex",  REGEXP_LAST_INDEX,  REGEXP_PROP_ATTRS,0,0},
3143     {"multiline",  REGEXP_MULTILINE,   REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3144     {0,0,0,0,0}
3145 };
3147 static JSBool
3148 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3150     jsint slot;
3151     JSRegExp *re;
3153     if (!JSVAL_IS_INT(id))
3154         return JS_TRUE;
3155     slot = JSVAL_TO_INT(id);
3156     if (slot == REGEXP_LAST_INDEX)
3157         return JS_GetReservedSlot(cx, obj, 0, vp);
3159     JS_LOCK_OBJ(cx, obj);
3160     re = (JSRegExp *) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
3161     if (re) {
3162         switch (slot) {
3163           case REGEXP_SOURCE:
3164             *vp = STRING_TO_JSVAL(re->source);
3165             break;
3166           case REGEXP_GLOBAL:
3167             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3168             break;
3169           case REGEXP_IGNORE_CASE:
3170             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3171             break;
3172           case REGEXP_MULTILINE:
3173             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3174             break;
3175         }
3176     }
3177     JS_UNLOCK_OBJ(cx, obj);
3178     return JS_TRUE;
3181 static JSBool
3182 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3184     JSBool ok;
3185     jsint slot;
3186     jsdouble lastIndex;
3188     ok = JS_TRUE;
3189     if (!JSVAL_IS_INT(id))
3190         return ok;
3191     slot = JSVAL_TO_INT(id);
3192     if (slot == REGEXP_LAST_INDEX) {
3193         if (!js_ValueToNumber(cx, *vp, &lastIndex))
3194             return JS_FALSE;
3195         lastIndex = js_DoubleToInteger(lastIndex);
3196         ok = js_NewNumberValue(cx, lastIndex, vp) &&
3197              JS_SetReservedSlot(cx, obj, 0, *vp);
3198     }
3199     return ok;
3202 /*
3203  * RegExp class static properties and their Perl counterparts:
3204  *
3205  *  RegExp.input                $_
3206  *  RegExp.multiline            $*
3207  *  RegExp.lastMatch            $&
3208  *  RegExp.lastParen            $+
3209  *  RegExp.leftContext          $`
3210  *  RegExp.rightContext         $'
3211  */
3212 enum regexp_static_tinyid {
3213     REGEXP_STATIC_INPUT         = -1,
3214     REGEXP_STATIC_MULTILINE     = -2,
3215     REGEXP_STATIC_LAST_MATCH    = -3,
3216     REGEXP_STATIC_LAST_PAREN    = -4,
3217     REGEXP_STATIC_LEFT_CONTEXT  = -5,
3218     REGEXP_STATIC_RIGHT_CONTEXT = -6
3219 };
3221 JSBool
3222 js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3224     JS_ClearRegExpStatics(cx);
3225     return js_AddRoot(cx, &res->input, "res->input");
3228 void
3229 js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3231     if (res->moreParens) {
3232         JS_free(cx, res->moreParens);
3233         res->moreParens = NULL;
3234     }
3235     js_RemoveRoot(cx->runtime, &res->input);
3238 static JSBool
3239 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3241     jsint slot;
3242     JSRegExpStatics *res;
3243     JSString *str;
3244     JSSubString *sub;
3246     res = &cx->regExpStatics;
3247     if (!JSVAL_IS_INT(id))
3248         return JS_TRUE;
3249     slot = JSVAL_TO_INT(id);
3250     switch (slot) {
3251       case REGEXP_STATIC_INPUT:
3252         *vp = res->input ? STRING_TO_JSVAL(res->input)
3253                          : JS_GetEmptyStringValue(cx);
3254         return JS_TRUE;
3255       case REGEXP_STATIC_MULTILINE:
3256         *vp = BOOLEAN_TO_JSVAL(res->multiline);
3257         return JS_TRUE;
3258       case REGEXP_STATIC_LAST_MATCH:
3259         sub = &res->lastMatch;
3260         break;
3261       case REGEXP_STATIC_LAST_PAREN:
3262         sub = &res->lastParen;
3263         break;
3264       case REGEXP_STATIC_LEFT_CONTEXT:
3265         sub = &res->leftContext;
3266         break;
3267       case REGEXP_STATIC_RIGHT_CONTEXT:
3268         sub = &res->rightContext;
3269         break;
3270       default:
3271         sub = REGEXP_PAREN_SUBSTRING(res, slot);
3272         break;
3273     }
3274     str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
3275     if (!str)
3276         return JS_FALSE;
3277     *vp = STRING_TO_JSVAL(str);
3278     return JS_TRUE;
3281 static JSBool
3282 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3284     JSRegExpStatics *res;
3286     if (!JSVAL_IS_INT(id))
3287         return JS_TRUE;
3288     res = &cx->regExpStatics;
3289     /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3290     if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3291         if (!JSVAL_IS_STRING(*vp) &&
3292             !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3293             return JS_FALSE;
3294         }
3295         res->input = JSVAL_TO_STRING(*vp);
3296     } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3297         if (!JSVAL_IS_BOOLEAN(*vp) &&
3298             !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3299             return JS_FALSE;
3300         }
3301         res->multiline = JSVAL_TO_BOOLEAN(*vp);
3302     }
3303     return JS_TRUE;
3306 static JSPropertySpec regexp_static_props[] = {
3307     {"input",
3308      REGEXP_STATIC_INPUT,
3309      JSPROP_ENUMERATE|JSPROP_SHARED,
3310      regexp_static_getProperty,    regexp_static_setProperty},
3311     {"multiline",
3312      REGEXP_STATIC_MULTILINE,
3313      JSPROP_ENUMERATE|JSPROP_SHARED,
3314      regexp_static_getProperty,    regexp_static_setProperty},
3315     {"lastMatch",
3316      REGEXP_STATIC_LAST_MATCH,
3317      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3318      regexp_static_getProperty,    regexp_static_getProperty},
3319     {"lastParen",
3320      REGEXP_STATIC_LAST_PAREN,
3321      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3322      regexp_static_getProperty,    regexp_static_getProperty},
3323     {"leftContext",
3324      REGEXP_STATIC_LEFT_CONTEXT,
3325      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3326      regexp_static_getProperty,    regexp_static_getProperty},
3327     {"rightContext",
3328      REGEXP_STATIC_RIGHT_CONTEXT,
3329      JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3330      regexp_static_getProperty,    regexp_static_getProperty},
3332     /* XXX should have block scope and local $1, etc. */
3333     {"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3334      regexp_static_getProperty,    regexp_static_getProperty},
3335     {"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3336      regexp_static_getProperty,    regexp_static_getProperty},
3337     {"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3338      regexp_static_getProperty,    regexp_static_getProperty},
3339     {"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3340      regexp_static_getProperty,    regexp_static_getProperty},
3341     {"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3342      regexp_static_getProperty,    regexp_static_getProperty},
3343     {"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3344      regexp_static_getProperty,    regexp_static_getProperty},
3345     {"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3346      regexp_static_getProperty,    regexp_static_getProperty},
3347     {"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3348      regexp_static_getProperty,    regexp_static_getProperty},
3349     {"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3350      regexp_static_getProperty,    regexp_static_getProperty},
3352     {0,0,0,0,0}
3353 };
3355 static void
3356 regexp_finalize(JSContext *cx, JSObject *obj)
3358     JSRegExp *re;
3360     re = (JSRegExp *) JS_GetPrivate(cx, obj);
3361     if (!re)
3362         return;
3363     js_DestroyRegExp(cx, re);
3366 /* Forward static prototype. */
3367 static JSBool
3368 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3369             jsval *rval);
3371 static JSBool
3372 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3374     return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
3377 #if JS_HAS_XDR
3379 #include "jsxdrapi.h"
3381 static JSBool
3382 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3384     JSRegExp *re;
3385     JSString *source;
3386     uint32 flagsword;
3387     JSObject *obj;
3389     if (xdr->mode == JSXDR_ENCODE) {
3390         re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3391         if (!re)
3392             return JS_FALSE;
3393         source = re->source;
3394         flagsword = ((uint32)re->cloneIndex << 16) | re->flags;
3395     }
3396     if (!JS_XDRString(xdr, &source) ||
3397         !JS_XDRUint32(xdr, &flagsword)) {
3398         return JS_FALSE;
3399     }
3400     if (xdr->mode == JSXDR_DECODE) {
3401         obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
3402         if (!obj)
3403             return JS_FALSE;
3404         re = js_NewRegExp(xdr->cx, NULL, source, (uint16)flagsword, JS_FALSE);
3405         if (!re)
3406             return JS_FALSE;
3407         if (!JS_SetPrivate(xdr->cx, obj, re) ||
3408             !js_SetLastIndex(xdr->cx, obj, 0)) {
3409             js_DestroyRegExp(xdr->cx, re);
3410             return JS_FALSE;
3411         }
3412         re->cloneIndex = (uint16)(flagsword >> 16);
3413         *objp = obj;
3414     }
3415     return JS_TRUE;
3418 #else  /* !JS_HAS_XDR */
3420 #define regexp_xdrObject NULL
3422 #endif /* !JS_HAS_XDR */
3424 static uint32
3425 regexp_mark(JSContext *cx, JSObject *obj, void *arg)
3427     JSRegExp *re = (JSRegExp *) JS_GetPrivate(cx, obj);
3428     if (re)
3429         JS_MarkGCThing(cx, re->source, "source", arg);
3430     return 0;
3433 JSClass js_RegExpClass = {
3434     js_RegExp_str,
3435     JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1),
3436     JS_PropertyStub,  JS_PropertyStub,  regexp_getProperty, regexp_setProperty,
3437     JS_EnumerateStub, JS_ResolveStub,   JS_ConvertStub,     regexp_finalize,
3438     NULL,             NULL,             regexp_call,        NULL,
3439     regexp_xdrObject, NULL,             regexp_mark,        0
3440 };
3442 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3444 JSBool
3445 js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3446                    jsval *rval)
3448     JSRegExp *re;
3449     const jschar *source;
3450     jschar *chars;
3451     size_t length, nflags;
3452     uintN flags;
3453     JSString *str;
3455     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3456         return JS_FALSE;
3457     JS_LOCK_OBJ(cx, obj);
3458     re = (JSRegExp *) JS_GetPrivate(cx, obj);
3459     if (!re) {
3460         JS_UNLOCK_OBJ(cx, obj);
3461         *rval = STRING_TO_JSVAL(cx->runtime->emptyString);
3462         return JS_TRUE;
3463     }
3465     source = JSSTRING_CHARS(re->source);
3466     length = JSSTRING_LENGTH(re->source);
3467     if (length == 0) {
3468         source = empty_regexp_ucstr;
3469         length = sizeof(empty_regexp_ucstr) / sizeof(jschar) - 1;
3470     }
3471     length += 2;
3472     nflags = 0;
3473     for (flags = re->flags; flags != 0; flags &= flags - 1)
3474         nflags++;
3475     chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3476     if (!chars) {
3477         JS_UNLOCK_OBJ(cx, obj);
3478         return JS_FALSE;
3479     }
3481     chars[0] = '/';
3482     js_strncpy(&chars[1], source, length - 2);
3483     chars[length-1] = '/';
3484     if (nflags) {
3485         if (re->flags & JSREG_GLOB)
3486             chars[length++] = 'g';
3487         if (re->flags & JSREG_FOLD)
3488             chars[length++] = 'i';
3489         if (re->flags & JSREG_MULTILINE)
3490             chars[length++] = 'm';
3491     }
3492     JS_UNLOCK_OBJ(cx, obj);
3493     chars[length] = 0;
3495     str = js_NewString(cx, chars, length, 0);
3496     if (!str) {
3497         JS_free(cx, chars);
3498         return JS_FALSE;
3499     }
3500     *rval = STRING_TO_JSVAL(str);
3501     return JS_TRUE;
3504 static JSBool
3505 regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3506                jsval *rval)
3508     JSString *opt, *str;
3509     JSRegExp *oldre, *re;
3510     JSBool ok, ok2;
3511     JSObject *obj2;
3512     size_t length, nbytes;
3513     const jschar *cp, *start, *end;
3514     jschar *nstart, *ncp, *tmp;
3516     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3517         return JS_FALSE;
3518     opt = NULL;
3519     if (argc == 0) {
3520         str = cx->runtime->emptyString;
3521     } else {
3522         if (JSVAL_IS_OBJECT(argv[0])) {
3523             /*
3524              * If we get passed in a RegExp object we construct a new
3525              * RegExp that is a duplicate of it by re-compiling the
3526              * original source code. ECMA requires that it be an error
3527              * here if the flags are specified. (We must use the flags
3528              * from the original RegExp also).
3529              */
3530             obj2 = JSVAL_TO_OBJECT(argv[0]);
3531             if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
3532                 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
3533                     JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3534                                          JSMSG_NEWREGEXP_FLAGGED);
3535                     return JS_FALSE;
3536                 }
3537                 JS_LOCK_OBJ(cx, obj2);
3538                 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
3539                 if (!re) {
3540                     JS_UNLOCK_OBJ(cx, obj2);
3541                     return JS_FALSE;
3542                 }
3543                 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
3544                 JS_UNLOCK_OBJ(cx, obj2);
3545                 goto created;
3546             }
3547         }
3548         str = js_ValueToString(cx, argv[0]);
3549         if (!str)
3550             return JS_FALSE;
3551         argv[0] = STRING_TO_JSVAL(str);
3552         if (argc > 1) {
3553             if (JSVAL_IS_VOID(argv[1])) {
3554                 opt = NULL;
3555             } else {
3556                 opt = js_ValueToString(cx, argv[1]);
3557                 if (!opt)
3558                     return JS_FALSE;
3559                 argv[1] = STRING_TO_JSVAL(opt);
3560             }
3561         }
3563         /* Escape any naked slashes in the regexp source. */
3564         length = JSSTRING_LENGTH(str);
3565         start = JSSTRING_CHARS(str);
3566         end = start + length;
3567         nstart = ncp = NULL;
3568         for (cp = start; cp < end; cp++) {
3569             if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
3570                 nbytes = (++length + 1) * sizeof(jschar);
3571                 if (!nstart) {
3572                     nstart = (jschar *) JS_malloc(cx, nbytes);
3573                     if (!nstart)
3574                         return JS_FALSE;
3575                     ncp = nstart + (cp - start);
3576                     js_strncpy(nstart, start, cp - start);
3577                 } else {
3578                     tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
3579                     if (!tmp) {
3580                         JS_free(cx, nstart);
3581                         return JS_FALSE;
3582                     }
3583                     ncp = tmp + (ncp - nstart);
3584                     nstart = tmp;
3585                 }
3586                 *ncp++ = '\\';
3587             }
3588             if (nstart)
3589                 *ncp++ = *cp;
3590         }
3592         if (nstart) {
3593             /* Don't forget to store the backstop after the new string. */
3594             JS_ASSERT((size_t)(ncp - nstart) == length);
3595             *ncp = 0;
3596             str = js_NewString(cx, nstart, length, 0);
3597             if (!str) {
3598                 JS_free(cx, nstart);
3599                 return JS_FALSE;
3600             }
3601             argv[0] = STRING_TO_JSVAL(str);
3602         }
3603     }
3605     re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
3606 created:
3607     if (!re)
3608         return JS_FALSE;
3609     JS_LOCK_OBJ(cx, obj);
3610     oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
3611     ok = JS_SetPrivate(cx, obj, re);
3612     ok2 = js_SetLastIndex(cx, obj, 0);
3613     JS_UNLOCK_OBJ(cx, obj);
3614     if (!ok) {
3615         js_DestroyRegExp(cx, re);
3616         return JS_FALSE;
3617     }
3618     if (oldre)
3619         js_DestroyRegExp(cx, oldre);
3620     *rval = OBJECT_TO_JSVAL(obj);
3621     return ok2;
3624 static JSBool
3625 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3626                 JSBool test, jsval *rval)
3628     JSBool ok;
3629     JSRegExp *re;
3630     jsdouble lastIndex;
3631     JSString *str;
3632     size_t i;
3634     ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
3635     if (!ok)
3636         return JS_FALSE;
3637     JS_LOCK_OBJ(cx, obj);
3638     re = (JSRegExp *) JS_GetPrivate(cx, obj);
3639     if (!re) {
3640         JS_UNLOCK_OBJ(cx, obj);
3641         return JS_TRUE;
3642     }
3644     /* NB: we must reach out: after this paragraph, in order to drop re. */
3645     HOLD_REGEXP(cx, re);
3646     if (re->flags & JSREG_GLOB) {
3647         ok = js_GetLastIndex(cx, obj, &lastIndex);
3648     } else {
3649         lastIndex = 0;
3650     }
3651     JS_UNLOCK_OBJ(cx, obj);
3652     if (!ok)
3653         goto out;
3655     /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
3656     if (argc == 0) {
3657         str = cx->regExpStatics.input;
3658         if (!str) {
3659             JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3660                                  JSMSG_NO_INPUT,
3661                                  JS_GetStringBytes(re->source),
3662                                  (re->flags & JSREG_GLOB) ? "g" : "",
3663                                  (re->flags & JSREG_FOLD) ? "i" : "",
3664                                  (re->flags & JSREG_MULTILINE) ? "m" : "");
3665             ok = JS_FALSE;
3666             goto out;
3667         }
3668     } else {
3669         str = js_ValueToString(cx, argv[0]);
3670         if (!str)
3671             goto out;
3672         argv[0] = STRING_TO_JSVAL(str);
3673     }
3675     if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
3676         ok = js_SetLastIndex(cx, obj, 0);
3677         *rval = JSVAL_NULL;
3678     } else {
3679         i = (size_t) lastIndex;
3680         ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
3681         if (ok && (re->flags & JSREG_GLOB))
3682             ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
3683     }
3685 out:
3686     DROP_REGEXP(cx, re);
3687     return ok;
3690 static JSBool
3691 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3693     return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
3696 static JSBool
3697 regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3699     if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
3700         return JS_FALSE;
3701     if (*rval != JSVAL_TRUE)
3702         *rval = JSVAL_FALSE;
3703     return JS_TRUE;
3706 static JSFunctionSpec regexp_methods[] = {
3707 #if JS_HAS_TOSOURCE
3708     {js_toSource_str,   js_regexp_toString,     0,0,0},
3709 #endif
3710     {js_toString_str,   js_regexp_toString,     0,0,0},
3711     {"compile",         regexp_compile,         1,0,0},
3712     {"exec",            regexp_exec,            0,0,0},
3713     {"test",            regexp_test,            0,0,0},
3714     {0,0,0,0,0}
3715 };
3717 static JSBool
3718 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3720     if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
3721         /*
3722          * If first arg is regexp and no flags are given, just return the arg.
3723          * (regexp_compile detects the regexp + flags case and throws a
3724          * TypeError.)  See 10.15.3.1.
3725          */
3726         if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
3727             JSVAL_IS_OBJECT(argv[0]) &&
3728             OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
3729             *rval = argv[0];
3730             return JS_TRUE;
3731         }
3733         /* Otherwise, replace obj with a new RegExp object. */
3734         obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
3735         if (!obj)
3736             return JS_FALSE;
3737     }
3738     return regexp_compile(cx, obj, argc, argv, rval);
3741 JSObject *
3742 js_InitRegExpClass(JSContext *cx, JSObject *obj)
3744     JSObject *proto, *ctor;
3745     jsval rval;
3747     proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
3748                          regexp_props, regexp_methods,
3749                          regexp_static_props, NULL);
3751     if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
3752         return NULL;
3753     if (!JS_AliasProperty(cx, ctor, "input",        "$_") ||
3754         !JS_AliasProperty(cx, ctor, "multiline",    "$*") ||
3755         !JS_AliasProperty(cx, ctor, "lastMatch",    "$&") ||
3756         !JS_AliasProperty(cx, ctor, "lastParen",    "$+") ||
3757         !JS_AliasProperty(cx, ctor, "leftContext",  "$`") ||
3758         !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
3759         goto bad;
3760     }
3762     /* Give RegExp.prototype private data so it matches the empty string. */
3763     if (!regexp_compile(cx, proto, 0, NULL, &rval))
3764         goto bad;
3765     return proto;
3767 bad:
3768     JS_DeleteProperty(cx, obj, js_RegExpClass.name);
3769     return NULL;
3772 JSObject *
3773 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
3774                    jschar *chars, size_t length, uintN flags)
3776     JSString *str;
3777     JSObject *obj;
3778     JSRegExp *re;
3780     str = js_NewStringCopyN(cx, chars, length, 0);
3781     if (!str)
3782         return NULL;
3783     re = js_NewRegExp(cx, ts,  str, flags, JS_FALSE);
3784     if (!re)
3785         return NULL;
3786     obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
3787     if (!obj || !JS_SetPrivate(cx, obj, re) || !js_SetLastIndex(cx, obj, 0)) {
3788         js_DestroyRegExp(cx, re);
3789         return NULL;
3790     }
3791     return obj;
3794 JSObject *
3795 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
3797     JSObject *clone;
3798     JSRegExp *re;
3800     JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
3801     clone = js_NewObject(cx, &js_RegExpClass, NULL, parent);
3802     if (!clone)
3803         return NULL;
3804     re = JS_GetPrivate(cx, obj);
3805     if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
3806         cx->newborn[GCX_OBJECT] = NULL;
3807         return NULL;
3808     }
3809     HOLD_REGEXP(cx, re);
3810     return clone;
3813 JSBool
3814 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
3816     jsval v;
3818     return JS_GetReservedSlot(cx, obj, 0, &v) &&
3819            js_ValueToNumber(cx, v, lastIndex);
3822 JSBool
3823 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
3825     jsval v;
3827     return js_NewNumberValue(cx, lastIndex, &v) &&
3828            JS_SetReservedSlot(cx, obj, 0, v);
3831 #endif /* JS_HAS_REGEXPS */