1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=80:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
41 /*
42 * JS regular expressions, after Perl.
43 */
44 #include "jsstddef.h"
45 #include <stdlib.h>
46 #include <string.h>
47 #include "jstypes.h"
48 #include "jsarena.h" /* Added by JSIFY */
49 #include "jsutil.h" /* Added by JSIFY */
50 #include "jsapi.h"
51 #include "jsarray.h"
52 #include "jsatom.h"
53 #include "jscntxt.h"
54 #include "jsconfig.h"
55 #include "jsfun.h"
56 #include "jsgc.h"
57 #include "jsinterp.h"
58 #include "jslock.h"
59 #include "jsnum.h"
60 #include "jsobj.h"
61 #include "jsopcode.h"
62 #include "jsregexp.h"
63 #include "jsscan.h"
64 #include "jsstr.h"
66 #if JS_HAS_REGEXPS
68 /* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */
69 typedef enum REOp {
70 REOP_EMPTY = 0, /* match rest of input against rest of r.e. */
71 REOP_ALT = 1, /* alternative subexpressions in kid and next */
72 REOP_SIMPLE_START = 2, /* start of 'simple opcodes' */
73 REOP_BOL = 2, /* beginning of input (or line if multiline) */
74 REOP_EOL = 3, /* end of input (or line if multiline) */
75 REOP_WBDRY = 4, /* match "" at word boundary */
76 REOP_WNONBDRY = 5, /* match "" at word non-boundary */
77 REOP_DOT = 6, /* stands for any character */
78 REOP_DIGIT = 7, /* match a digit char: [0-9] */
79 REOP_NONDIGIT = 8, /* match a non-digit char: [^0-9] */
80 REOP_ALNUM = 9, /* match an alphanumeric char: [0-9a-z_A-Z] */
81 REOP_NONALNUM = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
82 REOP_SPACE = 11, /* match a whitespace char */
83 REOP_NONSPACE = 12, /* match a non-whitespace char */
84 REOP_BACKREF = 13, /* back-reference (e.g., \1) to a parenthetical */
85 REOP_FLAT = 14, /* match a flat string */
86 REOP_FLAT1 = 15, /* match a single char */
87 REOP_FLATi = 16, /* case-independent REOP_FLAT */
88 REOP_FLAT1i = 17, /* case-independent REOP_FLAT1 */
89 REOP_UCFLAT1 = 18, /* single Unicode char */
90 REOP_UCFLAT1i = 19, /* case-independent REOP_UCFLAT1 */
91 REOP_UCFLAT = 20, /* flat Unicode string; len immediate counts chars */
92 REOP_UCFLATi = 21, /* case-independent REOP_UCFLAT */
93 REOP_CLASS = 22, /* character class with index */
94 REOP_NCLASS = 23, /* negated character class with index */
95 REOP_SIMPLE_END = 23, /* end of 'simple opcodes' */
96 REOP_QUANT = 25, /* quantified atom: atom{1,2} */
97 REOP_STAR = 26, /* zero or more occurrences of kid */
98 REOP_PLUS = 27, /* one or more occurrences of kid */
99 REOP_OPT = 28, /* optional subexpression in kid */
100 REOP_LPAREN = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
101 REOP_RPAREN = 30, /* right paren bytecode */
102 REOP_JUMP = 31, /* for deoptimized closure loops */
103 REOP_DOTSTAR = 32, /* optimize .* to use a single opcode */
104 REOP_ANCHOR = 33, /* like .* but skips left context to unanchored r.e. */
105 REOP_EOLONLY = 34, /* $ not preceded by any pattern */
106 REOP_BACKREFi = 37, /* case-independent REOP_BACKREF */
107 REOP_LPARENNON = 41, /* non-capturing version of REOP_LPAREN */
108 REOP_ASSERT = 43, /* zero width positive lookahead assertion */
109 REOP_ASSERT_NOT = 44, /* zero width negative lookahead assertion */
110 REOP_ASSERTTEST = 45, /* sentinel at end of assertion child */
111 REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
112 REOP_MINIMALSTAR = 47, /* non-greedy version of * */
113 REOP_MINIMALPLUS = 48, /* non-greedy version of + */
114 REOP_MINIMALOPT = 49, /* non-greedy version of ? */
115 REOP_MINIMALQUANT = 50, /* non-greedy version of {} */
116 REOP_ENDCHILD = 51, /* sentinel at end of quantifier child */
117 REOP_REPEAT = 52, /* directs execution of greedy quantifier */
118 REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
119 REOP_ALTPREREQ = 54, /* prerequisite for ALT, either of two chars */
120 REOP_ALTPREREQ2 = 55, /* prerequisite for ALT, a char or a class */
121 REOP_ENDALT = 56, /* end of final alternate */
122 REOP_CONCAT = 57, /* concatenation of terms (parse time only) */
124 REOP_END
125 } REOp;
127 #define REOP_IS_SIMPLE(op) ((unsigned)((op) - REOP_SIMPLE_START) < \
128 (unsigned)REOP_SIMPLE_END)
130 struct RENode {
131 REOp op; /* r.e. op bytecode */
132 RENode *next; /* next in concatenation order */
133 void *kid; /* first operand */
134 union {
135 void *kid2; /* second operand */
136 jsint num; /* could be a number */
137 size_t parenIndex; /* or a parenthesis index */
138 struct { /* or a quantifier range */
139 uintN min;
140 uintN max;
141 JSPackedBool greedy;
142 } range;
143 struct { /* or a character class */
144 size_t startIndex;
145 size_t kidlen; /* length of string at kid, in jschars */
146 size_t index; /* index into class list */
147 uint16 bmsize; /* bitmap size, based on max char code */
148 JSPackedBool sense;
149 } ucclass;
150 struct { /* or a literal sequence */
151 jschar chr; /* of one character */
152 size_t length; /* or many (via the kid) */
153 } flat;
154 struct {
155 RENode *kid2; /* second operand from ALT */
156 jschar ch1; /* match char for ALTPREREQ */
157 jschar ch2; /* ditto, or class index for ALTPREREQ2 */
158 } altprereq;
159 } u;
160 };
162 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
163 ((c >= 'a') && (c <= 'z')) )
164 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
165 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
167 #define CLASS_CACHE_SIZE 4
169 typedef struct CompilerState {
170 JSContext *context;
171 JSTokenStream *tokenStream; /* For reporting errors */
172 const jschar *cpbegin;
173 const jschar *cpend;
174 const jschar *cp;
175 size_t parenCount;
176 size_t classCount; /* number of [] encountered */
177 size_t treeDepth; /* maximum depth of parse tree */
178 size_t progLength; /* estimated bytecode length */
179 RENode *result;
180 size_t classBitmapsMem; /* memory to hold all class bitmaps */
181 struct {
182 const jschar *start; /* small cache of class strings */
183 size_t length; /* since they're often the same */
184 size_t index;
185 } classCache[CLASS_CACHE_SIZE];
186 uint16 flags;
187 } CompilerState;
189 typedef struct EmitStateStackEntry {
190 jsbytecode *altHead; /* start of REOP_ALT* opcode */
191 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
192 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
193 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
194 RENode *continueNode; /* original REOP_ALT* node being stacked */
195 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
196 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
197 avoid 16-bit unsigned offset overflow */
198 } EmitStateStackEntry;
200 /*
201 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
202 * the getters and setters take the pc of the offset, not of the opcode before
203 * the offset.
204 */
205 #define ARG_LEN 2
206 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
207 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
208 (pc)[1] = (jsbytecode) (arg))
210 #define OFFSET_LEN ARG_LEN
211 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
212 #define GET_OFFSET(pc) GET_ARG(pc)
214 /*
215 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
216 * For sanity, we limit it to 2^24 bytes.
217 */
218 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
220 /*
221 * The maximum memory that can be allocated for class bitmaps.
222 * For sanity, we limit it to 2^24 bytes.
223 */
224 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
226 /*
227 * Functions to get size and write/read bytecode that represent small indexes
228 * compactly.
229 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
230 * indicates that the following byte brings more bits to the index. Otherwise
231 * this is the last byte in the index bytecode representing highest index bits.
232 */
233 static size_t
234 GetCompactIndexWidth(size_t index)
235 {
236 size_t width;
238 for (width = 1; (index >>= 7) != 0; ++width) { }
239 return width;
240 }
242 static jsbytecode *
243 WriteCompactIndex(jsbytecode *pc, size_t index)
244 {
245 size_t next;
247 while ((next = index >> 7) != 0) {
248 *pc++ = (jsbytecode)(index | 0x80);
249 index = next;
250 }
251 *pc++ = (jsbytecode)index;
252 return pc;
253 }
255 static jsbytecode *
256 ReadCompactIndex(jsbytecode *pc, size_t *result)
257 {
258 size_t nextByte;
260 nextByte = *pc++;
261 if ((nextByte & 0x80) == 0) {
262 /*
263 * Short-circuit the most common case when compact index <= 127.
264 */
265 *result = nextByte;
266 } else {
267 size_t shift = 7;
268 *result = 0x7F & nextByte;
269 do {
270 nextByte = *pc++;
271 *result |= (nextByte & 0x7F) << shift;
272 shift += 7;
273 } while ((nextByte & 0x80) != 0);
274 }
275 return pc;
276 }
278 typedef struct RECapture {
279 ptrdiff_t index; /* start of contents, -1 for empty */
280 size_t length; /* length of capture */
281 } RECapture;
283 typedef struct REMatchState {
284 const jschar *cp;
285 RECapture parens[1]; /* first of 're->parenCount' captures,
286 allocated at end of this struct */
287 } REMatchState;
289 struct REBackTrackData;
291 typedef struct REProgState {
292 jsbytecode *continue_pc; /* current continuation data */
293 jsbytecode continue_op;
294 ptrdiff_t index; /* progress in text */
295 size_t parenSoFar; /* highest indexed paren started */
296 union {
297 struct {
298 uintN min; /* current quantifier limits */
299 uintN max;
300 } quantifier;
301 struct {
302 size_t top; /* backtrack stack state */
303 size_t sz;
304 } assertion;
305 } u;
306 } REProgState;
308 typedef struct REBackTrackData {
309 size_t sz; /* size of previous stack entry */
310 jsbytecode *backtrack_pc; /* where to backtrack to */
311 jsbytecode backtrack_op;
312 const jschar *cp; /* index in text of match at backtrack */
313 size_t parenIndex; /* start index of saved paren contents */
314 size_t parenCount; /* # of saved paren contents */
315 size_t saveStateStackTop; /* number of parent states */
316 /* saved parent states follow */
317 /* saved paren contents follow */
318 } REBackTrackData;
320 #define INITIAL_STATESTACK 100
321 #define INITIAL_BACKTRACK 8000
323 typedef struct REGlobalData {
324 JSContext *cx;
325 JSRegExp *regexp; /* the RE in execution */
326 JSBool ok; /* runtime error (out_of_memory only?) */
327 size_t start; /* offset to start at */
328 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
329 const jschar *cpbegin; /* text base address */
330 const jschar *cpend; /* text limit address */
332 REProgState *stateStack; /* stack of state of current parents */
333 size_t stateStackTop;
334 size_t stateStackLimit;
336 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
337 REBackTrackData *backTrackSP;
338 size_t backTrackStackSize;
339 size_t cursz; /* size of current stack entry */
341 JSArenaPool pool; /* It's faster to use one malloc'd pool
342 than to malloc/free the three items
343 that are allocated from this pool */
344 } REGlobalData;
346 /*
347 * 1. If IgnoreCase is false, return ch.
348 * 2. Let u be ch converted to upper case as if by calling
349 * String.prototype.toUpperCase on the one-character string ch.
350 * 3. If u does not consist of a single character, return ch.
351 * 4. Let cu be u's character.
352 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
353 * code point value is less than decimal 128, then return ch.
354 * 6. Return cu.
355 */
356 static jschar
357 upcase(jschar ch)
358 {
359 jschar cu = JS_TOUPPER(ch);
360 if (ch >= 128 && cu < 128)
361 return ch;
362 return cu;
363 }
365 static jschar
366 downcase(jschar ch)
367 {
368 jschar cl = JS_TOLOWER(ch);
369 if (cl >= 128 && ch < 128)
370 return ch;
371 return cl;
372 }
374 /* Construct and initialize an RENode, returning NULL for out-of-memory */
375 static RENode *
376 NewRENode(CompilerState *state, REOp op)
377 {
378 JSContext *cx;
379 RENode *ren;
381 cx = state->context;
382 JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
383 if (!ren) {
384 JS_ReportOutOfMemory(cx);
385 return NULL;
386 }
387 ren->op = op;
388 ren->next = NULL;
389 ren->kid = NULL;
390 return ren;
391 }
393 /*
394 * Validates and converts hex ascii value.
395 */
396 static JSBool
397 isASCIIHexDigit(jschar c, uintN *digit)
398 {
399 uintN cv = c;
401 if (cv < '0')
402 return JS_FALSE;
403 if (cv <= '9') {
404 *digit = cv - '0';
405 return JS_TRUE;
406 }
407 cv |= 0x20;
408 if (cv >= 'a' && cv <= 'f') {
409 *digit = cv - 'a' + 10;
410 return JS_TRUE;
411 }
412 return JS_FALSE;
413 }
416 typedef struct {
417 REOp op;
418 const jschar *errPos;
419 size_t parenIndex;
420 } REOpData;
423 /*
424 * Process the op against the two top operands, reducing them to a single
425 * operand in the penultimate slot. Update progLength and treeDepth.
426 */
427 static JSBool
428 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
429 intN operandSP)
430 {
431 RENode *result;
433 switch (opData->op) {
434 case REOP_ALT:
435 result = NewRENode(state, REOP_ALT);
436 if (!result)
437 return JS_FALSE;
438 result->kid = operandStack[operandSP - 2];
439 result->u.kid2 = operandStack[operandSP - 1];
440 operandStack[operandSP - 2] = result;
442 if (state->treeDepth == TREE_DEPTH_MAX) {
443 js_ReportCompileErrorNumber(state->context, state->tokenStream,
444 JSREPORT_TS | JSREPORT_ERROR,
445 JSMSG_REGEXP_TOO_COMPLEX);
446 return JS_FALSE;
447 }
448 ++state->treeDepth;
450 /*
451 * Look at both alternates to see if there's a FLAT or a CLASS at
452 * the start of each. If so, use a prerequisite match.
453 */
454 if (((RENode *) result->kid)->op == REOP_FLAT &&
455 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
456 (state->flags & JSREG_FOLD) == 0) {
457 result->op = REOP_ALTPREREQ;
458 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
459 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
460 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
461 JUMP, <end> ... ENDALT */
462 state->progLength += 13;
463 }
464 else
465 if (((RENode *) result->kid)->op == REOP_CLASS &&
466 ((RENode *) result->kid)->u.ucclass.index < 256 &&
467 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
468 (state->flags & JSREG_FOLD) == 0) {
469 result->op = REOP_ALTPREREQ2;
470 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
471 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
472 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
473 JUMP, <end> ... ENDALT */
474 state->progLength += 13;
475 }
476 else
477 if (((RENode *) result->kid)->op == REOP_FLAT &&
478 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
479 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
480 (state->flags & JSREG_FOLD) == 0) {
481 result->op = REOP_ALTPREREQ2;
482 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
483 result->u.altprereq.ch2 =
484 ((RENode *) result->u.kid2)->u.ucclass.index;
485 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
486 JUMP, <end> ... ENDALT */
487 state->progLength += 13;
488 }
489 else {
490 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
491 state->progLength += 7;
492 }
493 break;
495 case REOP_CONCAT:
496 result = operandStack[operandSP - 2];
497 while (result->next)
498 result = result->next;
499 result->next = operandStack[operandSP - 1];
500 break;
502 case REOP_ASSERT:
503 case REOP_ASSERT_NOT:
504 case REOP_LPARENNON:
505 case REOP_LPAREN:
506 /* These should have been processed by a close paren. */
507 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
508 JSREPORT_TS | JSREPORT_ERROR,
509 JSMSG_MISSING_PAREN, opData->errPos);
510 return JS_FALSE;
512 default:;
513 }
514 return JS_TRUE;
515 }
517 /*
518 * Parser forward declarations.
519 */
520 static JSBool ParseTerm(CompilerState *state);
521 static JSBool ParseQuantifier(CompilerState *state);
522 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
524 /*
525 * Top-down regular expression grammar, based closely on Perl4.
526 *
527 * regexp: altern A regular expression is one or more
528 * altern '|' regexp alternatives separated by vertical bar.
529 */
530 #define INITIAL_STACK_SIZE 128
532 static JSBool
533 ParseRegExp(CompilerState *state)
534 {
535 size_t parenIndex;
536 RENode *operand;
537 REOpData *operatorStack;
538 RENode **operandStack;
539 REOp op;
540 intN i;
541 JSBool result = JS_FALSE;
543 intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
544 intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
546 /* Watch out for empty regexp */
547 if (state->cp == state->cpend) {
548 state->result = NewRENode(state, REOP_EMPTY);
549 return (state->result != NULL);
550 }
552 operatorStack = (REOpData *)
553 JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
554 if (!operatorStack)
555 return JS_FALSE;
557 operandStack = (RENode **)
558 JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
559 if (!operandStack)
560 goto out;
562 for (;;) {
563 parenIndex = state->parenCount;
564 if (state->cp == state->cpend) {
565 /*
566 * If we are at the end of the regexp and we're short one or more
567 * operands, the regexp must have the form /x|/ or some such, with
568 * left parentheses making us short more than one operand.
569 */
570 if (operatorSP >= operandSP) {
571 operand = NewRENode(state, REOP_EMPTY);
572 if (!operand)
573 goto out;
574 goto pushOperand;
575 }
576 } else {
577 switch (*state->cp) {
578 case '(':
579 ++state->cp;
580 if (state->cp + 1 < state->cpend &&
581 *state->cp == '?' &&
582 (state->cp[1] == '=' ||
583 state->cp[1] == '!' ||
584 state->cp[1] == ':')) {
585 switch (state->cp[1]) {
586 case '=':
587 op = REOP_ASSERT;
588 /* ASSERT, <next>, ... ASSERTTEST */
589 state->progLength += 4;
590 break;
591 case '!':
592 op = REOP_ASSERT_NOT;
593 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
594 state->progLength += 4;
595 break;
596 default:
597 op = REOP_LPARENNON;
598 break;
599 }
600 state->cp += 2;
601 } else {
602 op = REOP_LPAREN;
603 /* LPAREN, <index>, ... RPAREN, <index> */
604 state->progLength
605 += 2 * (1 + GetCompactIndexWidth(parenIndex));
606 state->parenCount++;
607 if (state->parenCount == 65535) {
608 js_ReportCompileErrorNumber(state->context,
609 state->tokenStream,
610 JSREPORT_TS |
611 JSREPORT_ERROR,
612 JSMSG_TOO_MANY_PARENS);
613 goto out;
614 }
615 }
616 goto pushOperator;
618 case ')':
619 /*
620 * If there's no stacked open parenthesis, throw syntax error.
621 */
622 for (i = operatorSP - 1; ; i--) {
623 if (i < 0) {
624 js_ReportCompileErrorNumber(state->context,
625 state->tokenStream,
626 JSREPORT_TS |
627 JSREPORT_ERROR,
628 JSMSG_UNMATCHED_RIGHT_PAREN);
629 goto out;
630 }
631 if (operatorStack[i].op == REOP_ASSERT ||
632 operatorStack[i].op == REOP_ASSERT_NOT ||
633 operatorStack[i].op == REOP_LPARENNON ||
634 operatorStack[i].op == REOP_LPAREN) {
635 break;
636 }
637 }
638 /* FALL THROUGH */
640 case '|':
641 /* Expected an operand before these, so make an empty one */
642 operand = NewRENode(state, REOP_EMPTY);
643 if (!operand)
644 goto out;
645 goto pushOperand;
647 default:
648 if (!ParseTerm(state))
649 goto out;
650 operand = state->result;
651 pushOperand:
652 if (operandSP == operandStackSize) {
653 operandStackSize += operandStackSize;
654 operandStack = (RENode **)
655 JS_realloc(state->context, operandStack,
656 sizeof(RENode *) * operandStackSize);
657 if (!operandStack)
658 goto out;
659 }
660 operandStack[operandSP++] = operand;
661 break;
662 }
663 }
665 /* At the end; process remaining operators. */
666 restartOperator:
667 if (state->cp == state->cpend) {
668 while (operatorSP) {
669 --operatorSP;
670 if (!ProcessOp(state, &operatorStack[operatorSP],
671 operandStack, operandSP))
672 goto out;
673 --operandSP;
674 }
675 JS_ASSERT(operandSP == 1);
676 state->result = operandStack[0];
677 result = JS_TRUE;
678 goto out;
679 }
681 switch (*state->cp) {
682 case '|':
683 /* Process any stacked 'concat' operators */
684 ++state->cp;
685 while (operatorSP &&
686 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
687 --operatorSP;
688 if (!ProcessOp(state, &operatorStack[operatorSP],
689 operandStack, operandSP)) {
690 goto out;
691 }
692 --operandSP;
693 }
694 op = REOP_ALT;
695 goto pushOperator;
697 case ')':
698 /*
699 * If there's no stacked open parenthesis, throw syntax error.
700 */
701 for (i = operatorSP - 1; ; i--) {
702 if (i < 0) {
703 js_ReportCompileErrorNumber(state->context,
704 state->tokenStream,
705 JSREPORT_TS | JSREPORT_ERROR,
706 JSMSG_UNMATCHED_RIGHT_PAREN);
707 goto out;
708 }
709 if (operatorStack[i].op == REOP_ASSERT ||
710 operatorStack[i].op == REOP_ASSERT_NOT ||
711 operatorStack[i].op == REOP_LPARENNON ||
712 operatorStack[i].op == REOP_LPAREN) {
713 break;
714 }
715 }
716 ++state->cp;
718 /* Process everything on the stack until the open parenthesis. */
719 for (;;) {
720 JS_ASSERT(operatorSP);
721 --operatorSP;
722 switch (operatorStack[operatorSP].op) {
723 case REOP_ASSERT:
724 case REOP_ASSERT_NOT:
725 case REOP_LPAREN:
726 operand = NewRENode(state, operatorStack[operatorSP].op);
727 if (!operand)
728 goto out;
729 operand->u.parenIndex =
730 operatorStack[operatorSP].parenIndex;
731 JS_ASSERT(operandSP);
732 operand->kid = operandStack[operandSP - 1];
733 operandStack[operandSP - 1] = operand;
734 if (state->treeDepth == TREE_DEPTH_MAX) {
735 js_ReportCompileErrorNumber(state->context,
736 state->tokenStream,
737 JSREPORT_TS |
738 JSREPORT_ERROR,
739 JSMSG_REGEXP_TOO_COMPLEX);
740 goto out;
741 }
742 ++state->treeDepth;
743 /* FALL THROUGH */
745 case REOP_LPARENNON:
746 state->result = operandStack[operandSP - 1];
747 if (!ParseQuantifier(state))
748 goto out;
749 operandStack[operandSP - 1] = state->result;
750 goto restartOperator;
751 default:
752 if (!ProcessOp(state, &operatorStack[operatorSP],
753 operandStack, operandSP))
754 goto out;
755 --operandSP;
756 break;
757 }
758 }
759 break;
761 case '{':
762 {
763 const jschar *errp = state->cp;
765 if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
766 /*
767 * This didn't even scan correctly as a quantifier, so we should
768 * treat it as flat.
769 */
770 op = REOP_CONCAT;
771 goto pushOperator;
772 }
774 state->cp = errp;
775 /* FALL THROUGH */
776 }
778 case '+':
779 case '*':
780 case '?':
781 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
782 JSREPORT_TS | JSREPORT_ERROR,
783 JSMSG_BAD_QUANTIFIER, state->cp);
784 result = JS_FALSE;
785 goto out;
787 default:
788 /* Anything else is the start of the next term. */
789 op = REOP_CONCAT;
790 pushOperator:
791 if (operatorSP == operatorStackSize) {
792 operatorStackSize += operatorStackSize;
793 operatorStack = (REOpData *)
794 JS_realloc(state->context, operatorStack,
795 sizeof(REOpData) * operatorStackSize);
796 if (!operatorStack)
797 goto out;
798 }
799 operatorStack[operatorSP].op = op;
800 operatorStack[operatorSP].errPos = state->cp;
801 operatorStack[operatorSP++].parenIndex = parenIndex;
802 break;
803 }
804 }
805 out:
806 if (operatorStack)
807 JS_free(state->context, operatorStack);
808 if (operandStack)
809 JS_free(state->context, operandStack);
810 return result;
811 }
813 /*
814 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
815 * its being on the stack, and to propagate errors to its callers.
816 */
817 #define JSREG_FIND_PAREN_COUNT 0x8000
818 #define JSREG_FIND_PAREN_ERROR 0x4000
820 /*
821 * Magic return value from FindParenCount and GetDecimalValue, to indicate
822 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
823 * its findMax parameter is non-null.
824 */
825 #define OVERFLOW_VALUE ((uintN)-1)
827 static uintN
828 FindParenCount(CompilerState *state)
829 {
830 CompilerState temp;
831 int i;
833 if (state->flags & JSREG_FIND_PAREN_COUNT)
834 return OVERFLOW_VALUE;
836 /*
837 * Copy state into temp, flag it so we never report an invalid backref,
838 * and reset its members to parse the entire regexp. This is obviously
839 * suboptimal, but GetDecimalValue calls us only if a backref appears to
840 * refer to a forward parenthetical, which is rare.
841 */
842 temp = *state;
843 temp.flags |= JSREG_FIND_PAREN_COUNT;
844 temp.cp = temp.cpbegin;
845 temp.parenCount = 0;
846 temp.classCount = 0;
847 temp.progLength = 0;
848 temp.treeDepth = 0;
849 temp.classBitmapsMem = 0;
850 for (i = 0; i < CLASS_CACHE_SIZE; i++)
851 temp.classCache[i].start = NULL;
853 if (!ParseRegExp(&temp)) {
854 state->flags |= JSREG_FIND_PAREN_ERROR;
855 return OVERFLOW_VALUE;
856 }
857 return temp.parenCount;
858 }
860 /*
861 * Extract and return a decimal value at state->cp. The initial character c
862 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
863 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
864 * state->flags to discover whether an error occurred under findMax.
865 */
866 static uintN
867 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
868 CompilerState *state)
869 {
870 uintN value = JS7_UNDEC(c);
871 JSBool overflow = (value > max && (!findMax || value > findMax(state)));
873 /* The following restriction allows simpler overflow checks. */
874 JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
875 while (state->cp < state->cpend) {
876 c = *state->cp;
877 if (!JS7_ISDEC(c))
878 break;
879 value = 10 * value + JS7_UNDEC(c);
880 if (!overflow && value > max && (!findMax || value > findMax(state)))
881 overflow = JS_TRUE;
882 ++state->cp;
883 }
884 return overflow ? OVERFLOW_VALUE : value;
885 }
887 /*
888 * Calculate the total size of the bitmap required for a class expression.
889 */
890 static JSBool
891 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
892 const jschar *end)
893 {
894 uintN max = 0;
895 JSBool inRange = JS_FALSE;
896 jschar c, rangeStart = 0;
897 uintN n, digit, nDigits, i;
899 target->u.ucclass.bmsize = 0;
900 target->u.ucclass.sense = JS_TRUE;
902 if (src == end)
903 return JS_TRUE;
905 if (*src == '^') {
906 ++src;
907 target->u.ucclass.sense = JS_FALSE;
908 }
910 while (src != end) {
911 uintN localMax = 0;
912 switch (*src) {
913 case '\\':
914 ++src;
915 c = *src++;
916 switch (c) {
917 case 'b':
918 localMax = 0x8;
919 break;
920 case 'f':
921 localMax = 0xC;
922 break;
923 case 'n':
924 localMax = 0xA;
925 break;
926 case 'r':
927 localMax = 0xD;
928 break;
929 case 't':
930 localMax = 0x9;
931 break;
932 case 'v':
933 localMax = 0xB;
934 break;
935 case 'c':
936 if (src + 1 < end && RE_IS_LETTER(src[1]))
937 localMax = (jschar) (*src++ & 0x1F);
938 else
939 localMax = '\\';
940 break;
941 case 'x':
942 nDigits = 2;
943 goto lexHex;
944 case 'u':
945 nDigits = 4;
946 lexHex:
947 n = 0;
948 for (i = 0; (i < nDigits) && (src < end); i++) {
949 c = *src++;
950 if (!isASCIIHexDigit(c, &digit)) {
951 /*
952 * Back off to accepting the original
953 *'\' as a literal.
954 */
955 src -= i + 1;
956 n = '\\';
957 break;
958 }
959 n = (n << 4) | digit;
960 }
961 localMax = n;
962 break;
963 case 'd':
964 if (inRange) {
965 JS_ReportErrorNumber(state->context,
966 js_GetErrorMessage, NULL,
967 JSMSG_BAD_CLASS_RANGE);
968 return JS_FALSE;
969 }
970 localMax = '9';
971 break;
972 case 'D':
973 case 's':
974 case 'S':
975 case 'w':
976 case 'W':
977 if (inRange) {
978 JS_ReportErrorNumber(state->context,
979 js_GetErrorMessage, NULL,
980 JSMSG_BAD_CLASS_RANGE);
981 return JS_FALSE;
982 }
983 target->u.ucclass.bmsize = 65535;
984 return JS_TRUE;
985 case '0':
986 case '1':
987 case '2':
988 case '3':
989 case '4':
990 case '5':
991 case '6':
992 case '7':
993 /*
994 * This is a non-ECMA extension - decimal escapes (in this
995 * case, octal!) are supposed to be an error inside class
996 * ranges, but supported here for backwards compatibility.
997 *
998 */
999 n = JS7_UNDEC(c);
1000 c = *src;
1001 if ('0' <= c && c <= '7') {
1002 src++;
1003 n = 8 * n + JS7_UNDEC(c);
1004 c = *src;
1005 if ('0' <= c && c <= '7') {
1006 src++;
1007 i = 8 * n + JS7_UNDEC(c);
1008 if (i <= 0377)
1009 n = i;
1010 else
1011 src--;
1012 }
1013 }
1014 localMax = n;
1015 break;
1017 default:
1018 localMax = c;
1019 break;
1020 }
1021 break;
1022 default:
1023 localMax = *src++;
1024 break;
1025 }
1026 if (inRange) {
1027 if (rangeStart > localMax) {
1028 JS_ReportErrorNumber(state->context,
1029 js_GetErrorMessage, NULL,
1030 JSMSG_BAD_CLASS_RANGE);
1031 return JS_FALSE;
1032 }
1033 inRange = JS_FALSE;
1034 } else {
1035 if (src < end - 1) {
1036 if (*src == '-') {
1037 ++src;
1038 inRange = JS_TRUE;
1039 rangeStart = (jschar)localMax;
1040 continue;
1041 }
1042 }
1043 }
1044 if (state->flags & JSREG_FOLD) {
1045 c = JS_MAX(upcase((jschar)localMax), downcase((jschar)localMax));
1046 if (c > localMax)
1047 localMax = c;
1048 }
1049 if (localMax > max)
1050 max = localMax;
1051 }
1052 target->u.ucclass.bmsize = max;
1053 return JS_TRUE;
1054 }
1056 /*
1057 * item: assertion An item is either an assertion or
1058 * quantatom a quantified atom.
1059 *
1060 * assertion: '^' Assertions match beginning of string
1061 * (or line if the class static property
1062 * RegExp.multiline is true).
1063 * '$' End of string (or line if the class
1064 * static property RegExp.multiline is
1065 * true).
1066 * '\b' Word boundary (between \w and \W).
1067 * '\B' Word non-boundary.
1068 *
1069 * quantatom: atom An unquantified atom.
1070 * quantatom '{' n ',' m '}'
1071 * Atom must occur between n and m times.
1072 * quantatom '{' n ',' '}' Atom must occur at least n times.
1073 * quantatom '{' n '}' Atom must occur exactly n times.
1074 * quantatom '*' Zero or more times (same as {0,}).
1075 * quantatom '+' One or more times (same as {1,}).
1076 * quantatom '?' Zero or one time (same as {0,1}).
1077 *
1078 * any of which can be optionally followed by '?' for ungreedy
1079 *
1080 * atom: '(' regexp ')' A parenthesized regexp (what matched
1081 * can be addressed using a backreference,
1082 * see '\' n below).
1083 * '.' Matches any char except '\n'.
1084 * '[' classlist ']' A character class.
1085 * '[' '^' classlist ']' A negated character class.
1086 * '\f' Form Feed.
1087 * '\n' Newline (Line Feed).
1088 * '\r' Carriage Return.
1089 * '\t' Horizontal Tab.
1090 * '\v' Vertical Tab.
1091 * '\d' A digit (same as [0-9]).
1092 * '\D' A non-digit.
1093 * '\w' A word character, [0-9a-z_A-Z].
1094 * '\W' A non-word character.
1095 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1096 * '\S' A non-whitespace character.
1097 * '\' n A backreference to the nth (n decimal
1098 * and positive) parenthesized expression.
1099 * '\' octal An octal escape sequence (octal must be
1100 * two or three digits long, unless it is
1101 * 0 for the null character).
1102 * '\x' hex A hex escape (hex must be two digits).
1103 * '\u' unicode A unicode escape (must be four digits).
1104 * '\c' ctrl A control character, ctrl is a letter.
1105 * '\' literalatomchar Any character except one of the above
1106 * that follow '\' in an atom.
1107 * otheratomchar Any character not first among the other
1108 * atom right-hand sides.
1109 */
1110 static JSBool
1111 ParseTerm(CompilerState *state)
1112 {
1113 jschar c = *state->cp++;
1114 uintN nDigits;
1115 uintN num, tmp, n, i;
1116 const jschar *termStart;
1118 switch (c) {
1119 /* assertions and atoms */
1120 case '^':
1121 state->result = NewRENode(state, REOP_BOL);
1122 if (!state->result)
1123 return JS_FALSE;
1124 state->progLength++;
1125 return JS_TRUE;
1126 case '$':
1127 state->result = NewRENode(state, REOP_EOL);
1128 if (!state->result)
1129 return JS_FALSE;
1130 state->progLength++;
1131 return JS_TRUE;
1132 case '\\':
1133 if (state->cp >= state->cpend) {
1134 /* a trailing '\' is an error */
1135 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1136 JSREPORT_TS | JSREPORT_ERROR,
1137 JSMSG_TRAILING_SLASH);
1138 return JS_FALSE;
1139 }
1140 c = *state->cp++;
1141 switch (c) {
1142 /* assertion escapes */
1143 case 'b' :
1144 state->result = NewRENode(state, REOP_WBDRY);
1145 if (!state->result)
1146 return JS_FALSE;
1147 state->progLength++;
1148 return JS_TRUE;
1149 case 'B':
1150 state->result = NewRENode(state, REOP_WNONBDRY);
1151 if (!state->result)
1152 return JS_FALSE;
1153 state->progLength++;
1154 return JS_TRUE;
1155 /* Decimal escape */
1156 case '0':
1157 /* Give a strict warning. See also the note below. */
1158 if (!js_ReportCompileErrorNumber(state->context,
1159 state->tokenStream,
1160 JSREPORT_TS |
1161 JSREPORT_WARNING |
1162 JSREPORT_STRICT,
1163 JSMSG_INVALID_BACKREF)) {
1164 return JS_FALSE;
1165 }
1166 doOctal:
1167 num = 0;
1168 while (state->cp < state->cpend) {
1169 c = *state->cp;
1170 if (c < '0' || '7' < c)
1171 break;
1172 state->cp++;
1173 tmp = 8 * num + (uintN)JS7_UNDEC(c);
1174 if (tmp > 0377)
1175 break;
1176 num = tmp;
1177 }
1178 c = (jschar)num;
1179 doFlat:
1180 state->result = NewRENode(state, REOP_FLAT);
1181 if (!state->result)
1182 return JS_FALSE;
1183 state->result->u.flat.chr = c;
1184 state->result->u.flat.length = 1;
1185 state->progLength += 3;
1186 break;
1187 case '1':
1188 case '2':
1189 case '3':
1190 case '4':
1191 case '5':
1192 case '6':
1193 case '7':
1194 case '8':
1195 case '9':
1196 termStart = state->cp - 1;
1197 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1198 if (state->flags & JSREG_FIND_PAREN_ERROR)
1199 return JS_FALSE;
1200 if (num == OVERFLOW_VALUE) {
1201 /* Give a strict mode warning. */
1202 if (!js_ReportCompileErrorNumber(state->context,
1203 state->tokenStream,
1204 JSREPORT_TS |
1205 JSREPORT_WARNING |
1206 JSREPORT_STRICT,
1207 (c >= '8')
1208 ? JSMSG_INVALID_BACKREF
1209 : JSMSG_BAD_BACKREF)) {
1210 return JS_FALSE;
1211 }
1213 /*
1214 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1215 * error here. However, for compatibility with IE, we treat the
1216 * whole backref as flat if the first character in it is not a
1217 * valid octal character, and as an octal escape otherwise.
1218 */
1219 state->cp = termStart;
1220 if (c >= '8') {
1221 /* Treat this as flat. termStart - 1 is the \. */
1222 c = '\\';
1223 goto asFlat;
1224 }
1226 /* Treat this as an octal escape. */
1227 goto doOctal;
1228 }
1229 JS_ASSERT(1 <= num && num <= 0x10000);
1230 state->result = NewRENode(state, REOP_BACKREF);
1231 if (!state->result)
1232 return JS_FALSE;
1233 state->result->u.parenIndex = num - 1;
1234 state->progLength
1235 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1236 break;
1237 /* Control escape */
1238 case 'f':
1239 c = 0xC;
1240 goto doFlat;
1241 case 'n':
1242 c = 0xA;
1243 goto doFlat;
1244 case 'r':
1245 c = 0xD;
1246 goto doFlat;
1247 case 't':
1248 c = 0x9;
1249 goto doFlat;
1250 case 'v':
1251 c = 0xB;
1252 goto doFlat;
1253 /* Control letter */
1254 case 'c':
1255 if (state->cp + 1 < state->cpend && RE_IS_LETTER(state->cp[1])) {
1256 c = (jschar) (*state->cp++ & 0x1F);
1257 } else {
1258 /* back off to accepting the original '\' as a literal */
1259 --state->cp;
1260 c = '\\';
1261 }
1262 goto doFlat;
1263 /* HexEscapeSequence */
1264 case 'x':
1265 nDigits = 2;
1266 goto lexHex;
1267 /* UnicodeEscapeSequence */
1268 case 'u':
1269 nDigits = 4;
1270 lexHex:
1271 n = 0;
1272 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1273 uintN digit;
1274 c = *state->cp++;
1275 if (!isASCIIHexDigit(c, &digit)) {
1276 /*
1277 * Back off to accepting the original 'u' or 'x' as a
1278 * literal.
1279 */
1280 state->cp -= i + 2;
1281 n = *state->cp++;
1282 break;
1283 }
1284 n = (n << 4) | digit;
1285 }
1286 c = (jschar) n;
1287 goto doFlat;
1288 /* Character class escapes */
1289 case 'd':
1290 state->result = NewRENode(state, REOP_DIGIT);
1291 doSimple:
1292 if (!state->result)
1293 return JS_FALSE;
1294 state->progLength++;
1295 break;
1296 case 'D':
1297 state->result = NewRENode(state, REOP_NONDIGIT);
1298 goto doSimple;
1299 case 's':
1300 state->result = NewRENode(state, REOP_SPACE);
1301 goto doSimple;
1302 case 'S':
1303 state->result = NewRENode(state, REOP_NONSPACE);
1304 goto doSimple;
1305 case 'w':
1306 state->result = NewRENode(state, REOP_ALNUM);
1307 goto doSimple;
1308 case 'W':
1309 state->result = NewRENode(state, REOP_NONALNUM);
1310 goto doSimple;
1311 /* IdentityEscape */
1312 default:
1313 state->result = NewRENode(state, REOP_FLAT);
1314 if (!state->result)
1315 return JS_FALSE;
1316 state->result->u.flat.chr = c;
1317 state->result->u.flat.length = 1;
1318 state->result->kid = (void *) (state->cp - 1);
1319 state->progLength += 3;
1320 break;
1321 }
1322 break;
1323 case '[':
1324 state->result = NewRENode(state, REOP_CLASS);
1325 if (!state->result)
1326 return JS_FALSE;
1327 termStart = state->cp;
1328 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1329 for (;;) {
1330 if (state->cp == state->cpend) {
1331 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1332 JSREPORT_TS | JSREPORT_ERROR,
1333 JSMSG_UNTERM_CLASS, termStart);
1335 return JS_FALSE;
1336 }
1337 if (*state->cp == '\\') {
1338 state->cp++;
1339 if (state->cp != state->cpend)
1340 state->cp++;
1341 continue;
1342 }
1343 if (*state->cp == ']') {
1344 state->result->u.ucclass.kidlen = state->cp - termStart;
1345 break;
1346 }
1347 state->cp++;
1348 }
1349 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1350 if (!state->classCache[i].start) {
1351 state->classCache[i].start = termStart;
1352 state->classCache[i].length = state->result->u.ucclass.kidlen;
1353 state->classCache[i].index = state->classCount;
1354 break;
1355 }
1356 if (state->classCache[i].length ==
1357 state->result->u.ucclass.kidlen) {
1358 for (n = 0; ; n++) {
1359 if (n == state->classCache[i].length) {
1360 state->result->u.ucclass.index
1361 = state->classCache[i].index;
1362 goto claim;
1363 }
1364 if (state->classCache[i].start[n] != termStart[n])
1365 break;
1366 }
1367 }
1368 }
1369 state->result->u.ucclass.index = state->classCount++;
1371 claim:
1372 /*
1373 * Call CalculateBitmapSize now as we want any errors it finds
1374 * to be reported during the parse phase, not at execution.
1375 */
1376 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1377 return JS_FALSE;
1378 /*
1379 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1380 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1381 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1382 */
1383 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1384 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1385 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1386 JSREPORT_TS | JSREPORT_ERROR,
1387 JSMSG_REGEXP_TOO_COMPLEX);
1388 return JS_FALSE;
1389 }
1390 state->classBitmapsMem += n;
1391 /* CLASS, <index> */
1392 state->progLength
1393 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1394 break;
1396 case '.':
1397 state->result = NewRENode(state, REOP_DOT);
1398 goto doSimple;
1400 case '{':
1401 {
1402 const jschar *errp = state->cp--;
1403 intN err;
1405 err = ParseMinMaxQuantifier(state, JS_TRUE);
1406 state->cp = errp;
1408 if (err < 0)
1409 goto asFlat;
1411 /* FALL THROUGH */
1412 }
1413 case '*':
1414 case '+':
1415 case '?':
1416 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1417 JSREPORT_TS | JSREPORT_ERROR,
1418 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1419 return JS_FALSE;
1420 default:
1421 asFlat:
1422 state->result = NewRENode(state, REOP_FLAT);
1423 if (!state->result)
1424 return JS_FALSE;
1425 state->result->u.flat.chr = c;
1426 state->result->u.flat.length = 1;
1427 state->result->kid = (void *) (state->cp - 1);
1428 state->progLength += 3;
1429 break;
1430 }
1431 return ParseQuantifier(state);
1432 }
1434 static JSBool
1435 ParseQuantifier(CompilerState *state)
1436 {
1437 RENode *term;
1438 term = state->result;
1439 if (state->cp < state->cpend) {
1440 switch (*state->cp) {
1441 case '+':
1442 state->result = NewRENode(state, REOP_QUANT);
1443 if (!state->result)
1444 return JS_FALSE;
1445 state->result->u.range.min = 1;
1446 state->result->u.range.max = (uintN)-1;
1447 /* <PLUS>, <next> ... <ENDCHILD> */
1448 state->progLength += 4;
1449 goto quantifier;
1450 case '*':
1451 state->result = NewRENode(state, REOP_QUANT);
1452 if (!state->result)
1453 return JS_FALSE;
1454 state->result->u.range.min = 0;
1455 state->result->u.range.max = (uintN)-1;
1456 /* <STAR>, <next> ... <ENDCHILD> */
1457 state->progLength += 4;
1458 goto quantifier;
1459 case '?':
1460 state->result = NewRENode(state, REOP_QUANT);
1461 if (!state->result)
1462 return JS_FALSE;
1463 state->result->u.range.min = 0;
1464 state->result->u.range.max = 1;
1465 /* <OPT>, <next> ... <ENDCHILD> */
1466 state->progLength += 4;
1467 goto quantifier;
1468 case '{': /* balance '}' */
1469 {
1470 intN err;
1471 const jschar *errp = state->cp;
1473 err = ParseMinMaxQuantifier(state, JS_FALSE);
1474 if (err == 0)
1475 goto quantifier;
1476 if (err == -1)
1477 return JS_TRUE;
1479 js_ReportCompileErrorNumberUC(state->context,
1480 state->tokenStream,
1481 JSREPORT_TS | JSREPORT_ERROR,
1482 err, errp);
1483 return JS_FALSE;
1484 }
1485 default:;
1486 }
1487 }
1488 return JS_TRUE;
1490 quantifier:
1491 if (state->treeDepth == TREE_DEPTH_MAX) {
1492 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1493 JSREPORT_TS | JSREPORT_ERROR,
1494 JSMSG_REGEXP_TOO_COMPLEX);
1495 return JS_FALSE;
1496 }
1498 ++state->treeDepth;
1499 ++state->cp;
1500 state->result->kid = term;
1501 if (state->cp < state->cpend && *state->cp == '?') {
1502 ++state->cp;
1503 state->result->u.range.greedy = JS_FALSE;
1504 } else {
1505 state->result->u.range.greedy = JS_TRUE;
1506 }
1507 return JS_TRUE;
1508 }
1510 static intN
1511 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1512 {
1513 uintN min, max;
1514 jschar c;
1515 const jschar *errp = state->cp++;
1517 c = *state->cp;
1518 if (JS7_ISDEC(c)) {
1519 ++state->cp;
1520 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1521 c = *state->cp;
1523 if (!ignoreValues && min == OVERFLOW_VALUE)
1524 return JSMSG_MIN_TOO_BIG;
1526 if (c == ',') {
1527 c = *++state->cp;
1528 if (JS7_ISDEC(c)) {
1529 ++state->cp;
1530 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1531 c = *state->cp;
1532 if (!ignoreValues && max == OVERFLOW_VALUE)
1533 return JSMSG_MAX_TOO_BIG;
1534 if (!ignoreValues && min > max)
1535 return JSMSG_OUT_OF_ORDER;
1536 } else {
1537 max = (uintN)-1;
1538 }
1539 } else {
1540 max = min;
1541 }
1542 if (c == '}') {
1543 state->result = NewRENode(state, REOP_QUANT);
1544 if (!state->result)
1545 return JS_FALSE;
1546 state->result->u.range.min = min;
1547 state->result->u.range.max = max;
1548 /*
1549 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1550 * where <max> is written as compact(max+1) to make
1551 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1552 */
1553 state->progLength += (1 + GetCompactIndexWidth(min)
1554 + GetCompactIndexWidth(max + 1)
1555 +3);
1556 return 0;
1557 }
1558 }
1560 state->cp = errp;
1561 return -1;
1562 }
1564 static JSBool
1565 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1566 {
1567 ptrdiff_t offset = target - jump;
1569 /* Check that target really points forward. */
1570 JS_ASSERT(offset >= 2);
1571 if ((size_t)offset > OFFSET_MAX)
1572 return JS_FALSE;
1574 jump[0] = JUMP_OFFSET_HI(offset);
1575 jump[1] = JUMP_OFFSET_LO(offset);
1576 return JS_TRUE;
1577 }
1579 /*
1580 * Generate bytecode for the tree rooted at t using an explicit stack instead
1581 * of recursion.
1582 */
1583 static jsbytecode *
1584 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1585 jsbytecode *pc, RENode *t)
1586 {
1587 EmitStateStackEntry *emitStateSP, *emitStateStack;
1588 RECharSet *charSet;
1589 REOp op;
1591 if (treeDepth == 0) {
1592 emitStateStack = NULL;
1593 } else {
1594 emitStateStack =
1595 (EmitStateStackEntry *)JS_malloc(state->context,
1596 sizeof(EmitStateStackEntry) *
1597 treeDepth);
1598 if (!emitStateStack)
1599 return NULL;
1600 }
1601 emitStateSP = emitStateStack;
1602 op = t->op;
1604 for (;;) {
1605 *pc++ = op;
1606 switch (op) {
1607 case REOP_EMPTY:
1608 --pc;
1609 break;
1611 case REOP_ALTPREREQ2:
1612 case REOP_ALTPREREQ:
1613 JS_ASSERT(emitStateSP);
1614 emitStateSP->altHead = pc - 1;
1615 emitStateSP->endTermFixup = pc;
1616 pc += OFFSET_LEN;
1617 SET_ARG(pc, t->u.altprereq.ch1);
1618 pc += ARG_LEN;
1619 SET_ARG(pc, t->u.altprereq.ch2);
1620 pc += ARG_LEN;
1622 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1623 pc += OFFSET_LEN;
1625 emitStateSP->continueNode = t;
1626 emitStateSP->continueOp = REOP_JUMP;
1627 emitStateSP->jumpToJumpFlag = JS_FALSE;
1628 ++emitStateSP;
1629 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1630 t = (RENode *) t->kid;
1631 op = t->op;
1632 continue;
1634 case REOP_JUMP:
1635 emitStateSP->nextTermFixup = pc; /* offset to following term */
1636 pc += OFFSET_LEN;
1637 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1638 goto jump_too_big;
1639 emitStateSP->continueOp = REOP_ENDALT;
1640 ++emitStateSP;
1641 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1642 t = t->u.kid2;
1643 op = t->op;
1644 continue;
1646 case REOP_ENDALT:
1647 /*
1648 * If we already patched emitStateSP->nextTermFixup to jump to
1649 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1650 * are done here.
1651 */
1652 if (emitStateSP->jumpToJumpFlag)
1653 break;
1655 /*
1656 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1657 * REOP_ENDALT is executed only on successful match of the last
1658 * alternate in a group.
1659 */
1660 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1661 goto jump_too_big;
1662 if (t->op != REOP_ALT) {
1663 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1664 goto jump_too_big;
1665 }
1667 /*
1668 * If the program is bigger than the REOP_JUMP offset range, then
1669 * we must check for alternates before this one that are part of
1670 * the same group, and fix up their jump offsets to target jumps
1671 * close enough to fit in a 16-bit unsigned offset immediate.
1672 */
1673 if ((size_t)(pc - re->program) > OFFSET_MAX &&
1674 emitStateSP > emitStateStack) {
1675 EmitStateStackEntry *esp, *esp2;
1676 jsbytecode *alt, *jump;
1677 ptrdiff_t span, header;
1679 esp2 = emitStateSP;
1680 alt = esp2->altHead;
1681 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1682 if (esp->continueOp == REOP_ENDALT &&
1683 !esp->jumpToJumpFlag &&
1684 esp->nextTermFixup + OFFSET_LEN == alt &&
1685 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1686 ? esp->endTermFixup
1687 : esp->nextTermFixup)) > OFFSET_MAX) {
1688 alt = esp->altHead;
1689 jump = esp->nextTermFixup;
1691 /*
1692 * The span must be 1 less than the distance from
1693 * jump offset to jump offset, so we actually jump
1694 * to a REOP_JUMP bytecode, not to its offset!
1695 */
1696 for (;;) {
1697 JS_ASSERT(jump < esp2->nextTermFixup);
1698 span = esp2->nextTermFixup - jump - 1;
1699 if ((size_t)span <= OFFSET_MAX)
1700 break;
1701 do {
1702 if (--esp2 == esp)
1703 goto jump_too_big;
1704 } while (esp2->continueOp != REOP_ENDALT);
1705 }
1707 jump[0] = JUMP_OFFSET_HI(span);
1708 jump[1] = JUMP_OFFSET_LO(span);
1710 if (esp->continueNode->op != REOP_ALT) {
1711 /*
1712 * We must patch the offset at esp->endTermFixup
1713 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1714 * If we're unlucky and endTermFixup is more than
1715 * OFFSET_MAX bytes from its target, we cheat by
1716 * jumping 6 bytes to the jump whose offset is at
1717 * esp->nextTermFixup, which has the same target.
1718 */
1719 jump = esp->endTermFixup;
1720 header = esp->nextTermFixup - jump;
1721 span += header;
1722 if ((size_t)span > OFFSET_MAX)
1723 span = header;
1725 jump[0] = JUMP_OFFSET_HI(span);
1726 jump[1] = JUMP_OFFSET_LO(span);
1727 }
1729 esp->jumpToJumpFlag = JS_TRUE;
1730 }
1731 }
1732 }
1733 break;
1735 case REOP_ALT:
1736 JS_ASSERT(emitStateSP);
1737 emitStateSP->altHead = pc - 1;
1738 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1739 pc += OFFSET_LEN;
1740 emitStateSP->continueNode = t;
1741 emitStateSP->continueOp = REOP_JUMP;
1742 emitStateSP->jumpToJumpFlag = JS_FALSE;
1743 ++emitStateSP;
1744 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1745 t = t->kid;
1746 op = t->op;
1747 continue;
1749 case REOP_FLAT:
1750 /*
1751 * Coalesce FLATs if possible and if it would not increase bytecode
1752 * beyond preallocated limit. The latter happens only when bytecode
1753 * size for coalesced string with offset p and length 2 exceeds 6
1754 * bytes preallocated for 2 single char nodes, i.e. when
1755 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1756 * GetCompactIndexWidth(p) > 4.
1757 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1758 * nodes strictly decreases bytecode size, the check has to be
1759 * done only for the first coalescing.
1760 */
1761 if (t->kid &&
1762 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1763 {
1764 while (t->next &&
1765 t->next->op == REOP_FLAT &&
1766 (jschar*)t->kid + t->u.flat.length ==
1767 (jschar*)t->next->kid) {
1768 t->u.flat.length += t->next->u.flat.length;
1769 t->next = t->next->next;
1770 }
1771 }
1772 if (t->kid && t->u.flat.length > 1) {
1773 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1774 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1775 pc = WriteCompactIndex(pc, t->u.flat.length);
1776 } else if (t->u.flat.chr < 256) {
1777 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1778 *pc++ = (jsbytecode) t->u.flat.chr;
1779 } else {
1780 pc[-1] = (state->flags & JSREG_FOLD)
1781 ? REOP_UCFLAT1i
1782 : REOP_UCFLAT1;
1783 SET_ARG(pc, t->u.flat.chr);
1784 pc += ARG_LEN;
1785 }
1786 break;
1788 case REOP_LPAREN:
1789 JS_ASSERT(emitStateSP);
1790 pc = WriteCompactIndex(pc, t->u.parenIndex);
1791 emitStateSP->continueNode = t;
1792 emitStateSP->continueOp = REOP_RPAREN;
1793 ++emitStateSP;
1794 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1795 t = (RENode *) t->kid;
1796 op = t->op;
1797 continue;
1799 case REOP_RPAREN:
1800 pc = WriteCompactIndex(pc, t->u.parenIndex);
1801 break;
1803 case REOP_BACKREF:
1804 pc = WriteCompactIndex(pc, t->u.parenIndex);
1805 break;
1807 case REOP_ASSERT:
1808 JS_ASSERT(emitStateSP);
1809 emitStateSP->nextTermFixup = pc;
1810 pc += OFFSET_LEN;
1811 emitStateSP->continueNode = t;
1812 emitStateSP->continueOp = REOP_ASSERTTEST;
1813 ++emitStateSP;
1814 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1815 t = (RENode *) t->kid;
1816 op = t->op;
1817 continue;
1819 case REOP_ASSERTTEST:
1820 case REOP_ASSERTNOTTEST:
1821 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1822 goto jump_too_big;
1823 break;
1825 case REOP_ASSERT_NOT:
1826 JS_ASSERT(emitStateSP);
1827 emitStateSP->nextTermFixup = pc;
1828 pc += OFFSET_LEN;
1829 emitStateSP->continueNode = t;
1830 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1831 ++emitStateSP;
1832 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1833 t = (RENode *) t->kid;
1834 op = t->op;
1835 continue;
1837 case REOP_QUANT:
1838 JS_ASSERT(emitStateSP);
1839 if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1840 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1841 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1842 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1843 } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1844 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1845 } else {
1846 if (!t->u.range.greedy)
1847 pc[-1] = REOP_MINIMALQUANT;
1848 pc = WriteCompactIndex(pc, t->u.range.min);
1849 /*
1850 * Write max + 1 to avoid using size_t(max) + 1 bytes
1851 * for (uintN)-1 sentinel.
1852 */
1853 pc = WriteCompactIndex(pc, t->u.range.max + 1);
1854 }
1855 emitStateSP->nextTermFixup = pc;
1856 pc += OFFSET_LEN;
1857 emitStateSP->continueNode = t;
1858 emitStateSP->continueOp = REOP_ENDCHILD;
1859 ++emitStateSP;
1860 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1861 t = (RENode *) t->kid;
1862 op = t->op;
1863 continue;
1865 case REOP_ENDCHILD:
1866 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1867 goto jump_too_big;
1868 break;
1870 case REOP_CLASS:
1871 if (!t->u.ucclass.sense)
1872 pc[-1] = REOP_NCLASS;
1873 pc = WriteCompactIndex(pc, t->u.ucclass.index);
1874 charSet = &re->classList[t->u.ucclass.index];
1875 charSet->converted = JS_FALSE;
1876 charSet->length = t->u.ucclass.bmsize;
1877 charSet->u.src.startIndex = t->u.ucclass.startIndex;
1878 charSet->u.src.length = t->u.ucclass.kidlen;
1879 charSet->sense = t->u.ucclass.sense;
1880 break;
1882 default:
1883 break;
1884 }
1886 t = t->next;
1887 if (t) {
1888 op = t->op;
1889 } else {
1890 if (emitStateSP == emitStateStack)
1891 break;
1892 --emitStateSP;
1893 t = emitStateSP->continueNode;
1894 op = emitStateSP->continueOp;
1895 }
1896 }
1898 cleanup:
1899 if (emitStateStack)
1900 JS_free(state->context, emitStateStack);
1901 return pc;
1903 jump_too_big:
1904 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1905 JSREPORT_TS | JSREPORT_ERROR,
1906 JSMSG_REGEXP_TOO_COMPLEX);
1907 pc = NULL;
1908 goto cleanup;
1909 }
1912 JSRegExp *
1913 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1914 JSString *str, uintN flags, JSBool flat)
1915 {
1916 JSRegExp *re;
1917 void *mark;
1918 CompilerState state;
1919 size_t resize;
1920 jsbytecode *endPC;
1921 uintN i;
1922 size_t len;
1924 re = NULL;
1925 mark = JS_ARENA_MARK(&cx->tempPool);
1926 len = JSSTRING_LENGTH(str);
1928 state.context = cx;
1929 state.tokenStream = ts;
1930 state.cpbegin = state.cp = JSSTRING_CHARS(str);
1931 state.cpend = state.cp + len;
1932 state.flags = flags;
1933 state.parenCount = 0;
1934 state.classCount = 0;
1935 state.progLength = 0;
1936 state.treeDepth = 0;
1937 state.classBitmapsMem = 0;
1938 for (i = 0; i < CLASS_CACHE_SIZE; i++)
1939 state.classCache[i].start = NULL;
1941 if (len != 0 && flat) {
1942 state.result = NewRENode(&state, REOP_FLAT);
1943 state.result->u.flat.chr = *state.cpbegin;
1944 state.result->u.flat.length = len;
1945 state.result->kid = (void *) state.cpbegin;
1946 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1947 state.progLength += 1 + GetCompactIndexWidth(0)
1948 + GetCompactIndexWidth(len);
1949 } else {
1950 if (!ParseRegExp(&state))
1951 goto out;
1952 }
1953 resize = offsetof(JSRegExp, program) + state.progLength + 1;
1954 re = (JSRegExp *) JS_malloc(cx, resize);
1955 if (!re)
1956 goto out;
1958 re->nrefs = 1;
1959 JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
1960 re->classCount = state.classCount;
1961 if (re->classCount) {
1962 re->classList = (RECharSet *)
1963 JS_malloc(cx, re->classCount * sizeof(RECharSet));
1964 if (!re->classList) {
1965 js_DestroyRegExp(cx, re);
1966 re = NULL;
1967 goto out;
1968 }
1969 } else {
1970 re->classList = NULL;
1971 }
1972 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
1973 if (!endPC) {
1974 js_DestroyRegExp(cx, re);
1975 re = NULL;
1976 goto out;
1977 }
1978 *endPC++ = REOP_END;
1979 /*
1980 * Check whether size was overestimated and shrink using realloc.
1981 * This is safe since no pointers to newly parsed regexp or its parts
1982 * besides re exist here.
1983 */
1984 if ((size_t)(endPC - re->program) != state.progLength + 1) {
1985 JSRegExp *tmp;
1986 JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
1987 resize = offsetof(JSRegExp, program) + (endPC - re->program);
1988 tmp = (JSRegExp *) JS_realloc(cx, re, resize);
1989 if (tmp)
1990 re = tmp;
1991 }
1993 re->flags = flags;
1994 re->cloneIndex = 0;
1995 re->parenCount = state.parenCount;
1996 re->source = str;
1998 out:
1999 JS_ARENA_RELEASE(&cx->tempPool, mark);
2000 return re;
2001 }
2003 JSRegExp *
2004 js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
2005 JSString *str, JSString *opt, JSBool flat)
2006 {
2007 uintN flags;
2008 jschar *s;
2009 size_t i, n;
2010 char charBuf[2];
2012 flags = 0;
2013 if (opt) {
2014 s = JSSTRING_CHARS(opt);
2015 for (i = 0, n = JSSTRING_LENGTH(opt); i < n; i++) {
2016 switch (s[i]) {
2017 case 'g':
2018 flags |= JSREG_GLOB;
2019 break;
2020 case 'i':
2021 flags |= JSREG_FOLD;
2022 break;
2023 case 'm':
2024 flags |= JSREG_MULTILINE;
2025 break;
2026 default:
2027 charBuf[0] = (char)s[i];
2028 charBuf[1] = '\0';
2029 js_ReportCompileErrorNumber(cx, ts,
2030 JSREPORT_TS | JSREPORT_ERROR,
2031 JSMSG_BAD_FLAG, charBuf);
2032 return NULL;
2033 }
2034 }
2035 }
2036 return js_NewRegExp(cx, ts, str, flags, flat);
2037 }
2039 /*
2040 * Save the current state of the match - the position in the input
2041 * text as well as the position in the bytecode. The state of any
2042 * parent expressions is also saved (preceding state).
2043 * Contents of parenCount parentheses from parenIndex are also saved.
2044 */
2045 static REBackTrackData *
2046 PushBackTrackState(REGlobalData *gData, REOp op,
2047 jsbytecode *target, REMatchState *x, const jschar *cp,
2048 size_t parenIndex, size_t parenCount)
2049 {
2050 size_t i;
2051 REBackTrackData *result =
2052 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2054 size_t sz = sizeof(REBackTrackData) +
2055 gData->stateStackTop * sizeof(REProgState) +
2056 parenCount * sizeof(RECapture);
2058 ptrdiff_t btsize = gData->backTrackStackSize;
2059 ptrdiff_t btincr = ((char *)result + sz) -
2060 ((char *)gData->backTrackStack + btsize);
2062 if (btincr > 0) {
2063 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2065 btincr = JS_ROUNDUP(btincr, btsize);
2066 JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2067 &gData->pool, btsize, btincr);
2068 if (!gData->backTrackStack)
2069 return NULL;
2070 gData->backTrackStackSize = btsize + btincr;
2071 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2072 }
2073 gData->backTrackSP = result;
2074 result->sz = gData->cursz;
2075 gData->cursz = sz;
2077 result->backtrack_op = op;
2078 result->backtrack_pc = target;
2079 result->cp = cp;
2080 result->parenCount = parenCount;
2082 result->saveStateStackTop = gData->stateStackTop;
2083 JS_ASSERT(gData->stateStackTop);
2084 memcpy(result + 1, gData->stateStack,
2085 sizeof(REProgState) * result->saveStateStackTop);
2087 if (parenCount != 0) {
2088 result->parenIndex = parenIndex;
2089 memcpy((char *)(result + 1) +
2090 sizeof(REProgState) * result->saveStateStackTop,
2091 &x->parens[parenIndex],
2092 sizeof(RECapture) * parenCount);
2093 for (i = 0; i != parenCount; i++)
2094 x->parens[parenIndex + i].index = -1;
2095 }
2097 return result;
2098 }
2101 /*
2102 * Consecutive literal characters.
2103 */
2104 #if 0
2105 static REMatchState *
2106 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2107 size_t length)
2108 {
2109 size_t i;
2110 if (length > gData->cpend - x->cp)
2111 return NULL;
2112 for (i = 0; i != length; i++) {
2113 if (matchChars[i] != x->cp[i])
2114 return NULL;
2115 }
2116 x->cp += length;
2117 return x;
2118 }
2119 #endif
2121 static REMatchState *
2122 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2123 size_t length)
2124 {
2125 size_t i;
2126 JS_ASSERT(gData->cpend >= x->cp);
2127 if (length > (size_t)(gData->cpend - x->cp))
2128 return NULL;
2129 for (i = 0; i != length; i++) {
2130 if (upcase(matchChars[i]) != upcase(x->cp[i]))
2131 return NULL;
2132 }
2133 x->cp += length;
2134 return x;
2135 }
2137 /*
2138 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2139 * 2. If E is not a character then go to step 6.
2140 * 3. Let ch be E's character.
2141 * 4. Let A be a one-element RECharSet containing the character ch.
2142 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2143 * 6. E must be an integer. Let n be that integer.
2144 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2145 * 8. Return an internal Matcher closure that takes two arguments, a State x
2146 * and a Continuation c, and performs the following:
2147 * 1. Let cap be x's captures internal array.
2148 * 2. Let s be cap[n].
2149 * 3. If s is undefined, then call c(x) and return its result.
2150 * 4. Let e be x's endIndex.
2151 * 5. Let len be s's length.
2152 * 6. Let f be e+len.
2153 * 7. If f>InputLength, return failure.
2154 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2155 * such that Canonicalize(s[i]) is not the same character as
2156 * Canonicalize(Input [e+i]), then return failure.
2157 * 9. Let y be the State (f, cap).
2158 * 10. Call c(y) and return its result.
2159 */
2160 static REMatchState *
2161 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2162 {
2163 size_t len, i;
2164 const jschar *parenContent;
2165 RECapture *cap = &x->parens[parenIndex];
2167 if (cap->index == -1)
2168 return x;
2170 len = cap->length;
2171 if (x->cp + len > gData->cpend)
2172 return NULL;
2174 parenContent = &gData->cpbegin[cap->index];
2175 if (gData->regexp->flags & JSREG_FOLD) {
2176 for (i = 0; i < len; i++) {
2177 if (upcase(parenContent[i]) != upcase(x->cp[i]))
2178 return NULL;
2179 }
2180 } else {
2181 for (i = 0; i < len; i++) {
2182 if (parenContent[i] != x->cp[i])
2183 return NULL;
2184 }
2185 }
2186 x->cp += len;
2187 return x;
2188 }
2191 /* Add a single character to the RECharSet */
2192 static void
2193 AddCharacterToCharSet(RECharSet *cs, jschar c)
2194 {
2195 uintN byteIndex = (uintN)(c >> 3);
2196 JS_ASSERT(c <= cs->length);
2197 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2198 }
2201 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2202 static void
2203 AddCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
2204 {
2205 uintN i;
2207 uintN byteIndex1 = (uintN)(c1 >> 3);
2208 uintN byteIndex2 = (uintN)(c2 >> 3);
2210 JS_ASSERT((c2 <= cs->length) && (c1 <= c2));
2212 c1 &= 0x7;
2213 c2 &= 0x7;
2215 if (byteIndex1 == byteIndex2) {
2216 cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2217 } else {
2218 cs->u.bits[byteIndex1] |= 0xFF << c1;
2219 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2220 cs->u.bits[i] = 0xFF;
2221 cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2222 }
2223 }
2225 /* Compile the source of the class into a RECharSet */
2226 static JSBool
2227 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2228 {
2229 const jschar *src, *end;
2230 JSBool inRange = JS_FALSE;
2231 jschar rangeStart = 0;
2232 uintN byteLength, n;
2233 jschar c, thisCh;
2234 intN nDigits, i;
2236 JS_ASSERT(!charSet->converted);
2237 /*
2238 * Assert that startIndex and length points to chars inside [] inside
2239 * source string.
2240 */
2241 JS_ASSERT(1 <= charSet->u.src.startIndex);
2242 JS_ASSERT(charSet->u.src.startIndex
2243 < JSSTRING_LENGTH(gData->regexp->source));
2244 JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
2245 - 1 - charSet->u.src.startIndex);
2247 charSet->converted = JS_TRUE;
2248 src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
2249 end = src + charSet->u.src.length;
2250 JS_ASSERT(src[-1] == '[');
2251 JS_ASSERT(end[0] == ']');
2253 byteLength = (charSet->length >> 3) + 1;
2254 charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
2255 if (!charSet->u.bits)
2256 return JS_FALSE;
2257 memset(charSet->u.bits, 0, byteLength);
2259 if (src == end)
2260 return JS_TRUE;
2262 if (*src == '^') {
2263 JS_ASSERT(charSet->sense == JS_FALSE);
2264 ++src;
2265 } else {
2266 JS_ASSERT(charSet->sense == JS_TRUE);
2267 }
2269 while (src != end) {
2270 switch (*src) {
2271 case '\\':
2272 ++src;
2273 c = *src++;
2274 switch (c) {
2275 case 'b':
2276 thisCh = 0x8;
2277 break;
2278 case 'f':
2279 thisCh = 0xC;
2280 break;
2281 case 'n':
2282 thisCh = 0xA;
2283 break;
2284 case 'r':
2285 thisCh = 0xD;
2286 break;
2287 case 't':
2288 thisCh = 0x9;
2289 break;
2290 case 'v':
2291 thisCh = 0xB;
2292 break;
2293 case 'c':
2294 if (src + 1 < end && JS_ISWORD(src[1])) {
2295 thisCh = (jschar)(*src++ & 0x1F);
2296 } else {
2297 --src;
2298 thisCh = '\\';
2299 }
2300 break;
2301 case 'x':
2302 nDigits = 2;
2303 goto lexHex;
2304 case 'u':
2305 nDigits = 4;
2306 lexHex:
2307 n = 0;
2308 for (i = 0; (i < nDigits) && (src < end); i++) {
2309 uintN digit;
2310 c = *src++;
2311 if (!isASCIIHexDigit(c, &digit)) {
2312 /*
2313 * Back off to accepting the original '\'
2314 * as a literal
2315 */
2316 src -= i + 1;
2317 n = '\\';
2318 break;
2319 }
2320 n = (n << 4) | digit;
2321 }
2322 thisCh = (jschar)n;
2323 break;
2324 case '0':
2325 case '1':
2326 case '2':
2327 case '3':
2328 case '4':
2329 case '5':
2330 case '6':
2331 case '7':
2332 /*
2333 * This is a non-ECMA extension - decimal escapes (in this
2334 * case, octal!) are supposed to be an error inside class
2335 * ranges, but supported here for backwards compatibility.
2336 */
2337 n = JS7_UNDEC(c);
2338 c = *src;
2339 if ('0' <= c && c <= '7') {
2340 src++;
2341 n = 8 * n + JS7_UNDEC(c);
2342 c = *src;
2343 if ('0' <= c && c <= '7') {
2344 src++;
2345 i = 8 * n + JS7_UNDEC(c);
2346 if (i <= 0377)
2347 n = i;
2348 else
2349 src--;
2350 }
2351 }
2352 thisCh = (jschar)n;
2353 break;
2355 case 'd':
2356 AddCharacterRangeToCharSet(charSet, '0', '9');
2357 continue; /* don't need range processing */
2358 case 'D':
2359 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2360 AddCharacterRangeToCharSet(charSet,
2361 (jschar)('9' + 1),
2362 (jschar)charSet->length);
2363 continue;
2364 case 's':
2365 for (i = (intN)charSet->length; i >= 0; i--)
2366 if (JS_ISSPACE(i))
2367 AddCharacterToCharSet(charSet, (jschar)i);
2368 continue;
2369 case 'S':
2370 for (i = (intN)charSet->length; i >= 0; i--)
2371 if (!JS_ISSPACE(i))
2372 AddCharacterToCharSet(charSet, (jschar)i);
2373 continue;
2374 case 'w':
2375 for (i = (intN)charSet->length; i >= 0; i--)
2376 if (JS_ISWORD(i))
2377 AddCharacterToCharSet(charSet, (jschar)i);
2378 continue;
2379 case 'W':
2380 for (i = (intN)charSet->length; i >= 0; i--)
2381 if (!JS_ISWORD(i))
2382 AddCharacterToCharSet(charSet, (jschar)i);
2383 continue;
2384 default:
2385 thisCh = c;
2386 break;
2388 }
2389 break;
2391 default:
2392 thisCh = *src++;
2393 break;
2395 }
2396 if (inRange) {
2397 if (gData->regexp->flags & JSREG_FOLD) {
2398 AddCharacterRangeToCharSet(charSet, upcase(rangeStart),
2399 upcase(thisCh));
2400 AddCharacterRangeToCharSet(charSet, downcase(rangeStart),
2401 downcase(thisCh));
2402 } else {
2403 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2404 }
2405 inRange = JS_FALSE;
2406 } else {
2407 if (gData->regexp->flags & JSREG_FOLD) {
2408 AddCharacterToCharSet(charSet, upcase(thisCh));
2409 AddCharacterToCharSet(charSet, downcase(thisCh));
2410 } else {
2411 AddCharacterToCharSet(charSet, thisCh);
2412 }
2413 if (src < end - 1) {
2414 if (*src == '-') {
2415 ++src;
2416 inRange = JS_TRUE;
2417 rangeStart = thisCh;
2418 }
2419 }
2420 }
2421 }
2422 return JS_TRUE;
2423 }
2425 void
2426 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2427 {
2428 if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2429 if (re->classList) {
2430 uintN i;
2431 for (i = 0; i < re->classCount; i++) {
2432 if (re->classList[i].converted)
2433 JS_free(cx, re->classList[i].u.bits);
2434 re->classList[i].u.bits = NULL;
2435 }
2436 JS_free(cx, re->classList);
2437 }
2438 JS_free(cx, re);
2439 }
2440 }
2442 static JSBool
2443 ReallocStateStack(REGlobalData *gData)
2444 {
2445 size_t limit = gData->stateStackLimit;
2446 size_t sz = sizeof(REProgState) * limit;
2448 JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2449 if (!gData->stateStack) {
2450 gData->ok = JS_FALSE;
2451 return JS_FALSE;
2452 }
2453 gData->stateStackLimit = limit + limit;
2454 return JS_TRUE;
2455 }
2457 #define PUSH_STATE_STACK(data) \
2458 JS_BEGIN_MACRO \
2459 ++(data)->stateStackTop; \
2460 if ((data)->stateStackTop == (data)->stateStackLimit && \
2461 !ReallocStateStack((data))) { \
2462 return NULL; \
2463 } \
2464 JS_END_MACRO
2466 /*
2467 * Apply the current op against the given input to see if it's going to match
2468 * or fail. Return false if we don't get a match, true if we do and update the
2469 * state of the input and pc if the update flag is true.
2470 */
2471 static REMatchState *
2472 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2473 jsbytecode **startpc, JSBool update)
2474 {
2475 REMatchState *result = NULL;
2476 jschar matchCh;
2477 size_t parenIndex;
2478 size_t offset, length, index;
2479 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2480 jschar *source;
2481 const jschar *startcp = x->cp;
2482 jschar ch;
2483 RECharSet *charSet;
2485 switch (op) {
2486 case REOP_BOL:
2487 if (x->cp != gData->cpbegin) {
2488 if (!gData->cx->regExpStatics.multiline &&
2489 !(gData->regexp->flags & JSREG_MULTILINE)) {
2490 break;
2491 }
2492 if (!RE_IS_LINE_TERM(x->cp[-1]))
2493 break;
2494 }
2495 result = x;
2496 break;
2497 case REOP_EOL:
2498 if (x->cp != gData->cpend) {
2499 if (!gData->cx->regExpStatics.multiline &&
2500 !(gData->regexp->flags & JSREG_MULTILINE)) {
2501 break;
2502 }
2503 if (!RE_IS_LINE_TERM(*x->cp))
2504 break;
2505 }
2506 result = x;
2507 break;
2508 case REOP_WBDRY:
2509 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2510 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2511 result = x;
2512 }
2513 break;
2514 case REOP_WNONBDRY:
2515 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2516 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2517 result = x;
2518 }
2519 break;
2520 case REOP_DOT:
2521 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2522 result = x;
2523 result->cp++;
2524 }
2525 break;
2526 case REOP_DIGIT:
2527 if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
2528 result = x;
2529 result->cp++;
2530 }
2531 break;
2532 case REOP_NONDIGIT:
2533 if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
2534 result = x;
2535 result->cp++;
2536 }
2537 break;
2538 case REOP_ALNUM:
2539 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2540 result = x;
2541 result->cp++;
2542 }
2543 break;
2544 case REOP_NONALNUM:
2545 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2546 result = x;
2547 result->cp++;
2548 }
2549 break;
2550 case REOP_SPACE:
2551 if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2552 result = x;
2553 result->cp++;
2554 }
2555 break;
2556 case REOP_NONSPACE:
2557 if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2558 result = x;
2559 result->cp++;
2560 }
2561 break;
2562 case REOP_BACKREF:
2563 pc = ReadCompactIndex(pc, &parenIndex);
2564 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2565 result = BackrefMatcher(gData, x, parenIndex);
2566 break;
2567 case REOP_FLAT:
2568 pc = ReadCompactIndex(pc, &offset);
2569 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2570 pc = ReadCompactIndex(pc, &length);
2571 JS_ASSERT(1 <= length);
2572 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2573 if (length <= (size_t)(gData->cpend - x->cp)) {
2574 source = JSSTRING_CHARS(gData->regexp->source) + offset;
2575 for (index = 0; index != length; index++) {
2576 if (source[index] != x->cp[index])
2577 return NULL;
2578 }
2579 x->cp += length;
2580 result = x;
2581 }
2582 break;
2583 case REOP_FLAT1:
2584 matchCh = *pc++;
2585 if (x->cp != gData->cpend && *x->cp == matchCh) {
2586 result = x;
2587 result->cp++;
2588 }
2589 break;
2590 case REOP_FLATi:
2591 pc = ReadCompactIndex(pc, &offset);
2592 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2593 pc = ReadCompactIndex(pc, &length);
2594 JS_ASSERT(1 <= length);
2595 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2596 source = JSSTRING_CHARS(gData->regexp->source);
2597 result = FlatNIMatcher(gData, x, source + offset, length);
2598 break;
2599 case REOP_FLAT1i:
2600 matchCh = *pc++;
2601 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2602 result = x;
2603 result->cp++;
2604 }
2605 break;
2606 case REOP_UCFLAT1:
2607 matchCh = GET_ARG(pc);
2608 pc += ARG_LEN;
2609 if (x->cp != gData->cpend && *x->cp == matchCh) {
2610 result = x;
2611 result->cp++;
2612 }
2613 break;
2614 case REOP_UCFLAT1i:
2615 matchCh = GET_ARG(pc);
2616 pc += ARG_LEN;
2617 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2618 result = x;
2619 result->cp++;
2620 }
2621 break;
2622 case REOP_CLASS:
2623 pc = ReadCompactIndex(pc, &index);
2624 JS_ASSERT(index < gData->regexp->classCount);
2625 if (x->cp != gData->cpend) {
2626 charSet = &gData->regexp->classList[index];
2627 JS_ASSERT(charSet->converted);
2628 ch = *x->cp;
2629 index = ch >> 3;
2630 if (charSet->length != 0 &&
2631 ch <= charSet->length &&
2632 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2633 result = x;
2634 result->cp++;
2635 }
2636 }
2637 break;
2638 case REOP_NCLASS:
2639 pc = ReadCompactIndex(pc, &index);
2640 JS_ASSERT(index < gData->regexp->classCount);
2641 if (x->cp != gData->cpend) {
2642 charSet = &gData->regexp->classList[index];
2643 JS_ASSERT(charSet->converted);
2644 ch = *x->cp;
2645 index = ch >> 3;
2646 if (charSet->length == 0 ||
2647 ch > charSet->length ||
2648 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2649 result = x;
2650 result->cp++;
2651 }
2652 }
2653 break;
2654 default:
2655 JS_ASSERT(JS_FALSE);
2656 }
2657 if (result) {
2658 if (update)
2659 *startpc = pc;
2660 else
2661 x->cp = startcp;
2662 return result;
2663 }
2664 x->cp = startcp;
2665 return NULL;
2666 }
2668 static REMatchState *
2669 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2670 {
2671 REMatchState *result = NULL;
2672 REBackTrackData *backTrackData;
2673 jsbytecode *nextpc;
2674 REOp nextop;
2675 RECapture *cap;
2676 REProgState *curState;
2677 const jschar *startcp;
2678 size_t parenIndex, k;
2679 size_t parenSoFar = 0;
2681 jschar matchCh1, matchCh2;
2682 RECharSet *charSet;
2684 JSBool anchor;
2685 jsbytecode *pc = gData->regexp->program;
2686 REOp op = (REOp) *pc++;
2688 /*
2689 * If the first node is a simple match, step the index into the string
2690 * until that match is made, or fail if it can't be found at all.
2691 */
2692 if (REOP_IS_SIMPLE(op)) {
2693 anchor = JS_FALSE;
2694 while (x->cp <= gData->cpend) {
2695 nextpc = pc; /* reset back to start each time */
2696 result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2697 if (result) {
2698 anchor = JS_TRUE;
2699 x = result;
2700 pc = nextpc; /* accept skip to next opcode */
2701 op = (REOp) *pc++;
2702 break;
2703 }
2704 gData->skipped++;
2705 x->cp++;
2706 }
2707 if (!anchor)
2708 return NULL;
2709 }
2711 for (;;) {
2712 if (REOP_IS_SIMPLE(op)) {
2713 result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2714 } else {
2715 curState = &gData->stateStack[gData->stateStackTop];
2716 switch (op) {
2717 case REOP_EMPTY:
2718 result = x;
2719 break;
2721 case REOP_ALTPREREQ2:
2722 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2723 pc += ARG_LEN;
2724 matchCh2 = GET_ARG(pc);
2725 pc += ARG_LEN;
2726 k = GET_ARG(pc);
2727 pc += ARG_LEN;
2729 if (x->cp != gData->cpend) {
2730 if (*x->cp == matchCh2)
2731 goto doAlt;
2733 charSet = &gData->regexp->classList[k];
2734 if (!charSet->converted)
2735 if (!ProcessCharSet(gData, charSet))
2736 return NULL;
2737 matchCh1 = *x->cp;
2738 k = matchCh1 >> 3;
2739 if ((charSet->length == 0 ||
2740 matchCh1 > charSet->length ||
2741 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2742 charSet->sense) {
2743 goto doAlt;
2744 }
2745 }
2746 result = NULL;
2747 break;
2749 case REOP_ALTPREREQ:
2750 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2751 pc += ARG_LEN;
2752 matchCh1 = GET_ARG(pc);
2753 pc += ARG_LEN;
2754 matchCh2 = GET_ARG(pc);
2755 pc += ARG_LEN;
2756 if (x->cp == gData->cpend ||
2757 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2758 result = NULL;
2759 break;
2760 }
2761 /* else false thru... */
2763 case REOP_ALT:
2764 doAlt:
2765 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2766 pc += ARG_LEN; /* start of this alternate */
2767 curState->parenSoFar = parenSoFar;
2768 PUSH_STATE_STACK(gData);
2769 op = (REOp) *pc++;
2770 startcp = x->cp;
2771 if (REOP_IS_SIMPLE(op)) {
2772 if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2773 op = (REOp) *nextpc++;
2774 pc = nextpc;
2775 continue;
2776 }
2777 result = x;
2778 op = (REOp) *pc++;
2779 }
2780 nextop = (REOp) *nextpc++;
2781 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2782 return NULL;
2783 continue;
2785 /*
2786 * Occurs at (succesful) end of REOP_ALT,
2787 */
2788 case REOP_JUMP:
2789 --gData->stateStackTop;
2790 pc += GET_OFFSET(pc);
2791 op = (REOp) *pc++;
2792 continue;
2794 /*
2795 * Occurs at last (succesful) end of REOP_ALT,
2796 */
2797 case REOP_ENDALT:
2798 --gData->stateStackTop;
2799 op = (REOp) *pc++;
2800 continue;
2802 case REOP_LPAREN:
2803 pc = ReadCompactIndex(pc, &parenIndex);
2804 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2805 if (parenIndex + 1 > parenSoFar)
2806 parenSoFar = parenIndex + 1;
2807 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2808 x->parens[parenIndex].length = 0;
2809 op = (REOp) *pc++;
2810 continue;
2812 case REOP_RPAREN:
2813 pc = ReadCompactIndex(pc, &parenIndex);
2814 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2815 cap = &x->parens[parenIndex];
2817 /*
2818 * FIXME: https://bugzilla.mozilla.org/show_bug.cgi?id=346090
2819 * This wallpaper prevents a case where we somehow took a step
2820 * backward in input while minimally-matching an empty string.
2821 */
2822 if (x->cp < gData->cpbegin + cap->index)
2823 cap->index = -1;
2824 cap->length = x->cp - (gData->cpbegin + cap->index);
2825 op = (REOp) *pc++;
2826 continue;
2828 case REOP_ASSERT:
2829 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2830 pc += ARG_LEN; /* start of ASSERT child */
2831 op = (REOp) *pc++;
2832 if (REOP_IS_SIMPLE(op) &&
2833 !SimpleMatch(gData, x, op, &pc, JS_FALSE)) {
2834 result = NULL;
2835 break;
2836 }
2837 curState->u.assertion.top =
2838 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2839 curState->u.assertion.sz = gData->cursz;
2840 curState->index = x->cp - gData->cpbegin;
2841 curState->parenSoFar = parenSoFar;
2842 PUSH_STATE_STACK(gData);
2843 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2844 nextpc, x, x->cp, 0, 0)) {
2845 return NULL;
2846 }
2847 continue;
2849 case REOP_ASSERT_NOT:
2850 nextpc = pc + GET_OFFSET(pc);
2851 pc += ARG_LEN;
2852 op = (REOp) *pc++;
2853 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2854 SimpleMatch(gData, x, op, &pc, JS_FALSE) &&
2855 pc == nextpc) {
2856 result = NULL;
2857 break;
2858 }
2859 curState->u.assertion.top
2860 = (char *)gData->backTrackSP -
2861 (char *)gData->backTrackStack;
2862 curState->u.assertion.sz = gData->cursz;
2863 curState->index = x->cp - gData->cpbegin;
2864 curState->parenSoFar = parenSoFar;
2865 PUSH_STATE_STACK(gData);
2866 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2867 nextpc, x, x->cp, 0, 0))
2868 return NULL;
2869 continue;
2871 case REOP_ASSERTTEST:
2872 --gData->stateStackTop;
2873 --curState;
2874 x->cp = gData->cpbegin + curState->index;
2875 gData->backTrackSP =
2876 (REBackTrackData *) ((char *)gData->backTrackStack +
2877 curState->u.assertion.top);
2878 gData->cursz = curState->u.assertion.sz;
2879 if (result)
2880 result = x;
2881 break;
2883 case REOP_ASSERTNOTTEST:
2884 --gData->stateStackTop;
2885 --curState;
2886 x->cp = gData->cpbegin + curState->index;
2887 gData->backTrackSP =
2888 (REBackTrackData *) ((char *)gData->backTrackStack +
2889 curState->u.assertion.top);
2890 gData->cursz = curState->u.assertion.sz;
2891 result = (!result) ? x : NULL;
2892 break;
2894 case REOP_END:
2895 if (x)
2896 return x;
2897 break;
2899 case REOP_STAR:
2900 curState->u.quantifier.min = 0;
2901 curState->u.quantifier.max = (uintN)-1;
2902 goto quantcommon;
2903 case REOP_PLUS:
2904 curState->u.quantifier.min = 1;
2905 curState->u.quantifier.max = (uintN)-1;
2906 goto quantcommon;
2907 case REOP_OPT:
2908 curState->u.quantifier.min = 0;
2909 curState->u.quantifier.max = 1;
2910 goto quantcommon;
2911 case REOP_QUANT:
2912 pc = ReadCompactIndex(pc, &k);
2913 curState->u.quantifier.min = k;
2914 pc = ReadCompactIndex(pc, &k);
2915 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
2916 curState->u.quantifier.max = k - 1;
2917 JS_ASSERT(curState->u.quantifier.min
2918 <= curState->u.quantifier.max);
2919 quantcommon:
2920 if (curState->u.quantifier.max == 0) {
2921 pc = pc + GET_OFFSET(pc);
2922 op = (REOp) *pc++;
2923 result = x;
2924 continue;
2925 }
2926 /* Step over <next> */
2927 nextpc = pc + ARG_LEN;
2928 op = (REOp) *nextpc++;
2929 startcp = x->cp;
2930 if (REOP_IS_SIMPLE(op)) {
2931 if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
2932 if (curState->u.quantifier.min == 0)
2933 result = x;
2934 else
2935 result = NULL;
2936 pc = pc + GET_OFFSET(pc);
2937 break;
2938 }
2939 op = (REOp) *nextpc++;
2940 result = x;
2941 }
2942 curState->index = startcp - gData->cpbegin;
2943 curState->continue_op = REOP_REPEAT;
2944 curState->continue_pc = pc;
2945 curState->parenSoFar = parenSoFar;
2946 PUSH_STATE_STACK(gData);
2947 if (curState->u.quantifier.min == 0 &&
2948 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2949 0, 0)) {
2950 return NULL;
2951 }
2952 pc = nextpc;
2953 continue;
2955 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2956 pc = curState[-1].continue_pc;
2957 op = curState[-1].continue_op;
2958 continue;
2960 case REOP_REPEAT:
2961 --curState;
2962 do {
2963 --gData->stateStackTop;
2964 if (!result) {
2965 /* Failed, see if we have enough children. */
2966 if (curState->u.quantifier.min == 0)
2967 goto repeatDone;
2968 goto break_switch;
2969 }
2970 if (curState->u.quantifier.min == 0 &&
2971 x->cp == gData->cpbegin + curState->index) {
2972 /* matched an empty string, that'll get us nowhere */
2973 result = NULL;
2974 goto break_switch;
2975 }
2976 if (curState->u.quantifier.min != 0)
2977 curState->u.quantifier.min--;
2978 if (curState->u.quantifier.max != (uintN) -1)
2979 curState->u.quantifier.max--;
2980 if (curState->u.quantifier.max == 0)
2981 goto repeatDone;
2982 nextpc = pc + ARG_LEN;
2983 nextop = (REOp) *nextpc;
2984 startcp = x->cp;
2985 if (REOP_IS_SIMPLE(nextop)) {
2986 nextpc++;
2987 if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
2988 if (curState->u.quantifier.min == 0)
2989 goto repeatDone;
2990 result = NULL;
2991 goto break_switch;
2992 }
2993 result = x;
2994 }
2995 curState->index = startcp - gData->cpbegin;
2996 PUSH_STATE_STACK(gData);
2997 if (curState->u.quantifier.min == 0 &&
2998 !PushBackTrackState(gData, REOP_REPEAT,
2999 pc, x, startcp,
3000 curState->parenSoFar,
3001 parenSoFar -
3002 curState->parenSoFar)) {
3003 return NULL;
3004 }
3005 } while (*nextpc == REOP_ENDCHILD);
3006 pc = nextpc;
3007 op = (REOp) *pc++;
3008 parenSoFar = curState->parenSoFar;
3009 continue;
3011 repeatDone:
3012 result = x;
3013 pc += GET_OFFSET(pc);
3014 goto break_switch;
3016 case REOP_MINIMALSTAR:
3017 curState->u.quantifier.min = 0;
3018 curState->u.quantifier.max = (uintN)-1;
3019 goto minimalquantcommon;
3020 case REOP_MINIMALPLUS:
3021 curState->u.quantifier.min = 1;
3022 curState->u.quantifier.max = (uintN)-1;
3023 goto minimalquantcommon;
3024 case REOP_MINIMALOPT:
3025 curState->u.quantifier.min = 0;
3026 curState->u.quantifier.max = 1;
3027 goto minimalquantcommon;
3028 case REOP_MINIMALQUANT:
3029 pc = ReadCompactIndex(pc, &k);
3030 curState->u.quantifier.min = k;
3031 pc = ReadCompactIndex(pc, &k);
3032 /* See REOP_QUANT comments about k - 1. */
3033 curState->u.quantifier.max = k - 1;
3034 JS_ASSERT(curState->u.quantifier.min
3035 <= curState->u.quantifier.max);
3036 minimalquantcommon:
3037 curState->index = x->cp - gData->cpbegin;
3038 curState->parenSoFar = parenSoFar;
3039 PUSH_STATE_STACK(gData);
3040 if (curState->u.quantifier.min != 0) {
3041 curState->continue_op = REOP_MINIMALREPEAT;
3042 curState->continue_pc = pc;
3043 /* step over <next> */
3044 pc += OFFSET_LEN;
3045 op = (REOp) *pc++;
3046 } else {
3047 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3048 pc, x, x->cp, 0, 0)) {
3049 return NULL;
3050 }
3051 --gData->stateStackTop;
3052 pc = pc + GET_OFFSET(pc);
3053 op = (REOp) *pc++;
3054 }
3055 continue;
3057 case REOP_MINIMALREPEAT:
3058 --gData->stateStackTop;
3059 --curState;
3061 if (!result) {
3062 /*
3063 * Non-greedy failure - try to consume another child.
3064 */
3065 if (curState->u.quantifier.max == (uintN) -1 ||
3066 curState->u.quantifier.max > 0) {
3067 curState->index = x->cp - gData->cpbegin;
3068 curState->continue_op = REOP_MINIMALREPEAT;
3069 curState->continue_pc = pc;
3070 pc += ARG_LEN;
3071 for (k = curState->parenSoFar; k < parenSoFar; k++)
3072 x->parens[k].index = -1;
3073 PUSH_STATE_STACK(gData);
3074 op = (REOp) *pc++;
3075 continue;
3076 }
3077 /* Don't need to adjust pc since we're going to pop. */
3078 break;
3079 }
3080 if (curState->u.quantifier.min == 0 &&
3081 x->cp == gData->cpbegin + curState->index) {
3082 /* Matched an empty string, that'll get us nowhere. */
3083 result = NULL;
3084 break;
3085 }
3086 if (curState->u.quantifier.min != 0)
3087 curState->u.quantifier.min--;
3088 if (curState->u.quantifier.max != (uintN) -1)
3089 curState->u.quantifier.max--;
3090 if (curState->u.quantifier.min != 0) {
3091 curState->continue_op = REOP_MINIMALREPEAT;
3092 curState->continue_pc = pc;
3093 pc += ARG_LEN;
3094 for (k = curState->parenSoFar; k < parenSoFar; k++)
3095 x->parens[k].index = -1;
3096 curState->index = x->cp - gData->cpbegin;
3097 PUSH_STATE_STACK(gData);
3098 op = (REOp) *pc++;
3099 continue;
3100 }
3101 curState->index = x->cp - gData->cpbegin;
3102 curState->parenSoFar = parenSoFar;
3103 PUSH_STATE_STACK(gData);
3104 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3105 pc, x, x->cp,
3106 curState->parenSoFar,
3107 parenSoFar - curState->parenSoFar)) {
3108 return NULL;
3109 }
3110 --gData->stateStackTop;
3111 pc = pc + GET_OFFSET(pc);
3112 op = (REOp) *pc++;
3113 continue;
3115 default:
3116 JS_ASSERT(JS_FALSE);
3117 result = NULL;
3118 }
3119 break_switch:;
3120 }
3122 /*
3123 * If the match failed and there's a backtrack option, take it.
3124 * Otherwise this is a complete and utter failure.
3125 */
3126 if (!result) {
3127 if (gData->cursz == 0)
3128 return NULL;
3129 backTrackData = gData->backTrackSP;
3130 gData->cursz = backTrackData->sz;
3131 gData->backTrackSP =
3132 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3133 x->cp = backTrackData->cp;
3134 pc = backTrackData->backtrack_pc;
3135 op = backTrackData->backtrack_op;
3136 gData->stateStackTop = backTrackData->saveStateStackTop;
3137 JS_ASSERT(gData->stateStackTop);
3139 memcpy(gData->stateStack, backTrackData + 1,
3140 sizeof(REProgState) * backTrackData->saveStateStackTop);
3141 curState = &gData->stateStack[gData->stateStackTop - 1];
3143 if (backTrackData->parenCount) {
3144 memcpy(&x->parens[backTrackData->parenIndex],
3145 (char *)(backTrackData + 1) +
3146 sizeof(REProgState) * backTrackData->saveStateStackTop,
3147 sizeof(RECapture) * backTrackData->parenCount);
3148 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3149 } else {
3150 for (k = curState->parenSoFar; k < parenSoFar; k++)
3151 x->parens[k].index = -1;
3152 parenSoFar = curState->parenSoFar;
3153 }
3154 continue;
3155 }
3156 x = result;
3158 /*
3159 * Continue with the expression.
3160 */
3161 op = (REOp)*pc++;
3162 }
3163 return NULL;
3164 }
3166 static REMatchState *
3167 MatchRegExp(REGlobalData *gData, REMatchState *x)
3168 {
3169 REMatchState *result;
3170 const jschar *cp = x->cp;
3171 const jschar *cp2;
3172 uintN j;
3174 /*
3175 * Have to include the position beyond the last character
3176 * in order to detect end-of-input/line condition.
3177 */
3178 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3179 gData->skipped = cp2 - cp;
3180 x->cp = cp2;
3181 for (j = 0; j < gData->regexp->parenCount; j++)
3182 x->parens[j].index = -1;
3183 result = ExecuteREBytecode(gData, x);
3184 if (!gData->ok || result)
3185 return result;
3186 gData->backTrackSP = gData->backTrackStack;
3187 gData->cursz = 0;
3188 gData->stateStackTop = 0;
3189 cp2 = cp + gData->skipped;
3190 }
3191 return NULL;
3192 }
3195 static REMatchState *
3196 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re)
3197 {
3198 REMatchState *result;
3199 uintN i;
3201 gData->backTrackStackSize = INITIAL_BACKTRACK;
3202 JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3203 &gData->pool,
3204 INITIAL_BACKTRACK);
3205 if (!gData->backTrackStack)
3206 return NULL;
3207 gData->backTrackSP = gData->backTrackStack;
3208 gData->cursz = 0;
3211 gData->stateStackLimit = INITIAL_STATESTACK;
3212 JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3213 &gData->pool,
3214 sizeof(REProgState) * INITIAL_STATESTACK);
3215 if (!gData->stateStack)
3216 return NULL;
3217 gData->stateStackTop = 0;
3219 gData->cx = cx;
3220 gData->regexp = re;
3221 gData->ok = JS_TRUE;
3223 JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3224 &gData->pool,
3225 offsetof(REMatchState, parens)
3226 + re->parenCount * sizeof(RECapture));
3227 if (!result)
3228 return NULL;
3230 for (i = 0; i < re->classCount; i++)
3231 if (!re->classList[i].converted)
3232 if (!ProcessCharSet(gData, &re->classList[i]))
3233 return NULL;
3235 return result;
3236 }
3238 JSBool
3239 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3240 JSBool test, jsval *rval)
3241 {
3242 REGlobalData gData;
3243 REMatchState *x, *result;
3245 const jschar *cp, *ep;
3246 size_t i, length, start;
3247 JSSubString *morepar;
3248 JSBool ok;
3249 JSRegExpStatics *res;
3250 ptrdiff_t matchlen;
3251 uintN num, morenum;
3252 JSString *parstr, *matchstr;
3253 JSObject *obj;
3255 RECapture *parsub = NULL;
3257 /*
3258 * It's safe to load from cp because JSStrings have a zero at the end,
3259 * and we never let cp get beyond cpend.
3260 */
3261 start = *indexp;
3262 length = JSSTRING_LENGTH(str);
3263 if (start > length)
3264 start = length;
3265 cp = JSSTRING_CHARS(str);
3266 gData.cpbegin = cp;
3267 gData.cpend = cp + length;
3268 cp += start;
3269 gData.start = start;
3270 gData.skipped = 0;
3272 JS_InitArenaPool(&gData.pool, "RegExpPool", 8096, 4);
3273 x = InitMatch(cx, &gData, re);
3274 if (!x)
3275 return JS_FALSE;
3276 x->cp = cp;
3278 /*
3279 * Call the recursive matcher to do the real work. Return null on mismatch
3280 * whether testing or not. On match, return an extended Array object.
3281 */
3282 result = MatchRegExp(&gData, x);
3283 ok = gData.ok;
3284 if (!ok)
3285 goto out;
3286 if (!result) {
3287 *rval = JSVAL_NULL;
3288 goto out;
3289 }
3290 cp = result->cp;
3291 i = cp - gData.cpbegin;
3292 *indexp = i;
3293 matchlen = i - (start + gData.skipped);
3294 ep = cp;
3295 cp -= matchlen;
3297 if (test) {
3298 /*
3299 * Testing for a match and updating cx->regExpStatics: don't allocate
3300 * an array object, do return true.
3301 */
3302 *rval = JSVAL_TRUE;
3304 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3305 obj = NULL;
3306 } else {
3307 /*
3308 * The array returned on match has element 0 bound to the matched
3309 * string, elements 1 through state.parenCount bound to the paren
3310 * matches, an index property telling the length of the left context,
3311 * and an input property referring to the input string.
3312 */
3313 obj = js_NewArrayObject(cx, 0, NULL);
3314 if (!obj) {
3315 ok = JS_FALSE;
3316 goto out;
3317 }
3318 *rval = OBJECT_TO_JSVAL(obj);
3320 #define DEFVAL(val, id) { \
3321 ok = js_DefineProperty(cx, obj, id, val, \
3322 JS_PropertyStub, JS_PropertyStub, \
3323 JSPROP_ENUMERATE, NULL); \
3324 if (!ok) { \
3325 cx->newborn[GCX_OBJECT] = NULL; \
3326 cx->newborn[GCX_STRING] = NULL; \
3327 goto out; \
3328 } \
3329 }
3331 matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
3332 if (!matchstr) {
3333 cx->newborn[GCX_OBJECT] = NULL;
3334 ok = JS_FALSE;
3335 goto out;
3336 }
3337 DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3338 }
3340 res = &cx->regExpStatics;
3341 res->input = str;
3342 res->parenCount = re->parenCount;
3343 if (re->parenCount == 0) {
3344 res->lastParen = js_EmptySubString;
3345 } else {
3346 for (num = 0; num < re->parenCount; num++) {
3347 parsub = &result->parens[num];
3348 if (num < 9) {
3349 if (parsub->index == -1) {
3350 res->parens[num].chars = NULL;
3351 res->parens[num].length = 0;
3352 } else {
3353 res->parens[num].chars = gData.cpbegin + parsub->index;
3354 res->parens[num].length = parsub->length;
3355 }
3356 } else {
3357 morenum = num - 9;
3358 morepar = res->moreParens;
3359 if (!morepar) {
3360 res->moreLength = 10;
3361 morepar = (JSSubString*)
3362 JS_malloc(cx, 10 * sizeof(JSSubString));
3363 } else if (morenum >= res->moreLength) {
3364 res->moreLength += 10;
3365 morepar = (JSSubString*)
3366 JS_realloc(cx, morepar,
3367 res->moreLength * sizeof(JSSubString));
3368 }
3369 if (!morepar) {
3370 cx->newborn[GCX_OBJECT] = NULL;
3371 cx->newborn[GCX_STRING] = NULL;
3372 ok = JS_FALSE;
3373 goto out;
3374 }
3375 res->moreParens = morepar;
3376 if (parsub->index == -1) {
3377 morepar[morenum].chars = NULL;
3378 morepar[morenum].length = 0;
3379 } else {
3380 morepar[morenum].chars = gData.cpbegin + parsub->index;
3381 morepar[morenum].length = parsub->length;
3382 }
3383 }
3384 if (test)
3385 continue;
3386 if (parsub->index == -1) {
3387 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3388 JSVAL_VOID, NULL, NULL,
3389 JSPROP_ENUMERATE, NULL);
3390 } else {
3391 parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3392 parsub->length, 0);
3393 if (!parstr) {
3394 cx->newborn[GCX_OBJECT] = NULL;
3395 cx->newborn[GCX_STRING] = NULL;
3396 ok = JS_FALSE;
3397 goto out;
3398 }
3399 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3400 STRING_TO_JSVAL(parstr), NULL, NULL,
3401 JSPROP_ENUMERATE, NULL);
3402 }
3403 if (!ok) {
3404 cx->newborn[GCX_OBJECT] = NULL;
3405 cx->newborn[GCX_STRING] = NULL;
3406 goto out;
3407 }
3408 }
3409 if (parsub->index == -1) {
3410 res->lastParen = js_EmptySubString;
3411 } else {
3412 res->lastParen.chars = gData.cpbegin + parsub->index;
3413 res->lastParen.length = parsub->length;
3414 }
3415 }
3417 if (!test) {
3418 /*
3419 * Define the index and input properties last for better for/in loop
3420 * order (so they come after the elements).
3421 */
3422 DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3423 ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3424 DEFVAL(STRING_TO_JSVAL(str),
3425 ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3426 }
3428 #undef DEFVAL
3430 res->lastMatch.chars = cp;
3431 res->lastMatch.length = matchlen;
3432 if (JS_VERSION_IS_1_2(cx)) {
3433 /*
3434 * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
3435 * in scalar contexts, and unintentionally for the string.match "list"
3436 * pseudo-context. On "hi there bye", the following would result:
3437 *
3438 * Language while(/ /g){print("$`");} s/ /$`/g
3439 * perl4.036 "hi", "there" "hihitherehi therebye"
3440 * perl5 "hi", "hi there" "hihitherehi therebye"
3441 * js1.2 "hi", "there" "hihitheretherebye"
3442 */
3443 res->leftContext.chars = JSSTRING_CHARS(str) + start;
3444 res->leftContext.length = gData.skipped;
3445 } else {
3446 /*
3447 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3448 *
3449 * js1.3 "hi", "hi there" "hihitherehi therebye"
3450 */
3451 res->leftContext.chars = JSSTRING_CHARS(str);
3452 res->leftContext.length = start + gData.skipped;
3453 }
3454 res->rightContext.chars = ep;
3455 res->rightContext.length = gData.cpend - ep;
3457 out:
3458 JS_FinishArenaPool(&gData.pool);
3459 return ok;
3460 }
3462 /************************************************************************/
3464 enum regexp_tinyid {
3465 REGEXP_SOURCE = -1,
3466 REGEXP_GLOBAL = -2,
3467 REGEXP_IGNORE_CASE = -3,
3468 REGEXP_LAST_INDEX = -4,
3469 REGEXP_MULTILINE = -5
3470 };
3472 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT|JSPROP_SHARED)
3474 static JSPropertySpec regexp_props[] = {
3475 {"source", REGEXP_SOURCE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3476 {"global", REGEXP_GLOBAL, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3477 {"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3478 {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
3479 {"multiline", REGEXP_MULTILINE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3480 {0,0,0,0,0}
3481 };
3483 static JSBool
3484 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3485 {
3486 jsint slot;
3487 JSRegExp *re;
3489 if (!JSVAL_IS_INT(id))
3490 return JS_TRUE;
3491 slot = JSVAL_TO_INT(id);
3492 if (slot == REGEXP_LAST_INDEX)
3493 return JS_GetReservedSlot(cx, obj, 0, vp);
3495 JS_LOCK_OBJ(cx, obj);
3496 re = (JSRegExp *) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
3497 if (re) {
3498 switch (slot) {
3499 case REGEXP_SOURCE:
3500 *vp = STRING_TO_JSVAL(re->source);
3501 break;
3502 case REGEXP_GLOBAL:
3503 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3504 break;
3505 case REGEXP_IGNORE_CASE:
3506 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3507 break;
3508 case REGEXP_MULTILINE:
3509 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3510 break;
3511 }
3512 }
3513 JS_UNLOCK_OBJ(cx, obj);
3514 return JS_TRUE;
3515 }
3517 static JSBool
3518 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3519 {
3520 JSBool ok;
3521 jsint slot;
3522 jsdouble lastIndex;
3524 ok = JS_TRUE;
3525 if (!JSVAL_IS_INT(id))
3526 return ok;
3527 slot = JSVAL_TO_INT(id);
3528 if (slot == REGEXP_LAST_INDEX) {
3529 if (!js_ValueToNumber(cx, *vp, &lastIndex))
3530 return JS_FALSE;
3531 lastIndex = js_DoubleToInteger(lastIndex);
3532 ok = js_NewNumberValue(cx, lastIndex, vp) &&
3533 JS_SetReservedSlot(cx, obj, 0, *vp);
3534 }
3535 return ok;
3536 }
3538 /*
3539 * RegExp class static properties and their Perl counterparts:
3540 *
3541 * RegExp.input $_
3542 * RegExp.multiline $*
3543 * RegExp.lastMatch $&
3544 * RegExp.lastParen $+
3545 * RegExp.leftContext $`
3546 * RegExp.rightContext $'
3547 */
3548 enum regexp_static_tinyid {
3549 REGEXP_STATIC_INPUT = -1,
3550 REGEXP_STATIC_MULTILINE = -2,
3551 REGEXP_STATIC_LAST_MATCH = -3,
3552 REGEXP_STATIC_LAST_PAREN = -4,
3553 REGEXP_STATIC_LEFT_CONTEXT = -5,
3554 REGEXP_STATIC_RIGHT_CONTEXT = -6
3555 };
3557 JSBool
3558 js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3559 {
3560 JS_ClearRegExpStatics(cx);
3561 return js_AddRoot(cx, &res->input, "res->input");
3562 }
3564 void
3565 js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3566 {
3567 if (res->moreParens) {
3568 JS_free(cx, res->moreParens);
3569 res->moreParens = NULL;
3570 }
3571 js_RemoveRoot(cx->runtime, &res->input);
3572 }
3574 static JSBool
3575 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3576 {
3577 jsint slot;
3578 JSRegExpStatics *res;
3579 JSString *str;
3580 JSSubString *sub;
3582 res = &cx->regExpStatics;
3583 if (!JSVAL_IS_INT(id))
3584 return JS_TRUE;
3585 slot = JSVAL_TO_INT(id);
3586 switch (slot) {
3587 case REGEXP_STATIC_INPUT:
3588 *vp = res->input ? STRING_TO_JSVAL(res->input)
3589 : JS_GetEmptyStringValue(cx);
3590 return JS_TRUE;
3591 case REGEXP_STATIC_MULTILINE:
3592 *vp = BOOLEAN_TO_JSVAL(res->multiline);
3593 return JS_TRUE;
3594 case REGEXP_STATIC_LAST_MATCH:
3595 sub = &res->lastMatch;
3596 break;
3597 case REGEXP_STATIC_LAST_PAREN:
3598 sub = &res->lastParen;
3599 break;
3600 case REGEXP_STATIC_LEFT_CONTEXT:
3601 sub = &res->leftContext;
3602 break;
3603 case REGEXP_STATIC_RIGHT_CONTEXT:
3604 sub = &res->rightContext;
3605 break;
3606 default:
3607 sub = REGEXP_PAREN_SUBSTRING(res, slot);
3608 break;
3609 }
3610 str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
3611 if (!str)
3612 return JS_FALSE;
3613 *vp = STRING_TO_JSVAL(str);
3614 return JS_TRUE;
3615 }
3617 static JSBool
3618 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3619 {
3620 JSRegExpStatics *res;
3622 if (!JSVAL_IS_INT(id))
3623 return JS_TRUE;
3624 res = &cx->regExpStatics;
3625 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3626 if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3627 if (!JSVAL_IS_STRING(*vp) &&
3628 !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3629 return JS_FALSE;
3630 }
3631 res->input = JSVAL_TO_STRING(*vp);
3632 } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3633 if (!JSVAL_IS_BOOLEAN(*vp) &&
3634 !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3635 return JS_FALSE;
3636 }
3637 res->multiline = JSVAL_TO_BOOLEAN(*vp);
3638 }
3639 return JS_TRUE;
3640 }
3642 static JSPropertySpec regexp_static_props[] = {
3643 {"input",
3644 REGEXP_STATIC_INPUT,
3645 JSPROP_ENUMERATE|JSPROP_SHARED,
3646 regexp_static_getProperty, regexp_static_setProperty},
3647 {"multiline",
3648 REGEXP_STATIC_MULTILINE,
3649 JSPROP_ENUMERATE|JSPROP_SHARED,
3650 regexp_static_getProperty, regexp_static_setProperty},
3651 {"lastMatch",
3652 REGEXP_STATIC_LAST_MATCH,
3653 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3654 regexp_static_getProperty, regexp_static_getProperty},
3655 {"lastParen",
3656 REGEXP_STATIC_LAST_PAREN,
3657 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3658 regexp_static_getProperty, regexp_static_getProperty},
3659 {"leftContext",
3660 REGEXP_STATIC_LEFT_CONTEXT,
3661 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3662 regexp_static_getProperty, regexp_static_getProperty},
3663 {"rightContext",
3664 REGEXP_STATIC_RIGHT_CONTEXT,
3665 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3666 regexp_static_getProperty, regexp_static_getProperty},
3668 /* XXX should have block scope and local $1, etc. */
3669 {"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3670 regexp_static_getProperty, regexp_static_getProperty},
3671 {"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3672 regexp_static_getProperty, regexp_static_getProperty},
3673 {"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3674 regexp_static_getProperty, regexp_static_getProperty},
3675 {"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3676 regexp_static_getProperty, regexp_static_getProperty},
3677 {"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3678 regexp_static_getProperty, regexp_static_getProperty},
3679 {"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3680 regexp_static_getProperty, regexp_static_getProperty},
3681 {"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3682 regexp_static_getProperty, regexp_static_getProperty},
3683 {"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3684 regexp_static_getProperty, regexp_static_getProperty},
3685 {"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3686 regexp_static_getProperty, regexp_static_getProperty},
3688 {0,0,0,0,0}
3689 };
3691 static void
3692 regexp_finalize(JSContext *cx, JSObject *obj)
3693 {
3694 JSRegExp *re;
3696 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3697 if (!re)
3698 return;
3699 js_DestroyRegExp(cx, re);
3700 }
3702 /* Forward static prototype. */
3703 static JSBool
3704 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3705 jsval *rval);
3707 static JSBool
3708 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3709 {
3710 return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
3711 }
3713 #if JS_HAS_XDR
3715 #include "jsxdrapi.h"
3717 static JSBool
3718 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3719 {
3720 JSRegExp *re;
3721 JSString *source;
3722 uint32 flagsword;
3723 JSObject *obj;
3725 if (xdr->mode == JSXDR_ENCODE) {
3726 re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3727 if (!re)
3728 return JS_FALSE;
3729 source = re->source;
3730 flagsword = ((uint32)re->cloneIndex << 16) | re->flags;
3731 }
3732 if (!JS_XDRString(xdr, &source) ||
3733 !JS_XDRUint32(xdr, &flagsword)) {
3734 return JS_FALSE;
3735 }
3736 if (xdr->mode == JSXDR_DECODE) {
3737 obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
3738 if (!obj)
3739 return JS_FALSE;
3740 re = js_NewRegExp(xdr->cx, NULL, source, (uint16)flagsword, JS_FALSE);
3741 if (!re)
3742 return JS_FALSE;
3743 if (!JS_SetPrivate(xdr->cx, obj, re) ||
3744 !js_SetLastIndex(xdr->cx, obj, 0)) {
3745 js_DestroyRegExp(xdr->cx, re);
3746 return JS_FALSE;
3747 }
3748 re->cloneIndex = (uint16)(flagsword >> 16);
3749 *objp = obj;
3750 }
3751 return JS_TRUE;
3752 }
3754 #else /* !JS_HAS_XDR */
3756 #define regexp_xdrObject NULL
3758 #endif /* !JS_HAS_XDR */
3760 static uint32
3761 regexp_mark(JSContext *cx, JSObject *obj, void *arg)
3762 {
3763 JSRegExp *re = (JSRegExp *) JS_GetPrivate(cx, obj);
3764 if (re)
3765 JS_MarkGCThing(cx, re->source, "source", arg);
3766 return 0;
3767 }
3769 JSClass js_RegExpClass = {
3770 js_RegExp_str,
3771 JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1),
3772 JS_PropertyStub, JS_PropertyStub,
3773 regexp_getProperty, regexp_setProperty,
3774 JS_EnumerateStub, JS_ResolveStub,
3775 JS_ConvertStub, regexp_finalize,
3776 NULL, NULL,
3777 regexp_call, NULL,
3778 regexp_xdrObject, NULL,
3779 regexp_mark, 0
3780 };
3782 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3784 JSBool
3785 js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3786 jsval *rval)
3787 {
3788 JSRegExp *re;
3789 const jschar *source;
3790 jschar *chars;
3791 size_t length, nflags;
3792 uintN flags;
3793 JSString *str;
3795 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3796 return JS_FALSE;
3797 JS_LOCK_OBJ(cx, obj);
3798 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3799 if (!re) {
3800 JS_UNLOCK_OBJ(cx, obj);
3801 *rval = STRING_TO_JSVAL(cx->runtime->emptyString);
3802 return JS_TRUE;
3803 }
3805 source = JSSTRING_CHARS(re->source);
3806 length = JSSTRING_LENGTH(re->source);
3807 if (length == 0) {
3808 source = empty_regexp_ucstr;
3809 length = sizeof(empty_regexp_ucstr) / sizeof(jschar) - 1;
3810 }
3811 length += 2;
3812 nflags = 0;
3813 for (flags = re->flags; flags != 0; flags &= flags - 1)
3814 nflags++;
3815 chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3816 if (!chars) {
3817 JS_UNLOCK_OBJ(cx, obj);
3818 return JS_FALSE;
3819 }
3821 chars[0] = '/';
3822 js_strncpy(&chars[1], source, length - 2);
3823 chars[length-1] = '/';
3824 if (nflags) {
3825 if (re->flags & JSREG_GLOB)
3826 chars[length++] = 'g';
3827 if (re->flags & JSREG_FOLD)
3828 chars[length++] = 'i';
3829 if (re->flags & JSREG_MULTILINE)
3830 chars[length++] = 'm';
3831 }
3832 JS_UNLOCK_OBJ(cx, obj);
3833 chars[length] = 0;
3835 str = js_NewString(cx, chars, length, 0);
3836 if (!str) {
3837 JS_free(cx, chars);
3838 return JS_FALSE;
3839 }
3840 *rval = STRING_TO_JSVAL(str);
3841 return JS_TRUE;
3842 }
3844 static JSBool
3845 regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3846 jsval *rval)
3847 {
3848 JSString *opt, *str;
3849 JSRegExp *oldre, *re;
3850 JSBool ok, ok2;
3851 JSObject *obj2;
3852 size_t length, nbytes;
3853 const jschar *cp, *start, *end;
3854 jschar *nstart, *ncp, *tmp;
3856 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3857 return JS_FALSE;
3858 opt = NULL;
3859 if (argc == 0) {
3860 str = cx->runtime->emptyString;
3861 } else {
3862 if (JSVAL_IS_OBJECT(argv[0])) {
3863 /*
3864 * If we get passed in a RegExp object we construct a new
3865 * RegExp that is a duplicate of it by re-compiling the
3866 * original source code. ECMA requires that it be an error
3867 * here if the flags are specified. (We must use the flags
3868 * from the original RegExp also).
3869 */
3870 obj2 = JSVAL_TO_OBJECT(argv[0]);
3871 if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
3872 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
3873 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3874 JSMSG_NEWREGEXP_FLAGGED);
3875 return JS_FALSE;
3876 }
3877 JS_LOCK_OBJ(cx, obj2);
3878 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
3879 if (!re) {
3880 JS_UNLOCK_OBJ(cx, obj2);
3881 return JS_FALSE;
3882 }
3883 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
3884 JS_UNLOCK_OBJ(cx, obj2);
3885 goto created;
3886 }
3887 }
3888 str = js_ValueToString(cx, argv[0]);
3889 if (!str)
3890 return JS_FALSE;
3891 argv[0] = STRING_TO_JSVAL(str);
3892 if (argc > 1) {
3893 if (JSVAL_IS_VOID(argv[1])) {
3894 opt = NULL;
3895 } else {
3896 opt = js_ValueToString(cx, argv[1]);
3897 if (!opt)
3898 return JS_FALSE;
3899 argv[1] = STRING_TO_JSVAL(opt);
3900 }
3901 }
3903 /* Escape any naked slashes in the regexp source. */
3904 length = JSSTRING_LENGTH(str);
3905 start = JSSTRING_CHARS(str);
3906 end = start + length;
3907 nstart = ncp = NULL;
3908 for (cp = start; cp < end; cp++) {
3909 if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
3910 nbytes = (++length + 1) * sizeof(jschar);
3911 if (!nstart) {
3912 nstart = (jschar *) JS_malloc(cx, nbytes);
3913 if (!nstart)
3914 return JS_FALSE;
3915 ncp = nstart + (cp - start);
3916 js_strncpy(nstart, start, cp - start);
3917 } else {
3918 tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
3919 if (!tmp) {
3920 JS_free(cx, nstart);
3921 return JS_FALSE;
3922 }
3923 ncp = tmp + (ncp - nstart);
3924 nstart = tmp;
3925 }
3926 *ncp++ = '\\';
3927 }
3928 if (nstart)
3929 *ncp++ = *cp;
3930 }
3932 if (nstart) {
3933 /* Don't forget to store the backstop after the new string. */
3934 JS_ASSERT((size_t)(ncp - nstart) == length);
3935 *ncp = 0;
3936 str = js_NewString(cx, nstart, length, 0);
3937 if (!str) {
3938 JS_free(cx, nstart);
3939 return JS_FALSE;
3940 }
3941 argv[0] = STRING_TO_JSVAL(str);
3942 }
3943 }
3945 re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
3946 created:
3947 if (!re)
3948 return JS_FALSE;
3949 JS_LOCK_OBJ(cx, obj);
3950 oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
3951 ok = JS_SetPrivate(cx, obj, re);
3952 ok2 = js_SetLastIndex(cx, obj, 0);
3953 JS_UNLOCK_OBJ(cx, obj);
3954 if (!ok) {
3955 js_DestroyRegExp(cx, re);
3956 return JS_FALSE;
3957 }
3958 if (oldre)
3959 js_DestroyRegExp(cx, oldre);
3960 *rval = OBJECT_TO_JSVAL(obj);
3961 return ok2;
3962 }
3964 static JSBool
3965 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3966 JSBool test, jsval *rval)
3967 {
3968 JSBool ok;
3969 JSRegExp *re;
3970 jsdouble lastIndex;
3971 JSString *str;
3972 size_t i;
3974 ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
3975 if (!ok)
3976 return JS_FALSE;
3977 JS_LOCK_OBJ(cx, obj);
3978 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3979 if (!re) {
3980 JS_UNLOCK_OBJ(cx, obj);
3981 return JS_TRUE;
3982 }
3984 /* NB: we must reach out: after this paragraph, in order to drop re. */
3985 HOLD_REGEXP(cx, re);
3986 if (re->flags & JSREG_GLOB) {
3987 ok = js_GetLastIndex(cx, obj, &lastIndex);
3988 } else {
3989 lastIndex = 0;
3990 }
3991 JS_UNLOCK_OBJ(cx, obj);
3992 if (!ok)
3993 goto out;
3995 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
3996 if (argc == 0) {
3997 str = cx->regExpStatics.input;
3998 if (!str) {
3999 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4000 JSMSG_NO_INPUT,
4001 JS_GetStringBytes(re->source),
4002 (re->flags & JSREG_GLOB) ? "g" : "",
4003 (re->flags & JSREG_FOLD) ? "i" : "",
4004 (re->flags & JSREG_MULTILINE) ? "m" : "");
4005 ok = JS_FALSE;
4006 goto out;
4007 }
4008 } else {
4009 str = js_ValueToString(cx, argv[0]);
4010 if (!str)
4011 goto out;
4012 argv[0] = STRING_TO_JSVAL(str);
4013 }
4015 if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4016 ok = js_SetLastIndex(cx, obj, 0);
4017 *rval = JSVAL_NULL;
4018 } else {
4019 i = (size_t) lastIndex;
4020 ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4021 if (ok && (re->flags & JSREG_GLOB))
4022 ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4023 }
4025 out:
4026 DROP_REGEXP(cx, re);
4027 return ok;
4028 }
4030 static JSBool
4031 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4032 {
4033 return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
4034 }
4036 static JSBool
4037 regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4038 {
4039 if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
4040 return JS_FALSE;
4041 if (*rval != JSVAL_TRUE)
4042 *rval = JSVAL_FALSE;
4043 return JS_TRUE;
4044 }
4046 static JSFunctionSpec regexp_methods[] = {
4047 #if JS_HAS_TOSOURCE
4048 {js_toSource_str, js_regexp_toString, 0,0,0},
4049 #endif
4050 {js_toString_str, js_regexp_toString, 0,0,0},
4051 {"compile", regexp_compile, 1,0,0},
4052 {"exec", regexp_exec, 0,0,0},
4053 {"test", regexp_test, 0,0,0},
4054 {0,0,0,0,0}
4055 };
4057 static JSBool
4058 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4059 {
4060 if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4061 /*
4062 * If first arg is regexp and no flags are given, just return the arg.
4063 * (regexp_compile detects the regexp + flags case and throws a
4064 * TypeError.) See 10.15.3.1.
4065 */
4066 if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4067 !JSVAL_IS_PRIMITIVE(argv[0]) &&
4068 OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4069 *rval = argv[0];
4070 return JS_TRUE;
4071 }
4073 /* Otherwise, replace obj with a new RegExp object. */
4074 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4075 if (!obj)
4076 return JS_FALSE;
4078 /*
4079 * regexp_compile does not use rval to root its temporaries
4080 * so we can use it to root obj.
4081 */
4082 *rval = OBJECT_TO_JSVAL(obj);
4083 }
4084 return regexp_compile(cx, obj, argc, argv, rval);
4085 }
4087 JSObject *
4088 js_InitRegExpClass(JSContext *cx, JSObject *obj)
4089 {
4090 JSObject *proto, *ctor;
4091 jsval rval;
4093 proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4094 regexp_props, regexp_methods,
4095 regexp_static_props, NULL);
4097 if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4098 return NULL;
4099 if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4100 !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4101 !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4102 !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4103 !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4104 !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4105 goto bad;
4106 }
4108 /* Give RegExp.prototype private data so it matches the empty string. */
4109 if (!regexp_compile(cx, proto, 0, NULL, &rval))
4110 goto bad;
4111 return proto;
4113 bad:
4114 JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4115 return NULL;
4116 }
4118 JSObject *
4119 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4120 jschar *chars, size_t length, uintN flags)
4121 {
4122 JSString *str;
4123 JSObject *obj;
4124 JSRegExp *re;
4125 JSTempValueRooter tvr;
4127 str = js_NewStringCopyN(cx, chars, length, 0);
4128 if (!str)
4129 return NULL;
4130 re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4131 if (!re)
4132 return NULL;
4133 JS_PUSH_SINGLE_TEMP_ROOT(cx, STRING_TO_JSVAL(str), &tvr);
4134 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4135 if (!obj || !JS_SetPrivate(cx, obj, re) || !js_SetLastIndex(cx, obj, 0)) {
4136 js_DestroyRegExp(cx, re);
4137 obj = NULL;
4138 }
4139 JS_POP_TEMP_ROOT(cx, &tvr);
4140 return obj;
4141 }
4143 JSObject *
4144 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4145 {
4146 JSObject *clone;
4147 JSRegExp *re;
4149 JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4150 clone = js_NewObject(cx, &js_RegExpClass, NULL, parent);
4151 if (!clone)
4152 return NULL;
4153 re = JS_GetPrivate(cx, obj);
4154 if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4155 cx->newborn[GCX_OBJECT] = NULL;
4156 return NULL;
4157 }
4158 HOLD_REGEXP(cx, re);
4159 return clone;
4160 }
4162 JSBool
4163 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4164 {
4165 jsval v;
4167 return JS_GetReservedSlot(cx, obj, 0, &v) &&
4168 js_ValueToNumber(cx, v, lastIndex);
4169 }
4171 JSBool
4172 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4173 {
4174 jsval v;
4176 return js_NewNumberValue(cx, lastIndex, &v) &&
4177 JS_SetReservedSlot(cx, obj, 0, v);
4178 }
4180 #endif /* JS_HAS_REGEXPS */