X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=grep.c;h=04c777a20c1a8c10417cc9d44e53e5b99dc32a27;hb=c646217e1366b0397552fad8c32acb47fbe8977d;hp=600f69f2fe2a0271f4bdf736f95f70c8f7381aa4;hpb=cc137194519a8ddbc0514da088fb012bea40df51;p=git.git diff --git a/grep.c b/grep.c index 600f69f2f..04c777a20 100644 --- a/grep.c +++ b/grep.c @@ -28,9 +28,27 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } +static int is_fixed(const char *s) +{ + while (*s && !is_regex_special(*s)) + s++; + return !*s; +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int err = regcomp(&p->regexp, p->pattern, opt->regflags); + int err; + + p->word_regexp = opt->word_regexp; + + if (opt->fixed || is_fixed(p->pattern)) + p->fixed = 1; + if (opt->regflags & REG_ICASE) + p->fixed = 0; + if (p->fixed) + return; + + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; char where[1024]; @@ -54,6 +72,8 @@ static struct grep_expr *compile_pattern_atom(struct grep_pat **list) struct grep_expr *x; p = *list; + if (!p) + return NULL; switch (p->token) { case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: @@ -66,8 +86,6 @@ static struct grep_expr *compile_pattern_atom(struct grep_pat **list) case GREP_OPEN_PAREN: *list = p->next; x = compile_pattern_or(list); - if (!x) - return NULL; if (!*list || (*list)->token != GREP_CLOSE_PAREN) die("unmatched parenthesis"); *list = (*list)->next; @@ -83,6 +101,8 @@ static struct grep_expr *compile_pattern_not(struct grep_pat **list) struct grep_expr *x; p = *list; + if (!p) + return NULL; switch (p->token) { case GREP_NOT: if (!p->next) @@ -159,8 +179,7 @@ void compile_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - if (!opt->fixed) - compile_regexp(p, opt); + compile_regexp(p, opt); break; default: opt->extended = 1; @@ -175,7 +194,8 @@ void compile_grep_patterns(struct grep_opt *opt) * A classic recursive descent parser would do. */ p = opt->pattern_list; - opt->pattern_expression = compile_pattern_expr(&p); + if (p) + opt->pattern_expression = compile_pattern_expr(&p); if (p) die("incomplete pattern expression: %s", p->pattern); } @@ -236,18 +256,6 @@ static int word_char(char ch) return isalnum(ch) || ch == '_'; } -static void show_line(struct grep_opt *opt, const char *bol, const char *eol, - const char *name, unsigned lno, char sign) -{ - if (opt->null_following_name) - sign = '\0'; - if (opt->pathname) - printf("%s%c", name, sign); - if (opt->linenum) - printf("%d%c", lno, sign); - printf("%.*s\n", (int)(eol-bol), bol); -} - static void show_name(struct grep_opt *opt, const char *name) { printf("%s%c", name, opt->null_following_name ? '\0' : '\n'); @@ -291,12 +299,12 @@ static struct { { "committer ", 10 }, }; -static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx) +static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, + enum grep_context ctx, + regmatch_t *pmatch, int eflags) { int hit = 0; - int at_true_bol = 1; int saved_ch = 0; - regmatch_t pmatch[10]; if ((p->token != GREP_PATTERN) && ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD))) @@ -315,16 +323,12 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol } again: - if (!opt->fixed) { - regex_t *exp = &p->regexp; - hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), - pmatch, 0); - } - else { + if (p->fixed) hit = !fixmatch(p->pattern, bol, pmatch); - } + else + hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); - if (hit && opt->word_regexp) { + if (hit && p->word_regexp) { if ((pmatch[0].rm_so < 0) || (eol - bol) <= pmatch[0].rm_so || (pmatch[0].rm_eo < 0) || @@ -337,7 +341,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol * either end of the line, or at word boundary * (i.e. the next char must not be a word char). */ - if ( ((pmatch[0].rm_so == 0 && at_true_bol) || + if ( ((pmatch[0].rm_so == 0) || !word_char(bol[pmatch[0].rm_so-1])) && ((pmatch[0].rm_eo == (eol-bol)) || !word_char(bol[pmatch[0].rm_eo])) ) @@ -349,10 +353,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol /* There could be more than one match on the * line, and the first match might not be * strict word match. But later ones could be! + * Forward to the next possible start, i.e. the + * next position following a non-word char. */ bol = pmatch[0].rm_so + bol + 1; - at_true_bol = 0; - goto again; + while (word_char(bol[-1]) && bol < eol) + bol++; + if (bol < eol) + goto again; } } if (p->token == GREP_PATTERN_HEAD && saved_ch) @@ -360,42 +368,38 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol return hit; } -static int match_expr_eval(struct grep_opt *o, - struct grep_expr *x, - char *bol, char *eol, - enum grep_context ctx, - int collect_hits) +static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, + enum grep_context ctx, int collect_hits) { int h = 0; + regmatch_t match; + if (!x) + die("Not a valid grep expression"); switch (x->node) { case GREP_NODE_ATOM: - h = match_one_pattern(o, x->u.atom, bol, eol, ctx); + h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0); break; case GREP_NODE_NOT: - h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0); + h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0); break; case GREP_NODE_AND: - if (!collect_hits) - return (match_expr_eval(o, x->u.binary.left, - bol, eol, ctx, 0) && - match_expr_eval(o, x->u.binary.right, - bol, eol, ctx, 0)); - h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); - h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0); + if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0)) + return 0; + h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0); break; case GREP_NODE_OR: if (!collect_hits) - return (match_expr_eval(o, x->u.binary.left, + return (match_expr_eval(x->u.binary.left, bol, eol, ctx, 0) || - match_expr_eval(o, x->u.binary.right, + match_expr_eval(x->u.binary.right, bol, eol, ctx, 0)); - h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); + h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0); x->u.binary.left->hit |= h; - h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1); + h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1); break; default: - die("Unexpected node type (internal error) %d\n", x->node); + die("Unexpected node type (internal error) %d", x->node); } if (collect_hits) x->hit |= h; @@ -406,24 +410,104 @@ static int match_expr(struct grep_opt *opt, char *bol, char *eol, enum grep_context ctx, int collect_hits) { struct grep_expr *x = opt->pattern_expression; - return match_expr_eval(opt, x, bol, eol, ctx, collect_hits); + return match_expr_eval(x, bol, eol, ctx, collect_hits); } static int match_line(struct grep_opt *opt, char *bol, char *eol, enum grep_context ctx, int collect_hits) { struct grep_pat *p; + regmatch_t match; + if (opt->extended) return match_expr(opt, bol, eol, ctx, collect_hits); /* we do not call with collect_hits without being extended */ for (p = opt->pattern_list; p; p = p->next) { - if (match_one_pattern(opt, p, bol, eol, ctx)) + if (match_one_pattern(p, bol, eol, ctx, &match, 0)) return 1; } return 0; } +static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, + enum grep_context ctx, + regmatch_t *pmatch, int eflags) +{ + regmatch_t match; + + if (!match_one_pattern(p, bol, eol, ctx, &match, eflags)) + return 0; + if (match.rm_so < 0 || match.rm_eo < 0) + return 0; + if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) { + if (match.rm_so > pmatch->rm_so) + return 1; + if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo) + return 1; + } + pmatch->rm_so = match.rm_so; + pmatch->rm_eo = match.rm_eo; + return 1; +} + +static int next_match(struct grep_opt *opt, char *bol, char *eol, + enum grep_context ctx, regmatch_t *pmatch, int eflags) +{ + struct grep_pat *p; + int hit = 0; + + pmatch->rm_so = pmatch->rm_eo = -1; + if (bol < eol) { + for (p = opt->pattern_list; p; p = p->next) { + switch (p->token) { + case GREP_PATTERN: /* atom */ + case GREP_PATTERN_HEAD: + case GREP_PATTERN_BODY: + hit |= match_next_pattern(p, bol, eol, ctx, + pmatch, eflags); + break; + default: + break; + } + } + } + return hit; +} + +static void show_line(struct grep_opt *opt, char *bol, char *eol, + const char *name, unsigned lno, char sign) +{ + int rest = eol - bol; + + if (opt->null_following_name) + sign = '\0'; + if (opt->pathname) + printf("%s%c", name, sign); + if (opt->linenum) + printf("%d%c", lno, sign); + if (opt->color) { + regmatch_t match; + enum grep_context ctx = GREP_CONTEXT_BODY; + int ch = *eol; + int eflags = 0; + + *eol = '\0'; + while (next_match(opt, bol, eol, ctx, &match, eflags)) { + printf("%.*s%s%.*s%s", + (int)match.rm_so, bol, + opt->color_match, + (int)(match.rm_eo - match.rm_so), bol + match.rm_so, + GIT_COLOR_RESET); + bol += match.rm_eo; + rest -= match.rm_eo; + eflags = REG_NOTBOL; + } + *eol = ch; + } + printf("%.*s\n", rest, bol); +} + static int grep_buffer_1(struct grep_opt *opt, const char *name, char *buf, unsigned long size, int collect_hits) {