From 0ab7befa31d07fe3ffb51a6cc626d4c09ded1c92 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 27 Sep 2006 17:50:52 -0700 Subject: [PATCH] grep --all-match This lets you say: git grep --all-match -e A -e B -e C to find lines that match A or B or C but limit the matches from the files that have all of A, B and C. This is different from git grep -e A --and -e B --and -e C in that the latter looks for a single line that has all of these at the same time. Signed-off-by: Junio C Hamano --- Documentation/git-grep.txt | 11 +++- builtin-grep.c | 4 ++ grep.c | 114 +++++++++++++++++++++++++++++++------ grep.h | 2 + 4 files changed, 112 insertions(+), 19 deletions(-) diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index d8af4d961..bfbece986 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -14,7 +14,7 @@ SYNOPSIS [-v | --invert-match] [-h|-H] [--full-name] [-E | --extended-regexp] [-G | --basic-regexp] [-F | --fixed-strings] [-n] [-l | --files-with-matches] [-L | --files-without-match] - [-c | --count] + [-c | --count] [--all-match] [-A ] [-B ] [-C ] [-f ] [-e] [--and|--or|--not|(|)|-e ...] [...] @@ -96,6 +96,11 @@ OPTIONS higher precedence than `--or`. `-e` has to be used for all patterns. +--all-match:: + When giving multiple pattern expressions combined with `--or`, + this flag is specified to limit the match to files that + have lines to match all of them. + `...`:: Search blobs in the trees for specified patterns. @@ -111,6 +116,10 @@ git grep -e \'#define\' --and \( -e MAX_PATH -e PATH_MAX \):: Looks for a line that has `#define` and either `MAX_PATH` or `PATH_MAX`. +git grep --all-match -e NODE -e Unexpected:: + Looks for a line that has `NODE` or `Unexpected` in + files that have lines that match both. + Author ------ Originally written by Linus Torvalds , later diff --git a/builtin-grep.c b/builtin-grep.c index 4205e5d38..ad7dc00cd 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -596,6 +596,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix) GREP_CLOSE_PAREN); continue; } + if (!strcmp("--all-match", arg)) { + opt.all_match = 1; + continue; + } if (!strcmp("-e", arg)) { if (1 < argc) { append_grep_pattern(&opt, argv[1], diff --git a/grep.c b/grep.c index c411ddd4d..0fc078ec0 100644 --- a/grep.c +++ b/grep.c @@ -34,7 +34,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) } } -static struct grep_expr *compile_pattern_expr(struct grep_pat **); +static struct grep_expr *compile_pattern_or(struct grep_pat **); static struct grep_expr *compile_pattern_atom(struct grep_pat **list) { struct grep_pat *p; @@ -52,7 +52,7 @@ static struct grep_expr *compile_pattern_atom(struct grep_pat **list) return x; case GREP_OPEN_PAREN: *list = p->next; - x = compile_pattern_expr(list); + x = compile_pattern_or(list); if (!x) return NULL; if (!*list || (*list)->token != GREP_CLOSE_PAREN) @@ -138,6 +138,9 @@ void compile_grep_patterns(struct grep_opt *opt) { struct grep_pat *p; + if (opt->all_match) + opt->extended = 1; + for (p = opt->pattern_list; p; p = p->next) { switch (p->token) { case GREP_PATTERN: /* atom */ @@ -309,40 +312,63 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol return hit; } -static int match_expr_eval(struct grep_opt *opt, +static int match_expr_eval(struct grep_opt *o, struct grep_expr *x, char *bol, char *eol, - enum grep_context ctx) + enum grep_context ctx, + int collect_hits) { + int h = 0; + switch (x->node) { case GREP_NODE_ATOM: - return match_one_pattern(opt, x->u.atom, bol, eol, ctx); + h = match_one_pattern(o, x->u.atom, bol, eol, ctx); break; case GREP_NODE_NOT: - return !match_expr_eval(opt, x->u.unary, bol, eol, ctx); + h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0); + break; case GREP_NODE_AND: - return (match_expr_eval(opt, x->u.binary.left, bol, eol, ctx) && - match_expr_eval(opt, x->u.binary.right, bol, eol, ctx)); + if (!collect_hits) + return (match_expr_eval(o, x->u.binary.left, + bol, eol, ctx, 0) && + match_expr_eval(o, x->u.binary.right, + bol, eol, ctx, 0)); + h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); + h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0); + break; case GREP_NODE_OR: - return (match_expr_eval(opt, x->u.binary.left, bol, eol, ctx) || - match_expr_eval(opt, x->u.binary.right, bol, eol, ctx)); + if (!collect_hits) + return (match_expr_eval(o, x->u.binary.left, + bol, eol, ctx, 0) || + match_expr_eval(o, x->u.binary.right, + bol, eol, ctx, 0)); + h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); + x->u.binary.left->hit |= h; + h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1); + break; + default: + die("Unexpected node type (internal error) %d\n", x->node); } - die("Unexpected node type (internal error) %d\n", x->node); + if (collect_hits) + x->hit |= h; + return h; } static int match_expr(struct grep_opt *opt, char *bol, char *eol, - enum grep_context ctx) + enum grep_context ctx, int collect_hits) { struct grep_expr *x = opt->pattern_expression; - return match_expr_eval(opt, x, bol, eol, ctx); + return match_expr_eval(opt, x, bol, eol, ctx, collect_hits); } static int match_line(struct grep_opt *opt, char *bol, char *eol, - enum grep_context ctx) + enum grep_context ctx, int collect_hits) { struct grep_pat *p; if (opt->extended) - return match_expr(opt, bol, eol, ctx); + return match_expr(opt, bol, eol, ctx, collect_hits); + + /* we do not call with collect_hits without being extended */ for (p = opt->pattern_list; p; p = p->next) { if (match_one_pattern(opt, p, bol, eol, ctx)) return 1; @@ -350,7 +376,8 @@ static int match_line(struct grep_opt *opt, char *bol, char *eol, return 0; } -int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size) +static int grep_buffer_1(struct grep_opt *opt, const char *name, + char *buf, unsigned long size, int collect_hits) { char *bol = buf; unsigned long left = size; @@ -386,7 +413,7 @@ int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long while (left) { char *eol, ch; - int hit = 0; + int hit; eol = end_of_line(bol, &left); ch = *eol; @@ -395,9 +422,12 @@ int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) ctx = GREP_CONTEXT_BODY; - hit = match_line(opt, bol, eol, ctx); + hit = match_line(opt, bol, eol, ctx, collect_hits); *eol = ch; + if (collect_hits) + goto next_line; + /* "grep -v -e foo -e bla" should list lines * that do not have either, so inversion should * be done outside. @@ -477,6 +507,8 @@ int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long } free(prev); + if (collect_hits) + return 0; if (opt->status_only) return 0; @@ -496,3 +528,49 @@ int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long return !!last_hit; } +static void clr_hit_marker(struct grep_expr *x) +{ + /* All-hit markers are meaningful only at the very top level + * OR node. + */ + while (1) { + x->hit = 0; + if (x->node != GREP_NODE_OR) + return; + x->u.binary.left->hit = 0; + x = x->u.binary.right; + } +} + +static int chk_hit_marker(struct grep_expr *x) +{ + /* Top level nodes have hit markers. See if they all are hits */ + while (1) { + if (x->node != GREP_NODE_OR) + return x->hit; + if (!x->u.binary.left->hit) + return 0; + x = x->u.binary.right; + } +} + +int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size) +{ + /* + * we do not have to do the two-pass grep when we do not check + * buffer-wide "all-match". + */ + if (!opt->all_match) + return grep_buffer_1(opt, name, buf, size, 0); + + /* Otherwise the toplevel "or" terms hit a bit differently. + * We first clear hit markers from them. + */ + clr_hit_marker(opt->pattern_expression); + grep_buffer_1(opt, name, buf, size, 1); + + if (!chk_hit_marker(opt->pattern_expression)) + return 0; + + return grep_buffer_1(opt, name, buf, size, 0); +} diff --git a/grep.h b/grep.h index af9098cfe..d252dd25f 100644 --- a/grep.h +++ b/grep.h @@ -35,6 +35,7 @@ enum grep_expr_node { struct grep_expr { enum grep_expr_node node; + unsigned hit; union { struct grep_pat *atom; struct grep_expr *unary; @@ -59,6 +60,7 @@ struct grep_opt { unsigned count:1; unsigned word_regexp:1; unsigned fixed:1; + unsigned all_match:1; #define GREP_BINARY_DEFAULT 0 #define GREP_BINARY_NOMATCH 1 #define GREP_BINARY_TEXT 2 -- 2.30.2