Code

Fix git-diff A...B
[git.git] / builtin-grep.c
1 /*
2  * Builtin "git grep"
3  *
4  * Copyright (c) 2006 Junio C Hamano
5  */
6 #include "cache.h"
7 #include "blob.h"
8 #include "tree.h"
9 #include "commit.h"
10 #include "tag.h"
11 #include "tree-walk.h"
12 #include "builtin.h"
13 #include <regex.h>
14 #include <fnmatch.h>
15 #include <sys/wait.h>
17 /*
18  * git grep pathspecs are somewhat different from diff-tree pathspecs;
19  * pathname wildcards are allowed.
20  */
21 static int pathspec_matches(const char **paths, const char *name)
22 {
23         int namelen, i;
24         if (!paths || !*paths)
25                 return 1;
26         namelen = strlen(name);
27         for (i = 0; paths[i]; i++) {
28                 const char *match = paths[i];
29                 int matchlen = strlen(match);
30                 const char *cp, *meta;
32                 if (!matchlen ||
33                     ((matchlen <= namelen) &&
34                      !strncmp(name, match, matchlen) &&
35                      (match[matchlen-1] == '/' ||
36                       name[matchlen] == '\0' || name[matchlen] == '/')))
37                         return 1;
38                 if (!fnmatch(match, name, 0))
39                         return 1;
40                 if (name[namelen-1] != '/')
41                         continue;
43                 /* We are being asked if the directory ("name") is worth
44                  * descending into.
45                  *
46                  * Find the longest leading directory name that does
47                  * not have metacharacter in the pathspec; the name
48                  * we are looking at must overlap with that directory.
49                  */
50                 for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
51                         char ch = *cp;
52                         if (ch == '*' || ch == '[' || ch == '?') {
53                                 meta = cp;
54                                 break;
55                         }
56                 }
57                 if (!meta)
58                         meta = cp; /* fully literal */
60                 if (namelen <= meta - match) {
61                         /* Looking at "Documentation/" and
62                          * the pattern says "Documentation/howto/", or
63                          * "Documentation/diff*.txt".  The name we
64                          * have should match prefix.
65                          */
66                         if (!memcmp(match, name, namelen))
67                                 return 1;
68                         continue;
69                 }
71                 if (meta - match < namelen) {
72                         /* Looking at "Documentation/howto/" and
73                          * the pattern says "Documentation/h*";
74                          * match up to "Do.../h"; this avoids descending
75                          * into "Documentation/technical/".
76                          */
77                         if (!memcmp(match, name, meta - match))
78                                 return 1;
79                         continue;
80                 }
81         }
82         return 0;
83 }
85 enum grep_pat_token {
86         GREP_PATTERN,
87         GREP_AND,
88         GREP_OPEN_PAREN,
89         GREP_CLOSE_PAREN,
90         GREP_NOT,
91         GREP_OR,
92 };
94 struct grep_pat {
95         struct grep_pat *next;
96         const char *origin;
97         int no;
98         enum grep_pat_token token;
99         const char *pattern;
100         regex_t regexp;
101 };
103 enum grep_expr_node {
104         GREP_NODE_ATOM,
105         GREP_NODE_NOT,
106         GREP_NODE_AND,
107         GREP_NODE_OR,
108 };
110 struct grep_expr {
111         enum grep_expr_node node;
112         union {
113                 struct grep_pat *atom;
114                 struct grep_expr *unary;
115                 struct {
116                         struct grep_expr *left;
117                         struct grep_expr *right;
118                 } binary;
119         } u;
120 };
122 struct grep_opt {
123         struct grep_pat *pattern_list;
124         struct grep_pat **pattern_tail;
125         struct grep_expr *pattern_expression;
126         regex_t regexp;
127         unsigned linenum:1;
128         unsigned invert:1;
129         unsigned name_only:1;
130         unsigned unmatch_name_only:1;
131         unsigned count:1;
132         unsigned word_regexp:1;
133         unsigned fixed:1;
134 #define GREP_BINARY_DEFAULT     0
135 #define GREP_BINARY_NOMATCH     1
136 #define GREP_BINARY_TEXT        2
137         unsigned binary:2;
138         unsigned extended:1;
139         int regflags;
140         unsigned pre_context;
141         unsigned post_context;
142 };
144 static void add_pattern(struct grep_opt *opt, const char *pat,
145                         const char *origin, int no, enum grep_pat_token t)
147         struct grep_pat *p = xcalloc(1, sizeof(*p));
148         p->pattern = pat;
149         p->origin = origin;
150         p->no = no;
151         p->token = t;
152         *opt->pattern_tail = p;
153         opt->pattern_tail = &p->next;
154         p->next = NULL;
157 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
159         int err = regcomp(&p->regexp, p->pattern, opt->regflags);
160         if (err) {
161                 char errbuf[1024];
162                 char where[1024];
163                 if (p->no)
164                         sprintf(where, "In '%s' at %d, ",
165                                 p->origin, p->no);
166                 else if (p->origin)
167                         sprintf(where, "%s, ", p->origin);
168                 else
169                         where[0] = 0;
170                 regerror(err, &p->regexp, errbuf, 1024);
171                 regfree(&p->regexp);
172                 die("%s'%s': %s", where, p->pattern, errbuf);
173         }
176 #if DEBUG
177 static inline void indent(int in)
179         int i;
180         for (i = 0; i < in; i++) putchar(' ');
183 static void dump_pattern_exp(struct grep_expr *x, int in)
185         switch (x->node) {
186         case GREP_NODE_ATOM:
187                 indent(in);
188                 puts(x->u.atom->pattern);
189                 break;
190         case GREP_NODE_NOT:
191                 indent(in);
192                 puts("--not");
193                 dump_pattern_exp(x->u.unary, in+1);
194                 break;
195         case GREP_NODE_AND:
196                 dump_pattern_exp(x->u.binary.left, in+1);
197                 indent(in);
198                 puts("--and");
199                 dump_pattern_exp(x->u.binary.right, in+1);
200                 break;
201         case GREP_NODE_OR:
202                 dump_pattern_exp(x->u.binary.left, in+1);
203                 indent(in);
204                 puts("--or");
205                 dump_pattern_exp(x->u.binary.right, in+1);
206                 break;
207         }
210 static void looking_at(const char *msg, struct grep_pat **list)
212         struct grep_pat *p = *list;
213         fprintf(stderr, "%s: looking at ", msg);
214         if (!p)
215                 fprintf(stderr, "empty\n");
216         else
217                 fprintf(stderr, "<%s>\n", p->pattern);
219 #else
220 #define looking_at(a,b) do {} while(0)
221 #endif
223 static struct grep_expr *compile_pattern_expr(struct grep_pat **);
224 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
226         struct grep_pat *p;
227         struct grep_expr *x;
229         looking_at("atom", list);
231         p = *list;
232         switch (p->token) {
233         case GREP_PATTERN: /* atom */
234                 x = xcalloc(1, sizeof (struct grep_expr));
235                 x->node = GREP_NODE_ATOM;
236                 x->u.atom = p;
237                 *list = p->next;
238                 return x;
239         case GREP_OPEN_PAREN:
240                 *list = p->next;
241                 x = compile_pattern_expr(list);
242                 if (!x)
243                         return NULL;
244                 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
245                         die("unmatched parenthesis");
246                 *list = (*list)->next;
247                 return x;
248         default:
249                 return NULL;
250         }
253 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
255         struct grep_pat *p;
256         struct grep_expr *x;
258         looking_at("not", list);
260         p = *list;
261         switch (p->token) {
262         case GREP_NOT:
263                 if (!p->next)
264                         die("--not not followed by pattern expression");
265                 *list = p->next;
266                 x = xcalloc(1, sizeof (struct grep_expr));
267                 x->node = GREP_NODE_NOT;
268                 x->u.unary = compile_pattern_not(list);
269                 if (!x->u.unary)
270                         die("--not followed by non pattern expression");
271                 return x;
272         default:
273                 return compile_pattern_atom(list);
274         }
277 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
279         struct grep_pat *p;
280         struct grep_expr *x, *y, *z;
282         looking_at("and", list);
284         x = compile_pattern_not(list);
285         p = *list;
286         if (p && p->token == GREP_AND) {
287                 if (!p->next)
288                         die("--and not followed by pattern expression");
289                 *list = p->next;
290                 y = compile_pattern_and(list);
291                 if (!y)
292                         die("--and not followed by pattern expression");
293                 z = xcalloc(1, sizeof (struct grep_expr));
294                 z->node = GREP_NODE_AND;
295                 z->u.binary.left = x;
296                 z->u.binary.right = y;
297                 return z;
298         }
299         return x;
302 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
304         struct grep_pat *p;
305         struct grep_expr *x, *y, *z;
307         looking_at("or", list);
309         x = compile_pattern_and(list);
310         p = *list;
311         if (x && p && p->token != GREP_CLOSE_PAREN) {
312                 y = compile_pattern_or(list);
313                 if (!y)
314                         die("not a pattern expression %s", p->pattern);
315                 z = xcalloc(1, sizeof (struct grep_expr));
316                 z->node = GREP_NODE_OR;
317                 z->u.binary.left = x;
318                 z->u.binary.right = y;
319                 return z;
320         }
321         return x;
324 static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
326         looking_at("expr", list);
328         return compile_pattern_or(list);
331 static void compile_patterns(struct grep_opt *opt)
333         struct grep_pat *p;
335         /* First compile regexps */
336         for (p = opt->pattern_list; p; p = p->next) {
337                 if (p->token == GREP_PATTERN)
338                         compile_regexp(p, opt);
339                 else
340                         opt->extended = 1;
341         }
343         if (!opt->extended)
344                 return;
346         /* Then bundle them up in an expression.
347          * A classic recursive descent parser would do.
348          */
349         p = opt->pattern_list;
350         opt->pattern_expression = compile_pattern_expr(&p);
351 #if DEBUG
352         dump_pattern_exp(opt->pattern_expression, 0);
353 #endif
354         if (p)
355                 die("incomplete pattern expression: %s", p->pattern);
358 static char *end_of_line(char *cp, unsigned long *left)
360         unsigned long l = *left;
361         while (l && *cp != '\n') {
362                 l--;
363                 cp++;
364         }
365         *left = l;
366         return cp;
369 static int word_char(char ch)
371         return isalnum(ch) || ch == '_';
374 static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
375                       const char *name, unsigned lno, char sign)
377         printf("%s%c", name, sign);
378         if (opt->linenum)
379                 printf("%d%c", lno, sign);
380         printf("%.*s\n", (int)(eol-bol), bol);
383 /*
384  * NEEDSWORK: share code with diff.c
385  */
386 #define FIRST_FEW_BYTES 8000
387 static int buffer_is_binary(const char *ptr, unsigned long size)
389         if (FIRST_FEW_BYTES < size)
390                 size = FIRST_FEW_BYTES;
391         if (memchr(ptr, 0, size))
392                 return 1;
393         return 0;
396 static int fixmatch(const char *pattern, char *line, regmatch_t *match)
398         char *hit = strstr(line, pattern);
399         if (!hit) {
400                 match->rm_so = match->rm_eo = -1;
401                 return REG_NOMATCH;
402         }
403         else {
404                 match->rm_so = hit - line;
405                 match->rm_eo = match->rm_so + strlen(pattern);
406                 return 0;
407         }
410 static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol)
412         int hit = 0;
413         int at_true_bol = 1;
414         regmatch_t pmatch[10];
416  again:
417         if (!opt->fixed) {
418                 regex_t *exp = &p->regexp;
419                 hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
420                                pmatch, 0);
421         }
422         else {
423                 hit = !fixmatch(p->pattern, bol, pmatch);
424         }
426         if (hit && opt->word_regexp) {
427                 if ((pmatch[0].rm_so < 0) ||
428                     (eol - bol) <= pmatch[0].rm_so ||
429                     (pmatch[0].rm_eo < 0) ||
430                     (eol - bol) < pmatch[0].rm_eo)
431                         die("regexp returned nonsense");
433                 /* Match beginning must be either beginning of the
434                  * line, or at word boundary (i.e. the last char must
435                  * not be a word char).  Similarly, match end must be
436                  * either end of the line, or at word boundary
437                  * (i.e. the next char must not be a word char).
438                  */
439                 if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
440                       !word_char(bol[pmatch[0].rm_so-1])) &&
441                      ((pmatch[0].rm_eo == (eol-bol)) ||
442                       !word_char(bol[pmatch[0].rm_eo])) )
443                         ;
444                 else
445                         hit = 0;
447                 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
448                         /* There could be more than one match on the
449                          * line, and the first match might not be
450                          * strict word match.  But later ones could be!
451                          */
452                         bol = pmatch[0].rm_so + bol + 1;
453                         at_true_bol = 0;
454                         goto again;
455                 }
456         }
457         return hit;
460 static int match_expr_eval(struct grep_opt *opt,
461                            struct grep_expr *x,
462                            char *bol, char *eol)
464         switch (x->node) {
465         case GREP_NODE_ATOM:
466                 return match_one_pattern(opt, x->u.atom, bol, eol);
467                 break;
468         case GREP_NODE_NOT:
469                 return !match_expr_eval(opt, x->u.unary, bol, eol);
470         case GREP_NODE_AND:
471                 return (match_expr_eval(opt, x->u.binary.left, bol, eol) &&
472                         match_expr_eval(opt, x->u.binary.right, bol, eol));
473         case GREP_NODE_OR:
474                 return (match_expr_eval(opt, x->u.binary.left, bol, eol) ||
475                         match_expr_eval(opt, x->u.binary.right, bol, eol));
476         }
477         die("Unexpected node type (internal error) %d\n", x->node);
480 static int match_expr(struct grep_opt *opt, char *bol, char *eol)
482         struct grep_expr *x = opt->pattern_expression;
483         return match_expr_eval(opt, x, bol, eol);
486 static int match_line(struct grep_opt *opt, char *bol, char *eol)
488         struct grep_pat *p;
489         if (opt->extended)
490                 return match_expr(opt, bol, eol);
491         for (p = opt->pattern_list; p; p = p->next) {
492                 if (match_one_pattern(opt, p, bol, eol))
493                         return 1;
494         }
495         return 0;
498 static int grep_buffer(struct grep_opt *opt, const char *name,
499                        char *buf, unsigned long size)
501         char *bol = buf;
502         unsigned long left = size;
503         unsigned lno = 1;
504         struct pre_context_line {
505                 char *bol;
506                 char *eol;
507         } *prev = NULL, *pcl;
508         unsigned last_hit = 0;
509         unsigned last_shown = 0;
510         int binary_match_only = 0;
511         const char *hunk_mark = "";
512         unsigned count = 0;
514         if (buffer_is_binary(buf, size)) {
515                 switch (opt->binary) {
516                 case GREP_BINARY_DEFAULT:
517                         binary_match_only = 1;
518                         break;
519                 case GREP_BINARY_NOMATCH:
520                         return 0; /* Assume unmatch */
521                         break;
522                 default:
523                         break;
524                 }
525         }
527         if (opt->pre_context)
528                 prev = xcalloc(opt->pre_context, sizeof(*prev));
529         if (opt->pre_context || opt->post_context)
530                 hunk_mark = "--\n";
532         while (left) {
533                 char *eol, ch;
534                 int hit = 0;
536                 eol = end_of_line(bol, &left);
537                 ch = *eol;
538                 *eol = 0;
540                 hit = match_line(opt, bol, eol);
542                 /* "grep -v -e foo -e bla" should list lines
543                  * that do not have either, so inversion should
544                  * be done outside.
545                  */
546                 if (opt->invert)
547                         hit = !hit;
548                 if (opt->unmatch_name_only) {
549                         if (hit)
550                                 return 0;
551                         goto next_line;
552                 }
553                 if (hit) {
554                         count++;
555                         if (binary_match_only) {
556                                 printf("Binary file %s matches\n", name);
557                                 return 1;
558                         }
559                         if (opt->name_only) {
560                                 printf("%s\n", name);
561                                 return 1;
562                         }
563                         /* Hit at this line.  If we haven't shown the
564                          * pre-context lines, we would need to show them.
565                          * When asked to do "count", this still show
566                          * the context which is nonsense, but the user
567                          * deserves to get that ;-).
568                          */
569                         if (opt->pre_context) {
570                                 unsigned from;
571                                 if (opt->pre_context < lno)
572                                         from = lno - opt->pre_context;
573                                 else
574                                         from = 1;
575                                 if (from <= last_shown)
576                                         from = last_shown + 1;
577                                 if (last_shown && from != last_shown + 1)
578                                         printf(hunk_mark);
579                                 while (from < lno) {
580                                         pcl = &prev[lno-from-1];
581                                         show_line(opt, pcl->bol, pcl->eol,
582                                                   name, from, '-');
583                                         from++;
584                                 }
585                                 last_shown = lno-1;
586                         }
587                         if (last_shown && lno != last_shown + 1)
588                                 printf(hunk_mark);
589                         if (!opt->count)
590                                 show_line(opt, bol, eol, name, lno, ':');
591                         last_shown = last_hit = lno;
592                 }
593                 else if (last_hit &&
594                          lno <= last_hit + opt->post_context) {
595                         /* If the last hit is within the post context,
596                          * we need to show this line.
597                          */
598                         if (last_shown && lno != last_shown + 1)
599                                 printf(hunk_mark);
600                         show_line(opt, bol, eol, name, lno, '-');
601                         last_shown = lno;
602                 }
603                 if (opt->pre_context) {
604                         memmove(prev+1, prev,
605                                 (opt->pre_context-1) * sizeof(*prev));
606                         prev->bol = bol;
607                         prev->eol = eol;
608                 }
610         next_line:
611                 *eol = ch;
612                 bol = eol + 1;
613                 if (!left)
614                         break;
615                 left--;
616                 lno++;
617         }
619         if (opt->unmatch_name_only) {
620                 /* We did not see any hit, so we want to show this */
621                 printf("%s\n", name);
622                 return 1;
623         }
625         /* NEEDSWORK:
626          * The real "grep -c foo *.c" gives many "bar.c:0" lines,
627          * which feels mostly useless but sometimes useful.  Maybe
628          * make it another option?  For now suppress them.
629          */
630         if (opt->count && count)
631                 printf("%s:%u\n", name, count);
632         return !!last_hit;
635 static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
637         unsigned long size;
638         char *data;
639         char type[20];
640         int hit;
641         data = read_sha1_file(sha1, type, &size);
642         if (!data) {
643                 error("'%s': unable to read %s", name, sha1_to_hex(sha1));
644                 return 0;
645         }
646         hit = grep_buffer(opt, name, data, size);
647         free(data);
648         return hit;
651 static int grep_file(struct grep_opt *opt, const char *filename)
653         struct stat st;
654         int i;
655         char *data;
656         if (lstat(filename, &st) < 0) {
657         err_ret:
658                 if (errno != ENOENT)
659                         error("'%s': %s", filename, strerror(errno));
660                 return 0;
661         }
662         if (!st.st_size)
663                 return 0; /* empty file -- no grep hit */
664         if (!S_ISREG(st.st_mode))
665                 return 0;
666         i = open(filename, O_RDONLY);
667         if (i < 0)
668                 goto err_ret;
669         data = xmalloc(st.st_size + 1);
670         if (st.st_size != xread(i, data, st.st_size)) {
671                 error("'%s': short read %s", filename, strerror(errno));
672                 close(i);
673                 free(data);
674                 return 0;
675         }
676         close(i);
677         i = grep_buffer(opt, filename, data, st.st_size);
678         free(data);
679         return i;
682 static int exec_grep(int argc, const char **argv)
684         pid_t pid;
685         int status;
687         argv[argc] = NULL;
688         pid = fork();
689         if (pid < 0)
690                 return pid;
691         if (!pid) {
692                 execvp("grep", (char **) argv);
693                 exit(255);
694         }
695         while (waitpid(pid, &status, 0) < 0) {
696                 if (errno == EINTR)
697                         continue;
698                 return -1;
699         }
700         if (WIFEXITED(status)) {
701                 if (!WEXITSTATUS(status))
702                         return 1;
703                 return 0;
704         }
705         return -1;
708 #define MAXARGS 1000
709 #define ARGBUF 4096
710 #define push_arg(a) do { \
711         if (nr < MAXARGS) argv[nr++] = (a); \
712         else die("maximum number of args exceeded"); \
713         } while (0)
715 static int external_grep(struct grep_opt *opt, const char **paths, int cached)
717         int i, nr, argc, hit, len, status;
718         const char *argv[MAXARGS+1];
719         char randarg[ARGBUF];
720         char *argptr = randarg;
721         struct grep_pat *p;
723         if (opt->extended)
724                 return -1;
725         len = nr = 0;
726         push_arg("grep");
727         if (opt->fixed)
728                 push_arg("-F");
729         if (opt->linenum)
730                 push_arg("-n");
731         if (opt->regflags & REG_EXTENDED)
732                 push_arg("-E");
733         if (opt->regflags & REG_ICASE)
734                 push_arg("-i");
735         if (opt->word_regexp)
736                 push_arg("-w");
737         if (opt->name_only)
738                 push_arg("-l");
739         if (opt->unmatch_name_only)
740                 push_arg("-L");
741         if (opt->count)
742                 push_arg("-c");
743         if (opt->post_context || opt->pre_context) {
744                 if (opt->post_context != opt->pre_context) {
745                         if (opt->pre_context) {
746                                 push_arg("-B");
747                                 len += snprintf(argptr, sizeof(randarg)-len,
748                                                 "%u", opt->pre_context);
749                                 if (sizeof(randarg) <= len)
750                                         die("maximum length of args exceeded");
751                                 push_arg(argptr);
752                                 argptr += len;
753                         }
754                         if (opt->post_context) {
755                                 push_arg("-A");
756                                 len += snprintf(argptr, sizeof(randarg)-len,
757                                                 "%u", opt->post_context);
758                                 if (sizeof(randarg) <= len)
759                                         die("maximum length of args exceeded");
760                                 push_arg(argptr);
761                                 argptr += len;
762                         }
763                 }
764                 else {
765                         push_arg("-C");
766                         len += snprintf(argptr, sizeof(randarg)-len,
767                                         "%u", opt->post_context);
768                         if (sizeof(randarg) <= len)
769                                 die("maximum length of args exceeded");
770                         push_arg(argptr);
771                         argptr += len;
772                 }
773         }
774         for (p = opt->pattern_list; p; p = p->next) {
775                 push_arg("-e");
776                 push_arg(p->pattern);
777         }
779         /*
780          * To make sure we get the header printed out when we want it,
781          * add /dev/null to the paths to grep.  This is unnecessary
782          * (and wrong) with "-l" or "-L", which always print out the
783          * name anyway.
784          *
785          * GNU grep has "-H", but this is portable.
786          */
787         if (!opt->name_only && !opt->unmatch_name_only)
788                 push_arg("/dev/null");
790         hit = 0;
791         argc = nr;
792         for (i = 0; i < active_nr; i++) {
793                 struct cache_entry *ce = active_cache[i];
794                 char *name;
795                 if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
796                         continue;
797                 if (!pathspec_matches(paths, ce->name))
798                         continue;
799                 name = ce->name;
800                 if (name[0] == '-') {
801                         int len = ce_namelen(ce);
802                         name = xmalloc(len + 3);
803                         memcpy(name, "./", 2);
804                         memcpy(name + 2, ce->name, len + 1);
805                 }
806                 argv[argc++] = name;
807                 if (argc < MAXARGS)
808                         continue;
809                 status = exec_grep(argc, argv);
810                 if (0 < status)
811                         hit = 1;
812                 argc = nr;
813         }
814         if (argc > nr) {
815                 status = exec_grep(argc, argv);
816                 if (0 < status)
817                         hit = 1;
818         }
819         return hit;
822 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
824         int hit = 0;
825         int nr;
826         read_cache();
828 #ifdef __unix__
829         /*
830          * Use the external "grep" command for the case where
831          * we grep through the checked-out files. It tends to
832          * be a lot more optimized
833          */
834         if (!cached) {
835                 hit = external_grep(opt, paths, cached);
836                 if (hit >= 0)
837                         return hit;
838         }
839 #endif
841         for (nr = 0; nr < active_nr; nr++) {
842                 struct cache_entry *ce = active_cache[nr];
843                 if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
844                         continue;
845                 if (!pathspec_matches(paths, ce->name))
846                         continue;
847                 if (cached)
848                         hit |= grep_sha1(opt, ce->sha1, ce->name);
849                 else
850                         hit |= grep_file(opt, ce->name);
851         }
852         return hit;
855 static int grep_tree(struct grep_opt *opt, const char **paths,
856                      struct tree_desc *tree,
857                      const char *tree_name, const char *base)
859         int len;
860         int hit = 0;
861         struct name_entry entry;
862         char *down;
863         char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
865         if (tree_name[0]) {
866                 int offset = sprintf(path_buf, "%s:", tree_name);
867                 down = path_buf + offset;
868                 strcat(down, base);
869         }
870         else {
871                 down = path_buf;
872                 strcpy(down, base);
873         }
874         len = strlen(path_buf);
876         while (tree_entry(tree, &entry)) {
877                 strcpy(path_buf + len, entry.path);
879                 if (S_ISDIR(entry.mode))
880                         /* Match "abc/" against pathspec to
881                          * decide if we want to descend into "abc"
882                          * directory.
883                          */
884                         strcpy(path_buf + len + entry.pathlen, "/");
886                 if (!pathspec_matches(paths, down))
887                         ;
888                 else if (S_ISREG(entry.mode))
889                         hit |= grep_sha1(opt, entry.sha1, path_buf);
890                 else if (S_ISDIR(entry.mode)) {
891                         char type[20];
892                         struct tree_desc sub;
893                         void *data;
894                         data = read_sha1_file(entry.sha1, type, &sub.size);
895                         if (!data)
896                                 die("unable to read tree (%s)",
897                                     sha1_to_hex(entry.sha1));
898                         sub.buf = data;
899                         hit |= grep_tree(opt, paths, &sub, tree_name, down);
900                         free(data);
901                 }
902         }
903         return hit;
906 static int grep_object(struct grep_opt *opt, const char **paths,
907                        struct object *obj, const char *name)
909         if (obj->type == OBJ_BLOB)
910                 return grep_sha1(opt, obj->sha1, name);
911         if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
912                 struct tree_desc tree;
913                 void *data;
914                 int hit;
915                 data = read_object_with_reference(obj->sha1, tree_type,
916                                                   &tree.size, NULL);
917                 if (!data)
918                         die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
919                 tree.buf = data;
920                 hit = grep_tree(opt, paths, &tree, name, "");
921                 free(data);
922                 return hit;
923         }
924         die("unable to grep from object of type %s", typename(obj->type));
927 static const char builtin_grep_usage[] =
928 "git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
930 static const char emsg_invalid_context_len[] =
931 "%s: invalid context length argument";
932 static const char emsg_missing_context_len[] =
933 "missing context length argument";
934 static const char emsg_missing_argument[] =
935 "option requires an argument -%s";
937 int cmd_grep(int argc, const char **argv, const char *prefix)
939         int hit = 0;
940         int cached = 0;
941         int seen_dashdash = 0;
942         struct grep_opt opt;
943         struct object_array list = { 0, 0, NULL };
944         const char **paths = NULL;
945         int i;
947         memset(&opt, 0, sizeof(opt));
948         opt.pattern_tail = &opt.pattern_list;
949         opt.regflags = REG_NEWLINE;
951         /*
952          * If there is no -- then the paths must exist in the working
953          * tree.  If there is no explicit pattern specified with -e or
954          * -f, we take the first unrecognized non option to be the
955          * pattern, but then what follows it must be zero or more
956          * valid refs up to the -- (if exists), and then existing
957          * paths.  If there is an explicit pattern, then the first
958          * unrecognized non option is the beginning of the refs list
959          * that continues up to the -- (if exists), and then paths.
960          */
962         while (1 < argc) {
963                 const char *arg = argv[1];
964                 argc--; argv++;
965                 if (!strcmp("--cached", arg)) {
966                         cached = 1;
967                         continue;
968                 }
969                 if (!strcmp("-a", arg) ||
970                     !strcmp("--text", arg)) {
971                         opt.binary = GREP_BINARY_TEXT;
972                         continue;
973                 }
974                 if (!strcmp("-i", arg) ||
975                     !strcmp("--ignore-case", arg)) {
976                         opt.regflags |= REG_ICASE;
977                         continue;
978                 }
979                 if (!strcmp("-I", arg)) {
980                         opt.binary = GREP_BINARY_NOMATCH;
981                         continue;
982                 }
983                 if (!strcmp("-v", arg) ||
984                     !strcmp("--invert-match", arg)) {
985                         opt.invert = 1;
986                         continue;
987                 }
988                 if (!strcmp("-E", arg) ||
989                     !strcmp("--extended-regexp", arg)) {
990                         opt.regflags |= REG_EXTENDED;
991                         continue;
992                 }
993                 if (!strcmp("-F", arg) ||
994                     !strcmp("--fixed-strings", arg)) {
995                         opt.fixed = 1;
996                         continue;
997                 }
998                 if (!strcmp("-G", arg) ||
999                     !strcmp("--basic-regexp", arg)) {
1000                         opt.regflags &= ~REG_EXTENDED;
1001                         continue;
1002                 }
1003                 if (!strcmp("-n", arg)) {
1004                         opt.linenum = 1;
1005                         continue;
1006                 }
1007                 if (!strcmp("-H", arg)) {
1008                         /* We always show the pathname, so this
1009                          * is a noop.
1010                          */
1011                         continue;
1012                 }
1013                 if (!strcmp("-l", arg) ||
1014                     !strcmp("--files-with-matches", arg)) {
1015                         opt.name_only = 1;
1016                         continue;
1017                 }
1018                 if (!strcmp("-L", arg) ||
1019                     !strcmp("--files-without-match", arg)) {
1020                         opt.unmatch_name_only = 1;
1021                         continue;
1022                 }
1023                 if (!strcmp("-c", arg) ||
1024                     !strcmp("--count", arg)) {
1025                         opt.count = 1;
1026                         continue;
1027                 }
1028                 if (!strcmp("-w", arg) ||
1029                     !strcmp("--word-regexp", arg)) {
1030                         opt.word_regexp = 1;
1031                         continue;
1032                 }
1033                 if (!strncmp("-A", arg, 2) ||
1034                     !strncmp("-B", arg, 2) ||
1035                     !strncmp("-C", arg, 2) ||
1036                     (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
1037                         unsigned num;
1038                         const char *scan;
1039                         switch (arg[1]) {
1040                         case 'A': case 'B': case 'C':
1041                                 if (!arg[2]) {
1042                                         if (argc <= 1)
1043                                                 die(emsg_missing_context_len);
1044                                         scan = *++argv;
1045                                         argc--;
1046                                 }
1047                                 else
1048                                         scan = arg + 2;
1049                                 break;
1050                         default:
1051                                 scan = arg + 1;
1052                                 break;
1053                         }
1054                         if (sscanf(scan, "%u", &num) != 1)
1055                                 die(emsg_invalid_context_len, scan);
1056                         switch (arg[1]) {
1057                         case 'A':
1058                                 opt.post_context = num;
1059                                 break;
1060                         default:
1061                         case 'C':
1062                                 opt.post_context = num;
1063                         case 'B':
1064                                 opt.pre_context = num;
1065                                 break;
1066                         }
1067                         continue;
1068                 }
1069                 if (!strcmp("-f", arg)) {
1070                         FILE *patterns;
1071                         int lno = 0;
1072                         char buf[1024];
1073                         if (argc <= 1)
1074                                 die(emsg_missing_argument, arg);
1075                         patterns = fopen(argv[1], "r");
1076                         if (!patterns)
1077                                 die("'%s': %s", argv[1], strerror(errno));
1078                         while (fgets(buf, sizeof(buf), patterns)) {
1079                                 int len = strlen(buf);
1080                                 if (buf[len-1] == '\n')
1081                                         buf[len-1] = 0;
1082                                 /* ignore empty line like grep does */
1083                                 if (!buf[0])
1084                                         continue;
1085                                 add_pattern(&opt, strdup(buf), argv[1], ++lno,
1086                                             GREP_PATTERN);
1087                         }
1088                         fclose(patterns);
1089                         argv++;
1090                         argc--;
1091                         continue;
1092                 }
1093                 if (!strcmp("--not", arg)) {
1094                         add_pattern(&opt, arg, "command line", 0, GREP_NOT);
1095                         continue;
1096                 }
1097                 if (!strcmp("--and", arg)) {
1098                         add_pattern(&opt, arg, "command line", 0, GREP_AND);
1099                         continue;
1100                 }
1101                 if (!strcmp("--or", arg))
1102                         continue; /* no-op */
1103                 if (!strcmp("(", arg)) {
1104                         add_pattern(&opt, arg, "command line", 0, GREP_OPEN_PAREN);
1105                         continue;
1106                 }
1107                 if (!strcmp(")", arg)) {
1108                         add_pattern(&opt, arg, "command line", 0, GREP_CLOSE_PAREN);
1109                         continue;
1110                 }
1111                 if (!strcmp("-e", arg)) {
1112                         if (1 < argc) {
1113                                 add_pattern(&opt, argv[1], "-e option", 0,
1114                                             GREP_PATTERN);
1115                                 argv++;
1116                                 argc--;
1117                                 continue;
1118                         }
1119                         die(emsg_missing_argument, arg);
1120                 }
1121                 if (!strcmp("--", arg)) {
1122                         /* later processing wants to have this at argv[1] */
1123                         argv--;
1124                         argc++;
1125                         break;
1126                 }
1127                 if (*arg == '-')
1128                         usage(builtin_grep_usage);
1130                 /* First unrecognized non-option token */
1131                 if (!opt.pattern_list) {
1132                         add_pattern(&opt, arg, "command line", 0,
1133                                     GREP_PATTERN);
1134                         break;
1135                 }
1136                 else {
1137                         /* We are looking at the first path or rev;
1138                          * it is found at argv[1] after leaving the
1139                          * loop.
1140                          */
1141                         argc++; argv--;
1142                         break;
1143                 }
1144         }
1146         if (!opt.pattern_list)
1147                 die("no pattern given.");
1148         if ((opt.regflags != REG_NEWLINE) && opt.fixed)
1149                 die("cannot mix --fixed-strings and regexp");
1150         if (!opt.fixed)
1151                 compile_patterns(&opt);
1153         /* Check revs and then paths */
1154         for (i = 1; i < argc; i++) {
1155                 const char *arg = argv[i];
1156                 unsigned char sha1[20];
1157                 /* Is it a rev? */
1158                 if (!get_sha1(arg, sha1)) {
1159                         struct object *object = parse_object(sha1);
1160                         if (!object)
1161                                 die("bad object %s", arg);
1162                         add_object_array(object, arg, &list);
1163                         continue;
1164                 }
1165                 if (!strcmp(arg, "--")) {
1166                         i++;
1167                         seen_dashdash = 1;
1168                 }
1169                 break;
1170         }
1172         /* The rest are paths */
1173         if (!seen_dashdash) {
1174                 int j;
1175                 for (j = i; j < argc; j++)
1176                         verify_filename(prefix, argv[j]);
1177         }
1179         if (i < argc)
1180                 paths = get_pathspec(prefix, argv + i);
1181         else if (prefix) {
1182                 paths = xcalloc(2, sizeof(const char *));
1183                 paths[0] = prefix;
1184                 paths[1] = NULL;
1185         }
1187         if (!list.nr)
1188                 return !grep_cache(&opt, paths, cached);
1190         if (cached)
1191                 die("both --cached and trees are given.");
1193         for (i = 0; i < list.nr; i++) {
1194                 struct object *real_obj;
1195                 real_obj = deref_tag(list.objects[i].item, NULL, 0);
1196                 if (grep_object(&opt, paths, real_obj, list.objects[i].name))
1197                         hit = 1;
1198         }
1199         return !hit;