From: Junio C Hamano Date: Sat, 21 May 2005 09:40:01 +0000 (-0700) Subject: [PATCH] Introducing software archaeologist's tool "pickaxe". X-Git-Tag: v0.99~516 X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=52e9578985fb636ec1d3f6cf794fdadd5ec896fc;p=git.git [PATCH] Introducing software archaeologist's tool "pickaxe". This steals the "pickaxe" feature from JIT and make it available to the bare Plumbing layer. From the command line, the user gives a string he is intersted in. Using the diff-core infrastructure previously introduced, it filters the differences to limit the output only to the diffs between and where the string appears only in one but not in the other. For example: $ ./git-rev-list HEAD | ./git-diff-tree -Sdiff-tree-helper --stdin -M would show the diffs that touch the string "diff-tree-helper". In real software-archaeologist application, you would typically look for a few to several lines of code and see where that code came from. The "pickaxe" module runs after "rename/copy detection" module, so it even crosses the file rename boundary, as the above example demonstrates. Signed-off-by: Junio C Hamano Signed-off-by: Linus Torvalds --- diff --git a/Documentation/git-diff-cache.txt b/Documentation/git-diff-cache.txt index e3ed7131a..2cfdffd87 100644 --- a/Documentation/git-diff-cache.txt +++ b/Documentation/git-diff-cache.txt @@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi SYNOPSIS -------- -'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [--cached] +'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S] [--cached] DESCRIPTION ----------- @@ -39,6 +39,10 @@ OPTIONS -C:: Detect copies as well as renames; implies -p. +-S:: + Look for differences that contains the change in . + + -R:: Output diff in reverse. diff --git a/Documentation/git-diff-files.txt b/Documentation/git-diff-files.txt index 1eae3d033..51a3d0fcd 100644 --- a/Documentation/git-diff-files.txt +++ b/Documentation/git-diff-files.txt @@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache SYNOPSIS -------- -'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [...] +'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S] [...] DESCRIPTION ----------- @@ -35,6 +35,10 @@ OPTIONS -C:: Detect copies as well as renames; implies -p. +-S:: + Look for differences that contains the change in . + + -r:: This flag does not mean anything. It is there only to match git-diff-tree. Unlike git-diff-tree, git-diff-files always looks diff --git a/Documentation/git-diff-helper.txt b/Documentation/git-diff-helper.txt index 302789e9d..2036c6cc1 100644 --- a/Documentation/git-diff-helper.txt +++ b/Documentation/git-diff-helper.txt @@ -9,7 +9,7 @@ git-diff-helper - Generates patch format output for git-diff-* SYNOPSIS -------- -'git-diff-helper' [-z] [-R] [-M] [-C] +'git-diff-helper' [-z] [-R] [-M] [-C] [-S] DESCRIPTION ----------- @@ -37,6 +37,10 @@ OPTIONS -C:: Detect copies as well as renames. +-S:: + Look for differences that contains the change in . + + See Also -------- The section on generating patches in link:git-diff-cache.html[git-diff-cache] diff --git a/Documentation/git-diff-tree.txt b/Documentation/git-diff-tree.txt index f4e95a97c..bdc8d5a53 100644 --- a/Documentation/git-diff-tree.txt +++ b/Documentation/git-diff-tree.txt @@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object SYNOPSIS -------- -'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-m] [-s] [-v] []\* +'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S] [-m] [-s] [-v] []\* DESCRIPTION ----------- @@ -43,6 +43,9 @@ OPTIONS -R:: Output diff in reverse. +-S:: + Look for differences that contains the change in . + -r:: recurse diff --git a/Makefile b/Makefile index f61bfe00d..b13edabc2 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ LIB_H += strbuf.h LIB_OBJS += strbuf.o LIB_H += diff.h -LIB_OBJS += diff.o diffcore-rename.o +LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o LIB_OBJS += gitenv.o @@ -125,6 +125,7 @@ strbuf.o: $(LIB_H) gitenv.o: $(LIB_H) diff.o: $(LIB_H) diffcore-rename.o : $(LIB_H) +diffcore-pickaxe.o : $(LIB_H) test: all make -C t/ all diff --git a/diff-cache.c b/diff-cache.c index 383302abb..a5deb8c7a 100644 --- a/diff-cache.c +++ b/diff-cache.c @@ -8,6 +8,7 @@ static int line_termination = '\n'; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; /* A file entry went away or appeared */ static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode) @@ -153,7 +154,7 @@ static void mark_merge_entries(void) } static char *diff_cache_usage = -"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [--cached] "; +"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [-S] [--cached] "; int main(int argc, char **argv) { @@ -194,6 +195,10 @@ int main(int argc, char **argv) reverse_diff = 1; continue; } + if (!strcmp(arg, "-S")) { + pickaxe = arg + 2; + continue; + } if (!strcmp(arg, "-m")) { match_nonexisting = 1; continue; @@ -208,8 +213,8 @@ int main(int argc, char **argv) if (argc != 2 || get_sha1(argv[1], tree_sha1)) usage(diff_cache_usage); - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); mark_merge_entries(); diff --git a/diff-files.c b/diff-files.c index d02025492..d3b80a072 100644 --- a/diff-files.c +++ b/diff-files.c @@ -7,13 +7,14 @@ #include "diff.h" static const char *diff_files_usage = -"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [paths...]"; +"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S] [paths...]"; static int generate_patch = 0; static int line_termination = '\n'; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; static int silent = 0; static int matches_pathspec(struct cache_entry *ce, char **spec, int cnt) @@ -67,6 +68,8 @@ int main(int argc, char **argv) line_termination = 0; else if (!strcmp(argv[1], "-R")) reverse_diff = 1; + else if (!strcmp(argv[1], "-S")) + pickaxe = argv[1] + 2; else if (!strncmp(argv[1], "-M", 2)) { diff_score_opt = diff_scoreopt_parse(argv[1]); detect_rename = generate_patch = 1; @@ -89,8 +92,8 @@ int main(int argc, char **argv) exit(1); } - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); for (i = 0; i < entries; i++) { diff --git a/diff-helper.c b/diff-helper.c index 4e966db76..568d5ae35 100644 --- a/diff-helper.c +++ b/diff-helper.c @@ -9,6 +9,7 @@ static int detect_rename = 0; static int diff_score_opt = 0; static int generate_patch = 1; +static char *pickaxe = 0; static int parse_oneside_change(const char *cp, int *mode, unsigned char *sha1, char *path) @@ -93,7 +94,7 @@ static int parse_diff_raw_output(const char *buf) } static const char *diff_helper_usage = - "git-diff-helper [-z] [-R] [-M] [-C] paths..."; + "git-diff-helper [-z] [-R] [-M] [-C] [-S] paths..."; int main(int ac, const char **av) { struct strbuf sb; @@ -117,14 +118,17 @@ int main(int ac, const char **av) { detect_rename = 2; diff_score_opt = diff_scoreopt_parse(av[1]); } + else if (av[1][1] == 'S') { + pickaxe = av[1] + 2; + } else usage(diff_helper_usage); ac--; av++; } /* the remaining parameters are paths patterns */ - diff_setup(detect_rename, diff_score_opt, reverse, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse, (generate_patch ? -1 : line_termination), av+1, ac-1); while (1) { diff --git a/diff-tree.c b/diff-tree.c index aa4944686..233a25066 100644 --- a/diff-tree.c +++ b/diff-tree.c @@ -13,6 +13,7 @@ static int generate_patch = 0; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; static const char *header = NULL; static const char *header_prefix = ""; @@ -271,8 +272,8 @@ static int diff_tree_sha1_top(const unsigned char *old, { int ret; - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); ret = diff_tree_sha1(old, new, base); diff_flush(); @@ -285,8 +286,8 @@ static int diff_root_tree(const unsigned char *new, const char *base) void *tree; unsigned long size; - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); tree = read_object_with_reference(new, "tree", &size, NULL); if (!tree) @@ -430,7 +431,7 @@ static int diff_tree_stdin(char *line) } static char *diff_tree_usage = -"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-m] [-s] [-v] "; +"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-S] [-m] [-s] [-v] "; int main(int argc, char **argv) { @@ -473,6 +474,10 @@ int main(int argc, char **argv) recursive = generate_patch = 1; continue; } + if (!strncmp(arg, "-S", 2)) { + pickaxe = arg + 2; + continue; + } if (!strncmp(arg, "-M", 2)) { detect_rename = recursive = generate_patch = 1; diff_score_opt = diff_scoreopt_parse(arg); diff --git a/diff.c b/diff.c index d908ef3fd..13d5e3edf 100644 --- a/diff.c +++ b/diff.c @@ -17,6 +17,7 @@ static int reverse_diff; static int diff_raw_output = -1; static const char **pathspec; static int speccnt; +static const char *pickaxe; static int minimum_score; static const char *external_diff(void) @@ -511,8 +512,9 @@ int diff_scoreopt_parse(const char *opt) return MAX_SCORE * num / scale; } -void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_, - int diff_raw_output_, +void diff_setup(int detect_rename_, int minimum_score_, + char *pickaxe_, + int reverse_diff_, int diff_raw_output_, const char **pathspec_, int speccnt_) { detect_rename = detect_rename_; @@ -521,15 +523,16 @@ void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_, diff_raw_output = diff_raw_output_; speccnt = speccnt_; minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE; + pickaxe = pickaxe_; } static struct diff_queue_struct queued_diff; -struct diff_file_pair *diff_queue(struct diff_queue_struct *queue, +struct diff_filepair *diff_queue(struct diff_queue_struct *queue, struct diff_filespec *one, struct diff_filespec *two) { - struct diff_file_pair *dp = xmalloc(sizeof(*dp)); + struct diff_filepair *dp = xmalloc(sizeof(*dp)); dp->one = one; dp->two = two; dp->xfrm_msg = 0; @@ -549,7 +552,7 @@ static const char *git_object_type(unsigned mode) return S_ISDIR(mode) ? "tree" : "blob"; } -static void diff_flush_raw(struct diff_file_pair *p) +static void diff_flush_raw(struct diff_filepair *p) { struct diff_filespec *it; int addremove; @@ -583,7 +586,7 @@ static void diff_flush_raw(struct diff_file_pair *p) sha1_to_hex(it->sha1), it->path, diff_raw_output); } -static void diff_flush_patch(struct diff_file_pair *p) +static void diff_flush_patch(struct diff_filepair *p) { const char *name, *other; @@ -600,7 +603,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two) { /* This function is written stricter than necessary to support * the currently implemented transformers, but the idea is to - * let transformers to produce diff_file_pairs any way they want, + * let transformers to produce diff_filepairs any way they want, * and filter and clean them up here before producing the output. */ @@ -623,7 +626,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two) return 0; } -static void diff_flush_one(struct diff_file_pair *p) +static void diff_flush_one(struct diff_filepair *p) { if (identical(p->one, p->two)) return; @@ -640,11 +643,13 @@ void diff_flush(void) if (detect_rename) diff_detect_rename(q, detect_rename, minimum_score); + if (pickaxe) + diff_pickaxe(q, pickaxe); for (i = 0; i < q->nr; i++) diff_flush_one(q->queue[i]); for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); free(p->xfrm_msg); diff --git a/diff.h b/diff.h index 86a645afe..97d39ac96 100644 --- a/diff.h +++ b/diff.h @@ -20,6 +20,7 @@ extern void diff_unmerge(const char *path); extern int diff_scoreopt_parse(const char *opt); extern void diff_setup(int detect_rename, int minimum_score, + char *pickaxe, int reverse, int raw_output, const char **spec, int cnt); diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c new file mode 100644 index 000000000..ee22e36ab --- /dev/null +++ b/diffcore-pickaxe.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2005 Junio C Hamano + */ +#include "cache.h" +#include "diff.h" +#include "diffcore.h" +#include "delta.h" + +static int contains(struct diff_filespec *one, + const char *needle, unsigned long len) +{ + unsigned long offset, sz; + const char *data; + if (diff_populate_filespec(one)) + return 0; + sz = one->size; + data = one->data; + for (offset = 0; offset + len <= sz; offset++) + if (!strncmp(needle, data + offset, len)) + return 1; + return 0; +} + +void diff_pickaxe(struct diff_queue_struct *q, const char *needle) +{ + unsigned long len = strlen(needle); + int i; + struct diff_queue_struct outq; + outq.queue = NULL; + outq.nr = outq.alloc = 0; + + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (!p->one->file_valid) { + if (!p->two->file_valid) + continue; /* ignore nonsense */ + /* created */ + if (contains(p->two, needle, len)) + diff_queue(&outq, p->one, p->two); + } + else if (!p->two->file_valid) { + if (contains(p->one, needle, len)) + diff_queue(&outq, p->one, p->two); + } + else if (contains(p->one, needle, len) != + contains(p->two, needle, len)) + diff_queue(&outq, p->one, p->two); + } + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + free(p); + } + free(q->queue); + *q = outq; + return; +} diff --git a/diffcore-rename.c b/diffcore-rename.c index 8aa8f841c..6dd753bc6 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -129,7 +129,7 @@ static void record_rename_pair(struct diff_queue_struct *outq, * To achieve this sort order, we give xform_work the number * above. */ - struct diff_file_pair *dp = diff_queue(outq, src, dst); + struct diff_filepair *dp = diff_queue(outq, src, dst); dp->xfrm_work = (rank * 2 + 1) | (score<xfrm_flags |= RENAME_DST_MATCHED; } @@ -148,7 +148,7 @@ static void debug_filespec(struct diff_filespec *s, int x, const char *one) s->size, s->xfrm_flags); } -static void debug_filepair(const struct diff_file_pair *p, int i) +static void debug_filepair(const struct diff_filepair *p, int i) { debug_filespec(p->one, i, "one"); debug_filespec(p->two, i, "two"); @@ -165,7 +165,7 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q) fprintf(stderr, "%s\n", msg); fprintf(stderr, "q->nr = %d\n", q->nr); for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; debug_filepair(p, i); } } @@ -180,8 +180,8 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q) */ static int rank_compare(const void *a_, const void *b_) { - const struct diff_file_pair *a = *(const struct diff_file_pair **)a_; - const struct diff_file_pair *b = *(const struct diff_file_pair **)b_; + const struct diff_filepair *a = *(const struct diff_filepair **)a_; + const struct diff_filepair *b = *(const struct diff_filepair **)b_; int a_rank = a->xfrm_work & ((1<xfrm_work & ((1<nr) { - struct diff_file_pair *p = q->queue[i++]; + struct diff_filepair *p = q->queue[i++]; if (!p->two->file_valid) continue; /* removed is fine */ if (strcmp(p->one->path, it->path)) @@ -243,15 +243,8 @@ void diff_detect_rename(struct diff_queue_struct *q, srcs[0] = &deleted; srcs[1] = &stay; - /* NEEDSWORK: - * (1) make sure we properly ignore but pass trees. - * - * (2) make sure we do right thing on the same path deleted - * and created in the same patch. - */ - for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; if (!p->one->file_valid) if (!p->two->file_valid) continue; /* ignore nonsense */ @@ -340,11 +333,11 @@ void diff_detect_rename(struct diff_queue_struct *q, * See comments at the top of record_rename_pair for numbers used * to assign xfrm_work. * - * Note that we have not annotated the diff_file_pair with any comment + * Note that we have not annotated the diff_filepair with any comment * so there is nothing other than p to free. */ for (i = 0; i < q->nr; i++) { - struct diff_file_pair *dp, *p = q->queue[i]; + struct diff_filepair *dp, *p = q->queue[i]; if (!p->one->file_valid) { if (p->two->file_valid) { /* creation */ @@ -378,7 +371,7 @@ void diff_detect_rename(struct diff_queue_struct *q, /* Copy it out to q, removing duplicates. */ for (i = 0; i < outq.nr; i++) { - struct diff_file_pair *p = outq.queue[i]; + struct diff_filepair *p = outq.queue[i]; if (!p->one->file_valid) { /* created */ if (p->two->xfrm_flags & RENAME_DST_MATCHED) @@ -395,7 +388,7 @@ void diff_detect_rename(struct diff_queue_struct *q, } else if (strcmp(p->one->path, p->two->path)) { /* rename or copy */ - struct diff_file_pair *dp = + struct diff_filepair *dp = diff_queue(q, p->one, p->two); int msglen = (strlen(p->one->path) + strlen(p->two->path) + 100); diff --git a/diffcore.h b/diffcore.h index 5fa706760..c3809ef85 100644 --- a/diffcore.h +++ b/diffcore.h @@ -38,7 +38,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, extern int diff_populate_filespec(struct diff_filespec *); extern void diff_free_filespec_data(struct diff_filespec *); -struct diff_file_pair { +struct diff_filepair { struct diff_filespec *one; struct diff_filespec *two; char *xfrm_msg; @@ -47,14 +47,15 @@ struct diff_file_pair { }; struct diff_queue_struct { - struct diff_file_pair **queue; + struct diff_filepair **queue; int alloc; int nr; }; -extern struct diff_file_pair *diff_queue(struct diff_queue_struct *, - struct diff_filespec *, - struct diff_filespec *); +extern struct diff_filepair *diff_queue(struct diff_queue_struct *, + struct diff_filespec *, + struct diff_filespec *); extern void diff_detect_rename(struct diff_queue_struct *, int, int); +extern void diff_pickaxe(struct diff_queue_struct *, const char *); #endif