X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=builtin-blame.c;h=b51cdc71faeca467cd3a965db0ff84d1f087076d;hb=86952cdabdcc550a20794794db539c41877d17fc;hp=4a1accf13c1220745c73999c6c345735aeadc3e5;hpb=1fcdd62adf81a172f45c7c6a58177212d500b9d9;p=git.git diff --git a/builtin-blame.c b/builtin-blame.c index 4a1accf13..b51cdc71f 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -13,10 +13,12 @@ #include "diff.h" #include "diffcore.h" #include "revision.h" +#include "quote.h" #include "xdiff-interface.h" +#include "cache-tree.h" static char blame_usage[] = -"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [-M] [-C] [-C] [commit] [--] file\n" +"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [-M] [-C] [-C] [--contents ] [--incremental] [commit] [--] file\n" " -c, --compatibility Use the same output mode as git-annotate (Default: off)\n" " -b Show blank SHA-1 for boundary commits (Default: off)\n" " -l, --long Show long commit SHA1 (Default: off)\n" @@ -27,6 +29,8 @@ static char blame_usage[] = " -p, --porcelain Show in a format designed for machine consumption\n" " -L n,m Process only line range n,m, counting from 1\n" " -M, -C Find line movements within and across files\n" +" --incremental Show blame entries as we find them, incrementally\n" +" --contents file Use 's contents as the final image\n" " -S revs-file Use revisions from revs-file instead of calling git-rev-list\n"; static int longest_file; @@ -36,6 +40,8 @@ static int max_digits; static int max_score_digits; static int show_root; static int blank_boundary; +static int incremental; +static int cmd_is_annotate; #ifndef DEBUG #define DEBUG 0 @@ -74,12 +80,16 @@ struct origin { char path[FLEX_ARRAY]; }; +/* + * Given an origin, prepare mmfile_t structure to be used by the + * diff machinery + */ static char *fill_origin_blob(struct origin *o, mmfile_t *file) { if (!o->file.ptr) { - char type[10]; + enum object_type type; num_read_blob++; - file->ptr = read_sha1_file(o->blob_sha1, type, + file->ptr = read_sha1_file(o->blob_sha1, &type, (unsigned long *)(&(file->size))); o->file = *file; } @@ -88,6 +98,10 @@ static char *fill_origin_blob(struct origin *o, mmfile_t *file) return file->ptr; } +/* + * Origin is refcounted and usually we keep the blob contents to be + * reused. + */ static inline struct origin *origin_incref(struct origin *o) { if (o) @@ -105,6 +119,11 @@ static void origin_decref(struct origin *o) } } +/* + * Each group of lines is described by a blame_entry; it can be split + * as we pass blame to the parents. They form a linked list in the + * scoreboard structure, sorted by the target line number. + */ struct blame_entry { struct blame_entry *prev; struct blame_entry *next; @@ -131,19 +150,24 @@ struct blame_entry { int s_lno; /* how significant this entry is -- cached to avoid - * scanning the lines over and over + * scanning the lines over and over. */ unsigned score; }; +/* + * The current state of the blame assignment. + */ struct scoreboard { /* the final commit (i.e. where we started digging from) */ struct commit *final; const char *path; - /* the contents in the final; pointed into by buf pointers of - * blame_entries + /* + * The contents in the final image. + * Used by many functions to obtain contents of the nth line, + * indexed with scoreboard.lineno[blame_entry.lno]. */ const char *final_buf; unsigned long final_buf_size; @@ -168,6 +192,11 @@ static int cmp_suspect(struct origin *a, struct origin *b) static void sanity_check_refcnt(struct scoreboard *); +/* + * If two blame entries that are next to each other came from + * contiguous lines in the same origin (i.e. pair), + * merge them together. + */ static void coalesce(struct scoreboard *sb) { struct blame_entry *ent, *next; @@ -191,6 +220,12 @@ static void coalesce(struct scoreboard *sb) sanity_check_refcnt(sb); } +/* + * Given a commit and a path in it, create a new origin structure. + * The callers that add blame to the scoreboard should use + * get_origin() to obtain shared, refcounted copy instead of calling + * this function directly. + */ static struct origin *make_origin(struct commit *commit, const char *path) { struct origin *o; @@ -201,6 +236,9 @@ static struct origin *make_origin(struct commit *commit, const char *path) return o; } +/* + * Locate an existing origin or create a new one. + */ static struct origin *get_origin(struct scoreboard *sb, struct commit *commit, const char *path) @@ -215,10 +253,16 @@ static struct origin *get_origin(struct scoreboard *sb, return make_origin(commit, path); } +/* + * Fill the blob_sha1 field of an origin if it hasn't, so that later + * call to fill_origin_blob() can use it to locate the data. blob_sha1 + * for an origin is also used to pass the blame for the entire file to + * the parent to detect the case where a child's blob is identical to + * that of its parent's. + */ static int fill_blob_sha1(struct origin *origin) { unsigned mode; - char type[10]; if (!is_null_sha1(origin->blob_sha1)) return 0; @@ -226,8 +270,7 @@ static int fill_blob_sha1(struct origin *origin) origin->path, origin->blob_sha1, &mode)) goto error_out; - if (sha1_object_info(origin->blob_sha1, type, NULL) || - strcmp(type, blob_type)) + if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB) goto error_out; return 0; error_out: @@ -235,6 +278,10 @@ static int fill_blob_sha1(struct origin *origin) return -1; } +/* + * We have an origin -- check if the same path exists in the + * parent and return an origin structure to represent it. + */ static struct origin *find_origin(struct scoreboard *sb, struct commit *parent, struct origin *origin) @@ -244,12 +291,26 @@ static struct origin *find_origin(struct scoreboard *sb, const char *paths[2]; if (parent->util) { - /* This is a freestanding copy of origin and not - * refcounted. + /* + * Each commit object can cache one origin in that + * commit. This is a freestanding copy of origin and + * not refcounted. */ struct origin *cached = parent->util; if (!strcmp(cached->path, origin->path)) { + /* + * The same path between origin and its parent + * without renaming -- the most common case. + */ porigin = get_origin(sb, parent, cached->path); + + /* + * If the origin was newly created (i.e. get_origin + * would call make_origin if none is found in the + * scoreboard), it does not know the blob_sha1, + * so copy it. Otherwise porigin was in the + * scoreboard and already knows blob_sha1. + */ if (porigin->refcnt == 1) hashcpy(porigin->blob_sha1, cached->blob_sha1); return porigin; @@ -273,9 +334,13 @@ static struct origin *find_origin(struct scoreboard *sb, diff_tree_setup_paths(paths, &diff_opts); if (diff_setup_done(&diff_opts) < 0) die("diff-setup"); - diff_tree_sha1(parent->tree->object.sha1, - origin->commit->tree->object.sha1, - "", &diff_opts); + + if (is_null_sha1(origin->commit->object.sha1)) + do_diff_cache(parent->tree->object.sha1, &diff_opts); + else + diff_tree_sha1(parent->tree->object.sha1, + origin->commit->tree->object.sha1, + "", &diff_opts); diffcore_std(&diff_opts); /* It is either one entry that says "modified", or "created", @@ -306,7 +371,13 @@ static struct origin *find_origin(struct scoreboard *sb, } diff_flush(&diff_opts); if (porigin) { + /* + * Create a freestanding copy that is not part of + * the refcounted origin found in the scoreboard, and + * cache it in the commit. + */ struct origin *cached; + cached = make_origin(porigin->commit, porigin->path); hashcpy(cached->blob_sha1, porigin->blob_sha1); parent->util = cached; @@ -314,6 +385,10 @@ static struct origin *find_origin(struct scoreboard *sb, return porigin; } +/* + * We have an origin -- find the path that corresponds to it in its + * parent and return an origin structure to represent it. + */ static struct origin *find_rename(struct scoreboard *sb, struct commit *parent, struct origin *origin) @@ -332,9 +407,13 @@ static struct origin *find_rename(struct scoreboard *sb, diff_tree_setup_paths(paths, &diff_opts); if (diff_setup_done(&diff_opts) < 0) die("diff-setup"); - diff_tree_sha1(parent->tree->object.sha1, - origin->commit->tree->object.sha1, - "", &diff_opts); + + if (is_null_sha1(origin->commit->object.sha1)) + do_diff_cache(parent->tree->object.sha1, &diff_opts); + else + diff_tree_sha1(parent->tree->object.sha1, + origin->commit->tree->object.sha1, + "", &diff_opts); diffcore_std(&diff_opts); for (i = 0; i < diff_queued_diff.nr; i++) { @@ -350,6 +429,9 @@ static struct origin *find_rename(struct scoreboard *sb, return porigin; } +/* + * Parsing of patch chunks... + */ struct chunk { /* line number in postimage; up to but not including this * line is the same as preimage @@ -451,6 +533,11 @@ static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, return state.ret; } +/* + * Run diff between two origins and grab the patch output, so that + * we can pass blame for lines origin is currently suspected for + * to its parent. + */ static struct patch *get_patch(struct origin *parent, struct origin *origin) { mmfile_t file_p, file_o; @@ -471,6 +558,10 @@ static void free_patch(struct patch *p) free(p); } +/* + * Link in a new blame entry to the scoreboard. Entries that cover the + * same line range have been removed from the scoreboard previously. + */ static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) { struct blame_entry *ent, *prev = NULL; @@ -494,6 +585,12 @@ static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) e->next->prev = e; } +/* + * src typically is on-stack; we want to copy the information in it to + * an malloced blame_entry that is already on the linked list of the + * scoreboard. The origin of dst loses a refcnt while the origin of src + * gains one. + */ static void dup_entry(struct blame_entry *dst, struct blame_entry *src) { struct blame_entry *p, *n; @@ -513,25 +610,25 @@ static const char *nth_line(struct scoreboard *sb, int lno) return sb->final_buf + sb->lineno[lno]; } +/* + * It is known that lines between tlno to same came from parent, and e + * has an overlap with that range. it also is known that parent's + * line plno corresponds to e's line tlno. + * + * <---- e -----> + * <------> + * <------------> + * <------------> + * <------------------> + * + * Split e into potentially three parts; before this chunk, the chunk + * to be blamed for the parent, and after that portion. + */ static void split_overlap(struct blame_entry *split, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent) { - /* it is known that lines between tlno to same came from - * parent, and e has an overlap with that range. it also is - * known that parent's line plno corresponds to e's line tlno. - * - * <---- e -----> - * <------> - * <------------> - * <------------> - * <------------------> - * - * Potentially we need to split e into three parts; before - * this chunk, the chunk to be blamed for parent, and after - * that portion. - */ int chunk_end_lno; memset(split, 0, sizeof(struct blame_entry [3])); @@ -561,11 +658,20 @@ static void split_overlap(struct blame_entry *split, chunk_end_lno = e->lno + e->num_lines; split[1].num_lines = chunk_end_lno - split[1].lno; + /* + * if it turns out there is nothing to blame the parent for, + * forget about the splitting. !split[1].suspect signals this. + */ if (split[1].num_lines < 1) return; split[1].suspect = origin_incref(parent); } +/* + * split_overlap() divided an existing blame e into up to three parts + * in split. Adjust the linked list of blames in the scoreboard to + * reflect the split. + */ static void split_blame(struct scoreboard *sb, struct blame_entry *split, struct blame_entry *e) @@ -573,21 +679,27 @@ static void split_blame(struct scoreboard *sb, struct blame_entry *new_entry; if (split[0].suspect && split[2].suspect) { - /* we need to split e into two and add another for parent */ + /* The first part (reuse storage for the existing entry e) */ dup_entry(e, &split[0]); + /* The last part -- me */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); add_blame_entry(sb, new_entry); + /* ... and the middle part -- parent */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); add_blame_entry(sb, new_entry); } else if (!split[0].suspect && !split[2].suspect) - /* parent covers the entire area */ + /* + * The parent covers the entire area; reuse storage for + * e and replace it with the parent. + */ dup_entry(e, &split[1]); else if (split[0].suspect) { + /* me and then parent */ dup_entry(e, &split[0]); new_entry = xmalloc(sizeof(*new_entry)); @@ -595,6 +707,7 @@ static void split_blame(struct scoreboard *sb, add_blame_entry(sb, new_entry); } else { + /* parent and then me */ dup_entry(e, &split[1]); new_entry = xmalloc(sizeof(*new_entry)); @@ -625,6 +738,10 @@ static void split_blame(struct scoreboard *sb, } } +/* + * After splitting the blame, the origins used by the + * on-stack blame_entry should lose one refcnt each. + */ static void decref_split(struct blame_entry *split) { int i; @@ -633,6 +750,10 @@ static void decref_split(struct blame_entry *split) origin_decref(split[i].suspect); } +/* + * Helper for blame_chunk(). blame_entry e is known to overlap with + * the patch hunk; split it and pass blame to the parent. + */ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent) @@ -645,6 +766,9 @@ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, decref_split(split); } +/* + * Find the line number of the last line the target is suspected for. + */ static int find_last_in_target(struct scoreboard *sb, struct origin *target) { struct blame_entry *e; @@ -659,6 +783,11 @@ static int find_last_in_target(struct scoreboard *sb, struct origin *target) return last_in_target; } +/* + * Process one hunk from the patch between the current suspect for + * blame_entry e and its parent. Find and split the overlap, and + * pass blame to the overlapping part to the parent. + */ static void blame_chunk(struct scoreboard *sb, int tlno, int plno, int same, struct origin *target, struct origin *parent) @@ -675,6 +804,11 @@ static void blame_chunk(struct scoreboard *sb, } } +/* + * We are looking at the origin 'target' and aiming to pass blame + * for the lines it is suspected to its parent. Run diff to find + * which lines came from parent and pass blame for them. + */ static int pass_blame_to_parent(struct scoreboard *sb, struct origin *target, struct origin *parent) @@ -695,13 +829,22 @@ static int pass_blame_to_parent(struct scoreboard *sb, plno = chunk->p_next; tlno = chunk->t_next; } - /* rest (i.e. anything above tlno) are the same as parent */ + /* The rest (i.e. anything after tlno) are the same as the parent */ blame_chunk(sb, tlno, plno, last_in_target, target, parent); free_patch(patch); return 0; } +/* + * The lines in blame_entry after splitting blames many times can become + * very small and trivial, and at some point it becomes pointless to + * blame the parents. E.g. "\t\t}\n\t}\n\n" appears everywhere in any + * ordinary C program, and it is not worth to say it was copied from + * totally unrelated file in the parent. + * + * Compute how trivial the lines in the blame_entry are. + */ static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) { unsigned score; @@ -723,6 +866,12 @@ static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) return score; } +/* + * best_so_far[] and this[] are both a split of an existing blame_entry + * that passes blame to the parent. Maintain best_so_far the best split + * so far, by comparing this and best_so_far and copying this into + * bst_so_far as needed. + */ static void copy_split_if_better(struct scoreboard *sb, struct blame_entry *best_so_far, struct blame_entry *this) @@ -742,6 +891,11 @@ static void copy_split_if_better(struct scoreboard *sb, memcpy(best_so_far, this, sizeof(struct blame_entry [3])); } +/* + * Find the lines from parent that are the same as ent so that + * we can pass blames to it. file_p has the blob contents for + * the parent. + */ static void find_copy_in_blob(struct scoreboard *sb, struct blame_entry *ent, struct origin *parent, @@ -754,6 +908,9 @@ static void find_copy_in_blob(struct scoreboard *sb, struct patch *patch; int i, plno, tlno; + /* + * Prepare mmfile that contains only the lines in ent. + */ cp = nth_line(sb, ent->lno); file_o.ptr = (char*) cp; cnt = ent->num_lines; @@ -789,6 +946,10 @@ static void find_copy_in_blob(struct scoreboard *sb, free_patch(patch); } +/* + * See if lines currently target is suspected for can be attributed to + * parent. + */ static int find_move_in_parent(struct scoreboard *sb, struct origin *target, struct origin *parent) @@ -823,12 +984,15 @@ static int find_move_in_parent(struct scoreboard *sb, return 0; } - struct blame_list { struct blame_entry *ent; struct blame_entry split[3]; }; +/* + * Count the number of entries the target is suspected for, + * and prepare a list of entry and the best split. + */ static struct blame_list *setup_blame_list(struct scoreboard *sb, struct origin *target, int *num_ents_p) @@ -837,9 +1001,6 @@ static struct blame_list *setup_blame_list(struct scoreboard *sb, int num_ents, i; struct blame_list *blame_list = NULL; - /* Count the number of entries the target is suspected for, - * and prepare a list of entry and the best split. - */ for (e = sb->ent, num_ents = 0; e; e = e->next) if (!e->guilty && !cmp_suspect(e->suspect, target)) num_ents++; @@ -853,6 +1014,11 @@ static struct blame_list *setup_blame_list(struct scoreboard *sb, return blame_list; } +/* + * For lines target is suspected for, see if we can find code movement + * across file boundary from the parent commit. porigin is the path + * in the parent we already tried. + */ static int find_copy_in_parent(struct scoreboard *sb, struct origin *target, struct commit *parent, @@ -890,9 +1056,12 @@ static int find_copy_in_parent(struct scoreboard *sb, (!porigin || strcmp(target->path, porigin->path))) diff_opts.find_copies_harder = 1; - diff_tree_sha1(parent->tree->object.sha1, - target->commit->tree->object.sha1, - "", &diff_opts); + if (is_null_sha1(target->commit->object.sha1)) + do_diff_cache(parent->tree->object.sha1, &diff_opts); + else + diff_tree_sha1(parent->tree->object.sha1, + target->commit->tree->object.sha1, + "", &diff_opts); if (!diff_opts.find_copies_harder) diffcore_std(&diff_opts); @@ -953,7 +1122,8 @@ static int find_copy_in_parent(struct scoreboard *sb, return retval; } -/* The blobs of origin and porigin exactly match, so everything +/* + * The blobs of origin and porigin exactly match, so everything * origin is suspected for can be blamed on the parent. */ static void pass_whole_blame(struct scoreboard *sb, @@ -1038,7 +1208,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) } /* - * Optionally run "miff" to find moves in parents' files here. + * Optionally find moves in parents' files. */ if (opt & PICKAXE_BLAME_MOVE) for (i = 0, parent = commit->parents; @@ -1052,7 +1222,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) } /* - * Optionally run "ciff" to find copies from parents' files here. + * Optionally find copies from parents' files. */ if (opt & PICKAXE_BLAME_COPY) for (i = 0, parent = commit->parents; @@ -1069,91 +1239,31 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) origin_decref(parent_origin[i]); } -static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) -{ - while (1) { - struct blame_entry *ent; - struct commit *commit; - struct origin *suspect = NULL; - - /* find one suspect to break down */ - for (ent = sb->ent; !suspect && ent; ent = ent->next) - if (!ent->guilty) - suspect = ent->suspect; - if (!suspect) - return; /* all done */ - - origin_incref(suspect); - commit = suspect->commit; - if (!commit->object.parsed) - parse_commit(commit); - if (!(commit->object.flags & UNINTERESTING) && - !(revs->max_age != -1 && commit->date < revs->max_age)) - pass_blame(sb, suspect, opt); - else { - commit->object.flags |= UNINTERESTING; - if (commit->object.parsed) - mark_parents_uninteresting(commit); - } - /* treat root commit as boundary */ - if (!commit->parents && !show_root) - commit->object.flags |= UNINTERESTING; - - /* Take responsibility for the remaining entries */ - for (ent = sb->ent; ent; ent = ent->next) - if (!cmp_suspect(ent->suspect, suspect)) - ent->guilty = 1; - origin_decref(suspect); - - if (DEBUG) /* sanity */ - sanity_check_refcnt(sb); - } -} - -static const char *format_time(unsigned long time, const char *tz_str, - int show_raw_time) -{ - static char time_buf[128]; - time_t t = time; - int minutes, tz; - struct tm *tm; - - if (show_raw_time) { - sprintf(time_buf, "%lu %s", time, tz_str); - return time_buf; - } - - tz = atoi(tz_str); - minutes = tz < 0 ? -tz : tz; - minutes = (minutes / 100)*60 + (minutes % 100); - minutes = tz < 0 ? -minutes : minutes; - t = time + minutes * 60; - tm = gmtime(&t); - - strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm); - strcat(time_buf, tz_str); - return time_buf; -} - +/* + * Information on commits, used for output. + */ struct commit_info { - char *author; - char *author_mail; + const char *author; + const char *author_mail; unsigned long author_time; - char *author_tz; + const char *author_tz; /* filled only when asked for details */ - char *committer; - char *committer_mail; + const char *committer; + const char *committer_mail; unsigned long committer_time; - char *committer_tz; + const char *committer_tz; - char *summary; + const char *summary; }; +/* + * Parse author/committer line in the commit object buffer + */ static void get_ac_line(const char *inbuf, const char *what, - int bufsz, char *person, char **mail, - unsigned long *time, char **tz) + int bufsz, char *person, const char **mail, + unsigned long *time, const char **tz) { int len; char *tmp, *endp; @@ -1170,7 +1280,7 @@ static void get_ac_line(const char *inbuf, const char *what, if (bufsz <= len) { error_out: /* Ugh */ - person = *mail = *tz = "(unknown)"; + *mail = *tz = "(unknown)"; *time = 0; return; } @@ -1205,14 +1315,15 @@ static void get_commit_info(struct commit *commit, static char committer_buf[1024]; static char summary_buf[1024]; - /* We've operated without save_commit_buffer, so + /* + * We've operated without save_commit_buffer, so * we now need to populate them for output. */ if (!commit->buffer) { - char type[20]; + enum object_type type; unsigned long size; commit->buffer = - read_sha1_file(commit->object.sha1, type, &size); + read_sha1_file(commit->object.sha1, &type, &size); } ret->author = author_buf; get_ac_line(commit->buffer, "\nauthor ", @@ -1237,14 +1348,135 @@ static void get_commit_info(struct commit *commit, tmp += 2; endp = strchr(tmp, '\n'); if (!endp) - goto error_out; + endp = tmp + strlen(tmp); len = endp - tmp; - if (len >= sizeof(summary_buf)) + if (len >= sizeof(summary_buf) || len == 0) goto error_out; memcpy(summary_buf, tmp, len); summary_buf[len] = 0; } +/* + * To allow LF and other nonportable characters in pathnames, + * they are c-style quoted as needed. + */ +static void write_filename_info(const char *path) +{ + printf("filename "); + write_name_quoted(NULL, 0, path, 1, stdout); + putchar('\n'); +} + +/* + * The blame_entry is found to be guilty for the range. Mark it + * as such, and show it in incremental output. + */ +static void found_guilty_entry(struct blame_entry *ent) +{ + if (ent->guilty) + return; + ent->guilty = 1; + if (incremental) { + struct origin *suspect = ent->suspect; + + printf("%s %d %d %d\n", + sha1_to_hex(suspect->commit->object.sha1), + ent->s_lno + 1, ent->lno + 1, ent->num_lines); + if (!(suspect->commit->object.flags & METAINFO_SHOWN)) { + struct commit_info ci; + suspect->commit->object.flags |= METAINFO_SHOWN; + get_commit_info(suspect->commit, &ci, 1); + printf("author %s\n", ci.author); + printf("author-mail %s\n", ci.author_mail); + printf("author-time %lu\n", ci.author_time); + printf("author-tz %s\n", ci.author_tz); + printf("committer %s\n", ci.committer); + printf("committer-mail %s\n", ci.committer_mail); + printf("committer-time %lu\n", ci.committer_time); + printf("committer-tz %s\n", ci.committer_tz); + printf("summary %s\n", ci.summary); + if (suspect->commit->object.flags & UNINTERESTING) + printf("boundary\n"); + } + write_filename_info(suspect->path); + } +} + +/* + * The main loop -- while the scoreboard has lines whose true origin + * is still unknown, pick one blame_entry, and allow its current + * suspect to pass blames to its parents. + */ +static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) +{ + while (1) { + struct blame_entry *ent; + struct commit *commit; + struct origin *suspect = NULL; + + /* find one suspect to break down */ + for (ent = sb->ent; !suspect && ent; ent = ent->next) + if (!ent->guilty) + suspect = ent->suspect; + if (!suspect) + return; /* all done */ + + /* + * We will use this suspect later in the loop, + * so hold onto it in the meantime. + */ + origin_incref(suspect); + commit = suspect->commit; + if (!commit->object.parsed) + parse_commit(commit); + if (!(commit->object.flags & UNINTERESTING) && + !(revs->max_age != -1 && commit->date < revs->max_age)) + pass_blame(sb, suspect, opt); + else { + commit->object.flags |= UNINTERESTING; + if (commit->object.parsed) + mark_parents_uninteresting(commit); + } + /* treat root commit as boundary */ + if (!commit->parents && !show_root) + commit->object.flags |= UNINTERESTING; + + /* Take responsibility for the remaining entries */ + for (ent = sb->ent; ent; ent = ent->next) + if (!cmp_suspect(ent->suspect, suspect)) + found_guilty_entry(ent); + origin_decref(suspect); + + if (DEBUG) /* sanity */ + sanity_check_refcnt(sb); + } +} + +static const char *format_time(unsigned long time, const char *tz_str, + int show_raw_time) +{ + static char time_buf[128]; + time_t t = time; + int minutes, tz; + struct tm *tm; + + if (show_raw_time) { + sprintf(time_buf, "%lu %s", time, tz_str); + return time_buf; + } + + tz = atoi(tz_str); + minutes = tz < 0 ? -tz : tz; + minutes = (minutes / 100)*60 + (minutes % 100); + minutes = tz < 0 ? -minutes : minutes; + t = time + minutes * 60; + tm = gmtime(&t); + + strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm); + strcat(time_buf, tz_str); + return time_buf; +} + #define OUTPUT_ANNOTATE_COMPAT 001 #define OUTPUT_LONG_OBJECT_NAME 002 #define OUTPUT_RAW_TIMESTAMP 004 @@ -1279,13 +1511,13 @@ static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent) printf("committer-mail %s\n", ci.committer_mail); printf("committer-time %lu\n", ci.committer_time); printf("committer-tz %s\n", ci.committer_tz); - printf("filename %s\n", suspect->path); + write_filename_info(suspect->path); printf("summary %s\n", ci.summary); if (suspect->commit->object.flags & UNINTERESTING) printf("boundary\n"); } else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH) - printf("filename %s\n", suspect->path); + write_filename_info(suspect->path); cp = nth_line(sb, ent->lno); for (cnt = 0; cnt < ent->num_lines; cnt++) { @@ -1321,12 +1553,12 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8; if (suspect->commit->object.flags & UNINTERESTING) { - if (!blank_boundary) { + if (blank_boundary) + memset(hex, ' ', length); + else if (!cmd_is_annotate) { length--; putchar('^'); } - else - memset(hex, ' ', length); } printf("%.*s", length, hex); @@ -1390,6 +1622,10 @@ static void output(struct scoreboard *sb, int option) } } +/* + * To allow quick access to the contents of nth line in the + * final image, prepare an index in the scoreboard. + */ static int prepare_lines(struct scoreboard *sb) { const char *buf = sb->final_buf; @@ -1417,6 +1653,11 @@ static int prepare_lines(struct scoreboard *sb) return sb->num_lines; } +/* + * Add phony grafts for use with -S; this is primarily to + * support git-cvsserver that wants to give a linear history + * to its clients. + */ static int read_ancestry(const char *graft_file) { FILE *fp = fopen(graft_file, "r"); @@ -1434,6 +1675,9 @@ static int read_ancestry(const char *graft_file) return 0; } +/* + * How many columns do we need to show line numbers in decimal? + */ static int lineno_width(int lines) { int i, width; @@ -1443,6 +1687,10 @@ static int lineno_width(int lines) return width; } +/* + * How many columns do we need to show line numbers, authors, + * and filenames? + */ static void find_alignment(struct scoreboard *sb, int *option) { int longest_src_lines = 0; @@ -1481,6 +1729,10 @@ static void find_alignment(struct scoreboard *sb, int *option) max_score_digits = lineno_width(largest_score); } +/* + * For debugging -- origin is refcounted, and this asserts that + * we do not underflow. + */ static void sanity_check_refcnt(struct scoreboard *sb) { int baa = 0; @@ -1502,8 +1754,9 @@ static void sanity_check_refcnt(struct scoreboard *sb) ent->suspect->refcnt = -ent->suspect->refcnt; } for (ent = sb->ent; ent; ent = ent->next) { - /* then pick each and see if they have the the correct - * refcnt. + /* + * ... then pick each and see if they have the the + * correct refcnt. */ int found; struct blame_entry *e; @@ -1533,6 +1786,10 @@ static void sanity_check_refcnt(struct scoreboard *sb) } } +/* + * Used for the command line parsing; check if the path exists + * in the working tree. + */ static int has_path_in_work_tree(const char *path) { struct stat st; @@ -1555,6 +1812,9 @@ static const char *add_prefix(const char *prefix, const char *path) return prefix_path(prefix, strlen(prefix), path); } +/* + * Parsing of (comma separated) one item in the -L option + */ static const char *parse_loc(const char *spec, struct scoreboard *sb, long lno, long begin, long *ret) @@ -1629,6 +1889,9 @@ static const char *parse_loc(const char *spec, } } +/* + * Parsing of -L option + */ static void prepare_blame_range(struct scoreboard *sb, const char *bottomtop, long lno, @@ -1659,6 +1922,137 @@ static int git_blame_config(const char *var, const char *value) return git_default_config(var, value); } +static struct commit *fake_working_tree_commit(const char *path, const char *contents_from) +{ + struct commit *commit; + struct origin *origin; + unsigned char head_sha1[20]; + char *buf; + const char *ident; + int fd; + time_t now; + unsigned long fin_size; + int size, len; + struct cache_entry *ce; + unsigned mode; + + if (get_sha1("HEAD", head_sha1)) + die("No such ref: HEAD"); + + time(&now); + commit = xcalloc(1, sizeof(*commit)); + commit->parents = xcalloc(1, sizeof(*commit->parents)); + commit->parents->item = lookup_commit_reference(head_sha1); + commit->object.parsed = 1; + commit->date = now; + commit->object.type = OBJ_COMMIT; + + origin = make_origin(commit, path); + + if (!contents_from || strcmp("-", contents_from)) { + struct stat st; + const char *read_from; + + if (contents_from) { + if (stat(contents_from, &st) < 0) + die("Cannot stat %s", contents_from); + read_from = contents_from; + } + else { + if (lstat(path, &st) < 0) + die("Cannot lstat %s", path); + read_from = path; + } + fin_size = xsize_t(st.st_size); + buf = xmalloc(fin_size+1); + mode = canon_mode(st.st_mode); + switch (st.st_mode & S_IFMT) { + case S_IFREG: + fd = open(read_from, O_RDONLY); + if (fd < 0) + die("cannot open %s", read_from); + if (read_in_full(fd, buf, fin_size) != fin_size) + die("cannot read %s", read_from); + break; + case S_IFLNK: + if (readlink(read_from, buf, fin_size+1) != fin_size) + die("cannot readlink %s", read_from); + break; + default: + die("unsupported file type %s", read_from); + } + } + else { + /* Reading from stdin */ + contents_from = "standard input"; + buf = NULL; + fin_size = 0; + mode = 0; + while (1) { + ssize_t cnt = 8192; + buf = xrealloc(buf, fin_size + cnt); + cnt = xread(0, buf + fin_size, cnt); + if (cnt < 0) + die("read error %s from stdin", + strerror(errno)); + if (!cnt) + break; + fin_size += cnt; + } + buf = xrealloc(buf, fin_size + 1); + } + buf[fin_size] = 0; + origin->file.ptr = buf; + origin->file.size = fin_size; + pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1); + commit->util = origin; + + /* + * Read the current index, replace the path entry with + * origin->blob_sha1 without mucking with its mode or type + * bits; we are not going to write this index out -- we just + * want to run "diff-index --cached". + */ + discard_cache(); + read_cache(); + + len = strlen(path); + if (!mode) { + int pos = cache_name_pos(path, len); + if (0 <= pos) + mode = ntohl(active_cache[pos]->ce_mode); + else + /* Let's not bother reading from HEAD tree */ + mode = S_IFREG | 0644; + } + size = cache_entry_size(len); + ce = xcalloc(1, size); + hashcpy(ce->sha1, origin->blob_sha1); + memcpy(ce->name, path, len); + ce->ce_flags = create_ce_flags(len, 0); + ce->ce_mode = create_ce_mode(mode); + add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); + + /* + * We are not going to write this out, so this does not matter + * right now, but someday we might optimize diff-index --cached + * with cache-tree information. + */ + cache_tree_invalidate_path(active_cache_tree, path); + + commit->buffer = xmalloc(400); + ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0); + sprintf(commit->buffer, + "tree 0000000000000000000000000000000000000000\n" + "parent %s\n" + "author %s\n" + "committer %s\n\n" + "Version of %s from %s\n", + sha1_to_hex(head_sha1), + ident, ident, path, contents_from ? contents_from : path); + return commit; +} + int cmd_blame(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@ -1669,10 +2063,14 @@ int cmd_blame(int argc, const char **argv, const char *prefix) int i, seen_dashdash, unk, opt; long bottom, top, lno; int output_option = 0; + int show_stats = 0; const char *revs_file = NULL; const char *final_commit_name = NULL; - char type[10]; + enum object_type type; const char *bottomtop = NULL; + const char *contents_from = NULL; + + cmd_is_annotate = !strcmp(argv[0], "annotate"); git_config(git_blame_config); save_commit_buffer = 0; @@ -1687,6 +2085,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) blank_boundary = 1; else if (!strcmp("--root", arg)) show_root = 1; + else if (!strcmp(arg, "--show-stats")) + show_stats = 1; else if (!strcmp("-c", arg)) output_option |= OUTPUT_ANNOTATE_COMPAT; else if (!strcmp("-t", arg)) @@ -1695,17 +2095,17 @@ int cmd_blame(int argc, const char **argv, const char *prefix) output_option |= OUTPUT_LONG_OBJECT_NAME; else if (!strcmp("-S", arg) && ++i < argc) revs_file = argv[i]; - else if (!strncmp("-M", arg, 2)) { + else if (!prefixcmp(arg, "-M")) { opt |= PICKAXE_BLAME_MOVE; blame_move_score = parse_score(arg+2); } - else if (!strncmp("-C", arg, 2)) { + else if (!prefixcmp(arg, "-C")) { if (opt & PICKAXE_BLAME_COPY) opt |= PICKAXE_BLAME_COPY_HARDER; opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE; blame_copy_score = parse_score(arg+2); } - else if (!strncmp("-L", arg, 2)) { + else if (!prefixcmp(arg, "-L")) { if (!arg[2]) { if (++i >= argc) usage(blame_usage); @@ -1717,6 +2117,13 @@ int cmd_blame(int argc, const char **argv, const char *prefix) die("More than one '-L n,m' option given"); bottomtop = arg; } + else if (!strcmp("--contents", arg)) { + if (++i >= argc) + usage(blame_usage); + contents_from = argv[i]; + } + else if (!strcmp("--incremental", arg)) + incremental = 1; else if (!strcmp("--score-debug", arg)) output_option |= OUTPUT_SHOW_SCORE; else if (!strcmp("-f", arg) || @@ -1737,14 +2144,18 @@ int cmd_blame(int argc, const char **argv, const char *prefix) argv[unk++] = arg; } + if (!incremental) + setup_pager(); + if (!blame_move_score) blame_move_score = BLAME_DEFAULT_MOVE_SCORE; if (!blame_copy_score) blame_copy_score = BLAME_DEFAULT_COPY_SCORE; - /* We have collected options unknown to us in argv[1..unk] + /* + * We have collected options unknown to us in argv[1..unk] * which are to be passed to revision machinery if we are - * going to do the "bottom" procesing. + * going to do the "bottom" processing. * * The remaining are: * @@ -1790,6 +2201,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix) if (!strcmp(argv[j], "--")) seen_dashdash = j; if (seen_dashdash) { + /* (2) */ if (seen_dashdash + 1 != argc - 1) usage(blame_usage); path = add_prefix(prefix, argv[seen_dashdash + 1]); @@ -1798,6 +2210,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) } else { /* (3) */ + if (argc <= i) + usage(blame_usage); path = add_prefix(prefix, argv[i]); if (i + 1 == argc - 1) { final_commit_name = argv[i + 1]; @@ -1822,17 +2236,19 @@ int cmd_blame(int argc, const char **argv, const char *prefix) if (final_commit_name) argv[unk++] = final_commit_name; - /* Now we got rev and path. We do not want the path pruning + /* + * Now we got rev and path. We do not want the path pruning * but we may want "bottom" processing. */ argv[unk++] = "--"; /* terminate the rev name */ argv[unk] = NULL; init_revisions(&revs, NULL); - setup_revisions(unk, argv, &revs, "HEAD"); + setup_revisions(unk, argv, &revs, NULL); memset(&sb, 0, sizeof(sb)); - /* There must be one and only one positive commit in the + /* + * There must be one and only one positive commit in the * revs->pending array. */ for (i = 0; i < revs.pending.nr; i++) { @@ -1853,27 +2269,40 @@ int cmd_blame(int argc, const char **argv, const char *prefix) } if (!sb.final) { - /* "--not A B -- path" without anything positive */ - unsigned char head_sha1[20]; - - final_commit_name = "HEAD"; - if (get_sha1(final_commit_name, head_sha1)) - die("No such ref: HEAD"); - sb.final = lookup_commit_reference(head_sha1); - add_pending_object(&revs, &(sb.final->object), "HEAD"); + /* + * "--not A B -- path" without anything positive; + * do not default to HEAD, but use the working tree + * or "--contents". + */ + sb.final = fake_working_tree_commit(path, contents_from); + add_pending_object(&revs, &(sb.final->object), ":"); } + else if (contents_from) + die("Cannot use --contents with final commit object name"); - /* If we have bottom, this will mark the ancestors of the + /* + * If we have bottom, this will mark the ancestors of the * bottom commits we would reach while traversing as * uninteresting. */ prepare_revision_walk(&revs); - o = get_origin(&sb, sb.final, path); - if (fill_blob_sha1(o)) - die("no such path %s in %s", path, final_commit_name); + if (is_null_sha1(sb.final->object.sha1)) { + char *buf; + o = sb.final->util; + buf = xmalloc(o->file.size + 1); + memcpy(buf, o->file.ptr, o->file.size + 1); + sb.final_buf = buf; + sb.final_buf_size = o->file.size; + } + else { + o = get_origin(&sb, sb.final, path); + if (fill_blob_sha1(o)) + die("no such path %s in %s", path, final_commit_name); - sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size); + sb.final_buf = read_sha1_file(o->blob_sha1, &type, + &sb.final_buf_size); + } num_read_blob++; lno = prepare_lines(&sb); @@ -1907,6 +2336,9 @@ int cmd_blame(int argc, const char **argv, const char *prefix) assign_blame(&sb, &revs, opt); + if (incremental) + return 0; + coalesce(&sb); if (!(output_option & OUTPUT_PORCELAIN)) @@ -1920,7 +2352,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix) ent = e; } - if (DEBUG) { + if (show_stats) { printf("num read blob: %d\n", num_read_blob); printf("num get patch: %d\n", num_get_patch); printf("num commits: %d\n", num_commits);