Code

Correct fast-import timezone documentation.
[git.git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
19 static char blame_usage[] =
20 "git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
21 "  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
22 "  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
23 "  -l, --long          Show long commit SHA1 (Default: off)\n"
24 "  --root              Do not treat root commits as boundaries (Default: off)\n"
25 "  -t, --time          Show raw timestamp (Default: off)\n"
26 "  -f, --show-name     Show original filename (Default: auto)\n"
27 "  -n, --show-number   Show original linenumber (Default: off)\n"
28 "  -p, --porcelain     Show in a format designed for machine consumption\n"
29 "  -L n,m              Process only line range n,m, counting from 1\n"
30 "  -M, -C              Find line movements within and across files\n"
31 "  --incremental       Show blame entries as we find them, incrementally\n"
32 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
34 static int longest_file;
35 static int longest_author;
36 static int max_orig_digits;
37 static int max_digits;
38 static int max_score_digits;
39 static int show_root;
40 static int blank_boundary;
41 static int incremental;
43 #ifndef DEBUG
44 #define DEBUG 0
45 #endif
47 /* stats */
48 static int num_read_blob;
49 static int num_get_patch;
50 static int num_commits;
52 #define PICKAXE_BLAME_MOVE              01
53 #define PICKAXE_BLAME_COPY              02
54 #define PICKAXE_BLAME_COPY_HARDER       04
56 /*
57  * blame for a blame_entry with score lower than these thresholds
58  * is not passed to the parent using move/copy logic.
59  */
60 static unsigned blame_move_score;
61 static unsigned blame_copy_score;
62 #define BLAME_DEFAULT_MOVE_SCORE        20
63 #define BLAME_DEFAULT_COPY_SCORE        40
65 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
66 #define METAINFO_SHOWN          (1u<<12)
67 #define MORE_THAN_ONE_PATH      (1u<<13)
69 /*
70  * One blob in a commit that is being suspected
71  */
72 struct origin {
73         int refcnt;
74         struct commit *commit;
75         mmfile_t file;
76         unsigned char blob_sha1[20];
77         char path[FLEX_ARRAY];
78 };
80 /*
81  * Given an origin, prepare mmfile_t structure to be used by the
82  * diff machinery
83  */
84 static char *fill_origin_blob(struct origin *o, mmfile_t *file)
85 {
86         if (!o->file.ptr) {
87                 char type[10];
88                 num_read_blob++;
89                 file->ptr = read_sha1_file(o->blob_sha1, type,
90                                            (unsigned long *)(&(file->size)));
91                 o->file = *file;
92         }
93         else
94                 *file = o->file;
95         return file->ptr;
96 }
98 /*
99  * Origin is refcounted and usually we keep the blob contents to be
100  * reused.
101  */
102 static inline struct origin *origin_incref(struct origin *o)
104         if (o)
105                 o->refcnt++;
106         return o;
109 static void origin_decref(struct origin *o)
111         if (o && --o->refcnt <= 0) {
112                 if (o->file.ptr)
113                         free(o->file.ptr);
114                 memset(o, 0, sizeof(*o));
115                 free(o);
116         }
119 /*
120  * Each group of lines is described by a blame_entry; it can be split
121  * as we pass blame to the parents.  They form a linked list in the
122  * scoreboard structure, sorted by the target line number.
123  */
124 struct blame_entry {
125         struct blame_entry *prev;
126         struct blame_entry *next;
128         /* the first line of this group in the final image;
129          * internally all line numbers are 0 based.
130          */
131         int lno;
133         /* how many lines this group has */
134         int num_lines;
136         /* the commit that introduced this group into the final image */
137         struct origin *suspect;
139         /* true if the suspect is truly guilty; false while we have not
140          * checked if the group came from one of its parents.
141          */
142         char guilty;
144         /* the line number of the first line of this group in the
145          * suspect's file; internally all line numbers are 0 based.
146          */
147         int s_lno;
149         /* how significant this entry is -- cached to avoid
150          * scanning the lines over and over.
151          */
152         unsigned score;
153 };
155 /*
156  * The current state of the blame assignment.
157  */
158 struct scoreboard {
159         /* the final commit (i.e. where we started digging from) */
160         struct commit *final;
162         const char *path;
164         /*
165          * The contents in the final image.
166          * Used by many functions to obtain contents of the nth line,
167          * indexed with scoreboard.lineno[blame_entry.lno].
168          */
169         const char *final_buf;
170         unsigned long final_buf_size;
172         /* linked list of blames */
173         struct blame_entry *ent;
175         /* look-up a line in the final buffer */
176         int num_lines;
177         int *lineno;
178 };
180 static int cmp_suspect(struct origin *a, struct origin *b)
182         int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
183         if (cmp)
184                 return cmp;
185         return strcmp(a->path, b->path);
188 #define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
190 static void sanity_check_refcnt(struct scoreboard *);
192 /*
193  * If two blame entries that are next to each other came from
194  * contiguous lines in the same origin (i.e. <commit, path> pair),
195  * merge them together.
196  */
197 static void coalesce(struct scoreboard *sb)
199         struct blame_entry *ent, *next;
201         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
202                 if (!cmp_suspect(ent->suspect, next->suspect) &&
203                     ent->guilty == next->guilty &&
204                     ent->s_lno + ent->num_lines == next->s_lno) {
205                         ent->num_lines += next->num_lines;
206                         ent->next = next->next;
207                         if (ent->next)
208                                 ent->next->prev = ent;
209                         origin_decref(next->suspect);
210                         free(next);
211                         ent->score = 0;
212                         next = ent; /* again */
213                 }
214         }
216         if (DEBUG) /* sanity */
217                 sanity_check_refcnt(sb);
220 /*
221  * Given a commit and a path in it, create a new origin structure.
222  * The callers that add blame to the scoreboard should use
223  * get_origin() to obtain shared, refcounted copy instead of calling
224  * this function directly.
225  */
226 static struct origin *make_origin(struct commit *commit, const char *path)
228         struct origin *o;
229         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
230         o->commit = commit;
231         o->refcnt = 1;
232         strcpy(o->path, path);
233         return o;
236 /*
237  * Locate an existing origin or create a new one.
238  */
239 static struct origin *get_origin(struct scoreboard *sb,
240                                  struct commit *commit,
241                                  const char *path)
243         struct blame_entry *e;
245         for (e = sb->ent; e; e = e->next) {
246                 if (e->suspect->commit == commit &&
247                     !strcmp(e->suspect->path, path))
248                         return origin_incref(e->suspect);
249         }
250         return make_origin(commit, path);
253 /*
254  * Fill the blob_sha1 field of an origin if it hasn't, so that later
255  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
256  * for an origin is also used to pass the blame for the entire file to
257  * the parent to detect the case where a child's blob is identical to
258  * that of its parent's.
259  */
260 static int fill_blob_sha1(struct origin *origin)
262         unsigned mode;
263         char type[10];
265         if (!is_null_sha1(origin->blob_sha1))
266                 return 0;
267         if (get_tree_entry(origin->commit->object.sha1,
268                            origin->path,
269                            origin->blob_sha1, &mode))
270                 goto error_out;
271         if (sha1_object_info(origin->blob_sha1, type, NULL) ||
272             strcmp(type, blob_type))
273                 goto error_out;
274         return 0;
275  error_out:
276         hashclr(origin->blob_sha1);
277         return -1;
280 /*
281  * We have an origin -- check if the same path exists in the
282  * parent and return an origin structure to represent it.
283  */
284 static struct origin *find_origin(struct scoreboard *sb,
285                                   struct commit *parent,
286                                   struct origin *origin)
288         struct origin *porigin = NULL;
289         struct diff_options diff_opts;
290         const char *paths[2];
292         if (parent->util) {
293                 /*
294                  * Each commit object can cache one origin in that
295                  * commit.  This is a freestanding copy of origin and
296                  * not refcounted.
297                  */
298                 struct origin *cached = parent->util;
299                 if (!strcmp(cached->path, origin->path)) {
300                         /*
301                          * The same path between origin and its parent
302                          * without renaming -- the most common case.
303                          */
304                         porigin = get_origin(sb, parent, cached->path);
306                         /*
307                          * If the origin was newly created (i.e. get_origin
308                          * would call make_origin if none is found in the
309                          * scoreboard), it does not know the blob_sha1,
310                          * so copy it.  Otherwise porigin was in the
311                          * scoreboard and already knows blob_sha1.
312                          */
313                         if (porigin->refcnt == 1)
314                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
315                         return porigin;
316                 }
317                 /* otherwise it was not very useful; free it */
318                 free(parent->util);
319                 parent->util = NULL;
320         }
322         /* See if the origin->path is different between parent
323          * and origin first.  Most of the time they are the
324          * same and diff-tree is fairly efficient about this.
325          */
326         diff_setup(&diff_opts);
327         diff_opts.recursive = 1;
328         diff_opts.detect_rename = 0;
329         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
330         paths[0] = origin->path;
331         paths[1] = NULL;
333         diff_tree_setup_paths(paths, &diff_opts);
334         if (diff_setup_done(&diff_opts) < 0)
335                 die("diff-setup");
336         diff_tree_sha1(parent->tree->object.sha1,
337                        origin->commit->tree->object.sha1,
338                        "", &diff_opts);
339         diffcore_std(&diff_opts);
341         /* It is either one entry that says "modified", or "created",
342          * or nothing.
343          */
344         if (!diff_queued_diff.nr) {
345                 /* The path is the same as parent */
346                 porigin = get_origin(sb, parent, origin->path);
347                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
348         }
349         else if (diff_queued_diff.nr != 1)
350                 die("internal error in blame::find_origin");
351         else {
352                 struct diff_filepair *p = diff_queued_diff.queue[0];
353                 switch (p->status) {
354                 default:
355                         die("internal error in blame::find_origin (%c)",
356                             p->status);
357                 case 'M':
358                         porigin = get_origin(sb, parent, origin->path);
359                         hashcpy(porigin->blob_sha1, p->one->sha1);
360                         break;
361                 case 'A':
362                 case 'T':
363                         /* Did not exist in parent, or type changed */
364                         break;
365                 }
366         }
367         diff_flush(&diff_opts);
368         if (porigin) {
369                 /*
370                  * Create a freestanding copy that is not part of
371                  * the refcounted origin found in the scoreboard, and
372                  * cache it in the commit.
373                  */
374                 struct origin *cached;
376                 cached = make_origin(porigin->commit, porigin->path);
377                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
378                 parent->util = cached;
379         }
380         return porigin;
383 /*
384  * We have an origin -- find the path that corresponds to it in its
385  * parent and return an origin structure to represent it.
386  */
387 static struct origin *find_rename(struct scoreboard *sb,
388                                   struct commit *parent,
389                                   struct origin *origin)
391         struct origin *porigin = NULL;
392         struct diff_options diff_opts;
393         int i;
394         const char *paths[2];
396         diff_setup(&diff_opts);
397         diff_opts.recursive = 1;
398         diff_opts.detect_rename = DIFF_DETECT_RENAME;
399         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
400         diff_opts.single_follow = origin->path;
401         paths[0] = NULL;
402         diff_tree_setup_paths(paths, &diff_opts);
403         if (diff_setup_done(&diff_opts) < 0)
404                 die("diff-setup");
405         diff_tree_sha1(parent->tree->object.sha1,
406                        origin->commit->tree->object.sha1,
407                        "", &diff_opts);
408         diffcore_std(&diff_opts);
410         for (i = 0; i < diff_queued_diff.nr; i++) {
411                 struct diff_filepair *p = diff_queued_diff.queue[i];
412                 if ((p->status == 'R' || p->status == 'C') &&
413                     !strcmp(p->two->path, origin->path)) {
414                         porigin = get_origin(sb, parent, p->one->path);
415                         hashcpy(porigin->blob_sha1, p->one->sha1);
416                         break;
417                 }
418         }
419         diff_flush(&diff_opts);
420         return porigin;
423 /*
424  * Parsing of patch chunks...
425  */
426 struct chunk {
427         /* line number in postimage; up to but not including this
428          * line is the same as preimage
429          */
430         int same;
432         /* preimage line number after this chunk */
433         int p_next;
435         /* postimage line number after this chunk */
436         int t_next;
437 };
439 struct patch {
440         struct chunk *chunks;
441         int num;
442 };
444 struct blame_diff_state {
445         struct xdiff_emit_state xm;
446         struct patch *ret;
447         unsigned hunk_post_context;
448         unsigned hunk_in_pre_context : 1;
449 };
451 static void process_u_diff(void *state_, char *line, unsigned long len)
453         struct blame_diff_state *state = state_;
454         struct chunk *chunk;
455         int off1, off2, len1, len2, num;
457         num = state->ret->num;
458         if (len < 4 || line[0] != '@' || line[1] != '@') {
459                 if (state->hunk_in_pre_context && line[0] == ' ')
460                         state->ret->chunks[num - 1].same++;
461                 else {
462                         state->hunk_in_pre_context = 0;
463                         if (line[0] == ' ')
464                                 state->hunk_post_context++;
465                         else
466                                 state->hunk_post_context = 0;
467                 }
468                 return;
469         }
471         if (num && state->hunk_post_context) {
472                 chunk = &state->ret->chunks[num - 1];
473                 chunk->p_next -= state->hunk_post_context;
474                 chunk->t_next -= state->hunk_post_context;
475         }
476         state->ret->num = ++num;
477         state->ret->chunks = xrealloc(state->ret->chunks,
478                                       sizeof(struct chunk) * num);
479         chunk = &state->ret->chunks[num - 1];
480         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
481                 state->ret->num--;
482                 return;
483         }
485         /* Line numbers in patch output are one based. */
486         off1--;
487         off2--;
489         chunk->same = len2 ? off2 : (off2 + 1);
491         chunk->p_next = off1 + (len1 ? len1 : 1);
492         chunk->t_next = chunk->same + len2;
493         state->hunk_in_pre_context = 1;
494         state->hunk_post_context = 0;
497 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
498                                     int context)
500         struct blame_diff_state state;
501         xpparam_t xpp;
502         xdemitconf_t xecfg;
503         xdemitcb_t ecb;
505         xpp.flags = XDF_NEED_MINIMAL;
506         xecfg.ctxlen = context;
507         xecfg.flags = 0;
508         ecb.outf = xdiff_outf;
509         ecb.priv = &state;
510         memset(&state, 0, sizeof(state));
511         state.xm.consume = process_u_diff;
512         state.ret = xmalloc(sizeof(struct patch));
513         state.ret->chunks = NULL;
514         state.ret->num = 0;
516         xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
518         if (state.ret->num) {
519                 struct chunk *chunk;
520                 chunk = &state.ret->chunks[state.ret->num - 1];
521                 chunk->p_next -= state.hunk_post_context;
522                 chunk->t_next -= state.hunk_post_context;
523         }
524         return state.ret;
527 /*
528  * Run diff between two origins and grab the patch output, so that
529  * we can pass blame for lines origin is currently suspected for
530  * to its parent.
531  */
532 static struct patch *get_patch(struct origin *parent, struct origin *origin)
534         mmfile_t file_p, file_o;
535         struct patch *patch;
537         fill_origin_blob(parent, &file_p);
538         fill_origin_blob(origin, &file_o);
539         if (!file_p.ptr || !file_o.ptr)
540                 return NULL;
541         patch = compare_buffer(&file_p, &file_o, 0);
542         num_get_patch++;
543         return patch;
546 static void free_patch(struct patch *p)
548         free(p->chunks);
549         free(p);
552 /*
553  * Link in a new blame entry to the scorebord.  Entries that cover the
554  * same line range have been removed from the scoreboard previously.
555  */
556 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
558         struct blame_entry *ent, *prev = NULL;
560         origin_incref(e->suspect);
562         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
563                 prev = ent;
565         /* prev, if not NULL, is the last one that is below e */
566         e->prev = prev;
567         if (prev) {
568                 e->next = prev->next;
569                 prev->next = e;
570         }
571         else {
572                 e->next = sb->ent;
573                 sb->ent = e;
574         }
575         if (e->next)
576                 e->next->prev = e;
579 /*
580  * src typically is on-stack; we want to copy the information in it to
581  * an malloced blame_entry that is already on the linked list of the
582  * scoreboard.  The origin of dst loses a refcnt while the origin of src
583  * gains one.
584  */
585 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
587         struct blame_entry *p, *n;
589         p = dst->prev;
590         n = dst->next;
591         origin_incref(src->suspect);
592         origin_decref(dst->suspect);
593         memcpy(dst, src, sizeof(*src));
594         dst->prev = p;
595         dst->next = n;
596         dst->score = 0;
599 static const char *nth_line(struct scoreboard *sb, int lno)
601         return sb->final_buf + sb->lineno[lno];
604 /*
605  * It is known that lines between tlno to same came from parent, and e
606  * has an overlap with that range.  it also is known that parent's
607  * line plno corresponds to e's line tlno.
608  *
609  *                <---- e ----->
610  *                   <------>
611  *                   <------------>
612  *             <------------>
613  *             <------------------>
614  *
615  * Split e into potentially three parts; before this chunk, the chunk
616  * to be blamed for the parent, and after that portion.
617  */
618 static void split_overlap(struct blame_entry *split,
619                           struct blame_entry *e,
620                           int tlno, int plno, int same,
621                           struct origin *parent)
623         int chunk_end_lno;
624         memset(split, 0, sizeof(struct blame_entry [3]));
626         if (e->s_lno < tlno) {
627                 /* there is a pre-chunk part not blamed on parent */
628                 split[0].suspect = origin_incref(e->suspect);
629                 split[0].lno = e->lno;
630                 split[0].s_lno = e->s_lno;
631                 split[0].num_lines = tlno - e->s_lno;
632                 split[1].lno = e->lno + tlno - e->s_lno;
633                 split[1].s_lno = plno;
634         }
635         else {
636                 split[1].lno = e->lno;
637                 split[1].s_lno = plno + (e->s_lno - tlno);
638         }
640         if (same < e->s_lno + e->num_lines) {
641                 /* there is a post-chunk part not blamed on parent */
642                 split[2].suspect = origin_incref(e->suspect);
643                 split[2].lno = e->lno + (same - e->s_lno);
644                 split[2].s_lno = e->s_lno + (same - e->s_lno);
645                 split[2].num_lines = e->s_lno + e->num_lines - same;
646                 chunk_end_lno = split[2].lno;
647         }
648         else
649                 chunk_end_lno = e->lno + e->num_lines;
650         split[1].num_lines = chunk_end_lno - split[1].lno;
652         /*
653          * if it turns out there is nothing to blame the parent for,
654          * forget about the splitting.  !split[1].suspect signals this.
655          */
656         if (split[1].num_lines < 1)
657                 return;
658         split[1].suspect = origin_incref(parent);
661 /*
662  * split_overlap() divided an existing blame e into up to three parts
663  * in split.  Adjust the linked list of blames in the scoreboard to
664  * reflect the split.
665  */
666 static void split_blame(struct scoreboard *sb,
667                         struct blame_entry *split,
668                         struct blame_entry *e)
670         struct blame_entry *new_entry;
672         if (split[0].suspect && split[2].suspect) {
673                 /* The first part (reuse storage for the existing entry e) */
674                 dup_entry(e, &split[0]);
676                 /* The last part -- me */
677                 new_entry = xmalloc(sizeof(*new_entry));
678                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
679                 add_blame_entry(sb, new_entry);
681                 /* ... and the middle part -- parent */
682                 new_entry = xmalloc(sizeof(*new_entry));
683                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
684                 add_blame_entry(sb, new_entry);
685         }
686         else if (!split[0].suspect && !split[2].suspect)
687                 /*
688                  * The parent covers the entire area; reuse storage for
689                  * e and replace it with the parent.
690                  */
691                 dup_entry(e, &split[1]);
692         else if (split[0].suspect) {
693                 /* me and then parent */
694                 dup_entry(e, &split[0]);
696                 new_entry = xmalloc(sizeof(*new_entry));
697                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
698                 add_blame_entry(sb, new_entry);
699         }
700         else {
701                 /* parent and then me */
702                 dup_entry(e, &split[1]);
704                 new_entry = xmalloc(sizeof(*new_entry));
705                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
706                 add_blame_entry(sb, new_entry);
707         }
709         if (DEBUG) { /* sanity */
710                 struct blame_entry *ent;
711                 int lno = sb->ent->lno, corrupt = 0;
713                 for (ent = sb->ent; ent; ent = ent->next) {
714                         if (lno != ent->lno)
715                                 corrupt = 1;
716                         if (ent->s_lno < 0)
717                                 corrupt = 1;
718                         lno += ent->num_lines;
719                 }
720                 if (corrupt) {
721                         lno = sb->ent->lno;
722                         for (ent = sb->ent; ent; ent = ent->next) {
723                                 printf("L %8d l %8d n %8d\n",
724                                        lno, ent->lno, ent->num_lines);
725                                 lno = ent->lno + ent->num_lines;
726                         }
727                         die("oops");
728                 }
729         }
732 /*
733  * After splitting the blame, the origins used by the
734  * on-stack blame_entry should lose one refcnt each.
735  */
736 static void decref_split(struct blame_entry *split)
738         int i;
740         for (i = 0; i < 3; i++)
741                 origin_decref(split[i].suspect);
744 /*
745  * Helper for blame_chunk().  blame_entry e is known to overlap with
746  * the patch hunk; split it and pass blame to the parent.
747  */
748 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
749                           int tlno, int plno, int same,
750                           struct origin *parent)
752         struct blame_entry split[3];
754         split_overlap(split, e, tlno, plno, same, parent);
755         if (split[1].suspect)
756                 split_blame(sb, split, e);
757         decref_split(split);
760 /*
761  * Find the line number of the last line the target is suspected for.
762  */
763 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
765         struct blame_entry *e;
766         int last_in_target = -1;
768         for (e = sb->ent; e; e = e->next) {
769                 if (e->guilty || cmp_suspect(e->suspect, target))
770                         continue;
771                 if (last_in_target < e->s_lno + e->num_lines)
772                         last_in_target = e->s_lno + e->num_lines;
773         }
774         return last_in_target;
777 /*
778  * Process one hunk from the patch between the current suspect for
779  * blame_entry e and its parent.  Find and split the overlap, and
780  * pass blame to the overlapping part to the parent.
781  */
782 static void blame_chunk(struct scoreboard *sb,
783                         int tlno, int plno, int same,
784                         struct origin *target, struct origin *parent)
786         struct blame_entry *e;
788         for (e = sb->ent; e; e = e->next) {
789                 if (e->guilty || cmp_suspect(e->suspect, target))
790                         continue;
791                 if (same <= e->s_lno)
792                         continue;
793                 if (tlno < e->s_lno + e->num_lines)
794                         blame_overlap(sb, e, tlno, plno, same, parent);
795         }
798 /*
799  * We are looking at the origin 'target' and aiming to pass blame
800  * for the lines it is suspected to its parent.  Run diff to find
801  * which lines came from parent and pass blame for them.
802  */
803 static int pass_blame_to_parent(struct scoreboard *sb,
804                                 struct origin *target,
805                                 struct origin *parent)
807         int i, last_in_target, plno, tlno;
808         struct patch *patch;
810         last_in_target = find_last_in_target(sb, target);
811         if (last_in_target < 0)
812                 return 1; /* nothing remains for this target */
814         patch = get_patch(parent, target);
815         plno = tlno = 0;
816         for (i = 0; i < patch->num; i++) {
817                 struct chunk *chunk = &patch->chunks[i];
819                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
820                 plno = chunk->p_next;
821                 tlno = chunk->t_next;
822         }
823         /* The rest (i.e. anything after tlno) are the same as the parent */
824         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
826         free_patch(patch);
827         return 0;
830 /*
831  * The lines in blame_entry after splitting blames many times can become
832  * very small and trivial, and at some point it becomes pointless to
833  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
834  * ordinary C program, and it is not worth to say it was copied from
835  * totally unrelated file in the parent.
836  *
837  * Compute how trivial the lines in the blame_entry are.
838  */
839 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
841         unsigned score;
842         const char *cp, *ep;
844         if (e->score)
845                 return e->score;
847         score = 1;
848         cp = nth_line(sb, e->lno);
849         ep = nth_line(sb, e->lno + e->num_lines);
850         while (cp < ep) {
851                 unsigned ch = *((unsigned char *)cp);
852                 if (isalnum(ch))
853                         score++;
854                 cp++;
855         }
856         e->score = score;
857         return score;
860 /*
861  * best_so_far[] and this[] are both a split of an existing blame_entry
862  * that passes blame to the parent.  Maintain best_so_far the best split
863  * so far, by comparing this and best_so_far and copying this into
864  * bst_so_far as needed.
865  */
866 static void copy_split_if_better(struct scoreboard *sb,
867                                  struct blame_entry *best_so_far,
868                                  struct blame_entry *this)
870         int i;
872         if (!this[1].suspect)
873                 return;
874         if (best_so_far[1].suspect) {
875                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
876                         return;
877         }
879         for (i = 0; i < 3; i++)
880                 origin_incref(this[i].suspect);
881         decref_split(best_so_far);
882         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
885 /*
886  * Find the lines from parent that are the same as ent so that
887  * we can pass blames to it.  file_p has the blob contents for
888  * the parent.
889  */
890 static void find_copy_in_blob(struct scoreboard *sb,
891                               struct blame_entry *ent,
892                               struct origin *parent,
893                               struct blame_entry *split,
894                               mmfile_t *file_p)
896         const char *cp;
897         int cnt;
898         mmfile_t file_o;
899         struct patch *patch;
900         int i, plno, tlno;
902         /*
903          * Prepare mmfile that contains only the lines in ent.
904          */
905         cp = nth_line(sb, ent->lno);
906         file_o.ptr = (char*) cp;
907         cnt = ent->num_lines;
909         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
910                 if (*cp++ == '\n')
911                         cnt--;
912         }
913         file_o.size = cp - file_o.ptr;
915         patch = compare_buffer(file_p, &file_o, 1);
917         memset(split, 0, sizeof(struct blame_entry [3]));
918         plno = tlno = 0;
919         for (i = 0; i < patch->num; i++) {
920                 struct chunk *chunk = &patch->chunks[i];
922                 /* tlno to chunk->same are the same as ent */
923                 if (ent->num_lines <= tlno)
924                         break;
925                 if (tlno < chunk->same) {
926                         struct blame_entry this[3];
927                         split_overlap(this, ent,
928                                       tlno + ent->s_lno, plno,
929                                       chunk->same + ent->s_lno,
930                                       parent);
931                         copy_split_if_better(sb, split, this);
932                         decref_split(this);
933                 }
934                 plno = chunk->p_next;
935                 tlno = chunk->t_next;
936         }
937         free_patch(patch);
940 /*
941  * See if lines currently target is suspected for can be attributed to
942  * parent.
943  */
944 static int find_move_in_parent(struct scoreboard *sb,
945                                struct origin *target,
946                                struct origin *parent)
948         int last_in_target, made_progress;
949         struct blame_entry *e, split[3];
950         mmfile_t file_p;
952         last_in_target = find_last_in_target(sb, target);
953         if (last_in_target < 0)
954                 return 1; /* nothing remains for this target */
956         fill_origin_blob(parent, &file_p);
957         if (!file_p.ptr)
958                 return 0;
960         made_progress = 1;
961         while (made_progress) {
962                 made_progress = 0;
963                 for (e = sb->ent; e; e = e->next) {
964                         if (e->guilty || cmp_suspect(e->suspect, target))
965                                 continue;
966                         find_copy_in_blob(sb, e, parent, split, &file_p);
967                         if (split[1].suspect &&
968                             blame_move_score < ent_score(sb, &split[1])) {
969                                 split_blame(sb, split, e);
970                                 made_progress = 1;
971                         }
972                         decref_split(split);
973                 }
974         }
975         return 0;
978 struct blame_list {
979         struct blame_entry *ent;
980         struct blame_entry split[3];
981 };
983 /*
984  * Count the number of entries the target is suspected for,
985  * and prepare a list of entry and the best split.
986  */
987 static struct blame_list *setup_blame_list(struct scoreboard *sb,
988                                            struct origin *target,
989                                            int *num_ents_p)
991         struct blame_entry *e;
992         int num_ents, i;
993         struct blame_list *blame_list = NULL;
995         for (e = sb->ent, num_ents = 0; e; e = e->next)
996                 if (!e->guilty && !cmp_suspect(e->suspect, target))
997                         num_ents++;
998         if (num_ents) {
999                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1000                 for (e = sb->ent, i = 0; e; e = e->next)
1001                         if (!e->guilty && !cmp_suspect(e->suspect, target))
1002                                 blame_list[i++].ent = e;
1003         }
1004         *num_ents_p = num_ents;
1005         return blame_list;
1008 /*
1009  * For lines target is suspected for, see if we can find code movement
1010  * across file boundary from the parent commit.  porigin is the path
1011  * in the parent we already tried.
1012  */
1013 static int find_copy_in_parent(struct scoreboard *sb,
1014                                struct origin *target,
1015                                struct commit *parent,
1016                                struct origin *porigin,
1017                                int opt)
1019         struct diff_options diff_opts;
1020         const char *paths[1];
1021         int i, j;
1022         int retval;
1023         struct blame_list *blame_list;
1024         int num_ents;
1026         blame_list = setup_blame_list(sb, target, &num_ents);
1027         if (!blame_list)
1028                 return 1; /* nothing remains for this target */
1030         diff_setup(&diff_opts);
1031         diff_opts.recursive = 1;
1032         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1034         paths[0] = NULL;
1035         diff_tree_setup_paths(paths, &diff_opts);
1036         if (diff_setup_done(&diff_opts) < 0)
1037                 die("diff-setup");
1039         /* Try "find copies harder" on new path if requested;
1040          * we do not want to use diffcore_rename() actually to
1041          * match things up; find_copies_harder is set only to
1042          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1043          * and this code needs to be after diff_setup_done(), which
1044          * usually makes find-copies-harder imply copy detection.
1045          */
1046         if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
1047             (!porigin || strcmp(target->path, porigin->path)))
1048                 diff_opts.find_copies_harder = 1;
1050         diff_tree_sha1(parent->tree->object.sha1,
1051                        target->commit->tree->object.sha1,
1052                        "", &diff_opts);
1054         if (!diff_opts.find_copies_harder)
1055                 diffcore_std(&diff_opts);
1057         retval = 0;
1058         while (1) {
1059                 int made_progress = 0;
1061                 for (i = 0; i < diff_queued_diff.nr; i++) {
1062                         struct diff_filepair *p = diff_queued_diff.queue[i];
1063                         struct origin *norigin;
1064                         mmfile_t file_p;
1065                         struct blame_entry this[3];
1067                         if (!DIFF_FILE_VALID(p->one))
1068                                 continue; /* does not exist in parent */
1069                         if (porigin && !strcmp(p->one->path, porigin->path))
1070                                 /* find_move already dealt with this path */
1071                                 continue;
1073                         norigin = get_origin(sb, parent, p->one->path);
1074                         hashcpy(norigin->blob_sha1, p->one->sha1);
1075                         fill_origin_blob(norigin, &file_p);
1076                         if (!file_p.ptr)
1077                                 continue;
1079                         for (j = 0; j < num_ents; j++) {
1080                                 find_copy_in_blob(sb, blame_list[j].ent,
1081                                                   norigin, this, &file_p);
1082                                 copy_split_if_better(sb, blame_list[j].split,
1083                                                      this);
1084                                 decref_split(this);
1085                         }
1086                         origin_decref(norigin);
1087                 }
1089                 for (j = 0; j < num_ents; j++) {
1090                         struct blame_entry *split = blame_list[j].split;
1091                         if (split[1].suspect &&
1092                             blame_copy_score < ent_score(sb, &split[1])) {
1093                                 split_blame(sb, split, blame_list[j].ent);
1094                                 made_progress = 1;
1095                         }
1096                         decref_split(split);
1097                 }
1098                 free(blame_list);
1100                 if (!made_progress)
1101                         break;
1102                 blame_list = setup_blame_list(sb, target, &num_ents);
1103                 if (!blame_list) {
1104                         retval = 1;
1105                         break;
1106                 }
1107         }
1108         diff_flush(&diff_opts);
1110         return retval;
1113 /*
1114  * The blobs of origin and porigin exactly match, so everything
1115  * origin is suspected for can be blamed on the parent.
1116  */
1117 static void pass_whole_blame(struct scoreboard *sb,
1118                              struct origin *origin, struct origin *porigin)
1120         struct blame_entry *e;
1122         if (!porigin->file.ptr && origin->file.ptr) {
1123                 /* Steal its file */
1124                 porigin->file = origin->file;
1125                 origin->file.ptr = NULL;
1126         }
1127         for (e = sb->ent; e; e = e->next) {
1128                 if (cmp_suspect(e->suspect, origin))
1129                         continue;
1130                 origin_incref(porigin);
1131                 origin_decref(e->suspect);
1132                 e->suspect = porigin;
1133         }
1136 #define MAXPARENT 16
1138 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1140         int i, pass;
1141         struct commit *commit = origin->commit;
1142         struct commit_list *parent;
1143         struct origin *parent_origin[MAXPARENT], *porigin;
1145         memset(parent_origin, 0, sizeof(parent_origin));
1147         /* The first pass looks for unrenamed path to optimize for
1148          * common cases, then we look for renames in the second pass.
1149          */
1150         for (pass = 0; pass < 2; pass++) {
1151                 struct origin *(*find)(struct scoreboard *,
1152                                        struct commit *, struct origin *);
1153                 find = pass ? find_rename : find_origin;
1155                 for (i = 0, parent = commit->parents;
1156                      i < MAXPARENT && parent;
1157                      parent = parent->next, i++) {
1158                         struct commit *p = parent->item;
1159                         int j, same;
1161                         if (parent_origin[i])
1162                                 continue;
1163                         if (parse_commit(p))
1164                                 continue;
1165                         porigin = find(sb, p, origin);
1166                         if (!porigin)
1167                                 continue;
1168                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1169                                 pass_whole_blame(sb, origin, porigin);
1170                                 origin_decref(porigin);
1171                                 goto finish;
1172                         }
1173                         for (j = same = 0; j < i; j++)
1174                                 if (parent_origin[j] &&
1175                                     !hashcmp(parent_origin[j]->blob_sha1,
1176                                              porigin->blob_sha1)) {
1177                                         same = 1;
1178                                         break;
1179                                 }
1180                         if (!same)
1181                                 parent_origin[i] = porigin;
1182                         else
1183                                 origin_decref(porigin);
1184                 }
1185         }
1187         num_commits++;
1188         for (i = 0, parent = commit->parents;
1189              i < MAXPARENT && parent;
1190              parent = parent->next, i++) {
1191                 struct origin *porigin = parent_origin[i];
1192                 if (!porigin)
1193                         continue;
1194                 if (pass_blame_to_parent(sb, origin, porigin))
1195                         goto finish;
1196         }
1198         /*
1199          * Optionally find moves in parents' files.
1200          */
1201         if (opt & PICKAXE_BLAME_MOVE)
1202                 for (i = 0, parent = commit->parents;
1203                      i < MAXPARENT && parent;
1204                      parent = parent->next, i++) {
1205                         struct origin *porigin = parent_origin[i];
1206                         if (!porigin)
1207                                 continue;
1208                         if (find_move_in_parent(sb, origin, porigin))
1209                                 goto finish;
1210                 }
1212         /*
1213          * Optionally find copies from parents' files.
1214          */
1215         if (opt & PICKAXE_BLAME_COPY)
1216                 for (i = 0, parent = commit->parents;
1217                      i < MAXPARENT && parent;
1218                      parent = parent->next, i++) {
1219                         struct origin *porigin = parent_origin[i];
1220                         if (find_copy_in_parent(sb, origin, parent->item,
1221                                                 porigin, opt))
1222                                 goto finish;
1223                 }
1225  finish:
1226         for (i = 0; i < MAXPARENT; i++)
1227                 origin_decref(parent_origin[i]);
1230 /*
1231  * Information on commits, used for output.
1232  */
1233 struct commit_info
1235         char *author;
1236         char *author_mail;
1237         unsigned long author_time;
1238         char *author_tz;
1240         /* filled only when asked for details */
1241         char *committer;
1242         char *committer_mail;
1243         unsigned long committer_time;
1244         char *committer_tz;
1246         char *summary;
1247 };
1249 /*
1250  * Parse author/committer line in the commit object buffer
1251  */
1252 static void get_ac_line(const char *inbuf, const char *what,
1253                         int bufsz, char *person, char **mail,
1254                         unsigned long *time, char **tz)
1256         int len;
1257         char *tmp, *endp;
1259         tmp = strstr(inbuf, what);
1260         if (!tmp)
1261                 goto error_out;
1262         tmp += strlen(what);
1263         endp = strchr(tmp, '\n');
1264         if (!endp)
1265                 len = strlen(tmp);
1266         else
1267                 len = endp - tmp;
1268         if (bufsz <= len) {
1269         error_out:
1270                 /* Ugh */
1271                 person = *mail = *tz = "(unknown)";
1272                 *time = 0;
1273                 return;
1274         }
1275         memcpy(person, tmp, len);
1277         tmp = person;
1278         tmp += len;
1279         *tmp = 0;
1280         while (*tmp != ' ')
1281                 tmp--;
1282         *tz = tmp+1;
1284         *tmp = 0;
1285         while (*tmp != ' ')
1286                 tmp--;
1287         *time = strtoul(tmp, NULL, 10);
1289         *tmp = 0;
1290         while (*tmp != ' ')
1291                 tmp--;
1292         *mail = tmp + 1;
1293         *tmp = 0;
1296 static void get_commit_info(struct commit *commit,
1297                             struct commit_info *ret,
1298                             int detailed)
1300         int len;
1301         char *tmp, *endp;
1302         static char author_buf[1024];
1303         static char committer_buf[1024];
1304         static char summary_buf[1024];
1306         /*
1307          * We've operated without save_commit_buffer, so
1308          * we now need to populate them for output.
1309          */
1310         if (!commit->buffer) {
1311                 char type[20];
1312                 unsigned long size;
1313                 commit->buffer =
1314                         read_sha1_file(commit->object.sha1, type, &size);
1315         }
1316         ret->author = author_buf;
1317         get_ac_line(commit->buffer, "\nauthor ",
1318                     sizeof(author_buf), author_buf, &ret->author_mail,
1319                     &ret->author_time, &ret->author_tz);
1321         if (!detailed)
1322                 return;
1324         ret->committer = committer_buf;
1325         get_ac_line(commit->buffer, "\ncommitter ",
1326                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1327                     &ret->committer_time, &ret->committer_tz);
1329         ret->summary = summary_buf;
1330         tmp = strstr(commit->buffer, "\n\n");
1331         if (!tmp) {
1332         error_out:
1333                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1334                 return;
1335         }
1336         tmp += 2;
1337         endp = strchr(tmp, '\n');
1338         if (!endp)
1339                 goto error_out;
1340         len = endp - tmp;
1341         if (len >= sizeof(summary_buf))
1342                 goto error_out;
1343         memcpy(summary_buf, tmp, len);
1344         summary_buf[len] = 0;
1347 /*
1348  * To allow LF and other nonportable characters in pathnames,
1349  * they are c-style quoted as needed.
1350  */
1351 static void write_filename_info(const char *path)
1353         printf("filename ");
1354         write_name_quoted(NULL, 0, path, 1, stdout);
1355         putchar('\n');
1358 /*
1359  * The blame_entry is found to be guilty for the range.  Mark it
1360  * as such, and show it in incremental output.
1361  */
1362 static void found_guilty_entry(struct blame_entry *ent)
1364         if (ent->guilty)
1365                 return;
1366         ent->guilty = 1;
1367         if (incremental) {
1368                 struct origin *suspect = ent->suspect;
1370                 printf("%s %d %d %d\n",
1371                        sha1_to_hex(suspect->commit->object.sha1),
1372                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1373                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1374                         struct commit_info ci;
1375                         suspect->commit->object.flags |= METAINFO_SHOWN;
1376                         get_commit_info(suspect->commit, &ci, 1);
1377                         printf("author %s\n", ci.author);
1378                         printf("author-mail %s\n", ci.author_mail);
1379                         printf("author-time %lu\n", ci.author_time);
1380                         printf("author-tz %s\n", ci.author_tz);
1381                         printf("committer %s\n", ci.committer);
1382                         printf("committer-mail %s\n", ci.committer_mail);
1383                         printf("committer-time %lu\n", ci.committer_time);
1384                         printf("committer-tz %s\n", ci.committer_tz);
1385                         printf("summary %s\n", ci.summary);
1386                         if (suspect->commit->object.flags & UNINTERESTING)
1387                                 printf("boundary\n");
1388                 }
1389                 write_filename_info(suspect->path);
1390         }
1393 /*
1394  * The main loop -- while the scoreboard has lines whose true origin
1395  * is still unknown, pick one brame_entry, and allow its current
1396  * suspect to pass blames to its parents.
1397  */
1398 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1400         while (1) {
1401                 struct blame_entry *ent;
1402                 struct commit *commit;
1403                 struct origin *suspect = NULL;
1405                 /* find one suspect to break down */
1406                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1407                         if (!ent->guilty)
1408                                 suspect = ent->suspect;
1409                 if (!suspect)
1410                         return; /* all done */
1412                 /*
1413                  * We will use this suspect later in the loop,
1414                  * so hold onto it in the meantime.
1415                  */
1416                 origin_incref(suspect);
1417                 commit = suspect->commit;
1418                 if (!commit->object.parsed)
1419                         parse_commit(commit);
1420                 if (!(commit->object.flags & UNINTERESTING) &&
1421                     !(revs->max_age != -1 && commit->date < revs->max_age))
1422                         pass_blame(sb, suspect, opt);
1423                 else {
1424                         commit->object.flags |= UNINTERESTING;
1425                         if (commit->object.parsed)
1426                                 mark_parents_uninteresting(commit);
1427                 }
1428                 /* treat root commit as boundary */
1429                 if (!commit->parents && !show_root)
1430                         commit->object.flags |= UNINTERESTING;
1432                 /* Take responsibility for the remaining entries */
1433                 for (ent = sb->ent; ent; ent = ent->next)
1434                         if (!cmp_suspect(ent->suspect, suspect))
1435                                 found_guilty_entry(ent);
1436                 origin_decref(suspect);
1438                 if (DEBUG) /* sanity */
1439                         sanity_check_refcnt(sb);
1440         }
1443 static const char *format_time(unsigned long time, const char *tz_str,
1444                                int show_raw_time)
1446         static char time_buf[128];
1447         time_t t = time;
1448         int minutes, tz;
1449         struct tm *tm;
1451         if (show_raw_time) {
1452                 sprintf(time_buf, "%lu %s", time, tz_str);
1453                 return time_buf;
1454         }
1456         tz = atoi(tz_str);
1457         minutes = tz < 0 ? -tz : tz;
1458         minutes = (minutes / 100)*60 + (minutes % 100);
1459         minutes = tz < 0 ? -minutes : minutes;
1460         t = time + minutes * 60;
1461         tm = gmtime(&t);
1463         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1464         strcat(time_buf, tz_str);
1465         return time_buf;
1468 #define OUTPUT_ANNOTATE_COMPAT  001
1469 #define OUTPUT_LONG_OBJECT_NAME 002
1470 #define OUTPUT_RAW_TIMESTAMP    004
1471 #define OUTPUT_PORCELAIN        010
1472 #define OUTPUT_SHOW_NAME        020
1473 #define OUTPUT_SHOW_NUMBER      040
1474 #define OUTPUT_SHOW_SCORE      0100
1476 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1478         int cnt;
1479         const char *cp;
1480         struct origin *suspect = ent->suspect;
1481         char hex[41];
1483         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1484         printf("%s%c%d %d %d\n",
1485                hex,
1486                ent->guilty ? ' ' : '*', // purely for debugging
1487                ent->s_lno + 1,
1488                ent->lno + 1,
1489                ent->num_lines);
1490         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1491                 struct commit_info ci;
1492                 suspect->commit->object.flags |= METAINFO_SHOWN;
1493                 get_commit_info(suspect->commit, &ci, 1);
1494                 printf("author %s\n", ci.author);
1495                 printf("author-mail %s\n", ci.author_mail);
1496                 printf("author-time %lu\n", ci.author_time);
1497                 printf("author-tz %s\n", ci.author_tz);
1498                 printf("committer %s\n", ci.committer);
1499                 printf("committer-mail %s\n", ci.committer_mail);
1500                 printf("committer-time %lu\n", ci.committer_time);
1501                 printf("committer-tz %s\n", ci.committer_tz);
1502                 write_filename_info(suspect->path);
1503                 printf("summary %s\n", ci.summary);
1504                 if (suspect->commit->object.flags & UNINTERESTING)
1505                         printf("boundary\n");
1506         }
1507         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1508                 write_filename_info(suspect->path);
1510         cp = nth_line(sb, ent->lno);
1511         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1512                 char ch;
1513                 if (cnt)
1514                         printf("%s %d %d\n", hex,
1515                                ent->s_lno + 1 + cnt,
1516                                ent->lno + 1 + cnt);
1517                 putchar('\t');
1518                 do {
1519                         ch = *cp++;
1520                         putchar(ch);
1521                 } while (ch != '\n' &&
1522                          cp < sb->final_buf + sb->final_buf_size);
1523         }
1526 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1528         int cnt;
1529         const char *cp;
1530         struct origin *suspect = ent->suspect;
1531         struct commit_info ci;
1532         char hex[41];
1533         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1535         get_commit_info(suspect->commit, &ci, 1);
1536         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1538         cp = nth_line(sb, ent->lno);
1539         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1540                 char ch;
1541                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1543                 if (suspect->commit->object.flags & UNINTERESTING) {
1544                         if (!blank_boundary) {
1545                                 length--;
1546                                 putchar('^');
1547                         }
1548                         else
1549                                 memset(hex, ' ', length);
1550                 }
1552                 printf("%.*s", length, hex);
1553                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1554                         printf("\t(%10s\t%10s\t%d)", ci.author,
1555                                format_time(ci.author_time, ci.author_tz,
1556                                            show_raw_time),
1557                                ent->lno + 1 + cnt);
1558                 else {
1559                         if (opt & OUTPUT_SHOW_SCORE)
1560                                 printf(" %*d %02d",
1561                                        max_score_digits, ent->score,
1562                                        ent->suspect->refcnt);
1563                         if (opt & OUTPUT_SHOW_NAME)
1564                                 printf(" %-*.*s", longest_file, longest_file,
1565                                        suspect->path);
1566                         if (opt & OUTPUT_SHOW_NUMBER)
1567                                 printf(" %*d", max_orig_digits,
1568                                        ent->s_lno + 1 + cnt);
1569                         printf(" (%-*.*s %10s %*d) ",
1570                                longest_author, longest_author, ci.author,
1571                                format_time(ci.author_time, ci.author_tz,
1572                                            show_raw_time),
1573                                max_digits, ent->lno + 1 + cnt);
1574                 }
1575                 do {
1576                         ch = *cp++;
1577                         putchar(ch);
1578                 } while (ch != '\n' &&
1579                          cp < sb->final_buf + sb->final_buf_size);
1580         }
1583 static void output(struct scoreboard *sb, int option)
1585         struct blame_entry *ent;
1587         if (option & OUTPUT_PORCELAIN) {
1588                 for (ent = sb->ent; ent; ent = ent->next) {
1589                         struct blame_entry *oth;
1590                         struct origin *suspect = ent->suspect;
1591                         struct commit *commit = suspect->commit;
1592                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1593                                 continue;
1594                         for (oth = ent->next; oth; oth = oth->next) {
1595                                 if ((oth->suspect->commit != commit) ||
1596                                     !strcmp(oth->suspect->path, suspect->path))
1597                                         continue;
1598                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1599                                 break;
1600                         }
1601                 }
1602         }
1604         for (ent = sb->ent; ent; ent = ent->next) {
1605                 if (option & OUTPUT_PORCELAIN)
1606                         emit_porcelain(sb, ent);
1607                 else {
1608                         emit_other(sb, ent, option);
1609                 }
1610         }
1613 /*
1614  * To allow quick access to the contents of nth line in the
1615  * final image, prepare an index in the scoreboard.
1616  */
1617 static int prepare_lines(struct scoreboard *sb)
1619         const char *buf = sb->final_buf;
1620         unsigned long len = sb->final_buf_size;
1621         int num = 0, incomplete = 0, bol = 1;
1623         if (len && buf[len-1] != '\n')
1624                 incomplete++; /* incomplete line at the end */
1625         while (len--) {
1626                 if (bol) {
1627                         sb->lineno = xrealloc(sb->lineno,
1628                                               sizeof(int* ) * (num + 1));
1629                         sb->lineno[num] = buf - sb->final_buf;
1630                         bol = 0;
1631                 }
1632                 if (*buf++ == '\n') {
1633                         num++;
1634                         bol = 1;
1635                 }
1636         }
1637         sb->lineno = xrealloc(sb->lineno,
1638                               sizeof(int* ) * (num + incomplete + 1));
1639         sb->lineno[num + incomplete] = buf - sb->final_buf;
1640         sb->num_lines = num + incomplete;
1641         return sb->num_lines;
1644 /*
1645  * Add phony grafts for use with -S; this is primarily to
1646  * support git-cvsserver that wants to give a linear history
1647  * to its clients.
1648  */
1649 static int read_ancestry(const char *graft_file)
1651         FILE *fp = fopen(graft_file, "r");
1652         char buf[1024];
1653         if (!fp)
1654                 return -1;
1655         while (fgets(buf, sizeof(buf), fp)) {
1656                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1657                 int len = strlen(buf);
1658                 struct commit_graft *graft = read_graft_line(buf, len);
1659                 if (graft)
1660                         register_commit_graft(graft, 0);
1661         }
1662         fclose(fp);
1663         return 0;
1666 /*
1667  * How many columns do we need to show line numbers in decimal?
1668  */
1669 static int lineno_width(int lines)
1671         int i, width;
1673         for (width = 1, i = 10; i <= lines + 1; width++)
1674                 i *= 10;
1675         return width;
1678 /*
1679  * How many columns do we need to show line numbers, authors,
1680  * and filenames?
1681  */
1682 static void find_alignment(struct scoreboard *sb, int *option)
1684         int longest_src_lines = 0;
1685         int longest_dst_lines = 0;
1686         unsigned largest_score = 0;
1687         struct blame_entry *e;
1689         for (e = sb->ent; e; e = e->next) {
1690                 struct origin *suspect = e->suspect;
1691                 struct commit_info ci;
1692                 int num;
1694                 if (strcmp(suspect->path, sb->path))
1695                         *option |= OUTPUT_SHOW_NAME;
1696                 num = strlen(suspect->path);
1697                 if (longest_file < num)
1698                         longest_file = num;
1699                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1700                         suspect->commit->object.flags |= METAINFO_SHOWN;
1701                         get_commit_info(suspect->commit, &ci, 1);
1702                         num = strlen(ci.author);
1703                         if (longest_author < num)
1704                                 longest_author = num;
1705                 }
1706                 num = e->s_lno + e->num_lines;
1707                 if (longest_src_lines < num)
1708                         longest_src_lines = num;
1709                 num = e->lno + e->num_lines;
1710                 if (longest_dst_lines < num)
1711                         longest_dst_lines = num;
1712                 if (largest_score < ent_score(sb, e))
1713                         largest_score = ent_score(sb, e);
1714         }
1715         max_orig_digits = lineno_width(longest_src_lines);
1716         max_digits = lineno_width(longest_dst_lines);
1717         max_score_digits = lineno_width(largest_score);
1720 /*
1721  * For debugging -- origin is refcounted, and this asserts that
1722  * we do not underflow.
1723  */
1724 static void sanity_check_refcnt(struct scoreboard *sb)
1726         int baa = 0;
1727         struct blame_entry *ent;
1729         for (ent = sb->ent; ent; ent = ent->next) {
1730                 /* Nobody should have zero or negative refcnt */
1731                 if (ent->suspect->refcnt <= 0) {
1732                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1733                                 ent->suspect->path,
1734                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1735                                 ent->suspect->refcnt);
1736                         baa = 1;
1737                 }
1738         }
1739         for (ent = sb->ent; ent; ent = ent->next) {
1740                 /* Mark the ones that haven't been checked */
1741                 if (0 < ent->suspect->refcnt)
1742                         ent->suspect->refcnt = -ent->suspect->refcnt;
1743         }
1744         for (ent = sb->ent; ent; ent = ent->next) {
1745                 /*
1746                  * ... then pick each and see if they have the the
1747                  * correct refcnt.
1748                  */
1749                 int found;
1750                 struct blame_entry *e;
1751                 struct origin *suspect = ent->suspect;
1753                 if (0 < suspect->refcnt)
1754                         continue;
1755                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1756                 for (found = 0, e = sb->ent; e; e = e->next) {
1757                         if (e->suspect != suspect)
1758                                 continue;
1759                         found++;
1760                 }
1761                 if (suspect->refcnt != found) {
1762                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1763                                 ent->suspect->path,
1764                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1765                                 ent->suspect->refcnt, found);
1766                         baa = 2;
1767                 }
1768         }
1769         if (baa) {
1770                 int opt = 0160;
1771                 find_alignment(sb, &opt);
1772                 output(sb, opt);
1773                 die("Baa %d!", baa);
1774         }
1777 /*
1778  * Used for the command line parsing; check if the path exists
1779  * in the working tree.
1780  */
1781 static int has_path_in_work_tree(const char *path)
1783         struct stat st;
1784         return !lstat(path, &st);
1787 static unsigned parse_score(const char *arg)
1789         char *end;
1790         unsigned long score = strtoul(arg, &end, 10);
1791         if (*end)
1792                 return 0;
1793         return score;
1796 static const char *add_prefix(const char *prefix, const char *path)
1798         if (!prefix || !prefix[0])
1799                 return path;
1800         return prefix_path(prefix, strlen(prefix), path);
1803 /*
1804  * Parsing of (comma separated) one item in the -L option
1805  */
1806 static const char *parse_loc(const char *spec,
1807                              struct scoreboard *sb, long lno,
1808                              long begin, long *ret)
1810         char *term;
1811         const char *line;
1812         long num;
1813         int reg_error;
1814         regex_t regexp;
1815         regmatch_t match[1];
1817         /* Allow "-L <something>,+20" to mean starting at <something>
1818          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1819          * <something>.
1820          */
1821         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1822                 num = strtol(spec + 1, &term, 10);
1823                 if (term != spec + 1) {
1824                         if (spec[0] == '-')
1825                                 num = 0 - num;
1826                         if (0 < num)
1827                                 *ret = begin + num - 2;
1828                         else if (!num)
1829                                 *ret = begin;
1830                         else
1831                                 *ret = begin + num;
1832                         return term;
1833                 }
1834                 return spec;
1835         }
1836         num = strtol(spec, &term, 10);
1837         if (term != spec) {
1838                 *ret = num;
1839                 return term;
1840         }
1841         if (spec[0] != '/')
1842                 return spec;
1844         /* it could be a regexp of form /.../ */
1845         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1846                 if (*term == '\\')
1847                         term++;
1848         }
1849         if (*term != '/')
1850                 return spec;
1852         /* try [spec+1 .. term-1] as regexp */
1853         *term = 0;
1854         begin--; /* input is in human terms */
1855         line = nth_line(sb, begin);
1857         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1858             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1859                 const char *cp = line + match[0].rm_so;
1860                 const char *nline;
1862                 while (begin++ < lno) {
1863                         nline = nth_line(sb, begin);
1864                         if (line <= cp && cp < nline)
1865                                 break;
1866                         line = nline;
1867                 }
1868                 *ret = begin;
1869                 regfree(&regexp);
1870                 *term++ = '/';
1871                 return term;
1872         }
1873         else {
1874                 char errbuf[1024];
1875                 regerror(reg_error, &regexp, errbuf, 1024);
1876                 die("-L parameter '%s': %s", spec + 1, errbuf);
1877         }
1880 /*
1881  * Parsing of -L option
1882  */
1883 static void prepare_blame_range(struct scoreboard *sb,
1884                                 const char *bottomtop,
1885                                 long lno,
1886                                 long *bottom, long *top)
1888         const char *term;
1890         term = parse_loc(bottomtop, sb, lno, 1, bottom);
1891         if (*term == ',') {
1892                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1893                 if (*term)
1894                         usage(blame_usage);
1895         }
1896         if (*term)
1897                 usage(blame_usage);
1900 static int git_blame_config(const char *var, const char *value)
1902         if (!strcmp(var, "blame.showroot")) {
1903                 show_root = git_config_bool(var, value);
1904                 return 0;
1905         }
1906         if (!strcmp(var, "blame.blankboundary")) {
1907                 blank_boundary = git_config_bool(var, value);
1908                 return 0;
1909         }
1910         return git_default_config(var, value);
1913 int cmd_blame(int argc, const char **argv, const char *prefix)
1915         struct rev_info revs;
1916         const char *path;
1917         struct scoreboard sb;
1918         struct origin *o;
1919         struct blame_entry *ent;
1920         int i, seen_dashdash, unk, opt;
1921         long bottom, top, lno;
1922         int output_option = 0;
1923         const char *revs_file = NULL;
1924         const char *final_commit_name = NULL;
1925         char type[10];
1926         const char *bottomtop = NULL;
1928         git_config(git_blame_config);
1929         save_commit_buffer = 0;
1931         opt = 0;
1932         seen_dashdash = 0;
1933         for (unk = i = 1; i < argc; i++) {
1934                 const char *arg = argv[i];
1935                 if (*arg != '-')
1936                         break;
1937                 else if (!strcmp("-b", arg))
1938                         blank_boundary = 1;
1939                 else if (!strcmp("--root", arg))
1940                         show_root = 1;
1941                 else if (!strcmp("-c", arg))
1942                         output_option |= OUTPUT_ANNOTATE_COMPAT;
1943                 else if (!strcmp("-t", arg))
1944                         output_option |= OUTPUT_RAW_TIMESTAMP;
1945                 else if (!strcmp("-l", arg))
1946                         output_option |= OUTPUT_LONG_OBJECT_NAME;
1947                 else if (!strcmp("-S", arg) && ++i < argc)
1948                         revs_file = argv[i];
1949                 else if (!strncmp("-M", arg, 2)) {
1950                         opt |= PICKAXE_BLAME_MOVE;
1951                         blame_move_score = parse_score(arg+2);
1952                 }
1953                 else if (!strncmp("-C", arg, 2)) {
1954                         if (opt & PICKAXE_BLAME_COPY)
1955                                 opt |= PICKAXE_BLAME_COPY_HARDER;
1956                         opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1957                         blame_copy_score = parse_score(arg+2);
1958                 }
1959                 else if (!strncmp("-L", arg, 2)) {
1960                         if (!arg[2]) {
1961                                 if (++i >= argc)
1962                                         usage(blame_usage);
1963                                 arg = argv[i];
1964                         }
1965                         else
1966                                 arg += 2;
1967                         if (bottomtop)
1968                                 die("More than one '-L n,m' option given");
1969                         bottomtop = arg;
1970                 }
1971                 else if (!strcmp("--incremental", arg))
1972                         incremental = 1;
1973                 else if (!strcmp("--score-debug", arg))
1974                         output_option |= OUTPUT_SHOW_SCORE;
1975                 else if (!strcmp("-f", arg) ||
1976                          !strcmp("--show-name", arg))
1977                         output_option |= OUTPUT_SHOW_NAME;
1978                 else if (!strcmp("-n", arg) ||
1979                          !strcmp("--show-number", arg))
1980                         output_option |= OUTPUT_SHOW_NUMBER;
1981                 else if (!strcmp("-p", arg) ||
1982                          !strcmp("--porcelain", arg))
1983                         output_option |= OUTPUT_PORCELAIN;
1984                 else if (!strcmp("--", arg)) {
1985                         seen_dashdash = 1;
1986                         i++;
1987                         break;
1988                 }
1989                 else
1990                         argv[unk++] = arg;
1991         }
1993         if (!incremental)
1994                 setup_pager();
1996         if (!blame_move_score)
1997                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1998         if (!blame_copy_score)
1999                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2001         /*
2002          * We have collected options unknown to us in argv[1..unk]
2003          * which are to be passed to revision machinery if we are
2004          * going to do the "bottom" procesing.
2005          *
2006          * The remaining are:
2007          *
2008          * (1) if seen_dashdash, its either
2009          *     "-options -- <path>" or
2010          *     "-options -- <path> <rev>".
2011          *     but the latter is allowed only if there is no
2012          *     options that we passed to revision machinery.
2013          *
2014          * (2) otherwise, we may have "--" somewhere later and
2015          *     might be looking at the first one of multiple 'rev'
2016          *     parameters (e.g. " master ^next ^maint -- path").
2017          *     See if there is a dashdash first, and give the
2018          *     arguments before that to revision machinery.
2019          *     After that there must be one 'path'.
2020          *
2021          * (3) otherwise, its one of the three:
2022          *     "-options <path> <rev>"
2023          *     "-options <rev> <path>"
2024          *     "-options <path>"
2025          *     but again the first one is allowed only if
2026          *     there is no options that we passed to revision
2027          *     machinery.
2028          */
2030         if (seen_dashdash) {
2031                 /* (1) */
2032                 if (argc <= i)
2033                         usage(blame_usage);
2034                 path = add_prefix(prefix, argv[i]);
2035                 if (i + 1 == argc - 1) {
2036                         if (unk != 1)
2037                                 usage(blame_usage);
2038                         argv[unk++] = argv[i + 1];
2039                 }
2040                 else if (i + 1 != argc)
2041                         /* garbage at end */
2042                         usage(blame_usage);
2043         }
2044         else {
2045                 int j;
2046                 for (j = i; !seen_dashdash && j < argc; j++)
2047                         if (!strcmp(argv[j], "--"))
2048                                 seen_dashdash = j;
2049                 if (seen_dashdash) {
2050                         if (seen_dashdash + 1 != argc - 1)
2051                                 usage(blame_usage);
2052                         path = add_prefix(prefix, argv[seen_dashdash + 1]);
2053                         for (j = i; j < seen_dashdash; j++)
2054                                 argv[unk++] = argv[j];
2055                 }
2056                 else {
2057                         /* (3) */
2058                         path = add_prefix(prefix, argv[i]);
2059                         if (i + 1 == argc - 1) {
2060                                 final_commit_name = argv[i + 1];
2062                                 /* if (unk == 1) we could be getting
2063                                  * old-style
2064                                  */
2065                                 if (unk == 1 && !has_path_in_work_tree(path)) {
2066                                         path = add_prefix(prefix, argv[i + 1]);
2067                                         final_commit_name = argv[i];
2068                                 }
2069                         }
2070                         else if (i != argc - 1)
2071                                 usage(blame_usage); /* garbage at end */
2073                         if (!has_path_in_work_tree(path))
2074                                 die("cannot stat path %s: %s",
2075                                     path, strerror(errno));
2076                 }
2077         }
2079         if (final_commit_name)
2080                 argv[unk++] = final_commit_name;
2082         /*
2083          * Now we got rev and path.  We do not want the path pruning
2084          * but we may want "bottom" processing.
2085          */
2086         argv[unk++] = "--"; /* terminate the rev name */
2087         argv[unk] = NULL;
2089         init_revisions(&revs, NULL);
2090         setup_revisions(unk, argv, &revs, "HEAD");
2091         memset(&sb, 0, sizeof(sb));
2093         /*
2094          * There must be one and only one positive commit in the
2095          * revs->pending array.
2096          */
2097         for (i = 0; i < revs.pending.nr; i++) {
2098                 struct object *obj = revs.pending.objects[i].item;
2099                 if (obj->flags & UNINTERESTING)
2100                         continue;
2101                 while (obj->type == OBJ_TAG)
2102                         obj = deref_tag(obj, NULL, 0);
2103                 if (obj->type != OBJ_COMMIT)
2104                         die("Non commit %s?",
2105                             revs.pending.objects[i].name);
2106                 if (sb.final)
2107                         die("More than one commit to dig from %s and %s?",
2108                             revs.pending.objects[i].name,
2109                             final_commit_name);
2110                 sb.final = (struct commit *) obj;
2111                 final_commit_name = revs.pending.objects[i].name;
2112         }
2114         if (!sb.final) {
2115                 /*
2116                  * "--not A B -- path" without anything positive;
2117                  * default to HEAD.
2118                  */
2119                 unsigned char head_sha1[20];
2121                 final_commit_name = "HEAD";
2122                 if (get_sha1(final_commit_name, head_sha1))
2123                         die("No such ref: HEAD");
2124                 sb.final = lookup_commit_reference(head_sha1);
2125                 add_pending_object(&revs, &(sb.final->object), "HEAD");
2126         }
2128         /*
2129          * If we have bottom, this will mark the ancestors of the
2130          * bottom commits we would reach while traversing as
2131          * uninteresting.
2132          */
2133         prepare_revision_walk(&revs);
2135         o = get_origin(&sb, sb.final, path);
2136         if (fill_blob_sha1(o))
2137                 die("no such path %s in %s", path, final_commit_name);
2139         sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
2140         num_read_blob++;
2141         lno = prepare_lines(&sb);
2143         bottom = top = 0;
2144         if (bottomtop)
2145                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2146         if (bottom && top && top < bottom) {
2147                 long tmp;
2148                 tmp = top; top = bottom; bottom = tmp;
2149         }
2150         if (bottom < 1)
2151                 bottom = 1;
2152         if (top < 1)
2153                 top = lno;
2154         bottom--;
2155         if (lno < top)
2156                 die("file %s has only %lu lines", path, lno);
2158         ent = xcalloc(1, sizeof(*ent));
2159         ent->lno = bottom;
2160         ent->num_lines = top - bottom;
2161         ent->suspect = o;
2162         ent->s_lno = bottom;
2164         sb.ent = ent;
2165         sb.path = path;
2167         if (revs_file && read_ancestry(revs_file))
2168                 die("reading graft file %s failed: %s",
2169                     revs_file, strerror(errno));
2171         assign_blame(&sb, &revs, opt);
2173         if (incremental)
2174                 return 0;
2176         coalesce(&sb);
2178         if (!(output_option & OUTPUT_PORCELAIN))
2179                 find_alignment(&sb, &output_option);
2181         output(&sb, output_option);
2182         free((void *)sb.final_buf);
2183         for (ent = sb.ent; ent; ) {
2184                 struct blame_entry *e = ent->next;
2185                 free(ent);
2186                 ent = e;
2187         }
2189         if (DEBUG) {
2190                 printf("num read blob: %d\n", num_read_blob);
2191                 printf("num get patch: %d\n", num_get_patch);
2192                 printf("num commits: %d\n", num_commits);
2193         }
2194         return 0;