X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=diffcore-rename.c;h=0ec488a9033ea2d4514dab450d9e20581782ddbb;hb=6fe5b7ff6cafcc94415deba2f3d611770d8e6b1e;hp=625b589fb728e5ae94b3b13d419b43c30347508d;hpb=6c2711e70cc070559537a44f4975add451ee53f2;p=git.git diff --git a/diffcore-rename.c b/diffcore-rename.c index 625b589fb..0ec488a90 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -54,12 +54,14 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two, /* Table of rename/copy src files */ static struct diff_rename_src { struct diff_filespec *one; + unsigned short score; /* to remember the break score */ unsigned src_path_left : 1; } *rename_src; static int rename_src_nr, rename_src_alloc; static struct diff_rename_src *register_rename_src(struct diff_filespec *one, - int src_path_left) + int src_path_left, + unsigned short score) { int first, last; @@ -89,15 +91,20 @@ static struct diff_rename_src *register_rename_src(struct diff_filespec *one, memmove(rename_src + first + 1, rename_src + first, (rename_src_nr - first - 1) * sizeof(*rename_src)); rename_src[first].one = one; + rename_src[first].score = score; rename_src[first].src_path_left = src_path_left; return &(rename_src[first]); } -static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst) +static int is_exact_match(struct diff_filespec *src, + struct diff_filespec *dst, + int contents_too) { if (src->sha1_valid && dst->sha1_valid && !memcmp(src->sha1, dst->sha1, 20)) return 1; + if (!contents_too) + return 0; if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1)) return 0; if (src->size != dst->size) @@ -133,7 +140,7 @@ static int estimate_similarity(struct diff_filespec *src, * match than anything else; the destination does not even * call into this function in that case. */ - unsigned long delta_size, base_size, src_copied, literal_added; + unsigned long max_size, delta_size, base_size, src_copied, literal_added; unsigned long delta_limit; int score; @@ -144,9 +151,9 @@ static int estimate_similarity(struct diff_filespec *src, if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) return 0; - delta_size = ((src->size < dst->size) ? - (dst->size - src->size) : (src->size - dst->size)); + max_size = ((src->size > dst->size) ? src->size : dst->size); base_size = ((src->size < dst->size) ? src->size : dst->size); + delta_size = max_size - base_size; /* We would not consider edits that change the file size so * drastically. delta_size must be smaller than @@ -166,6 +173,7 @@ static int estimate_similarity(struct diff_filespec *src, delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; if (diffcore_count_changes(src->data, src->size, dst->data, dst->size, + &src->cnt_data, &dst->cnt_data, delta_limit, &src_copied, &literal_added)) return 0; @@ -173,12 +181,10 @@ static int estimate_similarity(struct diff_filespec *src, /* How similar are they? * what percentage of material in dst are from source? */ - if (dst->size < src_copied) - score = MAX_SCORE; - else if (!dst->size) + if (!dst->size) score = 0; /* should not happen */ else - score = src_copied * MAX_SCORE / dst->size; + score = src_copied * MAX_SCORE / max_size; return score; } @@ -199,7 +205,11 @@ static void record_rename_pair(int dst_index, int src_index, int score) fill_filespec(two, dst->sha1, dst->mode); dp = diff_queue(NULL, one, two); - dp->score = score; + dp->renamed_pair = 1; + if (!strcmp(src->path, dst->path)) + dp->score = rename_src[src_index].score; + else + dp->score = score; dp->source_stays = rename_src[src_index].src_path_left; rename_dst[dst_index].pair = dp; } @@ -237,7 +247,7 @@ void diffcore_rename(struct diff_options *options) struct diff_queue_struct *q = &diff_queued_diff; struct diff_queue_struct outq; struct diff_score *mx; - int i, j, rename_count; + int i, j, rename_count, contents_too; int num_create, num_src, dst_cnt; if (!minimum_score) @@ -257,10 +267,10 @@ void diffcore_rename(struct diff_options *options) * that means the source actually stays. */ int stays = (p->broken_pair && !p->score); - register_rename_src(p->one, stays); + register_rename_src(p->one, stays, p->score); } else if (detect_rename == DIFF_DETECT_COPY) - register_rename_src(p->one, 1); + register_rename_src(p->one, 1, p->score); } if (rename_dst_nr == 0 || rename_src_nr == 0 || (0 < rename_limit && rename_limit < rename_dst_nr)) @@ -268,16 +278,23 @@ void diffcore_rename(struct diff_options *options) /* We really want to cull the candidates list early * with cheap tests in order to avoid doing deltas. + * The first round matches up the up-to-date entries, + * and then during the second round we try to match + * cache-dirty entries as well. */ - for (i = 0; i < rename_dst_nr; i++) { - struct diff_filespec *two = rename_dst[i].two; - for (j = 0; j < rename_src_nr; j++) { - struct diff_filespec *one = rename_src[j].one; - if (!is_exact_match(one, two)) - continue; - record_rename_pair(i, j, MAX_SCORE); - rename_count++; - break; /* we are done with this entry */ + for (contents_too = 0; contents_too < 2; contents_too++) { + for (i = 0; i < rename_dst_nr; i++) { + struct diff_filespec *two = rename_dst[i].two; + if (rename_dst[i].pair) + continue; /* dealt with an earlier round */ + for (j = 0; j < rename_src_nr; j++) { + struct diff_filespec *one = rename_src[j].one; + if (!is_exact_match(one, two, contents_too)) + continue; + record_rename_pair(i, j, MAX_SCORE); + rename_count++; + break; /* we are done with this entry */ + } } } @@ -306,6 +323,8 @@ void diffcore_rename(struct diff_options *options) m->score = estimate_similarity(one, two, minimum_score); } + /* We do not need the text anymore */ + diff_free_filespec_data(two); dst_cnt++; } /* cost matrix sorted by most to least similar pair */