Code

diffcore-rename: similarity estimator fix.
author: Junio C Hamano <junkio@cox.net>
Fri, 3 Mar 2006 06:11:25 +0000 (22:11 -0800)
committer: Junio C Hamano <junkio@cox.net>
Fri, 3 Mar 2006 06:12:33 +0000 (22:12 -0800)
The "similarity" logic was giving added material far too much
negative weight.  What we want to measure is how similar the
post-change image is to the pre-change image, so the natural
definition of similarity is how much material the two have in
common, relative to the post-change image's size.

This simplifies things a lot.

Signed-off-by: Junio C Hamano <junkio@cox.net>
diffcore-rename.c

index 55cf1c37f344628eb06c40393295f288fb186a50..625b589fb728e5ae94b3b13d419b43c30347508d 100644 (file)
@@ -170,19 +170,15 @@ static int estimate_similarity(struct diff_filespec *src,
                                   &src_copied, &literal_added))
                return 0;
 
-       /* Extent of damage */
-       if (src->size + literal_added < src_copied)
-               delta_size = 0;
-       else
-               delta_size = (src->size - src_copied) + literal_added;
-
-       /*
-        * Now we will give some score to it.  100% edit gets 0 points
-        * and 0% edit gets MAX_SCORE points.
+       /* How similar are they?
+        * what percentage of material in dst are from source?
         */
-       score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); 
-       if (score < 0) return 0;
-       if (MAX_SCORE < score) return MAX_SCORE;
+       if (dst->size < src_copied)
+               score = MAX_SCORE;
+       else if (!dst->size)
+               score = 0; /* should not happen */
+       else
+               score = src_copied * MAX_SCORE / dst->size;
        return score;
 }