X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=xdiff%2Fxprepare.c;h=e419f4f726019a5b0365c589285439fb3bfb8db2;hb=25f745fbec0e92a29e33aa85413c2bf7762608bf;hp=eba31ffaeebb4fe15f45ce854aa2ed3f63489b8c;hpb=8e969454e1588e506df20f552aa8622b6517ea20;p=git.git

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index eba31ffae..e419f4f72 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -264,6 +264,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	long enl1, enl2, sample;
 	xdlclassifier_t cf;
 
+	memset(&cf, 0, sizeof(cf));
+
 	/*
 	 * For histogram diff, we can afford a smaller sample size and
 	 * thus a poorer estimate of the number of lines, as the hash
@@ -381,7 +383,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
  * might be potentially discarded if they happear in a run of discardable.
  */
 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
-	long i, nm, nreff;
+	long i, nm, nreff, mlim;
 	xrecord_t **recs;
 	xdlclass_t *rcrec;
 	char *dis, *dis1, *dis2;
@@ -394,16 +396,20 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	dis1 = dis;
 	dis2 = dis1 + xdf1->nrec + 1;
 
+	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
+		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
 		rcrec = cf->rcrecs[(*recs)->ha];
 		nm = rcrec ? rcrec->len2 : 0;
-		dis1[i] = (nm == 0) ? 0: 1;
+		dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
 	}
 
+	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
+		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
 		rcrec = cf->rcrecs[(*recs)->ha];
 		nm = rcrec ? rcrec->len1 : 0;
-		dis2[i] = (nm == 0) ? 0: 1;
+		dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
 	}
 
 	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];