X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=xdiff%2Fxprepare.c;h=e419f4f726019a5b0365c589285439fb3bfb8db2;hb=25f745fbec0e92a29e33aa85413c2bf7762608bf;hp=eba31ffaeebb4fe15f45ce854aa2ed3f63489b8c;hpb=8e969454e1588e506df20f552aa8622b6517ea20;p=git.git diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index eba31ffae..e419f4f72 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -264,6 +264,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, long enl1, enl2, sample; xdlclassifier_t cf; + memset(&cf, 0, sizeof(cf)); + /* * For histogram diff, we can afford a smaller sample size and * thus a poorer estimate of the number of lines, as the hash @@ -381,7 +383,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { * might be potentially discarded if they happear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, nreff; + long i, nm, nreff, mlim; xrecord_t **recs; xdlclass_t *rcrec; char *dis, *dis1, *dis2; @@ -394,16 +396,20 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd dis1 = dis; dis2 = dis1 + xdf1->nrec + 1; + if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len2 : 0; - dis1[i] = (nm == 0) ? 0: 1; + dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; } + if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len1 : 0; - dis2[i] = (nm == 0) ? 0: 1; + dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];