X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=diff-delta.c;h=fa16d06c8d1e85a458428c673cb2f589857f5424;hb=0f03ca946142bd656c1af9ff811cb9efbc8314da;hp=35e517d2d79002214988b57b476faa29f8292394;hpb=a9fb5823236e675165cd91f3651094c967be532e;p=git.git diff --git a/diff-delta.c b/diff-delta.c index 35e517d2d..fa16d06c8 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -22,6 +22,7 @@ #include #include "delta.h" +#include "git-compat-util.h" /* maximum hash entry list for the same hash bucket */ #define HASH_LIMIT 64 @@ -131,33 +132,35 @@ struct delta_index { const void *src_buf; unsigned long src_size; unsigned int hash_mask; - struct index_entry *hash[0]; + struct index_entry *hash[FLEX_ARRAY]; }; struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) { - unsigned int i, hsize, hmask, entries, *hash_count; + unsigned int i, hsize, hmask, entries, prev_val, *hash_count; const unsigned char *data, *buffer = buf; struct delta_index *index; struct index_entry *entry, **hash; void *mem; + unsigned long memsize; if (!buf || !bufsize) return NULL; /* Determine index hash size. Note that indexing skips the - first byte to allow for optimizing the rabin polynomial + first byte to allow for optimizing the Rabin's polynomial initialization in create_delta(). */ entries = (bufsize - 1) / RABIN_WINDOW; hsize = entries / 4; - for (i = 4; (1 << i) < hsize && i < 31; i++); + for (i = 4; (1u << i) < hsize && i < 31; i++); hsize = 1 << i; hmask = hsize - 1; /* allocate lookup index */ - mem = malloc(sizeof(*index) + - sizeof(*hash) * hsize + - sizeof(*entry) * entries); + memsize = sizeof(*index) + + sizeof(*hash) * hsize + + sizeof(*entry) * entries; + mem = malloc(memsize); if (!mem) return NULL; index = mem; @@ -179,23 +182,30 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) } /* then populate the index */ - data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW; - while (data >= buffer) { + prev_val = ~0; + for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW; + data >= buffer; + data -= RABIN_WINDOW) { unsigned int val = 0; for (i = 1; i <= RABIN_WINDOW; i++) val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; - i = val & hmask; - entry->ptr = data + RABIN_WINDOW; - entry->val = val; - entry->next = hash[i]; - hash[i] = entry++; - hash_count[i]++; - data -= RABIN_WINDOW; + if (val == prev_val) { + /* keep the lowest of consecutive identical blocks */ + entry[-1].ptr = data + RABIN_WINDOW; + } else { + prev_val = val; + i = val & hmask; + entry->ptr = data + RABIN_WINDOW; + entry->val = val; + entry->next = hash[i]; + hash[i] = entry++; + hash_count[i]++; + } } /* * Determine a limit on the number of entries in the same hash - * bucket. This guard us against patological data sets causing + * bucket. This guards us against pathological data sets causing * really bad hash distribution with most entries in the same hash * bucket that would bring us to O(m*n) computing costs (m and n * corresponding to reference and target buffer sizes). @@ -230,7 +240,7 @@ void free_delta_index(struct delta_index *index) /* * The maximum size for any opcode sequence, including the initial header - * plus rabin window plus biggest copy. + * plus Rabin window plus biggest copy. */ #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) @@ -239,7 +249,7 @@ create_delta(const struct delta_index *index, const void *trg_buf, unsigned long trg_size, unsigned long *delta_size, unsigned long max_size) { - unsigned int i, outpos, outsize, hash_mask, val; + unsigned int i, outpos, outsize, val; int inscnt; const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; @@ -274,8 +284,7 @@ create_delta(const struct delta_index *index, ref_data = index->src_buf; ref_top = ref_data + index->src_size; data = trg_buf; - top = trg_buf + trg_size; - hash_mask = index->hash_mask; + top = (const unsigned char *) trg_buf + trg_size; outpos++; val = 0; @@ -290,7 +299,7 @@ create_delta(const struct delta_index *index, struct index_entry *entry; val ^= U[data[-RABIN_WINDOW]]; val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; - i = val & hash_mask; + i = val & index->hash_mask; for (entry = index->hash[i]; entry; entry = entry->next) { const unsigned char *ref = entry->ptr; const unsigned char *src = data; @@ -383,7 +392,7 @@ create_delta(const struct delta_index *index, outsize = max_size + MAX_OP_SIZE + 1; if (max_size && outpos > max_size) break; - out = realloc(out, outsize); + out = xrealloc(out, outsize); if (!out) { free(tmp); return NULL;