summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 6541675)
raw | patch | inline | side by side (parent: 6541675)
author | Junio C Hamano <junkio@cox.net> | |
Wed, 1 Mar 2006 04:16:41 +0000 (20:16 -0800) | ||
committer | Junio C Hamano <junkio@cox.net> | |
Wed, 1 Mar 2006 04:20:04 +0000 (20:20 -0800) |
This switches the change estimation logic used by break, rename
and copy detection from delta packing code to a more line
oriented one. This way, thee performance-density tradeoff by
delta packing code can be made without worrying about breaking
the rename detection.
Signed-off-by: Junio C Hamano <junkio@cox.net>
and copy detection from delta packing code to a more line
oriented one. This way, thee performance-density tradeoff by
delta packing code can be made without worrying about breaking
the rename detection.
Signed-off-by: Junio C Hamano <junkio@cox.net>
diffcore-delta.c | patch | blob | history |
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a6911ecc7b9a6b6c28a20db9adea3e1238a2d..d03787be65be82b0fcd6dc62dae77013b2576c80 100644 (file)
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
- void *dst, unsigned long dst_size,
- unsigned long delta_limit,
- unsigned long *src_copied,
- unsigned long *literal_added)
-{
- void *delta;
- unsigned long delta_size;
-
- delta = diff_delta(src, src_size,
- dst, dst_size,
- &delta_size, delta_limit);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
- return -1;
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, src_copied, literal_added)) {
- free(delta);
- return -1;
+struct linehash {
+ unsigned long bytes;
+ unsigned long hash;
+};
+
+static unsigned long hash_extended_line(const unsigned char **buf_p,
+ unsigned long left)
+{
+ /* An extended line is zero or more whitespace letters (including LF)
+ * followed by one non whitespace letter followed by zero or more
+ * non LF, and terminated with by a LF (or EOF).
+ */
+ const unsigned char *bol = *buf_p;
+ const unsigned char *buf = bol;
+ unsigned long hashval = 0;
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (' ' < c) {
+ hashval = c;
+ break;
+ }
+ }
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (c == '\n')
+ break;
+ if (' ' < c)
+ hashval = hashval * 11 + c;
}
- free(delta);
+ *buf_p = buf;
+ return hashval;
+
+ binary:
+ *buf_p = NULL;
+ return 0;
+}
+
+static int linehash_compare(const void *a_, const void *b_)
+{
+ struct linehash *a = (struct linehash *) a_;
+ struct linehash *b = (struct linehash *) b_;
+ if (a->hash < b->hash) return -1;
+ if (a->hash > b->hash) return 1;
return 0;
}
+static struct linehash *hash_lines(const unsigned char *buf,
+ unsigned long size)
+{
+ const unsigned char *eobuf = buf + size;
+ struct linehash *line = NULL;
+ int alloc = 0, used = 0;
+
+ while (buf < eobuf) {
+ const unsigned char *ptr = buf;
+ unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
+ if (!buf) {
+ free(line);
+ return NULL;
+ }
+ if (alloc <= used) {
+ alloc = alloc_nr(alloc);
+ line = xrealloc(line, sizeof(*line) * alloc);
+ }
+ line[used].bytes = buf - ptr;
+ line[used].hash = hash;
+ used++;
+ }
+ qsort(line, used, sizeof(*line), linehash_compare);
+
+ /* Terminate the list */
+ if (alloc <= used)
+ line = xrealloc(line, sizeof(*line) * (used+1));
+ line[used].bytes = line[used].hash = 0;
+ return line;
+}
+
int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
- return diffcore_count_changes_1(src, src_size,
- dst, dst_size,
- delta_limit,
- src_copied,
- literal_added);
+ struct linehash *src_lines, *dst_lines;
+ unsigned long sc, la;
+
+ src_lines = hash_lines(src, src_size);
+ if (!src_lines)
+ return -1;
+ dst_lines = hash_lines(dst, dst_size);
+ if (!dst_lines) {
+ free(src_lines);
+ return -1;
+ }
+ sc = la = 0;
+ while (src_lines->bytes && dst_lines->bytes) {
+ int cmp = linehash_compare(src_lines, dst_lines);
+ if (!cmp) {
+ sc += src_lines->bytes;
+ src_lines++;
+ dst_lines++;
+ continue;
+ }
+ if (cmp < 0) {
+ src_lines++;
+ continue;
+ }
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ while (dst_lines->bytes) {
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ *src_copied = sc;
+ *literal_added = la;
+ return 0;
}