summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 445b859)
raw | patch | inline | side by side (parent: 445b859)
author | Shawn O. Pearce <spearce@spearce.org> | |
Mon, 28 Aug 2006 16:22:50 +0000 (12:22 -0400) | ||
committer | Shawn O. Pearce <spearce@spearce.org> | |
Sun, 14 Jan 2007 07:15:10 +0000 (02:15 -0500) |
We now store for every tree entry two modes and two sha1 values;
the base (aka "version 0") and the current/new (aka "version 1").
When we generate a tree object we also regenerate the prior version
object and use that as our base object for a delta. This strategy
saves a significant amount of memory as we can continue to use the
atom pool for file/directory names and only increases each tree
entry by an additional 24 bytes of memory.
Branches should automatically delta against their ancestor tree,
unless the ancestor tree is already at the delta chain limit.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
the base (aka "version 0") and the current/new (aka "version 1").
When we generate a tree object we also regenerate the prior version
object and use that as our base object for a delta. This strategy
saves a significant amount of memory as we can continue to use the
atom pool for file/directory names and only increases each tree
entry by an additional 24 bytes of memory.
Branches should automatically delta against their ancestor tree,
unless the ancestor tree is already at the delta chain limit.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
fast-import.c | patch | blob | history |
diff --git a/fast-import.c b/fast-import.c
index b1b2382560e24b9e8f148eeb8c09c9c19c0ac482..6b011204150111d40cf8eb50117b24236eb214c8 100644 (file)
--- a/fast-import.c
+++ b/fast-import.c
struct last_object
{
void *data;
- unsigned int len;
+ unsigned long len;
unsigned int depth;
unsigned char sha1[20];
};
{
struct tree_content *tree;
struct atom_str* name;
- unsigned int mode;
- unsigned char sha1[20];
+ struct tree_entry_ms
+ {
+ unsigned int mode;
+ unsigned char sha1[20];
+ } versions[2];
};
struct tree_content
{
unsigned int entry_capacity; /* must match avail_tree_content */
unsigned int entry_count;
+ unsigned int delta_depth;
struct tree_entry *entries[FLEX_ARRAY]; /* more */
};
static unsigned long marks_set_count;
static unsigned long object_count_by_type[9];
static unsigned long duplicate_count_by_type[9];
+static unsigned long delta_count_by_type[9];
/* Memory pools */
static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static unsigned long page_size;
/* Table of objects we've written. */
-static unsigned int object_entry_alloc = 1000;
+static unsigned int object_entry_alloc = 5000;
static struct object_entry_pool *blocks;
static struct object_entry *object_table[1 << 16];
static struct mark_set *marks;
t = (struct tree_content*)f;
t->entry_count = 0;
+ t->delta_depth = 0;
return t;
}
{
struct tree_content *r = new_tree_content(t->entry_count + amt);
r->entry_count = t->entry_count;
+ r->delta_depth = t->delta_depth;
memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
release_tree_content(t);
return r;
deflateInit(&s, zlib_compression_level);
if (delta) {
+ delta_count_by_type[type]++;
last->depth++;
s.next_in = delta;
s.avail_in = deltalen;
return result;
}
-static void *unpack_entry(unsigned long offset, unsigned long *sizep);
+static void *unpack_entry(unsigned long offset,
+ unsigned long *sizep,
+ unsigned int *delta_depth);
static void *unpack_delta_entry(unsigned long offset,
unsigned long delta_size,
- unsigned long *sizep)
+ unsigned long *sizep,
+ unsigned int *delta_depth)
{
struct object_entry *base_oe;
unsigned char *base_sha1;
base_oe = find_object(base_sha1);
if (!base_oe)
die("I'm broken; I can't find a base I know must be here.");
- base = unpack_entry(base_oe->offset, &base_size);
+ base = unpack_entry(base_oe->offset, &base_size, delta_depth);
delta_data = unpack_non_delta_entry(offset + 20, delta_size);
result = patch_delta(base, base_size,
delta_data, delta_size,
free(delta_data);
free(base);
*sizep = result_size;
+ (*delta_depth)++;
return result;
}
-static void *unpack_entry(unsigned long offset, unsigned long *sizep)
+static void *unpack_entry(unsigned long offset,
+ unsigned long *sizep,
+ unsigned int *delta_depth)
{
unsigned long size;
enum object_type kind;
offset = unpack_object_header(offset, &kind, &size);
switch (kind) {
case OBJ_DELTA:
- return unpack_delta_entry(offset, size, sizep);
+ return unpack_delta_entry(offset, size, sizep, delta_depth);
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
*sizep = size;
+ *delta_depth = 0;
return unpack_non_delta_entry(offset, size);
default:
die("I created an object I can't read!");
static void load_tree(struct tree_entry *root)
{
+ unsigned char* sha1 = root->versions[1].sha1;
struct object_entry *myoe;
struct tree_content *t;
unsigned long size;
const char *c;
root->tree = t = new_tree_content(8);
- if (is_null_sha1(root->sha1))
+ if (is_null_sha1(sha1))
return;
- myoe = find_object(root->sha1);
+ myoe = find_object(sha1);
if (myoe) {
if (myoe->type != OBJ_TREE)
- die("Not a tree: %s", sha1_to_hex(root->sha1));
- buf = unpack_entry(myoe->offset, &size);
+ die("Not a tree: %s", sha1_to_hex(sha1));
+ buf = unpack_entry(myoe->offset, &size, &t->delta_depth);
} else {
char type[20];
- buf = read_sha1_file(root->sha1, type, &size);
+ buf = read_sha1_file(sha1, type, &size);
if (!buf || strcmp(type, tree_type))
- die("Can't load tree %s", sha1_to_hex(root->sha1));
+ die("Can't load tree %s", sha1_to_hex(sha1));
}
c = buf;
t->entries[t->entry_count++] = e;
e->tree = NULL;
- c = get_mode(c, &e->mode);
+ c = get_mode(c, &e->versions[1].mode);
if (!c)
- die("Corrupt mode in %s", sha1_to_hex(root->sha1));
+ die("Corrupt mode in %s", sha1_to_hex(sha1));
+ e->versions[0].mode = e->versions[1].mode;
e->name = to_atom(c, strlen(c));
c += e->name->str_len + 1;
- hashcpy(e->sha1, c);
+ hashcpy(e->versions[0].sha1, (unsigned char*)c);
+ hashcpy(e->versions[1].sha1, (unsigned char*)c);
c += 20;
}
free(buf);
}
-static int tecmp (const void *_a, const void *_b)
+static int tecmp0 (const void *_a, const void *_b)
{
struct tree_entry *a = *((struct tree_entry**)_a);
struct tree_entry *b = *((struct tree_entry**)_b);
return base_name_compare(
- a->name->str_dat, a->name->str_len, a->mode,
- b->name->str_dat, b->name->str_len, b->mode);
+ a->name->str_dat, a->name->str_len, a->versions[0].mode,
+ b->name->str_dat, b->name->str_len, b->versions[0].mode);
}
-static void store_tree(struct tree_entry *root)
+static int tecmp1 (const void *_a, const void *_b)
{
- struct tree_content *t = root->tree;
+ struct tree_entry *a = *((struct tree_entry**)_a);
+ struct tree_entry *b = *((struct tree_entry**)_b);
+ return base_name_compare(
+ a->name->str_dat, a->name->str_len, a->versions[1].mode,
+ b->name->str_dat, b->name->str_len, b->versions[1].mode);
+}
+
+static void* mktree(struct tree_content *t, int v, unsigned long *szp)
+{
+ size_t maxlen = 0;
unsigned int i;
- size_t maxlen;
char *buf, *c;
- if (!is_null_sha1(root->sha1))
- return;
+ if (!v)
+ qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp0);
+ else
+ qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp1);
- maxlen = 0;
for (i = 0; i < t->entry_count; i++) {
- maxlen += t->entries[i]->name->str_len + 34;
- if (t->entries[i]->tree)
- store_tree(t->entries[i]);
+ if (t->entries[i]->versions[v].mode)
+ maxlen += t->entries[i]->name->str_len + 34;
}
- qsort(t->entries, t->entry_count, sizeof(t->entries[0]), tecmp);
buf = c = xmalloc(maxlen);
for (i = 0; i < t->entry_count; i++) {
struct tree_entry *e = t->entries[i];
- c += sprintf(c, "%o", e->mode);
+ if (!e->versions[v].mode)
+ continue;
+ c += sprintf(c, "%o", e->versions[v].mode);
*c++ = ' ';
strcpy(c, e->name->str_dat);
c += e->name->str_len + 1;
- hashcpy(c, e->sha1);
+ hashcpy((unsigned char*)c, e->versions[v].sha1);
c += 20;
}
- store_object(OBJ_TREE, buf, c - buf, NULL, root->sha1, 0);
- free(buf);
+
+ *szp = c - buf;
+ return buf;
+}
+
+static void store_tree(struct tree_entry *root)
+{
+ struct tree_content *t = root->tree;
+ unsigned int i, j, del;
+ unsigned long vers1len;
+ void **vers1dat;
+ struct last_object lo;
+
+ if (!is_null_sha1(root->versions[1].sha1))
+ return;
+
+ for (i = 0; i < t->entry_count; i++) {
+ if (t->entries[i]->tree)
+ store_tree(t->entries[i]);
+ }
+
+ if (is_null_sha1(root->versions[0].sha1)
+ || !find_object(root->versions[0].sha1)) {
+ lo.data = NULL;
+ lo.depth = 0;
+ } else {
+ lo.data = mktree(t, 0, &lo.len);
+ lo.depth = t->delta_depth;
+ hashcpy(lo.sha1, root->versions[0].sha1);
+ }
+ vers1dat = mktree(t, 1, &vers1len);
+
+ store_object(OBJ_TREE, vers1dat, vers1len,
+ &lo, root->versions[1].sha1, 0);
+ /* note: lo.dat (if created) was freed by store_object */
+ free(vers1dat);
+
+ t->delta_depth = lo.depth;
+ hashcpy(root->versions[0].sha1, root->versions[1].sha1);
+ for (i = 0, j = 0, del = 0; i < t->entry_count; i++) {
+ struct tree_entry *e = t->entries[i];
+ if (e->versions[1].mode) {
+ e->versions[0].mode = e->versions[1].mode;
+ hashcpy(e->versions[0].sha1, e->versions[1].sha1);
+ t->entries[j++] = e;
+ } else {
+ release_tree_entry(e);
+ del++;
+ }
+ }
+ t->entry_count -= del;
}
static int tree_content_set(
e = t->entries[i];
if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
if (!slash1) {
- if (e->mode == mode && !hashcmp(e->sha1, sha1))
+ if (e->versions[1].mode == mode
+ && !hashcmp(e->versions[1].sha1, sha1))
return 0;
- e->mode = mode;
- hashcpy(e->sha1, sha1);
+ e->versions[1].mode = mode;
+ hashcpy(e->versions[1].sha1, sha1);
if (e->tree) {
release_tree_content_recursive(e->tree);
e->tree = NULL;
}
- hashclr(root->sha1);
+ hashclr(root->versions[1].sha1);
return 1;
}
- if (!S_ISDIR(e->mode)) {
+ if (!S_ISDIR(e->versions[1].mode)) {
e->tree = new_tree_content(8);
- e->mode = S_IFDIR;
+ e->versions[1].mode = S_IFDIR;
}
if (!e->tree)
load_tree(e);
if (tree_content_set(e, slash1 + 1, sha1, mode)) {
- hashclr(root->sha1);
+ hashclr(root->versions[1].sha1);
return 1;
}
return 0;
root->tree = t = grow_tree_content(t, 8);
e = new_tree_entry();
e->name = to_atom(p, n);
+ e->versions[0].mode = 0;
+ hashclr(e->versions[0].sha1);
t->entries[t->entry_count++] = e;
if (slash1) {
e->tree = new_tree_content(8);
- e->mode = S_IFDIR;
+ e->versions[1].mode = S_IFDIR;
tree_content_set(e, slash1 + 1, sha1, mode);
} else {
e->tree = NULL;
- e->mode = mode;
- hashcpy(e->sha1, sha1);
+ e->versions[1].mode = mode;
+ hashcpy(e->versions[1].sha1, sha1);
}
- hashclr(root->sha1);
+ hashclr(root->versions[1].sha1);
return 1;
}
for (i = 0; i < t->entry_count; i++) {
e = t->entries[i];
if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
- if (!slash1 || !S_ISDIR(e->mode))
+ if (!slash1 || !S_ISDIR(e->versions[1].mode))
goto del_entry;
if (!e->tree)
load_tree(e);
if (tree_content_remove(e, slash1 + 1)) {
if (!e->tree->entry_count)
goto del_entry;
- hashclr(root->sha1);
+ hashclr(root->versions[1].sha1);
return 1;
}
return 0;
return 0;
del_entry:
- for (i++; i < t->entry_count; i++)
- t->entries[i-1] = t->entries[i];
- t->entry_count--;
- release_tree_entry(e);
- hashclr(root->sha1);
+ if (e->tree) {
+ release_tree_content_recursive(e->tree);
+ e->tree = NULL;
+ }
+ e->versions[1].mode = 0;
+ hashclr(e->versions[1].sha1);
+ hashclr(root->versions[1].sha1);
return 1;
}
if (b == s)
die("Can't create a branch from itself: %s", b->name);
else if (s) {
+ unsigned char *t = s->branch_tree.versions[1].sha1;
hashcpy(b->sha1, s->sha1);
- hashcpy(b->branch_tree.sha1, s->branch_tree.sha1);
+ hashcpy(b->branch_tree.versions[0].sha1, t);
+ hashcpy(b->branch_tree.versions[1].sha1, t);
} else if (*from == ':') {
unsigned long idnum = strtoul(from + 1, NULL, 10);
struct object_entry *oe = find_mark(idnum);
unsigned long size;
+ unsigned int depth;
char *buf;
if (oe->type != OBJ_COMMIT)
die("Mark :%lu not a commit", idnum);
hashcpy(b->sha1, oe->sha1);
- buf = unpack_entry(oe->offset, &size);
+ buf = unpack_entry(oe->offset, &size, &depth);
if (!buf || size < 46)
die("Not a valid commit: %s", from);
if (memcmp("tree ", buf, 5)
- || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+ || get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
die("The commit %s is corrupt", sha1_to_hex(b->sha1));
free(buf);
+ hashcpy(b->branch_tree.versions[0].sha1,
+ b->branch_tree.versions[1].sha1);
} else if (!get_sha1(from, b->sha1)) {
- if (is_null_sha1(b->sha1))
- hashclr(b->branch_tree.sha1);
- else {
+ if (is_null_sha1(b->sha1)) {
+ hashclr(b->branch_tree.versions[0].sha1);
+ hashclr(b->branch_tree.versions[1].sha1);
+ } else {
unsigned long size;
char *buf;
if (!buf || size < 46)
die("Not a valid commit: %s", from);
if (memcmp("tree ", buf, 5)
- || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+ || get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
die("The commit %s is corrupt", sha1_to_hex(b->sha1));
free(buf);
+ hashcpy(b->branch_tree.versions[0].sha1,
+ b->branch_tree.versions[1].sha1);
}
} else
die("Invalid ref name or SHA1 expression: %s", from);
? strlen(author) + strlen(committer)
: 2 * strlen(committer)));
sp = body;
- sp += sprintf(sp, "tree %s\n", sha1_to_hex(b->branch_tree.sha1));
+ sp += sprintf(sp, "tree %s\n",
+ sha1_to_hex(b->branch_tree.versions[1].sha1));
if (!is_null_sha1(b->sha1))
sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
if (author)
fprintf(stderr, "---------------------------------------------------\n");
fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count);
- fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
- fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
- fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
- fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
+ fprintf(stderr, " blobs : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
+ fprintf(stderr, " trees : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
+ fprintf(stderr, " commits: %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
+ fprintf(stderr, " tags : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
fprintf(stderr, "Total branches: %10lu (%10lu loads )\n", branch_count, branch_load_count);
fprintf(stderr, " marks: %10u (%10lu unique )\n", (1 << marks->shift) * 1024, marks_set_count);
fprintf(stderr, " atoms: %10u\n", atom_cnt);