summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 2fce1f3)
raw | patch | inline | side by side (parent: 2fce1f3)
author | Shawn O. Pearce <spearce@spearce.org> | |
Mon, 15 Jan 2007 13:00:49 +0000 (08:00 -0500) | ||
committer | Shawn O. Pearce <spearce@spearce.org> | |
Mon, 15 Jan 2007 13:00:49 +0000 (08:00 -0500) |
When the number of objects or number of bytes gets close to the limit
allowed by the packfile format (or configured on the command line by
our caller) we should automatically checkpoint the current packfile
and start a new one before writing the object out. This does, however,
require that we abandon the delta (if we had one), as it's not valid
in a new packfile.
I also added the simple rule that if we got a delta back but the
delta itself is the same size as or larger than the uncompressed
object, we ignore the delta and just store the object data. This
should avoid some really bad behavior caused by our current delta
strategy.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
allowed by the packfile format (or configured on the command line by
our caller) we should automatically checkpoint the current packfile
and start a new one before writing the object out. This does, however,
require that we abandon the delta (if we had one), as it's not valid
in a new packfile.
I also added the simple rule that if we got a delta back but the
delta itself is the same size as or larger than the uncompressed
object, we ignore the delta and just store the object data. This
should avoid some really bad behavior caused by our current delta
strategy.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
fast-import.c | patch | blob | history |
diff --git a/fast-import.c b/fast-import.c
index cfadda043296474ebb5f11a917eb8aa745ba786a..c19567f68cee1421aa53f68d89f250e5bc99e51a 100644 (file)
--- a/fast-import.c
+++ b/fast-import.c
/* Stats and misc. counters */
static unsigned long max_depth = 10;
+static unsigned long max_objects = -1;
+static unsigned long max_packsize = -1;
static unsigned long alloc_count;
static unsigned long branch_count;
static unsigned long branch_load_count;
alloc_objects(object_entry_alloc);
e = blocks->next_free++;
- e->pack_id = pack_id;
hashcpy(e->sha1, sha1);
return e;
}
idx = xmalloc(object_count * sizeof(struct object_entry*));
c = idx;
for (o = blocks; o; o = o->next_pool)
- for (e = o->next_free; e-- != o->entries;) {
- if (pack_id != e->pack_id)
- goto sort_index;
- *c++ = e;
- }
-sort_index:
+ for (e = o->next_free; e-- != o->entries;)
+ if (pack_id == e->pack_id)
+ *c++ = e;
last = idx + object_count;
if (c != last)
die("internal consistency error creating the index");
last_blob.depth = 0;
}
+static void checkpoint()
+{
+ end_packfile();
+ start_packfile();
+}
+
static size_t encode_header(
enum object_type type,
size_t size,
duplicate_count_by_type[type]++;
return 1;
}
- e->type = type;
- e->offset = pack_size;
- object_count++;
- object_count_by_type[type]++;
- if (last && last->data && last->depth < max_depth)
+ if (last && last->data && last->depth < max_depth) {
delta = diff_delta(last->data, last->len,
dat, datlen,
&deltalen, 0);
- else
- delta = 0;
+ if (delta && deltalen >= datlen) {
+ free(delta);
+ delta = NULL;
+ }
+ } else
+ delta = NULL;
memset(&s, 0, sizeof(s));
deflateInit(&s, zlib_compression_level);
+ if (delta) {
+ s.next_in = delta;
+ s.avail_in = deltalen;
+ } else {
+ s.next_in = dat;
+ s.avail_in = datlen;
+ }
+ s.avail_out = deflateBound(&s, s.avail_in);
+ s.next_out = out = xmalloc(s.avail_out);
+ while (deflate(&s, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&s);
+
+ /* Determine if we should auto-checkpoint. */
+ if ((object_count + 1) > max_objects
+ || (object_count + 1) < object_count
+ || (pack_size + 60 + s.total_out) > max_packsize
+ || (pack_size + 60 + s.total_out) < pack_size) {
+
+ /* This new object needs to *not* have the current pack_id. */
+ e->pack_id = pack_id + 1;
+ checkpoint();
+
+ /* We cannot carry a delta into the new pack. */
+ if (delta) {
+ free(delta);
+ delta = NULL;
+ }
+ memset(&s, 0, sizeof(s));
+ deflateInit(&s, zlib_compression_level);
+ s.next_in = dat;
+ s.avail_in = datlen;
+ s.avail_out = deflateBound(&s, s.avail_in);
+ s.next_out = out;
+ while (deflate(&s, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&s);
+ }
+
+ e->type = type;
+ e->pack_id = pack_id;
+ e->offset = pack_size;
+ object_count++;
+ object_count_by_type[type]++;
if (delta) {
unsigned long ofs = e->offset - last->offset;
delta_count_by_type[type]++;
last->depth++;
- s.next_in = delta;
- s.avail_in = deltalen;
hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
write_or_die(pack_fd, hdr, hdrlen);
} else {
if (last)
last->depth = 0;
- s.next_in = dat;
- s.avail_in = datlen;
hdrlen = encode_header(type, datlen, hdr);
write_or_die(pack_fd, hdr, hdrlen);
pack_size += hdrlen;
}
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xmalloc(s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
-
write_or_die(pack_fd, out, s.total_out);
pack_size += s.total_out;
static void cmd_checkpoint()
{
- if (object_count) {
- end_packfile();
- start_packfile();
- }
+ if (object_count)
+ checkpoint();
read_next_command();
}
break;
else if (!strncmp(a, "--objects=", 10))
est_obj_cnt = strtoul(a + 10, NULL, 0);
+ else if (!strncmp(a, "--max-objects-per-pack=", 23))
+ max_objects = strtoul(a + 23, NULL, 0);
+ else if (!strncmp(a, "--max-pack-size=", 16))
+ max_packsize = strtoul(a + 16, NULL, 0) * 1024 * 1024;
else if (!strncmp(a, "--depth=", 8))
max_depth = strtoul(a + 8, NULL, 0);
else if (!strncmp(a, "--active-branches=", 18))