X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=index-pack.c;h=58c4a9c41dd7a05b86d40e6eeee33ba0a3fb6c4f;hb=ed82edc402c271a707da632083f1f4c19155d573;hp=43e007f8238006313e95b7d386a590ce51144dfa;hpb=85023577a8f4b540aa64aa37f6f44578c0c305a3;p=git.git diff --git a/index-pack.c b/index-pack.c index 43e007f82..58c4a9c41 100644 --- a/index-pack.c +++ b/index-pack.c @@ -6,15 +6,17 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "progress.h" static const char index_pack_usage[] = "git-index-pack [-v] [-o ] [{ ---keep | --keep= }] { | --stdin [--fix-thin] [] }"; struct object_entry { - unsigned long offset; + off_t offset; unsigned long size; unsigned int hdr_size; + uint32_t crc32; enum object_type type; enum object_type real_type; unsigned char sha1[20]; @@ -22,7 +24,7 @@ struct object_entry union delta_base { unsigned char sha1[20]; - unsigned long offset; + off_t offset; }; /* @@ -46,45 +48,14 @@ static int nr_resolved_deltas; static int from_stdin; static int verbose; -static volatile sig_atomic_t progress_update; - -static void progress_interval(int signum) -{ - progress_update = 1; -} - -static void setup_progress_signal(void) -{ - struct sigaction sa; - struct itimerval v; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = progress_interval; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - sigaction(SIGALRM, &sa, NULL); - - v.it_interval.tv_sec = 1; - v.it_interval.tv_usec = 0; - v.it_value = v.it_interval; - setitimer(ITIMER_REAL, &v, NULL); - -} - -static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc) -{ - unsigned percent = n * 100 / total; - if (percent != last_pc || progress_update) { - fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total); - progress_update = 0; - } - return percent; -} +static struct progress progress; /* We always read in 4kB chunks. */ static unsigned char input_buffer[4096]; -static unsigned long input_offset, input_len, consumed_bytes; +static unsigned int input_offset, input_len; +static off_t consumed_bytes; static SHA_CTX input_ctx; +static uint32_t input_crc32; static int input_fd, output_fd, pack_fd; /* Discard current buffer used content. */ @@ -111,7 +82,7 @@ static void *fill(int min) die("cannot fill %d bytes", min); flush(); do { - int ret = xread(input_fd, input_buffer + input_len, + ssize_t ret = xread(input_fd, input_buffer + input_len, sizeof(input_buffer) - input_len); if (ret <= 0) { if (!ret) @@ -127,8 +98,13 @@ static void use(int bytes) { if (bytes > input_len) die("used more bytes than were available"); + input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes); input_len -= bytes; input_offset += bytes; + + /* make sure off_t is sufficiently large not to wrap */ + if (consumed_bytes > consumed_bytes + bytes) + die("pack too large for current definition of off_t"); consumed_bytes += bytes; } @@ -139,7 +115,7 @@ static const char *open_pack_file(const char *pack_name) if (!pack_name) { static char tmpfile[PATH_MAX]; snprintf(tmpfile, sizeof(tmpfile), - "%s/pack_XXXXXX", get_object_directory()); + "%s/tmp_pack_XXXXXX", get_object_directory()); output_fd = mkstemp(tmpfile); pack_name = xstrdup(tmpfile); } else @@ -216,10 +192,13 @@ static void *unpack_entry_data(unsigned long offset, unsigned long size) static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base) { unsigned char *p, c; - unsigned long size, base_offset; + unsigned long size; + off_t base_offset; unsigned shift; + void *data; obj->offset = consumed_bytes; + input_crc32 = crc32(0, Z_NULL, 0); p = fill(1); c = *p; @@ -249,7 +228,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_ base_offset = c & 127; while (c & 128) { base_offset += 1; - if (!base_offset || base_offset & ~(~0UL >> 7)) + if (!base_offset || MSB(base_offset, 7)) bad_object(obj->offset, "offset value overflow for delta base object"); p = fill(1); c = *p; @@ -266,24 +245,32 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_ case OBJ_TAG: break; default: - bad_object(obj->offset, "bad object type %d", obj->type); + bad_object(obj->offset, "unknown object type %d", obj->type); } obj->hdr_size = consumed_bytes - obj->offset; - return unpack_entry_data(obj->offset, obj->size); + data = unpack_entry_data(obj->offset, obj->size); + obj->crc32 = input_crc32; + return data; } static void *get_data_from_pack(struct object_entry *obj) { unsigned long from = obj[0].offset + obj[0].hdr_size; unsigned long len = obj[1].offset - from; + unsigned long rdy = 0; unsigned char *src, *data; z_stream stream; int st; src = xmalloc(len); - if (pread(pack_fd, src, len, from) != len) - die("cannot pread pack file: %s", strerror(errno)); + data = src; + do { + ssize_t n = pread(pack_fd, data + rdy, len - rdy, from + rdy); + if (n <= 0) + die("cannot pread pack file: %s", strerror(errno)); + rdy += n; + } while (rdy < len); data = xmalloc(obj->size); memset(&stream, 0, sizeof(stream)); stream.next_out = data; @@ -341,26 +328,19 @@ static int find_delta_children(const union delta_base *base, static void sha1_object(const void *data, unsigned long size, enum object_type type, unsigned char *sha1) { - SHA_CTX ctx; - char header[50]; - int header_size; - const char *type_str; - - switch (type) { - case OBJ_COMMIT: type_str = commit_type; break; - case OBJ_TREE: type_str = tree_type; break; - case OBJ_BLOB: type_str = blob_type; break; - case OBJ_TAG: type_str = tag_type; break; - default: - die("bad type %d", type); + hash_sha1_file(data, size, typename(type), sha1); + if (has_sha1_file(sha1)) { + void *has_data; + enum object_type has_type; + unsigned long has_size; + has_data = read_sha1_file(sha1, &has_type, &has_size); + if (!has_data) + die("cannot read existing object %s", sha1_to_hex(sha1)); + if (size != has_size || type != has_type || + memcmp(data, has_data, size) != 0) + die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1)); + free(has_data); } - - header_size = sprintf(header, "%s %lu", type_str, size) + 1; - - SHA1_Init(&ctx); - SHA1_Update(&ctx, header, header_size); - SHA1_Update(&ctx, data, size); - SHA1_Final(sha1, &ctx); } static void resolve_delta(struct object_entry *delta_obj, void *base_data, @@ -416,7 +396,7 @@ static int compare_delta_entry(const void *a, const void *b) /* Parse all objects and return the pack content SHA1 hash */ static void parse_pack_objects(unsigned char *sha1) { - int i, percent = -1; + int i; struct delta_entry *delta = deltas; void *data; struct stat st; @@ -428,7 +408,7 @@ static void parse_pack_objects(unsigned char *sha1) * - remember base (SHA1 or offset) for all deltas. */ if (verbose) - fprintf(stderr, "Indexing %d objects.\n", nr_objects); + start_progress(&progress, "Indexing %u objects...", "", nr_objects); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; data = unpack_raw_entry(obj, &delta->base); @@ -441,11 +421,11 @@ static void parse_pack_objects(unsigned char *sha1) sha1_object(data, obj->size, obj->type, obj->sha1); free(data); if (verbose) - percent = display_progress(i+1, nr_objects, percent); + display_progress(&progress, i+1); } objects[i].offset = consumed_bytes; if (verbose) - fputc('\n', stderr); + stop_progress(&progress); /* Check pack integrity */ flush(); @@ -457,7 +437,8 @@ static void parse_pack_objects(unsigned char *sha1) /* If input_fd is a file, we should have reached its end now. */ if (fstat(input_fd, &st)) die("cannot fstat packfile: %s", strerror(errno)); - if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes) + if (S_ISREG(st.st_mode) && + lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size) die("pack has junk at the end"); if (!nr_deltas) @@ -476,7 +457,7 @@ static void parse_pack_objects(unsigned char *sha1) * for some more deltas. */ if (verbose) - fprintf(stderr, "Resolving %d deltas.\n", nr_deltas); + start_progress(&progress, "Resolving %u deltas...", "", nr_deltas); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; union delta_base base; @@ -508,14 +489,11 @@ static void parse_pack_objects(unsigned char *sha1) } free(data); if (verbose) - percent = display_progress(nr_resolved_deltas, - nr_deltas, percent); + display_progress(&progress, nr_resolved_deltas); } - if (verbose && nr_resolved_deltas == nr_deltas) - fputc('\n', stderr); } -static int write_compressed(int fd, void *in, unsigned int size) +static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc) { z_stream stream; unsigned long maxsize; @@ -536,11 +514,12 @@ static int write_compressed(int fd, void *in, unsigned int size) size = stream.total_out; write_or_die(fd, out, size); + *obj_crc = crc32(*obj_crc, out, size); free(out); return size; } -static void append_obj_to_pack(void *buf, +static void append_obj_to_pack(const unsigned char *sha1, void *buf, unsigned long size, enum object_type type) { struct object_entry *obj = &objects[nr_objects++]; @@ -556,9 +535,11 @@ static void append_obj_to_pack(void *buf, } header[n++] = c; write_or_die(output_fd, header, n); + obj[0].crc32 = crc32(0, Z_NULL, 0); + obj[0].crc32 = crc32(obj[0].crc32, header, n); obj[1].offset = obj[0].offset + n; - obj[1].offset += write_compressed(output_fd, buf, size); - sha1_object(buf, size, type, obj->sha1); + obj[1].offset += write_compressed(output_fd, buf, size, &obj[0].crc32); + hashcpy(obj->sha1, sha1); } static int delta_pos_compare(const void *_a, const void *_b) @@ -571,7 +552,7 @@ static int delta_pos_compare(const void *_a, const void *_b) static void fix_unresolved_deltas(int nr_unresolved) { struct delta_entry **sorted_by_pos; - int i, n = 0, percent = -1; + int i, n = 0; /* * Since many unresolved deltas may well be themselves base objects @@ -595,70 +576,34 @@ static void fix_unresolved_deltas(int nr_unresolved) struct delta_entry *d = sorted_by_pos[i]; void *data; unsigned long size; - char type[10]; - enum object_type obj_type; + enum object_type type; int j, first, last; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - data = read_sha1_file(d->base.sha1, type, &size); + data = read_sha1_file(d->base.sha1, &type, &size); if (!data) continue; - if (!strcmp(type, blob_type)) obj_type = OBJ_BLOB; - else if (!strcmp(type, tree_type)) obj_type = OBJ_TREE; - else if (!strcmp(type, commit_type)) obj_type = OBJ_COMMIT; - else if (!strcmp(type, tag_type)) obj_type = OBJ_TAG; - else die("base object %s is of type '%s'", - sha1_to_hex(d->base.sha1), type); find_delta_children(&d->base, &first, &last); for (j = first; j <= last; j++) { struct object_entry *child = objects + deltas[j].obj_no; if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, data, size, obj_type); + resolve_delta(child, data, size, type); } - append_obj_to_pack(data, size, obj_type); + if (check_sha1_signature(d->base.sha1, data, size, typename(type))) + die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); + append_obj_to_pack(d->base.sha1, data, size, type); free(data); if (verbose) - percent = display_progress(nr_resolved_deltas, - nr_deltas, percent); + display_progress(&progress, nr_resolved_deltas); } free(sorted_by_pos); - if (verbose) - fputc('\n', stderr); } -static void readjust_pack_header_and_sha1(unsigned char *sha1) -{ - struct pack_header hdr; - SHA_CTX ctx; - int size; - - /* Rewrite pack header with updated object number */ - if (lseek(output_fd, 0, SEEK_SET) != 0) - die("cannot seek back: %s", strerror(errno)); - if (xread(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) - die("cannot read pack header back: %s", strerror(errno)); - hdr.hdr_entries = htonl(nr_objects); - if (lseek(output_fd, 0, SEEK_SET) != 0) - die("cannot seek back: %s", strerror(errno)); - write_or_die(output_fd, &hdr, sizeof(hdr)); - if (lseek(output_fd, 0, SEEK_SET) != 0) - die("cannot seek back: %s", strerror(errno)); - - /* Recompute and store the new pack's SHA1 */ - SHA1_Init(&ctx); - do { - unsigned char *buf[4096]; - size = xread(output_fd, buf, sizeof(buf)); - if (size < 0) - die("cannot read pack data back: %s", strerror(errno)); - SHA1_Update(&ctx, buf, size); - } while (size > 0); - SHA1_Final(sha1, &ctx); - write_or_die(output_fd, sha1, 20); -} +static uint32_t index_default_version = 1; +static uint32_t index_off32_limit = 0x7fffffff; static int sha1_compare(const void *_a, const void *_b) { @@ -675,9 +620,10 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1) { struct sha1file *f; struct object_entry **sorted_by_sha, **list, **last; - unsigned int array[256]; + uint32_t array[256]; int i, fd; SHA_CTX ctx; + uint32_t index_version; if (nr_objects) { sorted_by_sha = @@ -688,7 +634,6 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1) sorted_by_sha[i] = &objects[i]; qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]), sha1_compare); - } else sorted_by_sha = list = last = NULL; @@ -696,7 +641,7 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1) if (!index_name) { static char tmpfile[PATH_MAX]; snprintf(tmpfile, sizeof(tmpfile), - "%s/index_XXXXXX", get_object_directory()); + "%s/tmp_idx_XXXXXX", get_object_directory()); fd = mkstemp(tmpfile); index_name = xstrdup(tmpfile); } else { @@ -707,6 +652,17 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1) die("unable to create %s: %s", index_name, strerror(errno)); f = sha1fd(fd, index_name); + /* if last object's offset is >= 2^31 we should use index V2 */ + index_version = (objects[nr_objects-1].offset >> 31) ? 2 : index_default_version; + + /* index versions 2 and above need a header */ + if (index_version >= 2) { + struct pack_idx_header hdr; + hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); + hdr.idx_version = htonl(index_version); + sha1write(f, &hdr, sizeof(hdr)); + } + /* * Write the first-level table (the list is sorted, * but we use a 256-entry lookup to be able to avoid @@ -723,24 +679,61 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1) array[i] = htonl(next - sorted_by_sha); list = next; } - sha1write(f, array, 256 * sizeof(int)); + sha1write(f, array, 256 * 4); - /* recompute the SHA1 hash of sorted object names. - * currently pack-objects does not do this, but that - * can be fixed. - */ + /* compute the SHA1 hash of sorted object names. */ SHA1_Init(&ctx); + /* * Write the actual SHA1 entries.. */ list = sorted_by_sha; for (i = 0; i < nr_objects; i++) { struct object_entry *obj = *list++; - unsigned int offset = htonl(obj->offset); - sha1write(f, &offset, 4); + if (index_version < 2) { + uint32_t offset = htonl(obj->offset); + sha1write(f, &offset, 4); + } sha1write(f, obj->sha1, 20); SHA1_Update(&ctx, obj->sha1, 20); } + + if (index_version >= 2) { + unsigned int nr_large_offset = 0; + + /* write the crc32 table */ + list = sorted_by_sha; + for (i = 0; i < nr_objects; i++) { + struct object_entry *obj = *list++; + uint32_t crc32_val = htonl(obj->crc32); + sha1write(f, &crc32_val, 4); + } + + /* write the 32-bit offset table */ + list = sorted_by_sha; + for (i = 0; i < nr_objects; i++) { + struct object_entry *obj = *list++; + uint32_t offset = (obj->offset <= index_off32_limit) ? + obj->offset : (0x80000000 | nr_large_offset++); + offset = htonl(offset); + sha1write(f, &offset, 4); + } + + /* write the large offset table */ + list = sorted_by_sha; + while (nr_large_offset) { + struct object_entry *obj = *list++; + uint64_t offset = obj->offset; + if (offset > index_off32_limit) { + uint32_t split[2]; + split[0] = htonl(offset >> 32); + split[1] = htonl(offset & 0xffffffff); + sha1write(f, split, 8); + nr_large_offset--; + } + } + } + sha1write(f, sha1, 20); sha1close(f, NULL, 1); free(sorted_by_sha); @@ -753,7 +746,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, const char *keep_name, const char *keep_msg, unsigned char *sha1) { - char *report = "pack"; + const char *report = "pack"; char name[PATH_MAX]; int err; @@ -814,7 +807,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, char buf[48]; int len = snprintf(buf, sizeof(buf), "%s\t%s\n", report, sha1_to_hex(sha1)); - xwrite(1, buf, len); + write_or_die(1, buf, len); /* * Let's just mimic git-unpack-objects here and write @@ -849,9 +842,9 @@ int main(int argc, char **argv) fix_thin_pack = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; - } else if (!strncmp(arg, "--keep=", 7)) { + } else if (!prefixcmp(arg, "--keep=")) { keep_msg = arg + 7; - } else if (!strncmp(arg, "--pack_header=", 14)) { + } else if (!prefixcmp(arg, "--pack_header=")) { struct pack_header *hdr; char *c; @@ -870,6 +863,15 @@ int main(int argc, char **argv) if (index_name || (i+1) >= argc) usage(index_pack_usage); index_name = argv[++i]; + } else if (!prefixcmp(arg, "--index-version=")) { + char *c; + index_default_version = strtoul(arg + 16, &c, 10); + if (index_default_version > 2) + die("bad %s", arg); + if (*c == ',') + index_off32_limit = strtoul(c+1, &c, 0); + if (*c || index_off32_limit & 0x80000000) + die("bad %s", arg); } else usage(index_pack_usage); continue; @@ -909,10 +911,13 @@ int main(int argc, char **argv) parse_pack_header(); objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); - if (verbose) - setup_progress_signal(); parse_pack_objects(sha1); - if (nr_deltas != nr_resolved_deltas) { + if (nr_deltas == nr_resolved_deltas) { + if (verbose) + stop_progress(&progress); + /* Flush remaining pack final 20-byte SHA1. */ + flush(); + } else { if (fix_thin_pack) { int nr_unresolved = nr_deltas - nr_resolved_deltas; int nr_objects_initial = nr_objects; @@ -922,17 +927,17 @@ int main(int argc, char **argv) (nr_objects + nr_unresolved + 1) * sizeof(*objects)); fix_unresolved_deltas(nr_unresolved); - if (verbose) + if (verbose) { + stop_progress(&progress); fprintf(stderr, "%d objects were added to complete this thin pack.\n", nr_objects - nr_objects_initial); - readjust_pack_header_and_sha1(sha1); + } + fixup_pack_header_footer(output_fd, sha1, + curr_pack, nr_objects); } if (nr_deltas != nr_resolved_deltas) die("pack has %d unresolved deltas", nr_deltas - nr_resolved_deltas); - } else { - /* Flush remaining pack final 20-byte SHA1. */ - flush(); } free(deltas); curr_index = write_index_file(index_name, sha1);