Add git-index-pack utility

author Sergey Vlasov <vsu@altlinux.ru>

Wed, 12 Oct 2005 19:01:31 +0000 (12:01 -0700)

committer Junio C Hamano <junkio@cox.net>

Thu, 13 Oct 2005 01:32:02 +0000 (18:32 -0700)
author Sergey Vlasov <vsu@altlinux.ru>
Wed, 12 Oct 2005 19:01:31 +0000 (12:01 -0700)
committer Junio C Hamano <junkio@cox.net>
Thu, 13 Oct 2005 01:32:02 +0000 (18:32 -0700)
diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt

new file mode 100644 (file)

index 0000000..71ce557
--- /dev/null
+++ b/Documentation/git-index-pack.txt
@@ -0,0 +1,44 @@
+git-index-pack(1)
+=================
+
+NAME
+----
+git-index-pack - Build pack index file for an existing packed archive
+
+
+SYNOPSIS
+--------
+'git-index-pack' [-o <index-file>] <pack-file>
+
+
+DESCRIPTION
+-----------
+Reads a packed archive (.pack) from the specified file, and
+builds a pack index file (.idx) for it.  The packed archive
+together with the pack index can then be placed in the
+objects/pack/ directory of a git repository.
+
+
+OPTIONS
+-------
+-o <index-file>::
+       Write the generated pack index into the specified
+       file.  Without this option the name of pack index
+       file is constructed from the name of packed archive
+       file by replacing .pack with .idx (and the program
+       fails if the name of packed archive does not end
+       with .pack).
+
+
+Author
+------
+Written by Sergey Vlasov <vsu@altlinux.ru>
+
+Documentation
+-------------
+Documentation by Sergey Vlasov
+
+GIT
+---
+Part of the gitlink:git[7] suite
+
diff --git a/Documentation/git.txt b/Documentation/git.txt

index 243c00a178365ee756de00de1526cabcceebd10e..796c4f61ea232aef526ab1032879cf3431d6324d 100644 (file)
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -68,6 +68,9 @@ gitlink:git-commit-tree[1]::
  gitlink:git-hash-object[1]::
         Computes the object ID from a file.
  
+gitlink:git-index-pack[1]::
+       Build pack index file for an existing packed archive.
+
  gitlink:git-init-db[1]::
         Creates an empty git object database
  
diff --git a/Makefile b/Makefile

index 5e7d0555ea8e63de6599b383c6dc81d879733146..7c8f6474a3a0bc2593f966a406667fdb10d1b129 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -110,7 +110,7 @@ PROGRAMS = \
         git-convert-objects$X git-diff-files$X \
         git-diff-index$X git-diff-stages$X \
         git-diff-tree$X git-fetch-pack$X git-fsck-objects$X \
-       git-hash-object$X git-init-db$X \
+       git-hash-object$X git-index-pack$X git-init-db$X \
         git-local-fetch$X git-ls-files$X git-ls-tree$X git-merge-base$X \
         git-merge-index$X git-mktag$X git-pack-objects$X git-patch-id$X \
         git-peek-remote$X git-prune-packed$X git-read-tree$X \
diff --git a/index-pack.c b/index-pack.c

new file mode 100644 (file)

index 0000000..badbeab
--- /dev/null
+++ b/index-pack.c
@@ -0,0 +1,451 @@
+#include "cache.h"
+#include "delta.h"
+#include "pack.h"
+#include "csum-file.h"
+
+static const char index_pack_usage[] = /* synopsis handed to usage() by main() on bad arguments */
+"git-index-pack [-o index-file] pack-file";
+
+struct object_entry    /* one slot per object stored in the pack */
+{
+       unsigned long offset;   /* byte offset of the entry's header inside the pack */
+       enum object_type type;  /* type as recorded in the pack; may be OBJ_DELTA */
+       enum object_type real_type;     /* type after delta resolution; remains OBJ_DELTA while unresolved */
+       unsigned char sha1[20]; /* SHA1 of the object (of the patched result for deltas) */
+};
+
+struct delta_entry     /* one slot per OBJ_DELTA entry; the array is sorted by base_sha1 before lookups */
+{
+       struct object_entry *obj;       /* the delta object this record describes */
+       unsigned char base_sha1[20];    /* SHA1 of the object the delta must be applied to */
+};
+
+static const char *pack_name;  /* pack file path taken from the command line */
+static unsigned char *pack_base;       /* start of the mmap()ed pack contents */
+static unsigned long pack_size;        /* pack file size in bytes, including the trailing 20-byte SHA1 */
+static struct object_entry *objects;   /* nr_objects entries, filled in pack order */
+static struct delta_entry *deltas;     /* allocated with nr_objects slots; the first nr_deltas are in use */
+static int nr_objects; /* object count read from the pack header */
+static int nr_deltas;  /* number of OBJ_DELTA entries seen during the first pass */
+
+/*
+ * Map the pack named by pack_name read-only into memory and record
+ * the mapping in pack_base / pack_size.  Any failure is fatal.  The
+ * file descriptor is closed on every path once it is no longer needed.
+ */
+static void open_pack_file(void)
+{
+       struct stat info;
+       int saved_errno;
+       int pack_fd = open(pack_name, O_RDONLY);
+
+       if (pack_fd < 0)
+               die("cannot open packfile '%s': %s", pack_name,
+                   strerror(errno));
+
+       if (fstat(pack_fd, &info)) {
+               /* close() may clobber errno, so remember it first */
+               saved_errno = errno;
+               close(pack_fd);
+               die("cannot fstat packfile '%s': %s", pack_name,
+                   strerror(saved_errno));
+       }
+
+       pack_size = info.st_size;
+       pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, pack_fd, 0);
+       saved_errno = errno;
+
+       /* The mapping (if any) stays valid after the descriptor is gone */
+       close(pack_fd);
+       if (pack_base == MAP_FAILED)
+               die("cannot mmap packfile '%s': %s", pack_name,
+                   strerror(saved_errno));
+}
+
+/*
+ * Validate the mapped pack: minimum size, signature, version and the
+ * trailing SHA1 over everything that precedes it.  On success the
+ * object count from the header is stored in nr_objects; any mismatch
+ * is fatal.
+ */
+static void parse_pack_header(void)
+{
+       const struct pack_header *header;
+       unsigned long payload_len;
+       unsigned char computed[20];
+       SHA_CTX sha;
+
+       /* The file must hold at least a header plus the final SHA1 */
+       if (pack_size < sizeof(struct pack_header) + 20)
+               die("packfile '%s' is too small", pack_name);
+
+       header = (void *)pack_base;
+
+       /* Header fields are stored in network byte order */
+       if (header->hdr_signature != htonl(PACK_SIGNATURE))
+               die("packfile '%s' signature mismatch", pack_name);
+       if (header->hdr_version != htonl(PACK_VERSION))
+               die("packfile '%s' version %d different from ours %d",
+                   pack_name, ntohl(header->hdr_version), PACK_VERSION);
+
+       nr_objects = ntohl(header->hdr_entries);
+
+       /* The last 20 bytes are the SHA1 of all bytes before them */
+       payload_len = pack_size - 20;
+       SHA1_Init(&sha);
+       SHA1_Update(&sha, pack_base, payload_len);
+       SHA1_Final(computed, &sha);
+       if (memcmp(computed, pack_base + payload_len, 20) != 0)
+               die("packfile '%s' SHA1 mismatch", pack_name);
+}
+
+static void bad_object(unsigned long offset, const char *format,
+                      ...) NORETURN __attribute__((format (printf, 2, 3)));
+
+/*
+ * Report a corrupt object and terminate via die().
+ *
+ * offset: pack offset of the offending object, included in the message.
+ * format: printf-style description of the problem; the expanded text is
+ *         limited to the size of the local buffer.
+ */
+static void bad_object(unsigned long offset, const char *format, ...)
+{
+       char detail[1024];
+       va_list ap;
+
+       va_start(ap, format);
+       vsnprintf(detail, sizeof(detail), format, ap);
+       va_end(ap);
+
+       die("packfile '%s': bad object at offset %lu: %s",
+           pack_name, offset, detail);
+}
+
+/*
+ * Inflate the zlib stream that starts at *current_pos in the pack.
+ *
+ * offset:      pack offset of the owning object (used for error messages).
+ * current_pos: in: where the compressed data begins;
+ *              out: first byte after the consumed compressed data.
+ * size:        expected inflated size; any other result is fatal.
+ *
+ * Returns a newly allocated buffer of `size` bytes that the caller frees.
+ */
+static void *unpack_entry_data(unsigned long offset,
+                              unsigned long *current_pos, unsigned long size)
+{
+       const unsigned long pack_limit = pack_size - 20;
+       const unsigned long start = *current_pos;
+       void *out = xmalloc(size);
+       z_stream zs;
+       int status;
+
+       memset(&zs, 0, sizeof(zs));
+       zs.next_in = pack_base + start;
+       /* Input may extend up to, but not into, the trailing pack SHA1 */
+       zs.avail_in = pack_limit - start;
+       zs.next_out = out;
+       zs.avail_out = size;
+       inflateInit(&zs);
+
+       while ((status = inflate(&zs, 0)) != Z_STREAM_END) {
+               if (status != Z_OK)
+                       bad_object(offset, "inflate returned %d", status);
+       }
+       inflateEnd(&zs);
+
+       if (zs.total_out != size)
+               bad_object(offset, "size mismatch (expected %lu, got %lu)",
+                          size, zs.total_out);
+
+       /* Whatever zlib left unread belongs to the next entry */
+       *current_pos = pack_limit - zs.avail_in;
+       return out;
+}
+
+/*
+ * Decode the pack entry that starts at `offset` and inflate its payload.
+ *
+ * offset:          pack offset of the entry's header.
+ * obj_type:        out: type stored in the entry header.
+ * obj_size:        out: inflated payload size stored in the entry header.
+ * delta_base:      out: 20-byte base SHA1; written only for OBJ_DELTA.
+ * next_obj_offset: out: offset of the first byte after this entry.
+ *
+ * Returns the inflated payload (the raw delta for OBJ_DELTA) in a newly
+ * allocated buffer owned by the caller.  Malformed entries are fatal.
+ */
+static void *unpack_raw_entry(unsigned long offset,
+                             enum object_type *obj_type,
+                             unsigned long *obj_size,
+                             unsigned char *delta_base,
+                             unsigned long *next_obj_offset)
+{
+       unsigned long pack_limit = pack_size - 20;
+       unsigned long pos = offset;
+       unsigned char c;
+       unsigned long size;
+       unsigned shift;
+       enum object_type type;
+       void *data;
+
+       /*
+        * A header claiming more objects than the pack holds would make
+        * us start an entry at (or past) the trailing SHA1; without this
+        * check `pos` could exceed pack_limit and the input length given
+        * to zlib would wrap around to a huge value.
+        */
+       if (pos >= pack_limit)
+               bad_object(offset, "object extends past end of pack");
+
+       /* First byte: continuation bit, 3-bit type, low 4 bits of size */
+       c = pack_base[pos++];
+       type = (c >> 4) & 7;
+       size = (c & 15);
+       shift = 4;
+       /* Following bytes each contribute 7 more size bits */
+       while (c & 0x80) {
+               if (pos >= pack_limit)
+                       bad_object(offset, "object extends past end of pack");
+               /* Shifting by the full width of the type is undefined */
+               if (shift >= sizeof(size) * 8)
+                       bad_object(offset, "object size field is too long");
+               c = pack_base[pos++];
+               size += (c & 0x7fUL) << shift;
+               shift += 7;
+       }
+
+       switch (type) {
+       case OBJ_DELTA:
+               /* The base SHA1 sits between the header and the zlib data */
+               if (pos + 20 >= pack_limit)
+                       bad_object(offset, "object extends past end of pack");
+               memcpy(delta_base, pack_base + pos, 20);
+               pos += 20;
+               /* fallthru */
+       case OBJ_COMMIT:
+       case OBJ_TREE:
+       case OBJ_BLOB:
+       case OBJ_TAG:
+               data = unpack_entry_data(offset, &pos, size);
+               break;
+       default:
+               bad_object(offset, "bad object type %d", type);
+       }
+
+       *obj_type = type;
+       *obj_size = size;
+       *next_obj_offset = pos;
+       return data;
+}
+
+/*
+ * Binary-search the sorted deltas[] array for an entry whose base is
+ * base_sha1.
+ *
+ * Returns the index of a matching entry (not necessarily the first
+ * one), or -(insertion point) - 1 when there is no match.
+ */
+static int find_delta(const unsigned char *base_sha1)
+{
+       int lo = 0;
+       int hi = nr_deltas;
+
+       while (lo < hi) {
+               int mid = (lo + hi) / 2;
+               int order = memcmp(base_sha1, deltas[mid].base_sha1, 20);
+
+               if (order == 0)
+                       return mid;
+               if (order < 0)
+                       hi = mid;
+               else
+                       lo = mid + 1;
+       }
+       return -lo - 1;
+}
+
+/*
+ * Locate the contiguous run of deltas[] entries whose base is base_sha1.
+ *
+ * On success stores the inclusive index range in *first_index and
+ * *last_index and returns 0.  Returns -1, leaving the outputs untouched,
+ * when no delta uses that base.
+ */
+static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
+                                    int *first_index, int *last_index)
+{
+       int hit = find_delta(base_sha1);
+       int lo, hi;
+
+       if (hit < 0)
+               return -1;
+
+       /* find_delta() may land anywhere in the run; widen both ways */
+       for (lo = hit; lo > 0; lo--) {
+               if (memcmp(deltas[lo - 1].base_sha1, base_sha1, 20))
+                       break;
+       }
+       for (hi = hit; hi < nr_deltas - 1; hi++) {
+               if (memcmp(deltas[hi + 1].base_sha1, base_sha1, 20))
+                       break;
+       }
+
+       *first_index = lo;
+       *last_index = hi;
+       return 0;
+}
+
+/*
+ * Compute the object name for `size` bytes at `data` of the given type:
+ * the SHA1 of the header "<type> <size>" including its terminating NUL,
+ * followed by the data.  The 20-byte digest is written to `sha1`.
+ * Types other than commit, tree, blob and tag are fatal.
+ */
+static void sha1_object(const void *data, unsigned long size,
+                       enum object_type type, unsigned char *sha1)
+{
+       const char *name;
+       char hdr[50];
+       int hdr_len;
+       SHA_CTX sha;
+
+       if (type == OBJ_COMMIT)
+               name = "commit";
+       else if (type == OBJ_TREE)
+               name = "tree";
+       else if (type == OBJ_BLOB)
+               name = "blob";
+       else if (type == OBJ_TAG)
+               name = "tag";
+       else
+               die("bad type %d", type);
+
+       /* +1 so the NUL terminator is hashed as part of the header */
+       hdr_len = sprintf(hdr, "%s %lu", name, size) + 1;
+
+       SHA1_Init(&sha);
+       SHA1_Update(&sha, hdr, hdr_len);
+       SHA1_Update(&sha, data, size);
+       SHA1_Final(sha1, &sha);
+}
+
+/*
+ * Apply one delta to its already-reconstructed base, record the SHA1
+ * and real type of the result, then recurse into every delta that is
+ * in turn based on that result.
+ *
+ * delta:     the delta to resolve.
+ * base_data: contents of the base object (base_size bytes); not freed here.
+ * type:      real type of the base, inherited by the result.
+ */
+static void resolve_delta(struct delta_entry *delta, void *base_data,
+                         unsigned long base_size, enum object_type type)
+{
+       struct object_entry *entry = delta->obj;
+       /* Outputs of unpack_raw_entry() that are not needed here */
+       enum object_type stored_type;
+       unsigned char stored_base[20];
+       unsigned long next_offset;
+       void *raw_delta, *patched;
+       unsigned long raw_delta_size, patched_size;
+       int lo, hi, k;
+
+       entry->real_type = type;
+
+       raw_delta = unpack_raw_entry(entry->offset, &stored_type,
+                                    &raw_delta_size, stored_base,
+                                    &next_offset);
+       patched = patch_delta(base_data, base_size, raw_delta, raw_delta_size,
+                             &patched_size);
+       free(raw_delta);
+       if (!patched)
+               bad_object(entry->offset, "failed to apply delta");
+
+       sha1_object(patched, patched_size, type, entry->sha1);
+
+       /* The result may itself be the base of further deltas */
+       if (find_deltas_based_on_sha1(entry->sha1, &lo, &hi) == 0) {
+               for (k = lo; k <= hi; k++)
+                       resolve_delta(&deltas[k], patched, patched_size, type);
+       }
+       free(patched);
+}
+
+/* qsort() comparator: order delta_entry records by their base SHA1. */
+static int compare_delta_entry(const void *a, const void *b)
+{
+       const unsigned char *lhs = ((const struct delta_entry *)a)->base_sha1;
+       const unsigned char *rhs = ((const struct delta_entry *)b)->base_sha1;
+
+       return memcmp(lhs, rhs, 20);
+}
+
+static void parse_pack_objects(void)
+{      /*
+        * Fill objects[] (offset, type, real_type, sha1) for every entry
+        * of the mapped pack and deltas[] for every OBJ_DELTA entry.
+        * Both arrays are allocated by main() with nr_objects slots.
+        * Dies if the entries do not end exactly at the trailing SHA1 or
+        * if some delta's base is not found in this pack.
+        */
+       int i;
+       unsigned long offset = sizeof(struct pack_header);      /* first entry follows the header */
+       unsigned char base_sha1[20];
+       void *data;
+       unsigned long data_size;
+
+       /*
+        * First pass:
+        * - find locations of all objects;
+        * - calculate SHA1 of all non-delta objects;
+        * - remember base SHA1 for all deltas.
+        */
+       for (i = 0; i < nr_objects; i++) {
+               struct object_entry *obj = &objects[i];
+               obj->offset = offset;
+               data = unpack_raw_entry(offset, &obj->type, &data_size,
+                                       base_sha1, &offset);    /* offset now points at the next entry */
+               obj->real_type = obj->type;
+               if (obj->type == OBJ_DELTA) {
+                       struct delta_entry *delta = &deltas[nr_deltas++];
+                       delta->obj = obj;
+                       memcpy(delta->base_sha1, base_sha1, 20);
+               } else
+                       sha1_object(data, data_size, obj->type, obj->sha1);
+               free(data);
+       }
+       if (offset != pack_size - 20)   /* entries must end exactly where the trailing SHA1 begins */
+               die("packfile '%s' has junk at the end", pack_name);
+
+       /* Sort deltas by base SHA1 for fast searching */
+       qsort(deltas, nr_deltas, sizeof(struct delta_entry),
+             compare_delta_entry);
+
+       /*
+        * Second pass:
+        * - for all non-delta objects, look if it is used as a base for
+        *   deltas;
+        * - if used as a base, uncompress the object and apply all deltas,
+        *   recursively checking if the resulting object is used as a base
+        *   for some more deltas.
+        */
+       for (i = 0; i < nr_objects; i++) {
+               struct object_entry *obj = &objects[i];
+               int j, first, last;
+
+               if (obj->type == OBJ_DELTA)
+                       continue;
+               if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
+                       continue;
+               data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
+                                       base_sha1, &offset);    /* offset is only a scratch output here */
+               for (j = first; j <= last; j++)
+                       resolve_delta(&deltas[j], data, data_size, obj->type);
+               free(data);
+       }
+
+       /* Check for unresolved deltas */
+       for (i = 0; i < nr_deltas; i++) {
+               if (deltas[i].obj->real_type == OBJ_DELTA)      /* never reached by resolve_delta() */
+                       die("packfile '%s' has unresolved deltas",  pack_name);
+       }
+}
+
+/*
+ * qsort() comparator for an array of object_entry pointers: order the
+ * pointed-to entries by SHA1.
+ */
+static int sha1_compare(const void *_a, const void *_b)
+{
+       struct object_entry *const *lhs = (struct object_entry **)_a;
+       struct object_entry *const *rhs = (struct object_entry **)_b;
+
+       return memcmp((*lhs)->sha1, (*rhs)->sha1, 20);
+}
+
+/*
+ * Write the pack index to index_name, replacing any existing file.
+ *
+ * The data written is, in order: a 256-entry table of cumulative object
+ * counts keyed by the first SHA1 byte, one (4-byte offset, 20-byte SHA1)
+ * record per object in SHA1 order, and the pack's own trailing SHA1.
+ * Integers are converted with htonl() before being written.
+ */
+static void write_index_file(const char *index_name)
+{
+       struct object_entry **by_sha =
+               xcalloc(nr_objects, sizeof(struct object_entry *));
+       unsigned int fanout[256];
+       struct sha1file *out;
+       int bucket, n;
+       int cursor = 0;
+
+       for (n = 0; n < nr_objects; ++n)
+               by_sha[n] = &objects[n];
+       qsort(by_sha, nr_objects, sizeof(by_sha[0]), sha1_compare);
+
+       unlink(index_name);
+       out = sha1create("%s", index_name);
+
+       /*
+        * First-level table: fanout[b] is the number of objects whose
+        * first SHA1 byte is <= b.  Because the list is sorted, one
+        * forward sweep over it fills all 256 slots.
+        */
+       for (bucket = 0; bucket < 256; bucket++) {
+               while (cursor < nr_objects &&
+                      by_sha[cursor]->sha1[0] == bucket)
+                       cursor++;
+               fanout[bucket] = htonl(cursor);
+       }
+       sha1write(out, fanout, 256 * sizeof(int));
+
+       /* One record per object, in SHA1 order */
+       for (n = 0; n < nr_objects; n++) {
+               struct object_entry *entry = by_sha[n];
+               unsigned int be_offset = htonl(entry->offset);
+
+               sha1write(out, &be_offset, 4);
+               sha1write(out, entry->sha1, 20);
+       }
+
+       /* Finish with the SHA1 stored at the end of the pack */
+       sha1write(out, pack_base + pack_size - 20, 20);
+       sha1close(out, NULL, 1);
+       free(by_sha);
+}
+
+/*
+ * git-index-pack [-o index-file] pack-file
+ *
+ * Builds the .idx file for an existing pack.  Exactly one pack file
+ * must be named; "-o" may be given at most once.  Without "-o" the
+ * index name is the pack name with ".pack" replaced by ".idx", and it
+ * is an error if the pack name does not end in ".pack".
+ *
+ * Returns 0 on success; every failure terminates through usage() or
+ * die().
+ */
+int main(int argc, char **argv)
+{
+       int i;
+       char *index_name = NULL;
+       char *index_name_buf = NULL;
+
+       for (i = 1; i < argc; i++) {
+               const char *arg = argv[i];
+
+               if (*arg == '-') {
+                       if (!strcmp(arg, "-o")) {
+                               /* reject a repeated -o or one without a value */
+                               if (index_name || (i+1) >= argc)
+                                       usage(index_pack_usage);
+                               index_name = argv[++i];
+                       } else
+                               usage(index_pack_usage);
+                       continue;
+               }
+
+               /* only a single pack file is accepted */
+               if (pack_name)
+                       usage(index_pack_usage);
+               pack_name = arg;
+       }
+
+       if (!pack_name)
+               usage(index_pack_usage);
+       if (!index_name) {
+               int len = strlen(pack_name);
+               if (len < 5 || strcmp(pack_name + len - 5, ".pack"))
+                       die("packfile name '%s' does not end with '.pack'",
+                           pack_name);
+               /*
+                * (len - 5) bytes of stem + 4 bytes of ".idx" + NUL is
+                * exactly len bytes; allocating len - 1 would let the
+                * strcpy() below write one byte past the buffer.
+                */
+               index_name_buf = xmalloc(len);
+               memcpy(index_name_buf, pack_name, len - 5);
+               strcpy(index_name_buf + len - 5, ".idx");
+               index_name = index_name_buf;
+       }
+
+       open_pack_file();
+       parse_pack_header();
+       /* a pack cannot hold more deltas than objects, so size both alike */
+       objects = xcalloc(nr_objects, sizeof(struct object_entry));
+       deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
+       parse_pack_objects();
+       free(deltas);
+       write_index_file(index_name);
+       free(objects);
+       free(index_name_buf);
+
+       return 0;
+}
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh

index bb62336f267086cc85bf0c6d5133560c581f5973..96db98b65cf07e978fb739466c816336e947c689 100755 (executable)
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -165,4 +165,22 @@ test_expect_success \
  
       :'
  
+test_expect_success \
+    'build pack index for an existing pack' \
+    'cp test-1-${packname_1}.pack test-3.pack &&
+     git-index-pack -o tmp.idx test-3.pack &&
+     cmp tmp.idx test-1-${packname_1}.idx &&
+
+     git-index-pack test-3.pack &&
+     cmp test-3.idx test-1-${packname_1}.idx &&
+
+     cp test-2-${packname_2}.pack test-3.pack &&
+     git-index-pack -o tmp.idx test-2-${packname_2}.pack &&
+     cmp tmp.idx test-2-${packname_2}.idx &&
+
+     git-index-pack test-3.pack &&
+     cmp test-3.idx test-2-${packname_2}.idx &&
+
+     :'
+
  test_done
author	Sergey Vlasov <vsu@altlinux.ru>
	Wed, 12 Oct 2005 19:01:31 +0000 (12:01 -0700)
committer	Junio C Hamano <junkio@cox.net>
	Thu, 13 Oct 2005 01:32:02 +0000 (18:32 -0700)
Documentation/git-index-pack.txt	[new file with mode: 0644]	patch \| blob
Documentation/git.txt		patch \| blob \| history
Makefile		patch \| blob \| history
index-pack.c	[new file with mode: 0644]	patch \| blob
t/t5300-pack-object.sh		patch \| blob \| history