Code

Merge branch 'np/verify-pack'
authorJunio C Hamano <gitster@pobox.com>
Mon, 3 Mar 2008 00:07:30 +0000 (16:07 -0800)
committerJunio C Hamano <gitster@pobox.com>
Mon, 3 Mar 2008 00:07:30 +0000 (16:07 -0800)
* np/verify-pack:
  add storage size output to 'git verify-pack -v'
  fix unimplemented packed_object_info_detail() features
  make verify_one_pack() a bit less wrong wrt packed_git structure
  factorize revindex code out of builtin-pack-objects.c

Conflicts:

Makefile

Documentation/git-verify-pack.txt
Makefile
builtin-pack-objects.c
builtin-verify-pack.c
contrib/stats/packinfo.pl
pack-check.c
pack-revindex.c [new file with mode: 0644]
pack-revindex.h [new file with mode: 0644]
sha1_file.c

index db019a2b8d1bb283789726bf47ee0f7b9e4a523e..ba2a157299566b4dcc88f8ecb6f0eaed755f95bb 100644 (file)
@@ -32,11 +32,11 @@ OUTPUT FORMAT
 -------------
 When specifying the -v option the format used is:
 
-       SHA1 type size offset-in-packfile
+       SHA1 type size size-in-pack-file offset-in-packfile
 
 for objects that are not deltified in the pack, and
 
-       SHA1 type size offset-in-packfile depth base-SHA1
+       SHA1 type size size-in-packfile offset-in-packfile depth base-SHA1
 
 for objects that are deltified.
 
index 71f01d16b1a42d366b04f1fce621db4dac6b1976..ca5aad963ca7b5565338f49ce1705c451216c8be 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -304,7 +304,8 @@ LIB_H = \
        run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
        tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
        utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \
-       mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h fsck.h
+       mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h fsck.h \
+       pack-revindex.h
 
 DIFF_OBJS = \
        diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -328,7 +329,7 @@ LIB_OBJS = \
        color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
        convert.o attr.o decorate.o progress.o mailmap.o symlinks.o remote.o \
        transport.o bundle.o walker.o parse-options.o ws.o archive.o branch.o \
-       alias.o fsck.o
+       alias.o fsck.o pack-revindex.o
 
 BUILTIN_OBJS = \
        builtin-add.o \
index 6c8b662e7835501f7540c8323596d4e04e0b90d1..2799e6833849eea9a4cb35213c1115c44bf702ed 100644 (file)
@@ -8,6 +8,7 @@
 #include "tree.h"
 #include "delta.h"
 #include "pack.h"
+#include "pack-revindex.h"
 #include "csum-file.h"
 #include "tree-walk.h"
 #include "diff.h"
@@ -92,158 +93,12 @@ static unsigned long window_memory_limit = 0;
 static int *object_ix;
 static int object_ix_hashsz;
 
-/*
- * Pack index for existing packs give us easy access to the offsets into
- * corresponding pack file where each object's data starts, but the entries
- * do not store the size of the compressed representation (uncompressed
- * size is easily available by examining the pack entry header).  It is
- * also rather expensive to find the sha1 for an object given its offset.
- *
- * We build a hashtable of existing packs (pack_revindex), and keep reverse
- * index here -- pack index file is sorted by object name mapping to offset;
- * this pack_revindex[].revindex array is a list of offset/index_nr pairs
- * ordered by offset, so if you know the offset of an object, next offset
- * is where its packed representation ends and the index_nr can be used to
- * get the object sha1 from the main index.
- */
-struct revindex_entry {
-       off_t offset;
-       unsigned int nr;
-};
-struct pack_revindex {
-       struct packed_git *p;
-       struct revindex_entry *revindex;
-};
-static struct  pack_revindex *pack_revindex;
-static int pack_revindex_hashsz;
-
 /*
  * stats
  */
 static uint32_t written, written_delta;
 static uint32_t reused, reused_delta;
 
-static int pack_revindex_ix(struct packed_git *p)
-{
-       unsigned long ui = (unsigned long)p;
-       int i;
-
-       ui = ui ^ (ui >> 16); /* defeat structure alignment */
-       i = (int)(ui % pack_revindex_hashsz);
-       while (pack_revindex[i].p) {
-               if (pack_revindex[i].p == p)
-                       return i;
-               if (++i == pack_revindex_hashsz)
-                       i = 0;
-       }
-       return -1 - i;
-}
-
-static void prepare_pack_ix(void)
-{
-       int num;
-       struct packed_git *p;
-       for (num = 0, p = packed_git; p; p = p->next)
-               num++;
-       if (!num)
-               return;
-       pack_revindex_hashsz = num * 11;
-       pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
-       for (p = packed_git; p; p = p->next) {
-               num = pack_revindex_ix(p);
-               num = - 1 - num;
-               pack_revindex[num].p = p;
-       }
-       /* revindex elements are lazily initialized */
-}
-
-static int cmp_offset(const void *a_, const void *b_)
-{
-       const struct revindex_entry *a = a_;
-       const struct revindex_entry *b = b_;
-       return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
-}
-
-/*
- * Ordered list of offsets of objects in the pack.
- */
-static void prepare_pack_revindex(struct pack_revindex *rix)
-{
-       struct packed_git *p = rix->p;
-       int num_ent = p->num_objects;
-       int i;
-       const char *index = p->index_data;
-
-       rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
-       index += 4 * 256;
-
-       if (p->index_version > 1) {
-               const uint32_t *off_32 =
-                       (uint32_t *)(index + 8 + p->num_objects * (20 + 4));
-               const uint32_t *off_64 = off_32 + p->num_objects;
-               for (i = 0; i < num_ent; i++) {
-                       uint32_t off = ntohl(*off_32++);
-                       if (!(off & 0x80000000)) {
-                               rix->revindex[i].offset = off;
-                       } else {
-                               rix->revindex[i].offset =
-                                       ((uint64_t)ntohl(*off_64++)) << 32;
-                               rix->revindex[i].offset |=
-                                       ntohl(*off_64++);
-                       }
-                       rix->revindex[i].nr = i;
-               }
-       } else {
-               for (i = 0; i < num_ent; i++) {
-                       uint32_t hl = *((uint32_t *)(index + 24 * i));
-                       rix->revindex[i].offset = ntohl(hl);
-                       rix->revindex[i].nr = i;
-               }
-       }
-
-       /* This knows the pack format -- the 20-byte trailer
-        * follows immediately after the last object data.
-        */
-       rix->revindex[num_ent].offset = p->pack_size - 20;
-       rix->revindex[num_ent].nr = -1;
-       qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
-}
-
-static struct revindex_entry * find_packed_object(struct packed_git *p,
-                                                 off_t ofs)
-{
-       int num;
-       int lo, hi;
-       struct pack_revindex *rix;
-       struct revindex_entry *revindex;
-       num = pack_revindex_ix(p);
-       if (num < 0)
-               die("internal error: pack revindex uninitialized");
-       rix = &pack_revindex[num];
-       if (!rix->revindex)
-               prepare_pack_revindex(rix);
-       revindex = rix->revindex;
-       lo = 0;
-       hi = p->num_objects + 1;
-       do {
-               int mi = (lo + hi) / 2;
-               if (revindex[mi].offset == ofs) {
-                       return revindex + mi;
-               }
-               else if (ofs < revindex[mi].offset)
-                       hi = mi;
-               else
-                       lo = mi + 1;
-       } while (lo < hi);
-       die("internal error: pack revindex corrupt");
-}
-
-static const unsigned char *find_packed_object_name(struct packed_git *p,
-                                                   off_t ofs)
-{
-       struct revindex_entry *entry = find_packed_object(p, ofs);
-       return nth_packed_object_sha1(p, entry->nr);
-}
 
 static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 {
@@ -510,7 +365,7 @@ static unsigned long write_object(struct sha1file *f,
                }
                hdrlen = encode_header(obj_type, entry->size, header);
                offset = entry->in_pack_offset;
-               revidx = find_packed_object(p, offset);
+               revidx = find_pack_revindex(p, offset);
                datalen = revidx[1].offset - offset;
                if (!pack_to_stdout && p->index_version > 1 &&
                    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
@@ -1162,8 +1017,11 @@ static void check_object(struct object_entry *entry)
                                die("delta base offset out of bound for %s",
                                    sha1_to_hex(entry->idx.sha1));
                        ofs = entry->in_pack_offset - ofs;
-                       if (!no_reuse_delta && !entry->preferred_base)
-                               base_ref = find_packed_object_name(p, ofs);
+                       if (!no_reuse_delta && !entry->preferred_base) {
+                               struct revindex_entry *revidx;
+                               revidx = find_pack_revindex(p, ofs);
+                               base_ref = nth_packed_object_sha1(p, revidx->nr);
+                       }
                        entry->in_pack_header_size = used + used_0;
                        break;
                }
@@ -1240,9 +1098,11 @@ static void get_object_details(void)
                sorted_by_offset[i] = objects + i;
        qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
 
-       prepare_pack_ix();
+       init_pack_revindex();
+
        for (i = 0; i < nr_objects; i++)
                check_object(sorted_by_offset[i]);
+
        free(sorted_by_offset);
 }
 
index 4e31c273f48e3983aaf99dc6525982d34b6fed06..4958bbbf11f5f796feedfa7480b827029f912d01 100644 (file)
@@ -40,8 +40,8 @@ static int verify_one_pack(const char *path, int verbose)
        if (!pack)
                return error("packfile %s not found.", arg);
 
+       install_packed_git(pack);
        err = verify_pack(pack, verbose);
-       free(pack);
 
        return err;
 }
index aab501ea08129cc3b8304fb8f76f206578273f85..f4a7b62cd9f1a397118b95792c04c2f70f910f9e 100755 (executable)
@@ -93,7 +93,7 @@ my %depths;
 my @depths;
 
 while (<STDIN>) {
-    my ($sha1, $type, $size, $offset, $depth, $parent) = split(/\s+/, $_);
+    my ($sha1, $type, $size, $space, $offset, $depth, $parent) = split(/\s+/, $_);
     next unless ($sha1 =~ /^[0-9a-f]{40}$/);
     $depths{$sha1} = $depth || 0;
     push(@depths, $depth || 0);
index d7dd62bb8346c4cac8dbd7334e999a450c21c5ab..0f8ad2c00f21d9a0a64b541ed95fff91650d7175 100644 (file)
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "pack.h"
+#include "pack-revindex.h"
 
 struct idx_entry
 {
@@ -101,8 +102,10 @@ static int verify_packfile(struct packed_git *p,
 static void show_pack_info(struct packed_git *p)
 {
        uint32_t nr_objects, i, chain_histogram[MAX_CHAIN+1];
+
        nr_objects = p->num_objects;
        memset(chain_histogram, 0, sizeof(chain_histogram));
+       init_pack_revindex();
 
        for (i = 0; i < nr_objects; i++) {
                const unsigned char *sha1;
@@ -125,11 +128,11 @@ static void show_pack_info(struct packed_git *p)
                                                 base_sha1);
                printf("%s ", sha1_to_hex(sha1));
                if (!delta_chain_length)
-                       printf("%-6s %lu %"PRIuMAX"\n",
-                              type, size, (uintmax_t)offset);
+                       printf("%-6s %lu %lu %"PRIuMAX"\n",
+                              type, size, store_size, (uintmax_t)offset);
                else {
-                       printf("%-6s %lu %"PRIuMAX" %u %s\n",
-                              type, size, (uintmax_t)offset,
+                       printf("%-6s %lu %lu %"PRIuMAX" %u %s\n",
+                              type, size, store_size, (uintmax_t)offset,
                               delta_chain_length, sha1_to_hex(base_sha1));
                        if (delta_chain_length <= MAX_CHAIN)
                                chain_histogram[delta_chain_length]++;
diff --git a/pack-revindex.c b/pack-revindex.c
new file mode 100644 (file)
index 0000000..a8aa2cd
--- /dev/null
@@ -0,0 +1,142 @@
+#include "cache.h"
+#include "pack-revindex.h"
+
+/*
+ * Pack index for existing packs give us easy access to the offsets into
+ * corresponding pack file where each object's data starts, but the entries
+ * do not store the size of the compressed representation (uncompressed
+ * size is easily available by examining the pack entry header).  It is
+ * also rather expensive to find the sha1 for an object given its offset.
+ *
+ * We build a hashtable of existing packs (pack_revindex), and keep reverse
+ * index here -- pack index file is sorted by object name mapping to offset;
+ * this pack_revindex[].revindex array is a list of offset/index_nr pairs
+ * ordered by offset, so if you know the offset of an object, next offset
+ * is where its packed representation ends and the index_nr can be used to
+ * get the object sha1 from the main index.
+ */
+
+struct pack_revindex {
+       struct packed_git *p;
+       struct revindex_entry *revindex;
+};
+
+static struct pack_revindex *pack_revindex;
+static int pack_revindex_hashsz;
+
+static int pack_revindex_ix(struct packed_git *p)
+{
+       unsigned long ui = (unsigned long)p;
+       int i;
+
+       ui = ui ^ (ui >> 16); /* defeat structure alignment */
+       i = (int)(ui % pack_revindex_hashsz);
+       while (pack_revindex[i].p) {
+               if (pack_revindex[i].p == p)
+                       return i;
+               if (++i == pack_revindex_hashsz)
+                       i = 0;
+       }
+       return -1 - i;
+}
+
+void init_pack_revindex(void)
+{
+       int num;
+       struct packed_git *p;
+
+       for (num = 0, p = packed_git; p; p = p->next)
+               num++;
+       if (!num)
+               return;
+       pack_revindex_hashsz = num * 11;
+       pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
+       for (p = packed_git; p; p = p->next) {
+               num = pack_revindex_ix(p);
+               num = - 1 - num;
+               pack_revindex[num].p = p;
+       }
+       /* revindex elements are lazily initialized */
+}
+
+static int cmp_offset(const void *a_, const void *b_)
+{
+       const struct revindex_entry *a = a_;
+       const struct revindex_entry *b = b_;
+       return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
+}
+
+/*
+ * Ordered list of offsets of objects in the pack.
+ */
+static void create_pack_revindex(struct pack_revindex *rix)
+{
+       struct packed_git *p = rix->p;
+       int num_ent = p->num_objects;
+       int i;
+       const char *index = p->index_data;
+
+       rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
+       index += 4 * 256;
+
+       if (p->index_version > 1) {
+               const uint32_t *off_32 =
+                       (uint32_t *)(index + 8 + p->num_objects * (20 + 4));
+               const uint32_t *off_64 = off_32 + p->num_objects;
+               for (i = 0; i < num_ent; i++) {
+                       uint32_t off = ntohl(*off_32++);
+                       if (!(off & 0x80000000)) {
+                               rix->revindex[i].offset = off;
+                       } else {
+                               rix->revindex[i].offset =
+                                       ((uint64_t)ntohl(*off_64++)) << 32;
+                               rix->revindex[i].offset |=
+                                       ntohl(*off_64++);
+                       }
+                       rix->revindex[i].nr = i;
+               }
+       } else {
+               for (i = 0; i < num_ent; i++) {
+                       uint32_t hl = *((uint32_t *)(index + 24 * i));
+                       rix->revindex[i].offset = ntohl(hl);
+                       rix->revindex[i].nr = i;
+               }
+       }
+
+       /* This knows the pack format -- the 20-byte trailer
+        * follows immediately after the last object data.
+        */
+       rix->revindex[num_ent].offset = p->pack_size - 20;
+       rix->revindex[num_ent].nr = -1;
+       qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
+}
+
+struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
+{
+       int num;
+       int lo, hi;
+       struct pack_revindex *rix;
+       struct revindex_entry *revindex;
+
+       num = pack_revindex_ix(p);
+       if (num < 0)
+               die("internal error: pack revindex uninitialized");
+
+       rix = &pack_revindex[num];
+       if (!rix->revindex)
+               create_pack_revindex(rix);
+       revindex = rix->revindex;
+
+       lo = 0;
+       hi = p->num_objects + 1;
+       do {
+               int mi = (lo + hi) / 2;
+               if (revindex[mi].offset == ofs) {
+                       return revindex + mi;
+               } else if (ofs < revindex[mi].offset)
+                       hi = mi;
+               else
+                       lo = mi + 1;
+       } while (lo < hi);
+       die("internal error: pack revindex corrupt");
+}
diff --git a/pack-revindex.h b/pack-revindex.h
new file mode 100644 (file)
index 0000000..c3527a7
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef PACK_REVINDEX_H
+#define PACK_REVINDEX_H
+
+struct revindex_entry {
+       off_t offset;
+       unsigned int nr;
+};
+
+void init_pack_revindex(void);
+struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs);
+
+#endif
index 1ddb96bb82b60f4da8489664d13f398d88bf5a15..445a871db31673af20017d36ff22fd106f77f510 100644 (file)
@@ -14,6 +14,7 @@
 #include "tag.h"
 #include "tree.h"
 #include "refs.h"
+#include "pack-revindex.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1367,11 +1368,15 @@ const char *packed_object_info_detail(struct packed_git *p,
        unsigned long dummy;
        unsigned char *next_sha1;
        enum object_type type;
+       struct revindex_entry *revidx;
 
        *delta_chain_length = 0;
        curpos = obj_offset;
        type = unpack_object_header(p, &w_curs, &curpos, size);
 
+       revidx = find_pack_revindex(p, obj_offset);
+       *store_size = revidx[1].offset - obj_offset;
+
        for (;;) {
                switch (type) {
                default:
@@ -1381,14 +1386,13 @@ const char *packed_object_info_detail(struct packed_git *p,
                case OBJ_TREE:
                case OBJ_BLOB:
                case OBJ_TAG:
-                       *store_size = 0; /* notyet */
                        unuse_pack(&w_curs);
                        return typename(type);
                case OBJ_OFS_DELTA:
                        obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
                        if (*delta_chain_length == 0) {
-                               /* TODO: find base_sha1 as pointed by curpos */
-                               hashclr(base_sha1);
+                               revidx = find_pack_revindex(p, obj_offset);
+                               hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
                        }
                        break;
                case OBJ_REF_DELTA: