Code

http-fetch: Use index-pack rather than verify-pack to check packs
authorShawn O. Pearce <spearce@spearce.org>
Mon, 19 Apr 2010 14:23:09 +0000 (07:23 -0700)
committerJunio C Hamano <gitster@pobox.com>
Tue, 20 Apr 2010 00:56:20 +0000 (17:56 -0700)
To ensure we don't leave a corrupt pack file positioned as though
it were a valid pack file, run index-pack on the temporary pack
before we rename it to its final name.  If index-pack crashes out
when it discovers file corruption (e.g. GitHub's error HTML at the
end of the file), simply delete the temporary files to cleanup.

By waiting until the pack has been validated before we move it
to its final name, we eliminate a race condition where another
concurrent reader might try to access the pack at the same time
that we are still trying to verify its not corrupt.

Switching from verify-pack to index-pack is a change in behavior,
but it should turn out better for users.  The index-pack algorithm
tries to minimize disk seeks, as well as the number of times any
given object is inflated, by organizing its work along delta chains.
The verify-pack logic does not attempt to do this, thrashing the
delta base cache and the filesystem cache.

By recreating the index file locally, we also can automatically
upgrade from a v1 pack table of contents to v2.  This makes the
CRC32 data available for use during later repacks, even if the
server didn't have them on hand.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Acked-by: Tay Ray Chuan <rctay89@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
http.c
t/t5550-http-fetch.sh

diff --git a/http.c b/http.c
index 9c626322022c50d0c4c123e1e017a3797b28a9e8..2ebd679393223aa186b2b6cc5a0bfe8274d496cf 100644 (file)
--- a/http.c
+++ b/http.c
@@ -1,6 +1,7 @@
 #include "http.h"
 #include "pack.h"
 #include "sideband.h"
+#include "run-command.h"
 
 int data_received;
 int active_requests;
@@ -998,11 +999,14 @@ void release_http_pack_request(struct http_pack_request *preq)
 
 int finish_http_pack_request(struct http_pack_request *preq)
 {
-       int ret;
        struct packed_git **lst;
        struct packed_git *p = preq->target;
+       char *tmp_idx;
+       struct child_process ip;
+       const char *ip_argv[8];
+
+       close_pack_index(p);
 
-       p->pack_size = ftell(preq->packfile);
        fclose(preq->packfile);
        preq->packfile = NULL;
        preq->slot->local = NULL;
@@ -1012,13 +1016,39 @@ int finish_http_pack_request(struct http_pack_request *preq)
                lst = &((*lst)->next);
        *lst = (*lst)->next;
 
-       ret = move_temp_to_file(preq->tmpfile, sha1_pack_name(p->sha1));
-       if (ret)
-               return ret;
-       if (verify_pack(p))
+       tmp_idx = xstrdup(preq->tmpfile);
+       strcpy(tmp_idx + strlen(tmp_idx) - strlen(".pack.temp"),
+              ".idx.temp");
+
+       ip_argv[0] = "index-pack";
+       ip_argv[1] = "-o";
+       ip_argv[2] = tmp_idx;
+       ip_argv[3] = preq->tmpfile;
+       ip_argv[4] = NULL;
+
+       memset(&ip, 0, sizeof(ip));
+       ip.argv = ip_argv;
+       ip.git_cmd = 1;
+       ip.no_stdin = 1;
+       ip.no_stdout = 1;
+
+       if (run_command(&ip)) {
+               unlink(preq->tmpfile);
+               unlink(tmp_idx);
+               free(tmp_idx);
                return -1;
-       install_packed_git(p);
+       }
+
+       unlink(sha1_pack_index_name(p->sha1));
 
+       if (move_temp_to_file(preq->tmpfile, sha1_pack_name(p->sha1))
+        || move_temp_to_file(tmp_idx, sha1_pack_index_name(p->sha1))) {
+               free(tmp_idx);
+               return -1;
+       }
+
+       install_packed_git(p);
+       free(tmp_idx);
        return 0;
 }
 
index 78c31c961ed7b57ec5a4c5588da02e239ee293bd..1a4dfc9695af92cd5774471977b25afdba8627ec 100755 (executable)
@@ -62,6 +62,21 @@ test_expect_success 'fetch packed objects' '
        git clone $HTTPD_URL/dumb/repo_pack.git
 '
 
+test_expect_success 'fetch notices corrupt pack' '
+       cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
+       (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
+        p=`ls objects/pack/pack-*.pack` &&
+        chmod u+w $p &&
+        printf %0256d 0 | dd of=$p bs=256 count=1 seek=1 conv=notrunc
+       ) &&
+       mkdir repo_bad1.git &&
+       (cd repo_bad1.git &&
+        git --bare init &&
+        test_must_fail git --bare fetch $HTTPD_URL/dumb/repo_bad1.git &&
+        test 0 = `ls objects/pack/pack-*.pack | wc -l`
+       )
+'
+
 test_expect_success 'did not use upload-pack service' '
        grep '/git-upload-pack' <"$HTTPD_ROOT_PATH"/access.log >act
        : >exp