From 96155e55e175e7fed1d8ebd73557d339f35629d8 Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Mon, 25 Jul 2005 22:42:18 -0700
Subject: [PATCH] Fetch from a packed repository on dumb servers.

Implement fetching from a packed repository over http/https
using the dumb server support files.

I consider some parts of the logic should be in a separate C
program, but it appears to work with my simple tests. I have
backburnered it for a bit too long for my liking, so let's throw
it out in the open and see what happens.

Signed-off-by: Junio C Hamano
---
Review notes (text between "---" and the first "diff --git" line is not
part of the commit message and is ignored when the patch is applied):

 * This copy was rebuilt from a whitespace-collapsed dump.  Line breaks
   follow the hunk line counts; tabs and runs of blanks inside lines are
   a best-effort reconstruction, and the "<...>" placeholders in the two
   usage strings were re-inserted.
 * missing-revs.c deviates from the commit named in the "From" line:
   entries are now linked into the work list at most once (see
   enqueue()).  Its hunk header and the diffstat were updated to match;
   the blob id on its "index" line still names the original file.

 Makefile            |   5 ++-
 git-fetch-dumb-http | 100 ++++++++++++++++++++++++++++++++++++++++++++
 git-fetch-script    |  24 ++++++++++-
 missing-revs.c      |  81 +++++++++++++++++++++++++++++++++++
 4 files changed, 206 insertions(+), 4 deletions(-)
 create mode 100755 git-fetch-dumb-http
 create mode 100644 missing-revs.c

diff --git a/Makefile b/Makefile
index 332cd7d23..db52a852f 100644
--- a/Makefile
+++ b/Makefile
@@ -65,7 +65,7 @@ SCRIPTS=git git-apply-patch-script git-merge-one-file-script git-prune-script \
 	git-format-patch-script git-sh-setup-script git-push-script \
 	git-branch-script git-parse-remote git-verify-tag-script \
 	git-ls-remote-script git-clone-dumb-http git-rename-script \
-	git-request-pull-script git-bisect-script
+	git-request-pull-script git-bisect-script git-fetch-dumb-http
 
 PROG= git-update-cache git-diff-files git-init-db git-write-tree \
 	git-read-tree git-commit-tree git-cat-file git-fsck-cache \
@@ -79,7 +79,8 @@ PROG= git-update-cache git-diff-files git-init-db git-write-tree \
 	git-unpack-objects git-verify-pack git-receive-pack git-send-pack \
 	git-prune-packed git-fetch-pack git-upload-pack git-clone-pack \
 	git-show-index git-daemon git-var git-peek-remote \
-	git-update-server-info git-show-rev-cache git-build-rev-cache
+	git-update-server-info git-show-rev-cache git-build-rev-cache \
+	git-missing-revs
 
 ifndef NO_CURL
 PROG+= git-http-pull
diff --git a/git-fetch-dumb-http b/git-fetch-dumb-http
new file mode 100755
index 000000000..0034296ac
--- /dev/null
+++ b/git-fetch-dumb-http
@@ -0,0 +1,100 @@
+#!/bin/sh
+#
+# Copyright (c) 2005, Junio C Hamano
+#
+# Called by git-fetch-script
+# Exits 2 when the remote site does not support dumb server protocol.
+
+# Usage: git-fetch-dumb-http <head-SHA1> <repo> [ <head> | tag <tag> ]
+
+. git-sh-setup-script || die "Not a git archive"
+head="$1"
+shift
+. git-parse-remote "$@"
+
+merge_repo="$_remote_repo"
+merge_head="$_remote_head"
+merge_store="$_remote_store"
+
+if [ -n "$GIT_SSL_NO_VERIFY" ]; then
+	curl_extra_args="-k"
+fi
+http_fetch () {
+	# $1 = Remote, $2 = Local
+	curl -ns $curl_extra_args "$1" >"$2"
+}
+
+# Try dumb server protocol
+
+clone_tmp=".git/clone-tmp$$" &&
+mkdir -p "$clone_tmp" || exit 1
+trap "rm -rf $clone_tmp" 0 1 2 3 15
+http_fetch "$merge_repo/info/refs" "$clone_tmp/refs" &&
+http_fetch "$merge_repo/objects/info/packs" "$clone_tmp/packs" &&
+http_fetch "$merge_repo/info/rev-cache" "$clone_tmp/rev-cache" || exit 2
+
+# Which packs are we interested in?
+has_missing=,
+while read tag num sha1 type
+do
+	case "$tag" in
+	T) ;;
+	*) continue ;;
+	esac
+	git-cat-file -t "$sha1" >/dev/null || has_missing="$has_missing$num,"
+done <$clone_tmp/packs
+
+# Slurp the pack index we do not have all objects for.
+pack_ix=0
+may_want_pack_count=0
+while read tag pack
+do
+	case "$tag" in
+	P) ;;
+	*) break ;; # P records always come first.
+	esac
+	case "$has_missing" in
+	*",$pack_ix,"*)
+		name=`expr "$pack" : '\(.*\)\.pack$'` &&
+		idx="$name.idx" &&
+		http_fetch "$merge_repo/objects/pack/$idx" "$clone_tmp/$idx" &&
+		# Note that idx file is sorted --- otherwise we need to sort it here.
+		git-show-index <"$clone_tmp/$idx" |
+		sed -e 's/^[^ ]* //' >"$clone_tmp/$name.toc" ||
+		exit 1
+		may_want_pack_count=`expr "$may_want_pack_count" + 1`
+		;;
+	esac
+	pack_ix=`expr "$pack_ix" + 1`
+done <$clone_tmp/packs
+
+case "$may_want_pack_count" in
+0)
+	exit 0 ;;
+esac
+
+# We want $head. What are the head objects we are missing?
+git-missing-revs $clone_tmp/rev-cache $head >$clone_tmp/missing-revs &&
+sort -o $clone_tmp/missing-revs $clone_tmp/missing-revs || exit 2
+
+for toc in $clone_tmp/*.toc
+do
+	name=`expr $toc : '.*/\([^/]*\)\.toc'` &&
+	comm -12 $clone_tmp/missing-revs $toc >$clone_tmp/$name.can
+	# FIXME: this is stupid.
+	if test -s $clone_tmp/$name.can
+	then
+		pack="$name.pack" idx="$name.idx" &&
+		http_fetch "$merge_repo/objects/pack/$pack" "$clone_tmp/$pack" &&
+		git-verify-pack "$clone_tmp/$pack" &&
+		mkdir -p "$GIT_OBJECT_DIRECTORY/pack" &&
+		mv "$clone_tmp/$pack" "$clone_tmp/$idx" \
+			"$GIT_OBJECT_DIRECTORY/pack/" || {
+			# remote may just have a stale dumb server information files.
+			# and normal pull might succeed.
+			exit 2
+		}
+	fi
+done
+
+exit 0
diff --git a/git-fetch-script b/git-fetch-script
index 34ddfc8cb..2040c125f 100755
--- a/git-fetch-script
+++ b/git-fetch-script
@@ -13,9 +13,29 @@ http://* | https://*)
 	if [ -n "$GIT_SSL_NO_VERIFY" ]; then
 	    curl_extra_args="-k"
 	fi
-	head=$(curl -ns $curl_extra_args "$merge_repo/$merge_head") || exit 1
+	_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' &&
+	_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40" &&
+	head=$(curl -ns $curl_extra_args "$merge_repo/$merge_head") &&
+	expr "$head" : "$_x40\$" >/dev/null || {
+		echo >&2 "Failed to fetch $merge_head from $merge_repo"
+		exit 1
+	}
+	git-fetch-dumb-http "$head" "$@"
+	case "$?" in
+	0) ;;
+	2) no_dumb_http_support=1 ;;
+	*) exit;;
+	esac
 	echo Fetching "$merge_head" using http
-	git-http-pull -v -a "$head" "$merge_repo/"
+	git-http-pull -v -a "$head" "$merge_repo/" || {
+		case "$no_dumb_http_support" in
+		1)
+			echo >&2 "* This could be because the $merge_repo is packed without"
+			echo >&2 " preparing dumb server support files."
+			;;
+		esac
+		exit 1
+	}
 	;;
 rsync://*)
 	rsync -L "$merge_repo/$merge_head" "$TMP_HEAD" || exit 1
diff --git a/missing-revs.c b/missing-revs.c
new file mode 100644
index 000000000..afe41e33d
--- /dev/null
+++ b/missing-revs.c
@@ -0,0 +1,81 @@
+#include "cache.h"
+#include "rev-cache.h"
+
+static const char missing_revs_usage[] =
+"git-missing-revs <rev-cache-file> <want-sha1>...";
+
+/* Bits kept in rev_cache.work during the walk. */
+#define REV_WANT	01	/* missing locally and already printed */
+#define REV_HAVE	02	/* found locally; parents are not followed */
+#define REV_QUEUED	04	/* has been linked into the work list */
+
+/*
+ * Push rc onto the work list threaded through ->head_list and return
+ * the new list head.  Each entry is linked in at most once: relinking
+ * one that is still on the list would overwrite its link and drop (or
+ * loop back over) whatever followed it.
+ */
+static struct rev_cache *enqueue(struct rev_cache *rc,
+				 struct rev_cache *head_list)
+{
+	if (rc->work & REV_QUEUED)
+		return head_list;
+	rc->work |= REV_QUEUED;
+	rc->head_list = head_list;
+	return rc;
+}
+
+/*
+ * Walk from the entries on head_list towards their parents and print
+ * the SHA1 of every entry that has_sha1_file() does not find.  The
+ * walk stops at entries we already have.
+ */
+static void process(struct rev_cache *head_list)
+{
+	while (head_list) {
+		struct rev_cache *rc = head_list;
+		struct rev_list_elem *e;
+		head_list = rc->head_list;
+		rc->head_list = NULL;
+		if (has_sha1_file(rc->sha1)) {
+			rc->work |= REV_HAVE;
+			continue;
+		}
+		rc->work |= REV_WANT;
+		printf("%s\n", sha1_to_hex(rc->sha1));
+		for (e = rc->parents; e; e = e->next)
+			head_list = enqueue(e->ri, head_list);
+	}
+}
+
+/*
+ * av[1] names the rev-cache file; each further argument is a hex SHA1
+ * whose missing ancestry is listed on stdout, one SHA1 per line.
+ */
+int main(int ac, char **av)
+{
+	const char *rev_cache_file;
+	struct rev_cache *head_list = NULL;
+	int i;
+
+	if (ac < 3)
+		usage(missing_revs_usage);
+	rev_cache_file = av[1];
+	read_rev_cache(rev_cache_file, NULL, 0);
+	for (i = 2; i < ac; i++) {
+		unsigned char sha1[20];
+		int pos;
+		if (get_sha1_hex(av[i], sha1))
+			die("%s: not an SHA1", av[i]);
+		if ((pos = find_rev_cache(sha1)) < 0) {
+			/* We could be asked for tags, which would not
+			 * appear in the rev-cache.
+			 */
+			puts(av[i]);
+			continue;
+		}
+		head_list = enqueue(rev_cache[pos], head_list);
+	}
+	process(head_list);
+	return 0;
+}
-- 
2.30.2