Code

Merge branch 'nd/stream-more'
author Junio C Hamano <gitster@pobox.com>
Mon, 16 Apr 2012 05:50:38 +0000 (22:50 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 16 Apr 2012 05:50:39 +0000 (22:50 -0700)
Use the streaming API to read blob data in smaller chunks in more places,
reducing the memory footprint.

By Nguyễn Thái Ngọc Duy (6) and Junio C Hamano (1)
* nd/stream-more:
  update-server-info: respect core.bigfilethreshold
  fsck: use streaming API for writing lost-found blobs
  show: use streaming API for showing blobs
  parse_object: avoid putting whole blob in core
  cat-file: use streaming API to print blobs
  Add more large blob test cases
  streaming: make streaming-write-entry to be more reusable

builtin/cat-file.c
builtin/fsck.c
builtin/log.c
builtin/update-server-info.c
entry.c
object.c
sha1_file.c
streaming.c
streaming.h
t/t1050-large.sh
wrapper.c

index 8ed501f220424976cc30f4a4dbf3d59f979902be..36a9104433e23422aab39b1912e998a7f54cd3f4 100644 (file)
@@ -11,6 +11,7 @@
 #include "parse-options.h"
 #include "diff.h"
 #include "userdiff.h"
+#include "streaming.h"
 
 #define BATCH 1
 #define BATCH_CHECK 2
@@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
                        return cmd_ls_tree(2, ls_args, NULL);
                }
 
+               if (type == OBJ_BLOB)
+                       return stream_blob_to_fd(1, sha1, NULL, 0);
                buf = read_sha1_file(sha1, &type, &size);
                if (!buf)
                        die("Cannot read object %s", obj_name);
@@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
                break;
 
        case 0:
+               if (type_from_string(exp_type) == OBJ_BLOB) {
+                       unsigned char blob_sha1[20];
+                       if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
+                               enum object_type type;
+                               unsigned long size;
+                               char *buffer = read_sha1_file(sha1, &type, &size);
+                               if (memcmp(buffer, "object ", 7) ||
+                                   get_sha1_hex(buffer + 7, blob_sha1))
+                                       die("%s not a valid tag", sha1_to_hex(sha1));
+                               free(buffer);
+                       } else
+                               hashcpy(blob_sha1, sha1);
+
+                       if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
+                               return stream_blob_to_fd(1, blob_sha1, NULL, 0);
+                       /*
+                        * we attempted to dereference a tag to a blob
+                        * and failed; there may be new dereference
+                        * mechanisms this code is not aware of.
+                        * fall-back to the usual case.
+                        */
+               }
                buf = read_object_with_reference(sha1, exp_type, &size, NULL);
                break;
 
index 67eb553c7dc3d8ce62fbbefbe64a90c6431963c7..a710227a64a9862c0a70f3022f901fc65b0c7f90 100644 (file)
@@ -12,6 +12,7 @@
 #include "parse-options.h"
 #include "dir.h"
 #include "progress.h"
+#include "streaming.h"
 
 #define REACHABLE 0x0001
 #define SEEN      0x0002
@@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj)
                        if (!(f = fopen(filename, "w")))
                                die_errno("Could not open '%s'", filename);
                        if (obj->type == OBJ_BLOB) {
-                               enum object_type type;
-                               unsigned long size;
-                               char *buf = read_sha1_file(obj->sha1,
-                                               &type, &size);
-                               if (buf && fwrite(buf, 1, size, f) != size)
+                               if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
                                        die_errno("Could not write '%s'", filename);
-                               free(buf);
                        } else
                                fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
                        if (fclose(f))
index 8a47012b0bd2fefe616c44b918d16a18463b5d2a..690caa7830b2a4549012db5e46794118bc36e989 100644 (file)
@@ -20,6 +20,7 @@
 #include "string-list.h"
 #include "parse-options.h"
 #include "branch.h"
+#include "streaming.h"
 
 /* Set a default date-time format for git log ("log.date" config variable) */
 static const char *default_date_mode = NULL;
@@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
        strbuf_release(&out);
 }
 
-static int show_object(const unsigned char *sha1, int show_tag_object,
-       struct rev_info *rev)
+static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
+{
+       fflush(stdout); /* flush stdio's buffer first: the blob is written to fd 1 directly */
+       return stream_blob_to_fd(1, sha1, NULL, 0); /* no filter, no seeking; "rev" is unused */
+}
+
+static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
 {
        unsigned long size;
        enum object_type type;
@@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object,
        if (!buf)
                return error(_("Could not read object %s"), sha1_to_hex(sha1));
 
-       if (show_tag_object)
-               while (offset < size && buf[offset] != '\n') {
-                       int new_offset = offset + 1;
-                       while (new_offset < size && buf[new_offset++] != '\n')
-                               ; /* do nothing */
-                       if (!prefixcmp(buf + offset, "tagger "))
-                               show_tagger(buf + offset + 7,
-                                           new_offset - offset - 7, rev);
-                       offset = new_offset;
-               }
+       assert(type == OBJ_TAG);
+       while (offset < size && buf[offset] != '\n') {
+               int new_offset = offset + 1;
+               while (new_offset < size && buf[new_offset++] != '\n')
+                       ; /* do nothing */
+               if (!prefixcmp(buf + offset, "tagger "))
+                       show_tagger(buf + offset + 7,
+                                   new_offset - offset - 7, rev);
+               offset = new_offset;
+       }
 
        if (offset < size)
                fwrite(buf + offset, size - offset, 1, stdout);
@@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
                const char *name = objects[i].name;
                switch (o->type) {
                case OBJ_BLOB:
-                       ret = show_object(o->sha1, 0, NULL);
+                       ret = show_blob_object(o->sha1, NULL);
                        break;
                case OBJ_TAG: {
                        struct tag *t = (struct tag *)o;
@@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
                                        diff_get_color_opt(&rev.diffopt, DIFF_COMMIT),
                                        t->tag,
                                        diff_get_color_opt(&rev.diffopt, DIFF_RESET));
-                       ret = show_object(o->sha1, 1, &rev);
+                       ret = show_tag_object(o->sha1, &rev);
                        rev.shown_one = 1;
                        if (ret)
                                break;
index b90dce6358153b274a1e26afde9cc89aad473d14..0d63c4498c0c10193846c020a2d76958bd12e1bd 100644 (file)
@@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix)
                OPT_END()
        };
 
+       git_config(git_default_config, NULL);
        argc = parse_options(argc, argv, prefix, options,
                             update_server_info_usage, 0);
        if (argc > 0)
diff --git a/entry.c b/entry.c
index 852fea13955475c1e2fda9cfc25a63a54a1f61c7..17a6bccec64e0e523aacc124611c43bd818372e3 100644 (file)
--- a/entry.c
+++ b/entry.c
@@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
                                 const struct checkout *state, int to_tempfile,
                                 int *fstat_done, struct stat *statbuf)
 {
-       struct git_istream *st;
-       enum object_type type;
-       unsigned long sz;
        int result = -1;
-       ssize_t kept = 0;
-       int fd = -1;
-
-       st = open_istream(ce->sha1, &type, &sz, filter);
-       if (!st)
-               return -1;
-       if (type != OBJ_BLOB)
-               goto close_and_exit;
+       int fd;
 
        fd = open_output_fd(path, ce, to_tempfile);
-       if (fd < 0)
-               goto close_and_exit;
-
-       for (;;) {
-               char buf[1024 * 16];
-               ssize_t wrote, holeto;
-               ssize_t readlen = read_istream(st, buf, sizeof(buf));
-
-               if (!readlen)
-                       break;
-               if (sizeof(buf) == readlen) {
-                       for (holeto = 0; holeto < readlen; holeto++)
-                               if (buf[holeto])
-                                       break;
-                       if (readlen == holeto) {
-                               kept += holeto;
-                               continue;
-                       }
-               }
-
-               if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
-                       goto close_and_exit;
-               else
-                       kept = 0;
-               wrote = write_in_full(fd, buf, readlen);
-
-               if (wrote != readlen)
-                       goto close_and_exit;
-       }
-       if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
-                    write(fd, "", 1) != 1))
-               goto close_and_exit;
-       *fstat_done = fstat_output(fd, state, statbuf);
-
-close_and_exit:
-       close_istream(st);
-       if (0 <= fd)
+       if (0 <= fd) {
+               result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
+               *fstat_done = fstat_output(fd, state, statbuf);
                result = close(fd);
+       }
        if (result && 0 <= fd)
                unlink(path);
        return result;
index 6b06297a5f06cc35cb266d6dd36c92df75a82de7..0498b18d451b2335d21e2db3edc0ce7838aaa50e 100644 (file)
--- a/object.c
+++ b/object.c
@@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
        if (obj && obj->parsed)
                return obj;
 
+       if ((obj && obj->type == OBJ_BLOB) ||
+           (!obj && has_sha1_file(sha1) &&
+            sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
+               if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
+                       error("sha1 mismatch %s\n", sha1_to_hex(repl));
+                       return NULL;
+               }
+               parse_blob_buffer(lookup_blob(sha1), NULL, 0);
+               return lookup_object(sha1);
+       }
+
        buffer = read_sha1_file(sha1, &type, &size);
        if (buffer) {
                if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {
index 4f06a0e450359744528d3b125fb09eacebf1eb4a..ad314f08b9abd9a16b410483f9a9629ce59345cf 100644 (file)
@@ -19,6 +19,7 @@
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
+#include "streaming.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1146,10 +1147,57 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
        return NULL;
 }
 
-int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
+/*
+ * With an in-core object data in "map", rehash it to make sure the
+ * object name actually matches "sha1" to detect object corruption.
+ * With "map" == NULL, try reading the object named with "sha1" using
+ * the streaming interface and rehash it to do the same.
+ *
+ * "size" and "type" describe the data in "map"; they are not used as
+ * input when "map" is NULL.  Returns 0 when the computed name matches
+ * "sha1", and -1 on a mismatch or when the object cannot be opened or
+ * read through the streaming interface.
+ */
+int check_sha1_signature(const unsigned char *sha1, void *map,
+                        unsigned long size, const char *type)
 {
        unsigned char real_sha1[20];
-       hash_sha1_file(map, size, type, real_sha1);
+       enum object_type obj_type;
+       struct git_istream *st;
+       git_SHA_CTX c;
+       char hdr[32];
+       int hdrlen;
+
+       if (map) {
+               hash_sha1_file(map, size, type, real_sha1);
+               return hashcmp(sha1, real_sha1) ? -1 : 0;
+       }
+
+       st = open_istream(sha1, &obj_type, &size, NULL);
+       if (!st)
+               return -1;
+
+       /* Generate the header */
+       hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
+
+       /* Sha1.. */
+       git_SHA1_Init(&c);
+       git_SHA1_Update(&c, hdr, hdrlen);
+       for (;;) {
+               char buf[1024 * 16];
+               ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+               /* read_istream() returns ssize_t: never hash a negative "length" */
+               if (readlen < 0) {
+                       close_istream(st);
+                       return -1;
+               }
+               if (!readlen)
+                       break;
+               git_SHA1_Update(&c, buf, readlen);
+       }
+       git_SHA1_Final(real_sha1, &c);
+       close_istream(st);
        return hashcmp(sha1, real_sha1) ? -1 : 0;
 }
 
index 71072e1b1da670cdb4b048a3a6e83a4ae806bf5f..7e7ee2be6fe147ff660f8ab25618fbe4d4d0f11c 100644 (file)
@@ -489,3 +489,73 @@ static open_method_decl(incore)
 
        return st->u.incore.buf ? 0 : -1;
 }
+
+
+/****************************************************************
+ * Users of streaming interface
+ ****************************************************************/
+
+/*
+ * Write the blob named "sha1" to "fd" through the streaming interface,
+ * one 16kB chunk at a time, optionally passing it through "filter".
+ *
+ * When "can_seek" is non-zero, a full chunk that contains only NUL
+ * bytes is not written; the run is skipped with lseek() so that the
+ * output can be left with a hole, and a run at the very end is
+ * terminated by writing its last byte.
+ *
+ * Returns 0 on success, and -1 when the object cannot be opened, is
+ * not a blob, or a read, seek or write fails.
+ */
+int stream_blob_to_fd(int fd, const unsigned char *sha1, struct stream_filter *filter,
+                     int can_seek)
+{
+       struct git_istream *st;
+       enum object_type type;
+       unsigned long sz;
+       ssize_t kept = 0;
+       int result = -1;
+
+       st = open_istream(sha1, &type, &sz, filter);
+       if (!st)
+               return result;
+       if (type != OBJ_BLOB)
+               goto close_and_exit;
+       for (;;) {
+               char buf[1024 * 16];
+               ssize_t wrote, holeto;
+               ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+               /* read_istream() returns ssize_t: a negative value is a failed read */
+               if (readlen < 0)
+                       goto close_and_exit;
+               if (!readlen)
+                       break;
+               if (can_seek && sizeof(buf) == readlen) {
+                       for (holeto = 0; holeto < readlen; holeto++)
+                               if (buf[holeto])
+                                       break;
+                       if (readlen == holeto) {
+                               kept += holeto;
+                               continue;
+                       }
+               }
+
+               if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
+                       goto close_and_exit;
+               else
+                       kept = 0;
+               wrote = write_in_full(fd, buf, readlen);
+
+               if (wrote != readlen)
+                       goto close_and_exit;
+       }
+       if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
+                    write(fd, "", 1) != 1))
+               goto close_and_exit;
+       result = 0;
+
+ close_and_exit:
+       close_istream(st);
+       return result;
+}
index 589e857b8c4ad68e30b91da2eb29a076b98ef903..3e827709c85eeaf6669d05d0d59e288541ceb579 100644 (file)
@@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
 extern int close_istream(struct git_istream *);
 extern ssize_t read_istream(struct git_istream *, char *, size_t);
 
+extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);
+
 #endif /* STREAMING_H */
index 29d6024b7f1b55c09cbd7e9ed682a3e745c550d6..4d127f19b78cc76018e316532c905137e9c7ab08 100755 (executable)
@@ -6,11 +6,15 @@ test_description='adding and checking out large blobs'
 . ./test-lib.sh
 
 test_expect_success setup '
-       git config core.bigfilethreshold 200k &&
+       # clone does not allow us to pass core.bigfilethreshold to
+       # new repos, so set core.bigfilethreshold globally
+       git config --global core.bigfilethreshold 200k &&
        echo X | dd of=large1 bs=1k seek=2000 &&
        echo X | dd of=large2 bs=1k seek=2000 &&
        echo X | dd of=large3 bs=1k seek=2000 &&
-       echo Y | dd of=huge bs=1k seek=2500
+       echo Y | dd of=huge bs=1k seek=2500 &&
+       GIT_ALLOC_LIMIT=1500 &&
+       export GIT_ALLOC_LIMIT
 '
 
 test_expect_success 'add a large file or two' '
@@ -100,4 +104,34 @@ test_expect_success 'packsize limit' '
        )
 '
 
+test_expect_success 'diff --raw' '
+       git commit -q -m initial &&
+       echo modified >>large1 &&
+       git add large1 &&
+       git commit -q -m modified &&
+       git diff --raw HEAD^
+'
+
+test_expect_success 'hash-object' '
+       git hash-object large1
+'
+
+test_expect_success 'cat-file a large file' '
+       git cat-file blob :large1 >/dev/null
+'
+
+test_expect_success 'cat-file a large file from a tag' '
+       git tag -m largefile largefiletag :large1 &&
+       git cat-file blob largefiletag >/dev/null
+'
+
+test_expect_success 'git-show a large file' '
+       git show :large1 >/dev/null
+
+'
+
+test_expect_success 'repack' '
+       git repack -ad
+'
+
 test_done
index 85f09df747637b94e0488ad65984c3f97c732034..6ccd0595f43d0ef62bd60a5863804f9a842a4235 100644 (file)
--- a/wrapper.c
+++ b/wrapper.c
@@ -9,6 +9,18 @@ static void do_nothing(size_t size)
 
 static void (*try_to_free_routine)(size_t size) = do_nothing;
 
+static void memory_limit_check(size_t size) /* die() on requests above GIT_ALLOC_LIMIT */
+{
+       static int limit = -1; /* bytes; -1 = environment not read yet, 0 = no limit */
+       if (limit == -1) {
+               const char *env = getenv("GIT_ALLOC_LIMIT");
+               limit = env ? atoi(env) * 1024 : 0; /* the variable's value is scaled by 1024 */
+       }
+       if (limit && size > (size_t)limit)
+               die("attempting to allocate %"PRIuMAX" over limit %d",
+                   (uintmax_t)size, limit);
+}
+
 try_to_free_t set_try_to_free_routine(try_to_free_t routine)
 {
        try_to_free_t old = try_to_free_routine;
@@ -32,7 +44,10 @@ char *xstrdup(const char *str)
 
 void *xmalloc(size_t size)
 {
-       void *ret = malloc(size);
+       void *ret;
+
+       memory_limit_check(size);
+       ret = malloc(size);
        if (!ret && !size)
                ret = malloc(1);
        if (!ret) {
@@ -79,7 +94,10 @@ char *xstrndup(const char *str, size_t len)
 
 void *xrealloc(void *ptr, size_t size)
 {
-       void *ret = realloc(ptr, size);
+       void *ret;
+
+       memory_limit_check(size);
+       ret = realloc(ptr, size);
        if (!ret && !size)
                ret = realloc(ptr, 1);
        if (!ret) {
@@ -95,7 +113,10 @@ void *xrealloc(void *ptr, size_t size)
 
 void *xcalloc(size_t nmemb, size_t size)
 {
-       void *ret = calloc(nmemb, size);
+       void *ret;
+
+       memory_limit_check(size * nmemb);
+       ret = calloc(nmemb, size);
        if (!ret && (!nmemb || !size))
                ret = calloc(1, 1);
        if (!ret) {