X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=sha1_file.c;h=32268d11d0c929624729b01f143845b13d7a569c;hb=e65ceb61cd7d3fabedea8cb545f8c210b48552d4;hp=94d431907cc6f974277ab912411d2a9de2cb98bf;hpb=ef49a7a0126d64359c974b4b3b71d7ad42ee3bca;p=git.git diff --git a/sha1_file.c b/sha1_file.c index 94d431907..32268d11d 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -11,6 +11,7 @@ #include "pack.h" #include "blob.h" #include "commit.h" +#include "run-command.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@ -379,7 +380,7 @@ void add_to_alternates_file(const char *reference) { struct lock_file *lock = xcalloc(1, sizeof(struct lock_file)); int fd = hold_lock_file_for_append(lock, git_path("objects/info/alternates"), LOCK_DIE_ON_ERROR); - char *alt = mkpath("%s/objects\n", reference); + char *alt = mkpath("%s\n", reference); write_or_die(fd, alt, strlen(alt)); if (commit_lock_file(lock)) die("could not close alternates file"); @@ -1204,20 +1205,29 @@ static void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } -static int legacy_loose_object(unsigned char *map) +/* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned up not to be + * really worth it and we no longer write loose objects in that + * format. + */ +static int experimental_loose_object(unsigned char *map) { unsigned int word; /* * Is it a zlib-compressed buffer? If so, the first byte * must be 0x78 (15-bit window size, deflated), and the - * first 16-bit word is evenly divisible by 31 + * first 16-bit word is evenly divisible by 31. If so, + * we are looking at the official format, not the experimental + * one. */ word = (map[0] << 8) + map[1]; if (map[0] == 0x78 && !(word % 31)) - return 1; - else return 0; + else + return 1; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -1261,34 +1271,29 @@ static int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned stream->next_out = buffer; stream->avail_out = bufsiz; - if (legacy_loose_object(map)) { - git_inflate_init(stream); - return git_inflate(stream, 0); - } - + if (experimental_loose_object(map)) { + /* + * The old experimental format we no longer produce; + * we can still read it. + */ + used = unpack_object_header_buffer(map, mapsize, &type, &size); + if (!used || !valid_loose_object_type[type]) + return -1; + map += used; + mapsize -= used; - /* - * There used to be a second loose object header format which - * was meant to mimic the in-pack format, allowing for direct - * copy of the object data. This format turned up not to be - * really worth it and we don't write it any longer. But we - * can still read it. - */ - used = unpack_object_header_buffer(map, mapsize, &type, &size); - if (!used || !valid_loose_object_type[type]) - return -1; - map += used; - mapsize -= used; + /* Set up the stream for the rest.. */ + stream->next_in = map; + stream->avail_in = mapsize; + git_inflate_init(stream); - /* Set up the stream for the rest.. */ - stream->next_in = map; - stream->avail_in = mapsize; + /* And generate the fake traditional header */ + stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", + typename(type), size); + return 0; + } git_inflate_init(stream); - - /* And generate the fake traditional header */ - stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", - typename(type), size); - return 0; + return git_inflate(stream, 0); } static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) @@ -2205,23 +2210,21 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, * deal with them should arrange to call read_object() and give error * messages themselves. */ -void *read_sha1_file_repl(const unsigned char *sha1, - enum object_type *type, - unsigned long *size, - const unsigned char **replacement) +void *read_sha1_file_extended(const unsigned char *sha1, + enum object_type *type, + unsigned long *size, + unsigned flag) { - const unsigned char *repl = lookup_replace_object(sha1); void *data; char *path; const struct packed_git *p; + const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE) + ? lookup_replace_object(sha1) : sha1; errno = 0; data = read_object(repl, type, size); - if (data) { - if (replacement) - *replacement = repl; + if (data) return data; - } if (errno && errno != ENOENT) die_errno("failed to read object %s", sha1_to_hex(sha1)); @@ -2580,10 +2583,11 @@ static void check_tag(const void *buf, size_t size) } static int index_mem(unsigned char *sha1, void *buf, size_t size, - int write_object, enum object_type type, - const char *path, int format_check) + enum object_type type, + const char *path, unsigned flags) { int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; if (!type) type = OBJ_BLOB; @@ -2599,7 +2603,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, re_allocated = 1; } } - if (format_check) { + if (flags & HASH_FORMAT_CHECK) { if (type == OBJ_TREE) check_tree(buf, size); if (type == OBJ_COMMIT) @@ -2617,44 +2621,141 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_pipe(unsigned char *sha1, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path, int format_check) +static int index_core(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) { - struct strbuf sbuf = STRBUF_INIT; - if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, - type, path, format_check); - else - ret = -1; - strbuf_release(&sbuf); - } else if (!size) { - ret = index_mem(sha1, NULL, size, write_object, type, path, - format_check); + if (!size) { + ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, write_object, type, - path, format_check); + ret = index_mem(sha1, buf, size, type, path, flags); else ret = error("short read %s", strerror(errno)); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = index_mem(sha1, buf, size, write_object, type, path, - format_check); + ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } + return ret; +} + +/* + * This creates one packfile per large blob, because the caller + * immediately wants the result sha1, and fast-import can report the + * object name via marks mechanism only by closing the created + * packfile. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above), but the primary + * motivation for trying to stream from the working tree file and to + * avoid mmaping it in core is to deal with large binary blobs, and + * by definition they do _not_ want to get any conversion. + */ +static int index_stream(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + struct child_process fast_import; + char export_marks[512]; + const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; + char tmpfile[512]; + char fast_import_cmd[512]; + char buf[512]; + int len, tmpfd; + + strcpy(tmpfile, git_path("hashstream_XXXXXX")); + tmpfd = git_mkstemp_mode(tmpfile, 0600); + if (tmpfd < 0) + die_errno("cannot create tempfile: %s", tmpfile); + if (close(tmpfd)) + die_errno("cannot close tempfile: %s", tmpfile); + sprintf(export_marks, "--export-marks=%s", tmpfile); + + memset(&fast_import, 0, sizeof(fast_import)); + fast_import.in = -1; + fast_import.argv = argv; + fast_import.git_cmd = 1; + if (start_command(&fast_import)) + die_errno("index-stream: git fast-import failed"); + + len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", + (unsigned long) size); + write_or_whine(fast_import.in, fast_import_cmd, len, + "index-stream: feeding fast-import"); + while (size) { + char buf[10240]; + size_t sz = size < sizeof(buf) ? size : sizeof(buf); + size_t actual; + + actual = read_in_full(fd, buf, sz); + if (actual < 0) + die_errno("index-stream: reading input"); + if (write_in_full(fast_import.in, buf, actual) != actual) + die_errno("index-stream: feeding fast-import"); + size -= actual; + } + if (close(fast_import.in)) + die_errno("index-stream: closing fast-import"); + if (finish_command(&fast_import)) + die_errno("index-stream: finishing fast-import"); + + tmpfd = open(tmpfile, O_RDONLY); + if (tmpfd < 0) + die_errno("index-stream: cannot open fast-import mark"); + len = read(tmpfd, buf, sizeof(buf)); + if (len < 0) + die_errno("index-stream: reading fast-import mark"); + if (close(tmpfd) < 0) + die_errno("index-stream: closing fast-import mark"); + if (unlink(tmpfile)) + die_errno("index-stream: unlinking fast-import mark"); + if (len != 44 || + memcmp(":1 ", buf, 3) || + get_sha1_hex(buf + 3, sha1)) + die_errno("index-stream: unexpected fast-import mark: <%s>", buf); + return 0; +} + +int index_fd(unsigned char *sha1, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) +{ + int ret; + size_t size = xsize_t(st->st_size); + + if (!S_ISREG(st->st_mode)) + ret = index_pipe(sha1, fd, type, path, flags); + else if (size <= big_file_threshold || type != OBJ_BLOB) + ret = index_core(sha1, fd, size, type, path, flags); + else + ret = index_stream(sha1, fd, size, type, path, flags); close(fd); return ret; } -int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object) +int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags) { int fd; struct strbuf sb = STRBUF_INIT; @@ -2665,7 +2766,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0) + if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0) return error("%s: failed to insert into database", path); break; @@ -2675,7 +2776,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write return error("readlink(\"%s\"): %s", path, errstr); } - if (!write_object) + if (!(flags & HASH_WRITE_OBJECT)) hash_sha1_file(sb.buf, sb.len, blob_type, sha1); else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database",