X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=sha1_file.c;h=219a10f403e0c3aa2f8d60ecde5807cf51d1a659;hb=ed287ab7fa71528e8de288988d1662eebae64002;hp=43ff4023808cc8b5a5f54c75ff77e80776a581aa;hpb=8276c0070f8f8959ab4b6ec458fa0740e9abda9e;p=git.git diff --git a/sha1_file.c b/sha1_file.c index 43ff40238..219a10f40 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -349,6 +349,7 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep, static void read_info_alternates(const char * relative_base, int depth) { char *map; + size_t mapsz; struct stat st; char path[PATH_MAX]; int fd; @@ -361,12 +362,13 @@ static void read_info_alternates(const char * relative_base, int depth) close(fd); return; } - map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + mapsz = xsize_t(st.st_size); + map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - link_alt_odb_entries(map, map + st.st_size, '\n', relative_base, depth); + link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth); - munmap(map, st.st_size); + munmap(map, mapsz); } void prepare_alt_odb(void) @@ -407,7 +409,6 @@ static unsigned int peak_pack_open_windows; static unsigned int pack_open_windows; static size_t peak_pack_mapped; static size_t pack_mapped; -static size_t page_size; struct packed_git *packed_git; void pack_report() @@ -416,7 +417,7 @@ void pack_report() "pack_report: getpagesize() = %10" SZ_FMT "\n" "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" "pack_report: core.packedGitLimit = %10" SZ_FMT "\n", - page_size, + (size_t) getpagesize(), packed_git_window_size, packed_git_limit); fprintf(stderr, @@ -431,13 +432,14 @@ void pack_report() pack_mapped, peak_pack_mapped); } -static int check_packed_git_idx(const char *path, unsigned long *idx_size_, - void **idx_map_) +static int check_packed_git_idx(const char *path, + unsigned long *idx_size_, + void **idx_map_) { void *idx_map; uint32_t *index; - unsigned long idx_size; - int nr, i; + size_t idx_size; + uint32_t nr, i; int fd = open(path, O_RDONLY); struct stat st; if (fd < 0) @@ -446,7 +448,11 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, close(fd); return -1; } - idx_size = st.st_size; + idx_size = xsize_t(st.st_size); + if (idx_size < 4 * 256 + 20 + 20) { + close(fd); + return error("index file %s is too small", path); + } idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); @@ -454,25 +460,25 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, *idx_map_ = idx_map; *idx_size_ = idx_size; - /* check index map */ - if (idx_size < 4*256 + 20 + 20) - return error("index file %s is too small", path); - /* a future index format would start with this, as older git * binaries would fail the non-monotonic index check below. * give a nicer warning to the user if we can. */ - if (index[0] == htonl(PACK_IDX_SIGNATURE)) + if (index[0] == htonl(PACK_IDX_SIGNATURE)) { + munmap(idx_map, idx_size); return error("index file %s is a newer version" " and is not supported by this binary" " (try upgrading GIT to a newer version)", path); + } nr = 0; for (i = 0; i < 256; i++) { - unsigned int n = ntohl(index[i]); - if (n < nr) + uint32_t n = ntohl(index[i]); + if (n < nr) { + munmap(idx_map, idx_size); return error("non-monotonic index %s", path); + } nr = n; } @@ -483,8 +489,10 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, * - 20-byte SHA1 of the packfile * - 20-byte SHA1 file checksum */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) + if (idx_size != 4*256 + nr * 24 + 20 + 20) { + munmap(idx_map, idx_size); return error("wrong index file size in %s", path); + } return 0; } @@ -552,7 +560,11 @@ void unuse_pack(struct pack_window **w_cursor) } } -static void open_packed_git(struct packed_git *p) +/* + * Do not call this directly as this leaks p->pack_fd on error return; + * call open_packed_git() instead. + */ +static int open_packed_git_1(struct packed_git *p) { struct stat st; struct pack_header hdr; @@ -562,52 +574,64 @@ static void open_packed_git(struct packed_git *p) p->pack_fd = open(p->pack_name, O_RDONLY); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) - die("packfile %s cannot be opened", p->pack_name); + return -1; /* If we created the struct before we had the pack we lack size. */ if (!p->pack_size) { if (!S_ISREG(st.st_mode)) - die("packfile %s not a regular file", p->pack_name); + return error("packfile %s not a regular file", p->pack_name); p->pack_size = st.st_size; } else if (p->pack_size != st.st_size) - die("packfile %s size changed", p->pack_name); + return error("packfile %s size changed", p->pack_name); /* We leave these file descriptors open with sliding mmap; * there is no point keeping them open across exec(), though. */ fd_flag = fcntl(p->pack_fd, F_GETFD, 0); if (fd_flag < 0) - die("cannot determine file descriptor flags"); + return error("cannot determine file descriptor flags"); fd_flag |= FD_CLOEXEC; if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) - die("cannot set FD_CLOEXEC"); + return error("cannot set FD_CLOEXEC"); /* Verify we recognize this pack file format. */ if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) - die("file %s is far too short to be a packfile", p->pack_name); + return error("file %s is far too short to be a packfile", p->pack_name); if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) - die("file %s is not a GIT packfile", p->pack_name); + return error("file %s is not a GIT packfile", p->pack_name); if (!pack_version_ok(hdr.hdr_version)) - die("packfile %s is version %u and not supported" + return error("packfile %s is version %u and not supported" " (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); /* Verify the pack matches its index. */ if (num_packed_objects(p) != ntohl(hdr.hdr_entries)) - die("packfile %s claims to have %u objects" + return error("packfile %s claims to have %u objects" " while index size indicates %u objects", p->pack_name, ntohl(hdr.hdr_entries), num_packed_objects(p)); if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) - die("end of packfile %s is unavailable", p->pack_name); + return error("end of packfile %s is unavailable", p->pack_name); if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1)) - die("packfile %s signature is unavailable", p->pack_name); + return error("packfile %s signature is unavailable", p->pack_name); idx_sha1 = ((unsigned char *)p->index_base) + p->index_size - 40; if (hashcmp(sha1, idx_sha1)) - die("packfile %s does not match index", p->pack_name); + return error("packfile %s does not match index", p->pack_name); + return 0; } -static int in_window(struct pack_window *win, unsigned long offset) +static int open_packed_git(struct packed_git *p) +{ + if (!open_packed_git_1(p)) + return 0; + if (p->pack_fd != -1) { + close(p->pack_fd); + p->pack_fd = -1; + } + return -1; +} + +static int in_window(struct pack_window *win, off_t offset) { /* We must promise at least 20 bytes (one hash) after the * offset is available from this window, otherwise the offset @@ -622,13 +646,13 @@ static int in_window(struct pack_window *win, unsigned long offset) unsigned char* use_pack(struct packed_git *p, struct pack_window **w_cursor, - unsigned long offset, + off_t offset, unsigned int *left) { struct pack_window *win = *w_cursor; - if (p->pack_fd == -1) - open_packed_git(p); + if (p->pack_fd == -1 && open_packed_git(p)) + die("packfile %s cannot be accessed", p->pack_name); /* Since packfiles end in a hash of their content and its * pointless to ask for an offset into the middle of that @@ -646,13 +670,14 @@ unsigned char* use_pack(struct packed_git *p, break; } if (!win) { - if (!page_size) - page_size = getpagesize(); + size_t window_align = packed_git_window_size / 2; + off_t len; win = xcalloc(1, sizeof(*win)); - win->offset = (offset / page_size) * page_size; - win->len = p->pack_size - win->offset; - if (win->len > packed_git_window_size) - win->len = packed_git_window_size; + win->offset = (offset / window_align) * window_align; + len = p->pack_size - win->offset; + if (len > packed_git_window_size) + len = packed_git_window_size; + win->len = (size_t)len; pack_mapped += win->len; while (packed_git_limit < pack_mapped && unuse_one_window(p)) @@ -681,7 +706,7 @@ unsigned char* use_pack(struct packed_git *p, } offset -= win->offset; if (left) - *left = win->len - offset; + *left = win->len - xsize_t(offset); return win->base + offset; } @@ -779,7 +804,7 @@ static void prepare_packed_git_one(char *objdir, int local) if (!has_extension(de->d_name, ".idx")) continue; - /* we have .idx. Is it a file we can map? */ + /* Don't reopen a pack we already have. */ strcpy(path + len, de->d_name); for (p = packed_git; p; p = p->next) { if (!memcmp(path, p->pack_name, len + namelen - 4)) @@ -787,11 +812,13 @@ static void prepare_packed_git_one(char *objdir, int local) } if (p) continue; + /* See if it really is a valid .idx file with corresponding + * .pack file that we can map. + */ p = add_packed_git(path, len + namelen, local); if (!p) continue; - p->next = packed_git; - packed_git = p; + install_packed_git(p); } closedir(dir); } @@ -855,9 +882,9 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) */ sha1_file_open_flag = 0; } - map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + *size = xsize_t(st.st_size); + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - *size = st.st_size; return map; } @@ -936,7 +963,7 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon /* And generate the fake traditional header */ stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", - type_names[type], size); + typename(type), size); return 0; } @@ -967,26 +994,27 @@ static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_sha1_header(char *hdr, char *type, unsigned long *sizep) +static int parse_sha1_header(const char *hdr, unsigned long *sizep) { + char type[10]; int i; unsigned long size; /* * The type can be at most ten bytes (including the * terminating '\0' that we add), and is followed by - * a space. + * a space. */ - i = 10; + i = 0; for (;;) { char c = *hdr++; if (c == ' ') break; - if (!--i) + type[i++] = c; + if (i >= sizeof(type)) return -1; - *type++ = c; } - *type = 0; + type[i] = 0; /* * The length must follow immediately, and be in canonical @@ -1009,31 +1037,30 @@ static int parse_sha1_header(char *hdr, char *type, unsigned long *sizep) /* * The length must be followed by a zero byte */ - return *hdr ? -1 : 0; + return *hdr ? -1 : type_from_string(type); } -void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size) +void * unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size) { int ret; z_stream stream; char hdr[8192]; ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)); - if (ret < Z_OK || parse_sha1_header(hdr, type, size) < 0) + if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0) return NULL; return unpack_sha1_rest(&stream, hdr, *size); } -static unsigned long get_delta_base(struct packed_git *p, +static off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, - enum object_type kind, - unsigned long delta_obj_offset, - unsigned long *base_obj_offset) + off_t *curpos, + enum object_type type, + off_t delta_obj_offset) { - unsigned char *base_info = use_pack(p, w_curs, offset, NULL); - unsigned long base_offset; + unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); + off_t base_offset; /* use_pack() assured us we have [base_info, base_info + 20) * as a range that we can look at without walking off the @@ -1041,7 +1068,7 @@ static unsigned long get_delta_base(struct packed_git *p, * that is assured. An OFS_DELTA longer than the hash size * is stupid, as then a REF_DELTA would be smaller to store. */ - if (kind == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { unsigned used = 0; unsigned char c = base_info[used++]; base_offset = c & 127; @@ -1055,49 +1082,43 @@ static unsigned long get_delta_base(struct packed_git *p, base_offset = delta_obj_offset - base_offset; if (base_offset >= delta_obj_offset) die("delta base offset out of bound"); - offset += used; - } else if (kind == OBJ_REF_DELTA) { + *curpos += used; + } else if (type == OBJ_REF_DELTA) { /* The base entry _must_ be in the same pack */ base_offset = find_pack_entry_one(base_info, p); if (!base_offset) die("failed to find delta-pack base object %s", sha1_to_hex(base_info)); - offset += 20; + *curpos += 20; } else die("I am totally screwed"); - *base_obj_offset = base_offset; - return offset; + return base_offset; } /* forward declaration for a mutually recursive function */ -static int packed_object_info(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep); +static int packed_object_info(struct packed_git *p, off_t offset, + unsigned long *sizep); static int packed_delta_info(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, - enum object_type kind, - unsigned long obj_offset, - char *type, + off_t curpos, + enum object_type type, + off_t obj_offset, unsigned long *sizep) { - unsigned long base_offset; + off_t base_offset; - offset = get_delta_base(p, w_curs, offset, kind, - obj_offset, &base_offset); + base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); + type = packed_object_info(p, base_offset, NULL); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (packed_object_info(p, base_offset, type, NULL)) - die("cannot get info for delta-pack base"); - if (sizep) { const unsigned char *data; unsigned char delta_head[20], *in; - unsigned long result_size; z_stream stream; int st; @@ -1107,10 +1128,10 @@ static int packed_delta_info(struct packed_git *p, inflateInit(&stream); do { - in = use_pack(p, w_curs, offset, &stream.avail_in); + in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); - offset += stream.next_in - in; + curpos += stream.next_in - in; } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); @@ -1127,122 +1148,120 @@ static int packed_delta_info(struct packed_git *p, get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); /* Read the result size */ - result_size = get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - *sizep = result_size; + *sizep = get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); } - return 0; + + return type; } -static unsigned long unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - unsigned long offset, - enum object_type *type, - unsigned long *sizep) +static int unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + unsigned long *sizep) { unsigned char *base; unsigned int left; unsigned long used; + enum object_type type; /* use_pack() assures us we have [base, base + 20) available * as a range that we can look at at. (Its actually the hash - * size that is assurred.) With our object header encoding + * size that is assured.) With our object header encoding * the maximum deflated object size is 2^137, which is just * insane, so we know won't exceed what we have been given. */ - base = use_pack(p, w_curs, offset, &left); - used = unpack_object_header_gently(base, left, type, sizep); + base = use_pack(p, w_curs, *curpos, &left); + used = unpack_object_header_gently(base, left, &type, sizep); if (!used) die("object offset outside of pack file"); + *curpos += used; - return offset + used; + return type; } -void packed_object_info_detail(struct packed_git *p, - unsigned long offset, - char *type, - unsigned long *size, - unsigned long *store_size, - unsigned int *delta_chain_length, - unsigned char *base_sha1) +const char *packed_object_info_detail(struct packed_git *p, + off_t obj_offset, + unsigned long *size, + unsigned long *store_size, + unsigned int *delta_chain_length, + unsigned char *base_sha1) { struct pack_window *w_curs = NULL; - unsigned long obj_offset, val; + off_t curpos; + unsigned long dummy; unsigned char *next_sha1; - enum object_type kind; + enum object_type type; *delta_chain_length = 0; - obj_offset = offset; - offset = unpack_object_header(p, &w_curs, offset, &kind, size); + curpos = obj_offset; + type = unpack_object_header(p, &w_curs, &curpos, size); for (;;) { - switch (kind) { + switch (type) { default: die("pack %s contains unknown object type %d", - p->pack_name, kind); + p->pack_name, type); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ unuse_pack(&w_curs); - return; + return typename(type); case OBJ_OFS_DELTA: - get_delta_base(p, &w_curs, offset, kind, - obj_offset, &offset); + obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); if (*delta_chain_length == 0) { - /* TODO: find base_sha1 as pointed by offset */ + /* TODO: find base_sha1 as pointed by curpos */ + hashclr(base_sha1); } break; case OBJ_REF_DELTA: - next_sha1 = use_pack(p, &w_curs, offset, NULL); + next_sha1 = use_pack(p, &w_curs, curpos, NULL); if (*delta_chain_length == 0) hashcpy(base_sha1, next_sha1); - offset = find_pack_entry_one(next_sha1, p); + obj_offset = find_pack_entry_one(next_sha1, p); break; } - obj_offset = offset; - offset = unpack_object_header(p, &w_curs, offset, &kind, &val); (*delta_chain_length)++; + curpos = obj_offset; + type = unpack_object_header(p, &w_curs, &curpos, &dummy); } } -static int packed_object_info(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep) +static int packed_object_info(struct packed_git *p, off_t obj_offset, + unsigned long *sizep) { struct pack_window *w_curs = NULL; - unsigned long size, obj_offset = offset; - enum object_type kind; - int r; + unsigned long size; + off_t curpos = obj_offset; + enum object_type type; - offset = unpack_object_header(p, &w_curs, offset, &kind, &size); + type = unpack_object_header(p, &w_curs, &curpos, &size); - switch (kind) { + switch (type) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - r = packed_delta_info(p, &w_curs, offset, kind, - obj_offset, type, sizep); - unuse_pack(&w_curs); - return r; + type = packed_delta_info(p, &w_curs, curpos, + type, obj_offset, sizep); + break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); - unuse_pack(&w_curs); + if (sizep) + *sizep = size; break; default: die("pack %s contains unknown object type %d", - p->pack_name, kind); + p->pack_name, type); } - if (sizep) - *sizep = size; - return 0; + unuse_pack(&w_curs); + return type; } static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, + off_t curpos, unsigned long size) { int st; @@ -1257,10 +1276,10 @@ static void *unpack_compressed_entry(struct packed_git *p, inflateInit(&stream); do { - in = use_pack(p, w_curs, offset, &stream.avail_in); + in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); - offset += stream.next_in - in; + curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); inflateEnd(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { @@ -1273,82 +1292,78 @@ static void *unpack_compressed_entry(struct packed_git *p, static void *unpack_delta_entry(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, + off_t curpos, unsigned long delta_size, - enum object_type kind, - unsigned long obj_offset, - char *type, + off_t obj_offset, + enum object_type *type, unsigned long *sizep) { void *delta_data, *result, *base; - unsigned long result_size, base_size, base_offset; + unsigned long base_size; + off_t base_offset; - offset = get_delta_base(p, w_curs, offset, kind, - obj_offset, &base_offset); + base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset); base = unpack_entry(p, base_offset, type, &base_size); if (!base) - die("failed to read delta base object at %lu from %s", - base_offset, p->pack_name); + die("failed to read delta base object" + " at %"PRIuMAX" from %s", + (uintmax_t)base_offset, p->pack_name); - delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size); + delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size); result = patch_delta(base, base_size, delta_data, delta_size, - &result_size); + sizep); if (!result) die("failed to apply delta"); free(delta_data); free(base); - *sizep = result_size; return result; } -void *unpack_entry(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep) +void *unpack_entry(struct packed_git *p, off_t obj_offset, + enum object_type *type, unsigned long *sizep) { struct pack_window *w_curs = NULL; - unsigned long size, obj_offset = offset; - enum object_type kind; - void *retval; + off_t curpos = obj_offset; + void *data; - offset = unpack_object_header(p, &w_curs, offset, &kind, &size); - switch (kind) { + *type = unpack_object_header(p, &w_curs, &curpos, sizep); + switch (*type) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - retval = unpack_delta_entry(p, &w_curs, offset, size, - kind, obj_offset, type, sizep); + data = unpack_delta_entry(p, &w_curs, curpos, *sizep, + obj_offset, type, sizep); break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); - *sizep = size; - retval = unpack_compressed_entry(p, &w_curs, offset, size); + data = unpack_compressed_entry(p, &w_curs, curpos, *sizep); break; default: - die("unknown object type %i in %s", kind, p->pack_name); + die("unknown object type %i in %s", *type, p->pack_name); } unuse_pack(&w_curs); - return retval; + return data; } -int num_packed_objects(const struct packed_git *p) +uint32_t num_packed_objects(const struct packed_git *p) { /* See check_packed_git_idx() */ - return (p->index_size - 20 - 20 - 4*256) / 24; + return (uint32_t)((p->index_size - 20 - 20 - 4*256) / 24); } -int nth_packed_object_sha1(const struct packed_git *p, int n, +int nth_packed_object_sha1(const struct packed_git *p, uint32_t n, unsigned char* sha1) { void *index = p->index_base + 256; - if (n < 0 || num_packed_objects(p) <= n) + if (num_packed_objects(p) <= n) return -1; hashcpy(sha1, (unsigned char *) index + (24 * n) + 4); return 0; } -unsigned long find_pack_entry_one(const unsigned char *sha1, +off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *p) { uint32_t *level1_ofs = p->index_base; @@ -1390,7 +1405,7 @@ static int matches_pack_name(struct packed_git *p, const char *ig) static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) { struct packed_git *p; - unsigned long offset; + off_t offset; prepare_packed_git(); @@ -1405,6 +1420,18 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons } offset = find_pack_entry_one(sha1, p); if (offset) { + /* + * We are about to tell the caller where they can + * locate the requested object. We better make + * sure the packfile is still here and can be + * accessed before supplying that answer, as + * it may have been deleted since the index + * was loaded! + */ + if (p->pack_fd == -1 && open_packed_git(p)) { + error("packfile %s cannot be accessed", p->pack_name); + continue; + } e->offset = offset; e->p = p; hashcpy(e->sha1, sha1); @@ -1424,16 +1451,16 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, return p; } return NULL; - + } -static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) +static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep) { int status; unsigned long mapsize, size; void *map; z_stream stream; - char hdr[128]; + char hdr[32]; map = map_sha1_file(sha1, &mapsize); if (!map) @@ -1441,31 +1468,29 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header", sha1_to_hex(sha1)); - if (parse_sha1_header(hdr, type, &size) < 0) + else if ((status = parse_sha1_header(hdr, &size)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - else { - status = 0; - if (sizep) - *sizep = size; - } + else if (sizep) + *sizep = size; inflateEnd(&stream); munmap(map, mapsize); return status; } -int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) { struct pack_entry e; if (!find_pack_entry(sha1, &e, NULL)) { reprepare_packed_git(); if (!find_pack_entry(sha1, &e, NULL)) - return sha1_loose_object_info(sha1, type, sizep); + return sha1_loose_object_info(sha1, sizep); } - return packed_object_info(e.p, e.offset, type, sizep); + return packed_object_info(e.p, e.offset, sizep); } -static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size) +static void *read_packed_sha1(const unsigned char *sha1, + enum object_type *type, unsigned long *size) { struct pack_entry e; @@ -1475,10 +1500,71 @@ static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned lo return unpack_entry(e.p, e.offset, type, size); } -void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) +/* + * This is meant to hold a *small* number of objects that you would + * want read_sha1_file() to be able to return, but yet you do not want + * to write them into the object store (e.g. a browse-only + * application). + */ +static struct cached_object { + unsigned char sha1[20]; + enum object_type type; + void *buf; + unsigned long size; +} *cached_objects; +static int cached_object_nr, cached_object_alloc; + +static struct cached_object *find_cached_object(const unsigned char *sha1) +{ + int i; + struct cached_object *co = cached_objects; + + for (i = 0; i < cached_object_nr; i++, co++) { + if (!hashcmp(co->sha1, sha1)) + return co; + } + return NULL; +} + +int pretend_sha1_file(void *buf, unsigned long len, enum object_type type, + unsigned char *sha1) +{ + struct cached_object *co; + + hash_sha1_file(buf, len, typename(type), sha1); + if (has_sha1_file(sha1) || find_cached_object(sha1)) + return 0; + if (cached_object_alloc <= cached_object_nr) { + cached_object_alloc = alloc_nr(cached_object_alloc); + cached_objects = xrealloc(cached_objects, + sizeof(*cached_objects) * + cached_object_alloc); + } + co = &cached_objects[cached_object_nr++]; + co->size = len; + co->type = type; + co->buf = xmalloc(len); + memcpy(co->buf, buf, len); + hashcpy(co->sha1, sha1); + return 0; +} + +void *read_sha1_file(const unsigned char *sha1, enum object_type *type, + unsigned long *size) { unsigned long mapsize; void *map, *buf; + struct cached_object *co; + + co = find_cached_object(sha1); + if (co) { + buf = xmalloc(co->size + 1); + memcpy(buf, co->buf, co->size); + ((char*)buf)[co->size] = 0; + *type = co->type; + *size = co->size; + return buf; + } buf = read_packed_sha1(sha1, type, size); if (buf) @@ -1494,33 +1580,34 @@ void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size } void *read_object_with_reference(const unsigned char *sha1, - const char *required_type, + const char *required_type_name, unsigned long *size, unsigned char *actual_sha1_return) { - char type[20]; + enum object_type type, required_type; void *buffer; unsigned long isize; unsigned char actual_sha1[20]; + required_type = type_from_string(required_type_name); hashcpy(actual_sha1, sha1); while (1) { int ref_length = -1; const char *ref_type = NULL; - buffer = read_sha1_file(actual_sha1, type, &isize); + buffer = read_sha1_file(actual_sha1, &type, &isize); if (!buffer) return NULL; - if (!strcmp(type, required_type)) { + if (type == required_type) { *size = isize; if (actual_sha1_return) hashcpy(actual_sha1_return, actual_sha1); return buffer; } /* Handle references */ - else if (!strcmp(type, commit_type)) + else if (type == OBJ_COMMIT) ref_type = "tree "; - else if (!strcmp(type, tag_type)) + else if (type == OBJ_TAG) ref_type = "object "; else { free(buffer); @@ -1541,12 +1628,12 @@ void *read_object_with_reference(const unsigned char *sha1, static void write_sha1_file_prepare(void *buf, unsigned long len, const char *type, unsigned char *sha1, - unsigned char *hdr, int *hdrlen) + char *hdr, int *hdrlen) { SHA_CTX c; /* Generate the header */ - *hdrlen = sprintf((char *)hdr, "%s %lu", type, len)+1; + *hdrlen = sprintf(hdr, "%s %lu", type, len)+1; /* Sha1.. */ SHA1_Init(&c); @@ -1653,33 +1740,24 @@ static int write_binary_header(unsigned char *hdr, enum object_type type, unsign static void setup_object_header(z_stream *stream, const char *type, unsigned long len) { - int obj_type, hdr; + int obj_type, hdrlen; if (use_legacy_headers) { while (deflate(stream, 0) == Z_OK) /* nothing */; return; } - if (!strcmp(type, blob_type)) - obj_type = OBJ_BLOB; - else if (!strcmp(type, tree_type)) - obj_type = OBJ_TREE; - else if (!strcmp(type, commit_type)) - obj_type = OBJ_COMMIT; - else if (!strcmp(type, tag_type)) - obj_type = OBJ_TAG; - else - die("trying to generate bogus object of type '%s'", type); - hdr = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdr; - stream->next_out += hdr; - stream->avail_out -= hdr; + obj_type = type_from_string(type); + hdrlen = write_binary_header(stream->next_out, obj_type, len); + stream->total_out = hdrlen; + stream->next_out += hdrlen; + stream->avail_out -= hdrlen; } int hash_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *sha1) { - unsigned char hdr[50]; + char hdr[32]; int hdrlen; write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); return 0; @@ -1693,7 +1771,7 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha unsigned char sha1[20]; char *filename; static char tmpfile[PATH_MAX]; - unsigned char hdr[50]; + char hdr[32]; int fd, hdrlen; /* Normally if we have it in the pack then we do not bother writing @@ -1740,7 +1818,7 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha stream.avail_out = size; /* First header.. */ - stream.next_in = hdr; + stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; setup_object_header(&stream, type, len); @@ -1771,17 +1849,17 @@ static void *repack_object(const unsigned char *sha1, unsigned long *objsize) z_stream stream; unsigned char *unpacked; unsigned long len; - char type[20]; - char hdr[50]; + enum object_type type; + char hdr[32]; int hdrlen; void *buf; /* need to unpack and recompress it by itself */ - unpacked = read_packed_sha1(sha1, type, &len); + unpacked = read_packed_sha1(sha1, &type, &len); if (!unpacked) error("cannot read sha1_file for %s", sha1_to_hex(sha1)); - hdrlen = sprintf(hdr, "%s %lu", type, len) + 1; + hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; /* Set it up */ memset(&stream, 0, sizeof(stream)); @@ -1991,23 +2069,43 @@ int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object) return ret; } -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, const char *type) +int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, + enum object_type type, const char *path) { - unsigned long size = st->st_size; - void *buf; - int ret; + size_t size = xsize_t(st->st_size); + void *buf = NULL; + int ret, re_allocated = 0; - buf = ""; if (size) buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); if (!type) - type = blob_type; + type = OBJ_BLOB; + + /* + * Convert blobs to git internal format + */ + if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { + unsigned long nsize = size; + char *nbuf = buf; + if (convert_to_git(path, &nbuf, &nsize)) { + if (size) + munmap(buf, size); + size = nsize; + buf = nbuf; + re_allocated = 1; + } + } + if (write_object) - ret = write_sha1_file(buf, size, type, sha1); + ret = write_sha1_file(buf, size, typename(type), sha1); else - ret = hash_sha1_file(buf, size, type, sha1); + ret = hash_sha1_file(buf, size, typename(type), sha1); + if (re_allocated) { + free(buf); + return ret; + } if (size) munmap(buf, size); return ret; @@ -2017,6 +2115,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write { int fd; char *target; + size_t len; switch (st->st_mode & S_IFMT) { case S_IFREG: @@ -2024,21 +2123,22 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, NULL) < 0) + if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path) < 0) return error("%s: failed to insert into database", path); break; case S_IFLNK: - target = xmalloc(st->st_size+1); - if (readlink(path, target, st->st_size+1) != st->st_size) { + len = xsize_t(st->st_size); + target = xmalloc(len + 1); + if (readlink(path, target, len + 1) != st->st_size) { char *errstr = strerror(errno); free(target); return error("readlink(\"%s\"): %s", path, errstr); } if (!write_object) - hash_sha1_file(target, st->st_size, blob_type, sha1); - else if (write_sha1_file(target, st->st_size, blob_type, sha1)) + hash_sha1_file(target, len, blob_type, sha1); + else if (write_sha1_file(target, len, blob_type, sha1)) return error("%s: failed to insert into database", path); free(target); @@ -2048,3 +2148,24 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write } return 0; } + +int read_pack_header(int fd, struct pack_header *header) +{ + char *c = (char*)header; + ssize_t remaining = sizeof(struct pack_header); + do { + ssize_t r = xread(fd, c, remaining); + if (r <= 0) + /* "eof before pack header was fully read" */ + return PH_ERROR_EOF; + remaining -= r; + c += r; + } while (remaining > 0); + if (header->hdr_signature != htonl(PACK_SIGNATURE)) + /* "protocol error (pack signature mismatch detected)" */ + return PH_ERROR_PACK_SIGNATURE; + if (!pack_version_ok(header->hdr_version)) + /* "protocol error (pack version unsupported)" */ + return PH_ERROR_PROTOCOL; + return 0; +}