X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;ds=sidebyside;f=sha1_file.c;h=be991ed22acb0c84141474360f345d51ccc594be;hb=4b7f59af2a5b072a0a3950c956842e4d6223a167;hp=4304fe9bbc2b8e796e944fa7ddb2ea2791000adf;hpb=c182ec90d824168cfb70494bb920c0a2fb590d98;p=git.git diff --git a/sha1_file.c b/sha1_file.c index 4304fe9bb..aca741b79 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -13,6 +13,7 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "refs.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -32,7 +33,7 @@ const unsigned char null_sha1[20]; static unsigned int sha1_file_open_flag = O_NOATIME; -signed char hexval_table[256] = { +const signed char hexval_table[256] = { -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */ -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */ -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */ @@ -192,7 +193,7 @@ char *sha1_pack_name(const unsigned char *sha1) *buf++ = hex[val >> 4]; *buf++ = hex[val & 0xf]; } - + return base; } @@ -217,7 +218,7 @@ char *sha1_pack_index_name(const unsigned char *sha1) *buf++ = hex[val >> 4]; *buf++ = hex[val & 0xf]; } - + return base; } @@ -351,10 +352,14 @@ static void read_info_alternates(const char * relative_base, int depth) char *map; size_t mapsz; struct stat st; - char path[PATH_MAX]; + const char alt_file_name[] = "info/alternates"; + /* Given that relative_base is no longer than PATH_MAX, + ensure that "path" has enough space to append "/", the + file name, "info/alternates", and a trailing NUL. */ + char path[PATH_MAX + 1 + sizeof alt_file_name]; int fd; - sprintf(path, "%s/info/alternates", relative_base); + sprintf(path, "%s/%s", relative_base, alt_file_name); fd = open(path, O_RDONLY); if (fd < 0) return; @@ -375,11 +380,12 @@ void prepare_alt_odb(void) { const char *alt; + if (alt_odb_tail) + return; + alt = getenv(ALTERNATE_DB_ENVIRONMENT); if (!alt) alt = ""; - if (alt_odb_tail) - return; alt_odb_tail = &alt_odb_list; link_alt_odb_entries(alt, alt + strlen(alt), ':', NULL, 0); @@ -411,7 +417,7 @@ static size_t peak_pack_mapped; static size_t pack_mapped; struct packed_git *packed_git; -void pack_report() +void pack_report(void) { fprintf(stderr, "pack_report: getpagesize() = %10" SZ_FMT "\n" @@ -437,7 +443,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) void *idx_map; struct pack_idx_header *hdr; size_t idx_size; - uint32_t nr, i, *index; + uint32_t version, nr, i, *index; int fd = open(path, O_RDONLY); struct stat st; @@ -455,21 +461,23 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - /* a future index format would start with this, as older git - * binaries would fail the non-monotonic index check below. - * give a nicer warning to the user if we can. - */ hdr = idx_map; if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { - munmap(idx_map, idx_size); - return error("index file %s is a newer version" - " and is not supported by this binary" - " (try upgrading GIT to a newer version)", - path); - } + version = ntohl(hdr->idx_version); + if (version < 2 || version > 2) { + munmap(idx_map, idx_size); + return error("index file %s is version %d" + " and is not supported by this binary" + " (try upgrading GIT to a newer version)", + path, version); + } + } else + version = 1; nr = 0; index = idx_map; + if (version > 1) + index += 2; /* skip index header */ for (i = 0; i < 256; i++) { uint32_t n = ntohl(index[i]); if (n < nr) { @@ -479,24 +487,72 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) nr = n; } - /* - * Total size: - * - 256 index entries 4 bytes each - * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum - */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) { - munmap(idx_map, idx_size); - return error("wrong index file size in %s", path); + if (version == 1) { + /* + * Total size: + * - 256 index entries 4 bytes each + * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) + * - 20-byte SHA1 of the packfile + * - 20-byte SHA1 file checksum + */ + if (idx_size != 4*256 + nr * 24 + 20 + 20) { + munmap(idx_map, idx_size); + return error("wrong index file size in %s", path); + } + } else if (version == 2) { + /* + * Minimum size: + * - 8 bytes of header + * - 256 index entries 4 bytes each + * - 20-byte sha1 entry * nr + * - 4-byte crc entry * nr + * - 4-byte offset entry * nr + * - 20-byte SHA1 of the packfile + * - 20-byte SHA1 file checksum + * And after the 4-byte offset table might be a + * variable sized table containing 8-byte entries + * for offsets larger than 2^31. + */ + unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; + unsigned long max_size = min_size; + if (nr) + max_size += (nr - 1)*8; + if (idx_size < min_size || idx_size > max_size) { + munmap(idx_map, idx_size); + return error("wrong index file size in %s", path); + } + if (idx_size != min_size) { + /* make sure we can deal with large pack offsets */ + off_t x = 0x7fffffffUL, y = 0xffffffffUL; + if (x > (x + 1) || y > (y + 1)) { + munmap(idx_map, idx_size); + return error("pack too large for current definition of off_t in %s", path); + } + } } - p->index_version = 1; + p->index_version = version; p->index_data = idx_map; p->index_size = idx_size; + p->num_objects = nr; return 0; } +int open_pack_index(struct packed_git *p) +{ + char *idx_name; + int ret; + + if (p->index_data) + return 0; + + idx_name = xstrdup(p->pack_name); + strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); + ret = check_packed_git_idx(idx_name, p); + free(idx_name); + return ret; +} + static void scan_windows(struct packed_git *p, struct packed_git **lru_p, struct pack_window **lru_w, @@ -516,7 +572,7 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct packed_git *current, int keep_fd) { struct packed_git *p, *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; @@ -532,7 +588,7 @@ static int unuse_one_window(struct packed_git *current) lru_l->next = lru_w->next; else { lru_p->windows = lru_w->next; - if (!lru_p->windows && lru_p != current) { + if (!lru_p->windows && lru_p->pack_fd != keep_fd) { close(lru_p->pack_fd); lru_p->pack_fd = -1; } @@ -544,10 +600,10 @@ static int unuse_one_window(struct packed_git *current) return 0; } -void release_pack_memory(size_t need) +void release_pack_memory(size_t need, int fd) { size_t cur = pack_mapped; - while (need >= (cur - pack_mapped) && unuse_one_window(NULL)) + while (need >= (cur - pack_mapped) && unuse_one_window(NULL, fd)) ; /* nothing */ } @@ -572,6 +628,9 @@ static int open_packed_git_1(struct packed_git *p) unsigned char *idx_sha1; long fd_flag; + if (!p->index_data && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + p->pack_fd = open(p->pack_name, O_RDONLY); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) return -1; @@ -605,11 +664,11 @@ static int open_packed_git_1(struct packed_git *p) p->pack_name, ntohl(hdr.hdr_version)); /* Verify the pack matches its index. */ - if (num_packed_objects(p) != ntohl(hdr.hdr_entries)) + if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %u objects" - " while index size indicates %u objects", - p->pack_name, ntohl(hdr.hdr_entries), - num_packed_objects(p)); + " while index indicates %u objects", + p->pack_name, ntohl(hdr.hdr_entries), + p->num_objects); if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) return error("end of packfile %s is unavailable", p->pack_name); if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1)) @@ -680,7 +739,7 @@ unsigned char* use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; while (packed_git_limit < pack_mapped - && unuse_one_window(p)) + && unuse_one_window(p, p->pack_fd)) ; /* nothing */ win->base = xmmap(NULL, win->len, PROT_READ, MAP_PRIVATE, @@ -724,8 +783,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) return NULL; memcpy(p->pack_name, path, path_len); strcpy(p->pack_name + path_len, ".pack"); - if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode) || - check_packed_git_idx(path, p)) { + if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); return NULL; } @@ -733,6 +791,10 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) /* ok, it looks sane as far as we can check without * actually mapping the pack file. */ + p->index_version = 0; + p->index_data = NULL; + p->index_size = 0; + p->num_objects = 0; p->pack_size = st.st_size; p->next = NULL; p->windows = NULL; @@ -778,7 +840,10 @@ void install_packed_git(struct packed_git *pack) static void prepare_packed_git_one(char *objdir, int local) { - char path[PATH_MAX]; + /* Ensure that this buffer is large enough so that we can + append "/pack/" without clobbering the stack even if + strlen(objdir) were PATH_MAX. */ + char path[PATH_MAX + 1 + 4 + 1 + 1]; int len; DIR *dir; struct dirent *de; @@ -800,6 +865,9 @@ static void prepare_packed_git_one(char *objdir, int local) if (!has_extension(de->d_name, ".idx")) continue; + if (len + namelen + 1 > sizeof(path)) + continue; + /* Don't reopen a pack we already have. */ strcpy(path + len, de->d_name); for (p = packed_git; p; p = p->next) { @@ -904,7 +972,7 @@ int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long siz return hashcmp(sha1, real_sha1) ? -1 : 0; } -void *map_sha1_file(const unsigned char *sha1, unsigned long *size) +static void *map_sha1_file(const unsigned char *sha1, unsigned long *size) { struct stat st; void *map; @@ -939,7 +1007,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } -int legacy_loose_object(unsigned char *map) +static int legacy_loose_object(unsigned char *map) { unsigned int word; @@ -1001,6 +1069,14 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon return inflate(stream, 0); } + + /* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned up not to be + * really worth it and we don't write it any longer. But we + * can still read it. + */ used = unpack_object_header_gently(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; @@ -1076,7 +1152,7 @@ static int parse_sha1_header(const char *hdr, unsigned long *sizep) unsigned long size; /* - * The type can be at most ten bytes (including the + * The type can be at most ten bytes (including the * terminating '\0' that we add), and is followed by * a space. */ @@ -1128,6 +1204,43 @@ static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type return unpack_sha1_rest(&stream, hdr, *size, sha1); } +unsigned long get_size_from_delta(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos) +{ + const unsigned char *data; + unsigned char delta_head[20], *in; + z_stream stream; + int st; + + memset(&stream, 0, sizeof(stream)); + stream.next_out = delta_head; + stream.avail_out = sizeof(delta_head); + + inflateInit(&stream); + do { + in = use_pack(p, w_curs, curpos, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + curpos += stream.next_in - in; + } while ((st == Z_OK || st == Z_BUF_ERROR) && + stream.total_out < sizeof(delta_head)); + inflateEnd(&stream); + if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) + die("delta data unpack-initial failed"); + + /* Examine the initial part of the delta to figure out + * the result size. + */ + data = delta_head; + + /* ignore base size */ + get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); + + /* Read the result size */ + return get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); +} + static off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, off_t *curpos, @@ -1149,7 +1262,7 @@ static off_t get_delta_base(struct packed_git *p, base_offset = c & 127; while (c & 128) { base_offset += 1; - if (!base_offset || base_offset & ~(~0UL >> 7)) + if (!base_offset || MSB(base_offset, 7)) die("offset value overflow for delta base object"); c = base_info[used++]; base_offset = (base_offset << 7) + (c & 127); @@ -1191,40 +1304,8 @@ static int packed_delta_info(struct packed_git *p, * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (sizep) { - const unsigned char *data; - unsigned char delta_head[20], *in; - z_stream stream; - int st; - - memset(&stream, 0, sizeof(stream)); - stream.next_out = delta_head; - stream.avail_out = sizeof(delta_head); - - inflateInit(&stream); - do { - in = use_pack(p, w_curs, curpos, &stream.avail_in); - stream.next_in = in; - st = inflate(&stream, Z_FINISH); - curpos += stream.next_in - in; - } while ((st == Z_OK || st == Z_BUF_ERROR) - && stream.total_out < sizeof(delta_head)); - inflateEnd(&stream); - if ((st != Z_STREAM_END) && - stream.total_out != sizeof(delta_head)) - die("delta data unpack-initial failed"); - - /* Examine the initial part of the delta to figure out - * the result size. - */ - data = delta_head; - - /* ignore base size */ - get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - - /* Read the result size */ - *sizep = get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - } + if (sizep) + *sizep = get_size_from_delta(p, w_curs, curpos); return type; } @@ -1526,37 +1607,71 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, return data; } -uint32_t num_packed_objects(const struct packed_git *p) +const unsigned char *nth_packed_object_sha1(struct packed_git *p, + uint32_t n) { - /* See check_packed_git_idx() */ - return (uint32_t)((p->index_size - 20 - 20 - 4*256) / 24); + const unsigned char *index = p->index_data; + if (!index) { + if (open_pack_index(p)) + return NULL; + index = p->index_data; + } + if (n >= p->num_objects) + return NULL; + index += 4 * 256; + if (p->index_version == 1) { + return index + 24 * n + 4; + } else { + index += 8; + return index + 20 * n; + } } -const unsigned char *nth_packed_object_sha1(const struct packed_git *p, - uint32_t n) +static off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; index += 4 * 256; - if (num_packed_objects(p) <= n) - return NULL; - return index + 24 * n + 4; + if (p->index_version == 1) { + return ntohl(*((uint32_t *)(index + 24 * n))); + } else { + uint32_t off; + index += 8 + p->num_objects * (20 + 4); + off = ntohl(*((uint32_t *)(index + 4 * n))); + if (!(off & 0x80000000)) + return off; + index += p->num_objects * 4 + (off & 0x7fffffff) * 8; + return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | + ntohl(*((uint32_t *)(index + 4))); + } } off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *p) { const uint32_t *level1_ofs = p->index_data; - int hi = ntohl(level1_ofs[*sha1]); - int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); const unsigned char *index = p->index_data; + unsigned hi, lo; + if (!index) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + index = p->index_data; + } + if (p->index_version > 1) { + level1_ofs += 2; + index += 8; + } index += 4 * 256; + hi = ntohl(level1_ofs[*sha1]); + lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); do { - int mi = (lo + hi) / 2; - int cmp = hashcmp(index + 24 * mi + 4, sha1); + unsigned mi = (lo + hi) / 2; + unsigned x = (p->index_version > 1) ? (mi * 20) : (mi * 24 + 4); + int cmp = hashcmp(index + x, sha1); if (!cmp) - return ntohl(*((uint32_t *)((char *)index + (24 * mi)))); + return nth_packed_object_offset(p, mi); if (cmp > 0) hi = mi; else @@ -1585,20 +1700,25 @@ static int matches_pack_name(struct packed_git *p, const char *ig) static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) { + static struct packed_git *last_found = (void *)1; struct packed_git *p; off_t offset; prepare_packed_git(); + if (!packed_git) + return 0; + p = (last_found == (void *)1) ? packed_git : last_found; - for (p = packed_git; p; p = p->next) { + do { if (ignore_packed) { const char **ig; for (ig = ignore_packed; *ig; ig++) if (!matches_pack_name(p, *ig)) break; if (*ig) - continue; + goto next; } + offset = find_pack_entry_one(sha1, p); if (offset) { /* @@ -1611,18 +1731,27 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons */ if (p->pack_fd == -1 && open_packed_git(p)) { error("packfile %s cannot be accessed", p->pack_name); - continue; + goto next; } e->offset = offset; e->p = p; hashcpy(e->sha1, sha1); + last_found = p; return 1; } - } + + next: + if (p == last_found) + p = packed_git; + else + p = p->next; + if (p == last_found) + p = p->next; + } while (p); return 0; } -struct packed_git *find_sha1_pack(const unsigned char *sha1, +struct packed_git *find_sha1_pack(const unsigned char *sha1, struct packed_git *packs) { struct packed_git *p; @@ -1901,40 +2030,6 @@ static int write_buffer(int fd, const void *buf, size_t len) return 0; } -static int write_binary_header(unsigned char *hdr, enum object_type type, unsigned long len) -{ - int hdr_len; - unsigned char c; - - c = (type << 4) | (len & 15); - len >>= 4; - hdr_len = 1; - while (len) { - *hdr++ = c | 0x80; - hdr_len++; - c = (len & 0x7f); - len >>= 7; - } - *hdr = c; - return hdr_len; -} - -static void setup_object_header(z_stream *stream, const char *type, unsigned long len) -{ - int obj_type, hdrlen; - - if (use_legacy_headers) { - while (deflate(stream, 0) == Z_OK) - /* nothing */; - return; - } - obj_type = type_from_string(type); - hdrlen = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdrlen; - stream->next_out += hdrlen; - stream->avail_out -= hdrlen; -} - int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { @@ -2001,7 +2096,8 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha /* First header.. */ stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; - setup_object_header(&stream, type, len); + while (deflate(&stream, 0) == Z_OK) + /* nothing */; /* Then the data itself.. */ stream.next_in = buf; @@ -2208,27 +2304,36 @@ int has_sha1_file(const unsigned char *sha1) * * returns 0 if anything went fine and -1 otherwise * + * The buffer is always NUL-terminated, not including it in returned size. + * * NOTE: both buf and size may change, but even when -1 is returned * you still have to free() it yourself. */ -int read_pipe(int fd, char** return_buf, unsigned long* return_size) +int read_fd(int fd, char **return_buf, unsigned long *return_size) { - char* buf = *return_buf; + char *buf = *return_buf; unsigned long size = *return_size; - int iret; + ssize_t iret; unsigned long off = 0; + if (!buf || size <= 1) { + size = 1024; + buf = xrealloc(buf, size); + } + do { - iret = xread(fd, buf + off, size - off); + iret = xread(fd, buf + off, (size - 1) - off); if (iret > 0) { off += iret; - if (off == size) { - size *= 2; + if (off == size - 1) { + size = alloc_nr(size); buf = xrealloc(buf, size); } } } while (iret > 0); + buf[off] = '\0'; + *return_buf = buf; *return_size = off; @@ -2243,7 +2348,7 @@ int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object) char *buf = xmalloc(size); int ret; - if (read_pipe(fd, &buf, &size)) { + if (read_fd(fd, &buf, &size)) { free(buf); return -1; } @@ -2277,10 +2382,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, */ if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { unsigned long nsize = size; - char *nbuf = buf; - if (convert_to_git(path, &nbuf, &nsize)) { - if (size) - munmap(buf, size); + char *nbuf = convert_to_git(path, buf, &nsize); + if (nbuf) { + munmap(buf, size); size = nsize; buf = nbuf; re_allocated = 1; @@ -2332,6 +2436,8 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write path); free(target); break; + case S_IFDIR: + return resolve_gitlink_ref(path, "HEAD", sha1); default: return error("%s: unsupported file type", path); }