X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=sha1_file.c;h=940bab561254842fd50c59cf9284a8c878cb3fd8;hb=fed820ad5662d8b6f11b5e7f788fa87afe1ad919;hp=9a1dee051a20891698f44814824ba8aa3b83bc3a;hpb=4e4b55dd0f5b4644767265f7c16a8b370278ce56;p=git.git diff --git a/sha1_file.c b/sha1_file.c index 9a1dee051..940bab561 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -349,6 +349,7 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep, static void read_info_alternates(const char * relative_base, int depth) { char *map; + size_t mapsz; struct stat st; char path[PATH_MAX]; int fd; @@ -361,12 +362,13 @@ static void read_info_alternates(const char * relative_base, int depth) close(fd); return; } - map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + mapsz = xsize_t(st.st_size); + map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - link_alt_odb_entries(map, map + st.st_size, '\n', relative_base, depth); + link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth); - munmap(map, st.st_size); + munmap(map, mapsz); } void prepare_alt_odb(void) @@ -430,48 +432,50 @@ void pack_report() pack_mapped, peak_pack_mapped); } -static int check_packed_git_idx(const char *path, unsigned long *idx_size_, - void **idx_map_) +static int check_packed_git_idx(const char *path, struct packed_git *p) { void *idx_map; - uint32_t *index; - unsigned long idx_size; - int nr, i; + struct pack_idx_header *hdr; + size_t idx_size; + uint32_t nr, i, *index; int fd = open(path, O_RDONLY); struct stat st; + if (fd < 0) return -1; if (fstat(fd, &st)) { close(fd); return -1; } - idx_size = st.st_size; + idx_size = xsize_t(st.st_size); + if (idx_size < 4 * 256 + 20 + 20) { + close(fd); + return error("index file %s is too small", path); + } idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - index = idx_map; - *idx_map_ = idx_map; - *idx_size_ = idx_size; - - /* check index map */ - if (idx_size < 4*256 + 20 + 20) - return error("index file %s is too small", path); - /* a future index format would start with this, as older git * binaries would fail the non-monotonic index check below. * give a nicer warning to the user if we can. */ - if (index[0] == htonl(PACK_IDX_SIGNATURE)) + hdr = idx_map; + if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { + munmap(idx_map, idx_size); return error("index file %s is a newer version" " and is not supported by this binary" " (try upgrading GIT to a newer version)", path); + } nr = 0; + index = idx_map; for (i = 0; i < 256; i++) { - unsigned int n = ntohl(index[i]); - if (n < nr) + uint32_t n = ntohl(index[i]); + if (n < nr) { + munmap(idx_map, idx_size); return error("non-monotonic index %s", path); + } nr = n; } @@ -482,9 +486,14 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, * - 20-byte SHA1 of the packfile * - 20-byte SHA1 file checksum */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) + if (idx_size != 4*256 + nr * 24 + 20 + 20) { + munmap(idx_map, idx_size); return error("wrong index file size in %s", path); + } + p->index_version = 1; + p->index_data = idx_map; + p->index_size = idx_size; return 0; } @@ -605,7 +614,7 @@ static int open_packed_git_1(struct packed_git *p) return error("end of packfile %s is unavailable", p->pack_name); if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1)) return error("packfile %s signature is unavailable", p->pack_name); - idx_sha1 = ((unsigned char *)p->index_base) + p->index_size - 40; + idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40; if (hashcmp(sha1, idx_sha1)) return error("packfile %s does not match index", p->pack_name); return 0; @@ -622,7 +631,7 @@ static int open_packed_git(struct packed_git *p) return -1; } -static int in_window(struct pack_window *win, unsigned long offset) +static int in_window(struct pack_window *win, off_t offset) { /* We must promise at least 20 bytes (one hash) after the * offset is available from this window, otherwise the offset @@ -637,7 +646,7 @@ static int in_window(struct pack_window *win, unsigned long offset) unsigned char* use_pack(struct packed_git *p, struct pack_window **w_cursor, - unsigned long offset, + off_t offset, unsigned int *left) { struct pack_window *win = *w_cursor; @@ -662,11 +671,13 @@ unsigned char* use_pack(struct packed_git *p, } if (!win) { size_t window_align = packed_git_window_size / 2; + off_t len; win = xcalloc(1, sizeof(*win)); win->offset = (offset / window_align) * window_align; - win->len = p->pack_size - win->offset; - if (win->len > packed_git_window_size) - win->len = packed_git_window_size; + len = p->pack_size - win->offset; + if (len > packed_git_window_size) + len = packed_git_window_size; + win->len = (size_t)len; pack_mapped += win->len; while (packed_git_limit < pack_mapped && unuse_one_window(p)) @@ -695,41 +706,41 @@ unsigned char* use_pack(struct packed_git *p, } offset -= win->offset; if (left) - *left = win->len - offset; + *left = win->len - xsize_t(offset); return win->base + offset; } -struct packed_git *add_packed_git(char *path, int path_len, int local) +struct packed_git *add_packed_git(const char *path, int path_len, int local) { struct stat st; - struct packed_git *p; - unsigned long idx_size; - void *idx_map; - unsigned char sha1[20]; + struct packed_git *p = xmalloc(sizeof(*p) + path_len + 2); - if (check_packed_git_idx(path, &idx_size, &idx_map)) + /* + * Make sure a corresponding .pack file exists and that + * the index looks sane. + */ + path_len -= strlen(".idx"); + if (path_len < 1) return NULL; - - /* do we have a corresponding .pack file? */ - strcpy(path + path_len - 4, ".pack"); - if (stat(path, &st) || !S_ISREG(st.st_mode)) { - munmap(idx_map, idx_size); + memcpy(p->pack_name, path, path_len); + strcpy(p->pack_name + path_len, ".pack"); + if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode) || + check_packed_git_idx(path, p)) { + free(p); return NULL; } + /* ok, it looks sane as far as we can check without * actually mapping the pack file. */ - p = xmalloc(sizeof(*p) + path_len + 2); - strcpy(p->pack_name, path); - p->index_size = idx_size; p->pack_size = st.st_size; - p->index_base = idx_map; p->next = NULL; p->windows = NULL; p->pack_fd = -1; p->pack_local = local; - if ((path_len > 44) && !get_sha1_hex(path + path_len - 44, sha1)) - hashcpy(p->sha1, sha1); + p->mtime = st.st_mtime; + if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1)) + hashclr(p->sha1); return p; } @@ -739,23 +750,19 @@ struct packed_git *parse_pack_index(unsigned char *sha1) return parse_pack_index_file(sha1, path); } -struct packed_git *parse_pack_index_file(const unsigned char *sha1, char *idx_path) +struct packed_git *parse_pack_index_file(const unsigned char *sha1, + const char *idx_path) { - struct packed_git *p; - unsigned long idx_size; - void *idx_map; - char *path; + const char *path = sha1_pack_name(sha1); + struct packed_git *p = xmalloc(sizeof(*p) + strlen(path) + 2); - if (check_packed_git_idx(idx_path, &idx_size, &idx_map)) + if (check_packed_git_idx(idx_path, p)) { + free(p); return NULL; + } - path = sha1_pack_name(sha1); - - p = xmalloc(sizeof(*p) + strlen(path) + 2); strcpy(p->pack_name, path); - p->index_size = idx_size; p->pack_size = 0; - p->index_base = idx_map; p->next = NULL; p->windows = NULL; p->pack_fd = -1; @@ -812,6 +819,60 @@ static void prepare_packed_git_one(char *objdir, int local) closedir(dir); } +static int sort_pack(const void *a_, const void *b_) +{ + struct packed_git *a = *((struct packed_git **)a_); + struct packed_git *b = *((struct packed_git **)b_); + int st; + + /* + * Local packs tend to contain objects specific to our + * variant of the project than remote ones. In addition, + * remote ones could be on a network mounted filesystem. + * Favor local ones for these reasons. + */ + st = a->pack_local - b->pack_local; + if (st) + return -st; + + /* + * Younger packs tend to contain more recent objects, + * and more recent objects tend to get accessed more + * often. + */ + if (a->mtime < b->mtime) + return 1; + else if (a->mtime == b->mtime) + return 0; + return -1; +} + +static void rearrange_packed_git(void) +{ + struct packed_git **ary, *p; + int i, n; + + for (n = 0, p = packed_git; p; p = p->next) + n++; + if (n < 2) + return; + + /* prepare an array of packed_git for easier sorting */ + ary = xcalloc(n, sizeof(struct packed_git *)); + for (n = 0, p = packed_git; p; p = p->next) + ary[n++] = p; + + qsort(ary, n, sizeof(struct packed_git *), sort_pack); + + /* link them back again */ + for (i = 0; i < n - 1; i++) + ary[i]->next = ary[i + 1]; + ary[n - 1]->next = NULL; + packed_git = ary[0]; + + free(ary); +} + static int prepare_packed_git_run_once = 0; void prepare_packed_git(void) { @@ -826,6 +887,7 @@ void prepare_packed_git(void) prepare_packed_git_one(alt->base, 0); alt->name[-1] = '/'; } + rearrange_packed_git(); prepare_packed_git_run_once = 1; } @@ -871,9 +933,9 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) */ sha1_file_open_flag = 0; } - map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + *size = xsize_t(st.st_size); + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - *size = st.st_size; return map; } @@ -952,30 +1014,54 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon /* And generate the fake traditional header */ stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", - type_names[type], size); + typename(type), size); return 0; } -static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size) +static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size, const unsigned char *sha1) { int bytes = strlen(buffer) + 1; unsigned char *buf = xmalloc(1+size); unsigned long n; + int status = Z_OK; n = stream->total_out - bytes; if (n > size) n = size; memcpy(buf, (char *) buffer + bytes, n); bytes = n; - if (bytes < size) { + if (bytes <= size) { + /* + * The above condition must be (bytes <= size), not + * (bytes < size). In other words, even though we + * expect no more output and set avail_out to zer0, + * the input zlib stream may have bytes that express + * "this concludes the stream", and we *do* want to + * eat that input. + * + * Otherwise we would not be able to test that we + * consumed all the input to reach the expected size; + * we also want to check that zlib tells us that all + * went well with status == Z_STREAM_END at the end. + */ stream->next_out = buf + bytes; stream->avail_out = size - bytes; - while (inflate(stream, Z_FINISH) == Z_OK) - /* nothing */; + while (status == Z_OK) + status = inflate(stream, Z_FINISH); } buf[size] = 0; - inflateEnd(stream); - return buf; + if (status == Z_STREAM_END && !stream->avail_in) { + inflateEnd(stream); + return buf; + } + + if (status < 0) + error("corrupt loose object '%s'", sha1_to_hex(sha1)); + else if (stream->avail_in) + error("garbage at end of loose object '%s'", + sha1_to_hex(sha1)); + free(buf); + return NULL; } /* @@ -983,26 +1069,27 @@ static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_sha1_header(char *hdr, char *type, unsigned long *sizep) +static int parse_sha1_header(const char *hdr, unsigned long *sizep) { + char type[10]; int i; unsigned long size; /* * The type can be at most ten bytes (including the * terminating '\0' that we add), and is followed by - * a space. + * a space. */ - i = 10; + i = 0; for (;;) { char c = *hdr++; if (c == ' ') break; - if (!--i) + type[i++] = c; + if (i >= sizeof(type)) return -1; - *type++ = c; } - *type = 0; + type[i] = 0; /* * The length must follow immediately, and be in canonical @@ -1025,31 +1112,30 @@ static int parse_sha1_header(char *hdr, char *type, unsigned long *sizep) /* * The length must be followed by a zero byte */ - return *hdr ? -1 : 0; + return *hdr ? -1 : type_from_string(type); } -void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size) +static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) { int ret; z_stream stream; char hdr[8192]; ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)); - if (ret < Z_OK || parse_sha1_header(hdr, type, size) < 0) + if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0) return NULL; - return unpack_sha1_rest(&stream, hdr, *size); + return unpack_sha1_rest(&stream, hdr, *size, sha1); } -static unsigned long get_delta_base(struct packed_git *p, +static off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, - enum object_type kind, - unsigned long delta_obj_offset, - unsigned long *base_obj_offset) + off_t *curpos, + enum object_type type, + off_t delta_obj_offset) { - unsigned char *base_info = use_pack(p, w_curs, offset, NULL); - unsigned long base_offset; + unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); + off_t base_offset; /* use_pack() assured us we have [base_info, base_info + 20) * as a range that we can look at without walking off the @@ -1057,7 +1143,7 @@ static unsigned long get_delta_base(struct packed_git *p, * that is assured. An OFS_DELTA longer than the hash size * is stupid, as then a REF_DELTA would be smaller to store. */ - if (kind == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { unsigned used = 0; unsigned char c = base_info[used++]; base_offset = c & 127; @@ -1071,49 +1157,43 @@ static unsigned long get_delta_base(struct packed_git *p, base_offset = delta_obj_offset - base_offset; if (base_offset >= delta_obj_offset) die("delta base offset out of bound"); - offset += used; - } else if (kind == OBJ_REF_DELTA) { + *curpos += used; + } else if (type == OBJ_REF_DELTA) { /* The base entry _must_ be in the same pack */ base_offset = find_pack_entry_one(base_info, p); if (!base_offset) die("failed to find delta-pack base object %s", sha1_to_hex(base_info)); - offset += 20; + *curpos += 20; } else die("I am totally screwed"); - *base_obj_offset = base_offset; - return offset; + return base_offset; } /* forward declaration for a mutually recursive function */ -static int packed_object_info(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep); +static int packed_object_info(struct packed_git *p, off_t offset, + unsigned long *sizep); static int packed_delta_info(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, - enum object_type kind, - unsigned long obj_offset, - char *type, + off_t curpos, + enum object_type type, + off_t obj_offset, unsigned long *sizep) { - unsigned long base_offset; + off_t base_offset; - offset = get_delta_base(p, w_curs, offset, kind, - obj_offset, &base_offset); + base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); + type = packed_object_info(p, base_offset, NULL); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (packed_object_info(p, base_offset, type, NULL)) - die("cannot get info for delta-pack base"); - if (sizep) { const unsigned char *data; unsigned char delta_head[20], *in; - unsigned long result_size; z_stream stream; int st; @@ -1123,10 +1203,10 @@ static int packed_delta_info(struct packed_git *p, inflateInit(&stream); do { - in = use_pack(p, w_curs, offset, &stream.avail_in); + in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); - offset += stream.next_in - in; + curpos += stream.next_in - in; } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); @@ -1143,21 +1223,21 @@ static int packed_delta_info(struct packed_git *p, get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); /* Read the result size */ - result_size = get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - *sizep = result_size; + *sizep = get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); } - return 0; + + return type; } -static unsigned long unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - unsigned long offset, - enum object_type *type, - unsigned long *sizep) +static int unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + unsigned long *sizep) { unsigned char *base; unsigned int left; unsigned long used; + enum object_type type; /* use_pack() assures us we have [base, base + 20) available * as a range that we can look at at. (Its actually the hash @@ -1165,100 +1245,98 @@ static unsigned long unpack_object_header(struct packed_git *p, * the maximum deflated object size is 2^137, which is just * insane, so we know won't exceed what we have been given. */ - base = use_pack(p, w_curs, offset, &left); - used = unpack_object_header_gently(base, left, type, sizep); + base = use_pack(p, w_curs, *curpos, &left); + used = unpack_object_header_gently(base, left, &type, sizep); if (!used) die("object offset outside of pack file"); + *curpos += used; - return offset + used; + return type; } -void packed_object_info_detail(struct packed_git *p, - unsigned long offset, - char *type, - unsigned long *size, - unsigned long *store_size, - unsigned int *delta_chain_length, - unsigned char *base_sha1) +const char *packed_object_info_detail(struct packed_git *p, + off_t obj_offset, + unsigned long *size, + unsigned long *store_size, + unsigned int *delta_chain_length, + unsigned char *base_sha1) { struct pack_window *w_curs = NULL; - unsigned long obj_offset, val; + off_t curpos; + unsigned long dummy; unsigned char *next_sha1; - enum object_type kind; + enum object_type type; *delta_chain_length = 0; - obj_offset = offset; - offset = unpack_object_header(p, &w_curs, offset, &kind, size); + curpos = obj_offset; + type = unpack_object_header(p, &w_curs, &curpos, size); for (;;) { - switch (kind) { + switch (type) { default: die("pack %s contains unknown object type %d", - p->pack_name, kind); + p->pack_name, type); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ unuse_pack(&w_curs); - return; + return typename(type); case OBJ_OFS_DELTA: - get_delta_base(p, &w_curs, offset, kind, - obj_offset, &offset); + obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); if (*delta_chain_length == 0) { - /* TODO: find base_sha1 as pointed by offset */ + /* TODO: find base_sha1 as pointed by curpos */ + hashclr(base_sha1); } break; case OBJ_REF_DELTA: - next_sha1 = use_pack(p, &w_curs, offset, NULL); + next_sha1 = use_pack(p, &w_curs, curpos, NULL); if (*delta_chain_length == 0) hashcpy(base_sha1, next_sha1); - offset = find_pack_entry_one(next_sha1, p); + obj_offset = find_pack_entry_one(next_sha1, p); break; } - obj_offset = offset; - offset = unpack_object_header(p, &w_curs, offset, &kind, &val); (*delta_chain_length)++; + curpos = obj_offset; + type = unpack_object_header(p, &w_curs, &curpos, &dummy); } } -static int packed_object_info(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep) +static int packed_object_info(struct packed_git *p, off_t obj_offset, + unsigned long *sizep) { struct pack_window *w_curs = NULL; - unsigned long size, obj_offset = offset; - enum object_type kind; - int r; + unsigned long size; + off_t curpos = obj_offset; + enum object_type type; - offset = unpack_object_header(p, &w_curs, offset, &kind, &size); + type = unpack_object_header(p, &w_curs, &curpos, &size); - switch (kind) { + switch (type) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - r = packed_delta_info(p, &w_curs, offset, kind, - obj_offset, type, sizep); - unuse_pack(&w_curs); - return r; + type = packed_delta_info(p, &w_curs, curpos, + type, obj_offset, sizep); + break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); - unuse_pack(&w_curs); + if (sizep) + *sizep = size; break; default: die("pack %s contains unknown object type %d", - p->pack_name, kind); + p->pack_name, type); } - if (sizep) - *sizep = size; - return 0; + unuse_pack(&w_curs); + return type; } static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, + off_t curpos, unsigned long size) { int st; @@ -1273,10 +1351,10 @@ static void *unpack_compressed_entry(struct packed_git *p, inflateInit(&stream); do { - in = use_pack(p, w_curs, offset, &stream.avail_in); + in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); - offset += stream.next_in - in; + curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); inflateEnd(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { @@ -1287,94 +1365,197 @@ static void *unpack_compressed_entry(struct packed_git *p, return buffer; } +#define MAX_DELTA_CACHE (256) + +static size_t delta_base_cached; + +static struct delta_base_cache_lru_list { + struct delta_base_cache_lru_list *prev; + struct delta_base_cache_lru_list *next; +} delta_base_cache_lru = { &delta_base_cache_lru, &delta_base_cache_lru }; + +static struct delta_base_cache_entry { + struct delta_base_cache_lru_list lru; + void *data; + struct packed_git *p; + off_t base_offset; + unsigned long size; + enum object_type type; +} delta_base_cache[MAX_DELTA_CACHE]; + +static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) +{ + unsigned long hash; + + hash = (unsigned long)p + (unsigned long)base_offset; + hash += (hash >> 8) + (hash >> 16); + return hash % MAX_DELTA_CACHE; +} + +static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, + unsigned long *base_size, enum object_type *type, int keep_cache) +{ + void *ret; + unsigned long hash = pack_entry_hash(p, base_offset); + struct delta_base_cache_entry *ent = delta_base_cache + hash; + + ret = ent->data; + if (ret && ent->p == p && ent->base_offset == base_offset) + goto found_cache_entry; + return unpack_entry(p, base_offset, type, base_size); + +found_cache_entry: + if (!keep_cache) { + ent->data = NULL; + ent->lru.next->prev = ent->lru.prev; + ent->lru.prev->next = ent->lru.next; + delta_base_cached -= ent->size; + } + else { + ret = xmalloc(ent->size + 1); + memcpy(ret, ent->data, ent->size); + ((char *)ret)[ent->size] = 0; + } + *type = ent->type; + *base_size = ent->size; + return ret; +} + +static inline void release_delta_base_cache(struct delta_base_cache_entry *ent) +{ + if (ent->data) { + free(ent->data); + ent->data = NULL; + ent->lru.next->prev = ent->lru.prev; + ent->lru.prev->next = ent->lru.next; + delta_base_cached -= ent->size; + } +} + +static void add_delta_base_cache(struct packed_git *p, off_t base_offset, + void *base, unsigned long base_size, enum object_type type) +{ + unsigned long hash = pack_entry_hash(p, base_offset); + struct delta_base_cache_entry *ent = delta_base_cache + hash; + struct delta_base_cache_lru_list *lru; + + release_delta_base_cache(ent); + delta_base_cached += base_size; + + for (lru = delta_base_cache_lru.next; + delta_base_cached > delta_base_cache_limit + && lru != &delta_base_cache_lru; + lru = lru->next) { + struct delta_base_cache_entry *f = (void *)lru; + if (f->type == OBJ_BLOB) + release_delta_base_cache(f); + } + for (lru = delta_base_cache_lru.next; + delta_base_cached > delta_base_cache_limit + && lru != &delta_base_cache_lru; + lru = lru->next) { + struct delta_base_cache_entry *f = (void *)lru; + release_delta_base_cache(f); + } + + ent->p = p; + ent->base_offset = base_offset; + ent->type = type; + ent->data = base; + ent->size = base_size; + ent->lru.next = &delta_base_cache_lru; + ent->lru.prev = delta_base_cache_lru.prev; + delta_base_cache_lru.prev->next = &ent->lru; + delta_base_cache_lru.prev = &ent->lru; +} + static void *unpack_delta_entry(struct packed_git *p, struct pack_window **w_curs, - unsigned long offset, + off_t curpos, unsigned long delta_size, - enum object_type kind, - unsigned long obj_offset, - char *type, + off_t obj_offset, + enum object_type *type, unsigned long *sizep) { void *delta_data, *result, *base; - unsigned long result_size, base_size, base_offset; + unsigned long base_size; + off_t base_offset; - offset = get_delta_base(p, w_curs, offset, kind, - obj_offset, &base_offset); - base = unpack_entry(p, base_offset, type, &base_size); + base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset); + base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0); if (!base) - die("failed to read delta base object at %lu from %s", - base_offset, p->pack_name); + die("failed to read delta base object" + " at %"PRIuMAX" from %s", + (uintmax_t)base_offset, p->pack_name); - delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size); + delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size); result = patch_delta(base, base_size, delta_data, delta_size, - &result_size); + sizep); if (!result) die("failed to apply delta"); free(delta_data); - free(base); - *sizep = result_size; + add_delta_base_cache(p, base_offset, base, base_size, *type); return result; } -void *unpack_entry(struct packed_git *p, unsigned long offset, - char *type, unsigned long *sizep) +void *unpack_entry(struct packed_git *p, off_t obj_offset, + enum object_type *type, unsigned long *sizep) { struct pack_window *w_curs = NULL; - unsigned long size, obj_offset = offset; - enum object_type kind; - void *retval; + off_t curpos = obj_offset; + void *data; - offset = unpack_object_header(p, &w_curs, offset, &kind, &size); - switch (kind) { + *type = unpack_object_header(p, &w_curs, &curpos, sizep); + switch (*type) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - retval = unpack_delta_entry(p, &w_curs, offset, size, - kind, obj_offset, type, sizep); + data = unpack_delta_entry(p, &w_curs, curpos, *sizep, + obj_offset, type, sizep); break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - strcpy(type, type_names[kind]); - *sizep = size; - retval = unpack_compressed_entry(p, &w_curs, offset, size); + data = unpack_compressed_entry(p, &w_curs, curpos, *sizep); break; default: - die("unknown object type %i in %s", kind, p->pack_name); + die("unknown object type %i in %s", *type, p->pack_name); } unuse_pack(&w_curs); - return retval; + return data; } -int num_packed_objects(const struct packed_git *p) +uint32_t num_packed_objects(const struct packed_git *p) { /* See check_packed_git_idx() */ - return (p->index_size - 20 - 20 - 4*256) / 24; + return (uint32_t)((p->index_size - 20 - 20 - 4*256) / 24); } -int nth_packed_object_sha1(const struct packed_git *p, int n, +int nth_packed_object_sha1(const struct packed_git *p, uint32_t n, unsigned char* sha1) { - void *index = p->index_base + 256; - if (n < 0 || num_packed_objects(p) <= n) + const unsigned char *index = p->index_data; + index += 4 * 256; + if (num_packed_objects(p) <= n) return -1; - hashcpy(sha1, (unsigned char *) index + (24 * n) + 4); + hashcpy(sha1, index + 24 * n + 4); return 0; } -unsigned long find_pack_entry_one(const unsigned char *sha1, +off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *p) { - uint32_t *level1_ofs = p->index_base; + const uint32_t *level1_ofs = p->index_data; int hi = ntohl(level1_ofs[*sha1]); int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); - void *index = p->index_base + 256; + const unsigned char *index = p->index_data; + + index += 4 * 256; do { int mi = (lo + hi) / 2; - int cmp = hashcmp((unsigned char *)index + (24 * mi) + 4, sha1); + int cmp = hashcmp(index + 24 * mi + 4, sha1); if (!cmp) return ntohl(*((uint32_t *)((char *)index + (24 * mi)))); if (cmp > 0) @@ -1406,7 +1587,7 @@ static int matches_pack_name(struct packed_git *p, const char *ig) static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) { struct packed_git *p; - unsigned long offset; + off_t offset; prepare_packed_git(); @@ -1452,16 +1633,16 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, return p; } return NULL; - + } -static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) +static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep) { int status; unsigned long mapsize, size; void *map; z_stream stream; - char hdr[128]; + char hdr[32]; map = map_sha1_file(sha1, &mapsize); if (!map) @@ -1469,38 +1650,36 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header", sha1_to_hex(sha1)); - if (parse_sha1_header(hdr, type, &size) < 0) + else if ((status = parse_sha1_header(hdr, &size)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - else { - status = 0; - if (sizep) - *sizep = size; - } + else if (sizep) + *sizep = size; inflateEnd(&stream); munmap(map, mapsize); return status; } -int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) { struct pack_entry e; if (!find_pack_entry(sha1, &e, NULL)) { reprepare_packed_git(); if (!find_pack_entry(sha1, &e, NULL)) - return sha1_loose_object_info(sha1, type, sizep); + return sha1_loose_object_info(sha1, sizep); } - return packed_object_info(e.p, e.offset, type, sizep); + return packed_object_info(e.p, e.offset, sizep); } -static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size) +static void *read_packed_sha1(const unsigned char *sha1, + enum object_type *type, unsigned long *size) { struct pack_entry e; if (!find_pack_entry(sha1, &e, NULL)) return NULL; else - return unpack_entry(e.p, e.offset, type, size); + return cache_or_unpack_entry(e.p, e.offset, size, type, 1); } /* @@ -1511,7 +1690,7 @@ static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned lo */ static struct cached_object { unsigned char sha1[20]; - const char *type; + enum object_type type; void *buf; unsigned long size; } *cached_objects; @@ -1529,11 +1708,12 @@ static struct cached_object *find_cached_object(const unsigned char *sha1) return NULL; } -int pretend_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *sha1) +int pretend_sha1_file(void *buf, unsigned long len, enum object_type type, + unsigned char *sha1) { struct cached_object *co; - hash_sha1_file(buf, len, type, sha1); + hash_sha1_file(buf, len, typename(type), sha1); if (has_sha1_file(sha1) || find_cached_object(sha1)) return 0; if (cached_object_alloc <= cached_object_nr) { @@ -1544,14 +1724,15 @@ int pretend_sha1_file(void *buf, unsigned long len, const char *type, unsigned c } co = &cached_objects[cached_object_nr++]; co->size = len; - co->type = strdup(type); + co->type = type; co->buf = xmalloc(len); memcpy(co->buf, buf, len); hashcpy(co->sha1, sha1); return 0; } -void *read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) +void *read_sha1_file(const unsigned char *sha1, enum object_type *type, + unsigned long *size) { unsigned long mapsize; void *map, *buf; @@ -1562,7 +1743,7 @@ void *read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) buf = xmalloc(co->size + 1); memcpy(buf, co->buf, co->size); ((char*)buf)[co->size] = 0; - strcpy(type, co->type); + *type = co->type; *size = co->size; return buf; } @@ -1572,7 +1753,7 @@ void *read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) return buf; map = map_sha1_file(sha1, &mapsize); if (map) { - buf = unpack_sha1_file(map, mapsize, type, size); + buf = unpack_sha1_file(map, mapsize, type, size, sha1); munmap(map, mapsize); return buf; } @@ -1581,33 +1762,34 @@ void *read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) } void *read_object_with_reference(const unsigned char *sha1, - const char *required_type, + const char *required_type_name, unsigned long *size, unsigned char *actual_sha1_return) { - char type[20]; + enum object_type type, required_type; void *buffer; unsigned long isize; unsigned char actual_sha1[20]; + required_type = type_from_string(required_type_name); hashcpy(actual_sha1, sha1); while (1) { int ref_length = -1; const char *ref_type = NULL; - buffer = read_sha1_file(actual_sha1, type, &isize); + buffer = read_sha1_file(actual_sha1, &type, &isize); if (!buffer) return NULL; - if (!strcmp(type, required_type)) { + if (type == required_type) { *size = isize; if (actual_sha1_return) hashcpy(actual_sha1_return, actual_sha1); return buffer; } /* Handle references */ - else if (!strcmp(type, commit_type)) + else if (type == OBJ_COMMIT) ref_type = "tree "; - else if (!strcmp(type, tag_type)) + else if (type == OBJ_TAG) ref_type = "object "; else { free(buffer); @@ -1626,14 +1808,14 @@ void *read_object_with_reference(const unsigned char *sha1, } } -static void write_sha1_file_prepare(void *buf, unsigned long len, +static void write_sha1_file_prepare(const void *buf, unsigned long len, const char *type, unsigned char *sha1, - unsigned char *hdr, int *hdrlen) + char *hdr, int *hdrlen) { SHA_CTX c; /* Generate the header */ - *hdrlen = sprintf((char *)hdr, "%s %lu", type, len)+1; + *hdrlen = sprintf(hdr, "%s %lu", type, len)+1; /* Sha1.. */ SHA1_Init(&c); @@ -1740,33 +1922,24 @@ static int write_binary_header(unsigned char *hdr, enum object_type type, unsign static void setup_object_header(z_stream *stream, const char *type, unsigned long len) { - int obj_type, hdr; + int obj_type, hdrlen; if (use_legacy_headers) { while (deflate(stream, 0) == Z_OK) /* nothing */; return; } - if (!strcmp(type, blob_type)) - obj_type = OBJ_BLOB; - else if (!strcmp(type, tree_type)) - obj_type = OBJ_TREE; - else if (!strcmp(type, commit_type)) - obj_type = OBJ_COMMIT; - else if (!strcmp(type, tag_type)) - obj_type = OBJ_TAG; - else - die("trying to generate bogus object of type '%s'", type); - hdr = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdr; - stream->next_out += hdr; - stream->avail_out -= hdr; + obj_type = type_from_string(type); + hdrlen = write_binary_header(stream->next_out, obj_type, len); + stream->total_out = hdrlen; + stream->next_out += hdrlen; + stream->avail_out -= hdrlen; } -int hash_sha1_file(void *buf, unsigned long len, const char *type, +int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { - unsigned char hdr[50]; + char hdr[32]; int hdrlen; write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); return 0; @@ -1774,13 +1947,13 @@ int hash_sha1_file(void *buf, unsigned long len, const char *type, int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1) { - int size; + int size, ret; unsigned char *compressed; z_stream stream; unsigned char sha1[20]; char *filename; static char tmpfile[PATH_MAX]; - unsigned char hdr[50]; + char hdr[32]; int fd, hdrlen; /* Normally if we have it in the pack then we do not bother writing @@ -1827,16 +2000,21 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha stream.avail_out = size; /* First header.. */ - stream.next_in = hdr; + stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; setup_object_header(&stream, type, len); /* Then the data itself.. */ stream.next_in = buf; stream.avail_in = len; - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); + ret = deflate(&stream, Z_FINISH); + if (ret != Z_STREAM_END) + die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret); + + ret = deflateEnd(&stream); + if (ret != Z_OK) + die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret); + size = stream.total_out; if (write_buffer(fd, compressed, size) < 0) @@ -1858,17 +2036,17 @@ static void *repack_object(const unsigned char *sha1, unsigned long *objsize) z_stream stream; unsigned char *unpacked; unsigned long len; - char type[20]; - char hdr[50]; + enum object_type type; + char hdr[32]; int hdrlen; void *buf; /* need to unpack and recompress it by itself */ - unpacked = read_packed_sha1(sha1, type, &len); + unpacked = read_packed_sha1(sha1, &type, &len); if (!unpacked) error("cannot read sha1_file for %s", sha1_to_hex(sha1)); - hdrlen = sprintf(hdr, "%s %lu", type, len) + 1; + hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; /* Set it up */ memset(&stream, 0, sizeof(stream)); @@ -2078,27 +2256,27 @@ int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object) return ret; } -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, const char *type) +int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, + enum object_type type, const char *path) { - unsigned long size = st->st_size; - void *buf; + size_t size = xsize_t(st->st_size); + void *buf = NULL; int ret, re_allocated = 0; - buf = ""; if (size) buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); if (!type) - type = blob_type; + type = OBJ_BLOB; /* * Convert blobs to git internal format */ - if (!strcmp(type, blob_type)) { + if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { unsigned long nsize = size; char *nbuf = buf; - if (convert_to_git(NULL, &nbuf, &nsize)) { + if (convert_to_git(path, &nbuf, &nsize)) { if (size) munmap(buf, size); size = nsize; @@ -2108,9 +2286,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, con } if (write_object) - ret = write_sha1_file(buf, size, type, sha1); + ret = write_sha1_file(buf, size, typename(type), sha1); else - ret = hash_sha1_file(buf, size, type, sha1); + ret = hash_sha1_file(buf, size, typename(type), sha1); if (re_allocated) { free(buf); return ret; @@ -2124,6 +2302,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write { int fd; char *target; + size_t len; switch (st->st_mode & S_IFMT) { case S_IFREG: @@ -2131,21 +2310,22 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, NULL) < 0) + if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path) < 0) return error("%s: failed to insert into database", path); break; case S_IFLNK: - target = xmalloc(st->st_size+1); - if (readlink(path, target, st->st_size+1) != st->st_size) { + len = xsize_t(st->st_size); + target = xmalloc(len + 1); + if (readlink(path, target, len + 1) != st->st_size) { char *errstr = strerror(errno); free(target); return error("readlink(\"%s\"): %s", path, errstr); } if (!write_object) - hash_sha1_file(target, st->st_size, blob_type, sha1); - else if (write_sha1_file(target, st->st_size, blob_type, sha1)) + hash_sha1_file(target, len, blob_type, sha1); + else if (write_sha1_file(target, len, blob_type, sha1)) return error("%s: failed to insert into database", path); free(target);