Code

fffddd25c9b9cc92b837434d7099cb7b4180ce0b
[git.git] / index-pack.c
1 #include "cache.h"
2 #include "delta.h"
3 #include "pack.h"
4 #include "csum-file.h"
5 #include "blob.h"
6 #include "commit.h"
7 #include "tag.h"
8 #include "tree.h"
10 static const char index_pack_usage[] =
11 "git-index-pack [-o index-file] pack-file";
13 struct object_entry
14 {
15         unsigned long offset;
16         enum object_type type;
17         enum object_type real_type;
18         unsigned char sha1[20];
19 };
21 union delta_base {
22         unsigned char sha1[20];
23         unsigned long offset;
24 };
26 struct delta_entry
27 {
28         struct object_entry *obj;
29         union delta_base base;
30 };
32 static const char *pack_name;
33 static unsigned char *pack_base;
34 static unsigned long pack_size;
35 static struct object_entry *objects;
36 static struct delta_entry *deltas;
37 static int nr_objects;
38 static int nr_deltas;
40 static void open_pack_file(void)
41 {
42         int fd;
43         struct stat st;
45         fd = open(pack_name, O_RDONLY);
46         if (fd < 0)
47                 die("cannot open packfile '%s': %s", pack_name,
48                     strerror(errno));
49         if (fstat(fd, &st)) {
50                 int err = errno;
51                 close(fd);
52                 die("cannot fstat packfile '%s': %s", pack_name,
53                     strerror(err));
54         }
55         pack_size = st.st_size;
56         pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
57         if (pack_base == MAP_FAILED) {
58                 int err = errno;
59                 close(fd);
60                 die("cannot mmap packfile '%s': %s", pack_name,
61                     strerror(err));
62         }
63         close(fd);
64 }
66 static void parse_pack_header(void)
67 {
68         const struct pack_header *hdr;
69         unsigned char sha1[20];
70         SHA_CTX ctx;
72         /* Ensure there are enough bytes for the header and final SHA1 */
73         if (pack_size < sizeof(struct pack_header) + 20)
74                 die("packfile '%s' is too small", pack_name);
76         /* Header consistency check */
77         hdr = (void *)pack_base;
78         if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
79                 die("packfile '%s' signature mismatch", pack_name);
80         if (!pack_version_ok(hdr->hdr_version))
81                 die("packfile '%s' version %d unsupported",
82                     pack_name, ntohl(hdr->hdr_version));
84         nr_objects = ntohl(hdr->hdr_entries);
86         /* Check packfile integrity */
87         SHA1_Init(&ctx);
88         SHA1_Update(&ctx, pack_base, pack_size - 20);
89         SHA1_Final(sha1, &ctx);
90         if (hashcmp(sha1, pack_base + pack_size - 20))
91                 die("packfile '%s' SHA1 mismatch", pack_name);
92 }
94 static void bad_object(unsigned long offset, const char *format,
95                        ...) NORETURN __attribute__((format (printf, 2, 3)));
97 static void bad_object(unsigned long offset, const char *format, ...)
98 {
99         va_list params;
100         char buf[1024];
102         va_start(params, format);
103         vsnprintf(buf, sizeof(buf), format, params);
104         va_end(params);
105         die("packfile '%s': bad object at offset %lu: %s",
106             pack_name, offset, buf);
109 static void *unpack_entry_data(unsigned long offset,
110                                unsigned long *current_pos, unsigned long size)
112         unsigned long pack_limit = pack_size - 20;
113         unsigned long pos = *current_pos;
114         z_stream stream;
115         void *buf = xmalloc(size);
117         memset(&stream, 0, sizeof(stream));
118         stream.next_out = buf;
119         stream.avail_out = size;
120         stream.next_in = pack_base + pos;
121         stream.avail_in = pack_limit - pos;
122         inflateInit(&stream);
124         for (;;) {
125                 int ret = inflate(&stream, 0);
126                 if (ret == Z_STREAM_END)
127                         break;
128                 if (ret != Z_OK)
129                         bad_object(offset, "inflate returned %d", ret);
130         }
131         inflateEnd(&stream);
132         if (stream.total_out != size)
133                 bad_object(offset, "size mismatch (expected %lu, got %lu)",
134                            size, stream.total_out);
135         *current_pos = pack_limit - stream.avail_in;
136         return buf;
139 static void *unpack_raw_entry(unsigned long offset,
140                               enum object_type *obj_type,
141                               unsigned long *obj_size,
142                               union delta_base *delta_base,
143                               unsigned long *next_obj_offset)
145         unsigned long pack_limit = pack_size - 20;
146         unsigned long pos = offset;
147         unsigned char c;
148         unsigned long size, base_offset;
149         unsigned shift;
150         enum object_type type;
151         void *data;
153         c = pack_base[pos++];
154         type = (c >> 4) & 7;
155         size = (c & 15);
156         shift = 4;
157         while (c & 0x80) {
158                 if (pos >= pack_limit)
159                         bad_object(offset, "object extends past end of pack");
160                 c = pack_base[pos++];
161                 size += (c & 0x7fUL) << shift;
162                 shift += 7;
163         }
165         switch (type) {
166         case OBJ_REF_DELTA:
167                 if (pos + 20 >= pack_limit)
168                         bad_object(offset, "object extends past end of pack");
169                 hashcpy(delta_base->sha1, pack_base + pos);
170                 pos += 20;
171                 break;
172         case OBJ_OFS_DELTA:
173                 memset(delta_base, 0, sizeof(*delta_base));
174                 c = pack_base[pos++];
175                 base_offset = c & 127;
176                 while (c & 128) {
177                         base_offset += 1;
178                         if (!base_offset || base_offset & ~(~0UL >> 7))
179                                 bad_object(offset, "offset value overflow for delta base object");
180                         if (pos >= pack_limit)
181                                 bad_object(offset, "object extends past end of pack");
182                         c = pack_base[pos++];
183                         base_offset = (base_offset << 7) + (c & 127);
184                 }
185                 delta_base->offset = offset - base_offset;
186                 if (delta_base->offset >= offset)
187                         bad_object(offset, "delta base offset is out of bound");
188                 break;
189         case OBJ_COMMIT:
190         case OBJ_TREE:
191         case OBJ_BLOB:
192         case OBJ_TAG:
193                 break;
194         default:
195                 bad_object(offset, "bad object type %d", type);
196         }
198         data = unpack_entry_data(offset, &pos, size);
199         *obj_type = type;
200         *obj_size = size;
201         *next_obj_offset = pos;
202         return data;
205 static int find_delta(const union delta_base *base)
207         int first = 0, last = nr_deltas;
209         while (first < last) {
210                 int next = (first + last) / 2;
211                 struct delta_entry *delta = &deltas[next];
212                 int cmp;
214                 cmp = memcmp(base, &delta->base, sizeof(*base));
215                 if (!cmp)
216                         return next;
217                 if (cmp < 0) {
218                         last = next;
219                         continue;
220                 }
221                 first = next+1;
222         }
223         return -first-1;
226 static int find_delta_childs(const union delta_base *base,
227                              int *first_index, int *last_index)
229         int first = find_delta(base);
230         int last = first;
231         int end = nr_deltas - 1;
233         if (first < 0)
234                 return -1;
235         while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base)))
236                 --first;
237         while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base)))
238                 ++last;
239         *first_index = first;
240         *last_index = last;
241         return 0;
244 static void sha1_object(const void *data, unsigned long size,
245                         enum object_type type, unsigned char *sha1)
247         SHA_CTX ctx;
248         char header[50];
249         int header_size;
250         const char *type_str;
252         switch (type) {
253         case OBJ_COMMIT: type_str = commit_type; break;
254         case OBJ_TREE:   type_str = tree_type; break;
255         case OBJ_BLOB:   type_str = blob_type; break;
256         case OBJ_TAG:    type_str = tag_type; break;
257         default:
258                 die("bad type %d", type);
259         }
261         header_size = sprintf(header, "%s %lu", type_str, size) + 1;
263         SHA1_Init(&ctx);
264         SHA1_Update(&ctx, header, header_size);
265         SHA1_Update(&ctx, data, size);
266         SHA1_Final(sha1, &ctx);
269 static void resolve_delta(struct delta_entry *delta, void *base_data,
270                           unsigned long base_size, enum object_type type)
272         struct object_entry *obj = delta->obj;
273         void *delta_data;
274         unsigned long delta_size;
275         void *result;
276         unsigned long result_size;
277         enum object_type delta_type;
278         union delta_base delta_base;
279         unsigned long next_obj_offset;
280         int j, first, last;
282         obj->real_type = type;
283         delta_data = unpack_raw_entry(obj->offset, &delta_type,
284                                       &delta_size, &delta_base,
285                                       &next_obj_offset);
286         result = patch_delta(base_data, base_size, delta_data, delta_size,
287                              &result_size);
288         free(delta_data);
289         if (!result)
290                 bad_object(obj->offset, "failed to apply delta");
291         sha1_object(result, result_size, type, obj->sha1);
293         hashcpy(delta_base.sha1, obj->sha1);
294         if (!find_delta_childs(&delta_base, &first, &last)) {
295                 for (j = first; j <= last; j++)
296                         if (deltas[j].obj->type == OBJ_REF_DELTA)
297                                 resolve_delta(&deltas[j], result, result_size, type);
298         }
300         memset(&delta_base, 0, sizeof(delta_base));
301         delta_base.offset = obj->offset;
302         if (!find_delta_childs(&delta_base, &first, &last)) {
303                 for (j = first; j <= last; j++)
304                         if (deltas[j].obj->type == OBJ_OFS_DELTA)
305                                 resolve_delta(&deltas[j], result, result_size, type);
306         }
308         free(result);
311 static int compare_delta_entry(const void *a, const void *b)
313         const struct delta_entry *delta_a = a;
314         const struct delta_entry *delta_b = b;
315         return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base));
318 static void parse_pack_objects(void)
320         int i;
321         unsigned long offset = sizeof(struct pack_header);
322         struct delta_entry *delta = deltas;
323         void *data;
324         unsigned long data_size;
326         /*
327          * First pass:
328          * - find locations of all objects;
329          * - calculate SHA1 of all non-delta objects;
330          * - remember base SHA1 for all deltas.
331          */
332         for (i = 0; i < nr_objects; i++) {
333                 struct object_entry *obj = &objects[i];
334                 obj->offset = offset;
335                 data = unpack_raw_entry(offset, &obj->type, &data_size,
336                                         &delta->base, &offset);
337                 obj->real_type = obj->type;
338                 if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
339                         nr_deltas++;
340                         delta->obj = obj;
341                         delta++;
342                 } else
343                         sha1_object(data, data_size, obj->type, obj->sha1);
344                 free(data);
345         }
346         if (offset != pack_size - 20)
347                 die("packfile '%s' has junk at the end", pack_name);
349         /* Sort deltas by base SHA1/offset for fast searching */
350         qsort(deltas, nr_deltas, sizeof(struct delta_entry),
351               compare_delta_entry);
353         /*
354          * Second pass:
355          * - for all non-delta objects, look if it is used as a base for
356          *   deltas;
357          * - if used as a base, uncompress the object and apply all deltas,
358          *   recursively checking if the resulting object is used as a base
359          *   for some more deltas.
360          */
361         for (i = 0; i < nr_objects; i++) {
362                 struct object_entry *obj = &objects[i];
363                 union delta_base base;
364                 int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
366                 if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
367                         continue;
368                 hashcpy(base.sha1, obj->sha1);
369                 ref = !find_delta_childs(&base, &ref_first, &ref_last);
370                 memset(&base, 0, sizeof(base));
371                 base.offset = obj->offset;
372                 ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
373                 if (!ref && !ofs)
374                         continue;
375                 data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
376                                         &base, &offset);
377                 if (ref)
378                         for (j = ref_first; j <= ref_last; j++)
379                                 if (deltas[j].obj->type == OBJ_REF_DELTA)
380                                         resolve_delta(&deltas[j], data,
381                                                       data_size, obj->type);
382                 if (ofs)
383                         for (j = ofs_first; j <= ofs_last; j++)
384                                 if (deltas[j].obj->type == OBJ_OFS_DELTA)
385                                         resolve_delta(&deltas[j], data,
386                                                       data_size, obj->type);
387                 free(data);
388         }
390         /* Check for unresolved deltas */
391         for (i = 0; i < nr_deltas; i++) {
392                 if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
393                     deltas[i].obj->real_type == OBJ_OFS_DELTA)
394                         die("packfile '%s' has unresolved deltas",  pack_name);
395         }
398 static int sha1_compare(const void *_a, const void *_b)
400         struct object_entry *a = *(struct object_entry **)_a;
401         struct object_entry *b = *(struct object_entry **)_b;
402         return hashcmp(a->sha1, b->sha1);
405 static void write_index_file(const char *index_name, unsigned char *sha1)
407         struct sha1file *f;
408         struct object_entry **sorted_by_sha, **list, **last;
409         unsigned int array[256];
410         int i;
411         SHA_CTX ctx;
413         if (nr_objects) {
414                 sorted_by_sha =
415                         xcalloc(nr_objects, sizeof(struct object_entry *));
416                 list = sorted_by_sha;
417                 last = sorted_by_sha + nr_objects;
418                 for (i = 0; i < nr_objects; ++i)
419                         sorted_by_sha[i] = &objects[i];
420                 qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
421                       sha1_compare);
423         }
424         else
425                 sorted_by_sha = list = last = NULL;
427         unlink(index_name);
428         f = sha1create("%s", index_name);
430         /*
431          * Write the first-level table (the list is sorted,
432          * but we use a 256-entry lookup to be able to avoid
433          * having to do eight extra binary search iterations).
434          */
435         for (i = 0; i < 256; i++) {
436                 struct object_entry **next = list;
437                 while (next < last) {
438                         struct object_entry *obj = *next;
439                         if (obj->sha1[0] != i)
440                                 break;
441                         next++;
442                 }
443                 array[i] = htonl(next - sorted_by_sha);
444                 list = next;
445         }
446         sha1write(f, array, 256 * sizeof(int));
448         /* recompute the SHA1 hash of sorted object names.
449          * currently pack-objects does not do this, but that
450          * can be fixed.
451          */
452         SHA1_Init(&ctx);
453         /*
454          * Write the actual SHA1 entries..
455          */
456         list = sorted_by_sha;
457         for (i = 0; i < nr_objects; i++) {
458                 struct object_entry *obj = *list++;
459                 unsigned int offset = htonl(obj->offset);
460                 sha1write(f, &offset, 4);
461                 sha1write(f, obj->sha1, 20);
462                 SHA1_Update(&ctx, obj->sha1, 20);
463         }
464         sha1write(f, pack_base + pack_size - 20, 20);
465         sha1close(f, NULL, 1);
466         free(sorted_by_sha);
467         SHA1_Final(sha1, &ctx);
470 int main(int argc, char **argv)
472         int i;
473         char *index_name = NULL;
474         char *index_name_buf = NULL;
475         unsigned char sha1[20];
477         for (i = 1; i < argc; i++) {
478                 const char *arg = argv[i];
480                 if (*arg == '-') {
481                         if (!strcmp(arg, "-o")) {
482                                 if (index_name || (i+1) >= argc)
483                                         usage(index_pack_usage);
484                                 index_name = argv[++i];
485                         } else
486                                 usage(index_pack_usage);
487                         continue;
488                 }
490                 if (pack_name)
491                         usage(index_pack_usage);
492                 pack_name = arg;
493         }
495         if (!pack_name)
496                 usage(index_pack_usage);
497         if (!index_name) {
498                 int len = strlen(pack_name);
499                 if (!has_extension(pack_name, ".pack"))
500                         die("packfile name '%s' does not end with '.pack'",
501                             pack_name);
502                 index_name_buf = xmalloc(len);
503                 memcpy(index_name_buf, pack_name, len - 5);
504                 strcpy(index_name_buf + len - 5, ".idx");
505                 index_name = index_name_buf;
506         }
508         open_pack_file();
509         parse_pack_header();
510         objects = xcalloc(nr_objects, sizeof(struct object_entry));
511         deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
512         parse_pack_objects();
513         free(deltas);
514         write_index_file(index_name, sha1);
515         free(objects);
516         free(index_name_buf);
518         printf("%s\n", sha1_to_hex(sha1));
520         return 0;