Code

pack-objects: remove obsolete comments
[git.git] / builtin-pack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "commit.h"
6 #include "tag.h"
7 #include "tree.h"
8 #include "delta.h"
9 #include "pack.h"
10 #include "csum-file.h"
11 #include "tree-walk.h"
12 #include "diff.h"
13 #include "revision.h"
14 #include "list-objects.h"
16 static const char pack_usage[] = "\
17 git-pack-objects [{ -q | --progress | --all-progress }] \n\
18         [--local] [--incremental] [--window=N] [--depth=N] \n\
19         [--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\
20         [--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\
21         [<ref-list | <object-list]";
23 struct object_entry {
24         unsigned char sha1[20];
25         uint32_t crc32;         /* crc of raw pack data for this object */
26         off_t offset;           /* offset into the final pack file */
27         unsigned long size;     /* uncompressed size */
28         unsigned int hash;      /* name hint hash */
29         unsigned int depth;     /* delta depth */
30         struct packed_git *in_pack;     /* already in pack */
31         off_t in_pack_offset;
32         struct object_entry *delta;     /* delta base object */
33         struct object_entry *delta_child; /* deltified objects who bases me */
34         struct object_entry *delta_sibling; /* other deltified objects who
35                                              * uses the same base as me
36                                              */
37         unsigned long delta_size;       /* delta data size (uncompressed) */
38         enum object_type type;
39         enum object_type in_pack_type;  /* could be delta */
40         unsigned char in_pack_header_size;
41         unsigned char preferred_base; /* we do not pack this, but is available
42                                        * to be used as the base objectto delta
43                                        * objects against.
44                                        */
45 };
47 /*
48  * Objects we are going to pack are collected in objects array (dynamically
49  * expanded).  nr_objects & nr_alloc controls this array.  They are stored
50  * in the order we see -- typically rev-list --objects order that gives us
51  * nice "minimum seek" order.
52  */
53 static struct object_entry *objects;
54 static uint32_t nr_objects, nr_alloc, nr_result;
56 static int non_empty;
57 static int no_reuse_delta;
58 static int local;
59 static int incremental;
60 static int allow_ofs_delta;
61 static const char *pack_tmp_name, *idx_tmp_name;
62 static char tmpname[PATH_MAX];
63 static unsigned char pack_file_sha1[20];
64 static int progress = 1;
65 static volatile sig_atomic_t progress_update;
66 static int window = 10;
67 static int pack_to_stdout;
68 static int num_preferred_base;
70 /*
71  * The object names in objects array are hashed with this hashtable,
72  * to help looking up the entry by object name.
73  * This hashtable is built after all the objects are seen.
74  */
75 static int *object_ix;
76 static int object_ix_hashsz;
78 /*
79  * Pack index for existing packs give us easy access to the offsets into
80  * corresponding pack file where each object's data starts, but the entries
81  * do not store the size of the compressed representation (uncompressed
82  * size is easily available by examining the pack entry header).  It is
83  * also rather expensive to find the sha1 for an object given its offset.
84  *
85  * We build a hashtable of existing packs (pack_revindex), and keep reverse
86  * index here -- pack index file is sorted by object name mapping to offset;
87  * this pack_revindex[].revindex array is a list of offset/index_nr pairs
88  * ordered by offset, so if you know the offset of an object, next offset
89  * is where its packed representation ends and the index_nr can be used to
90  * get the object sha1 from the main index.
91  */
92 struct revindex_entry {
93         off_t offset;
94         unsigned int nr;
95 };
96 struct pack_revindex {
97         struct packed_git *p;
98         struct revindex_entry *revindex;
99 };
100 static struct  pack_revindex *pack_revindex;
101 static int pack_revindex_hashsz;
103 /*
104  * stats
105  */
106 static uint32_t written, written_delta;
107 static uint32_t reused, reused_delta;
109 static int pack_revindex_ix(struct packed_git *p)
111         unsigned long ui = (unsigned long)p;
112         int i;
114         ui = ui ^ (ui >> 16); /* defeat structure alignment */
115         i = (int)(ui % pack_revindex_hashsz);
116         while (pack_revindex[i].p) {
117                 if (pack_revindex[i].p == p)
118                         return i;
119                 if (++i == pack_revindex_hashsz)
120                         i = 0;
121         }
122         return -1 - i;
125 static void prepare_pack_ix(void)
127         int num;
128         struct packed_git *p;
129         for (num = 0, p = packed_git; p; p = p->next)
130                 num++;
131         if (!num)
132                 return;
133         pack_revindex_hashsz = num * 11;
134         pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
135         for (p = packed_git; p; p = p->next) {
136                 num = pack_revindex_ix(p);
137                 num = - 1 - num;
138                 pack_revindex[num].p = p;
139         }
140         /* revindex elements are lazily initialized */
143 static int cmp_offset(const void *a_, const void *b_)
145         const struct revindex_entry *a = a_;
146         const struct revindex_entry *b = b_;
147         return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
150 /*
151  * Ordered list of offsets of objects in the pack.
152  */
153 static void prepare_pack_revindex(struct pack_revindex *rix)
155         struct packed_git *p = rix->p;
156         int num_ent = p->num_objects;
157         int i;
158         const char *index = p->index_data;
160         rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
161         index += 4 * 256;
163         if (p->index_version > 1) {
164                 const uint32_t *off_32 =
165                         (uint32_t *)(index + 8 + p->num_objects * (20 + 4));
166                 const uint32_t *off_64 = off_32 + p->num_objects;
167                 for (i = 0; i < num_ent; i++) {
168                         uint32_t off = ntohl(*off_32++);
169                         if (!(off & 0x80000000)) {
170                                 rix->revindex[i].offset = off;
171                         } else {
172                                 rix->revindex[i].offset =
173                                         ((uint64_t)ntohl(*off_64++)) << 32;
174                                 rix->revindex[i].offset |=
175                                         ntohl(*off_64++);
176                         }
177                         rix->revindex[i].nr = i;
178                 }
179         } else {
180                 for (i = 0; i < num_ent; i++) {
181                         uint32_t hl = *((uint32_t *)(index + 24 * i));
182                         rix->revindex[i].offset = ntohl(hl);
183                         rix->revindex[i].nr = i;
184                 }
185         }
187         /* This knows the pack format -- the 20-byte trailer
188          * follows immediately after the last object data.
189          */
190         rix->revindex[num_ent].offset = p->pack_size - 20;
191         rix->revindex[num_ent].nr = -1;
192         qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
195 static struct revindex_entry * find_packed_object(struct packed_git *p,
196                                                   off_t ofs)
198         int num;
199         int lo, hi;
200         struct pack_revindex *rix;
201         struct revindex_entry *revindex;
202         num = pack_revindex_ix(p);
203         if (num < 0)
204                 die("internal error: pack revindex uninitialized");
205         rix = &pack_revindex[num];
206         if (!rix->revindex)
207                 prepare_pack_revindex(rix);
208         revindex = rix->revindex;
209         lo = 0;
210         hi = p->num_objects + 1;
211         do {
212                 int mi = (lo + hi) / 2;
213                 if (revindex[mi].offset == ofs) {
214                         return revindex + mi;
215                 }
216                 else if (ofs < revindex[mi].offset)
217                         hi = mi;
218                 else
219                         lo = mi + 1;
220         } while (lo < hi);
221         die("internal error: pack revindex corrupt");
224 static const unsigned char *find_packed_object_name(struct packed_git *p,
225                                                     off_t ofs)
227         struct revindex_entry *entry = find_packed_object(p, ofs);
228         return nth_packed_object_sha1(p, entry->nr);
231 static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
233         unsigned long othersize, delta_size;
234         enum object_type type;
235         void *otherbuf = read_sha1_file(entry->delta->sha1, &type, &othersize);
236         void *delta_buf;
238         if (!otherbuf)
239                 die("unable to read %s", sha1_to_hex(entry->delta->sha1));
240         delta_buf = diff_delta(otherbuf, othersize,
241                                buf, size, &delta_size, 0);
242         if (!delta_buf || delta_size != entry->delta_size)
243                 die("delta size changed");
244         free(buf);
245         free(otherbuf);
246         return delta_buf;
249 /*
250  * The per-object header is a pretty dense thing, which is
251  *  - first byte: low four bits are "size", then three bits of "type",
252  *    and the high bit is "size continues".
253  *  - each byte afterwards: low seven bits are size continuation,
254  *    with the high bit being "size continues"
255  */
256 static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
258         int n = 1;
259         unsigned char c;
261         if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
262                 die("bad type %d", type);
264         c = (type << 4) | (size & 15);
265         size >>= 4;
266         while (size) {
267                 *hdr++ = c | 0x80;
268                 c = size & 0x7f;
269                 size >>= 7;
270                 n++;
271         }
272         *hdr = c;
273         return n;
276 /*
277  * we are going to reuse the existing object data as is.  make
278  * sure it is not corrupt.
279  */
280 static int check_pack_inflate(struct packed_git *p,
281                 struct pack_window **w_curs,
282                 off_t offset,
283                 off_t len,
284                 unsigned long expect)
286         z_stream stream;
287         unsigned char fakebuf[4096], *in;
288         int st;
290         memset(&stream, 0, sizeof(stream));
291         inflateInit(&stream);
292         do {
293                 in = use_pack(p, w_curs, offset, &stream.avail_in);
294                 stream.next_in = in;
295                 stream.next_out = fakebuf;
296                 stream.avail_out = sizeof(fakebuf);
297                 st = inflate(&stream, Z_FINISH);
298                 offset += stream.next_in - in;
299         } while (st == Z_OK || st == Z_BUF_ERROR);
300         inflateEnd(&stream);
301         return (st == Z_STREAM_END &&
302                 stream.total_out == expect &&
303                 stream.total_in == len) ? 0 : -1;
306 static int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
307                           off_t offset, off_t len, unsigned int nr)
309         const uint32_t *index_crc;
310         uint32_t data_crc = crc32(0, Z_NULL, 0);
312         do {
313                 unsigned int avail;
314                 void *data = use_pack(p, w_curs, offset, &avail);
315                 if (avail > len)
316                         avail = len;
317                 data_crc = crc32(data_crc, data, avail);
318                 offset += avail;
319                 len -= avail;
320         } while (len);
322         index_crc = p->index_data;
323         index_crc += 2 + 256 + p->num_objects * (20/4) + nr;
325         return data_crc != ntohl(*index_crc);
328 static void copy_pack_data(struct sha1file *f,
329                 struct packed_git *p,
330                 struct pack_window **w_curs,
331                 off_t offset,
332                 off_t len)
334         unsigned char *in;
335         unsigned int avail;
337         while (len) {
338                 in = use_pack(p, w_curs, offset, &avail);
339                 if (avail > len)
340                         avail = (unsigned int)len;
341                 sha1write(f, in, avail);
342                 offset += avail;
343                 len -= avail;
344         }
347 static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
349         z_stream stream;
350         unsigned char fakebuf[4096];
351         int st;
353         memset(&stream, 0, sizeof(stream));
354         stream.next_in = data;
355         stream.avail_in = len;
356         stream.next_out = fakebuf;
357         stream.avail_out = sizeof(fakebuf);
358         inflateInit(&stream);
360         while (1) {
361                 st = inflate(&stream, Z_FINISH);
362                 if (st == Z_STREAM_END || st == Z_OK) {
363                         st = (stream.total_out == expect &&
364                               stream.total_in == len) ? 0 : -1;
365                         break;
366                 }
367                 if (st != Z_BUF_ERROR) {
368                         st = -1;
369                         break;
370                 }
371                 stream.next_out = fakebuf;
372                 stream.avail_out = sizeof(fakebuf);
373         }
374         inflateEnd(&stream);
375         return st;
378 static int revalidate_loose_object(struct object_entry *entry,
379                                    unsigned char *map,
380                                    unsigned long mapsize)
382         /* we already know this is a loose object with new type header. */
383         enum object_type type;
384         unsigned long size, used;
386         if (pack_to_stdout)
387                 return 0;
389         used = unpack_object_header_gently(map, mapsize, &type, &size);
390         if (!used)
391                 return -1;
392         map += used;
393         mapsize -= used;
394         return check_loose_inflate(map, mapsize, size);
397 static unsigned long write_object(struct sha1file *f,
398                                   struct object_entry *entry)
400         unsigned long size;
401         enum object_type type;
402         void *buf;
403         unsigned char header[10];
404         unsigned hdrlen;
405         off_t datalen;
406         enum object_type obj_type;
407         int to_reuse = 0;
409         if (!pack_to_stdout)
410                 crc32_begin(f);
412         obj_type = entry->type;
413         if (! entry->in_pack)
414                 to_reuse = 0;   /* can't reuse what we don't have */
415         else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
416                 to_reuse = 1;   /* check_object() decided it for us */
417         else if (obj_type != entry->in_pack_type)
418                 to_reuse = 0;   /* pack has delta which is unusable */
419         else if (entry->delta)
420                 to_reuse = 0;   /* we want to pack afresh */
421         else
422                 to_reuse = 1;   /* we have it in-pack undeltified,
423                                  * and we do not need to deltify it.
424                                  */
426         if (!entry->in_pack && !entry->delta) {
427                 unsigned char *map;
428                 unsigned long mapsize;
429                 map = map_sha1_file(entry->sha1, &mapsize);
430                 if (map && !legacy_loose_object(map)) {
431                         /* We can copy straight into the pack file */
432                         if (revalidate_loose_object(entry, map, mapsize))
433                                 die("corrupt loose object %s",
434                                     sha1_to_hex(entry->sha1));
435                         sha1write(f, map, mapsize);
436                         munmap(map, mapsize);
437                         written++;
438                         reused++;
439                         return mapsize;
440                 }
441                 if (map)
442                         munmap(map, mapsize);
443         }
445         if (!to_reuse) {
446                 buf = read_sha1_file(entry->sha1, &type, &size);
447                 if (!buf)
448                         die("unable to read %s", sha1_to_hex(entry->sha1));
449                 if (size != entry->size)
450                         die("object %s size inconsistency (%lu vs %lu)",
451                             sha1_to_hex(entry->sha1), size, entry->size);
452                 if (entry->delta) {
453                         buf = delta_against(buf, size, entry);
454                         size = entry->delta_size;
455                         obj_type = (allow_ofs_delta && entry->delta->offset) ?
456                                 OBJ_OFS_DELTA : OBJ_REF_DELTA;
457                 }
458                 /*
459                  * The object header is a byte of 'type' followed by zero or
460                  * more bytes of length.
461                  */
462                 hdrlen = encode_header(obj_type, size, header);
463                 sha1write(f, header, hdrlen);
465                 if (obj_type == OBJ_OFS_DELTA) {
466                         /*
467                          * Deltas with relative base contain an additional
468                          * encoding of the relative offset for the delta
469                          * base from this object's position in the pack.
470                          */
471                         off_t ofs = entry->offset - entry->delta->offset;
472                         unsigned pos = sizeof(header) - 1;
473                         header[pos] = ofs & 127;
474                         while (ofs >>= 7)
475                                 header[--pos] = 128 | (--ofs & 127);
476                         sha1write(f, header + pos, sizeof(header) - pos);
477                         hdrlen += sizeof(header) - pos;
478                 } else if (obj_type == OBJ_REF_DELTA) {
479                         /*
480                          * Deltas with a base reference contain
481                          * an additional 20 bytes for the base sha1.
482                          */
483                         sha1write(f, entry->delta->sha1, 20);
484                         hdrlen += 20;
485                 }
486                 datalen = sha1write_compressed(f, buf, size);
487                 free(buf);
488         }
489         else {
490                 struct packed_git *p = entry->in_pack;
491                 struct pack_window *w_curs = NULL;
492                 struct revindex_entry *revidx;
493                 off_t offset;
495                 if (entry->delta) {
496                         obj_type = (allow_ofs_delta && entry->delta->offset) ?
497                                 OBJ_OFS_DELTA : OBJ_REF_DELTA;
498                         reused_delta++;
499                 }
500                 hdrlen = encode_header(obj_type, entry->size, header);
501                 sha1write(f, header, hdrlen);
502                 if (obj_type == OBJ_OFS_DELTA) {
503                         off_t ofs = entry->offset - entry->delta->offset;
504                         unsigned pos = sizeof(header) - 1;
505                         header[pos] = ofs & 127;
506                         while (ofs >>= 7)
507                                 header[--pos] = 128 | (--ofs & 127);
508                         sha1write(f, header + pos, sizeof(header) - pos);
509                         hdrlen += sizeof(header) - pos;
510                 } else if (obj_type == OBJ_REF_DELTA) {
511                         sha1write(f, entry->delta->sha1, 20);
512                         hdrlen += 20;
513                 }
515                 offset = entry->in_pack_offset;
516                 revidx = find_packed_object(p, offset);
517                 datalen = revidx[1].offset - offset;
518                 if (!pack_to_stdout && p->index_version > 1 &&
519                     check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
520                         die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
521                 offset += entry->in_pack_header_size;
522                 datalen -= entry->in_pack_header_size;
523                 if (!pack_to_stdout && p->index_version == 1 &&
524                     check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
525                         die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
526                 copy_pack_data(f, p, &w_curs, offset, datalen);
527                 unuse_pack(&w_curs);
528                 reused++;
529         }
530         if (entry->delta)
531                 written_delta++;
532         written++;
533         if (!pack_to_stdout)
534                 entry->crc32 = crc32_end(f);
535         return hdrlen + datalen;
538 static off_t write_one(struct sha1file *f,
539                                struct object_entry *e,
540                                off_t offset)
542         unsigned long size;
544         /* offset is non zero if object is written already. */
545         if (e->offset || e->preferred_base)
546                 return offset;
548         /* if we are deltified, write out base object first. */
549         if (e->delta)
550                 offset = write_one(f, e->delta, offset);
552         e->offset = offset;
553         size = write_object(f, e);
555         /* make sure off_t is sufficiently large not to wrap */
556         if (offset > offset + size)
557                 die("pack too large for current definition of off_t");
558         return offset + size;
561 static off_t write_pack_file(void)
563         uint32_t i;
564         struct sha1file *f;
565         off_t offset, last_obj_offset = 0;
566         struct pack_header hdr;
567         unsigned last_percent = 999;
568         int do_progress = progress;
570         if (pack_to_stdout) {
571                 f = sha1fd(1, "<stdout>");
572                 do_progress >>= 1;
573         } else {
574                 int fd;
575                 snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
576                 fd = mkstemp(tmpname);
577                 if (fd < 0)
578                         die("unable to create %s: %s\n", tmpname, strerror(errno));
579                 pack_tmp_name = xstrdup(tmpname);
580                 f = sha1fd(fd, pack_tmp_name);
581         }
583         if (do_progress)
584                 fprintf(stderr, "Writing %u objects.\n", nr_result);
586         hdr.hdr_signature = htonl(PACK_SIGNATURE);
587         hdr.hdr_version = htonl(PACK_VERSION);
588         hdr.hdr_entries = htonl(nr_result);
589         sha1write(f, &hdr, sizeof(hdr));
590         offset = sizeof(hdr);
591         if (!nr_result)
592                 goto done;
593         for (i = 0; i < nr_objects; i++) {
594                 last_obj_offset = offset;
595                 offset = write_one(f, objects + i, offset);
596                 if (do_progress) {
597                         unsigned percent = written * 100 / nr_result;
598                         if (progress_update || percent != last_percent) {
599                                 fprintf(stderr, "%4u%% (%u/%u) done\r",
600                                         percent, written, nr_result);
601                                 progress_update = 0;
602                                 last_percent = percent;
603                         }
604                 }
605         }
606         if (do_progress)
607                 fputc('\n', stderr);
608  done:
609         if (written != nr_result)
610                 die("wrote %u objects while expecting %u", written, nr_result);
611         sha1close(f, pack_file_sha1, 1);
613         return last_obj_offset;
616 static int sha1_sort(const void *_a, const void *_b)
618         const struct object_entry *a = *(struct object_entry **)_a;
619         const struct object_entry *b = *(struct object_entry **)_b;
620         return hashcmp(a->sha1, b->sha1);
623 static uint32_t index_default_version = 1;
624 static uint32_t index_off32_limit = 0x7fffffff;
626 static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
628         struct sha1file *f;
629         struct object_entry **sorted_by_sha, **list, **last;
630         uint32_t array[256];
631         uint32_t i, index_version;
632         SHA_CTX ctx;
633         int fd;
635         snprintf(tmpname, sizeof(tmpname), "tmp_idx_XXXXXX");
636         fd = mkstemp(tmpname);
637         if (fd < 0)
638                 die("unable to create %s: %s\n", tmpname, strerror(errno));
639         idx_tmp_name = xstrdup(tmpname);
640         f = sha1fd(fd, idx_tmp_name);
642         if (nr_result) {
643                 uint32_t j = 0;
644                 sorted_by_sha =
645                         xcalloc(nr_result, sizeof(struct object_entry *));
646                 for (i = 0; i < nr_objects; i++)
647                         if (!objects[i].preferred_base)
648                                 sorted_by_sha[j++] = objects + i;
649                 if (j != nr_result)
650                         die("listed %u objects while expecting %u", j, nr_result);
651                 qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
652                 list = sorted_by_sha;
653                 last = sorted_by_sha + nr_result;
654         } else
655                 sorted_by_sha = list = last = NULL;
657         /* if last object's offset is >= 2^31 we should use index V2 */
658         index_version = (last_obj_offset >> 31) ? 2 : index_default_version;
660         /* index versions 2 and above need a header */
661         if (index_version >= 2) {
662                 struct pack_idx_header hdr;
663                 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
664                 hdr.idx_version = htonl(index_version);
665                 sha1write(f, &hdr, sizeof(hdr));
666         }
668         /*
669          * Write the first-level table (the list is sorted,
670          * but we use a 256-entry lookup to be able to avoid
671          * having to do eight extra binary search iterations).
672          */
673         for (i = 0; i < 256; i++) {
674                 struct object_entry **next = list;
675                 while (next < last) {
676                         struct object_entry *entry = *next;
677                         if (entry->sha1[0] != i)
678                                 break;
679                         next++;
680                 }
681                 array[i] = htonl(next - sorted_by_sha);
682                 list = next;
683         }
684         sha1write(f, array, 256 * 4);
686         /* Compute the SHA1 hash of sorted object names. */
687         SHA1_Init(&ctx);
689         /* Write the actual SHA1 entries. */
690         list = sorted_by_sha;
691         for (i = 0; i < nr_result; i++) {
692                 struct object_entry *entry = *list++;
693                 if (index_version < 2) {
694                         uint32_t offset = htonl(entry->offset);
695                         sha1write(f, &offset, 4);
696                 }
697                 sha1write(f, entry->sha1, 20);
698                 SHA1_Update(&ctx, entry->sha1, 20);
699         }
701         if (index_version >= 2) {
702                 unsigned int nr_large_offset = 0;
704                 /* write the crc32 table */
705                 list = sorted_by_sha;
706                 for (i = 0; i < nr_objects; i++) {
707                         struct object_entry *entry = *list++;
708                         uint32_t crc32_val = htonl(entry->crc32);
709                         sha1write(f, &crc32_val, 4);
710                 }
712                 /* write the 32-bit offset table */
713                 list = sorted_by_sha;
714                 for (i = 0; i < nr_objects; i++) {
715                         struct object_entry *entry = *list++;
716                         uint32_t offset = (entry->offset <= index_off32_limit) ?
717                                 entry->offset : (0x80000000 | nr_large_offset++);
718                         offset = htonl(offset);
719                         sha1write(f, &offset, 4);
720                 }
722                 /* write the large offset table */
723                 list = sorted_by_sha;
724                 while (nr_large_offset) {
725                         struct object_entry *entry = *list++;
726                         uint64_t offset = entry->offset;
727                         if (offset > index_off32_limit) {
728                                 uint32_t split[2];
729                                 split[0]        = htonl(offset >> 32);
730                                 split[1] = htonl(offset & 0xffffffff);
731                                 sha1write(f, split, 8);
732                                 nr_large_offset--;
733                         }
734                 }
735         }
737         sha1write(f, pack_file_sha1, 20);
738         sha1close(f, NULL, 1);
739         free(sorted_by_sha);
740         SHA1_Final(sha1, &ctx);
743 static int locate_object_entry_hash(const unsigned char *sha1)
745         int i;
746         unsigned int ui;
747         memcpy(&ui, sha1, sizeof(unsigned int));
748         i = ui % object_ix_hashsz;
749         while (0 < object_ix[i]) {
750                 if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1))
751                         return i;
752                 if (++i == object_ix_hashsz)
753                         i = 0;
754         }
755         return -1 - i;
758 static struct object_entry *locate_object_entry(const unsigned char *sha1)
760         int i;
762         if (!object_ix_hashsz)
763                 return NULL;
765         i = locate_object_entry_hash(sha1);
766         if (0 <= i)
767                 return &objects[object_ix[i]-1];
768         return NULL;
771 static void rehash_objects(void)
773         uint32_t i;
774         struct object_entry *oe;
776         object_ix_hashsz = nr_objects * 3;
777         if (object_ix_hashsz < 1024)
778                 object_ix_hashsz = 1024;
779         object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
780         memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
781         for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
782                 int ix = locate_object_entry_hash(oe->sha1);
783                 if (0 <= ix)
784                         continue;
785                 ix = -1 - ix;
786                 object_ix[ix] = i + 1;
787         }
790 static unsigned name_hash(const char *name)
792         unsigned char c;
793         unsigned hash = 0;
795         /*
796          * This effectively just creates a sortable number from the
797          * last sixteen non-whitespace characters. Last characters
798          * count "most", so things that end in ".c" sort together.
799          */
800         while ((c = *name++) != 0) {
801                 if (isspace(c))
802                         continue;
803                 hash = (hash >> 2) + (c << 24);
804         }
805         return hash;
808 static int add_object_entry(const unsigned char *sha1, enum object_type type,
809                             unsigned hash, int exclude)
811         struct object_entry *entry;
812         struct packed_git *p, *found_pack = NULL;
813         off_t found_offset = 0;
814         int ix;
816         ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
817         if (ix >= 0) {
818                 if (exclude) {
819                         entry = objects + object_ix[ix] - 1;
820                         if (!entry->preferred_base)
821                                 nr_result--;
822                         entry->preferred_base = 1;
823                 }
824                 return 0;
825         }
827         for (p = packed_git; p; p = p->next) {
828                 off_t offset = find_pack_entry_one(sha1, p);
829                 if (offset) {
830                         if (!found_pack) {
831                                 found_offset = offset;
832                                 found_pack = p;
833                         }
834                         if (exclude)
835                                 break;
836                         if (incremental)
837                                 return 0;
838                         if (local && !p->pack_local)
839                                 return 0;
840                 }
841         }
843         if (nr_objects >= nr_alloc) {
844                 nr_alloc = (nr_alloc  + 1024) * 3 / 2;
845                 objects = xrealloc(objects, nr_alloc * sizeof(*entry));
846         }
848         entry = objects + nr_objects++;
849         memset(entry, 0, sizeof(*entry));
850         hashcpy(entry->sha1, sha1);
851         entry->hash = hash;
852         if (type)
853                 entry->type = type;
854         if (exclude)
855                 entry->preferred_base = 1;
856         else
857                 nr_result++;
858         if (found_pack) {
859                 entry->in_pack = found_pack;
860                 entry->in_pack_offset = found_offset;
861         }
863         if (object_ix_hashsz * 3 <= nr_objects * 4)
864                 rehash_objects();
865         else
866                 object_ix[-1 - ix] = nr_objects;
868         if (progress_update) {
869                 fprintf(stderr, "Counting objects...%u\r", nr_objects);
870                 progress_update = 0;
871         }
873         return 1;
876 struct pbase_tree_cache {
877         unsigned char sha1[20];
878         int ref;
879         int temporary;
880         void *tree_data;
881         unsigned long tree_size;
882 };
884 static struct pbase_tree_cache *(pbase_tree_cache[256]);
885 static int pbase_tree_cache_ix(const unsigned char *sha1)
887         return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
889 static int pbase_tree_cache_ix_incr(int ix)
891         return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
894 static struct pbase_tree {
895         struct pbase_tree *next;
896         /* This is a phony "cache" entry; we are not
897          * going to evict it nor find it through _get()
898          * mechanism -- this is for the toplevel node that
899          * would almost always change with any commit.
900          */
901         struct pbase_tree_cache pcache;
902 } *pbase_tree;
904 static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
906         struct pbase_tree_cache *ent, *nent;
907         void *data;
908         unsigned long size;
909         enum object_type type;
910         int neigh;
911         int my_ix = pbase_tree_cache_ix(sha1);
912         int available_ix = -1;
914         /* pbase-tree-cache acts as a limited hashtable.
915          * your object will be found at your index or within a few
916          * slots after that slot if it is cached.
917          */
918         for (neigh = 0; neigh < 8; neigh++) {
919                 ent = pbase_tree_cache[my_ix];
920                 if (ent && !hashcmp(ent->sha1, sha1)) {
921                         ent->ref++;
922                         return ent;
923                 }
924                 else if (((available_ix < 0) && (!ent || !ent->ref)) ||
925                          ((0 <= available_ix) &&
926                           (!ent && pbase_tree_cache[available_ix])))
927                         available_ix = my_ix;
928                 if (!ent)
929                         break;
930                 my_ix = pbase_tree_cache_ix_incr(my_ix);
931         }
933         /* Did not find one.  Either we got a bogus request or
934          * we need to read and perhaps cache.
935          */
936         data = read_sha1_file(sha1, &type, &size);
937         if (!data)
938                 return NULL;
939         if (type != OBJ_TREE) {
940                 free(data);
941                 return NULL;
942         }
944         /* We need to either cache or return a throwaway copy */
946         if (available_ix < 0)
947                 ent = NULL;
948         else {
949                 ent = pbase_tree_cache[available_ix];
950                 my_ix = available_ix;
951         }
953         if (!ent) {
954                 nent = xmalloc(sizeof(*nent));
955                 nent->temporary = (available_ix < 0);
956         }
957         else {
958                 /* evict and reuse */
959                 free(ent->tree_data);
960                 nent = ent;
961         }
962         hashcpy(nent->sha1, sha1);
963         nent->tree_data = data;
964         nent->tree_size = size;
965         nent->ref = 1;
966         if (!nent->temporary)
967                 pbase_tree_cache[my_ix] = nent;
968         return nent;
971 static void pbase_tree_put(struct pbase_tree_cache *cache)
973         if (!cache->temporary) {
974                 cache->ref--;
975                 return;
976         }
977         free(cache->tree_data);
978         free(cache);
981 static int name_cmp_len(const char *name)
983         int i;
984         for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
985                 ;
986         return i;
989 static void add_pbase_object(struct tree_desc *tree,
990                              const char *name,
991                              int cmplen,
992                              const char *fullname)
994         struct name_entry entry;
995         int cmp;
997         while (tree_entry(tree,&entry)) {
998                 cmp = tree_entry_len(entry.path, entry.sha1) != cmplen ? 1 :
999                       memcmp(name, entry.path, cmplen);
1000                 if (cmp > 0)
1001                         continue;
1002                 if (cmp < 0)
1003                         return;
1004                 if (name[cmplen] != '/') {
1005                         unsigned hash = name_hash(fullname);
1006                         add_object_entry(entry.sha1,
1007                                          S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB,
1008                                          hash, 1);
1009                         return;
1010                 }
1011                 if (S_ISDIR(entry.mode)) {
1012                         struct tree_desc sub;
1013                         struct pbase_tree_cache *tree;
1014                         const char *down = name+cmplen+1;
1015                         int downlen = name_cmp_len(down);
1017                         tree = pbase_tree_get(entry.sha1);
1018                         if (!tree)
1019                                 return;
1020                         init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1022                         add_pbase_object(&sub, down, downlen, fullname);
1023                         pbase_tree_put(tree);
1024                 }
1025         }
1028 static unsigned *done_pbase_paths;
1029 static int done_pbase_paths_num;
1030 static int done_pbase_paths_alloc;
1031 static int done_pbase_path_pos(unsigned hash)
1033         int lo = 0;
1034         int hi = done_pbase_paths_num;
1035         while (lo < hi) {
1036                 int mi = (hi + lo) / 2;
1037                 if (done_pbase_paths[mi] == hash)
1038                         return mi;
1039                 if (done_pbase_paths[mi] < hash)
1040                         hi = mi;
1041                 else
1042                         lo = mi + 1;
1043         }
1044         return -lo-1;
1047 static int check_pbase_path(unsigned hash)
1049         int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
1050         if (0 <= pos)
1051                 return 1;
1052         pos = -pos - 1;
1053         if (done_pbase_paths_alloc <= done_pbase_paths_num) {
1054                 done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
1055                 done_pbase_paths = xrealloc(done_pbase_paths,
1056                                             done_pbase_paths_alloc *
1057                                             sizeof(unsigned));
1058         }
1059         done_pbase_paths_num++;
1060         if (pos < done_pbase_paths_num)
1061                 memmove(done_pbase_paths + pos + 1,
1062                         done_pbase_paths + pos,
1063                         (done_pbase_paths_num - pos - 1) * sizeof(unsigned));
1064         done_pbase_paths[pos] = hash;
1065         return 0;
1068 static void add_preferred_base_object(const char *name, unsigned hash)
1070         struct pbase_tree *it;
1071         int cmplen;
1073         if (!num_preferred_base || check_pbase_path(hash))
1074                 return;
1076         cmplen = name_cmp_len(name);
1077         for (it = pbase_tree; it; it = it->next) {
1078                 if (cmplen == 0) {
1079                         add_object_entry(it->pcache.sha1, OBJ_TREE, 0, 1);
1080                 }
1081                 else {
1082                         struct tree_desc tree;
1083                         init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1084                         add_pbase_object(&tree, name, cmplen, name);
1085                 }
1086         }
1089 static void add_preferred_base(unsigned char *sha1)
1091         struct pbase_tree *it;
1092         void *data;
1093         unsigned long size;
1094         unsigned char tree_sha1[20];
1096         if (window <= num_preferred_base++)
1097                 return;
1099         data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
1100         if (!data)
1101                 return;
1103         for (it = pbase_tree; it; it = it->next) {
1104                 if (!hashcmp(it->pcache.sha1, tree_sha1)) {
1105                         free(data);
1106                         return;
1107                 }
1108         }
1110         it = xcalloc(1, sizeof(*it));
1111         it->next = pbase_tree;
1112         pbase_tree = it;
1114         hashcpy(it->pcache.sha1, tree_sha1);
1115         it->pcache.tree_data = data;
1116         it->pcache.tree_size = size;
1119 static void check_object(struct object_entry *entry)
1121         if (entry->in_pack) {
1122                 struct packed_git *p = entry->in_pack;
1123                 struct pack_window *w_curs = NULL;
1124                 const unsigned char *base_ref = NULL;
1125                 struct object_entry *base_entry;
1126                 unsigned long used, used_0;
1127                 unsigned int avail;
1128                 off_t ofs;
1129                 unsigned char *buf, c;
1131                 buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1133                 /*
1134                  * We want in_pack_type even if we do not reuse delta.
1135                  * There is no point not reusing non-delta representations.
1136                  */
1137                 used = unpack_object_header_gently(buf, avail,
1138                                                    &entry->in_pack_type,
1139                                                    &entry->size);
1141                 /*
1142                  * Determine if this is a delta and if so whether we can
1143                  * reuse it or not.  Otherwise let's find out as cheaply as
1144                  * possible what the actual type and size for this object is.
1145                  */
1146                 switch (entry->in_pack_type) {
1147                 default:
1148                         /* Not a delta hence we've already got all we need. */
1149                         entry->type = entry->in_pack_type;
1150                         entry->in_pack_header_size = used;
1151                         unuse_pack(&w_curs);
1152                         return;
1153                 case OBJ_REF_DELTA:
1154                         if (!no_reuse_delta && !entry->preferred_base)
1155                                 base_ref = use_pack(p, &w_curs,
1156                                                 entry->in_pack_offset + used, NULL);
1157                         entry->in_pack_header_size = used + 20;
1158                         break;
1159                 case OBJ_OFS_DELTA:
1160                         buf = use_pack(p, &w_curs,
1161                                        entry->in_pack_offset + used, NULL);
1162                         used_0 = 0;
1163                         c = buf[used_0++];
1164                         ofs = c & 127;
1165                         while (c & 128) {
1166                                 ofs += 1;
1167                                 if (!ofs || MSB(ofs, 7))
1168                                         die("delta base offset overflow in pack for %s",
1169                                             sha1_to_hex(entry->sha1));
1170                                 c = buf[used_0++];
1171                                 ofs = (ofs << 7) + (c & 127);
1172                         }
1173                         if (ofs >= entry->in_pack_offset)
1174                                 die("delta base offset out of bound for %s",
1175                                     sha1_to_hex(entry->sha1));
1176                         ofs = entry->in_pack_offset - ofs;
1177                         if (!no_reuse_delta && !entry->preferred_base)
1178                                 base_ref = find_packed_object_name(p, ofs);
1179                         entry->in_pack_header_size = used + used_0;
1180                         break;
1181                 }
1183                 if (base_ref && (base_entry = locate_object_entry(base_ref))) {
1184                         /*
1185                          * If base_ref was set above that means we wish to
1186                          * reuse delta data, and we even found that base
1187                          * in the list of objects we want to pack. Goodie!
1188                          *
1189                          * Depth value does not matter - find_deltas() will
1190                          * never consider reused delta as the base object to
1191                          * deltify other objects against, in order to avoid
1192                          * circular deltas.
1193                          */
1194                         entry->type = entry->in_pack_type;
1195                         entry->delta = base_entry;
1196                         entry->delta_sibling = base_entry->delta_child;
1197                         base_entry->delta_child = entry;
1198                         unuse_pack(&w_curs);
1199                         return;
1200                 }
1202                 if (entry->type) {
1203                         /*
1204                          * This must be a delta and we already know what the
1205                          * final object type is.  Let's extract the actual
1206                          * object size from the delta header.
1207                          */
1208                         entry->size = get_size_from_delta(p, &w_curs,
1209                                         entry->in_pack_offset + entry->in_pack_header_size);
1210                         unuse_pack(&w_curs);
1211                         return;
1212                 }
1214                 /*
1215                  * No choice but to fall back to the recursive delta walk
1216                  * with sha1_object_info() to find about the object type
1217                  * at this point...
1218                  */
1219                 unuse_pack(&w_curs);
1220         }
1222         entry->type = sha1_object_info(entry->sha1, &entry->size);
1223         if (entry->type < 0)
1224                 die("unable to get type of object %s",
1225                     sha1_to_hex(entry->sha1));
1228 static int pack_offset_sort(const void *_a, const void *_b)
1230         const struct object_entry *a = *(struct object_entry **)_a;
1231         const struct object_entry *b = *(struct object_entry **)_b;
1233         /* avoid filesystem trashing with loose objects */
1234         if (!a->in_pack && !b->in_pack)
1235                 return hashcmp(a->sha1, b->sha1);
1237         if (a->in_pack < b->in_pack)
1238                 return -1;
1239         if (a->in_pack > b->in_pack)
1240                 return 1;
1241         return a->in_pack_offset < b->in_pack_offset ? -1 :
1242                         (a->in_pack_offset > b->in_pack_offset);
1245 static void get_object_details(void)
1247         uint32_t i;
1248         struct object_entry **sorted_by_offset;
1250         sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
1251         for (i = 0; i < nr_objects; i++)
1252                 sorted_by_offset[i] = objects + i;
1253         qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
1255         prepare_pack_ix();
1256         for (i = 0; i < nr_objects; i++)
1257                 check_object(sorted_by_offset[i]);
1258         free(sorted_by_offset);
1261 static int type_size_sort(const void *_a, const void *_b)
1263         const struct object_entry *a = *(struct object_entry **)_a;
1264         const struct object_entry *b = *(struct object_entry **)_b;
1266         if (a->type < b->type)
1267                 return -1;
1268         if (a->type > b->type)
1269                 return 1;
1270         if (a->hash < b->hash)
1271                 return -1;
1272         if (a->hash > b->hash)
1273                 return 1;
1274         if (a->preferred_base < b->preferred_base)
1275                 return -1;
1276         if (a->preferred_base > b->preferred_base)
1277                 return 1;
1278         if (a->size < b->size)
1279                 return -1;
1280         if (a->size > b->size)
1281                 return 1;
1282         return a > b ? -1 : (a < b);  /* newest last */
1285 struct unpacked {
1286         struct object_entry *entry;
1287         void *data;
1288         struct delta_index *index;
1289 };
1291 /*
1292  * We search for deltas _backwards_ in a list sorted by type and
1293  * by size, so that we see progressively smaller and smaller files.
1294  * That's because we prefer deltas to be from the bigger file
1295  * to the smaller - deletes are potentially cheaper, but perhaps
1296  * more importantly, the bigger file is likely the more recent
1297  * one.
1298  */
1299 static int try_delta(struct unpacked *trg, struct unpacked *src,
1300                      unsigned max_depth)
1302         struct object_entry *trg_entry = trg->entry;
1303         struct object_entry *src_entry = src->entry;
1304         unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1305         enum object_type type;
1306         void *delta_buf;
1308         /* Don't bother doing diffs between different types */
1309         if (trg_entry->type != src_entry->type)
1310                 return -1;
1312         /* We do not compute delta to *create* objects we are not
1313          * going to pack.
1314          */
1315         if (trg_entry->preferred_base)
1316                 return -1;
1318         /*
1319          * We do not bother to try a delta that we discarded
1320          * on an earlier try, but only when reusing delta data.
1321          */
1322         if (!no_reuse_delta && trg_entry->in_pack &&
1323             trg_entry->in_pack == src_entry->in_pack &&
1324             trg_entry->in_pack_type != OBJ_REF_DELTA &&
1325             trg_entry->in_pack_type != OBJ_OFS_DELTA)
1326                 return 0;
1328         /* Let's not bust the allowed depth. */
1329         if (src_entry->depth >= max_depth)
1330                 return 0;
1332         /* Now some size filtering heuristics. */
1333         trg_size = trg_entry->size;
1334         max_size = trg_size/2 - 20;
1335         max_size = max_size * (max_depth - src_entry->depth) / max_depth;
1336         if (max_size == 0)
1337                 return 0;
1338         if (trg_entry->delta && trg_entry->delta_size <= max_size)
1339                 max_size = trg_entry->delta_size-1;
1340         src_size = src_entry->size;
1341         sizediff = src_size < trg_size ? trg_size - src_size : 0;
1342         if (sizediff >= max_size)
1343                 return 0;
1345         /* Load data if not already done */
1346         if (!trg->data) {
1347                 trg->data = read_sha1_file(trg_entry->sha1, &type, &sz);
1348                 if (sz != trg_size)
1349                         die("object %s inconsistent object length (%lu vs %lu)",
1350                             sha1_to_hex(trg_entry->sha1), sz, trg_size);
1351         }
1352         if (!src->data) {
1353                 src->data = read_sha1_file(src_entry->sha1, &type, &sz);
1354                 if (sz != src_size)
1355                         die("object %s inconsistent object length (%lu vs %lu)",
1356                             sha1_to_hex(src_entry->sha1), sz, src_size);
1357         }
1358         if (!src->index) {
1359                 src->index = create_delta_index(src->data, src_size);
1360                 if (!src->index)
1361                         die("out of memory");
1362         }
1364         delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1365         if (!delta_buf)
1366                 return 0;
1368         trg_entry->delta = src_entry;
1369         trg_entry->delta_size = delta_size;
1370         trg_entry->depth = src_entry->depth + 1;
1371         free(delta_buf);
1372         return 1;
1375 static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1377         struct object_entry *child = me->delta_child;
1378         unsigned int m = n;
1379         while (child) {
1380                 unsigned int c = check_delta_limit(child, n + 1);
1381                 if (m < c)
1382                         m = c;
1383                 child = child->delta_sibling;
1384         }
1385         return m;
1388 static void find_deltas(struct object_entry **list, int window, int depth)
1390         uint32_t i = nr_objects, idx = 0, processed = 0;
1391         unsigned int array_size = window * sizeof(struct unpacked);
1392         struct unpacked *array;
1393         unsigned last_percent = 999;
1394         int max_depth;
1396         if (!nr_objects)
1397                 return;
1398         array = xmalloc(array_size);
1399         memset(array, 0, array_size);
1400         if (progress)
1401                 fprintf(stderr, "Deltifying %u objects.\n", nr_result);
1403         do {
1404                 struct object_entry *entry = list[--i];
1405                 struct unpacked *n = array + idx;
1406                 int j;
1408                 if (!entry->preferred_base)
1409                         processed++;
1411                 if (progress) {
1412                         unsigned percent = processed * 100 / nr_result;
1413                         if (percent != last_percent || progress_update) {
1414                                 fprintf(stderr, "%4u%% (%u/%u) done\r",
1415                                         percent, processed, nr_result);
1416                                 progress_update = 0;
1417                                 last_percent = percent;
1418                         }
1419                 }
1421                 if (entry->delta)
1422                         /* This happens if we decided to reuse existing
1423                          * delta from a pack.  "!no_reuse_delta &&" is implied.
1424                          */
1425                         continue;
1427                 if (entry->size < 50)
1428                         continue;
1429                 free_delta_index(n->index);
1430                 n->index = NULL;
1431                 free(n->data);
1432                 n->data = NULL;
1433                 n->entry = entry;
1435                 /*
1436                  * If the current object is at pack edge, take the depth the
1437                  * objects that depend on the current object into account
1438                  * otherwise they would become too deep.
1439                  */
1440                 max_depth = depth;
1441                 if (entry->delta_child) {
1442                         max_depth -= check_delta_limit(entry, 0);
1443                         if (max_depth <= 0)
1444                                 goto next;
1445                 }
1447                 j = window;
1448                 while (--j > 0) {
1449                         uint32_t other_idx = idx + j;
1450                         struct unpacked *m;
1451                         if (other_idx >= window)
1452                                 other_idx -= window;
1453                         m = array + other_idx;
1454                         if (!m->entry)
1455                                 break;
1456                         if (try_delta(n, m, max_depth) < 0)
1457                                 break;
1458                 }
1460                 /* if we made n a delta, and if n is already at max
1461                  * depth, leaving it in the window is pointless.  we
1462                  * should evict it first.
1463                  */
1464                 if (entry->delta && depth <= entry->depth)
1465                         continue;
1467                 next:
1468                 idx++;
1469                 if (idx >= window)
1470                         idx = 0;
1471         } while (i > 0);
1473         if (progress)
1474                 fputc('\n', stderr);
1476         for (i = 0; i < window; ++i) {
1477                 free_delta_index(array[i].index);
1478                 free(array[i].data);
1479         }
1480         free(array);
1483 static void prepare_pack(int window, int depth)
1485         struct object_entry **delta_list;
1486         uint32_t i;
1488         get_object_details();
1490         if (!window || !depth)
1491                 return;
1493         delta_list = xmalloc(nr_objects * sizeof(*delta_list));
1494         for (i = 0; i < nr_objects; i++)
1495                 delta_list[i] = objects + i;
1496         qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort);
1497         find_deltas(delta_list, window+1, depth);
1498         free(delta_list);
1501 static void progress_interval(int signum)
1503         progress_update = 1;
1506 static void setup_progress_signal(void)
1508         struct sigaction sa;
1509         struct itimerval v;
1511         memset(&sa, 0, sizeof(sa));
1512         sa.sa_handler = progress_interval;
1513         sigemptyset(&sa.sa_mask);
1514         sa.sa_flags = SA_RESTART;
1515         sigaction(SIGALRM, &sa, NULL);
1517         v.it_interval.tv_sec = 1;
1518         v.it_interval.tv_usec = 0;
1519         v.it_value = v.it_interval;
1520         setitimer(ITIMER_REAL, &v, NULL);
1523 static int git_pack_config(const char *k, const char *v)
1525         if(!strcmp(k, "pack.window")) {
1526                 window = git_config_int(k, v);
1527                 return 0;
1528         }
1529         return git_default_config(k, v);
1532 static void read_object_list_from_stdin(void)
1534         char line[40 + 1 + PATH_MAX + 2];
1535         unsigned char sha1[20];
1536         unsigned hash;
1538         for (;;) {
1539                 if (!fgets(line, sizeof(line), stdin)) {
1540                         if (feof(stdin))
1541                                 break;
1542                         if (!ferror(stdin))
1543                                 die("fgets returned NULL, not EOF, not error!");
1544                         if (errno != EINTR)
1545                                 die("fgets: %s", strerror(errno));
1546                         clearerr(stdin);
1547                         continue;
1548                 }
1549                 if (line[0] == '-') {
1550                         if (get_sha1_hex(line+1, sha1))
1551                                 die("expected edge sha1, got garbage:\n %s",
1552                                     line);
1553                         add_preferred_base(sha1);
1554                         continue;
1555                 }
1556                 if (get_sha1_hex(line, sha1))
1557                         die("expected sha1, got garbage:\n %s", line);
1559                 hash = name_hash(line+41);
1560                 add_preferred_base_object(line+41, hash);
1561                 add_object_entry(sha1, 0, hash, 0);
1562         }
1565 static void show_commit(struct commit *commit)
1567         add_object_entry(commit->object.sha1, OBJ_COMMIT, 0, 0);
1570 static void show_object(struct object_array_entry *p)
1572         unsigned hash = name_hash(p->name);
1573         add_preferred_base_object(p->name, hash);
1574         add_object_entry(p->item->sha1, p->item->type, hash, 0);
1577 static void show_edge(struct commit *commit)
1579         add_preferred_base(commit->object.sha1);
1582 static void get_object_list(int ac, const char **av)
1584         struct rev_info revs;
1585         char line[1000];
1586         int flags = 0;
1588         init_revisions(&revs, NULL);
1589         save_commit_buffer = 0;
1590         track_object_refs = 0;
1591         setup_revisions(ac, av, &revs, NULL);
1593         while (fgets(line, sizeof(line), stdin) != NULL) {
1594                 int len = strlen(line);
1595                 if (line[len - 1] == '\n')
1596                         line[--len] = 0;
1597                 if (!len)
1598                         break;
1599                 if (*line == '-') {
1600                         if (!strcmp(line, "--not")) {
1601                                 flags ^= UNINTERESTING;
1602                                 continue;
1603                         }
1604                         die("not a rev '%s'", line);
1605                 }
1606                 if (handle_revision_arg(line, &revs, flags, 1))
1607                         die("bad revision '%s'", line);
1608         }
1610         prepare_revision_walk(&revs);
1611         mark_edges_uninteresting(revs.commits, &revs, show_edge);
1612         traverse_commit_list(&revs, show_commit, show_object);
1615 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
1617         int depth = 10;
1618         int use_internal_rev_list = 0;
1619         int thin = 0;
1620         uint32_t i;
1621         off_t last_obj_offset;
1622         const char *base_name = NULL;
1623         const char **rp_av;
1624         int rp_ac_alloc = 64;
1625         int rp_ac;
1627         rp_av = xcalloc(rp_ac_alloc, sizeof(*rp_av));
1629         rp_av[0] = "pack-objects";
1630         rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
1631         rp_ac = 2;
1633         git_config(git_pack_config);
1635         progress = isatty(2);
1636         for (i = 1; i < argc; i++) {
1637                 const char *arg = argv[i];
1639                 if (*arg != '-')
1640                         break;
1642                 if (!strcmp("--non-empty", arg)) {
1643                         non_empty = 1;
1644                         continue;
1645                 }
1646                 if (!strcmp("--local", arg)) {
1647                         local = 1;
1648                         continue;
1649                 }
1650                 if (!strcmp("--incremental", arg)) {
1651                         incremental = 1;
1652                         continue;
1653                 }
1654                 if (!prefixcmp(arg, "--window=")) {
1655                         char *end;
1656                         window = strtoul(arg+9, &end, 0);
1657                         if (!arg[9] || *end)
1658                                 usage(pack_usage);
1659                         continue;
1660                 }
1661                 if (!prefixcmp(arg, "--depth=")) {
1662                         char *end;
1663                         depth = strtoul(arg+8, &end, 0);
1664                         if (!arg[8] || *end)
1665                                 usage(pack_usage);
1666                         continue;
1667                 }
1668                 if (!strcmp("--progress", arg)) {
1669                         progress = 1;
1670                         continue;
1671                 }
1672                 if (!strcmp("--all-progress", arg)) {
1673                         progress = 2;
1674                         continue;
1675                 }
1676                 if (!strcmp("-q", arg)) {
1677                         progress = 0;
1678                         continue;
1679                 }
1680                 if (!strcmp("--no-reuse-delta", arg)) {
1681                         no_reuse_delta = 1;
1682                         continue;
1683                 }
1684                 if (!strcmp("--delta-base-offset", arg)) {
1685                         allow_ofs_delta = 1;
1686                         continue;
1687                 }
1688                 if (!strcmp("--stdout", arg)) {
1689                         pack_to_stdout = 1;
1690                         continue;
1691                 }
1692                 if (!strcmp("--revs", arg)) {
1693                         use_internal_rev_list = 1;
1694                         continue;
1695                 }
1696                 if (!strcmp("--unpacked", arg) ||
1697                     !prefixcmp(arg, "--unpacked=") ||
1698                     !strcmp("--reflog", arg) ||
1699                     !strcmp("--all", arg)) {
1700                         use_internal_rev_list = 1;
1701                         if (rp_ac >= rp_ac_alloc - 1) {
1702                                 rp_ac_alloc = alloc_nr(rp_ac_alloc);
1703                                 rp_av = xrealloc(rp_av,
1704                                                  rp_ac_alloc * sizeof(*rp_av));
1705                         }
1706                         rp_av[rp_ac++] = arg;
1707                         continue;
1708                 }
1709                 if (!strcmp("--thin", arg)) {
1710                         use_internal_rev_list = 1;
1711                         thin = 1;
1712                         rp_av[1] = "--objects-edge";
1713                         continue;
1714                 }
1715                 if (!prefixcmp(arg, "--index-version=")) {
1716                         char *c;
1717                         index_default_version = strtoul(arg + 16, &c, 10);
1718                         if (index_default_version > 2)
1719                                 die("bad %s", arg);
1720                         if (*c == ',')
1721                                 index_off32_limit = strtoul(c+1, &c, 0);
1722                         if (*c || index_off32_limit & 0x80000000)
1723                                 die("bad %s", arg);
1724                         continue;
1725                 }
1726                 usage(pack_usage);
1727         }
1729         /* Traditionally "pack-objects [options] base extra" failed;
1730          * we would however want to take refs parameter that would
1731          * have been given to upstream rev-list ourselves, which means
1732          * we somehow want to say what the base name is.  So the
1733          * syntax would be:
1734          *
1735          * pack-objects [options] base <refs...>
1736          *
1737          * in other words, we would treat the first non-option as the
1738          * base_name and send everything else to the internal revision
1739          * walker.
1740          */
1742         if (!pack_to_stdout)
1743                 base_name = argv[i++];
1745         if (pack_to_stdout != !base_name)
1746                 usage(pack_usage);
1748         if (!pack_to_stdout && thin)
1749                 die("--thin cannot be used to build an indexable pack.");
1751         prepare_packed_git();
1753         if (progress) {
1754                 fprintf(stderr, "Generating pack...\n");
1755                 setup_progress_signal();
1756         }
1758         if (!use_internal_rev_list)
1759                 read_object_list_from_stdin();
1760         else {
1761                 rp_av[rp_ac] = NULL;
1762                 get_object_list(rp_ac, rp_av);
1763         }
1765         if (progress)
1766                 fprintf(stderr, "Done counting %u objects.\n", nr_objects);
1767         if (non_empty && !nr_result)
1768                 return 0;
1769         if (progress && (nr_objects != nr_result))
1770                 fprintf(stderr, "Result has %u objects.\n", nr_result);
1771         if (nr_result)
1772                 prepare_pack(window, depth);
1773         if (progress == 1 && pack_to_stdout) {
1774                 /* the other end usually displays progress itself */
1775                 struct itimerval v = {{0,},};
1776                 setitimer(ITIMER_REAL, &v, NULL);
1777                 signal(SIGALRM, SIG_IGN );
1778                 progress_update = 0;
1779         }
1780         last_obj_offset = write_pack_file();
1781         if (!pack_to_stdout) {
1782                 unsigned char object_list_sha1[20];
1783                 write_index_file(last_obj_offset, object_list_sha1);
1784                 snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
1785                          base_name, sha1_to_hex(object_list_sha1));
1786                 if (rename(pack_tmp_name, tmpname))
1787                         die("unable to rename temporary pack file: %s",
1788                             strerror(errno));
1789                 snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
1790                          base_name, sha1_to_hex(object_list_sha1));
1791                 if (rename(idx_tmp_name, tmpname))
1792                         die("unable to rename temporary index file: %s",
1793                             strerror(errno));
1794                 puts(sha1_to_hex(object_list_sha1));
1795         }
1796         if (progress)
1797                 fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
1798                         written, written_delta, reused, reused_delta);
1799         return 0;