Code

8f7a975f96334ed59c1df9f4c9d4d01edc05a63c
[git.git] / http-walker.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "walker.h"
4 #include "http.h"
6 #define PREV_BUF_SIZE 4096
/*
 * One base URL that objects may be fetched from: the primary remote, or an
 * alternate appended later by process_alternates_response().  Entries are
 * chained into a singly linked list.
 */
struct alt_base {
	char *base;			/* base URL of the repository */
	int got_indices;		/* non-zero once fetch_indices() has loaded the pack list */
	struct packed_git *packs;	/* pack list filled in by http_get_info_packs() */
	struct alt_base *next;		/* next base to try, or NULL */
};
/* Lifecycle of a loose-object request. */
enum object_request_state {
	WAITING = 0,	/* queued by prefetch(), transfer not started */
	ABORTED = 1,	/* start_object_request() could not start the transfer */
	ACTIVE = 2,	/* handed to a request slot */
	COMPLETE = 3	/* response processed, or object already present locally */
};
23 struct object_request
24 {
25         struct walker *walker;
26         unsigned char sha1[20];
27         struct alt_base *repo;
28         char *url;
29         char filename[PATH_MAX];
30         char tmpfile[PATH_MAX];
31         int local;
32         enum object_request_state state;
33         CURLcode curl_result;
34         char errorstr[CURL_ERROR_SIZE];
35         long http_code;
36         unsigned char real_sha1[20];
37         git_SHA_CTX c;
38         z_stream stream;
39         int zret;
40         int rename;
41         struct active_request_slot *slot;
42         struct object_request *next;
43 };
/*
 * Context handed to process_alternates_response() while the alternates
 * list of a repository is being fetched.
 */
struct alternates_request {
	struct walker *walker;		/* walker the alternates belong to */
	const char *base;		/* base URL whose alternates are fetched */
	char *url;			/* request URL buffer, rewritten for the fallback file */
	struct strbuf *buffer;		/* receives the response body */
	struct active_request_slot *slot; /* slot carrying the request */
	int http_specific;		/* 1 while fetching http-alternates, 0 for alternates */
};
/* Per-walker private data, stored in walker->data by get_http_walker(). */
struct walker_data {
	const char *url;	/* not assigned by any code in this file */
	int got_alternates;	/* -1: not fetched or failed, 0: fetch in progress, 1: loaded */
	struct alt_base *alt;	/* list of bases; the head is the primary remote */
};
/* Head of the singly linked queue of loose-object requests; NULL when empty. */
static struct object_request *object_queue_head = NULL;
62 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
63                                void *data)
64 {
65         unsigned char expn[4096];
66         size_t size = eltsize * nmemb;
67         int posn = 0;
68         struct object_request *obj_req = (struct object_request *)data;
69         do {
70                 ssize_t retval = xwrite(obj_req->local,
71                                         (char *) ptr + posn, size - posn);
72                 if (retval < 0)
73                         return posn;
74                 posn += retval;
75         } while (posn < size);
77         obj_req->stream.avail_in = size;
78         obj_req->stream.next_in = ptr;
79         do {
80                 obj_req->stream.next_out = expn;
81                 obj_req->stream.avail_out = sizeof(expn);
82                 obj_req->zret = git_inflate(&obj_req->stream, Z_SYNC_FLUSH);
83                 git_SHA1_Update(&obj_req->c, expn,
84                                 sizeof(expn) - obj_req->stream.avail_out);
85         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
86         data_received++;
87         return size;
88 }
/*
 * Forward declarations: both functions are defined further down but are
 * referenced by the object request code that comes first.
 */
static void fetch_alternates(struct walker *walker, const char *base);
static void process_object_response(void *callback_data);
94 static void start_object_request(struct walker *walker,
95                                  struct object_request *obj_req)
96 {
97         char *hex = sha1_to_hex(obj_req->sha1);
98         char prevfile[PATH_MAX];
99         char *url;
100         char *posn;
101         int prevlocal;
102         unsigned char prev_buf[PREV_BUF_SIZE];
103         ssize_t prev_read = 0;
104         long prev_posn = 0;
105         char range[RANGE_HEADER_SIZE];
106         struct curl_slist *range_header = NULL;
107         struct active_request_slot *slot;
109         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
110         unlink_or_warn(prevfile);
111         rename(obj_req->tmpfile, prevfile);
112         unlink_or_warn(obj_req->tmpfile);
114         if (obj_req->local != -1)
115                 error("fd leakage in start: %d", obj_req->local);
116         obj_req->local = open(obj_req->tmpfile,
117                               O_WRONLY | O_CREAT | O_EXCL, 0666);
118         /*
119          * This could have failed due to the "lazy directory creation";
120          * try to mkdir the last path component.
121          */
122         if (obj_req->local < 0 && errno == ENOENT) {
123                 char *dir = strrchr(obj_req->tmpfile, '/');
124                 if (dir) {
125                         *dir = 0;
126                         mkdir(obj_req->tmpfile, 0777);
127                         *dir = '/';
128                 }
129                 obj_req->local = open(obj_req->tmpfile,
130                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
131         }
133         if (obj_req->local < 0) {
134                 obj_req->state = ABORTED;
135                 error("Couldn't create temporary file %s for %s: %s",
136                       obj_req->tmpfile, obj_req->filename, strerror(errno));
137                 return;
138         }
140         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
142         git_inflate_init(&obj_req->stream);
144         git_SHA1_Init(&obj_req->c);
146         url = xmalloc(strlen(obj_req->repo->base) + 51);
147         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51);
148         strcpy(url, obj_req->repo->base);
149         posn = url + strlen(obj_req->repo->base);
150         strcpy(posn, "/objects/");
151         posn += 9;
152         memcpy(posn, hex, 2);
153         posn += 2;
154         *(posn++) = '/';
155         strcpy(posn, hex + 2);
156         strcpy(obj_req->url, url);
158         /*
159          * If a previous temp file is present, process what was already
160          * fetched.
161          */
162         prevlocal = open(prevfile, O_RDONLY);
163         if (prevlocal != -1) {
164                 do {
165                         prev_read = xread(prevlocal, prev_buf, PREV_BUF_SIZE);
166                         if (prev_read>0) {
167                                 if (fwrite_sha1_file(prev_buf,
168                                                      1,
169                                                      prev_read,
170                                                      obj_req) == prev_read)
171                                         prev_posn += prev_read;
172                                 else
173                                         prev_read = -1;
174                         }
175                 } while (prev_read > 0);
176                 close(prevlocal);
177         }
178         unlink_or_warn(prevfile);
180         /*
181          * Reset inflate/SHA1 if there was an error reading the previous temp
182          * file; also rewind to the beginning of the local file.
183          */
184         if (prev_read == -1) {
185                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
186                 git_inflate_init(&obj_req->stream);
187                 git_SHA1_Init(&obj_req->c);
188                 if (prev_posn>0) {
189                         prev_posn = 0;
190                         lseek(obj_req->local, 0, SEEK_SET);
191                         ftruncate(obj_req->local, 0);
192                 }
193         }
195         slot = get_active_slot();
196         slot->callback_func = process_object_response;
197         slot->callback_data = obj_req;
198         obj_req->slot = slot;
200         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
201         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
202         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
203         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
204         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
206         /*
207          * If we have successfully processed data from a previous fetch
208          * attempt, only fetch the data we don't already have.
209          */
210         if (prev_posn>0) {
211                 if (walker->get_verbosely)
212                         fprintf(stderr,
213                                 "Resuming fetch of object %s at byte %ld\n",
214                                 hex, prev_posn);
215                 sprintf(range, "Range: bytes=%ld-", prev_posn);
216                 range_header = curl_slist_append(range_header, range);
217                 curl_easy_setopt(slot->curl,
218                                  CURLOPT_HTTPHEADER, range_header);
219         }
221         /* Try to get the request started, abort the request on error */
222         obj_req->state = ACTIVE;
223         if (!start_active_slot(slot)) {
224                 obj_req->state = ABORTED;
225                 obj_req->slot = NULL;
226                 close(obj_req->local);
227                 obj_req->local = -1;
228                 free(obj_req->url);
229                 return;
230         }
233 static void finish_object_request(struct object_request *obj_req)
235         struct stat st;
237         close(obj_req->local);
238         obj_req->local = -1;
240         if (obj_req->http_code == 416) {
241                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
242         } else if (obj_req->curl_result != CURLE_OK) {
243                 if (stat(obj_req->tmpfile, &st) == 0)
244                         if (st.st_size == 0)
245                                 unlink_or_warn(obj_req->tmpfile);
246                 return;
247         }
249         git_inflate_end(&obj_req->stream);
250         git_SHA1_Final(obj_req->real_sha1, &obj_req->c);
251         if (obj_req->zret != Z_STREAM_END) {
252                 unlink_or_warn(obj_req->tmpfile);
253                 return;
254         }
255         if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
256                 unlink_or_warn(obj_req->tmpfile);
257                 return;
258         }
259         obj_req->rename =
260                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
262         if (obj_req->rename == 0)
263                 walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
266 static void process_object_response(void *callback_data)
268         struct object_request *obj_req =
269                 (struct object_request *)callback_data;
270         struct walker *walker = obj_req->walker;
271         struct walker_data *data = walker->data;
272         struct alt_base *alt = data->alt;
274         obj_req->curl_result = obj_req->slot->curl_result;
275         obj_req->http_code = obj_req->slot->http_code;
276         obj_req->slot = NULL;
277         obj_req->state = COMPLETE;
279         /* Use alternates if necessary */
280         if (missing_target(obj_req)) {
281                 fetch_alternates(walker, alt->base);
282                 if (obj_req->repo->next != NULL) {
283                         obj_req->repo =
284                                 obj_req->repo->next;
285                         close(obj_req->local);
286                         obj_req->local = -1;
287                         start_object_request(walker, obj_req);
288                         return;
289                 }
290         }
292         finish_object_request(obj_req);
295 static void release_object_request(struct object_request *obj_req)
297         struct object_request *entry = object_queue_head;
299         if (obj_req->local != -1)
300                 error("fd leakage in release: %d", obj_req->local);
301         if (obj_req == object_queue_head) {
302                 object_queue_head = obj_req->next;
303         } else {
304                 while (entry->next != NULL && entry->next != obj_req)
305                         entry = entry->next;
306                 if (entry->next == obj_req)
307                         entry->next = entry->next->next;
308         }
310         free(obj_req->url);
311         free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Fill function registered with add_fill_function(): start the first
 * waiting request whose object is not available locally yet.  Waiting
 * requests for objects that are already present are marked COMPLETE.
 *
 * Returns 1 when a request was started, 0 when nothing was left to start.
 */
static int fill_active_slot(struct walker *walker)
{
	struct object_request *obj_req = object_queue_head;

	while (obj_req) {
		if (obj_req->state == WAITING) {
			if (!has_sha1_file(obj_req->sha1)) {
				start_object_request(walker, obj_req);
				return 1;
			}
			obj_req->state = COMPLETE;
		}
		obj_req = obj_req->next;
	}
	return 0;
}
#endif
333 static void prefetch(struct walker *walker, unsigned char *sha1)
335         struct object_request *newreq;
336         struct object_request *tail;
337         struct walker_data *data = walker->data;
338         char *filename = sha1_file_name(sha1);
340         newreq = xmalloc(sizeof(*newreq));
341         newreq->walker = walker;
342         hashcpy(newreq->sha1, sha1);
343         newreq->repo = data->alt;
344         newreq->url = NULL;
345         newreq->local = -1;
346         newreq->state = WAITING;
347         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
348         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
349                  "%s.temp", filename);
350         newreq->slot = NULL;
351         newreq->next = NULL;
353         http_is_verbose = walker->get_verbosely;
355         if (object_queue_head == NULL) {
356                 object_queue_head = newreq;
357         } else {
358                 tail = object_queue_head;
359                 while (tail->next != NULL)
360                         tail = tail->next;
361                 tail->next = newreq;
362         }
364 #ifdef USE_CURL_MULTI
365         fill_active_slots();
366         step_active_slots();
367 #endif
370 static void process_alternates_response(void *callback_data)
372         struct alternates_request *alt_req =
373                 (struct alternates_request *)callback_data;
374         struct walker *walker = alt_req->walker;
375         struct walker_data *cdata = walker->data;
376         struct active_request_slot *slot = alt_req->slot;
377         struct alt_base *tail = cdata->alt;
378         const char *base = alt_req->base;
379         static const char null_byte = '\0';
380         char *data;
381         int i = 0;
383         if (alt_req->http_specific) {
384                 if (slot->curl_result != CURLE_OK ||
385                     !alt_req->buffer->len) {
387                         /* Try reusing the slot to get non-http alternates */
388                         alt_req->http_specific = 0;
389                         sprintf(alt_req->url, "%s/objects/info/alternates",
390                                 base);
391                         curl_easy_setopt(slot->curl, CURLOPT_URL,
392                                          alt_req->url);
393                         active_requests++;
394                         slot->in_use = 1;
395                         if (slot->finished != NULL)
396                                 (*slot->finished) = 0;
397                         if (!start_active_slot(slot)) {
398                                 cdata->got_alternates = -1;
399                                 slot->in_use = 0;
400                                 if (slot->finished != NULL)
401                                         (*slot->finished) = 1;
402                         }
403                         return;
404                 }
405         } else if (slot->curl_result != CURLE_OK) {
406                 if (!missing_target(slot)) {
407                         cdata->got_alternates = -1;
408                         return;
409                 }
410         }
412         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
413         alt_req->buffer->len--;
414         data = alt_req->buffer->buf;
416         while (i < alt_req->buffer->len) {
417                 int posn = i;
418                 while (posn < alt_req->buffer->len && data[posn] != '\n')
419                         posn++;
420                 if (data[posn] == '\n') {
421                         int okay = 0;
422                         int serverlen = 0;
423                         struct alt_base *newalt;
424                         char *target = NULL;
425                         if (data[i] == '/') {
426                                 /*
427                                  * This counts
428                                  * http://git.host/pub/scm/linux.git/
429                                  * -----------here^
430                                  * so memcpy(dst, base, serverlen) will
431                                  * copy up to "...git.host".
432                                  */
433                                 const char *colon_ss = strstr(base,"://");
434                                 if (colon_ss) {
435                                         serverlen = (strchr(colon_ss + 3, '/')
436                                                      - base);
437                                         okay = 1;
438                                 }
439                         } else if (!memcmp(data + i, "../", 3)) {
440                                 /*
441                                  * Relative URL; chop the corresponding
442                                  * number of subpath from base (and ../
443                                  * from data), and concatenate the result.
444                                  *
445                                  * The code first drops ../ from data, and
446                                  * then drops one ../ from data and one path
447                                  * from base.  IOW, one extra ../ is dropped
448                                  * from data than path is dropped from base.
449                                  *
450                                  * This is not wrong.  The alternate in
451                                  *     http://git.host/pub/scm/linux.git/
452                                  * to borrow from
453                                  *     http://git.host/pub/scm/linus.git/
454                                  * is ../../linus.git/objects/.  You need
455                                  * two ../../ to borrow from your direct
456                                  * neighbour.
457                                  */
458                                 i += 3;
459                                 serverlen = strlen(base);
460                                 while (i + 2 < posn &&
461                                        !memcmp(data + i, "../", 3)) {
462                                         do {
463                                                 serverlen--;
464                                         } while (serverlen &&
465                                                  base[serverlen - 1] != '/');
466                                         i += 3;
467                                 }
468                                 /* If the server got removed, give up. */
469                                 okay = strchr(base, ':') - base + 3 <
470                                        serverlen;
471                         } else if (alt_req->http_specific) {
472                                 char *colon = strchr(data + i, ':');
473                                 char *slash = strchr(data + i, '/');
474                                 if (colon && slash && colon < data + posn &&
475                                     slash < data + posn && colon < slash) {
476                                         okay = 1;
477                                 }
478                         }
479                         /* skip "objects\n" at end */
480                         if (okay) {
481                                 target = xmalloc(serverlen + posn - i - 6);
482                                 memcpy(target, base, serverlen);
483                                 memcpy(target + serverlen, data + i,
484                                        posn - i - 7);
485                                 target[serverlen + posn - i - 7] = 0;
486                                 if (walker->get_verbosely)
487                                         fprintf(stderr,
488                                                 "Also look at %s\n", target);
489                                 newalt = xmalloc(sizeof(*newalt));
490                                 newalt->next = NULL;
491                                 newalt->base = target;
492                                 newalt->got_indices = 0;
493                                 newalt->packs = NULL;
495                                 while (tail->next != NULL)
496                                         tail = tail->next;
497                                 tail->next = newalt;
498                         }
499                 }
500                 i = posn + 1;
501         }
503         cdata->got_alternates = 1;
506 static void fetch_alternates(struct walker *walker, const char *base)
508         struct strbuf buffer = STRBUF_INIT;
509         char *url;
510         struct active_request_slot *slot;
511         struct alternates_request alt_req;
512         struct walker_data *cdata = walker->data;
514         /*
515          * If another request has already started fetching alternates,
516          * wait for them to arrive and return to processing this request's
517          * curl message
518          */
519 #ifdef USE_CURL_MULTI
520         while (cdata->got_alternates == 0) {
521                 step_active_slots();
522         }
523 #endif
525         /* Nothing to do if they've already been fetched */
526         if (cdata->got_alternates == 1)
527                 return;
529         /* Start the fetch */
530         cdata->got_alternates = 0;
532         if (walker->get_verbosely)
533                 fprintf(stderr, "Getting alternates list for %s\n", base);
535         url = xmalloc(strlen(base) + 31);
536         sprintf(url, "%s/objects/info/http-alternates", base);
538         /*
539          * Use a callback to process the result, since another request
540          * may fail and need to have alternates loaded before continuing
541          */
542         slot = get_active_slot();
543         slot->callback_func = process_alternates_response;
544         alt_req.walker = walker;
545         slot->callback_data = &alt_req;
547         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
548         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
549         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
551         alt_req.base = base;
552         alt_req.url = url;
553         alt_req.buffer = &buffer;
554         alt_req.http_specific = 1;
555         alt_req.slot = slot;
557         if (start_active_slot(slot))
558                 run_active_slot(slot);
559         else
560                 cdata->got_alternates = -1;
562         strbuf_release(&buffer);
563         free(url);
566 static int fetch_indices(struct walker *walker, struct alt_base *repo)
568         int ret;
570         if (repo->got_indices)
571                 return 0;
573         if (walker->get_verbosely)
574                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
576         switch (http_get_info_packs(repo->base, &repo->packs)) {
577         case HTTP_OK:
578         case HTTP_MISSING_TARGET:
579                 repo->got_indices = 1;
580                 ret = 0;
581                 break;
582         default:
583                 repo->got_indices = 0;
584                 ret = -1;
585         }
587         return ret;
590 static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
592         struct packed_git *target;
593         int ret;
594         struct slot_results results;
595         struct http_pack_request *preq;
597         if (fetch_indices(walker, repo))
598                 return -1;
599         target = find_sha1_pack(sha1, repo->packs);
600         if (!target)
601                 return -1;
603         if (walker->get_verbosely) {
604                 fprintf(stderr, "Getting pack %s\n",
605                         sha1_to_hex(target->sha1));
606                 fprintf(stderr, " which contains %s\n",
607                         sha1_to_hex(sha1));
608         }
610         preq = new_http_pack_request(target, repo->base);
611         if (preq == NULL)
612                 goto abort;
613         preq->lst = &repo->packs;
614         preq->slot->results = &results;
616         if (start_active_slot(preq->slot)) {
617                 run_active_slot(preq->slot);
618                 if (results.curl_result != CURLE_OK) {
619                         error("Unable to get pack file %s\n%s", preq->url,
620                               curl_errorstr);
621                         goto abort;
622                 }
623         } else {
624                 error("Unable to start request");
625                 goto abort;
626         }
628         ret = finish_http_pack_request(preq);
629         release_http_pack_request(preq);
630         if (ret)
631                 return ret;
633         return 0;
635 abort:
636         return -1;
639 static void abort_object_request(struct object_request *obj_req)
641         if (obj_req->local >= 0) {
642                 close(obj_req->local);
643                 obj_req->local = -1;
644         }
645         unlink_or_warn(obj_req->tmpfile);
646         if (obj_req->slot) {
647                 release_active_slot(obj_req->slot);
648                 obj_req->slot = NULL;
649         }
650         release_object_request(obj_req);
653 static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
655         char *hex = sha1_to_hex(sha1);
656         int ret = 0;
657         struct object_request *obj_req = object_queue_head;
659         while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
660                 obj_req = obj_req->next;
661         if (obj_req == NULL)
662                 return error("Couldn't find request for %s in the queue", hex);
664         if (has_sha1_file(obj_req->sha1)) {
665                 abort_object_request(obj_req);
666                 return 0;
667         }
669 #ifdef USE_CURL_MULTI
670         while (obj_req->state == WAITING)
671                 step_active_slots();
672 #else
673         start_object_request(walker, obj_req);
674 #endif
676         while (obj_req->state == ACTIVE)
677                 run_active_slot(obj_req->slot);
679         if (obj_req->local != -1) {
680                 close(obj_req->local);
681                 obj_req->local = -1;
682         }
684         if (obj_req->state == ABORTED) {
685                 ret = error("Request for %s aborted", hex);
686         } else if (obj_req->curl_result != CURLE_OK &&
687                    obj_req->http_code != 416) {
688                 if (missing_target(obj_req))
689                         ret = -1; /* Be silent, it is probably in a pack. */
690                 else
691                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
692                                     obj_req->errorstr, obj_req->curl_result,
693                                     obj_req->http_code, hex);
694         } else if (obj_req->zret != Z_STREAM_END) {
695                 walker->corrupt_object_found++;
696                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
697         } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
698                 ret = error("File %s has bad hash", hex);
699         } else if (obj_req->rename < 0) {
700                 ret = error("unable to write sha1 filename %s",
701                             obj_req->filename);
702         }
704         release_object_request(obj_req);
705         return ret;
708 static int fetch(struct walker *walker, unsigned char *sha1)
710         struct walker_data *data = walker->data;
711         struct alt_base *altbase = data->alt;
713         if (!fetch_object(walker, altbase, sha1))
714                 return 0;
715         while (altbase) {
716                 if (!fetch_pack(walker, altbase, sha1))
717                         return 0;
718                 fetch_alternates(walker, data->alt->base);
719                 altbase = altbase->next;
720         }
721         return error("Unable to find %s under %s", sha1_to_hex(sha1),
722                      data->alt->base);
725 static int fetch_ref(struct walker *walker, struct ref *ref)
727         struct walker_data *data = walker->data;
728         return http_fetch_ref(data->alt->base, ref);
/* walker->cleanup implementation: shut the HTTP layer down. */
static void cleanup(struct walker *walker)
{
	(void)walker;	/* not needed here */
	http_cleanup();
}
736 struct walker *get_http_walker(const char *url, struct remote *remote)
738         char *s;
739         struct walker_data *data = xmalloc(sizeof(struct walker_data));
740         struct walker *walker = xmalloc(sizeof(struct walker));
742         http_init(remote);
744         data->alt = xmalloc(sizeof(*data->alt));
745         data->alt->base = xmalloc(strlen(url) + 1);
746         strcpy(data->alt->base, url);
747         for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
748                 *s = 0;
750         data->alt->got_indices = 0;
751         data->alt->packs = NULL;
752         data->alt->next = NULL;
753         data->got_alternates = -1;
755         walker->corrupt_object_found = 0;
756         walker->fetch = fetch;
757         walker->fetch_ref = fetch_ref;
758         walker->prefetch = prefetch;
759         walker->cleanup = cleanup;
760         walker->data = data;
762 #ifdef USE_CURL_MULTI
763         add_fill_function(walker, (int (*)(void *)) fill_active_slot);
764 #endif
766         return walker;