Code

Merge branch 'jn/diffstat-tests'
[git.git] / sha1_file.c
index 88f2151ff31870138a53e40f99f5ae9928b09545..ad314f08b9abd9a16b410483f9a9629ce59345cf 100644 (file)
@@ -19,6 +19,7 @@
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
+#include "streaming.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -54,6 +55,8 @@ static struct cached_object empty_tree = {
        0
 };
 
+static struct packed_git *last_found_pack;
+
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
        int i;
@@ -720,6 +723,8 @@ void free_pack_by_name(const char *pack_name)
                        close_pack_index(p);
                        free(p->bad_object_sha1);
                        *pp = p->next;
+                       if (last_found_pack == p)
+                               last_found_pack = NULL;
                        free(p);
                        return;
                }
@@ -1142,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
        return NULL;
 }
 
-int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
+/*
+ * With an in-core object data in "map", rehash it to make sure the
+ * object name actually matches "sha1" to detect object corruption.
+ * With "map" == NULL, try reading the object named with "sha1" using
+ * the streaming interface and rehash it to do the same.
+ */
+int check_sha1_signature(const unsigned char *sha1, void *map,
+                        unsigned long size, const char *type)
 {
        unsigned char real_sha1[20];
-       hash_sha1_file(map, size, type, real_sha1);
+       enum object_type obj_type;
+       struct git_istream *st;
+       git_SHA_CTX c;
+       char hdr[32];
+       int hdrlen;
+
+       if (map) {
+               hash_sha1_file(map, size, type, real_sha1);
+               return hashcmp(sha1, real_sha1) ? -1 : 0;
+       }
+
+       st = open_istream(sha1, &obj_type, &size, NULL);
+       if (!st)
+               return -1;
+
+       /* Generate the header */
+       hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
+
+       /* Sha1.. */
+       git_SHA1_Init(&c);
+       git_SHA1_Update(&c, hdr, hdrlen);
+       for (;;) {
+               char buf[1024 * 16];
+               ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+               if (!readlen)
+                       break;
+               git_SHA1_Update(&c, buf, readlen);
+       }
+       git_SHA1_Final(real_sha1, &c);
+       close_istream(st);
        return hashcmp(sha1, real_sha1) ? -1 : 0;
 }
 
@@ -1202,6 +1244,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 
                if (!fstat(fd, &st)) {
                        *size = xsize_t(st.st_size);
+                       if (!*size) {
+                               /* mmap() is forbidden on empty files */
+                               error("object file %s is empty", sha1_file_name(sha1));
+                               return NULL;
+                       }
                        map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
                }
                close(fd);
@@ -2010,54 +2057,58 @@ int is_pack_valid(struct packed_git *p)
        return !open_packed_git(p);
 }
 
+static int fill_pack_entry(const unsigned char *sha1,
+                          struct pack_entry *e,
+                          struct packed_git *p)
+{
+       off_t offset;
+
+       if (p->num_bad_objects) {
+               unsigned i;
+               for (i = 0; i < p->num_bad_objects; i++)
+                       if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
+                               return 0;
+       }
+
+       offset = find_pack_entry_one(sha1, p);
+       if (!offset)
+               return 0;
+
+       /*
+        * We are about to tell the caller where they can locate the
+        * requested object.  We better make sure the packfile is
+        * still here and can be accessed before supplying that
+        * answer, as it may have been deleted since the index was
+        * loaded!
+        */
+       if (!is_pack_valid(p)) {
+               warning("packfile %s cannot be accessed", p->pack_name);
+               return 0;
+       }
+       e->offset = offset;
+       e->p = p;
+       hashcpy(e->sha1, sha1);
+       return 1;
+}
+
 static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 {
-       static struct packed_git *last_found = (void *)1;
        struct packed_git *p;
-       off_t offset;
 
        prepare_packed_git();
        if (!packed_git)
                return 0;
-       p = (last_found == (void *)1) ? packed_git : last_found;
 
-       do {
-               if (p->num_bad_objects) {
-                       unsigned i;
-                       for (i = 0; i < p->num_bad_objects; i++)
-                               if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
-                                       goto next;
-               }
+       if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+               return 1;
 
-               offset = find_pack_entry_one(sha1, p);
-               if (offset) {
-                       /*
-                        * We are about to tell the caller where they can
-                        * locate the requested object.  We better make
-                        * sure the packfile is still here and can be
-                        * accessed before supplying that answer, as
-                        * it may have been deleted since the index
-                        * was loaded!
-                        */
-                       if (!is_pack_valid(p)) {
-                               warning("packfile %s cannot be accessed", p->pack_name);
-                               goto next;
-                       }
-                       e->offset = offset;
-                       e->p = p;
-                       hashcpy(e->sha1, sha1);
-                       last_found = p;
-                       return 1;
-               }
+       for (p = packed_git; p; p = p->next) {
+               if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+                       continue;
 
-               next:
-               if (p == last_found)
-                       p = packed_git;
-               else
-                       p = p->next;
-               if (p == last_found)
-                       p = p->next;
-       } while (p);
+               last_found_pack = p;
+               return 1;
+       }
        return 0;
 }
 
@@ -2687,10 +2738,13 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
  * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above), but the primary
- * motivation for trying to stream from the working tree file and to
- * avoid mmaping it in core is to deal with large binary blobs, and
- * by definition they do _not_ want to get any conversion.
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
  */
 static int index_stream(unsigned char *sha1, int fd, size_t size,
                        enum object_type type, const char *path,
@@ -2707,7 +2761,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 
        if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
-       else if (size <= big_file_threshold || type != OBJ_BLOB)
+       else if (size <= big_file_threshold || type != OBJ_BLOB ||
+                (path && would_convert_to_git(path, NULL, 0, 0)))
                ret = index_core(sha1, fd, size, type, path, flags);
        else
                ret = index_stream(sha1, fd, size, type, path, flags);