Code

[PATCH] Find size of SHA1 object without inflating everything.
author Junio C Hamano <junkio@cox.net>
Thu, 2 Jun 2005 22:20:54 +0000 (15:20 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Thu, 2 Jun 2005 22:48:33 +0000 (15:48 -0700)
This adds the sha1_file_size() helper function and uses it in the
rename/copy similarity estimator.  The helper function handles
deltified objects as well.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
cache.h
diff.c
sha1_file.c

diff --git a/cache.h b/cache.h
index e54176b4b572e7681ecbed1832b1b4ef4951d8bc..481f7c787040aadbbea877adbb3b9a4fd5f9b9d0 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -154,6 +154,7 @@ extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(z_stream *stream, void *map, unsigned long mapsize, void *buffer, unsigned long size);
 extern int parse_sha1_header(char *hdr, char *type, unsigned long *sizep);
 extern int sha1_delta_base(const unsigned char *, unsigned char *);
+extern int sha1_file_size(const unsigned char *, unsigned long *);
 extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
 extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
 extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
diff --git a/diff.c b/diff.c
index 7cf40daee5f2f7317b1c7558428f5dd184b68a67..5513632b9fa1892de7eb1b54bd2c5f6eae6cd5f4 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -333,7 +333,6 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
                close(fd);
        }
        else {
-               /* We cannot do size only for SHA1 blobs */
                char type[20];
                struct sha1_size_cache *e;
 
@@ -343,11 +342,13 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
                                s->size = e->size;
                                return 0;
                        }
+                       if (!sha1_file_size(s->sha1, &s->size))
+                               locate_size_cache(s->sha1, s->size);
+               }
+               else {
+                       s->data = read_sha1_file(s->sha1, type, &s->size);
+                       s->should_free = 1;
                }
-               s->data = read_sha1_file(s->sha1, type, &s->size);
-               s->should_free = 1;
-               if (s->data && size_only)
-                       locate_size_cache(s->sha1, s->size);
        }
        return 0;
 }
diff --git a/sha1_file.c b/sha1_file.c
index ccfcca07c76297ea9295f7bcf174a35edf814b2e..a2ba4c81dba1b55b119d9ec3c42a7e4ce4ca1df5 100644 (file)
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -432,6 +432,66 @@ int sha1_delta_base(const unsigned char *sha1, unsigned char *base_sha1)
        return ret;
 }
 
+/* Report in *sizep the unpacked size of object sha1 by inflating only its
+ * header; a delta yields its result size.  Returns 0 on success, else -1. */
+int sha1_file_size(const unsigned char *sha1, unsigned long *sizep)
+{
+       int ret, status = -1;
+       unsigned long mapsize, size;
+       void *map;
+       z_stream stream;
+       char hdr[64], type[20];
+       const unsigned char *buf = (const unsigned char *)hdr;
+       unsigned char cmd;
+       size_t pos, shift;
+
+       map = map_sha1_file(sha1, &mapsize);
+       if (!map)
+               return -1;
+       ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
+       if (ret < Z_OK || parse_sha1_header(hdr, type, &size) < 0)
+               goto out;
+       if (strcmp(type, "delta")) {
+               *sizep = size;
+               status = 0;
+               goto out;
+       }
+
+       /* We are dealing with a delta object.  Inflated, the 20 bytes after
+        * the NUL-terminated header hold the base object SHA1, and the delta
+        * data follows.  Reads stay inside hdr[]; running off it is an error.
+        */
+       pos = strlen(hdr) + 1 + 20;
+       if (pos >= sizeof(hdr))
+               goto out;
+       /* Skip over the source size; we are not interested in it and
+        * cannot verify it because we do not want to read the base object.
+        */
+       for (cmd = buf[pos++]; cmd; cmd >>= 1)
+               if (cmd & 1)
+                       pos++;
+       if (pos >= sizeof(hdr))
+               goto out;
+
+       /* Read the result size: one byte per set bit of cmd, least
+        * significant first.  Widen each byte so the shift is not done in int.
+        */
+       size = 0;
+       cmd = buf[pos++];
+       for (shift = 0; cmd; shift += 8, cmd >>= 1) {
+               if (!(cmd & 1))
+                       continue;
+               if (pos >= sizeof(hdr) || shift >= 8 * sizeof(size))
+                       goto out;
+               size |= (unsigned long)buf[pos++] << shift;
+       }
+       *sizep = size;
+       status = 0;
+ out:
+       inflateEnd(&stream);
+       munmap(map, mapsize);
+       return status;
+}
+
 void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
 {
        unsigned long mapsize;