Code

[PATCH] don't load and decompress objects twice with parse_object()
author Nicolas Pitre <nico@cam.org>
Fri, 6 May 2005 17:48:34 +0000 (13:48 -0400)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Fri, 6 May 2005 18:02:01 +0000 (11:02 -0700)
It turns out that parse_object() loads and decompresses the given
object, only to free it just before calling the type-specific parsing
function, which then mmaps and decompresses the same object again. This
patch introduces the ability to parse specific objects directly from a
memory buffer.

Without this patch, running git-fsck-cache on the kernel repository takes:

real    0m13.006s
user    0m11.421s
sys     0m1.218s

With this patch applied:

real    0m8.060s
user    0m7.071s
sys     0m0.710s

The performance increase is significant, and this is kind of a
prerequisite for sane delta object support with fsck.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
blob.c
blob.h
commit.c
commit.h
object.c
tag.c
tag.h
tree.c
tree.h

diff --git a/blob.c b/blob.c
index 625db43971182e14eacc64f6514c16dd45644c86..280f5241577ac029e9d5a7eb5bf895642b342fc8 100644 (file)
--- a/blob.c
+++ b/blob.c
@@ -22,21 +22,29 @@ struct blob *lookup_blob(unsigned char *sha1)
        return (struct blob *) obj;
 }
 
+int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size)
+{
+       item->object.parsed = 1;
+       return 0;
+}
+
 int parse_blob(struct blob *item)
 {
         char type[20];
         void *buffer;
         unsigned long size;
+       int ret;
+
         if (item->object.parsed)
                 return 0;
-        item->object.parsed = 1;
         buffer = read_sha1_file(item->object.sha1, type, &size);
         if (!buffer)
                 return error("Could not read %s",
                              sha1_to_hex(item->object.sha1));
-       free(buffer);
         if (strcmp(type, blob_type))
                 return error("Object %s not a blob",
                              sha1_to_hex(item->object.sha1));
-       return 0;
+       ret = parse_blob_buffer(item, buffer, size);
+       free(buffer);
+       return ret;
 }
diff --git a/blob.h b/blob.h
index 4afad0f067e1979d7ccc4778e254cce6962ad136..8004c835e73d44f3d9f0adf533cbf2475c288b42 100644 (file)
--- a/blob.h
+++ b/blob.h
@@ -11,6 +11,8 @@ struct blob {
 
 struct blob *lookup_blob(unsigned char *sha1);
 
+int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size);
+
 int parse_blob(struct blob *item);
 
 #endif /* BLOB_H */
diff --git a/commit.c b/commit.c
index b45118a544276797f6bc9c0f482c187647f0e88d..706c7cba08ebc2100c2dbf63ed1238f39324f750 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -41,24 +41,14 @@ static unsigned long parse_commit_date(const char *buf)
        return date;
 }
 
-int parse_commit(struct commit *item)
+int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
 {
-       char type[20];
-       void * buffer, *bufptr;
-       unsigned long size;
+       void *bufptr = buffer;
        unsigned char parent[20];
+
        if (item->object.parsed)
                return 0;
        item->object.parsed = 1;
-       buffer = bufptr = read_sha1_file(item->object.sha1, type, &size);
-       if (!buffer)
-               return error("Could not read %s",
-                            sha1_to_hex(item->object.sha1));
-       if (strcmp(type, commit_type)) {
-               free(buffer);
-               return error("Object %s not a commit",
-                            sha1_to_hex(item->object.sha1));
-       }
        get_sha1_hex(bufptr + 5, parent);
        item->tree = lookup_tree(parent);
        if (item->tree)
@@ -74,10 +64,32 @@ int parse_commit(struct commit *item)
                bufptr += 48;
        }
        item->date = parse_commit_date(bufptr);
-       free(buffer);
        return 0;
 }
 
+int parse_commit(struct commit *item)
+{
+       char type[20];
+       void *buffer;
+       unsigned long size;
+       int ret;
+
+       if (item->object.parsed)
+               return 0;
+       buffer = read_sha1_file(item->object.sha1, type, &size);
+       if (!buffer)
+               return error("Could not read %s",
+                            sha1_to_hex(item->object.sha1));
+       if (strcmp(type, commit_type)) {
+               free(buffer);
+               return error("Object %s not a commit",
+                            sha1_to_hex(item->object.sha1));
+       }
+       ret = parse_commit_buffer(item, buffer, size);
+       free(buffer);
+       return ret;
+}
+
 void commit_list_insert(struct commit *item, struct commit_list **list_p)
 {
        struct commit_list *new_list = xmalloc(sizeof(struct commit_list));
diff --git a/commit.h b/commit.h
index d61d084c89c72f4fe79c654db721df31c4f04224..ce0b436711161d667eb9c067d343b82bad20f21d 100644 (file)
--- a/commit.h
+++ b/commit.h
@@ -20,6 +20,8 @@ extern const char *commit_type;
 
 struct commit *lookup_commit(unsigned char *sha1);
 
+int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size);
+
 int parse_commit(struct commit *item);
 
 void commit_list_insert(struct commit *item, struct commit_list **list_p);
diff --git a/object.c b/object.c
index c1f22910a2e207997fbc627ee8150a81b813c3b0..b5a62e7f87f24c2ab0ea83f3c445d81bcbff027a 100644 (file)
--- a/object.c
+++ b/object.c
@@ -104,6 +104,7 @@ struct object *parse_object(unsigned char *sha1)
        unsigned long mapsize;
        void *map = map_sha1_file(sha1, &mapsize);
        if (map) {
+               struct object *obj;
                char type[100];
                unsigned long size;
                void *buffer = unpack_sha1_file(map, mapsize, type, &size);
@@ -112,26 +113,27 @@ struct object *parse_object(unsigned char *sha1)
                        return NULL;
                if (check_sha1_signature(sha1, buffer, size, type) < 0)
                        printf("sha1 mismatch %s\n", sha1_to_hex(sha1));
-               free(buffer);
                if (!strcmp(type, "blob")) {
-                       struct blob *ret = lookup_blob(sha1);
-                       parse_blob(ret);
-                       return &ret->object;
+                       struct blob *blob = lookup_blob(sha1);
+                       parse_blob_buffer(blob, buffer, size);
+                       obj = &blob->object;
                } else if (!strcmp(type, "tree")) {
-                       struct tree *ret = lookup_tree(sha1);
-                       parse_tree(ret);
-                       return &ret->object;
+                       struct tree *tree = lookup_tree(sha1);
+                       parse_tree_buffer(tree, buffer, size);
+                       obj = &tree->object;
                } else if (!strcmp(type, "commit")) {
-                       struct commit *ret = lookup_commit(sha1);
-                       parse_commit(ret);
-                       return &ret->object;
+                       struct commit *commit = lookup_commit(sha1);
+                       parse_commit_buffer(commit, buffer, size);
+                       obj = &commit->object;
                } else if (!strcmp(type, "tag")) {
-                       struct tag *ret = lookup_tag(sha1);
-                       parse_tag(ret);
-                       return &ret->object;
+                       struct tag *tag = lookup_tag(sha1);
+                       parse_tag_buffer(tag, buffer, size);
+                       obj = &tag->object;
                } else {
-                       return NULL;
+                       obj = NULL;
                }
+               free(buffer);
+               return obj;
        }
        return NULL;
 }
diff --git a/tag.c b/tag.c
index 3a71dd416ad18d86ed28ed6593e4588151cc43ea..22deb243ad58b2c57fb8652fe2d08c571ee3e781 100644 (file)
--- a/tag.c
+++ b/tag.c
@@ -21,11 +21,8 @@ struct tag *lookup_tag(unsigned char *sha1)
         return (struct tag *) obj;
 }
 
-int parse_tag(struct tag *item)
+int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
 {
-        char type[20];
-        void *data, *bufptr;
-        unsigned long size;
        int typelen, taglen;
        unsigned char object[20];
        const char *type_line, *tag_line, *sig_line;
@@ -33,20 +30,11 @@ int parse_tag(struct tag *item)
         if (item->object.parsed)
                 return 0;
         item->object.parsed = 1;
-        data = bufptr = read_sha1_file(item->object.sha1, type, &size);
-        if (!data)
-                return error("Could not read %s",
-                             sha1_to_hex(item->object.sha1));
-        if (strcmp(type, tag_type)) {
-               free(data);
-                return error("Object %s not a tag",
-                             sha1_to_hex(item->object.sha1));
-       }
 
        if (size < 64)
-               goto err;
+               return -1;
        if (memcmp("object ", data, 7) || get_sha1_hex(data + 7, object))
-               goto err;
+               return -1;
 
        item->tagged = parse_object(object);
        if (item->tagged)
@@ -54,29 +42,47 @@ int parse_tag(struct tag *item)
 
        type_line = data + 48;
        if (memcmp("\ntype ", type_line-1, 6))
-               goto err;
+               return -1;
 
        tag_line = strchr(type_line, '\n');
        if (!tag_line || memcmp("tag ", ++tag_line, 4))
-               goto err;
+               return -1;
 
        sig_line = strchr(tag_line, '\n');
        if (!sig_line)
-               goto err;
+               return -1;
        sig_line++;
 
        typelen = tag_line - type_line - strlen("type \n");
        if (typelen >= 20)
-               goto err;
+               return -1;
        taglen = sig_line - tag_line - strlen("tag \n");
        item->tag = xmalloc(taglen + 1);
        memcpy(item->tag, tag_line + 4, taglen);
        item->tag[taglen] = '\0';
 
-       free(data);
        return 0;
+}
 
-err:
+int parse_tag(struct tag *item)
+{
+       char type[20];
+       void *data;
+       unsigned long size;
+       int ret;
+
+       if (item->object.parsed)
+               return 0;
+       data = read_sha1_file(item->object.sha1, type, &size);
+       if (!data)
+               return error("Could not read %s",
+                            sha1_to_hex(item->object.sha1));
+       if (strcmp(type, tag_type)) {
+               free(data);
+               return error("Object %s not a tag",
+                            sha1_to_hex(item->object.sha1));
+       }
+       ret = parse_tag_buffer(item, data, size);
        free(data);
-       return -1;
+       return ret;
 }
diff --git a/tag.h b/tag.h
index eddfed6bf3b1cae1458b6ee9232d6c5ea12aad35..41826870f87aaa2e45f34b825aa2c7c63aa81d80 100644 (file)
--- a/tag.h
+++ b/tag.h
@@ -13,6 +13,7 @@ struct tag {
 };
 
 extern struct tag *lookup_tag(unsigned char *sha1);
+extern int parse_tag_buffer(struct tag *item, void *data, unsigned long size);
 extern int parse_tag(struct tag *item);
 
 #endif /* TAG_H */
diff --git a/tree.c b/tree.c
index 468f99e4944b139450ed5b1d2f84e6ef0b303e84..d9777bf810af18be5e54b730bc057d6b3a554470 100644 (file)
--- a/tree.c
+++ b/tree.c
@@ -88,24 +88,14 @@ struct tree *lookup_tree(unsigned char *sha1)
        return (struct tree *) obj;
 }
 
-int parse_tree(struct tree *item)
+int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 {
-       char type[20];
-       void *buffer, *bufptr;
-       unsigned long size;
+       void *bufptr = buffer;
        struct tree_entry_list **list_p;
+
        if (item->object.parsed)
                return 0;
        item->object.parsed = 1;
-       buffer = bufptr = read_sha1_file(item->object.sha1, type, &size);
-       if (!buffer)
-               return error("Could not read %s",
-                            sha1_to_hex(item->object.sha1));
-       if (strcmp(type, tree_type)) {
-               free(buffer);
-               return error("Object %s not a tree",
-                            sha1_to_hex(item->object.sha1));
-       }
        list_p = &item->entries;
        while (size) {
                struct object *obj;
@@ -115,10 +105,8 @@ int parse_tree(struct tree *item)
                char *path = strchr(bufptr, ' ');
                unsigned int mode;
                if (size < len + 20 || !path || 
-                   sscanf(bufptr, "%o", &mode) != 1) {
-                       free(buffer);
+                   sscanf(bufptr, "%o", &mode) != 1)
                        return -1;
-               }
 
                entry = xmalloc(sizeof(struct tree_entry_list));
                entry->name = strdup(path + 1);
@@ -144,6 +132,28 @@ int parse_tree(struct tree *item)
                *list_p = entry;
                list_p = &entry->next;
        }
-       free(buffer);
        return 0;
 }
+
+int parse_tree(struct tree *item)
+{
+        char type[20];
+        void *buffer;
+        unsigned long size;
+        int ret;
+
+       if (item->object.parsed)
+               return 0;
+       buffer = read_sha1_file(item->object.sha1, type, &size);
+       if (!buffer)
+               return error("Could not read %s",
+                            sha1_to_hex(item->object.sha1));
+       if (strcmp(type, tree_type)) {
+               free(buffer);
+               return error("Object %s not a tree",
+                            sha1_to_hex(item->object.sha1));
+       }
+       ret = parse_tree_buffer(item, buffer, size);
+       free(buffer);
+       return ret;
+}
diff --git a/tree.h b/tree.h
index e1c94c090c8d73268176e8f1ca97f4949cb537df..1383beb2bf11f66bc34246acc5d843866ae680c9 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -25,6 +25,8 @@ struct tree {
 
 struct tree *lookup_tree(unsigned char *sha1);
 
+int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
+
 int parse_tree(struct tree *tree);
 
 #endif /* TREE_H */