Code

--pretty=format: parse commit message only once
author René Scharfe <rene.scharfe@lsrfire.ath.cx>
Sat, 10 Nov 2007 11:14:20 +0000 (12:14 +0100)
committer Junio C Hamano <gitster@pobox.com>
Sun, 11 Nov 2007 10:04:46 +0000 (02:04 -0800)
As Jeff King pointed out, some placeholder expansions are related to
each other: the steps to calculate one go most of the way towards
calculating the other, too.

This patch makes format_commit_message() parse the commit message
only once, remembering the position of each item.  This speeds up
handling of format strings containing multiple placeholders from the
set %s, %a*, %c*, %e, %b.

Here are the timings for the git version in next.  The first command
estimates the overhead of the caching; the format string in the second
is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as
an example of a format string found in the wild.  Each time shown is
the fastest of three consecutive runs:

$ time git log --pretty=format:%e >/dev/null

real    0m0.381s
user    0m0.340s
sys     0m0.024s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.623s
user    0m0.556s
sys     0m0.052s

And here the times with this patch:

$ time git log --pretty=format:%e >/dev/null

real    0m0.385s
user    0m0.332s
sys     0m0.040s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.563s
user    0m0.504s
sys     0m0.048s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
pretty.c

index 9fbd73f748c6cd250b5e9534168072a1cea88a85..17a3010a6ecf315f75d7d6c970ac4b3d1877d168 100644 (file)
--- a/pretty.c
+++ b/pretty.c
@@ -354,14 +354,67 @@ static void format_person_part(struct strbuf *sb, char part,
        }
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+struct chunk {
+       size_t off;
+       size_t len;
+};
+
+struct format_commit_context {
+       const struct commit *commit;
+
+       /* These offsets are relative to the start of the commit message. */
+       int commit_header_parsed;
+       struct chunk subject;
+       struct chunk author;
+       struct chunk committer;
+       struct chunk encoding;
+       size_t body_off;
+};
+
+static void parse_commit_header(struct format_commit_context *context)
 {
-       const struct commit *commit = context;
-       struct commit_list *p;
+       const char *msg = context->commit->buffer;
        int i;
        enum { HEADER, SUBJECT, BODY } state;
+
+       for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
+               int eol;
+               for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
+                       ; /* do nothing */
+
+               if (state == SUBJECT) {
+                       context->subject.off = i;
+                       context->subject.len = eol - i;
+                       i = eol;
+               }
+               if (i == eol) {
+                       state++;
+                       /* strip empty lines */
+                       while (msg[eol + 1] == '\n')
+                               eol++;
+               } else if (!prefixcmp(msg + i, "author ")) {
+                       context->author.off = i + 7;
+                       context->author.len = eol - i - 7;
+               } else if (!prefixcmp(msg + i, "committer ")) {
+                       context->committer.off = i + 10;
+                       context->committer.len = eol - i - 10;
+               } else if (!prefixcmp(msg + i, "encoding ")) {
+                       context->encoding.off = i + 9;
+                       context->encoding.len = eol - i - 9;
+               }
+               i = eol;
+       }
+       context->body_off = i;
+       context->commit_header_parsed = 1;
+}
+
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
+{
+       struct format_commit_context *c = context;
+       const struct commit *commit = c->commit;
        const char *msg = commit->buffer;
+       struct commit_list *p;
 
        /* these are independent of the commit */
        switch (placeholder[0]) {
@@ -429,45 +482,28 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
        }
 
        /* For the rest we have to parse the commit header. */
-       for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
-               int eol;
-               for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
-                       ; /* do nothing */
+       if (!c->commit_header_parsed)
+               parse_commit_header(c);
 
-               if (state == SUBJECT) {
-                       if (placeholder[0] == 's') {
-                               strbuf_add(sb, msg + i, eol - i);
-                               return;
-                       }
-                       i = eol;
-               }
-               if (i == eol) {
-                       state++;
-                       /* strip empty lines */
-                       while (msg[eol + 1] == '\n')
-                               eol++;
-               } else if (!prefixcmp(msg + i, "author ")) {
-                       if (placeholder[0] == 'a') {
-                               format_person_part(sb, placeholder[1],
-                                                  msg + i + 7, eol - i - 7);
-                               return;
-                       }
-               } else if (!prefixcmp(msg + i, "committer ")) {
-                       if (placeholder[0] == 'c') {
-                               format_person_part(sb, placeholder[1],
-                                                  msg + i + 10, eol - i - 10);
-                               return;
-                       }
-               } else if (!prefixcmp(msg + i, "encoding ")) {
-                       if (placeholder[0] == 'e') {
-                               strbuf_add(sb, msg + i + 9, eol - i - 9);
-                               return;
-                       }
-               }
-               i = eol;
+       switch (placeholder[0]) {
+       case 's':
+               strbuf_add(sb, msg + c->subject.off, c->subject.len);
+               return;
+       case 'a':
+               format_person_part(sb, placeholder[1],
+                                  msg + c->author.off, c->author.len);
+               return;
+       case 'c':
+               format_person_part(sb, placeholder[1],
+                                  msg + c->committer.off, c->committer.len);
+               return;
+       case 'e':
+               strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
+               return;
+       case 'b':
+               strbuf_addstr(sb, msg + c->body_off);
+               return;
        }
-       if (msg[i] && placeholder[0] == 'b')    /* body */
-               strbuf_addstr(sb, msg + i);
 }
 
 void format_commit_message(const struct commit *commit,
@@ -505,7 +541,11 @@ void format_commit_message(const struct commit *commit,
                "m",            /* left/right/bottom */
                NULL
        };
-       strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
+       struct format_commit_context context;
+
+       memset(&context, 0, sizeof(context));
+       context.commit = commit;
+       strbuf_expand(sb, format, placeholders, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,