summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 840383b)
raw | patch | inline | side by side (parent: 840383b)
author | Jeff King <peff@peff.net> | |
Fri, 2 Apr 2010 00:12:15 +0000 (20:12 -0400) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Fri, 2 Apr 2010 07:05:31 +0000 (00:05 -0700) |
Running a textconv filter can take a long time. It's
particularly bad for a large file which needs to be spooled
to disk, but even for small files, the fork+exec overhead
can add up for something like "git log -p".
This patch uses the notes-cache mechanism to keep a fast
cache of textconv output. Caches are stored in
refs/notes/textconv/$x, where $x is the userdiff driver
defined in gitattributes.
Caching is enabled only if diff.$x.cachetextconv is true.
In my test repo, on a commit with 45 jpg and avi files
changed and a textconv to show their exif tags:
[before]
$ time git show >/dev/null
real 0m13.724s
user 0m12.057s
sys 0m1.624s
[after, first run]
$ git config diff.mfo.cachetextconv true
$ time git show >/dev/null
real 0m14.252s
user 0m12.197s
sys 0m1.800s
[after, subsequent runs]
$ time git show >/dev/null
real 0m0.352s
user 0m0.148s
sys 0m0.200s
So for a slight (3.8%) cost on the first run, we achieve an
almost 40x speed up on subsequent runs.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
particularly bad for a large file which needs to be spooled
to disk, but even for small files, the fork+exec overhead
can add up for something like "git log -p".
This patch uses the notes-cache mechanism to keep a fast
cache of textconv output. Caches are stored in
refs/notes/textconv/$x, where $x is the userdiff driver
defined in gitattributes.
Caching is enabled only if diff.$x.cachetextconv is true.
In my test repo, on a commit with 45 jpg and avi files
changed and a textconv to show their exif tags:
[before]
$ time git show >/dev/null
real 0m13.724s
user 0m12.057s
sys 0m1.624s
[after, first run]
$ git config diff.mfo.cachetextconv true
$ time git show >/dev/null
real 0m14.252s
user 0m12.197s
sys 0m1.800s
[after, subsequent runs]
$ time git show >/dev/null
real 0m0.352s
user 0m0.148s
sys 0m0.200s
So for a slight (3.8%) cost on the first run, we achieve an
almost 40x speed up on subsequent runs.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/gitattributes.txt | patch | blob | history | |
diff.c | patch | blob | history | |
t/t4042-diff-textconv-caching.sh | [new file with mode: 0755] | patch | blob |
userdiff.c | patch | blob | history | |
userdiff.h | patch | blob | history |
index d892e642eded92fd0409e39fc34c005f88bc4a9b..a8500d177289c8ed7371c3ef4703b71b01c71b9c 100644 (file)
should generate it separately and send it as a comment _in
addition to_ the usual binary diff that you might send.
+Because text conversion can be slow, especially when doing a
+large number of them with `git log -p`, git provides a mechanism
+to cache the output and use it in future diffs. To enable
+caching, set the "cachetextconv" variable in your diff driver's
+config. For example:
+
+------------------------
+[diff "jpg"]
+ textconv = exif
+ cachetextconv = true
+------------------------
+
+This will cache the result of running "exif" on each blob
+indefinitely. If you change the textconv config variable for a
+diff driver, git will automatically invalidate the cache entries
+and re-run the textconv filter. If you want to invalidate the
+cache manually (e.g., because your version of "exif" was updated
+and now produces better output), you can remove the cache
+manually with `git update-ref -d refs/notes/textconv/jpg` (where
+"jpg" is the name of the diff driver, as in the example above).
Performing a three-way merge
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
index 9665d6d41fbbe4d610a54c76655a58d251db0291..72d8503d89ec99a278e9307090a440c8279cd266 100644 (file)
--- a/diff.c
+++ b/diff.c
};
static void diff_filespec_load_driver(struct diff_filespec *one);
-static size_t fill_textconv(const char *cmd,
+static size_t fill_textconv(struct userdiff_driver *driver,
struct diff_filespec *df, char **outbuf);
static int parse_diff_color_slot(const char *var, int ofs)
const char *name_b,
struct diff_filespec *one,
struct diff_filespec *two,
- const char *textconv_one,
- const char *textconv_two,
+ struct userdiff_driver *textconv_one,
+ struct userdiff_driver *textconv_two,
struct diff_options *o)
{
int lc_a, lc_b;
@@ -1569,14 +1569,26 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
options->b_prefix = b;
}
-static const char *get_textconv(struct diff_filespec *one)
+static struct userdiff_driver *get_textconv(struct diff_filespec *one)
{
if (!DIFF_FILE_VALID(one))
return NULL;
if (!S_ISREG(one->mode))
return NULL;
diff_filespec_load_driver(one);
- return one->driver->textconv;
+ if (!one->driver->textconv)
+ return NULL;
+
+ if (one->driver->textconv_want_cache && !one->driver->textconv_cache) {
+ struct notes_cache *c = xmalloc(sizeof(*c));
+ struct strbuf name = STRBUF_INIT;
+
+ strbuf_addf(&name, "textconv/%s", one->driver->name);
+ notes_cache_init(c, name.buf, one->driver->textconv);
+ one->driver->textconv_cache = c;
+ }
+
+ return one->driver;
}
static void builtin_diff(const char *name_a,
const char *set = diff_get_color_opt(o, DIFF_METAINFO);
const char *reset = diff_get_color_opt(o, DIFF_RESET);
const char *a_prefix, *b_prefix;
- const char *textconv_one = NULL, *textconv_two = NULL;
+ struct userdiff_driver *textconv_one = NULL;
+ struct userdiff_driver *textconv_two = NULL;
struct strbuf header = STRBUF_INIT;
if (DIFF_OPT_TST(o, SUBMODULE_LOG) &&
return strbuf_detach(&buf, outsize);
}
-static size_t fill_textconv(const char *cmd,
+static size_t fill_textconv(struct userdiff_driver *driver,
struct diff_filespec *df,
char **outbuf)
{
size_t size;
- if (!cmd) {
+ if (!driver || !driver->textconv) {
if (!DIFF_FILE_VALID(df)) {
*outbuf = "";
return 0;
return df->size;
}
- *outbuf = run_textconv(cmd, df, &size);
+ if (driver->textconv_cache) {
+ *outbuf = notes_cache_get(driver->textconv_cache, df->sha1,
+ &size);
+ if (*outbuf)
+ return size;
+ }
+
+ *outbuf = run_textconv(driver->textconv, df, &size);
if (!*outbuf)
die("unable to read files to diff");
+
+ if (driver->textconv_cache) {
+ /* ignore errors, as we might be in a readonly repository */
+ notes_cache_put(driver->textconv_cache, df->sha1, *outbuf,
+ size);
+ /*
+ * we could save up changes and flush them all at the end,
+ * but we would need an extra call after all diffing is done.
+ * Since generating a cache entry is the slow path anyway,
+ * this extra overhead probably isn't a big deal.
+ */
+ notes_cache_write(driver->textconv_cache);
+ }
+
return size;
}
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+test_description='test textconv caching'
+. ./test-lib.sh
+
+cat >helper <<'EOF'
+#!/bin/sh
+sed 's/^/converted: /' "$@" >helper.out
+cat helper.out
+EOF
+chmod +x helper
+
+test_expect_success 'setup' '
+ echo foo content 1 >foo.bin &&
+ echo bar content 1 >bar.bin &&
+ git add . &&
+ git commit -m one &&
+ echo foo content 2 >foo.bin &&
+ echo bar content 2 >bar.bin &&
+ git commit -a -m two &&
+ echo "*.bin diff=magic" >.gitattributes &&
+ git config diff.magic.textconv ./helper &&
+ git config diff.magic.cachetextconv true
+'
+
+cat >expect <<EOF
+diff --git a/bar.bin b/bar.bin
+index fcf9166..28283d5 100644
+--- a/bar.bin
++++ b/bar.bin
+@@ -1 +1 @@
+-converted: bar content 1
++converted: bar content 2
+diff --git a/foo.bin b/foo.bin
+index d5b9fe3..1345db2 100644
+--- a/foo.bin
++++ b/foo.bin
+@@ -1 +1 @@
+-converted: foo content 1
++converted: foo content 2
+EOF
+
+test_expect_success 'first textconv works' '
+ git diff HEAD^ HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'cached textconv produces same output' '
+ git diff HEAD^ HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'cached textconv does not run helper' '
+ rm -f helper.out &&
+ git diff HEAD^ HEAD >actual &&
+ test_cmp expect actual &&
+ ! test -r helper.out
+'
+
+cat >expect <<EOF
+diff --git a/bar.bin b/bar.bin
+index fcf9166..28283d5 100644
+--- a/bar.bin
++++ b/bar.bin
+@@ -1,2 +1,2 @@
+ converted: other
+-converted: bar content 1
++converted: bar content 2
+diff --git a/foo.bin b/foo.bin
+index d5b9fe3..1345db2 100644
+--- a/foo.bin
++++ b/foo.bin
+@@ -1,2 +1,2 @@
+ converted: other
+-converted: foo content 1
++converted: foo content 2
+EOF
+test_expect_success 'changing textconv invalidates cache' '
+ echo other >other &&
+ git config diff.magic.textconv "./helper other" &&
+ git diff HEAD^ HEAD >actual &&
+ test_cmp expect actual
+'
+
+cat >expect <<EOF
+diff --git a/bar.bin b/bar.bin
+index fcf9166..28283d5 100644
+--- a/bar.bin
++++ b/bar.bin
+@@ -1,2 +1,2 @@
+ converted: other
+-converted: bar content 1
++converted: bar content 2
+diff --git a/foo.bin b/foo.bin
+index d5b9fe3..1345db2 100644
+--- a/foo.bin
++++ b/foo.bin
+@@ -1 +1 @@
+-converted: foo content 1
++converted: foo content 2
+EOF
+test_expect_success 'switching diff driver produces correct results' '
+ git config diff.moremagic.textconv ./helper &&
+ echo foo.bin diff=moremagic >>.gitattributes &&
+ git diff HEAD^ HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_done
diff --git a/userdiff.c b/userdiff.c
index df992490d5f86b5eff2b87e90090b1ec576aae9a..67003fbb232cd85627f61c02a616b3d6a9ccd5d2 100644 (file)
--- a/userdiff.c
+++ b/userdiff.c
+#include "cache.h"
#include "userdiff.h"
#include "cache.h"
#include "attr.h"
return 1;
}
+static int parse_bool(int *b, const char *k, const char *v)
+{
+ *b = git_config_bool(k, v);
+ return 1;
+}
+
int userdiff_config(const char *k, const char *v)
{
struct userdiff_driver *drv;
return parse_string(&drv->external, k, v);
if ((drv = parse_driver(k, v, "textconv")))
return parse_string(&drv->textconv, k, v);
+ if ((drv = parse_driver(k, v, "cachetextconv")))
+ return parse_bool(&drv->textconv_want_cache, k, v);
if ((drv = parse_driver(k, v, "wordregex")))
return parse_string(&drv->word_regex, k, v);
diff --git a/userdiff.h b/userdiff.h
index c3151594f5c0643fead757accc27bf1093cf4a68..942d5949501027fb5d30e0d59629aab38fd2669a 100644 (file)
--- a/userdiff.h
+++ b/userdiff.h
#ifndef USERDIFF_H
#define USERDIFF_H
+#include "notes-cache.h"
+
struct userdiff_funcname {
const char *pattern;
int cflags;
struct userdiff_funcname funcname;
const char *word_regex;
const char *textconv;
+ struct notes_cache *textconv_cache;
+ int textconv_want_cache;
};
int userdiff_config(const char *k, const char *v);