Code

Add 'filter' attribute and external filter driver definition.
author Junio C Hamano <junkio@cox.net>
Sat, 21 Apr 2007 10:14:13 +0000 (03:14 -0700)
committer Junio C Hamano <junkio@cox.net>
Wed, 25 Apr 2007 05:38:51 +0000 (22:38 -0700)
The interface is similar to the custom low-level merge drivers.

First you configure your filter driver by defining 'filter.<name>.*'
variables in the configuration.

filter.<name>.clean filter command to run upon checkin
filter.<name>.smudge filter command to run upon checkout

Then you assign the 'filter' attribute to each path, giving it a
value that matches the name of the custom filter driver.

Example:

(in .gitattributes)
*.c filter=indent

(in config)
[filter "indent"]
clean = indent
smudge = cat

Signed-off-by: Junio C Hamano <junkio@cox.net>
Documentation/gitattributes.txt
convert.c
t/t0021-conversion.sh

index b6f90f6f37b5ab28096fdc3a7cb3dbc35fa82175..87723105d1aeeab3cc9918ffc80503f4291e749c 100644 (file)
@@ -156,6 +156,45 @@ In the check-out codepath, the blob content is first converted
 with `crlf`, and then `ident`.
 
 
+`filter`
+^^^^^^^^
+
+A `filter` attribute can be set to a string value.  This names a
+filter driver specified in the configuration.
+
+A filter driver consists of a `clean` command and a `smudge`
+command, either of which can be left unspecified.  Upon
+checkout, when the `smudge` command is specified, it is fed the
+blob object on its standard input, and its standard output is
+used to update the worktree file.  Similarly, the `clean` command
+is used to convert the contents of the worktree file upon checkin.
+
+A missing filter driver definition in the config is not an error,
+but makes the filter a no-op passthru.
+
+The content filtering is done to massage the content into a
+shape that is more convenient for the platform, filesystem, and
+the user to use.  The keyword here is "more convenient" and not
+"turning something unusable into usable".  In other words, it is
+"hanging yourself because we gave you a long rope" if your
+project uses filtering mechanism in such a way that it makes
+your project unusable unless the checkout is done with a
+specific filter in effect.
+
+
+Interaction between checkin/checkout attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the check-in codepath, the worktree file is first converted
+with `filter` driver (if specified and corresponding driver
+defined), then the result is processed with `ident` (if
+specified), and then finally with `crlf` (again, if specified
+and applicable).
+
+In the check-out codepath, the blob content is first converted
+with `crlf`, and then `ident` and fed to `filter`.
+
+
 Generating diff text
 ~~~~~~~~~~~~~~~~~~~~
 
index 5fdaee75714fa6a42601fbb366aeafb18f3334fb..9ee31b0ee0949e1d45ac70b463efbb0d47132b32 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -201,17 +201,212 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
        return buffer;
 }
 
+static int filter_buffer(const char *path, const char *src,
+                        unsigned long size, const char *cmd)
+{
+       /*
+        * Spawn cmd and feed the buffer contents through its stdin.
+        *
+        * cmd is run as "sh -c cmd" in a forked child whose standard
+        * input is the read end of a pipe.  This process writes the
+        * "size" bytes at "src" into the write end, closes it, and
+        * then collects the child's status with finish_command().
+        * Standard output is not redirected here, so cmd writes to
+        * whatever the caller set up as fd 1.
+        *
+        * "path" is not used by this function.
+        *
+        * Returns 0 on success, and non-zero when the pipe or the
+        * fork cannot be set up, when feeding the input fails, or
+        * when finish_command() reports a non-zero status.
+        */
+       struct child_process child_process;
+       int pipe_feed[2];
+       int write_err, status;
+
+       memset(&child_process, 0, sizeof(child_process));
+
+       if (pipe(pipe_feed) < 0) {
+               error("cannot create pipe to run external filter %s", cmd);
+               return 1;
+       }
+
+       child_process.pid = fork();
+       if (child_process.pid < 0) {
+               error("cannot fork to run external filter %s", cmd);
+               close(pipe_feed[0]);
+               close(pipe_feed[1]);
+               return 1;
+       }
+       if (!child_process.pid) {
+               dup2(pipe_feed[0], 0); /* the pipe's read end becomes the filter's stdin */
+               close(pipe_feed[0]);
+               close(pipe_feed[1]);
+               execlp("sh", "sh", "-c", cmd, NULL);
+               return 1; /* reached only if execlp() failed; we are still in the forked child */
+       }
+       close(pipe_feed[0]); /* this side only writes to the pipe */
+
+       write_err = (write_in_full(pipe_feed[1], src, size) < 0);
+       if (close(pipe_feed[1])) /* a failing close is treated as a write error too */
+               write_err = 1;
+       if (write_err)
+               error("cannot feed the input to external filter %s", cmd);
+
+       status = finish_command(&child_process);
+       if (status)
+               error("external filter %s failed %d", cmd, -status);
+       return (write_err || status);
+}
+
+static char *apply_filter(const char *path, const char *src,
+                         unsigned long *sizep, const char *cmd)
+{
+       /*
+        * Create a pipeline to have the command filter the buffer's
+        * contents.
+        *
+        * (child --> cmd) --> us
+        *
+        * A forked child redirects its standard output to a pipe and
+        * calls filter_buffer() to run cmd and feed it "src"; this
+        * process reads whatever arrives on the pipe into a growing
+        * buffer obtained from xmalloc()/xrealloc().
+        *
+        * "*sizep" is the length of "src" on entry.  On success the
+        * output buffer is returned and "*sizep" is updated to the
+        * number of bytes read from the filter.
+        *
+        * Returns NULL, leaving "*sizep" untouched, when cmd is NULL
+        * (there is no filter to apply), or when the pipe, the fork,
+        * the read, the close, or the child's exit status reports a
+        * failure.
+        *
+        * NOTE(review): the buffer starts out "*sizep" bytes long, so
+        * with a zero-length input the first xread() is asked for 0
+        * bytes; if that returns 0 it is taken as EOF and any output
+        * cmd produced would be dropped -- confirm whether callers
+        * can pass an empty buffer.
+        */
+       const int SLOP = 4096; /* spare room kept in dst after every successful read */
+       int pipe_feed[2];
+       int status;
+       char *dst;
+       unsigned long dstsize, dstalloc;
+       struct child_process child_process;
+
+       if (!cmd)
+               return NULL;
+
+       memset(&child_process, 0, sizeof(child_process));
+
+       if (pipe(pipe_feed) < 0) {
+               error("cannot create pipe to run external filter %s", cmd);
+               return NULL;
+       }
+
+       fflush(NULL); /* presumably so the child's exit() does not write buffered stdio data a second time */
+       child_process.pid = fork();
+       if (child_process.pid < 0) {
+               error("cannot fork to run external filter %s", cmd);
+               close(pipe_feed[0]);
+               close(pipe_feed[1]);
+               return NULL;
+       }
+       if (!child_process.pid) {
+               dup2(pipe_feed[1], 1); /* the filter's output goes into the pipe we read from */
+               close(pipe_feed[0]);
+               close(pipe_feed[1]);
+               exit(filter_buffer(path, src, *sizep, cmd));
+       }
+       close(pipe_feed[1]); /* only the child writes; keeping our copy open would hide EOF */
+
+       dstalloc = *sizep; /* initial guess: output is about as large as the input */
+       dst = xmalloc(dstalloc);
+       dstsize = 0;
+
+       while (1) {
+               ssize_t numread = xread(pipe_feed[0], dst + dstsize,
+                                       dstalloc - dstsize);
+
+               if (numread <= 0) {
+                       if (!numread) /* 0 means end of the filter's output */
+                               break;
+                       error("read from external filter %s failed", cmd);
+                       free(dst);
+                       dst = NULL;
+                       break;
+               }
+               dstsize += numread;
+               if (dstalloc <= dstsize + SLOP) { /* grow so at least SLOP bytes are free for the next read */
+                       dstalloc = dstsize + SLOP;
+                       dst = xrealloc(dst, dstalloc);
+               }
+       }
+       if (close(pipe_feed[0])) {
+               error("read from external filter %s failed", cmd);
+               free(dst);
+               dst = NULL;
+       }
+
+       status = finish_command(&child_process);
+       if (status) { /* the output of a failed filter is discarded */
+               error("external filter %s failed %d", cmd, -status);
+               free(dst);
+               dst = NULL;
+       }
+
+       if (dst)
+               *sizep = dstsize;
+       return dst;
+}
+
+static struct convert_driver { /* one filter driver defined by "filter.<name>.*" configuration */
+       const char *name;            /* the <name> part of the configuration key */
+       struct convert_driver *next; /* next driver in the list, NULL for the last one */
+       char *smudge;                /* filter.<name>.smudge command, NULL when not configured */
+       char *clean;                 /* filter.<name>.clean command, NULL when not configured */
+} *user_convert, **user_convert_tail; /* list head, and the link where the next driver is appended */
+
+static int read_convert_config(const char *var, const char *value)
+{
+       const char *ep, *name;
+       int namelen;
+       struct convert_driver *drv;
+
+       /*
+        * External conversion drivers are configured using
+        * "filter.<name>.variable".
+        *
+        * This function is handed to git_config() as its callback;
+        * "var" is the configuration key and "value" its value.
+        * Keys that do not start with "filter.", or whose last '.'
+        * is the one right after "filter" (no <name> part), are
+        * ignored.  Otherwise the driver called <name> is looked up
+        * in the user_convert list and appended to it when missing;
+        * this happens before the variable is examined, so an entry
+        * is created even for variables other than smudge/clean.
+        *
+        * Returns 0, or the result of error() when smudge or clean
+        * is given without a value.
+        */
+       if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
+               return 0;
+       name = var + 7; /* first character after "filter." */
+       namelen = ep - name;
+       for (drv = user_convert; drv; drv = drv->next)
+               if (!strncmp(drv->name, name, namelen) && !drv->name[namelen]) /* exact match, not just a prefix */
+                       break;
+       if (!drv) {
+               char *namebuf;
+               drv = xcalloc(1, sizeof(struct convert_driver));
+               namebuf = xmalloc(namelen + 1);
+               memcpy(namebuf, name, namelen);
+               namebuf[namelen] = 0;
+               drv->name = namebuf;
+               drv->next = NULL;
+               *user_convert_tail = drv;
+               user_convert_tail = &(drv->next);
+       }
+
+       ep++; /* step past the '.' to the variable name */
+
+       /*
+        * filter.<name>.smudge and filter.<name>.clean specifies
+        * the command line:
+        *
+        *      command-line
+        *
+        * The command-line will not be interpolated in any way.
+        *
+        * NOTE(review): the strdup() results below are stored
+        * unchecked, and a key that appears more than once replaces
+        * the earlier pointer without freeing it.
+        */
+
+       if (!strcmp("smudge", ep)) {
+               if (!value)
+                       return error("%s: lacks value", var);
+               drv->smudge = strdup(value);
+               return 0;
+       }
+
+       if (!strcmp("clean", ep)) {
+               if (!value)
+                       return error("%s: lacks value", var);
+               drv->clean = strdup(value);
+               return 0;
+       }
+       return 0;
+}
+
 static void setup_convert_check(struct git_attr_check *check)
 { /* point check[0..2] at the "crlf", "ident" and "filter" attributes */
        static struct git_attr *attr_crlf;
        static struct git_attr *attr_ident;
+       static struct git_attr *attr_filter;
 
        if (!attr_crlf) { /* attributes not looked up yet */
                attr_crlf = git_attr("crlf", 4);
                attr_ident = git_attr("ident", 5);
+               attr_filter = git_attr("filter", 6);
+               user_convert_tail = &user_convert; /* empty list: the first driver is appended at the head */
+               git_config(read_convert_config); /* collects the filter.<name>.* drivers */
        }
        check[0].attr = attr_crlf;
        check[1].attr = attr_ident;
+       check[2].attr = attr_filter; /* so "check" must have room for three entries */
 }
 
 static int count_ident(const char *cp, unsigned long size)
@@ -367,6 +562,20 @@ static int git_path_check_crlf(const char *path, struct git_attr_check *check)
        return CRLF_GUESS;
 }
 
+static struct convert_driver *git_path_check_convert(const char *path,
+                                            struct git_attr_check *check)
+{ /* map the attribute value in "check" to the configured driver of that name; "path" is unused */
+       const char *value = check->value;
+       struct convert_driver *drv;
+
+       if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) /* only a string value can name a driver */
+               return NULL;
+       for (drv = user_convert; drv; drv = drv->next)
+               if (!strcmp(value, drv->name))
+                       return drv;
+       return NULL; /* no driver of that name is configured; callers then apply no filter */
+}
+
 static int git_path_check_ident(const char *path, struct git_attr_check *check)
 {
        const char *value = check->value;
@@ -376,18 +585,29 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check)
 
 char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
 {
-       struct git_attr_check check[2];
+       struct git_attr_check check[3];
        int crlf = CRLF_GUESS;
        int ident = 0;
+       char *filter = NULL;
        char *buf, *buf2;
 
        setup_convert_check(check);
        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+               struct convert_driver *drv;
                crlf = git_path_check_crlf(path, check + 0);
                ident = git_path_check_ident(path, check + 1);
+               drv = git_path_check_convert(path, check + 2);
+               if (drv && drv->clean)
+                       filter = drv->clean;
        }
 
-       buf = crlf_to_git(path, src, sizep, crlf);
+       buf = apply_filter(path, src, sizep, filter);
+
+       buf2 = crlf_to_git(path, buf ? buf : src, sizep, crlf);
+       if (buf2) {
+               free(buf);
+               buf = buf2;
+       }
 
        buf2 = ident_to_git(path, buf ? buf : src, sizep, ident);
        if (buf2) {
@@ -400,15 +620,20 @@ char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
 
 char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
 {
-       struct git_attr_check check[2];
+       struct git_attr_check check[3];
        int crlf = CRLF_GUESS;
        int ident = 0;
+       char *filter = NULL;
        char *buf, *buf2;
 
        setup_convert_check(check);
        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+               struct convert_driver *drv;
                crlf = git_path_check_crlf(path, check + 0);
                ident = git_path_check_ident(path, check + 1);
+               drv = git_path_check_convert(path, check + 2);
+               if (drv && drv->smudge)
+                       filter = drv->smudge;
        }
 
        buf = ident_to_worktree(path, src, sizep, ident);
@@ -419,5 +644,11 @@ char *convert_to_working_tree(const char *path, const char *src, unsigned long *
                buf = buf2;
        }
 
+       buf2 = apply_filter(path, buf ? buf : src, sizep, filter);
+       if (buf2) {
+               free(buf);
+               buf = buf2;
+       }
+
        return buf;
 }
index ad952c9ce2ce2e1a83145b4949c4885e2bdaf47d..bab9ecc34e0304e855e7f4aa131f89c0300bc3e2 100755 (executable)
@@ -4,8 +4,17 @@ test_description='blob conversion via gitattributes'
 
 . ./test-lib.sh
 
+cat <<\EOF >rot13.sh # helper script: rot13, so running it twice restores the input
+tr '[a-zA-Z]' '[n-za-mN-ZA-M]'
+EOF
+chmod +x rot13.sh # the setup test configures ./rot13.sh as both the smudge and the clean command
+
 test_expect_success setup '
+       git config filter.rot13.smudge ./rot13.sh &&
+       git config filter.rot13.clean ./rot13.sh &&
+
        {
+           echo "*.t filter=rot13"
            echo "*.i ident"
        } >.gitattributes &&