From: Eric W. Biederman Date: Mon, 10 Apr 2006 09:33:06 +0000 (-0600) Subject: Implement limited context matching in git-apply. X-Git-Tag: v1.3.0-rc4~21^2 X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=474958871394365ee7807d88217c3d75269161a6;p=git.git Implement limited context matching in git-apply. OK, this really should be the good version. The option handling has been reworked to be automation safe. Currently, to import the -mm tree I have to work around git-apply by using patch, because some of Andrew's patches in quilt will only apply with fuzz. I started out implementing a --fuzz option and then I realized fuzz is not a very safe concept for an automated system. What you really want is a minimum number of context lines that must match. This allows policy to be set without knowing how many lines of context a patch actually provides. By default the policy remains to match all provided lines of context. Allowing git-apply to match a restricted set of context makes it much easier to import the -mm tree into git. I am still only processing 1.5 to 1.6 patches a second for the 692 patches in 2.6.17-rc1-mm2, which is still painful, but it does help. If I just loop through all of Andrew's patches in order and run git-apply --index -C1 I process the entire patchset in 1m53s or about 6 patches per second. So running git-mailinfo, git-write-tree, git-commit-tree, and git-update-ref every time has a measurable impact, and shows things can be sped up even more. All of these timings were taken on my poor 700MHz Athlon with 512MB of RAM. So people with fast machines should see much better performance. When a match is found after the number of context lines is reduced, a warning is generated. Since this is a rare event and possibly dangerous this seems to make sense. Unless you are patching a single file the error message is a little bit terse at the moment, but it should be easy to go back and fix. 
I have also updated the documentation for git-apply to reflect the new -C option that sets the minimum number of context lines that must match. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt index 1c64a1aa8..e93ea1f26 100644 --- a/Documentation/git-apply.txt +++ b/Documentation/git-apply.txt @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git-apply' [--stat] [--numstat] [--summary] [--check] [--index] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [-z] [-pNUM] - [--whitespace=] + [-CNUM] [--whitespace=] [...] DESCRIPTION @@ -73,6 +73,12 @@ OPTIONS Remove <n> leading slashes from traditional diff paths. The default is 1. +-C<n>:: + Ensure at least <n> lines of surrounding context match before + and after each change. When fewer lines of surrounding + context exist they all must match. By default no context is + ever ignored. + --apply:: If you use any of the options marked ``Turns off "apply"'' above, git-apply reads and outputs the diff --git a/apply.c b/apply.c index 33b427128..269210a57 100644 --- a/apply.c +++ b/apply.c @@ -32,8 +32,9 @@ static int apply = 1; static int no_add = 0; static int show_index_info = 0; static int line_termination = '\n'; +static unsigned long p_context = -1; static const char apply_usage[] = -"git-apply [--stat] [--numstat] [--summary] [--check] [--index] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [-z] [-pNUM] [--whitespace=] ..."; +"git-apply [--stat] [--numstat] [--summary] [--check] [--index] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [-z] [-pNUM] [-CNUM] [--whitespace=] ..."; static enum whitespace_eol { nowarn_whitespace, @@ -100,6 +101,7 @@ static int max_change, max_len; static int linenr = 1; struct fragment { + unsigned long leading, trailing; unsigned long oldpos, oldlines; unsigned long newpos, newlines; const char *patch; @@ -817,12 +819,15 @@ static int parse_fragment(char *line, unsigned long 
size, struct patch *patch, s int added, deleted; int len = linelen(line, size), offset; unsigned long oldlines, newlines; + unsigned long leading, trailing; offset = parse_fragment_header(line, len, fragment); if (offset < 0) return -1; oldlines = fragment->oldlines; newlines = fragment->newlines; + leading = 0; + trailing = 0; if (patch->is_new < 0) { patch->is_new = !oldlines; @@ -860,10 +865,14 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s case ' ': oldlines--; newlines--; + if (!deleted && !added) + leading++; + trailing++; break; case '-': deleted++; oldlines--; + trailing = 0; break; case '+': /* @@ -887,6 +896,7 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s } added++; newlines--; + trailing = 0; break; /* We allow "\ No newline at end of file". Depending @@ -904,6 +914,9 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s } if (oldlines || newlines) return -1; + fragment->leading = leading; + fragment->trailing = trailing; + /* If a fragment ends with an incomplete line, we failed to include * it in the above loop because we hit oldlines == newlines == 0 * before seeing it. 
@@ -1087,7 +1100,7 @@ static int read_old_data(struct stat *st, const char *path, void *buf, unsigned } } -static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line) +static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line, int *lines) { int i; unsigned long start, backwards, forwards; @@ -1148,6 +1161,7 @@ static int find_offset(const char *buf, unsigned long size, const char *fragment n = (i >> 1)+1; if (i & 1) n = -n; + *lines = n; return try; } @@ -1157,6 +1171,33 @@ static int find_offset(const char *buf, unsigned long size, const char *fragment return -1; } +static void remove_first_line(const char **rbuf, int *rsize) +{ + const char *buf = *rbuf; + int size = *rsize; + unsigned long offset; + offset = 0; + while (offset <= size) { + if (buf[offset++] == '\n') + break; + } + *rsize = size - offset; + *rbuf = buf + offset; +} + +static void remove_last_line(const char **rbuf, int *rsize) +{ + const char *buf = *rbuf; + int size = *rsize; + unsigned long offset; + offset = size - 1; + while (offset > 0) { + if (buf[--offset] == '\n') + break; + } + *rsize = offset + 1; +} + struct buffer_desc { char *buffer; unsigned long size; @@ -1192,7 +1233,10 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) int offset, size = frag->size; char *old = xmalloc(size); char *new = xmalloc(size); + const char *oldlines, *newlines; int oldsize = 0, newsize = 0; + unsigned long leading, trailing; + int pos, lines; while (size > 0) { int len = linelen(patch, size); @@ -1241,23 +1285,59 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) newsize--; } #endif - - offset = find_offset(buf, desc->size, old, oldsize, frag->newpos); - if (offset >= 0) { - int diff = newsize - oldsize; - unsigned long size = desc->size + diff; - unsigned long alloc = desc->alloc; - - if (size > alloc) { - alloc = size + 8192; - 
desc->alloc = alloc; - buf = xrealloc(buf, alloc); - desc->buffer = buf; + + oldlines = old; + newlines = new; + leading = frag->leading; + trailing = frag->trailing; + lines = 0; + pos = frag->newpos; + for (;;) { + offset = find_offset(buf, desc->size, oldlines, oldsize, pos, &lines); + if (offset >= 0) { + int diff = newsize - oldsize; + unsigned long size = desc->size + diff; + unsigned long alloc = desc->alloc; + + /* Warn if it was necessary to reduce the number + * of context lines. + */ + if ((leading != frag->leading) || (trailing != frag->trailing)) + fprintf(stderr, "Context reduced to (%ld/%ld) to apply fragment at %d\n", + leading, trailing, pos + lines); + + if (size > alloc) { + alloc = size + 8192; + desc->alloc = alloc; + buf = xrealloc(buf, alloc); + desc->buffer = buf; + } + desc->size = size; + memmove(buf + offset + newsize, buf + offset + oldsize, size - offset - newsize); + memcpy(buf + offset, newlines, newsize); + offset = 0; + + break; + } + + /* Am I at my context limits? */ + if ((leading <= p_context) && (trailing <= p_context)) + break; + /* Reduce the number of context lines + * Reduce both leading and trailing if they are equal + * otherwise just reduce the larger context. 
+ */ + if (leading >= trailing) { + remove_first_line(&oldlines, &oldsize); + remove_first_line(&newlines, &newsize); + pos--; + leading--; + } + if (trailing > leading) { + remove_last_line(&oldlines, &oldsize); + remove_last_line(&newlines, &newsize); + trailing--; } - desc->size = size; - memmove(buf + offset + newsize, buf + offset + oldsize, size - offset - newsize); - memcpy(buf + offset, new, newsize); - offset = 0; } free(old); @@ -1882,6 +1962,7 @@ int main(int argc, char **argv) for (i = 1; i < argc; i++) { const char *arg = argv[i]; + char *end; int fd; if (!strcmp(arg, "-")) { @@ -1945,6 +2026,12 @@ int main(int argc, char **argv) line_termination = 0; continue; } + if (!strncmp(arg, "-C", 2)) { + p_context = strtoul(arg + 2, &end, 0); + if (*end != '\0') + die("unrecognized context count '%s'", arg + 2); + continue; + } if (!strncmp(arg, "--whitespace=", 13)) { whitespace_option = arg + 13; parse_whitespace_option(arg + 13);