summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: b95c5ad)
raw | patch | inline | side by side (parent: b95c5ad)
author | Fredrik Kuivinen <frekui@gmail.com> | |
Sat, 20 Aug 2011 22:42:18 +0000 (00:42 +0200) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Sun, 21 Aug 2011 05:33:58 +0000 (22:33 -0700) |
Benchmarks for the hot cache case:
before:
$ perf stat --repeat=5 git grep qwerty > /dev/null
Performance counter stats for 'git grep qwerty' (5 runs):
3,478,085 cache-misses # 2.322 M/sec ( +- 2.690% )
11,356,177 cache-references # 7.582 M/sec ( +- 2.598% )
3,872,184 branch-misses # 0.363 % ( +- 0.258% )
1,067,367,848 branches # 712.673 M/sec ( +- 2.622% )
3,828,370,782 instructions # 0.947 IPC ( +- 0.033% )
4,043,832,831 cycles # 2700.037 M/sec ( +- 0.167% )
8,518 page-faults # 0.006 M/sec ( +- 3.648% )
847 CPU-migrations # 0.001 M/sec ( +- 3.262% )
6,546 context-switches # 0.004 M/sec ( +- 2.292% )
1497.695495 task-clock-msecs # 3.303 CPUs ( +- 2.550% )
0.453394396 seconds time elapsed ( +- 0.912% )
after:
$ perf stat --repeat=5 git grep qwerty > /dev/null
Performance counter stats for 'git grep qwerty' (5 runs):
2,989,918 cache-misses # 3.166 M/sec ( +- 5.013% )
10,986,041 cache-references # 11.633 M/sec ( +- 4.899% ) (scaled from 95.06%)
3,511,993 branch-misses # 1.422 % ( +- 0.785% )
246,893,561 branches # 261.433 M/sec ( +- 3.967% )
1,392,727,757 instructions # 0.564 IPC ( +- 0.040% )
2,468,142,397 cycles # 2613.494 M/sec ( +- 0.110% )
7,747 page-faults # 0.008 M/sec ( +- 3.995% )
897 CPU-migrations # 0.001 M/sec ( +- 2.383% )
6,535 context-switches # 0.007 M/sec ( +- 1.993% )
944.384228 task-clock-msecs # 3.177 CPUs ( +- 0.268% )
0.297257643 seconds time elapsed ( +- 0.450% )
So we gain about 35% by using the kwset code.
As a side effect of using kwset two grep tests are fixed by this
patch. The first is fixed because kwset can deal with case-insensitive
search containing NULs, something strcasestr cannot do. The second one
is fixed because we consider patterns containing NULs as fixed strings
(regcomp cannot accept patterns with NULs).
Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
before:
$ perf stat --repeat=5 git grep qwerty > /dev/null
Performance counter stats for 'git grep qwerty' (5 runs):
3,478,085 cache-misses # 2.322 M/sec ( +- 2.690% )
11,356,177 cache-references # 7.582 M/sec ( +- 2.598% )
3,872,184 branch-misses # 0.363 % ( +- 0.258% )
1,067,367,848 branches # 712.673 M/sec ( +- 2.622% )
3,828,370,782 instructions # 0.947 IPC ( +- 0.033% )
4,043,832,831 cycles # 2700.037 M/sec ( +- 0.167% )
8,518 page-faults # 0.006 M/sec ( +- 3.648% )
847 CPU-migrations # 0.001 M/sec ( +- 3.262% )
6,546 context-switches # 0.004 M/sec ( +- 2.292% )
1497.695495 task-clock-msecs # 3.303 CPUs ( +- 2.550% )
0.453394396 seconds time elapsed ( +- 0.912% )
after:
$ perf stat --repeat=5 git grep qwerty > /dev/null
Performance counter stats for 'git grep qwerty' (5 runs):
2,989,918 cache-misses # 3.166 M/sec ( +- 5.013% )
10,986,041 cache-references # 11.633 M/sec ( +- 4.899% ) (scaled from 95.06%)
3,511,993 branch-misses # 1.422 % ( +- 0.785% )
246,893,561 branches # 261.433 M/sec ( +- 3.967% )
1,392,727,757 instructions # 0.564 IPC ( +- 0.040% )
2,468,142,397 cycles # 2613.494 M/sec ( +- 0.110% )
7,747 page-faults # 0.008 M/sec ( +- 3.995% )
897 CPU-migrations # 0.001 M/sec ( +- 2.383% )
6,535 context-switches # 0.007 M/sec ( +- 1.993% )
944.384228 task-clock-msecs # 3.177 CPUs ( +- 0.268% )
0.297257643 seconds time elapsed ( +- 0.450% )
So we gain about 35% by using the kwset code.
As a side effect of using kwset two grep tests are fixed by this
patch. The first is fixed because kwset can deal with case-insensitive
search containing NULs, something strcasestr cannot do. The second one
is fixed because we consider patterns containing NULs as fixed strings
(regcomp cannot accept patterns with NULs).
Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
grep.c | patch | blob | history | |
grep.h | patch | blob | history | |
t/t7008-grep-binary.sh | patch | blob | history |
diff --git a/grep.c b/grep.c
index 26e8d8ec4cbec60cd5f4e2bef0d99607c7118fb7..6618cd8b92fc1948304a73e7e99d74ca0035d51e 100644 (file)
--- a/grep.c
+++ b/grep.c
}
#endif /* !USE_LIBPCRE */
+static int is_fixed(const char *s, size_t len)
+{
+ size_t i;
+
+ /* regcomp cannot accept patterns with NULs so we
+ * consider any pattern containing a NUL fixed.
+ */
+ if (memchr(s, 0, len))
+ return 1;
+
+ for (i = 0; i < len; i++) {
+ if (is_regex_special(s[i]))
+ return 0;
+ }
+
+ return 1;
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- p->fixed = opt->fixed;
- if (p->fixed)
+ if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+ p->fixed = 1;
+ else
+ p->fixed = 0;
+
+ if (p->fixed) {
+ if (opt->regflags & REG_ICASE || p->ignore_case) {
+ static char trans[256];
+ int i;
+ for (i = 0; i < 256; i++)
+ trans[i] = tolower(i);
+ p->kws = kwsalloc(trans);
+ } else {
+ p->kws = kwsalloc(NULL);
+ }
+ kwsincr(p->kws, p->pattern, p->patternlen);
+ kwsprep(p->kws);
return;
+ }
if (opt->pcre) {
compile_pcre_regexp(p, opt);
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- if (p->pcre_regexp)
+ if (p->kws)
+ kwsfree(p->kws);
+ else if (p->pcre_regexp)
free_pcre_regexp(p);
else
regfree(&p->regexp);
static int fixmatch(struct grep_pat *p, char *line, char *eol,
regmatch_t *match)
{
- char *hit;
-
- if (p->ignore_case) {
- char *s = line;
- do {
- hit = strcasestr(s, p->pattern);
- if (hit)
- break;
- s += strlen(s) + 1;
- } while (s < eol);
- } else
- hit = memmem(line, eol - line, p->pattern, p->patternlen);
-
- if (!hit) {
+ struct kwsmatch kwsm;
+ size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
+ if (offset == -1) {
match->rm_so = match->rm_eo = -1;
return REG_NOMATCH;
- }
- else {
- match->rm_so = hit - line;
- match->rm_eo = match->rm_so + p->patternlen;
+ } else {
+ match->rm_so = offset;
+ match->rm_eo = match->rm_so + kwsm.size[0];
return 0;
}
}
diff --git a/grep.h b/grep.h
index ae50c45a4d408c1931f39b3d393ea0188bdde950..a65280026d5dee8ab059bead79f05d6a1111147a 100644 (file)
--- a/grep.h
+++ b/grep.h
typedef int pcre;
typedef int pcre_extra;
#endif
+#include "kwset.h"
enum grep_pat_token {
GREP_PATTERN,
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
+ kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index e058d184d1c072bd3078fe17ad41f1026f093201..917a264eea6c6b3593e9f19caadbef715daace20 100755 (executable)
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
git grep -f f -Fi a
"
-test_expect_failure 'git grep -Fi Y<NUL>x a' "
+test_expect_success 'git grep -Fi Y<NUL>x a' "
printf 'YQx' | q_to_nul >f &&
test_must_fail git grep -f f -Fi a
"
git grep -f f a
"
-test_expect_failure 'git grep y<NUL>x a' "
+test_expect_success 'git grep y<NUL>x a' "
printf 'yQx' | q_to_nul >f &&
test_must_fail git grep -f f a
"