diff options
author | René Scharfe <rene.scharfe@lsrfire.ath.cx> | 2009-01-10 00:08:40 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2009-01-09 21:33:35 -0800 |
commit | fb62eb7fab97cea880ea7fe4f341a4dfad14ab48 (patch) | |
tree | 281d1b9daeb9a1d0a7ecd86b1e03d686a3394633 /grep.c | |
parent | c123b7c5fb596d93cd015645212c379fc3c381d5 (diff) | |
download | git-fb62eb7fab97cea880ea7fe4f341a4dfad14ab48.tar.gz |
grep -w: forward to next possible position after rejected match
grep -w only accepts matches that are delimited by non-word characters.
If a match from regexec() doesn't meet this criterion, grep continues its
search after the first character of that match.
We can be a bit smarter here and first skip all positions that follow a
word character, as a match starting there can't meet our criterion. This
way we can consume characters quite cheaply and don't need to special-case
the handling of the beginning of a line.
Here's a contrived example command on msysgit (best of five runs):
$ time git grep -w ...... v1.6.1 >/dev/null
real 0m1.611s
user 0m0.000s
sys 0m0.015s
With the patch it's quite a bit faster:
$ time git grep -w ...... v1.6.1 >/dev/null
real 0m1.179s
user 0m0.000s
sys 0m0.015s
More common search patterns will gain a lot less, but it's a nice cleanup
anyway.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'grep.c')
-rw-r--r-- | grep.c | 11 |
1 files changed, 7 insertions, 4 deletions
@@ -294,7 +294,6 @@ static struct { static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx) { int hit = 0; - int at_true_bol = 1; int saved_ch = 0; regmatch_t pmatch[10]; @@ -337,7 +336,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol * either end of the line, or at word boundary * (i.e. the next char must not be a word char). */ - if ( ((pmatch[0].rm_so == 0 && at_true_bol) || + if ( ((pmatch[0].rm_so == 0) || !word_char(bol[pmatch[0].rm_so-1])) && ((pmatch[0].rm_eo == (eol-bol)) || !word_char(bol[pmatch[0].rm_eo])) ) @@ -349,10 +348,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol /* There could be more than one match on the * line, and the first match might not be * strict word match. But later ones could be! + * Forward to the next possible start, i.e. the + * next position following a non-word char. */ bol = pmatch[0].rm_so + bol + 1; - at_true_bol = 0; - goto again; + while (word_char(bol[-1]) && bol < eol) + bol++; + if (bol < eol) + goto again; } } if (p->token == GREP_PATTERN_HEAD && saved_ch) |