diff options
-rw-r--r-- | Documentation/git-grep.txt | 8 | ||||
-rw-r--r-- | Makefile | 16 | ||||
-rw-r--r-- | builtin/grep.c | 52 | ||||
-rw-r--r-- | config.mak.in | 1 | ||||
-rw-r--r-- | configure.ac | 40 | ||||
-rwxr-xr-x | contrib/completion/git-completion.bash | 3 | ||||
-rw-r--r-- | grep.c | 125 | ||||
-rw-r--r-- | grep.h | 9 | ||||
-rw-r--r-- | t/README | 5 | ||||
-rwxr-xr-x | t/t7810-grep.sh | 117 | ||||
-rw-r--r-- | t/test-lib.sh | 1 |
11 files changed, 347 insertions, 30 deletions
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index d7523b3e45..e150c77cff 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -12,7 +12,8 @@ SYNOPSIS 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp] [-v | --invert-match] [-h|-H] [--full-name] [-E | --extended-regexp] [-G | --basic-regexp] - [-F | --fixed-strings] [-n] + [-P | --perl-regexp] + [-F | --fixed-strings] [-n | --line-number] [-l | --files-with-matches] [-L | --files-without-match] [(-O | --open-files-in-pager) [<pager>]] [-z | --null] @@ -97,6 +98,11 @@ OPTIONS Use POSIX extended/basic regexp for patterns. Default is to use basic regexp. +-P:: +--perl-regexp:: + Use Perl-compatible regexp for patterns. Requires libpcre to be + compiled in. + -F:: --fixed-strings:: Use fixed strings for patterns (don't interpret pattern @@ -24,6 +24,12 @@ all:: # Define NO_OPENSSL environment variable if you do not have OpenSSL. # This also implies BLK_SHA1. # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -1258,6 +1264,15 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif +ifdef USE_LIBPCRE + BASIC_CFLAGS += -DUSE_LIBPCRE + ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) + endif + EXTLIBS += -lpcre +endif + ifdef NO_CURL BASIC_CFLAGS += -DNO_CURL REMOTE_CURL_PRIMARY = @@ -2089,6 +2104,7 @@ GIT-BUILD-OPTIONS: FORCE @echo PYTHON_PATH=\''$(subst ','\'',$(PYTHON_PATH_SQ))'\' >>$@ @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ + @echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@ ifdef GIT_TEST_CMP diff --git a/builtin/grep.c b/builtin/grep.c index 931eee0d75..871afaa3c7 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int i; int dummy; int use_index = 1; + enum { + pattern_type_unspecified = 0, + pattern_type_bre, + pattern_type_ere, + pattern_type_fixed, + pattern_type_pcre, + }; + int pattern_type = pattern_type_unspecified; + struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -774,13 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix) "descend at most <depth> levels", PARSE_OPT_NONEG, NULL, 1 }, OPT_GROUP(""), - OPT_BIT('E', "extended-regexp", &opt.regflags, - "use extended POSIX regular expressions", REG_EXTENDED), - OPT_NEGBIT('G', "basic-regexp", &opt.regflags, - "use basic POSIX regular expressions (default)", - REG_EXTENDED), - OPT_BOOLEAN('F', "fixed-strings", &opt.fixed, - "interpret patterns as fixed strings"), + OPT_SET_INT('E', "extended-regexp", &pattern_type, + "use extended POSIX regular expressions", + pattern_type_ere), + OPT_SET_INT('G', "basic-regexp", &pattern_type, + "use basic POSIX regular expressions (default)", + pattern_type_bre), + OPT_SET_INT('F', "fixed-strings", &pattern_type, + "interpret patterns as fixed strings", + pattern_type_fixed), + OPT_SET_INT('P', "perl-regexp", &pattern_type, + "use Perl-compatible regular expressions", + pattern_type_pcre), OPT_GROUP(""), OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"), OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1), @@ -886,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); + switch (pattern_type) { + case pattern_type_fixed: + opt.fixed = 1; + opt.pcre = 0; + break; + case pattern_type_bre: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags &= ~REG_EXTENDED; + break; + case pattern_type_ere: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags |= REG_EXTENDED; + break; + case pattern_type_pcre: + opt.fixed = 0; + opt.pcre = 1; + break; + default: + break; /* nothing */ + } if (use_index && !startup_info->have_repository) /* die the same way as if we did it at the beginning */ @@ -925,8 +961,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) die(_("no pattern given.")); if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; - if ((opt.regflags != REG_NEWLINE) && opt.fixed) - die(_("cannot mix --fixed-strings and regexp")); #ifndef NO_PTHREADS if (online_cpus() == 1 || !grep_threads_ok(&opt)) diff --git a/config.mak.in b/config.mak.in index e378534cbd..f30130b17a 100644 --- a/config.mak.in +++ b/config.mak.in @@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@ NO_ICONV=@NO_ICONV@ OLD_ICONV=@OLD_ICONV@ NO_REGEX=@NO_REGEX@ +USE_LIBPCRE=@USE_LIBPCRE@ NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@ INLINE=@INLINE@ SOCKLEN_T=@SOCKLEN_T@ diff --git a/configure.ac b/configure.ac index fafd81557c..048a1d4972 100644 --- a/configure.ac +++ b/configure.ac @@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)]) AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\ GIT_PARSE_WITH(openssl)) # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# +AC_ARG_WITH(libpcre, +AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), +if test "$withval" = "no"; then \ + USE_LIBPCRE=; \ +elif test "$withval" = "yes"; then \ + USE_LIBPCRE=YesPlease; \ +else + USE_LIBPCRE=YesPlease; \ + LIBPCREDIR=$withval; \ + AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \ + GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \ +fi \ +) +# # Define NO_CURL if you do not have curl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -435,6 +456,25 @@ AC_SUBST(NEEDS_SSL_WITH_CRYPTO) AC_SUBST(NO_OPENSSL) # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# + +if test -n "$USE_LIBPCRE"; then + +GIT_STASH_FLAGS($LIBPCREDIR) + +AC_CHECK_LIB([pcre], [pcre_version], +[USE_LIBPCRE=YesPlease], +[USE_LIBPCRE=]) + +GIT_UNSTASH_FLAGS($LIBPCREDIR) + +AC_SUBST(USE_LIBPCRE) + +fi + +# # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index bb8d7d0878..b36290fa60 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1441,8 +1441,9 @@ _git_grep () __gitcomp " --cached --text --ignore-case --word-regexp --invert-match - --full-name + --full-name --line-number --extended-regexp --basic-regexp --fixed-strings + --perl-regexp --files-with-matches --name-only --files-without-match --max-depth @@ -59,6 +59,84 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) return ret; } +static NORETURN void compile_regexp_failed(const struct grep_pat *p, + const char *error) +{ + char where[1024]; + + if (p->no) + sprintf(where, "In '%s' at %d, ", p->origin, p->no); + else if (p->origin) + sprintf(where, "%s, ", p->origin); + else + where[0] = 0; + + die("%s'%s': %s", where, p->pattern, error); +} + +#ifdef USE_LIBPCRE +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + const char *error; + int erroffset; + int options = 0; + + if (opt->ignore_case) + options |= PCRE_CASELESS; + + p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, + NULL); + if (!p->pcre_regexp) + compile_regexp_failed(p, error); + + p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error); + if (!p->pcre_extra_info && error) + die("%s", error); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ovector[30], ret, flags = 0; + + if (eflags & REG_NOTBOL) + flags |= PCRE_NOTBOL; + + ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line, + 0, flags, ovector, ARRAY_SIZE(ovector)); + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) + die("pcre_exec failed with error code %d", ret); + if (ret > 0) { + ret = 0; + match->rm_so = ovector[0]; + match->rm_eo = ovector[1]; + } + + return ret; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ + pcre_free(p->pcre_regexp); + pcre_free(p->pcre_extra_info); +} +#else /* !USE_LIBPCRE */ +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + return 1; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE */ + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; @@ -70,20 +148,17 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (p->fixed) return; + if (opt->pcre) { + compile_pcre_regexp(p, opt); + return; + } + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; - char where[1024]; - if (p->no) - sprintf(where, "In '%s' at %d, ", - p->origin, p->no); - else if (p->origin) - sprintf(where, "%s, ", p->origin); - else - where[0] = 0; regerror(err, &p->regexp, errbuf, 1024); regfree(&p->regexp); - die("%s'%s': %s", where, p->pattern, errbuf); + compile_regexp_failed(p, errbuf); } } @@ -320,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - regfree(&p->regexp); + if (p->pcre_regexp) + free_pcre_regexp(p); + else + regfree(&p->regexp); break; default: break; @@ -412,6 +490,21 @@ static int regmatch(const regex_t *preg, char *line, char *eol, return regexec(preg, line, 1, match, eflags); } +static int patmatch(struct grep_pat *p, char *line, char *eol, + regmatch_t *match, int eflags) +{ + int hit; + + if (p->fixed) + hit = !fixmatch(p, line, eol, match); + else if (p->pcre_regexp) + hit = !pcrematch(p, line, eol, match, eflags); + else + hit = !regmatch(&p->regexp, line, eol, match, eflags); + + return hit; +} + static int strip_timestamp(char *bol, char **eol_p) { char *eol = *eol_p; @@ -461,10 +554,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, } again: - if (p->fixed) - hit = !fixmatch(p, bol, eol, pmatch); - else - hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags); + hit = patmatch(p, bol, eol, pmatch, eflags); if (hit && p->word_regexp) { if ((pmatch[0].rm_so < 0) || @@ -791,10 +881,7 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) - hit = !fixmatch(p, bol, bol + *left_p, &m); - else - hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0); + hit = patmatch(p, bol, bol + *left_p, &m, 0); if (!hit || m.rm_so < 0 || m.rm_eo < 0) continue; if (earliest < 0 || m.rm_so < earliest) @@ -891,7 +978,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, int hit; /* - * look_ahead() skips quicly to the line that possibly + * look_ahead() skips quickly to the line that possibly * has the next hit; don't call it if we need to do * something more than just skipping the current line * in response to an unmatch for the current line. E.g. @@ -1,6 +1,12 @@ #ifndef GREP_H #define GREP_H #include "color.h" +#ifdef USE_LIBPCRE +#include <pcre.h> +#else +typedef int pcre; +typedef int pcre_extra; +#endif enum grep_pat_token { GREP_PATTERN, @@ -33,6 +39,8 @@ struct grep_pat { size_t patternlen; enum grep_header_field field; regex_t regexp; + pcre *pcre_regexp; + pcre_extra *pcre_extra_info; unsigned fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; @@ -83,6 +91,7 @@ struct grep_opt { #define GREP_BINARY_TEXT 2 int binary; int extended; + int pcre; int relative; int pathname; int null_following_name; @@ -588,6 +588,11 @@ use these, and "test_set_prereq" for how to define your own. Test is not run by root user, and an attempt to write to an unwritable file is expected to fail correctly. + - LIBPCRE + + Git was compiled with USE_LIBPCRE=YesPlease. Wrap any tests + that use git-grep --perl-regexp or git-grep -P in these. + Tips for Writing Tests ---------------------- diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 8184c264cf..69bd576d1c 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -26,6 +26,17 @@ test_expect_success setup ' echo foo mmap bar_mmap echo foo_mmap bar mmap baz } >file && + { + echo Hello world + echo HeLLo world + echo Hello_world + echo HeLLo_world + } >hello_world && + { + echo "a+b*c" + echo "a+bc" + echo "abc" + } >ab && echo vvv >v && echo ww w >w && echo x x xx x >x && @@ -221,7 +232,17 @@ do git grep --max-depth 0 -n -e vvv $H -- t . >actual && test_cmp expected actual ' + test_expect_success "grep $L with grep.extendedRegexp=false" ' + echo "ab:a+bc" >expected && + git -c grep.extendedRegexp=false grep "a+b*c" ab >actual && + test_cmp expected actual + ' + test_expect_success "grep $L with grep.extendedRegexp=true" ' + echo "ab:abc" >expected && + git -c grep.extendedRegexp=true grep "a+b*c" ab >actual && + test_cmp expected actual + ' done cat >expected <<EOF @@ -599,4 +620,100 @@ test_expect_success 'grep -e -- -- path' ' test_cmp expected actual ' +cat >expected <<EOF +hello.c:int main(int argc, const char **argv) +hello.c: printf("Hello world.\n"); +EOF + +test_expect_success LIBPCRE 'grep --perl-regexp pattern' ' + git grep --perl-regexp "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P pattern' ' + git grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success 'grep pattern with grep.extendedRegexp=true' ' + >empty && + test_must_fail git -c grep.extendedregexp=true \ + grep "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp empty actual +' + +test_expect_success LIBPCRE 'grep -P pattern with grep.extendedRegexp=true' ' + git -c grep.extendedregexp=true \ + grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -v pattern' ' + { + echo "ab:a+b*c" + echo "ab:a+bc" + } >expected && + git grep -P -v "abc" ab >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -i pattern' ' + { + echo "hello.c: printf(\"Hello world.\n\");" + } >expected && + git grep -P -i "PRINTF\([^\d]+\)" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -w pattern' ' + { + echo "hello_world:Hello world" + echo "hello_world:HeLLo world" + } >expected && + git grep -P -w "He((?i)ll)o" hello_world >actual && + test_cmp expected actual +' + +test_expect_success 'grep -G invalidpattern properly dies ' ' + test_must_fail git grep -G "a[" +' + +test_expect_success 'grep -E invalidpattern properly dies ' ' + test_must_fail git grep -E "a[" +' + +test_expect_success LIBPCRE 'grep -P invalidpattern properly dies ' ' + test_must_fail git grep -P "a[" +' + +test_expect_success 'grep -G -E -F pattern' ' + echo "ab:a+b*c" >expected && + git grep -G -E -F "a+b*c" ab >actual && + test_cmp expected actual +' + +test_expect_success 'grep -E -F -G pattern' ' + echo "ab:a+bc" >expected && + git grep -E -F -G "a+b*c" ab >actual && + test_cmp expected actual +' + +test_expect_success 'grep -F -G -E pattern' ' + echo "ab:abc" >expected && + git grep -F -G -E "a+b*c" ab >actual && + test_cmp expected actual +' + +test_expect_success 'grep -G -F -P -E pattern' ' + >empty && + test_must_fail git grep -G -F -P -E "a\x{2b}b\x{2a}c" ab >actual && + test_cmp empty actual +' + +test_expect_success LIBPCRE 'grep -G -F -E -P pattern' ' + echo "ab:a+b*c" >expected && + git grep -G -F -E -P "a\x{2b}b\x{2a}c" ab >actual && + test_cmp expected actual +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index b12b993e9a..64390d716d 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1071,6 +1071,7 @@ esac test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PYTHON" && test_set_prereq PYTHON +test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON" |