summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-grep.txt8
-rw-r--r--Makefile16
-rw-r--r--builtin/grep.c52
-rw-r--r--config.mak.in1
-rw-r--r--configure.ac40
-rwxr-xr-xcontrib/completion/git-completion.bash3
-rw-r--r--grep.c125
-rw-r--r--grep.h9
-rw-r--r--t/README5
-rwxr-xr-xt/t7810-grep.sh117
-rw-r--r--t/test-lib.sh1
11 files changed, 347 insertions, 30 deletions
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index d7523b3e45..e150c77cff 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -12,7 +12,8 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
- [-F | --fixed-strings] [-n]
+ [-P | --perl-regexp]
+ [-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
[-z | --null]
@@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.
+-P::
+--perl-regexp::
+ Use Perl-compatible regexp for patterns. Requires libpcre to be
+ compiled in.
+
-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern
diff --git a/Makefile b/Makefile
index edd78419ff..e40ac0c7f5 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
+# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
+# able to use Perl-compatible regular expressions.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
@@ -1258,6 +1264,15 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif
+ifdef USE_LIBPCRE
+ BASIC_CFLAGS += -DUSE_LIBPCRE
+ ifdef LIBPCREDIR
+ BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+ EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+ endif
+ EXTLIBS += -lpcre
+endif
+
ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
@@ -2089,6 +2104,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo PYTHON_PATH=\''$(subst ','\'',$(PYTHON_PATH_SQ))'\' >>$@
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
+ @echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
ifdef GIT_TEST_CMP
diff --git a/builtin/grep.c b/builtin/grep.c
index 931eee0d75..871afaa3c7 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
int i;
int dummy;
int use_index = 1;
+ enum {
+ pattern_type_unspecified = 0,
+ pattern_type_bre,
+ pattern_type_ere,
+ pattern_type_fixed,
+ pattern_type_pcre,
+ };
+ int pattern_type = pattern_type_unspecified;
+
struct option options[] = {
OPT_BOOLEAN(0, "cached", &cached,
"search in index instead of in the work tree"),
@@ -774,13 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
"descend at most <depth> levels", PARSE_OPT_NONEG,
NULL, 1 },
OPT_GROUP(""),
- OPT_BIT('E', "extended-regexp", &opt.regflags,
- "use extended POSIX regular expressions", REG_EXTENDED),
- OPT_NEGBIT('G', "basic-regexp", &opt.regflags,
- "use basic POSIX regular expressions (default)",
- REG_EXTENDED),
- OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
- "interpret patterns as fixed strings"),
+ OPT_SET_INT('E', "extended-regexp", &pattern_type,
+ "use extended POSIX regular expressions",
+ pattern_type_ere),
+ OPT_SET_INT('G', "basic-regexp", &pattern_type,
+ "use basic POSIX regular expressions (default)",
+ pattern_type_bre),
+ OPT_SET_INT('F', "fixed-strings", &pattern_type,
+ "interpret patterns as fixed strings",
+ pattern_type_fixed),
+ OPT_SET_INT('P', "perl-regexp", &pattern_type,
+ "use Perl-compatible regular expressions",
+ pattern_type_pcre),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
@@ -886,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_NO_INTERNAL_HELP);
+ switch (pattern_type) {
+ case pattern_type_fixed:
+ opt.fixed = 1;
+ opt.pcre = 0;
+ break;
+ case pattern_type_bre:
+ opt.fixed = 0;
+ opt.pcre = 0;
+ opt.regflags &= ~REG_EXTENDED;
+ break;
+ case pattern_type_ere:
+ opt.fixed = 0;
+ opt.pcre = 0;
+ opt.regflags |= REG_EXTENDED;
+ break;
+ case pattern_type_pcre:
+ opt.fixed = 0;
+ opt.pcre = 1;
+ break;
+ default:
+ break; /* nothing */
+ }
if (use_index && !startup_info->have_repository)
/* die the same way as if we did it at the beginning */
@@ -925,8 +961,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
die(_("no pattern given."));
if (!opt.fixed && opt.ignore_case)
opt.regflags |= REG_ICASE;
- if ((opt.regflags != REG_NEWLINE) && opt.fixed)
- die(_("cannot mix --fixed-strings and regexp"));
#ifndef NO_PTHREADS
if (online_cpus() == 1 || !grep_threads_ok(&opt))
diff --git a/config.mak.in b/config.mak.in
index e378534cbd..f30130b17a 100644
--- a/config.mak.in
+++ b/config.mak.in
@@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@
NO_ICONV=@NO_ICONV@
OLD_ICONV=@OLD_ICONV@
NO_REGEX=@NO_REGEX@
+USE_LIBPCRE=@USE_LIBPCRE@
NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@
INLINE=@INLINE@
SOCKLEN_T=@SOCKLEN_T@
diff --git a/configure.ac b/configure.ac
index fafd81557c..048a1d4972 100644
--- a/configure.ac
+++ b/configure.ac
@@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)])
AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\
GIT_PARSE_WITH(openssl))
#
+# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
+# able to use Perl-compatible regular expressions.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+AC_ARG_WITH(libpcre,
+AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)])
+AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]),
+if test "$withval" = "no"; then \
+ USE_LIBPCRE=; \
+elif test "$withval" = "yes"; then \
+ USE_LIBPCRE=YesPlease; \
+else
+ USE_LIBPCRE=YesPlease; \
+ LIBPCREDIR=$withval; \
+ AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \
+ GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \
+fi \
+)
+#
# Define NO_CURL if you do not have curl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
@@ -435,6 +456,25 @@ AC_SUBST(NEEDS_SSL_WITH_CRYPTO)
AC_SUBST(NO_OPENSSL)
#
+# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
+# able to use Perl-compatible regular expressions.
+#
+
+if test -n "$USE_LIBPCRE"; then
+
+GIT_STASH_FLAGS($LIBPCREDIR)
+
+AC_CHECK_LIB([pcre], [pcre_version],
+[USE_LIBPCRE=YesPlease],
+[USE_LIBPCRE=])
+
+GIT_UNSTASH_FLAGS($LIBPCREDIR)
+
+AC_SUBST(USE_LIBPCRE)
+
+fi
+
+#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index bb8d7d0878..b36290fa60 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1441,8 +1441,9 @@ _git_grep ()
__gitcomp "
--cached
--text --ignore-case --word-regexp --invert-match
- --full-name
+ --full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
+ --perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth
diff --git a/grep.c b/grep.c
index 63c4280cac..d03d9e24c2 100644
--- a/grep.c
+++ b/grep.c
@@ -59,6 +59,84 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
return ret;
}
+/*
+ * Report a pattern that failed to compile and terminate via die().
+ *
+ * p     - the pattern being compiled; p->pattern is echoed in the message,
+ *         prefixed by p->origin (and the number p->no when it is non-zero)
+ *         if an origin is recorded.
+ * error - human-readable reason supplied by the regex backend.
+ *
+ * Declared NORETURN: compile_pcre_regexp() continues using the compiled
+ * pattern right after calling this, so control must not come back.
+ */
+static NORETURN void compile_regexp_failed(const struct grep_pat *p,
+		const char *error)
+{
+	char where[1024];
+
+	/*
+	 * p->origin is not length-limited here, so format with snprintf() to
+	 * keep an overlong origin from overrunning the buffer; a truncated
+	 * prefix is acceptable in a fatal error message.
+	 */
+	if (p->no)
+		snprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
+	else if (p->origin)
+		snprintf(where, sizeof(where), "%s, ", p->origin);
+	else
+		where[0] = 0;
+
+	die("%s'%s': %s", where, p->pattern, error);
+}
+
+#ifdef USE_LIBPCRE
+/*
+ * Compile p->pattern with libpcre and store the result in p->pcre_regexp,
+ * together with the study data in p->pcre_extra_info.  Case-insensitive
+ * matching is requested when opt->ignore_case is set.  Dies on failure.
+ */
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+	const char *errmsg;
+	int erroffset;
+	int flags = opt->ignore_case ? PCRE_CASELESS : 0;
+
+	p->pcre_regexp = pcre_compile(p->pattern, flags, &errmsg, &erroffset,
+			NULL);
+	if (p->pcre_regexp == NULL)
+		compile_regexp_failed(p, errmsg);
+
+	/* A NULL study result is fatal only when an error message was set. */
+	p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &errmsg);
+	if (p->pcre_extra_info == NULL && errmsg != NULL)
+		die("%s", errmsg);
+}
+
+/*
+ * Match one line against a compiled PCRE pattern.
+ *
+ * line/eol - start and one-past-the-end of the subject text.
+ * match    - receives the offsets of the overall match on success.
+ * eflags   - regexec()-style flags; only REG_NOTBOL is honoured.
+ *
+ * Returns 0 on a match and PCRE_ERROR_NOMATCH otherwise, so callers can
+ * negate the result to get a "hit" flag.  Any other pcre_exec() failure
+ * is fatal.
+ */
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+		regmatch_t *match, int eflags)
+{
+	int ovector[30], ret, flags = 0;
+
+	if (eflags & REG_NOTBOL)
+		flags |= PCRE_NOTBOL;
+
+	ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
+			0, flags, ovector, ARRAY_SIZE(ovector));
+	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+		die("pcre_exec failed with error code %d", ret);
+	/*
+	 * pcre_exec() returns 0 for a successful match whose capture groups
+	 * did not all fit in ovector; ovector[0] and ovector[1] are still
+	 * filled in then, so handle it like a positive return instead of
+	 * reporting a hit with *match left unset.
+	 */
+	if (ret >= 0) {
+		ret = 0;
+		match->rm_so = ovector[0];
+		match->rm_eo = ovector[1];
+	}
+
+	return ret;
+}
+
+/*
+ * Release the compiled pattern and the study data that
+ * compile_pcre_regexp() stored in p.
+ */
+static void free_pcre_regexp(struct grep_pat *p)
+{
+ pcre_free(p->pcre_regexp);
+ pcre_free(p->pcre_extra_info);
+}
+#else /* !USE_LIBPCRE */
+/*
+ * Stub used when built without USE_LIBPCRE: asking for a Perl-compatible
+ * pattern is reported through die() instead of being compiled.
+ */
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+ die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
+}
+
+/*
+ * Stub used when built without USE_LIBPCRE: always returns 1, which
+ * patmatch() negates into "no hit".
+ */
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+ regmatch_t *match, int eflags)
+{
+ return 1;
+}
+
+/* Stub used when built without USE_LIBPCRE: there is nothing to release. */
+static void free_pcre_regexp(struct grep_pat *p)
+{
+}
+#endif /* !USE_LIBPCRE */
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
@@ -70,20 +148,17 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;
+ if (opt->pcre) {
+ compile_pcre_regexp(p, opt);
+ return;
+ }
+
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
- char where[1024];
- if (p->no)
- sprintf(where, "In '%s' at %d, ",
- p->origin, p->no);
- else if (p->origin)
- sprintf(where, "%s, ", p->origin);
- else
- where[0] = 0;
regerror(err, &p->regexp, errbuf, 1024);
regfree(&p->regexp);
- die("%s'%s': %s", where, p->pattern, errbuf);
+ compile_regexp_failed(p, errbuf);
}
}
@@ -320,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- regfree(&p->regexp);
+ if (p->pcre_regexp)
+ free_pcre_regexp(p);
+ else
+ regfree(&p->regexp);
break;
default:
break;
@@ -412,6 +490,21 @@ static int regmatch(const regex_t *preg, char *line, char *eol,
return regexec(preg, line, 1, match, eflags);
}
+/*
+ * Run pattern p against the text [line, eol) with the matcher that fits
+ * the pattern: fixmatch() for fixed strings, pcrematch() when a PCRE
+ * pattern has been compiled, regmatch() otherwise.
+ *
+ * The matcher's result is negated, so a zero result is returned as 1
+ * (hit) and any non-zero result as 0.  The matched span is stored in
+ * *match by the matcher.
+ */
+static int patmatch(struct grep_pat *p, char *line, char *eol,
+		regmatch_t *match, int eflags)
+{
+	if (p->fixed)
+		return !fixmatch(p, line, eol, match);
+
+	if (p->pcre_regexp)
+		return !pcrematch(p, line, eol, match, eflags);
+
+	return !regmatch(&p->regexp, line, eol, match, eflags);
+}
+
static int strip_timestamp(char *bol, char **eol_p)
{
char *eol = *eol_p;
@@ -461,10 +554,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
}
again:
- if (p->fixed)
- hit = !fixmatch(p, bol, eol, pmatch);
- else
- hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
+ hit = patmatch(p, bol, eol, pmatch, eflags);
if (hit && p->word_regexp) {
if ((pmatch[0].rm_so < 0) ||
@@ -791,10 +881,7 @@ static int look_ahead(struct grep_opt *opt,
int hit;
regmatch_t m;
- if (p->fixed)
- hit = !fixmatch(p, bol, bol + *left_p, &m);
- else
- hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
+ hit = patmatch(p, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
if (earliest < 0 || m.rm_so < earliest)
@@ -891,7 +978,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
int hit;
/*
- * look_ahead() skips quicly to the line that possibly
+ * look_ahead() skips quickly to the line that possibly
* has the next hit; don't call it if we need to do
* something more than just skipping the current line
* in response to an unmatch for the current line. E.g.
diff --git a/grep.h b/grep.h
index 06621fe663..cd055cdfa8 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,12 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#ifdef USE_LIBPCRE
+#include <pcre.h>
+#else
+typedef int pcre;
+typedef int pcre_extra;
+#endif
enum grep_pat_token {
GREP_PATTERN,
@@ -33,6 +39,8 @@ struct grep_pat {
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
+ pcre *pcre_regexp;
+ pcre_extra *pcre_extra_info;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
@@ -83,6 +91,7 @@ struct grep_opt {
#define GREP_BINARY_TEXT 2
int binary;
int extended;
+ int pcre;
int relative;
int pathname;
int null_following_name;
diff --git a/t/README b/t/README
index cad36dd750..c85abaffb3 100644
--- a/t/README
+++ b/t/README
@@ -588,6 +588,11 @@ use these, and "test_set_prereq" for how to define your own.
Test is not run by root user, and an attempt to write to an
unwritable file is expected to fail correctly.
+ - LIBPCRE
+
+ Git was compiled with USE_LIBPCRE=YesPlease. Wrap any tests
+ that use git-grep --perl-regexp or git-grep -P in these.
+
Tips for Writing Tests
----------------------
diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh
index 8184c264cf..69bd576d1c 100755
--- a/t/t7810-grep.sh
+++ b/t/t7810-grep.sh
@@ -26,6 +26,17 @@ test_expect_success setup '
echo foo mmap bar_mmap
echo foo_mmap bar mmap baz
} >file &&
+ {
+ echo Hello world
+ echo HeLLo world
+ echo Hello_world
+ echo HeLLo_world
+ } >hello_world &&
+ {
+ echo "a+b*c"
+ echo "a+bc"
+ echo "abc"
+ } >ab &&
echo vvv >v &&
echo ww w >w &&
echo x x xx x >x &&
@@ -221,7 +232,17 @@ do
git grep --max-depth 0 -n -e vvv $H -- t . >actual &&
test_cmp expected actual
'
+ test_expect_success "grep $L with grep.extendedRegexp=false" '
+ echo "ab:a+bc" >expected &&
+ git -c grep.extendedRegexp=false grep "a+b*c" ab >actual &&
+ test_cmp expected actual
+ '
+ test_expect_success "grep $L with grep.extendedRegexp=true" '
+ echo "ab:abc" >expected &&
+ git -c grep.extendedRegexp=true grep "a+b*c" ab >actual &&
+ test_cmp expected actual
+ '
done
cat >expected <<EOF
@@ -599,4 +620,100 @@ test_expect_success 'grep -e -- -- path' '
test_cmp expected actual
'
+cat >expected <<EOF
+hello.c:int main(int argc, const char **argv)
+hello.c: printf("Hello world.\n");
+EOF
+
+test_expect_success LIBPCRE 'grep --perl-regexp pattern' '
+ git grep --perl-regexp "\p{Ps}.*?\p{Pe}" hello.c >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success LIBPCRE 'grep -P pattern' '
+ git grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'grep pattern with grep.extendedRegexp=true' '
+ >empty &&
+ test_must_fail git -c grep.extendedregexp=true \
+ grep "\p{Ps}.*?\p{Pe}" hello.c >actual &&
+ test_cmp empty actual
+'
+
+test_expect_success LIBPCRE 'grep -P pattern with grep.extendedRegexp=true' '
+ git -c grep.extendedregexp=true \
+ grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success LIBPCRE 'grep -P -v pattern' '
+ {
+ echo "ab:a+b*c"
+ echo "ab:a+bc"
+ } >expected &&
+ git grep -P -v "abc" ab >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success LIBPCRE 'grep -P -i pattern' '
+ {
+ echo "hello.c: printf(\"Hello world.\n\");"
+ } >expected &&
+ git grep -P -i "PRINTF\([^\d]+\)" hello.c >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success LIBPCRE 'grep -P -w pattern' '
+ {
+ echo "hello_world:Hello world"
+ echo "hello_world:HeLLo world"
+ } >expected &&
+ git grep -P -w "He((?i)ll)o" hello_world >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'grep -G invalidpattern properly dies ' '
+ test_must_fail git grep -G "a["
+'
+
+test_expect_success 'grep -E invalidpattern properly dies ' '
+ test_must_fail git grep -E "a["
+'
+
+test_expect_success LIBPCRE 'grep -P invalidpattern properly dies ' '
+ test_must_fail git grep -P "a["
+'
+
+test_expect_success 'grep -G -E -F pattern' '
+ echo "ab:a+b*c" >expected &&
+ git grep -G -E -F "a+b*c" ab >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'grep -E -F -G pattern' '
+ echo "ab:a+bc" >expected &&
+ git grep -E -F -G "a+b*c" ab >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'grep -F -G -E pattern' '
+ echo "ab:abc" >expected &&
+ git grep -F -G -E "a+b*c" ab >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'grep -G -F -P -E pattern' '
+ >empty &&
+ test_must_fail git grep -G -F -P -E "a\x{2b}b\x{2a}c" ab >actual &&
+ test_cmp empty actual
+'
+
+test_expect_success LIBPCRE 'grep -G -F -E -P pattern' '
+ echo "ab:a+b*c" >expected &&
+ git grep -G -F -E -P "a\x{2b}b\x{2a}c" ab >actual &&
+ test_cmp expected actual
+'
+
test_done
diff --git a/t/test-lib.sh b/t/test-lib.sh
index b12b993e9a..64390d716d 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1071,6 +1071,7 @@ esac
test -z "$NO_PERL" && test_set_prereq PERL
test -z "$NO_PYTHON" && test_set_prereq PYTHON
+test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE
# Can we rely on git's output in the C locale?
if test -n "$GETTEXT_POISON"