summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Norihiro Tanaka <noritnk@kcn.ne.jp> 2020-10-08 18:20:13 +0900
committer: Jim Meyering <meyering@fb.com> 2020-11-01 11:32:25 -0800
commit: ffc6e407e3657598702ba24ab1ba3a6b8ab253ea (patch)
tree: 2420e833b8eb851473274a3db52c317c5566578f
parent: 3bd06de4f7fd6331a59e7285afadc719ce09f384 (diff)
download: grep-ffc6e407e3657598702ba24ab1ba3a6b8ab253ea.tar.gz
grep: use RE_NO_SUB when calling regex solely to check syntax
* src/dfasearch.c (regex_compile): New parameter syntax_bits. All callers changed. (GEAcompile): Move setting the syntax for regex into the regex_compile() function, which adds RE_NO_SUB to the syntax when the pattern is compiled solely to check its syntax. This addresses a performance problem exposed by extreme regular expressions, as described in https://bugs.gnu.org/43862 .
-rw-r--r-- src/dfasearch.c 16
1 file changed, 12 insertions, 4 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 812a0dc9..8ede0ec4 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -145,7 +145,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
static bool
regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
- ptrdiff_t pcount, ptrdiff_t lineno, bool syntax_only)
+ ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
+ bool syntax_only)
{
struct re_pattern_buffer pat0;
struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount];
@@ -157,6 +158,11 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
pat->translate = NULL;
+ if (syntax_only)
+ re_set_syntax (syntax_bits | RE_NO_SUB);
+ else
+ re_set_syntax (syntax_bits);
+
char const *err = re_compile_pattern (p, len, pat);
if (!err)
return true;
@@ -189,7 +195,6 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
if (match_icase)
syntax_bits |= RE_ICASE;
- re_set_syntax (syntax_bits);
int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
bool bs_safe = !localeinfo.multibyte | localeinfo.using_utf8;
@@ -242,7 +247,10 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
dc->patterns++;
}
- if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
+ re_set_syntax (syntax_bits);
+
+ if (!regex_compile (dc, p, len, dc->pcount, lineno, syntax_bits,
+ !backref))
compilation_failed = true;
p = sep + 1;
@@ -317,7 +325,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
dc->patterns--;
dc->pcount++;
- if (!regex_compile (dc, buf, buflen, 0, -1, false))
+ if (!regex_compile (dc, buf, buflen, 0, -1, syntax_bits, false))
abort ();
}