diff options
author | Norihiro Tanaka <noritnk@kcn.ne.jp> | 2020-10-08 18:20:13 +0900 |
---|---|---|
committer | Jim Meyering <meyering@fb.com> | 2020-11-01 11:32:25 -0800 |
commit | ffc6e407e3657598702ba24ab1ba3a6b8ab253ea (patch) | |
tree | 2420e833b8eb851473274a3db52c317c5566578f | |
parent | 3bd06de4f7fd6331a59e7285afadc719ce09f384 (diff) | |
download | grep-ffc6e407e3657598702ba24ab1ba3a6b8ab253ea.tar.gz |
grep: use RE_NO_SUB when calling regex solely to check syntax
* src/dfasearch.c (regex_compile): New parameter. All callers changed.
(GEAcompile): Move setting syntax for regex into regex_compile()
function. This addresses a performance problem exposed by extreme
regular expressions, as described in https://bugs.gnu.org/43862 .
-rw-r--r-- | src/dfasearch.c | 16 |
1 file changed, 12 insertions, 4 deletions
```diff
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 812a0dc9..8ede0ec4 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -145,7 +145,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
 
 static bool
 regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
-               ptrdiff_t pcount, ptrdiff_t lineno, bool syntax_only)
+               ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
+               bool syntax_only)
 {
   struct re_pattern_buffer pat0;
   struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount];
@@ -157,6 +158,11 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
 
   pat->translate = NULL;
 
+  if (syntax_only)
+    re_set_syntax (syntax_bits | RE_NO_SUB);
+  else
+    re_set_syntax (syntax_bits);
+
   char const *err = re_compile_pattern (p, len, pat);
   if (!err)
     return true;
@@ -189,7 +195,6 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
 
   if (match_icase)
     syntax_bits |= RE_ICASE;
-  re_set_syntax (syntax_bits);
   int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
   dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
   bool bs_safe = !localeinfo.multibyte | localeinfo.using_utf8;
@@ -242,7 +247,10 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
           dc->patterns++;
         }
 
-      if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
+      re_set_syntax (syntax_bits);
+
+      if (!regex_compile (dc, p, len, dc->pcount, lineno, syntax_bits,
+                          !backref))
         compilation_failed = true;
 
       p = sep + 1;
@@ -317,7 +325,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
       dc->patterns--;
       dc->pcount++;
 
-      if (!regex_compile (dc, buf, buflen, 0, -1, false))
+      if (!regex_compile (dc, buf, buflen, 0, -1, syntax_bits, false))
         abort ();
     }
 
```