summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2013-09-22 22:36:57 -0600
committer: Karl Williamson <public@khwilliamson.com> 2013-09-24 11:36:19 -0600
commit: cfafade59059f2ad7828285e85e1a3d6629b60b0 (patch)
tree: c856759dba335e53b3f5f568acc3f562ba70c40b /regcomp.c
parent: cdd87c1d4df41f9a54cccff996fa64d291adcee8 (diff)
download: perl-cfafade59059f2ad7828285e85e1a3d6629b60b0.tar.gz
regcomp.c: Tighten optimizer for /li matches
The synthetic start class (ssc) generated by the regex optimizer frequently has case-insensitive matching enabled, even if there is no /i anywhere in the pattern. This commit ensures that the ssc of any pattern that doesn't use /i does not itself specify /i.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index efefd0a17b..a77fea11d2 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -144,6 +144,7 @@ struct RExC_state_t {
I32 recurse_count; /* Number of recurse regops */
I32 in_lookbehind;
I32 contains_locale;
+ I32 contains_i;
I32 override_recoding;
I32 in_multi_char_class;
struct reg_code_block *code_blocks; /* positions of literal (?{})
@@ -201,6 +202,7 @@ struct RExC_state_t {
#define RExC_recurse_count (pRExC_state->recurse_count)
#define RExC_in_lookbehind (pRExC_state->in_lookbehind)
#define RExC_contains_locale (pRExC_state->contains_locale)
+#define RExC_contains_i (pRExC_state->contains_i)
#define RExC_override_recoding (pRExC_state->override_recoding)
#define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
@@ -1118,7 +1120,10 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
* necessary. */
if (RExC_contains_locale) {
ANYOF_POSIXL_SETALL(ssc);
- ANYOF_FLAGS(ssc) |= ANYOF_LOCALE_FLAGS;
+ ANYOF_FLAGS(ssc) |= ANYOF_LOCALE|ANYOF_POSIXL;
+ if (RExC_contains_i) {
+ ANYOF_FLAGS(ssc) |= ANYOF_LOC_FOLD;
+ }
}
else {
ANYOF_POSIXL_ZERO(ssc);
@@ -5985,6 +5990,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
RExC_uni_semantics = 0;
RExC_contains_locale = 0;
+ RExC_contains_i = 0;
pRExC_state->runtime_code_qr = NULL;
DEBUG_COMPILE_r({
@@ -6027,6 +6033,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
rx_flags = orig_rx_flags;
+ if (rx_flags & PMf_FOLD) {
+ RExC_contains_i = 1;
+ }
if (initial_charset == REGEX_LOCALE_CHARSET) {
RExC_contains_locale = 1;
}
@@ -8917,6 +8926,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
RExC_flags |= posflags;
RExC_flags &= ~negflags;
set_regex_charset(&RExC_flags, cs);
+ if (RExC_flags & RXf_PMf_FOLD) {
+ RExC_contains_i = 1;
+ }
return;
/*NOTREACHED*/
default: