summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regcomp.c38
-rw-r--r--t/re/anyof.t10
2 files changed, 43 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index 8be6cbe274..0de0afd9be 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4335,6 +4335,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
}
#endif
}
+
+ if ( STR_LEN(scan) == 1
+ && isALPHA_A(* STRING(scan))
+ && ( OP(scan) == EXACTFAA
+ || ( OP(scan) == EXACTFU
+ && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(scan)))))
+ {
+ U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
+
+ /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
+ * whose mask (stored in the flags) is the complement of the bit
+ * that differs between upper and lower case, and whose argument
+ * is the lowest code point of the pair (which the '&' forces) */
+ OP(scan) = ANYOFM;
+ ARG_SET(scan, *STRING(scan) & mask);
+ FLAGS(scan) = mask;
+ }
}
#ifdef DEBUGGING
@@ -5275,6 +5292,27 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
OP(next) = EXACTFU;
}
+ if ( STR_LEN(next) == 1
+ && isALPHA_A(* STRING(next))
+ && ( OP(next) == EXACTFAA
+ || ( OP(next) == EXACTFU
+ && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
+ {
+ /* These differ in just one bit */
+ U8 mask = ~ ('A' ^ 'a');
+
+ assert(isALPHA_A(* STRING(next)));
+
+ /* Then replace it by an ANYOFM node, whose
+ * mask (stored in the flags) is the complement
+ * of the bit that differs between upper and
+ * lower case, and whose argument is the lowest
+ * code point of the pair (which the '&' forces) */
+ OP(next) = ANYOFM;
+ ARG_SET(next, *STRING(next) & mask);
+ FLAGS(next) = mask;
+ }
+
if (flags & SCF_DO_STCLASS) {
mincount = 0;
maxcount = REG_INFTY;
diff --git a/t/re/anyof.t b/t/re/anyof.t
index f08116b9be..ad0a2d9ada 100644
--- a/t/re/anyof.t
+++ b/t/re/anyof.t
@@ -141,13 +141,13 @@ my @tests = (
'(?il)[\x{212A}]' => 'ANYOFL{i}[{utf8 locale}Kk][212A]',
'(?il)(?[\x{212A}])' => 'ANYOFL{utf8-locale-reqd}[Kk][212A]',
- '(?i)b[s]\xe0' => 'EXACTFU <b>', # The s goes into a 2nd node
+ '(?i)b[s]\xe0' => 'ANYOFM[Bb]', # The s goes into a 2nd node
- 'ebcdic_ok_below_this_marker',
+ '[aA]' => 'ANYOFM[Aa]',
+ '[bB]' => 'ANYOFM[Bb]',
+ '[kK]' => 'ANYOFM[Kk]',
- '[aA]' => 'EXACTFAA <a>',
- '[bB]' => 'EXACTFU <b>',
- '[kK]' => 'EXACTFAA <k>',
+ 'ebcdic_ok_below_this_marker',
'(?i:[^:])' => 'NANYOFM[:]',