diff options
-rw-r--r-- | regcomp.c | 38 | ||||
-rw-r--r-- | t/re/anyof.t | 10 |
2 files changed, 43 insertions, 5 deletions
@@ -4335,6 +4335,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, } #endif } + + if ( STR_LEN(scan) == 1 + && isALPHA_A(* STRING(scan)) + && ( OP(scan) == EXACTFAA + || ( OP(scan) == EXACTFU + && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(scan))))) + { + U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */ + + /* Replace a length 1 ASCII fold pair node with an ANYOFM node, + * with the mask set to the complement of the bit that differs + * between upper and lower case, and the lowest code point of the + * pair (which the '&' forces) */ + OP(scan) = ANYOFM; + ARG_SET(scan, *STRING(scan) & mask); + FLAGS(scan) = mask; + } } #ifdef DEBUGGING @@ -5275,6 +5292,27 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, OP(next) = EXACTFU; } + if ( STR_LEN(next) == 1 + && isALPHA_A(* STRING(next)) + && ( OP(next) == EXACTFAA + || ( OP(next) == EXACTFU + && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))) + { + /* These differ in just one bit */ + U8 mask = ~ ('A' ^ 'a'); + + assert(isALPHA_A(* STRING(next))); + + /* Then replace it by an ANYOFM node, with + * the mask set to the complement of the + * bit that differs between upper and lower + * case, and the lowest code point of the + * pair (which the '&' forces) */ + OP(next) = ANYOFM; + ARG_SET(next, *STRING(next) & mask); + FLAGS(next) = mask; + } + if (flags & SCF_DO_STCLASS) { mincount = 0; maxcount = REG_INFTY; diff --git a/t/re/anyof.t b/t/re/anyof.t index f08116b9be..ad0a2d9ada 100644 --- a/t/re/anyof.t +++ b/t/re/anyof.t @@ -141,13 +141,13 @@ my @tests = ( '(?il)[\x{212A}]' => 'ANYOFL{i}[{utf8 locale}Kk][212A]', '(?il)(?[\x{212A}])' => 'ANYOFL{utf8-locale-reqd}[Kk][212A]', - '(?i)b[s]\xe0' => 'EXACTFU <b>', # The s goes into a 2nd node + '(?i)b[s]\xe0' => 'ANYOFM[Bb]', # The s goes into a 2nd node - 'ebcdic_ok_below_this_marker', + '[aA]' => 'ANYOFM[Aa]', + '[bB]' => 'ANYOFM[Bb]', + '[kK]' => 'ANYOFM[Kk]', - '[aA]' => 'EXACTFAA <a>', - '[bB]' => 'EXACTFU <b>', - '[kK]' => 'EXACTFAA <k>', + 'ebcdic_ok_below_this_marker', '(?i:[^:])' => 'NANYOFM[:]', |