Fix regex /il and /iaa failures for single-element [] class

This was a regression introduced in the v5.17 series that only affected UTF-8 encoded patterns. The code here should have mirrored, but did not, the similar logic located after the defchar: label in this file, which is executed for the general case (that is, when the node does not stem from a single-element [bracketed] character class). We don't fold code points 0-255 under locale, as those folds aren't known until run time. Similarly, we don't allow folds that cross the 255/256 boundary, as those aren't well-defined; and under /aa we don't allow folds that cross the 127/128 boundary.
author: Karl Williamson <public@khwilliamson.com> 2013-05-08 23:06:17 -0600
committer: Karl Williamson <public@khwilliamson.com> 2013-05-09 10:15:13 -0600
commit: 3345a47950127cf99a197eba4dce9c91f7bc9139 (patch)
tree: b584b8866960a66c9d80ac8dc993144c99cb9d10
parent: fe02ddb7a070fc75fab3f7c2ed77f31b0dc5fc23 (diff)
download: perl-3345a47950127cf99a197eba4dce9c91f7bc9139.tar.gz
2 files changed, 18 insertions, 6 deletions
diff --git a/regcomp.c b/regcomp.c
index de17958146..bc0c0efd86 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10131,8 +10131,9 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32
      * additionally will populate the node's STRING with <code_point>, if <len>
      * is 0.  In both cases <*flagp> is appropriately set
      *
-     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
-     * folded (the latter only when the rules indicate it can match 'ss') */
+     * It knows that under FOLD, the Latin Sharp S and UTF characters above
+     * 255, must be folded (the former only when the rules indicate it can
+     * match 'ss') */
 
     bool len_passed_in = cBOOL(len != 0);
     U8 character[UTF8_MAXBYTES_CASE+1];
@@ -10141,8 +10142,15 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32
 
     if (! len_passed_in) {
         if (UTF) {
-            if (FOLD) {
-                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            if (FOLD && (! LOC || code_point > 255)) {
+                _to_uni_fold_flags(NATIVE_TO_UNI(code_point),
+                                   character,
+                                   &len,
+                                   FOLD_FLAGS_FULL | ((LOC)
+                                                     ? FOLD_FLAGS_LOCALE
+                                                     : (ASCII_FOLD_RESTRICTED)
+                                                       ? FOLD_FLAGS_NOMIX_ASCII
+                                                       : 0));
             }
             else {
                 uvchr_to_utf8( character, code_point);
diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t
index 3267336d84..bb45a699ad 100644
--- a/t/re/fold_grind.t
+++ b/t/re/fold_grind.t
@@ -666,6 +666,8 @@ foreach my $test (sort { numerically } keys %tests) {
           foreach my $bracketed (0, 1) {   # Put rhs in [...], or not
             next if $bracketed && @pattern != 1;    # bracketed makes these
                                                     # or's instead of a sequence
+            foreach my $optimize_bracketed (0, 1) {
+                next if $optimize_bracketed && ! $bracketed;
             foreach my $inverted (0,1) {
                 next if $inverted && ! $bracketed;  # inversion only valid in [^...]
                 next if $inverted && @target != 1;  # [perl #89750] multi-char
@@ -687,8 +689,9 @@ foreach my $test (sort { numerically } keys %tests) {
                       $rhs .=  $rhs_char;
 
                       # Add a character to the class, so class doesn't get
-                      # optimized out
-                      $rhs .= '_]' if $bracketed;
+                      # optimized out, unless we are testing that optimization
+                      $rhs .= '_' if $optimize_bracketed;
+                      $rhs .= ']' if $bracketed;
                   }
 
                   # Add one of: no capturing parens
@@ -812,6 +815,7 @@ foreach my $test (sort { numerically } keys %tests) {
               }
             }
           }
+          }
         }
       }
       unless($list_all_tests) {
author	Karl Williamson <public@khwilliamson.com>	2013-05-08 23:06:17 -0600
committer	Karl Williamson <public@khwilliamson.com>	2013-05-09 10:15:13 -0600
commit	3345a47950127cf99a197eba4dce9c91f7bc9139 (patch)
tree	b584b8866960a66c9d80ac8dc993144c99cb9d10
parent	fe02ddb7a070fc75fab3f7c2ed77f31b0dc5fc23 (diff)
download	perl-3345a47950127cf99a197eba4dce9c91f7bc9139.tar.gz