regcomp.c: /a should handle /\xdf/i same as /u

/a and /u should match identically case-insensitively, but they didn't. Nor was /a being tested because it was thought that they handled things identically, and the tests were already taking too long. So this adds some tests as well.
author: Karl Williamson <public@khwilliamson.com> 2011-03-12 12:37:40 -0700
committer: Karl Williamson <public@khwilliamson.com> 2011-03-12 12:56:19 -0700
commit: 419d89744c528882284e8c1f6db945501a4ca7b6 (patch)
tree: 392c905be1c7e03f2c942a8c340151ffc7c4e5fe
parent: a10707d85bb39630271c7862504e361a1cc722f9 (diff)
download: perl-419d89744c528882284e8c1f6db945501a4ca7b6.tar.gz
2 files changed, 35 insertions, 7 deletions
diff --git a/regcomp.c b/regcomp.c
index 0581217a9b..3f4d6345c5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9331,13 +9331,13 @@ S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8
 		break;
 	    case LATIN_SMALL_LETTER_SHARP_S:
 
-		/* Under /d and /u, this can match the two chars "ss" */
+		/* Under /a, /d, and /u, this can match the two chars "ss" */
 		if (! MORE_ASCII_RESTRICTED) {
 		    add_alternate(alternate_ptr, (U8 *) "ss", 2);
 
-		    /* And under /u, it can match even if the target is not
-		     * utf8 */
-		    if (UNI_SEMANTICS) {
+		    /* And under /u or /a, it can match even if the target is
+		     * not utf8 */
+		    if (AT_LEAST_UNI_SEMANTICS) {
 			ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
 		    }
 		}
diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t
index 0a848f9616..5861bd2f15 100644
--- a/t/re/fold_grind.t
+++ b/t/re/fold_grind.t
@@ -208,6 +208,9 @@ my $has_tested_ascii_l;
 my $has_tested_above_latin1_d;
 my $has_tested_ascii_d;
 my $has_tested_non_latin1_d;
+my $has_tested_above_latin1_a;
+my $has_tested_ascii_a;
+my $has_tested_non_latin1_a;
 
 # For use by pairs() in generating combinations
 sub prefix {
@@ -223,7 +226,7 @@ sub pairs (@) {
     map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
 }
 
-my @charsets = qw(d u aa);
+my @charsets = qw(d u a aa);
 my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
 push @charsets, 'l' if $current_locale eq 'C';
 
@@ -365,6 +368,30 @@ foreach my $test (sort { numerically } keys %tests) {
             }
           }
         }
+        elsif ($charset eq 'a') {
+          # Similarly for a.  This should match identically to /u, so wasn't
+          # tested at all until a bug was found that was thereby missed.
+          # As a compromise, beyond one test (besides self) each, we  don't
+          # test pairs that are both ascii; or both above latin1, or are
+          # combinations of ascii and above latin1.
+          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
+            if ($target_has_ascii && $pattern_has_ascii) {
+              next if defined $has_tested_ascii_a
+                      && $has_tested_ascii_a != $test;
+              $has_tested_ascii_a = $test
+            }
+            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
+              next if defined $has_tested_above_latin1_a
+                      && $has_tested_above_latin1_a != $test;
+              $has_tested_above_latin1_a = $test;
+            }
+            else {
+              next if defined $has_tested_non_latin1_a
+                      && $has_tested_non_latin1_a != $test;
+              $has_tested_non_latin1_a = $test;
+            }
+          }
+        }
       }
 
       foreach my $utf8_target (0, 1) {    # Both utf8 and not, for
@@ -402,8 +429,9 @@ foreach my $test (sort { numerically } keys %tests) {
           my $todo = ($test == 0xdf
                       && $lhs =~ /DF/
                       && $uni_semantics
-                      && ($charset eq 'u' || $charset eq 'd')
-                      && ! ($charset eq 'u' && (($upgrade_target eq "") != ($upgrade_pattern eq "")))
+                      && ($charset eq 'u' || $charset eq 'a' || $charset eq 'd')
+                      && ! (($charset eq 'u' || $charset eq 'a')
+                            && (($upgrade_target eq "") != ($upgrade_pattern eq "")))
                       && ! ($charset eq 'd' && (! $upgrade_target || ! $upgrade_pattern))
                       );
           my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
author	Karl Williamson <public@khwilliamson.com>	2011-03-12 12:37:40 -0700
committer	Karl Williamson <public@khwilliamson.com>	2011-03-12 12:56:19 -0700
commit	419d89744c528882284e8c1f6db945501a4ca7b6 (patch)
tree	392c905be1c7e03f2c942a8c340151ffc7c4e5fe
parent	a10707d85bb39630271c7862504e361a1cc722f9 (diff)
download	perl-419d89744c528882284e8c1f6db945501a4ca7b6.tar.gz