summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-03-12 12:37:40 -0700
committerKarl Williamson <public@khwilliamson.com>2011-03-12 12:56:19 -0700
commit419d89744c528882284e8c1f6db945501a4ca7b6 (patch)
tree392c905be1c7e03f2c942a8c340151ffc7c4e5fe
parenta10707d85bb39630271c7862504e361a1cc722f9 (diff)
downloadperl-419d89744c528882284e8c1f6db945501a4ca7b6.tar.gz
regcomp.c: /a should handle /\xdf/i same as /u
/a and /u should match identically case-insensitively, but they didn't. Nor was /a being tested because it was thought that they handled things identically, and the tests were already taking too long. So this adds some tests as well.
-rw-r--r--regcomp.c8
-rw-r--r--t/re/fold_grind.t34
2 files changed, 35 insertions, 7 deletions
diff --git a/regcomp.c b/regcomp.c
index 0581217a9b..3f4d6345c5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9331,13 +9331,13 @@ S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8
break;
case LATIN_SMALL_LETTER_SHARP_S:
- /* Under /d and /u, this can match the two chars "ss" */
+ /* Under /a, /d, and /u, this can match the two chars "ss" */
if (! MORE_ASCII_RESTRICTED) {
add_alternate(alternate_ptr, (U8 *) "ss", 2);
- /* And under /u, it can match even if the target is not
- * utf8 */
- if (UNI_SEMANTICS) {
+ /* And under /u or /a, it can match even if the target is
+ * not utf8 */
+ if (AT_LEAST_UNI_SEMANTICS) {
ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
}
}
diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t
index 0a848f9616..5861bd2f15 100644
--- a/t/re/fold_grind.t
+++ b/t/re/fold_grind.t
@@ -208,6 +208,9 @@ my $has_tested_ascii_l;
my $has_tested_above_latin1_d;
my $has_tested_ascii_d;
my $has_tested_non_latin1_d;
+my $has_tested_above_latin1_a;
+my $has_tested_ascii_a;
+my $has_tested_non_latin1_a;
# For use by pairs() in generating combinations
sub prefix {
@@ -223,7 +226,7 @@ sub pairs (@) {
map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
}
-my @charsets = qw(d u aa);
+my @charsets = qw(d u a aa);
my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
push @charsets, 'l' if $current_locale eq 'C';
@@ -365,6 +368,30 @@ foreach my $test (sort { numerically } keys %tests) {
}
}
}
+ elsif ($charset eq 'a') {
+ # Similarly for a. This should match identically to /u, so wasn't
+ # tested at all until a bug was found that was thereby missed.
+ # As a compromise, beyond one test (besides self) each, we don't
+ # test pairs that are both ascii; or both above latin1, or are
+ # combinations of ascii and above latin1.
+ if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
+ if ($target_has_ascii && $pattern_has_ascii) {
+ next if defined $has_tested_ascii_a
+ && $has_tested_ascii_a != $test;
+ $has_tested_ascii_a = $test
+ }
+ elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
+ next if defined $has_tested_above_latin1_a
+ && $has_tested_above_latin1_a != $test;
+ $has_tested_above_latin1_a = $test;
+ }
+ else {
+ next if defined $has_tested_non_latin1_a
+ && $has_tested_non_latin1_a != $test;
+ $has_tested_non_latin1_a = $test;
+ }
+ }
+ }
}
foreach my $utf8_target (0, 1) { # Both utf8 and not, for
@@ -402,8 +429,9 @@ foreach my $test (sort { numerically } keys %tests) {
my $todo = ($test == 0xdf
&& $lhs =~ /DF/
&& $uni_semantics
- && ($charset eq 'u' || $charset eq 'd')
- && ! ($charset eq 'u' && (($upgrade_target eq "") != ($upgrade_pattern eq "")))
+ && ($charset eq 'u' || $charset eq 'a' || $charset eq 'd')
+ && ! (($charset eq 'u' || $charset eq 'a')
+ && (($upgrade_target eq "") != ($upgrade_pattern eq "")))
&& ! ($charset eq 'd' && (! $upgrade_target || ! $upgrade_pattern))
);
my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";