summary | refs | log | tree | commit | diff
path: root/t/re
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2010-10-20 10:20:29 -0600
committer Father Chrysostomos <sprout@cpan.org> 2010-10-21 05:56:30 -0700
commit 634c83a2672252257e360eb1939b7ec762ef6308 (patch)
tree cb4ff7fdfa156a475b0cd9b5b9ace912cbc84318 /t/re
parent d53d27f973b3f4329ad8aa1e1a11554c8e19c3e3 (diff)
download perl-634c83a2672252257e360eb1939b7ec762ef6308.tar.gz
regexec.c: utf8 doesn't match non-utf8 self
Some regex patterns fail to match a character against itself when the target string is in utf8, the pattern is not, and the character is variant under utf8 (that is, its utf8 encoding differs from its single-byte encoding). This means only Latin1-range characters in the pattern are affected. The solution is to test for this case and use the utf8 representation of the pattern character for the comparison.
Diffstat (limited to 't/re')
-rw-r--r-- t/re/pat.t 14
1 file changed, 13 insertions, 1 deletion
diff --git a/t/re/pat.t b/t/re/pat.t
index c007880b8c..46681040e5 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -23,7 +23,7 @@ BEGIN {
}
-plan tests => 398; # Update this when adding/deleting tests.
+plan tests => 402; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1072,6 +1072,18 @@ sub run_tests {
}
+ { # Some constructs with Latin1 characters cause a utf8 string not to
+ # match itself in non-utf8
+ my $c = "\xc0";
+ my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
+ utf8::upgrade($utf8_pattern);
+ ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
+ ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not";
+ utf8::upgrade($c);
+ ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not";
+ ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8";
+ }
+
{
# Test that a regex followed by an operator and/or a statement modifier work
# These tests use string-eval so that it reports a clean error when it fails