diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-10-20 10:20:29 -0600 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-10-21 05:56:30 -0700 |
commit | 634c83a2672252257e360eb1939b7ec762ef6308 (patch) | |
tree | cb4ff7fdfa156a475b0cd9b5b9ace912cbc84318 /t/re | |
parent | d53d27f973b3f4329ad8aa1e1a11554c8e19c3e3 (diff) | |
download | perl-634c83a2672252257e360eb1939b7ec762ef6308.tar.gz |
regexec.c: utf8 doesn't match non-utf8 self
Some regex patterns don't match a character against itself when the target
string is in utf8 and the pattern isn't, and the character is variant
under utf8 (that is, its utf8 encoding differs from its single-byte,
non-utf8 representation). This means only Latin1-range characters in the
pattern are affected.
The solution is to test for this case and use the utf8 representation of
the pattern character for the comparison.
Diffstat (limited to 't/re')
-rw-r--r-- | t/re/pat.t | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/t/re/pat.t b/t/re/pat.t index c007880b8c..46681040e5 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { } -plan tests => 398; # Update this when adding/deleting tests. +plan tests => 402; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1072,6 +1072,18 @@ sub run_tests { } + { # Some constructs with Latin1 characters cause a utf8 string not to + # match itself in non-utf8 + my $c = "\xc0"; + my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/; + utf8::upgrade($utf8_pattern); + ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; + ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; + utf8::upgrade($c); + ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; + ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; + } + { # Test that a regex followed by an operator and/or a statement modifier work # These tests use string-eval so that it reports a clean error when it fails |