diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-09-27 19:36:35 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-10-01 09:15:32 -0600 |
commit | 0bda3001dd6310abfca950134ed78f6192d3d8a7 (patch) | |
tree | 44fa1a99c82fae80eda8f6b1f12f9ea86b733f11 | |
parent | 9ac792f4c8d84f4e7dc8fa8d1eb254f0389b3b38 (diff) | |
download | perl-0bda3001dd6310abfca950134ed78f6192d3d8a7.tar.gz |
utf8.c: Don't invert beyond-Unicode code points
The Unicode properties are defined only on Unicode code points. In the
past, this meant all property matches would fail for non-Unicode code
points. However, starting with 5.15.1 some properties do succeed. This
restores the previous behavior.
-rw-r--r-- | t/re/pat.t | 16 | ||||
-rw-r--r-- | utf8.c | 14 |
2 files changed, 29 insertions, 1 deletions
diff --git a/t/re/pat.t b/t/re/pat.t index 4ef9663b5e..988c23e01d 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -21,7 +21,7 @@ BEGIN { require './test.pl'; } -plan tests => 451; # Update this when adding/deleting tests. +plan tests => 455; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1167,6 +1167,20 @@ sub run_tests { is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); } + { + # Suppress warnings, as the non-unicode one comes out even if turn off + # warnings here (because the execution is done in another scope). + local $SIG{__WARN__} = sub {}; + my $str = "\x{110000}"; + + # No non-unicode code points match any Unicode property, even inverse + # ones + unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{}"); + unlike($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode doesn't match \\p{}"); + like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{}"); + like($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{}"); + } + } # End of sub run_tests 1; @@ -2476,11 +2476,25 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) /* Invert if the data says it should be */ if (invert_it_svp && SvUV(*invert_it_svp)) { + + /* Unicode properties should come with all bits above PERL_UNICODE_MAX + * be 0, and their inversion should also be 0, as we don't succeed any + * Unicode property matches for non-Unicode code points */ + if (start <= PERL_UNICODE_MAX) { + + /* The code below assumes that we never cross the + * Unicode/above-Unicode boundary in a range, as otherwise we would + * have to figure out where to stop flipping the bits. Since this + * boundary is divisible by a large power of 2, and swatches comes + * in small powers of 2, this should be a valid assumption */ + assert(start + span - 1 <= PERL_UNICODE_MAX); + send = s + scur; while (s < send) { *s = ~(*s); s++; } + } } /* read $swash->{EXTRAS} |