summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-09-27 19:36:35 -0600
committerKarl Williamson <public@khwilliamson.com>2011-10-01 09:15:32 -0600
commit0bda3001dd6310abfca950134ed78f6192d3d8a7 (patch)
tree44fa1a99c82fae80eda8f6b1f12f9ea86b733f11
parent9ac792f4c8d84f4e7dc8fa8d1eb254f0389b3b38 (diff)
downloadperl-0bda3001dd6310abfca950134ed78f6192d3d8a7.tar.gz
utf8.c: Don't invert beyond-Unicode code points
The Unicode properties are defined only on Unicode code points. In the past, this meant all property matches would fail for non-Unicode code points. However, starting with 5.15.1 some properties do succeed. This restores the previous behavior.
-rw-r--r--t/re/pat.t16
-rw-r--r--utf8.c14
2 files changed, 29 insertions, 1 deletions
diff --git a/t/re/pat.t b/t/re/pat.t
index 4ef9663b5e..988c23e01d 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -21,7 +21,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 451; # Update this when adding/deleting tests.
+plan tests => 455; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1167,6 +1167,20 @@ sub run_tests {
is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state');
}
+ {
+ # Suppress warnings, as the non-unicode one comes out even if turn off
+ # warnings here (because the execution is done in another scope).
+ local $SIG{__WARN__} = sub {};
+ my $str = "\x{110000}";
+
+ # No non-unicode code points match any Unicode property, even inverse
+ # ones
+ unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{}");
+ unlike($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode doesn't match \\p{}");
+ like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{}");
+ like($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{}");
+ }
+
} # End of sub run_tests
1;
diff --git a/utf8.c b/utf8.c
index 003e3fcadd..b8a52273b5 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2476,11 +2476,25 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
/* Invert if the data says it should be */
if (invert_it_svp && SvUV(*invert_it_svp)) {
+
+ /* Unicode properties should come with all bits above PERL_UNICODE_MAX
+ * be 0, and their inversion should also be 0, as we don't succeed any
+ * Unicode property matches for non-Unicode code points */
+ if (start <= PERL_UNICODE_MAX) {
+
+ /* The code below assumes that we never cross the
+ * Unicode/above-Unicode boundary in a range, as otherwise we would
+ * have to figure out where to stop flipping the bits. Since this
+ * boundary is divisible by a large power of 2, and swatches comes
+ * in small powers of 2, this should be a valid assumption */
+ assert(start + span - 1 <= PERL_UNICODE_MAX);
+
send = s + scur;
while (s < send) {
*s = ~(*s);
s++;
}
+ }
}
/* read $swash->{EXTRAS}