summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-09-27 19:36:35 -0600
committerKarl Williamson <public@khwilliamson.com>2011-10-01 09:15:32 -0600
commit0bda3001dd6310abfca950134ed78f6192d3d8a7 (patch)
tree44fa1a99c82fae80eda8f6b1f12f9ea86b733f11 /utf8.c
parent9ac792f4c8d84f4e7dc8fa8d1eb254f0389b3b38 (diff)
downloadperl-0bda3001dd6310abfca950134ed78f6192d3d8a7.tar.gz
utf8.c: Don't invert beyond-Unicode code points
The Unicode properties are defined only on Unicode code points. In the past, this meant all property matches would fail for non-Unicode code points. However, starting with 5.15.1 some properties do succeed. This restores the previous behavior.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c14
1 files changed, 14 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 003e3fcadd..b8a52273b5 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2476,11 +2476,25 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
/* Invert if the data says it should be */
if (invert_it_svp && SvUV(*invert_it_svp)) {
+
+ /* Unicode properties should come with all bits above PERL_UNICODE_MAX
+ * be 0, and their inversion should also be 0, as we don't succeed any
+ * Unicode property matches for non-Unicode code points */
+ if (start <= PERL_UNICODE_MAX) {
+
+ /* The code below assumes that we never cross the
+ * Unicode/above-Unicode boundary in a range, as otherwise we would
+ * have to figure out where to stop flipping the bits. Since this
+ * boundary is divisible by a large power of 2, and swatches comes
+ * in small powers of 2, this should be a valid assumption */
+ assert(start + span - 1 <= PERL_UNICODE_MAX);
+
send = s + scur;
while (s < send) {
*s = ~(*s);
s++;
}
+ }
}
/* read $swash->{EXTRAS}