summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-07-01 11:58:00 -0600
committerKarl Williamson <khw@cpan.org>2017-07-12 21:14:26 -0600
commitd22ec71778db5c28dfb4e339337f90dad5d214f9 (patch)
tree9521906349857015d508b0330e0c6d93a226fc03 /t
parente050c0076b9d0972c025d71afe0180d9dfbc6b15 (diff)
downloadperl-d22ec71778db5c28dfb4e339337f90dad5d214f9.tar.gz
Forbid above IV_MAX code points
This implements the restriction of code points to 0..IV_MAX in such a way that the process doesn't die when presented with input UTF-8 that evaluates to a larger one. Instead, it is treated as overflow. The commit reinstates making the offending process die if it tries to create a character that is above IV_MAX (like chr(0xFFFFFFFFFFFFF)), or tries to do certain operations on one if one somehow did get created. The long term goal is to use code points above IV_MAX internally, as Perl6 does. So code and tests are not removed, just commented out.
Diffstat (limited to 't')
-rw-r--r--t/lib/warnings/utf825
-rw-r--r--t/op/index.t1
-rw-r--r--t/op/utf8decode.t19
-rw-r--r--t/re/pat_advanced.t3
4 files changed, 17 insertions, 31 deletions
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8
index b322ae0250..906630898f 100644
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -735,38 +735,25 @@ $a = uc("\x{103}");
$a = ucfirst("\x{104}");
EXPECT
########
-# NAME Deprecation of too-large code points
+# NAME Fatality of too-large code points, but IV_MAX works, warns
require "../test.pl";
use warnings 'non_unicode';
my $max_cp = ~0 >> 1;
my $max_char = chr $max_cp;
-my $to_warn_cp = $max_cp + 1;
-my $to_warn_char = chr $to_warn_cp;
-$max_char =~ /[\x{110000}\P{Unassigned}]/;
-$to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
my $temp = qr/$max_char/;
-$temp = qr/$to_warn_char/;
$temp = uc($max_char);
-$temp = uc($to_warn_char);
+$max_char =~ /[\x{110000}\P{Unassigned}]/;
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh $max_char, "\n";
-print $fh $to_warn_char, "\n";
close $fh;
+my $error_cp = $max_cp + 1;
+my $error_char = chr $error_cp;
EXPECT
-OPTION regex
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in pattern match \(m//\) at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
+OPTIONS fatal regex
Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in uc at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
Code point 0x7F+ is not Unicode, (may not be|requires a Perl extension, and so is not) portable in print at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in print at - line \d+.
+Use of code point 0x80+ is not allowed; the permissible max is 0x7F+\ at - line \d+.
########
# NAME [perl #127262]
BEGIN{
diff --git a/t/op/index.t b/t/op/index.t
index 996c5e4516..5e951914d3 100644
--- a/t/op/index.t
+++ b/t/op/index.t
@@ -131,7 +131,6 @@ is(rindex($a, "foo", ), 0);
SKIP: {
skip("Not a 64-bit machine", 3) if length sprintf("%x", ~0) <= 8;
- no warnings 'deprecated'; # These are above IV_MAX on 32 bit machines
my $a = eval q{"\x{80000000}"};
my $s = $a.'defxyz';
is(index($s, 'def'), 1, "0x80000000 is a single character");
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index 1e9ea88749..18ab588b8c 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -85,16 +85,17 @@ foreach (<DATA>) {
my $message;
my $after = "";
if ($expect64 < $expect32 && ! $is64bit) {
- if ( substr($octets, 0, 1) gt "\xfe"
- || ( substr($octets, 0, 1) eq "\xfe"
- && length $octets > 1
- && substr($octets, 1, 1) le "\xbf"
- && substr($octets, 1, 1) ge "\x80"))
- {
+ # This is needed for code points above IV_MAX
+ #if ( substr($octets, 0, 1) gt "\xfe"
+ # || ( substr($octets, 0, 1) eq "\xfe"
+ # && length $octets > 1
+ # && substr($octets, 1, 1) le "\xbf"
+ # && substr($octets, 1, 1) ge "\x80"))
+ #{
like($warnings[0], qr/overflow/, "overflow warning for $id seen");
shift @warnings;
$after .= "overflow";
- }
+ #}
}
# The data below assumes that if there is both a 'short' and
@@ -189,8 +190,8 @@ __DATA__
3.4.1 N15 - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0, immediately after start byte 0xc0
3.5 Impossible bytes (but not with Perl's extended UTF-8)
3.5.1 n - 1 fe - 1 byte available, need 7
-3.5.2 N2,1 - 1 ff - 1 byte available, need 13
-3.5.3 N9,7 - 4 fe:fe:ff:ff - byte 0xfe
+3.5.2 n - 1 ff - 1 byte available, need 13
+3.5.3 N7 - 4 fe:fe:ff:ff - byte 0xfe
4 Overlong sequences
4.1 Examples of an overlong ASCII character
4.1.1 n - 2 c0:af - overlong
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index 3f2d6c3b5e..d90ceeb5bd 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2421,8 +2421,7 @@ EOF
$Config{uvsize} == 8
or skip("need large code-points for this test", 1);
- # This is above IV_MAX on 32 bit machines, so turn off those warnings
- fresh_perl_is('no warnings "deprecated"; /\x{E000000000}|/ and print qq(ok\n)', "ok\n", {},
+ fresh_perl_is('/\x{E000000000}|/ and print qq(ok\n)', "ok\n", {},
"buffer overflow in TRIE_STORE_REVCHAR");
}