Forbid code points above IV_MAX

This implements the restriction of code points to 0..IV_MAX in such a way that the process doesn't die when presented with input UTF-8 that evaluates to a larger one. Instead, it is treated as overflow. The commit reinstates dying when the offending process tries to create a character that is above IV_MAX (like chr(0xFFFFFFFFFFFFF)), or tries to do certain operations on one if one somehow did get created. The long-term goal is to use code points above IV_MAX internally, as Perl 6 does. So code and tests are not removed, just commented out.
author: Karl Williamson <khw@cpan.org> 2017-07-01 11:58:00 -0600
committer: Karl Williamson <khw@cpan.org> 2017-07-12 21:14:26 -0600
commit: d22ec71778db5c28dfb4e339337f90dad5d214f9 (patch)
tree: 9521906349857015d508b0330e0c6d93a226fc03 /t
parent: e050c0076b9d0972c025d71afe0180d9dfbc6b15 (diff)
download: perl-d22ec71778db5c28dfb4e339337f90dad5d214f9.tar.gz
4 files changed, 17 insertions, 31 deletions
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8
index b322ae0250..906630898f 100644
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -735,38 +735,25 @@ $a = uc("\x{103}");
 $a = ucfirst("\x{104}");
 EXPECT
 ########
-# NAME Deprecation of too-large code points
+# NAME Fatality of too-large code points, but IV_MAX works, warns
 require "../test.pl";
 use warnings 'non_unicode';
 my $max_cp = ~0 >> 1;
 my $max_char = chr $max_cp;
-my $to_warn_cp = $max_cp + 1;
-my $to_warn_char = chr $to_warn_cp;
-$max_char =~ /[\x{110000}\P{Unassigned}]/;
-$to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
 my $temp = qr/$max_char/;
-$temp = qr/$to_warn_char/;
 $temp = uc($max_char);
-$temp = uc($to_warn_char);
+$max_char =~ /[\x{110000}\P{Unassigned}]/;
 my $file = tempfile();
 open(my $fh, "+>:utf8", $file);
 print $fh $max_char, "\n";
-print $fh $to_warn_char, "\n";
 close $fh;
+my $error_cp = $max_cp + 1;
+my $error_char = chr $error_cp;
 EXPECT
-OPTION regex
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in pattern match \(m//\) at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
+OPTIONS fatal regex
 Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in uc at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
-Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
 Code point 0x7F+ is not Unicode, (may not be|requires a Perl extension, and so is not) portable in print at - line \d+.
-Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in print at - line \d+.
+Use of code point 0x80+ is not allowed; the permissible max is 0x7F+\ at - line \d+.
 ########
 # NAME  [perl #127262]
 BEGIN{
diff --git a/t/op/index.t b/t/op/index.t
index 996c5e4516..5e951914d3 100644
--- a/t/op/index.t
+++ b/t/op/index.t
@@ -131,7 +131,6 @@ is(rindex($a, "foo",    ), 0);
 
 SKIP: {
     skip("Not a 64-bit machine", 3) if length sprintf("%x", ~0) <= 8;
-    no warnings 'deprecated'; # These are above IV_MAX on 32 bit machines
     my $a = eval q{"\x{80000000}"};
     my $s = $a.'defxyz';
     is(index($s, 'def'), 1, "0x80000000 is a single character");
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index 1e9ea88749..18ab588b8c 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -85,16 +85,17 @@ foreach (<DATA>) {
             my $message;
             my $after = "";
             if ($expect64 < $expect32 && ! $is64bit) {
-                if (       substr($octets, 0, 1) gt "\xfe"
-                    || (   substr($octets, 0, 1) eq "\xfe"
-                        && length $octets > 1
-                        && substr($octets, 1, 1) le "\xbf"
-                        && substr($octets, 1, 1) ge "\x80"))
-                {
+                # This is needed for code points above IV_MAX
+                #if (       substr($octets, 0, 1) gt "\xfe"
+                #    || (   substr($octets, 0, 1) eq "\xfe"
+                #        && length $octets > 1
+                #        && substr($octets, 1, 1) le "\xbf"
+                #        && substr($octets, 1, 1) ge "\x80"))
+                #{
                     like($warnings[0], qr/overflow/, "overflow warning for $id seen");
                     shift @warnings;
                     $after .= "overflow";
-                }
+                #}
             }
 
             # The data below assumes that if there is both a 'short' and
@@ -189,8 +190,8 @@ __DATA__
 3.4.1 N15 -	30	c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf	-	unexpected non-continuation byte 0xe0, immediately after start byte 0xc0
 3.5	Impossible bytes (but not with Perl's extended UTF-8)
 3.5.1 n -	1	fe	-	1 byte available, need 7
-3.5.2 N2,1 -	1	ff	-	1 byte available, need 13
-3.5.3 N9,7 -	4	fe:fe:ff:ff	-	byte 0xfe
+3.5.2 n -	1	ff	-	1 byte available, need 13
+3.5.3 N7 -	4	fe:fe:ff:ff	-	byte 0xfe
 4	Overlong sequences
 4.1	Examples of an overlong ASCII character
 4.1.1 n -	2	c0:af	-	overlong
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index 3f2d6c3b5e..d90ceeb5bd 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2421,8 +2421,7 @@ EOF
         $Config{uvsize} == 8
 	  or skip("need large code-points for this test", 1);
 
-        # This is above IV_MAX on 32 bit machines, so turn off those warnings
-	fresh_perl_is('no warnings "deprecated"; /\x{E000000000}|/ and print qq(ok\n)', "ok\n", {},
+	fresh_perl_is('/\x{E000000000}|/ and print qq(ok\n)', "ok\n", {},
 		      "buffer overflow in TRIE_STORE_REVCHAR");
     }
author	Karl Williamson <khw@cpan.org>	2017-07-01 11:58:00 -0600
committer	Karl Williamson <khw@cpan.org>	2017-07-12 21:14:26 -0600
commit	d22ec71778db5c28dfb4e339337f90dad5d214f9 (patch)
tree	9521906349857015d508b0330e0c6d93a226fc03 /t
parent	e050c0076b9d0972c025d71afe0180d9dfbc6b15 (diff)
download	perl-d22ec71778db5c28dfb4e339337f90dad5d214f9.tar.gz