summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-12-19 16:03:39 -0700
committerKarl Williamson <khw@cpan.org>2018-01-19 11:20:11 -0700
commitba52ce15fe5ca68de1be69e394f41ccb48a731cc (patch)
tree12d167bb59e39ac5943c1a249f2cbe626170dfff
parent78ba90076f5958f9830fb2559e21420018be4f8d (diff)
downloadperl-ba52ce15fe5ca68de1be69e394f41ccb48a731cc.tar.gz
Deprecate above \xFF in bitwise string ops
This is already a fatal error for operations whose outcome depends on them, but in things like "abc" & "def\x{100}" the wide character doesn't actually need to participate in the AND, and so perl doesn't. As a result of the discussion in the thread beginning with http://nntp.perl.org/group/perl.perl5.porters/244884, it was decided to deprecate these ones too.
-rw-r--r--doop.c5
-rw-r--r--op.h4
-rw-r--r--pod/perldelta.pod7
-rw-r--r--pod/perldeprecation.pod18
-rw-r--r--t/op/bop.t79
5 files changed, 106 insertions, 7 deletions
diff --git a/doop.c b/doop.c
index f5a40efaae..afbbcdae5d 100644
--- a/doop.c
+++ b/doop.c
@@ -1095,6 +1095,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
* portion. That means that at least one of the operands has to be
* entirely non-UTF-8, and the length of that operand has to be before the
* first above-FF in the other */
+ if (left_utf8 || right_utf8) {
if (left_utf8) {
if (right_utf8 || rightlen > leftlen) {
Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]);
@@ -1107,6 +1108,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
}
len = leftlen;
}
+
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ deprecated_above_ff_msg, PL_op_desc[optype]);
+ }
else { /* Neither is UTF-8 */
len = MIN(leftlen, rightlen);
}
diff --git a/op.h b/op.h
index ed4ff9d1a7..64668dcf0a 100644
--- a/op.h
+++ b/op.h
@@ -1112,6 +1112,10 @@ C<sib> is non-null. For a higher-level interface, see C<L</op_sibling_splice>>.
static const char * const fatal_above_ff_msg
= "Use of strings with code points over 0xFF as arguments to "
"%s operator is not allowed";
+static const char * const deprecated_above_ff_msg
+ = "Use of strings with code points over 0xFF as arguments to "
+ "%s operator is deprecated. This will be a fatal error in "
+ "Perl 5.32";
#endif
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 378c1903df..dc3c84ecad 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -81,6 +81,13 @@ reverted due to the extent of the trouble caused to CPAN modules.
It is expected that smartmatch will be changed again in the future,
but preceded by some kind of explicit deprecation.
+=head1 Deprecations
+
+=head2 Use of code points over 0xFF in string bitwise operators
+
+Some uses of these already are illegal after a previous deprecation
+cycle. This deprecates the remaining uses. See L<perldeprecation>.
+
=head1 Performance Enhancements
=over 4
diff --git a/pod/perldeprecation.pod b/pod/perldeprecation.pod
index e929314a68..8cd3eb950f 100644
--- a/pod/perldeprecation.pod
+++ b/pod/perldeprecation.pod
@@ -56,6 +56,24 @@ C<vec> views its string argument as a sequence of bits. A string
containing a code point over 0xFF is nonsensical. This usage is
deprecated in Perl 5.28, and will be removed in Perl 5.32.
+=head3 Use of code points over 0xFF in string bitwise operators
+
+The string bitwise operators, C<&>, C<|>, C<^>, and C<~>, treat their
+operands as strings of bytes. As such, values above 0xFF are
+nonsensical. Some instances of these have been deprecated since Perl
+5.24, and were made fatal in 5.28, but it turns out that in cases where
+the wide characters did not affect the end result, no deprecation
+notice was raised, and so remain legal. Now, all occurrences either are
+fatal or raise a deprecation warning, so that the remaining legal
+occurrences will be fatal in 5.32.
+
+An example of this is
+
+ "" & "\x{100}"
+
+The wide character is not used in the C<&> operation because the left
+operand is shorter. This now warns anyway.
+
=head3 hostname() doesn't accept any arguments
The function C<hostname()> in the L<Sys::Hostname> module has always
diff --git a/t/op/bop.t b/t/op/bop.t
index 2d0890e852..411d253a7b 100644
--- a/t/op/bop.t
+++ b/t/op/bop.t
@@ -18,7 +18,7 @@ BEGIN {
# If you find tests are failing, please try adding names to tests to track
# down where the failure is, and supply your new names as a patch.
# (Just-in-time test naming)
-plan tests => 491;
+plan tests => 504;
# numerics
ok ((0xdead & 0xbeef) == 0x9ead);
@@ -612,9 +612,74 @@ foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {
"(~) is not allowed";
}
-is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
-is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
-is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
-is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
-is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
-is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
+{
+ # Since these are temporary, and it was a pain to make them into loops,
+ # the code is just rolled out.
+ local $SIG{__WARN__} = sub { push @warnings, @_; };
+
+ undef @warnings;
+ is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise and \(&\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ undef @warnings;
+ is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise or \(|\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ undef @warnings;
+ is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise xor \(\^\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ undef @warnings;
+ is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise and \(&\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ undef @warnings;
+ is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise or \(|\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ undef @warnings;
+ is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
+ if (! is(@warnings, 1, "... but returned a single warning")) {
+ diag join "\n", @warnings;
+ }
+ like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+ )arguments to bitwise xor \(\^\) operator (?#
+ )is deprecated/,
+ "... which is the expected warning");
+ no warnings 'deprecated';
+ undef @warnings;
+ my $foo = "abc" & "abc\x{100}";
+ $foo = "abc" | "abc\x{100}";
+ $foo = "abc" ^ "abc\x{100}";
+ $foo = "abc\x{100}" & "abc";
+ $foo = "abc\x{100}" | "abc";
+ $foo = "abc\x{100}" ^ "abc";
+ if (! is(@warnings, 0, "... And none of the last 6 main tests warns when 'deprecated' is off")) {
+ diag join "\n", @warnings;
+ }
+}