summaryrefslogtreecommitdiff
path: root/dist/Carp
diff options
context:
space:
mode:
authorZefram <zefram@fysh.org>2017-07-20 04:00:30 +0100
committerZefram <zefram@fysh.org>2017-07-20 04:00:30 +0100
commit285ac8e2a208401b204f46b958d170d1cc5bd361 (patch)
tree000a01fe5824b52bb2b36ef0df9c7fcef337a6f5 /dist/Carp
parent3172fdbc0bbda2530c9d7ffc4c7b7d34a36db0dc (diff)
downloadperl-285ac8e2a208401b204f46b958d170d1cc5bd361.tar.gz
fix problems from Carp's partial EBCDIC support
Commit 975fe8546427b5f6259103912b13925be148becd introduced partial EBCDIC support to Carp, but simultaneously introduced some bugs into the module and the tests. Multiple issues are addressed in this commit: * The main check for whether a character needs a non-literal representation when dumping a string or regexp argument, which used to be a regexp character range [ -~], was expanded to an explicit character set not using range syntax, but in the expansion the "&" was omitted. This caused unwanted \x representation of any "&" in an argument in a stack trace. Add the "&" back in and fix the sorting of the character set. * The substitute version of this check for Perls on which Carp can't safely apply a regexp to an upgraded string, but new enough to have utf8::native_to_unicode(), was applying that function to some fixed codepoint values that were already Unicode codepoints. Remove those calls, and compare the fixed codepoints directly to codepoints correctly converted through that function. * That version of the check, by referring to utf8::native_to_unicode() directly in source that is always compiled, caused the utf8:: stash to be vivified on Perl 5.6, causing havoc (and failed tests). Hide that version of the check behind a (compile-time) string eval. * Another version of the printability check, for EBCDIC on Perl 5.6, treated as printable any codepoint above 0xff. Change that to correctly treat all such codepoints as not safely printable. * Some tests in t/arg_regexp.t which were originally about non-ASCII characters specified in a regexp by using \x regexp syntax got changed to use the non-ASCII characters literally at the regexp syntax level (by interpolating them from a constructed string). Restore these to using \x syntax, with the appropriate variability of the hex digits. * Add a couple of "fixme" comments about parts of the EBCDIC support that are incomplete. * Some tests involving non-ASCII characters were later made to skip on any Perl prior to 5.17.1. In practice they work fine on earlier Perls, and they're fairly important. Suspect that the problem that led to the skipping being added was dependent on the tests having been broken as described above, so remove the skipping logic. * Incidentally, correct a comment about the purpose of t/arg_string.t and add a similar one to t/arg_regexp.t. * Incidentally, add Changes entries for versions 1.41 and 1.42, which were omitted when those changes were made.
Diffstat (limited to 'dist/Carp')
-rw-r--r--dist/Carp/Changes14
-rw-r--r--dist/Carp/lib/Carp.pm73
-rw-r--r--dist/Carp/lib/Carp/Heavy.pm2
-rw-r--r--dist/Carp/t/arg_regexp.t41
-rw-r--r--dist/Carp/t/arg_string.t9
5 files changed, 70 insertions, 69 deletions
diff --git a/dist/Carp/Changes b/dist/Carp/Changes
index dca6a522cf..0498eeb885 100644
--- a/dist/Carp/Changes
+++ b/dist/Carp/Changes
@@ -1,3 +1,17 @@
+version 1.43
+
+ * fix problems introduced by the partial EBCDIC support from version
+ 1.35
+
+version 1.42
+
+ * add some doc clue about what cluck does
+
+ * avoid floating point overflow in test
+
+version 1.41
+
+ * add missing "<FH> chunk #" phrase to messages
version 1.40; 2016-03-10
* Get arg_string.t to compile in perl v5.6
diff --git a/dist/Carp/lib/Carp.pm b/dist/Carp/lib/Carp.pm
index 05052b9ef6..6127b26f54 100644
--- a/dist/Carp/lib/Carp.pm
+++ b/dist/Carp/lib/Carp.pm
@@ -87,7 +87,36 @@ BEGIN {
}
}
-our $VERSION = '1.42';
+# is_safe_printable_codepoint() indicates whether a character, specified
+# by integer codepoint, is OK to output literally in a trace. Generally
+# this is if it is a printable character in the ancestral character set
+# (ASCII or EBCDIC). This is used on some Perls in situations where a
+# regexp can't be used.
+BEGIN {
+ *is_safe_printable_codepoint =
+ "$]" >= 5.007_003 ?
+ eval(q(sub ($) {
+ my $u = utf8::native_to_unicode($_[0]);
+ $u >= 0x20 && $u <= 0x7e;
+ }))
+ : ord("A") == 65 ?
+ sub ($) { $_[0] >= 0x20 && $_[0] <= 0x7e }
+ :
+ sub ($) {
+ # Early EBCDIC
+ # 3 EBCDIC code pages supported then; all controls but one
+ # are the code points below SPACE. The other one is 0x5F on
+ # POSIX-BC; FF on the other two.
+ # FIXME: there are plenty of unprintable codepoints other
+ # than those that this code and the comment above identifies
+ # as "controls".
+ $_[0] >= ord(" ") && $_[0] <= 0xff &&
+ $_[0] != (ord ("^") == 106 ? 0x5f : 0xff);
+ }
+ ;
+}
+
+our $VERSION = '1.43';
$VERSION =~ tr/_//d;
our $MaxEvalLen = 0;
@@ -300,32 +329,15 @@ sub format_arg {
next;
}
my $o = ord($c);
-
- # This code is repeated in Regexp::CARP_TRACE()
- if ($] ge 5.007_003) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
- || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
- } elsif (ord("A") == 65) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < 0x20 || $o > 0x7e;
- } else { # Early EBCDIC
-
- # 3 EBCDIC code pages supported then; all controls but one
- # are the code points below SPACE. The other one is 0x5F on
- # POSIX-BC; FF on the other two.
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < ord(" ") || ((ord ("^") == 106)
- ? $o == 0x5f
- : $o == 0xff);
- }
+ substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+ unless is_safe_printable_codepoint($o);
}
} else {
$arg =~ s/([\"\\\$\@])/\\$1/g;
# This is all the ASCII printables spelled-out. It is portable to all
# Perl versions and platforms (such as EBCDIC). There are other more
# compact ways to do this, but may not work everywhere every version.
- $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+ $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
return "\"".$arg."\"".$suffix;
@@ -338,25 +350,12 @@ sub Regexp::CARP_TRACE {
for(my $i = length($arg); $i--; ) {
my $o = ord(substr($arg, $i, 1));
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2}
-
- # This code is repeated in format_arg()
- if ($] ge 5.007_003) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
- || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
- } elsif (ord("A") == 65) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < 0x20 || $o > 0x7e;
- } else { # Early EBCDIC
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < ord(" ") || ((ord ("^") == 106)
- ? $o == 0x5f
- : $o == 0xff);
- }
+ substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+ unless is_safe_printable_codepoint($o);
}
} else {
# See comment in format_arg() about this same regex.
- $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+ $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
my $suffix = "";
diff --git a/dist/Carp/lib/Carp/Heavy.pm b/dist/Carp/lib/Carp/Heavy.pm
index f9c584a42b..4b8cbe1b94 100644
--- a/dist/Carp/lib/Carp/Heavy.pm
+++ b/dist/Carp/lib/Carp/Heavy.pm
@@ -2,7 +2,7 @@ package Carp::Heavy;
use Carp ();
-our $VERSION = '1.42';
+our $VERSION = '1.43';
$VERSION =~ tr/_//d;
# Carp::Heavy was merged into Carp in version 1.12. Any mismatched versions
diff --git a/dist/Carp/t/arg_regexp.t b/dist/Carp/t/arg_regexp.t
index 1575b291ab..83e8f0359f 100644
--- a/dist/Carp/t/arg_regexp.t
+++ b/dist/Carp/t/arg_regexp.t
@@ -1,6 +1,8 @@
use warnings;
use strict;
+# confirm that regexp-typed stack args are displayed correctly by longmess()
+
use Test::More tests => 42;
use Carp ();
@@ -16,12 +18,14 @@ my $e9 = sprintf "%02x", (($] ge 5.007_003)
: ((ord("A") == 193)
? 0x51
: 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
my $nl_as_hex = sprintf "%x", ord("\n");
# On Perl 5.6 we accept some incorrect quoting of Unicode characters,
# because upgradedness of regexps isn't preserved by stringification,
# so it's impossible to implement the correct behaviour.
+# FIXME: the permissive patterns don't account for EBCDIC
my $xe9_rx = "$]" < 5.008 ? qr/\\x\{c3\}\\x\{a9\}|\\x\{e9\}/ : qr/\\x\{$e9\}/;
my $x666_rx = "$]" < 5.008 ? qr/\\x\{d9\}\\x\{a6\}|\\x\{666\}/ : qr/\\x\{666\}/;
my $x2603_rx = "$]" < 5.008 ? qr/\\x\{e2\}\\x\{98\}\\x\{83\}|\\x\{2603\}/ : qr/\\x\{2603\}/;
@@ -41,16 +45,10 @@ like lm(qr/\x{666}b/), qr/main::lm\(qr\(\\x\{666\}b\)u?\)/;
like lm(rx("\x{666}b")), qr/main::lm\(qr\(${x666_rx}b\)u?\)/;
like lm(qr/a\x{666}/), qr/main::lm\(qr\(a\\x\{666\}\)u?\)/;
like lm(rx("a\x{666}")), qr/main::lm\(qr\(a${x666_rx}\)u?\)/;
-like lm(qr/L${chr_e9}on/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\)u?\)/;
+like lm(qr/L${xe9}on/), qr/main::lm\(qr\(L\\x${e9}on\)u?\)/;
like lm(rx("L${chr_e9}on")), qr/main::lm\(qr\(L${xe9_rx}on\)u?\)/;
-
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on \\x\{2603\} !\)u?\)/;
- like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
-}
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\x${e9}on \\x\{2603\} !\)u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
$Carp::MaxArgLen = 5;
foreach my $arg ("foo bar baz", "foo bar ba", "foo bar b", "foo bar ", "foo bar", "foo ba") {
@@ -60,16 +58,10 @@ foreach my $arg ("foo b", "foo ", "foo", "fo", "f", "") {
like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
}
like lm(qr/foo.bar$/sm), qr/main::lm\(qr\(fo\)\.\.\.u?ms\)/;
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 4 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-}
-
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
like lm(qr/foo\x{2603}/), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
like lm(rx("foo\x{2603}")), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
@@ -77,12 +69,7 @@ $Carp::MaxArgLen = 0;
foreach my $arg ("wibble:" x 20, "foo bar baz") {
like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
}
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\\x\{2603\}\)u?\)/;
- like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
-}
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\x${e9}on\\x\{2603\}\)u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
1;
diff --git a/dist/Carp/t/arg_string.t b/dist/Carp/t/arg_string.t
index dc70f4370b..544a4fe059 100644
--- a/dist/Carp/t/arg_string.t
+++ b/dist/Carp/t/arg_string.t
@@ -1,9 +1,9 @@
use warnings;
use strict;
-# confirm that stack args are displayed correctly by longmess()
+# confirm that string-typed stack args are displayed correctly by longmess()
-use Test::More tests => 32;
+use Test::More tests => 33;
use Carp ();
@@ -17,7 +17,8 @@ my $e9 = sprintf "%02x", (($] ge 5.007_003)
: ((ord("A") == 193)
? 0x51
: 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
my $nl_as_hex = sprintf "%x", ord("\n");
like lm(3), qr/main::lm\(3\)/;
@@ -33,9 +34,9 @@ like lm(-3.5e30),
\) /x;
like lm(""), qr/main::lm\(""\)/;
like lm("foo"), qr/main::lm\("foo"\)/;
+like lm("a&b"), qr/main::lm\("a&b"\)/;
like lm("a\$b\@c\\d\"e"), qr/main::lm\("a\\\$b\\\@c\\\\d\\\"e"\)/;
like lm("a\nb"), qr/main::lm\("a\\x\{$nl_as_hex\}b"\)/;
-
like lm("a\x{666}b"), qr/main::lm\("a\\x\{666\}b"\)/;
like lm("\x{666}b"), qr/main::lm\("\\x\{666\}b"\)/;
like lm("a\x{666}"), qr/main::lm\("a\\x\{666\}"\)/;