summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-10-24 20:03:31 -0600
committerKarl Williamson <khw@cpan.org>2015-11-05 12:46:42 -0700
commitab36d597128b4016dda4ce194191e3237cf3f6a1 (patch)
treec07eb90e90d6712a4f7563f75b4ace468f3293b3
parent771d08d04402e3fb2c253e6bf8b46524b761a020 (diff)
downloadperl-ab36d597128b4016dda4ce194191e3237cf3f6a1.tar.gz
Dumpvalue: Generalize for non-ASCII platforms
I overlooked this module until now. It turns out that much of the code I had changed had a common ancestor with the code I had already changed to work on non-ASCII platforms in lib/dumpvar.pl. So I just copied that, changing the things that needed to be different. It appears that Dumpvalue had a bug, in that it did not escape NUL, of all the C0 controls. I changed it to do so.
-rw-r--r--dist/Dumpvalue/lib/Dumpvalue.pm42
-rw-r--r--dist/Dumpvalue/t/Dumpvalue.t10
2 files changed, 36 insertions, 16 deletions
diff --git a/dist/Dumpvalue/lib/Dumpvalue.pm b/dist/Dumpvalue/lib/Dumpvalue.pm
index ca40548ce5..eef9b27157 100644
--- a/dist/Dumpvalue/lib/Dumpvalue.pm
+++ b/dist/Dumpvalue/lib/Dumpvalue.pm
@@ -1,9 +1,20 @@
use 5.006_001; # for (defined ref) and $#$v and our
package Dumpvalue;
use strict;
-our $VERSION = '1.17';
+our $VERSION = '1.18';
our(%address, $stab, @stab, %stab, %subs);
+sub ASCII { return ord('A') == 65; }
+
+# This module will give incorrect results for some inputs on EBCDIC platforms
+# before v5.8
+*to_native = ($] lt "5.008")
+ ? sub { return shift }
+ : sub { return utf8::unicode_to_native(shift) };
+
+my $APC = chr to_native(0x9F);
+my $backslash_c_question = (ASCII) ? '\177' : $APC;
+
# documentation nits, handle complex data structures better by chromatic
# translate control chars to ^X - Randal Schwartz
# Modifications to print types by Peter Gordon v1.0
@@ -78,7 +89,8 @@ sub unctrl {
local($_) = @_;
return \$_ if ref \$_ eq "GLOB";
- s/([\001-\037\177])/'^'.pack('c',ord($1)^64)/eg;
+ s/([\000-\037])/'^' . chr(to_native(ord($1)^64))/eg;
+ s/ $backslash_c_question /^?/xg;
$_;
}
@@ -95,9 +107,8 @@ sub stringify {
if $self->{bareStringify} and ref $_
and %overload:: and defined &{'overload::StrVal'};
}
-
if ($tick eq 'auto') {
- if (/[\000-\011\013-\037\177]/) {
+ if (/[^[:^cntrl:]\n]/) { # All ASCII controls but \n get '"'
$tick = '"';
} else {
$tick = "'";
@@ -107,20 +118,33 @@ sub stringify {
s/([\'\\])/\\$1/g;
} elsif ($self->{unctrl} eq 'unctrl') {
s/([\"\\])/\\$1/g ;
- s/([\000-\037\177])/'^'.pack('c',ord($1)^64)/eg;
- s/([\200-\377])/'\\0x'.sprintf('%2X',ord($1))/eg
+ $_ = &unctrl($_);
+ s/([[:^ascii:]])/'\\0x'.sprintf('%2X',ord($1))/eg
if $self->{quoteHighBit};
} elsif ($self->{unctrl} eq 'quote') {
s/([\"\\\$\@])/\\$1/g if $tick eq '"';
- s/\033/\\e/g;
- s/([\000-\037\177])/'\\c'.chr(ord($1)^64)/eg;
+ s/\e/\\e/g;
+ s/([\000-\037$backslash_c_question])/'\\c'._escaped_ord($1)/eg;
}
- s/([\200-\377])/'\\'.sprintf('%3o',ord($1))/eg if $self->{quoteHighBit};
+ s/([[:^ascii:]])/'\\'.sprintf('%3o',ord($1))/eg if $self->{quoteHighBit};
($noticks || /^\d+(\.\d*)?\Z/)
? $_
: $tick . $_ . $tick;
}
+# Ensure a resulting \ is escaped to be \\
+sub _escaped_ord {
+ my $chr = shift;
+ if ($chr eq $backslash_c_question) {
+ $chr = '?';
+ }
+ else {
+ $chr = chr(to_native(ord($chr)^64));
+ $chr =~ s{\\}{\\\\}g;
+ }
+ return $chr;
+}
+
sub DumpElem {
my ($self, $v) = (shift, shift);
my $short = $self->stringify($v, ref $v);
diff --git a/dist/Dumpvalue/t/Dumpvalue.t b/dist/Dumpvalue/t/Dumpvalue.t
index e661fc51e7..8e9da19823 100644
--- a/dist/Dumpvalue/t/Dumpvalue.t
+++ b/dist/Dumpvalue/t/Dumpvalue.t
@@ -1,10 +1,6 @@
#!./perl
BEGIN {
- if (ord('A') == 193) {
- print "1..0 # skip: EBCDIC\n";
- exit 0;
- }
require Config;
if (($Config::Config{'extensions'} !~ m!\bList/Util\b!) ){
print "1..0 # Skip -- Perl configured without List::Util module\n";
@@ -57,15 +53,15 @@ is( $d->stringify('hi'), "'hi'", 'used single-quotes when appropriate' );
$d->{unctrl} = 'unctrl';
like( $d->stringify('double and whack:\ "'), qr!\\ \"!, 'escaped with unctrl' );
like( $d->stringify("a\005"), qr/^"a\^/, 'escaped ASCII value in unctrl' );
-like( $d->stringify("b\205"), qr!^'b.'$!, 'no high-bit escape value in unctrl');
+like( $d->stringify("b\xb6"), qr!^'b.'$!, 'no high-bit escape value in unctrl');
$d->{quoteHighBit} = 1;
-like( $d->stringify("b\205"), qr!^'b\\205!, 'high-bit now escaped in unctrl');
+like( $d->stringify("b\266"), qr!^'b\\266!, 'high-bit now escaped in unctrl');
# if 'quote' is set
$d->{unctrl} = 'quote';
is( $d->stringify('5@ $1'), "'5\@ \$1'", 'quoted $ and @ fine' );
-is( $d->stringify("5@\033\$1"), '"5\@\e\$1"', 'quoted $ and @ and \033 fine' );
+is( $d->stringify("5@\e\$1"), '"5\@\e\$1"', 'quoted $ and @ and \e fine' );
like( $d->stringify("\037"), qr/^"\\c/, 'escaped ASCII value okay' );
# add ticks, if necessary