summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-03-14 21:50:27 -0600
committerKarl Williamson <khw@cpan.org>2015-03-19 10:20:39 -0600
commit4b6af431de82c30fc8df0d5d024cf77f6512e8b9 (patch)
treebcda9fc9b5f1bafebd609e5a6ee5cfb0866cf938 /lib
parent8b371338c1359c38e4de8ec65a0b9b884f05e450 (diff)
downloadperl-4b6af431de82c30fc8df0d5d024cf77f6512e8b9.tar.gz
Create single fcn for dup'd /lib code
Several /lib .pm's have the same code which is complicated enough to warrant being placed in a shared function. This commit creates a .pm to be used by these .pm's. This implements the perhaps archaic 'Meta' notation wherein characters above 0x7f are displayed as M- plus the ASCII-range character derived by looking at only the lower 7 bits of the upper range one. There are problems with this, in that a literal control character can be in the string, whereas it is trying to get rid of control characters. But I left it to work as-is, just centralizing the code. On EBCDIC platforms this notation makes no sense because the bit patterns are all mixed up about having the upper bit set. So this commit fixes things on these platforms, so these are changed to \x{...}. No literal control characters are emitted. Another potential problem is that characters above 0xFF are passed through, unchanged. But again, I let the existing behavior stand.
Diffstat (limited to 'lib')
-rw-r--r--lib/DB.pm6
-rw-r--r--lib/meta_notation.pm54
-rw-r--r--lib/meta_notation.t40
-rw-r--r--lib/perl5db.pl11
-rw-r--r--lib/sigtrap.pm6
5 files changed, 105 insertions, 12 deletions
diff --git a/lib/DB.pm b/lib/DB.pm
index fd0ff929f2..404c57cb25 100644
--- a/lib/DB.pm
+++ b/lib/DB.pm
@@ -41,7 +41,7 @@ BEGIN {
$DB::subname = ''; # currently executing sub (fully qualified name)
$DB::lineno = ''; # current line number
- $DB::VERSION = $DB::VERSION = '1.07';
+ $DB::VERSION = $DB::VERSION = '1.08';
# initialize private globals to avoid warnings
@@ -244,8 +244,8 @@ sub backtrace {
for (@a) {
s/'/\\'/g;
s/([^\0]*)/'$1'/ unless /^-?[\d.]+$/;
- s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg;
- s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg;
+ require 'meta_notation.pm';
+ $_ = _meta_notation($_) if /[[:^print:]]/a;
}
$w = $w ? '@ = ' : '$ = ';
$a = $h ? '(' . join(', ', @a) . ')' : '';
diff --git a/lib/meta_notation.pm b/lib/meta_notation.pm
new file mode 100644
index 0000000000..2f85cd30c5
--- /dev/null
+++ b/lib/meta_notation.pm
@@ -0,0 +1,54 @@
+use strict;
+use warnings;
+
+# A tiny private library routine which is a helper to several Perl core
+# modules, to allow a paradigm to be implemented in a single place. The name,
+# contents, or even the existence of this file may be changed at any time and
+# are NOT to be used by anthing outside the Perl core.
+
+sub _meta_notation ($) {
+
+ # Returns a copy of the input string with the nonprintable characters
+ # below 0x100 changed into printables. Any ASCII printables or above 0xFF
+ # are unchanged. (XXX Probably above-Latin1 characters should be
+ # converted to \X{...})
+ #
+ # \0 .. \x1F (which are "\c@" .. "\c_") are changed into ^@, ^A, ^B, ...
+ # ^Z, ^[, ^\, ^], ^^, ^_
+ # \c? is changed into ^?.
+ #
+ # The above accounts for all the ASCII-range nonprintables.
+ #
+ # On ASCII platforms, the upper-Latin1-range characters are converted to
+ # Meta notation, so that \xC1 becomes 'M-A', \xE2 becomes 'M-b', etc.
+ # This is how it always has worked, so is continued that way for backwards
+ # compatibility. XXX Wrong, but the way it has always worked is that \x80
+ # .. \x9F are converted to M- followed by a literal control char. This
+ # probably has escaped attention due to the limited domains this code has
+ # been applied to. ext/SDBM_File/dbu.c does this right.
+ #
+ # On EBCDIC platforms, the upper-Latin1-range characters are converted
+ # into '\x{...}' Meta notation doesn't make sense on EBCDIC platforms
+ # because the ASCII-range printables are a mixture of upper bit set or
+ # not. [A-Za-Z0-9] all have the upper bit set. The underscore likely
+ # doesn't; and other punctuation may or may not. There's no simple
+ # pattern.
+
+ my $string = shift;
+
+ $string =~ s/([\0-\037])/
+ sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg;
+ $string =~ s/\c?/^?/g;
+ if (ord("A") == 65) {
+ $string =~ s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg;
+ }
+ else {
+ no warnings 'experimental::regex_sets';
+ # Leave alone things above \xff
+ $string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) /
+ sprintf("\\x{%X}", ord($1))/xaeg;
+ }
+
+ return $string;
+}
+1
diff --git a/lib/meta_notation.t b/lib/meta_notation.t
new file mode 100644
index 0000000000..d89d50ca28
--- /dev/null
+++ b/lib/meta_notation.t
@@ -0,0 +1,40 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require './test.pl';
+}
+
+use strict;
+use warnings;
+
+eval "require 'meta_notation.pm'";
+if ($@) {
+ fail("Could not find 'meta_notation.pm'");
+}
+else {
+
+ is(_meta_notation("\007\010\011\c?Z\x{103}"), "^G^H^I^?Z\x{103}");
+
+ if ($::IS_ASCII || $::IS_ASCII) {
+ is(_meta_notation("\x{c1}\x{e2}"), 'M-AM-b');
+ is(_meta_notation("\x{df}"), 'M-_');
+ }
+ else { # EBCDIC platform
+ # In the first iteration we are looking for a non-ASCII control; in
+ # the second, a regular non-ASCII character. SPACE marks the end of
+ # most controls. We test each to see that they are properly converted
+ # to \x{...}
+ foreach my $start (0x20, ord " ") {
+ for (my $i = $start; $i < 256; $i++) {
+ my $char = chr $i;
+ next if $char =~ /[[:ascii:]]/;
+ is(_meta_notation($char), sprintf("\\x{%X}", $i));
+ last;
+ }
+ }
+ }
+}
+
+done_testing();
diff --git a/lib/perl5db.pl b/lib/perl5db.pl
index e0989dbfc4..47b9f4ad6e 100644
--- a/lib/perl5db.pl
+++ b/lib/perl5db.pl
@@ -528,7 +528,7 @@ BEGIN {
# Debugger for Perl 5.00x; perl5db.pl patch level:
use vars qw($VERSION $header);
-$VERSION = '1.47';
+$VERSION = '1.48';
$header = "perl5db.pl version $VERSION";
@@ -6540,11 +6540,10 @@ sub _dump_trace_calc_saved_single_arg
s/(.*)/'$1'/s
unless /^(?: -?[\d.]+ | \*[\w:]* )$/x;
- # Turn high-bit characters into meta-whatever.
- s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg;
-
- # Turn control characters into ^-whatever.
- s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg;
+ # Turn high-bit characters into meta-whatever, and controls into like
+ # '^D'.
+ require 'meta_notation.pm';
+ $_ = _meta_notation($_) if /[[:^print:]]/a;
return $_;
}
diff --git a/lib/sigtrap.pm b/lib/sigtrap.pm
index df728c8ca6..7d801461d4 100644
--- a/lib/sigtrap.pm
+++ b/lib/sigtrap.pm
@@ -8,7 +8,7 @@ sigtrap - Perl pragma to enable simple signal handling
use Carp;
-$VERSION = 1.07;
+$VERSION = 1.08;
$Verbose ||= 0;
sub import {
@@ -99,8 +99,8 @@ sub handler_traceback {
s/([\'\\])/\\$1/g;
s/([^\0]*)/'$1'/
unless /^(?: -?[\d.]+ | \*[\w:]* )$/x;
- s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg;
- s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg;
+ require 'meta_notation.pm';
+ $_ = _meta_notation($_) if /[[:^print:]]/a;
push(@a, $_);
}
$w = $w ? '@ = ' : '$ = ';