summary | refs | log | tree | commit | diff
path: root/cpan/Pod-Escapes
diff options
context:
space:
mode:
author: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2014-12-11 21:29:22 +0000
committer: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2014-12-11 21:29:22 +0000
commit: f347d3e37893158fcefa9e51712d785eb38aaf0a (patch)
tree: 1e7ea26224a051da12fae73fb9ad24d0ecbd50af /cpan/Pod-Escapes
parent: eed9221580b325cc9e73ebb61d115f94a5af3dd4 (diff)
download: perl-f347d3e37893158fcefa9e51712d785eb38aaf0a.tar.gz
Update Pod-Escapes to CPAN version 1.07
[DELTA]
1.07 2014-12-09 NEILB
- All changes from Ken Williamson as part of EBCDIC work for 5.22
- Fix typo in comment
- Reorder structure definitions
- Fill %Name2character better for EBCDIC
- Generalize for EBCDIC
- doc clarification
Diffstat (limited to 'cpan/Pod-Escapes')
-rw-r--r-- cpan/Pod-Escapes/lib/Pod/Escapes.pm 316
1 files changed, 158 insertions, 158 deletions
diff --git a/cpan/Pod-Escapes/lib/Pod/Escapes.pm b/cpan/Pod-Escapes/lib/Pod/Escapes.pm
index 52b52724fc..00501db3e8 100644
--- a/cpan/Pod-Escapes/lib/Pod/Escapes.pm
+++ b/cpan/Pod-Escapes/lib/Pod/Escapes.pm
@@ -17,7 +17,7 @@ use vars qw(
require Exporter;
@ISA = ('Exporter');
-$VERSION = '1.06';
+$VERSION = '1.07';
@EXPORT_OK = qw(
%Code2USASCII
%Name2character
@@ -48,28 +48,15 @@ sub e2char {
$in = hex $1;
} # else it's decimal, or named
- if($NOT_ASCII) {
- # We're in bizarro world of not-ASCII!
- # Cope with US-ASCII codes, use fallbacks for Latin-1, or use FAR_CHAR.
- unless($in =~ m/^\d+$/s) {
- # It's a named character reference. Get its numeric Unicode value.
- $in = $Name2character{$in};
- return undef unless defined $in; # (if there's no such name)
- $in = ord $in; # (All ents must be one character long.)
- # ...So $in holds the char's US-ASCII numeric value, which we'll
- # now go get the local equivalent for.
- }
-
- # It's numeric, whether by origin or by mutation from a known name
- return $Code2USASCII{$in} # so "65" => "A" everywhere
- || $Latin1Code_to_fallback{$in} # Fallback.
- || $FAR_CHAR; # Fall further back
- }
-
- # Normal handling:
if($in =~ m/^\d+$/s) {
if($] < 5.007 and $in > 255) { # can't be trusted with Unicode
return $FAR_CHAR;
+ } elsif ($] >= 5.007003) {
+ return chr(utf8::unicode_to_native($in));
+ } elsif ($NOT_ASCII) {
+ return $Code2USASCII{$in} # so "65" => "A" everywhere
+ || $Latin1Code_to_fallback{$in} # Fallback.
+ || $FAR_CHAR; # Fall further back
} else {
return chr($in);
}
@@ -99,6 +86,133 @@ sub e2charnum {
#--------------------------------------------------------------------------
+%Code2USASCII = (
+# mostly generated by
+# perl -e "printf qq{ \x25 3s, '\x25s',\n}, $_, chr($_) foreach (32 .. 126)"
+ 32, ' ',
+ 33, '!',
+ 34, '"',
+ 35, '#',
+ 36, '$',
+ 37, '%',
+ 38, '&',
+ 39, "'", #!
+ 40, '(',
+ 41, ')',
+ 42, '*',
+ 43, '+',
+ 44, ',',
+ 45, '-',
+ 46, '.',
+ 47, '/',
+ 48, '0',
+ 49, '1',
+ 50, '2',
+ 51, '3',
+ 52, '4',
+ 53, '5',
+ 54, '6',
+ 55, '7',
+ 56, '8',
+ 57, '9',
+ 58, ':',
+ 59, ';',
+ 60, '<',
+ 61, '=',
+ 62, '>',
+ 63, '?',
+ 64, '@',
+ 65, 'A',
+ 66, 'B',
+ 67, 'C',
+ 68, 'D',
+ 69, 'E',
+ 70, 'F',
+ 71, 'G',
+ 72, 'H',
+ 73, 'I',
+ 74, 'J',
+ 75, 'K',
+ 76, 'L',
+ 77, 'M',
+ 78, 'N',
+ 79, 'O',
+ 80, 'P',
+ 81, 'Q',
+ 82, 'R',
+ 83, 'S',
+ 84, 'T',
+ 85, 'U',
+ 86, 'V',
+ 87, 'W',
+ 88, 'X',
+ 89, 'Y',
+ 90, 'Z',
+ 91, '[',
+ 92, "\\", #!
+ 93, ']',
+ 94, '^',
+ 95, '_',
+ 96, '`',
+ 97, 'a',
+ 98, 'b',
+ 99, 'c',
+ 100, 'd',
+ 101, 'e',
+ 102, 'f',
+ 103, 'g',
+ 104, 'h',
+ 105, 'i',
+ 106, 'j',
+ 107, 'k',
+ 108, 'l',
+ 109, 'm',
+ 110, 'n',
+ 111, 'o',
+ 112, 'p',
+ 113, 'q',
+ 114, 'r',
+ 115, 's',
+ 116, 't',
+ 117, 'u',
+ 118, 'v',
+ 119, 'w',
+ 120, 'x',
+ 121, 'y',
+ 122, 'z',
+ 123, '{',
+ 124, '|',
+ 125, '}',
+ 126, '~',
+);
+
+#--------------------------------------------------------------------------
+
+%Latin1Code_to_fallback = ();
+@Latin1Code_to_fallback{0xA0 .. 0xFF} = (
+# Copied from Text/Unidecode/x00.pm:
+
+' ', qq{!}, qq{C/}, 'PS', qq{\$?}, qq{Y=}, qq{|}, 'SS', qq{"}, qq{(c)}, 'a', qq{<<}, qq{!}, "", qq{(r)}, qq{-},
+'deg', qq{+-}, '2', '3', qq{'}, 'u', 'P', qq{*}, qq{,}, '1', 'o', qq{>>}, qq{1/4}, qq{1/2}, qq{3/4}, qq{?},
+'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
+'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss',
+'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+'d', 'n', 'o', 'o', 'o', 'o', 'o', qq{/}, 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
+
+);
+
+{
+ # Now stuff %Latin1Char_to_fallback:
+ %Latin1Char_to_fallback = ();
+ my($k,$v);
+ while( ($k,$v) = each %Latin1Code_to_fallback) {
+ $Latin1Char_to_fallback{chr $k} = $v;
+ #print chr($k), ' => ', $v, "\n";
+ }
+}
+
+#--------------------------------------------------------------------------
+
%Name2character_number = (
# General XML/XHTML:
'lt' => 60,
@@ -380,147 +494,28 @@ sub e2charnum {
if($] < 5.007 and $number > 255) {
$Name2character{$name} = $FAR_CHAR;
# substitute for Unicode characters, for perls
- # that can't reliable handle them
+ # that can't reliably handle them
+ } elsif ($] >= 5.007003) {
+ $Name2character{$name} = chr utf8::unicode_to_native($number);
+ # normal case for more recent Perls where we can translate from Unicode
+ # to the native character set.
+ }
+ elsif (exists $Code2USASCII{$number}) {
+ $Name2character{$name} = $Code2USASCII{$number};
+ # on older Perls, we can use the translations we have hard-coded in this
+ # file, but these don't include the non-ASCII-range characters
+ }
+ elsif ($NOT_ASCII && $number > 127 && $number < 256) {
+ # this range on old non-ASCII-platform perls is wrong
+ if (exists $Latin1Code_to_fallback{$number}) {
+ $Name2character{$name} = $Latin1Code_to_fallback{$number};
+ } else {
+ $Name2character{$name} = $FAR_CHAR;
+ }
} else {
$Name2character{$name} = chr $number;
- # normal case
}
}
- # So they resolve 'right' even in EBCDIC-land
- $Name2character{'lt' } = '<';
- $Name2character{'gt' } = '>';
- $Name2character{'quot'} = '"';
- $Name2character{'amp' } = '&';
- $Name2character{'apos'} = "'";
- $Name2character{'sol' } = '/';
- $Name2character{'verbar'} = '|';
-}
-
-#--------------------------------------------------------------------------
-
-%Code2USASCII = (
-# mostly generated by
-# perl -e "printf qq{ \x25 3s, '\x25s',\n}, $_, chr($_) foreach (32 .. 126)"
- 32, ' ',
- 33, '!',
- 34, '"',
- 35, '#',
- 36, '$',
- 37, '%',
- 38, '&',
- 39, "'", #!
- 40, '(',
- 41, ')',
- 42, '*',
- 43, '+',
- 44, ',',
- 45, '-',
- 46, '.',
- 47, '/',
- 48, '0',
- 49, '1',
- 50, '2',
- 51, '3',
- 52, '4',
- 53, '5',
- 54, '6',
- 55, '7',
- 56, '8',
- 57, '9',
- 58, ':',
- 59, ';',
- 60, '<',
- 61, '=',
- 62, '>',
- 63, '?',
- 64, '@',
- 65, 'A',
- 66, 'B',
- 67, 'C',
- 68, 'D',
- 69, 'E',
- 70, 'F',
- 71, 'G',
- 72, 'H',
- 73, 'I',
- 74, 'J',
- 75, 'K',
- 76, 'L',
- 77, 'M',
- 78, 'N',
- 79, 'O',
- 80, 'P',
- 81, 'Q',
- 82, 'R',
- 83, 'S',
- 84, 'T',
- 85, 'U',
- 86, 'V',
- 87, 'W',
- 88, 'X',
- 89, 'Y',
- 90, 'Z',
- 91, '[',
- 92, "\\", #!
- 93, ']',
- 94, '^',
- 95, '_',
- 96, '`',
- 97, 'a',
- 98, 'b',
- 99, 'c',
- 100, 'd',
- 101, 'e',
- 102, 'f',
- 103, 'g',
- 104, 'h',
- 105, 'i',
- 106, 'j',
- 107, 'k',
- 108, 'l',
- 109, 'm',
- 110, 'n',
- 111, 'o',
- 112, 'p',
- 113, 'q',
- 114, 'r',
- 115, 's',
- 116, 't',
- 117, 'u',
- 118, 'v',
- 119, 'w',
- 120, 'x',
- 121, 'y',
- 122, 'z',
- 123, '{',
- 124, '|',
- 125, '}',
- 126, '~',
-);
-
-#--------------------------------------------------------------------------
-
-%Latin1Code_to_fallback = ();
-@Latin1Code_to_fallback{0xA0 .. 0xFF} = (
-# Copied from Text/Unidecode/x00.pm:
-
-' ', qq{!}, qq{C/}, 'PS', qq{\$?}, qq{Y=}, qq{|}, 'SS', qq{"}, qq{(c)}, 'a', qq{<<}, qq{!}, "", qq{(r)}, qq{-},
-'deg', qq{+-}, '2', '3', qq{'}, 'u', 'P', qq{*}, qq{,}, '1', 'o', qq{>>}, qq{1/4}, qq{1/2}, qq{3/4}, qq{?},
-'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
-'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss',
-'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
-'d', 'n', 'o', 'o', 'o', 'o', 'o', qq{/}, 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
-
-);
-
-{
- # Now stuff %Latin1Char_to_fallback:
- %Latin1Char_to_fallback = ();
- my($k,$v);
- while( ($k,$v) = each %Latin1Code_to_fallback) {
- $Latin1Char_to_fallback{chr $k} = $v;
- #print chr($k), ' => ', $v, "\n";
- }
}
#--------------------------------------------------------------------------
@@ -637,7 +632,12 @@ characters (characters 160-255) are unaffected.
Under EBCDIC platforms, C<e2char($n)> may not always be the
same as C<chr(e2charnum($n))>, and ditto for
C<$Name2character{$name}> and
-C<chr($Name2character_number{$name})>.
+C<chr($Name2character_number{$name})>, because the strings are returned as
+native, and the numbers are returned as Unicode.
+However, for Perls starting with v5.8, C<e2char($n)> is the same as
+C<chr(utf8::unicode_to_native(e2charnum($n)))>, and ditto for
+C<$Name2character{$name}> and
+C<chr(utf8::unicode_to_native($Name2character_number{$name}))>.
=head1 SEE ALSO