charnames: Generalize to work on non-ASCII platforms

This includes the tests. The character names are now stored in native order. This means that pack('U') no longer works on non-ASCII platforms. Use chr instead, mostly, and pack('W*') for a sequence. These changes required the 'encoding' pragma to no longer affect e.g., chr() outside its scope, which was recently done by 3e669301f0a6fa34269f0e1eaf1fbbd72cae498a.
author: Karl Williamson <khw@cpan.org> 2014-11-24 10:34:27 -0700
committer: Karl Williamson <khw@cpan.org> 2014-11-24 10:50:26 -0700
commit: 27c3afbd6068ac83b49a11df3e33758ef059027e (patch)
tree: e964ad36e5ade6b830a8e54f3771352b00651370 /lib/_charnames.pm
parent: 374aed2060f1b45b00ae9ad47fb986b4619492c7 (diff)
download: perl-27c3afbd6068ac83b49a11df3e33758ef059027e.tar.gz
1 files changed, 11 insertions, 13 deletions
diff --git a/lib/_charnames.pm b/lib/_charnames.pm
index fbbe79ab92..bb7d7c6c7b 100644
--- a/lib/_charnames.pm
+++ b/lib/_charnames.pm
@@ -65,10 +65,10 @@ $Carp::Internal{ (__PACKAGE__) } = 1;
 
 my %system_aliases = (
 
-    'SINGLE-SHIFT 2'                => pack("U", utf8::unicode_to_native(0x8E)),
-    'SINGLE-SHIFT 3'                => pack("U", utf8::unicode_to_native(0x8F)),
-    'PRIVATE USE 1'                 => pack("U", utf8::unicode_to_native(0x91)),
-    'PRIVATE USE 2'                 => pack("U", utf8::unicode_to_native(0x92)),
+    'SINGLE-SHIFT 2'                => chr utf8::unicode_to_native(0x8E),
+    'SINGLE-SHIFT 3'                => chr utf8::unicode_to_native(0x8F),
+    'PRIVATE USE 1'                 => chr utf8::unicode_to_native(0x91),
+    'PRIVATE USE 2'                 => chr utf8::unicode_to_native(0x92),
 );
 
 # These are the aliases above that differ under :loose and :full matching
@@ -77,15 +77,15 @@ my %system_aliases = (
 #);
 
 #my %deprecated_aliases;
-#$deprecated_aliases{'BELL'} = pack("U", utf8::unicode_to_native(0x07)) if $^V lt v5.17.0;
+#$deprecated_aliases{'BELL'} = chr utf8::unicode_to_native(0x07) if $^V lt v5.17.0;
 
 #my %loose_deprecated_aliases = (
 #);
 
 # These are special cased in :loose matching, differing only in a medial
 # hyphen
-my $HANGUL_JUNGSEONG_O_E_utf8 = pack("U", 0x1180);
-my $HANGUL_JUNGSEONG_OE_utf8 = pack("U", 0x116C);
+my $HANGUL_JUNGSEONG_O_E_utf8 = chr 0x1180;
+my $HANGUL_JUNGSEONG_OE_utf8 = chr 0x116C;
 
 
 my $txt;  # The table of official character names
@@ -163,7 +163,7 @@ sub alias (@) # Set up a single alias
     }
     if ($value =~ $decimal_qr) {
         no warnings qw(non_unicode surrogate nonchar); # Allow any of these
-        $^H{charnames_ord_aliases}{$name} = pack("U", $value);
+        $^H{charnames_ord_aliases}{$name} = chr $value;
 
         # Use a canonical form.
         $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
@@ -432,7 +432,7 @@ sub lookup_name ($$$) {
       if (($loose || $^H{charnames_full})
           && (defined (my $ord = charnames::name_to_code_point_special($lookup_name, $loose))))
       {
-        $result = pack("U", $ord);
+        $result = chr $ord;
       }
       else {
 
@@ -525,7 +525,7 @@ sub lookup_name ($$$) {
         # therefore yield the very last character in the table, which should
         # also be a \n, so the statement works anyway.)
         if (substr($txt, $off[0] - 7, 1) eq "\n") {
-          $result = pack("U", CORE::hex substr($txt, $off[0] - 6, 5));
+          $result = chr CORE::hex substr($txt, $off[0] - 6, 5);
 
           # Handle the single loose matching special case, in which two names
           # differ only by a single medial hyphen.  If the original had a
@@ -544,7 +544,7 @@ sub lookup_name ($$$) {
           # The +1 skips past that newline, or, if the rindex() fails, to put
           # us to an offset of zero.
           my $charstart = rindex($txt, "\n", $off[0] - 7) + 1;
-          $result = pack("U*", map { CORE::hex }
+          $result = pack("W*", map { CORE::hex }
               split " ", substr($txt, $charstart, $off[0] - $charstart - 1));
         }
       }
@@ -556,8 +556,6 @@ sub lookup_name ($$$) {
       $cache_ref->{$name} = $result if defined $cache_ref;
     }
   }
-  utf8::downgrade($result, 1);
-
 
   # Here, have the result character.  If the return is to be an ord, must be
   # any single character.
author	Karl Williamson <khw@cpan.org>	2014-11-24 10:34:27 -0700
committer	Karl Williamson <khw@cpan.org>	2014-11-24 10:50:26 -0700
commit	27c3afbd6068ac83b49a11df3e33758ef059027e (patch)
tree	e964ad36e5ade6b830a8e54f3771352b00651370 /lib/_charnames.pm
parent	374aed2060f1b45b00ae9ad47fb986b4619492c7 (diff)
download	perl-27c3afbd6068ac83b49a11df3e33758ef059027e.tar.gz