summaryrefslogtreecommitdiff
path: root/lib/_charnames.pm
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-11-24 10:34:27 -0700
committerKarl Williamson <khw@cpan.org>2014-11-24 10:50:26 -0700
commit27c3afbd6068ac83b49a11df3e33758ef059027e (patch)
treee964ad36e5ade6b830a8e54f3771352b00651370 /lib/_charnames.pm
parent374aed2060f1b45b00ae9ad47fb986b4619492c7 (diff)
downloadperl-27c3afbd6068ac83b49a11df3e33758ef059027e.tar.gz
charnames: Generalize to work on non-ASCII platforms
This includes the tests. The character names are now stored in native order. This means that pack('U') no longer works on non-ASCII platforms. Use chr instead, mostly, and pack('W*') for a sequence. These changes required the 'encoding' pragma to no longer affect e.g., chr() outside its scope, which was recently done by 3e669301f0a6fa34269f0e1eaf1fbbd72cae498a.
Diffstat (limited to 'lib/_charnames.pm')
-rw-r--r--lib/_charnames.pm24
1 files changed, 11 insertions, 13 deletions
diff --git a/lib/_charnames.pm b/lib/_charnames.pm
index fbbe79ab92..bb7d7c6c7b 100644
--- a/lib/_charnames.pm
+++ b/lib/_charnames.pm
@@ -65,10 +65,10 @@ $Carp::Internal{ (__PACKAGE__) } = 1;
my %system_aliases = (
- 'SINGLE-SHIFT 2' => pack("U", utf8::unicode_to_native(0x8E)),
- 'SINGLE-SHIFT 3' => pack("U", utf8::unicode_to_native(0x8F)),
- 'PRIVATE USE 1' => pack("U", utf8::unicode_to_native(0x91)),
- 'PRIVATE USE 2' => pack("U", utf8::unicode_to_native(0x92)),
+ 'SINGLE-SHIFT 2' => chr utf8::unicode_to_native(0x8E),
+ 'SINGLE-SHIFT 3' => chr utf8::unicode_to_native(0x8F),
+ 'PRIVATE USE 1' => chr utf8::unicode_to_native(0x91),
+ 'PRIVATE USE 2' => chr utf8::unicode_to_native(0x92),
);
# These are the aliases above that differ under :loose and :full matching
@@ -77,15 +77,15 @@ my %system_aliases = (
#);
#my %deprecated_aliases;
-#$deprecated_aliases{'BELL'} = pack("U", utf8::unicode_to_native(0x07)) if $^V lt v5.17.0;
+#$deprecated_aliases{'BELL'} = chr utf8::unicode_to_native(0x07) if $^V lt v5.17.0;
#my %loose_deprecated_aliases = (
#);
# These are special cased in :loose matching, differing only in a medial
# hyphen
-my $HANGUL_JUNGSEONG_O_E_utf8 = pack("U", 0x1180);
-my $HANGUL_JUNGSEONG_OE_utf8 = pack("U", 0x116C);
+my $HANGUL_JUNGSEONG_O_E_utf8 = chr 0x1180;
+my $HANGUL_JUNGSEONG_OE_utf8 = chr 0x116C;
my $txt; # The table of official character names
@@ -163,7 +163,7 @@ sub alias (@) # Set up a single alias
}
if ($value =~ $decimal_qr) {
no warnings qw(non_unicode surrogate nonchar); # Allow any of these
- $^H{charnames_ord_aliases}{$name} = pack("U", $value);
+ $^H{charnames_ord_aliases}{$name} = chr $value;
# Use a canonical form.
$^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
@@ -432,7 +432,7 @@ sub lookup_name ($$$) {
if (($loose || $^H{charnames_full})
&& (defined (my $ord = charnames::name_to_code_point_special($lookup_name, $loose))))
{
- $result = pack("U", $ord);
+ $result = chr $ord;
}
else {
@@ -525,7 +525,7 @@ sub lookup_name ($$$) {
# therefore yield the very last character in the table, which should
# also be a \n, so the statement works anyway.)
if (substr($txt, $off[0] - 7, 1) eq "\n") {
- $result = pack("U", CORE::hex substr($txt, $off[0] - 6, 5));
+ $result = chr CORE::hex substr($txt, $off[0] - 6, 5);
# Handle the single loose matching special case, in which two names
# differ only by a single medial hyphen. If the original had a
@@ -544,7 +544,7 @@ sub lookup_name ($$$) {
# The +1 skips past that newline, or, if the rindex() fails, to put
# us to an offset of zero.
my $charstart = rindex($txt, "\n", $off[0] - 7) + 1;
- $result = pack("U*", map { CORE::hex }
+ $result = pack("W*", map { CORE::hex }
split " ", substr($txt, $charstart, $off[0] - $charstart - 1));
}
}
@@ -556,8 +556,6 @@ sub lookup_name ($$$) {
$cache_ref->{$name} = $result if defined $cache_ref;
}
}
- utf8::downgrade($result, 1);
-
# Here, have the result character. If the return is to be an ord, must be
# any single character.