summaryrefslogtreecommitdiff
path: root/lib/charnames.pm
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2010-09-09 17:16:53 -0600
committerFather Chrysostomos <sprout@cpan.org>2010-09-25 00:46:59 -0700
commitb1c167a3f17cc65c27981e99ce05526cb080220d (patch)
treedcd0047baa27e6e9063db293906b6079ac9aec41 /lib/charnames.pm
parenta79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
downloadperl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz
charnames.pm: Small performance enhancements
mktables is changed to output 5 digit code points, which means that charnames doesn't have to go looking for the boundaries, which gives a slight performance enhancement.
Diffstat (limited to 'lib/charnames.pm')
-rw-r--r--lib/charnames.pm30
1 files changed, 8 insertions, 22 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index 412357829c..29eb8e8869 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias
$^H{charnames_ord_aliases}{$name} = $value;
# Use a canonical form.
- $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name;
+ $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
}
else {
# XXX validate syntax when deprecation cycle complete. ie. start
@@ -578,7 +578,7 @@ sub lookup_name ($;$) {
## Suck in the code/name list as a big string.
## Lines look like:
- ## "0052\t\tLATIN CAPITAL LETTER R\n"
+ ## "00052\t\tLATIN CAPITAL LETTER R\n"
$txt = do "unicore/Name.pl" unless $txt;
## @off will hold the index into the code/name string of the start and
@@ -639,24 +639,10 @@ sub lookup_name ($;$) {
}
if (! defined $ord) {
- ##
- ## Now know where in the string the name starts.
- ## The code, in hex, is before that.
- ##
- ## The code can be 4-6 characters long, so we've got to sort of
- ## go look for it, just after the newline that comes before $off[0].
- ##
- ## This would be much easier if unicore/Name.pl had info in
- ## a name/code order, instead of code/name order.
- ##
- ## The +1 after the rindex() is to skip past the newline we're finding,
- ## or, if the rindex() fails, to put us to an offset of zero.
- ##
- my $hexstart = rindex($txt, "\n", $off[0]) + 1;
-
- ## we know where it starts, so turn into number -
- ## the ordinal for the char.
- $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart);
+
+ # Now know where in the string the name starts.
+ # The code, 5 hex digits long (and 2 tabs) is before that.
+ $ord = CORE::hex substr($txt, $off[0] - 7, 5);
}
# Cache the input so as to not have to search the large table
@@ -792,10 +778,10 @@ sub viacode {
# Must check if decimal first; see comments at that definition
my $hex;
if ($arg =~ $decimal_qr) {
- $hex = sprintf "%04X", $arg;
+ $hex = sprintf "%05X", $arg;
} elsif ($arg =~ $hex_qr) {
# Below is the line that differs from the _getcode() source
- $hex = sprintf "%04X", hex $1;
+ $hex = sprintf "%05X", hex $1;
} else {
carp("unexpected arg \"$arg\" to charnames::viacode()");
return;