diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-09-09 17:16:53 -0600 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-09-25 00:46:59 -0700 |
commit | b1c167a3f17cc65c27981e99ce05526cb080220d (patch) | |
tree | dcd0047baa27e6e9063db293906b6079ac9aec41 /lib/charnames.pm | |
parent | a79b922baa417139b1a0a4393e181b72d5ebc030 (diff) | |
download | perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz |
charnames.pm: Small performance enhancements
mktables is changed to output code points zero-padded to 5 hex digits,
which means that charnames no longer has to search backwards for where
each code point starts (it is now at a fixed offset before the name),
which gives a slight performance enhancement.
Diffstat (limited to 'lib/charnames.pm')
-rw-r--r-- | lib/charnames.pm | 30 |
1 file changed, 8 insertions, 22 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm index 412357829c..29eb8e8869 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias $^H{charnames_ord_aliases}{$name} = $value; # Use a canonical form. - $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name; + $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name; } else { # XXX validate syntax when deprecation cycle complete. ie. start @@ -578,7 +578,7 @@ sub lookup_name ($;$) { ## Suck in the code/name list as a big string. ## Lines look like: - ## "0052\t\tLATIN CAPITAL LETTER R\n" + ## "00052\t\tLATIN CAPITAL LETTER R\n" $txt = do "unicore/Name.pl" unless $txt; ## @off will hold the index into the code/name string of the start and @@ -639,24 +639,10 @@ sub lookup_name ($;$) { } if (! defined $ord) { - ## - ## Now know where in the string the name starts. - ## The code, in hex, is before that. - ## - ## The code can be 4-6 characters long, so we've got to sort of - ## go look for it, just after the newline that comes before $off[0]. - ## - ## This would be much easier if unicore/Name.pl had info in - ## a name/code order, instead of code/name order. - ## - ## The +1 after the rindex() is to skip past the newline we're finding, - ## or, if the rindex() fails, to put us to an offset of zero. - ## - my $hexstart = rindex($txt, "\n", $off[0]) + 1; - - ## we know where it starts, so turn into number - - ## the ordinal for the char. - $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart); + + # Now know where in the string the name starts. + # The code, 5 hex digits long (and 2 tabs) is before that. 
+ $ord = CORE::hex substr($txt, $off[0] - 7, 5); } # Cache the input so as to not have to search the large table @@ -792,10 +778,10 @@ sub viacode { # Must check if decimal first; see comments at that definition my $hex; if ($arg =~ $decimal_qr) { - $hex = sprintf "%04X", $arg; + $hex = sprintf "%05X", $arg; } elsif ($arg =~ $hex_qr) { # Below is the line that differs from the _getcode() source - $hex = sprintf "%04X", hex $1; + $hex = sprintf "%05X", hex $1; } else { carp("unexpected arg \"$arg\" to charnames::viacode()"); return; |