charnames.pm: Small performance enhancements

mktables is changed to output 5 digit code points, which means that charnames doesn't have to go looking for the boundaries, which gives a slight performance enhancement.
author: Karl Williamson <public@khwilliamson.com> 2010-09-09 17:16:53 -0600
committer: Father Chrysostomos <sprout@cpan.org> 2010-09-25 00:46:59 -0700
commit: b1c167a3f17cc65c27981e99ce05526cb080220d (patch)
tree: dcd0047baa27e6e9063db293906b6079ac9aec41 /lib/charnames.pm
parent: a79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
download: perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz
1 files changed, 8 insertions, 22 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index 412357829c..29eb8e8869 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias
         $^H{charnames_ord_aliases}{$name} = $value;
 
         # Use a canonical form.
-        $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name;
+        $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
     }
     else {
         # XXX validate syntax when deprecation cycle complete. ie. start
@@ -578,7 +578,7 @@ sub lookup_name ($;$) {
 
       ## Suck in the code/name list as a big string.
       ## Lines look like:
-      ##     "0052\t\tLATIN CAPITAL LETTER R\n"
+      ##     "00052\t\tLATIN CAPITAL LETTER R\n"
       $txt = do "unicore/Name.pl" unless $txt;
 
       ## @off will hold the index into the code/name string of the start and
@@ -639,24 +639,10 @@ sub lookup_name ($;$) {
       }
 
       if (! defined $ord) {
-        ##
-        ## Now know where in the string the name starts.
-        ## The code, in hex, is before that.
-        ##
-        ## The code can be 4-6 characters long, so we've got to sort of
-        ## go look for it, just after the newline that comes before $off[0].
-        ##
-        ## This would be much easier if unicore/Name.pl had info in
-        ## a name/code order, instead of code/name order.
-        ##
-        ## The +1 after the rindex() is to skip past the newline we're finding,
-        ## or, if the rindex() fails, to put us to an offset of zero.
-        ##
-        my $hexstart = rindex($txt, "\n", $off[0]) + 1;
-
-        ## we know where it starts, so turn into number -
-        ## the ordinal for the char.
-        $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart);
+
+        # Now know where in the string the name starts.
+        # The code, 5 hex digits long (and 2 tabs) is before that.
+        $ord = CORE::hex substr($txt, $off[0] - 7, 5);
       }
 
       # Cache the input so as to not have to search the large table
@@ -792,10 +778,10 @@ sub viacode {
   # Must check if decimal first; see comments at that definition
   my $hex;
   if ($arg =~ $decimal_qr) {
-    $hex = sprintf "%04X", $arg;
+    $hex = sprintf "%05X", $arg;
   } elsif ($arg =~ $hex_qr) {
     # Below is the line that differs from the _getcode() source
-    $hex = sprintf "%04X", hex $1;
+    $hex = sprintf "%05X", hex $1;
   } else {
     carp("unexpected arg \"$arg\" to charnames::viacode()");
     return;
author	Karl Williamson <public@khwilliamson.com>	2010-09-09 17:16:53 -0600
committer	Father Chrysostomos <sprout@cpan.org>	2010-09-25 00:46:59 -0700
commit	b1c167a3f17cc65c27981e99ce05526cb080220d (patch)
tree	dcd0047baa27e6e9063db293906b6079ac9aec41 /lib/charnames.pm
parent	a79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
download	perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz