charnames.pm: Small performance enhancements

mktables is changed to output 5 digit code points, which means that charnames doesn't have to go looking for the boundaries, which gives a slight performance enhancement.
author: Karl Williamson <public@khwilliamson.com> 2010-09-09 17:16:53 -0600
committer: Father Chrysostomos <sprout@cpan.org> 2010-09-25 00:46:59 -0700
commit: b1c167a3f17cc65c27981e99ce05526cb080220d (patch)
tree: dcd0047baa27e6e9063db293906b6079ac9aec41
parent: a79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
download: perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz
2 files changed, 36 insertions, 32 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index 412357829c..29eb8e8869 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias
         $^H{charnames_ord_aliases}{$name} = $value;
 
         # Use a canonical form.
-        $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name;
+        $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
     }
     else {
         # XXX validate syntax when deprecation cycle complete. ie. start
@@ -578,7 +578,7 @@ sub lookup_name ($;$) {
 
       ## Suck in the code/name list as a big string.
       ## Lines look like:
-      ##     "0052\t\tLATIN CAPITAL LETTER R\n"
+      ##     "00052\t\tLATIN CAPITAL LETTER R\n"
       $txt = do "unicore/Name.pl" unless $txt;
 
       ## @off will hold the index into the code/name string of the start and
@@ -639,24 +639,10 @@ sub lookup_name ($;$) {
       }
 
       if (! defined $ord) {
-        ##
-        ## Now know where in the string the name starts.
-        ## The code, in hex, is before that.
-        ##
-        ## The code can be 4-6 characters long, so we've got to sort of
-        ## go look for it, just after the newline that comes before $off[0].
-        ##
-        ## This would be much easier if unicore/Name.pl had info in
-        ## a name/code order, instead of code/name order.
-        ##
-        ## The +1 after the rindex() is to skip past the newline we're finding,
-        ## or, if the rindex() fails, to put us to an offset of zero.
-        ##
-        my $hexstart = rindex($txt, "\n", $off[0]) + 1;
-
-        ## we know where it starts, so turn into number -
-        ## the ordinal for the char.
-        $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart);
+
+        # Now know where in the string the name starts.
+        # The code, 5 hex digits long (and 2 tabs) is before that.
+        $ord = CORE::hex substr($txt, $off[0] - 7, 5);
       }
 
       # Cache the input so as to not have to search the large table
@@ -792,10 +778,10 @@ sub viacode {
   # Must check if decimal first; see comments at that definition
   my $hex;
   if ($arg =~ $decimal_qr) {
-    $hex = sprintf "%04X", $arg;
+    $hex = sprintf "%05X", $arg;
   } elsif ($arg =~ $hex_qr) {
     # Below is the line that differs from the _getcode() source
-    $hex = sprintf "%04X", hex $1;
+    $hex = sprintf "%05X", hex $1;
   } else {
     carp("unexpected arg \"$arg\" to charnames::viacode()");
     return;
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index b88483c8b3..b959bd3e60 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -4675,16 +4675,23 @@ sub trace { return main::trace(@_); }
 
                 # If has or wants a single point range output
                 if ($start == $end || $range_size_1) {
-                    for my $i ($start .. $end) {
-                        push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
-                        if ($output_names) {
-                            if (! defined $viacode[$i]) {
-                                $viacode[$i] =
-                                    Property::property_ref('Perl_Charnames')
-                                                                ->value_of($i)
-                                    || "";
+                    if (ref $range_size_1 eq 'CODE') {
+                        for my $i ($start .. $end) {
+                            push @OUT, &$range_size_1($i, $value);
+                        }
+                    }
+                    else {
+                        for my $i ($start .. $end) {
+                            push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
+                            if ($output_names) {
+                                if (! defined $viacode[$i]) {
+                                    $viacode[$i] =
+                                        Property::property_ref('Perl_Charnames')
+                                                                    ->value_of($i)
+                                        || "";
+                                }
+                                $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/;
                             }
-                            $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/;
                         }
                     }
                 }
@@ -8536,6 +8543,17 @@ END
     return @return;
 }
 
+sub output_perl_charnames_line ($$) {
+
+    # Output the entries in Perl_charnames specially, using 5 digits instead
+    # of four.  This makes the entries a constant length, and simplifies
+    # charnames.pm which this table is for.  Unicode can have 6 digit
+    # ordinals, but they are all private use or noncharacters which do not
+    # have names, so won't be in this table.
+
+    return sprintf "%05X\t\t%s\n", $_[0], $_[1];
+}
+
 { # Closure
     # This is used to store the range list of all the code points usable when
     # the little used $compare_versions feature is enabled.
@@ -9120,7 +9138,7 @@ END
                        File => 'Name',
                        Internal_Only_Warning => 1,
                        Perl_Extension => 1,
-                       Range_Size_1 => 1,
+                       Range_Size_1 => \&output_perl_charnames_line,
                        Type => $STRING,
                        );
author	Karl Williamson <public@khwilliamson.com>	2010-09-09 17:16:53 -0600
committer	Father Chrysostomos <sprout@cpan.org>	2010-09-25 00:46:59 -0700
commit	b1c167a3f17cc65c27981e99ce05526cb080220d (patch)
tree	dcd0047baa27e6e9063db293906b6079ac9aec41
parent	a79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
download	perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz