diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-09-09 17:16:53 -0600 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-09-25 00:46:59 -0700 |
commit | b1c167a3f17cc65c27981e99ce05526cb080220d (patch) | |
tree | dcd0047baa27e6e9063db293906b6079ac9aec41 | |
parent | a79b922baa417139b1a0a4393e181b72d5ebc030 (diff) | |
download | perl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz |
charnames.pm: Small performance enhancements
mktables is changed to output 5 digit code points, which means that
charnames doesn't have to go looking for the boundaries, which gives a
slight performance enhancement.
-rw-r--r-- | lib/charnames.pm | 30 | ||||
-rw-r--r-- | lib/unicore/mktables | 38 |
2 files changed, 36 insertions, 32 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm index 412357829c..29eb8e8869 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias $^H{charnames_ord_aliases}{$name} = $value; # Use a canonical form. - $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name; + $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name; } else { # XXX validate syntax when deprecation cycle complete. ie. start @@ -578,7 +578,7 @@ sub lookup_name ($;$) { ## Suck in the code/name list as a big string. ## Lines look like: - ## "0052\t\tLATIN CAPITAL LETTER R\n" + ## "00052\t\tLATIN CAPITAL LETTER R\n" $txt = do "unicore/Name.pl" unless $txt; ## @off will hold the index into the code/name string of the start and @@ -639,24 +639,10 @@ sub lookup_name ($;$) { } if (! defined $ord) { - ## - ## Now know where in the string the name starts. - ## The code, in hex, is before that. - ## - ## The code can be 4-6 characters long, so we've got to sort of - ## go look for it, just after the newline that comes before $off[0]. - ## - ## This would be much easier if unicore/Name.pl had info in - ## a name/code order, instead of code/name order. - ## - ## The +1 after the rindex() is to skip past the newline we're finding, - ## or, if the rindex() fails, to put us to an offset of zero. - ## - my $hexstart = rindex($txt, "\n", $off[0]) + 1; - - ## we know where it starts, so turn into number - - ## the ordinal for the char. - $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart); + + # Now know where in the string the name starts. + # The code, 5 hex digits long (and 2 tabs) is before that. + $ord = CORE::hex substr($txt, $off[0] - 7, 5); } # Cache the input so as to not have to search the large table @@ -792,10 +778,10 @@ sub viacode { # Must check if decimal first; see comments at that definition my $hex; if ($arg =~ $decimal_qr) { - $hex = sprintf "%04X", $arg; + $hex = sprintf "%05X", $arg; } elsif ($arg =~ $hex_qr) { # Below is the line that differs from the _getcode() source - $hex = sprintf "%04X", hex $1; + $hex = sprintf "%05X", hex $1; } else { carp("unexpected arg \"$arg\" to charnames::viacode()"); return; diff --git a/lib/unicore/mktables b/lib/unicore/mktables index b88483c8b3..b959bd3e60 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -4675,16 +4675,23 @@ sub trace { return main::trace(@_); } # If has or wants a single point range output if ($start == $end || $range_size_1) { - for my $i ($start .. $end) { - push @OUT, sprintf "%04X\t\t%s\n", $i, $value; - if ($output_names) { - if (! defined $viacode[$i]) { - $viacode[$i] = - Property::property_ref('Perl_Charnames') - ->value_of($i) - || ""; + if (ref $range_size_1 eq 'CODE') { + for my $i ($start .. $end) { + push @OUT, &$range_size_1($i, $value); + } + } + else { + for my $i ($start .. $end) { + push @OUT, sprintf "%04X\t\t%s\n", $i, $value; + if ($output_names) { + if (! defined $viacode[$i]) { + $viacode[$i] = + Property::property_ref('Perl_Charnames') + ->value_of($i) + || ""; + } + $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/; } - $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/; } } } @@ -8536,6 +8543,17 @@ END return @return; } +sub output_perl_charnames_line ($$) { + + # Output the entries in Perl_charnames specially, using 5 digits instead + # of four. This makes the entries a constant length, and simplifies + # charnames.pm which this table is for. Unicode can have 6 digit + # ordinals, but they are all private use or noncharacters which do not + # have names, so won't be in this table. + + return sprintf "%05X\t\t%s\n", $_[0], $_[1]; +} + { # Closure # This is used to store the range list of all the code points usable when # the little used $compare_versions feature is enabled. @@ -9120,7 +9138,7 @@ END File => 'Name', Internal_Only_Warning => 1, Perl_Extension => 1, - Range_Size_1 => 1, + Range_Size_1 => \&output_perl_charnames_line, Type => $STRING, ); |