summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2010-09-09 17:16:53 -0600
committerFather Chrysostomos <sprout@cpan.org>2010-09-25 00:46:59 -0700
commitb1c167a3f17cc65c27981e99ce05526cb080220d (patch)
treedcd0047baa27e6e9063db293906b6079ac9aec41
parenta79b922baa417139b1a0a4393e181b72d5ebc030 (diff)
downloadperl-b1c167a3f17cc65c27981e99ce05526cb080220d.tar.gz
charnames.pm: Small performance enhancements
mktables is changed to output 5 digit code points, which means that charnames doesn't have to go looking for the boundaries, which gives a slight performance enhancement.
-rw-r--r--lib/charnames.pm30
-rw-r--r--lib/unicore/mktables38
2 files changed, 36 insertions, 32 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index 412357829c..29eb8e8869 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -464,7 +464,7 @@ sub alias (@) # Set up a single alias
$^H{charnames_ord_aliases}{$name} = $value;
# Use a canonical form.
- $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name;
+ $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
}
else {
# XXX validate syntax when deprecation cycle complete. ie. start
@@ -578,7 +578,7 @@ sub lookup_name ($;$) {
## Suck in the code/name list as a big string.
## Lines look like:
- ## "0052\t\tLATIN CAPITAL LETTER R\n"
+ ## "00052\t\tLATIN CAPITAL LETTER R\n"
$txt = do "unicore/Name.pl" unless $txt;
## @off will hold the index into the code/name string of the start and
@@ -639,24 +639,10 @@ sub lookup_name ($;$) {
}
if (! defined $ord) {
- ##
- ## Now know where in the string the name starts.
- ## The code, in hex, is before that.
- ##
- ## The code can be 4-6 characters long, so we've got to sort of
- ## go look for it, just after the newline that comes before $off[0].
- ##
- ## This would be much easier if unicore/Name.pl had info in
- ## a name/code order, instead of code/name order.
- ##
- ## The +1 after the rindex() is to skip past the newline we're finding,
- ## or, if the rindex() fails, to put us to an offset of zero.
- ##
- my $hexstart = rindex($txt, "\n", $off[0]) + 1;
-
- ## we know where it starts, so turn into number -
- ## the ordinal for the char.
- $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart);
+
+ # Now know where in the string the name starts.
+ # The code, 5 hex digits long (and 2 tabs) is before that.
+ $ord = CORE::hex substr($txt, $off[0] - 7, 5);
}
# Cache the input so as to not have to search the large table
@@ -792,10 +778,10 @@ sub viacode {
# Must check if decimal first; see comments at that definition
my $hex;
if ($arg =~ $decimal_qr) {
- $hex = sprintf "%04X", $arg;
+ $hex = sprintf "%05X", $arg;
} elsif ($arg =~ $hex_qr) {
# Below is the line that differs from the _getcode() source
- $hex = sprintf "%04X", hex $1;
+ $hex = sprintf "%05X", hex $1;
} else {
carp("unexpected arg \"$arg\" to charnames::viacode()");
return;
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index b88483c8b3..b959bd3e60 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -4675,16 +4675,23 @@ sub trace { return main::trace(@_); }
# If has or wants a single point range output
if ($start == $end || $range_size_1) {
- for my $i ($start .. $end) {
- push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
- if ($output_names) {
- if (! defined $viacode[$i]) {
- $viacode[$i] =
- Property::property_ref('Perl_Charnames')
- ->value_of($i)
- || "";
+ if (ref $range_size_1 eq 'CODE') {
+ for my $i ($start .. $end) {
+ push @OUT, &$range_size_1($i, $value);
+ }
+ }
+ else {
+ for my $i ($start .. $end) {
+ push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
+ if ($output_names) {
+ if (! defined $viacode[$i]) {
+ $viacode[$i] =
+ Property::property_ref('Perl_Charnames')
+ ->value_of($i)
+ || "";
+ }
+ $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/;
}
- $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/;
}
}
}
@@ -8536,6 +8543,17 @@ END
return @return;
}
+sub output_perl_charnames_line ($$) {
+
+ # Output the entries in Perl_charnames specially, using 5 digits instead
+ # of four. This makes the entries a constant length, and simplifies
+ # charnames.pm which this table is for. Unicode can have 6 digit
+ # ordinals, but they are all private use or noncharacters which do not
+ # have names, so won't be in this table.
+
+ return sprintf "%05X\t\t%s\n", $_[0], $_[1];
+}
+
{ # Closure
# This is used to store the range list of all the code points usable when
# the little used $compare_versions feature is enabled.
@@ -9120,7 +9138,7 @@ END
File => 'Name',
Internal_Only_Warning => 1,
Perl_Extension => 1,
- Range_Size_1 => 1,
+ Range_Size_1 => \&output_perl_charnames_line,
Type => $STRING,
);