diff options
author | Karl Williamson <khw@cpan.org> | 2015-08-18 14:47:48 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-08-18 15:03:35 -0600 |
commit | 4b06b8deeb339269bad2112a4e6603e43f7bee36 (patch) | |
tree | 5cc8d08bafec32e3a02dfa9914356d7e7809e942 | |
parent | f3106bc89eb4bbffee5ca7cb67bd63d2f3ce87bf (diff) | |
download | perl-4b06b8deeb339269bad2112a4e6603e43f7bee36.tar.gz |
mktables: Add code point ages to --annotate option
This can be useful information.
-rw-r--r-- | charclass_invlists.h | 2 | ||||
-rw-r--r-- | lib/unicore/mktables | 31 | ||||
-rw-r--r-- | regcharclass.h | 2 |
3 files changed, 30 insertions, 5 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 64406bfe78..8f14d3167b 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -99537,7 +99537,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */ * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt - * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables + * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl * 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 27cb45ab9d..81539367a2 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -1441,6 +1441,7 @@ sub objaddr($) { # after all the input has been processed. But most can be skipped, as they # have the same descriptive phrases, such as being unassigned my @viacode; # Contains the 1 million character names +my @age; # And their ages ("" if none) my @printable; # boolean: And are those characters printable? my @annotate_char_type; # Contains a type of those characters, specifically # for the purposes of annotation. @@ -1499,12 +1500,14 @@ sub populate_char_info ($) { $annotate_char_type[$i] = $ABOVE_UNICODE_TYPE; $printable[$i] = 0; $end = $MAX_WORKING_CODEPOINT; + $age[$i] = ""; } elsif ($gc-> table('Private_use')->contains($i)) { $viacode[$i] = 'Private Use'; $annotate_char_type[$i] = $PRIVATE_USE_TYPE; $printable[$i] = 0; $end = $gc->table('Private_Use')->containing_range($i)->end; + $age[$i] = property_ref("Age")->value_of($i); } elsif ((defined ($nonchar = Property::property_ref('Noncharacter_Code_Point')) @@ -1515,11 +1518,13 @@ sub populate_char_info ($) { $printable[$i] = 0; $end = property_ref('Noncharacter_Code_Point')->table('Y')-> containing_range($i)->end; + $age[$i] = property_ref("Age")->value_of($i); } elsif ($gc-> table('Control')->contains($i)) { $viacode[$i] = property_ref('Name_Alias')->value_of($i) || 'Control'; $annotate_char_type[$i] = $CONTROL_TYPE; $printable[$i] = 0; + $age[$i] = property_ref("Age")->value_of($i); } elsif ($gc-> table('Unassigned')->contains($i)) { $annotate_char_type[$i] = $UNASSIGNED_TYPE; @@ -1540,12 +1545,14 @@ sub populate_char_info ($) { $unassigned_sans_noncharacters-> containing_range($i)->end); } + $age[$i] = property_ref("Age")->value_of($i); } elsif ($perl->table('_Perl_Surrogate')->contains($i)) { $viacode[$i] = 'Surrogate'; $annotate_char_type[$i] = $SURROGATE_TYPE; $printable[$i] = 0; $end = $gc->table('Surrogate')->containing_range($i)->end; + $age[$i] = property_ref("Age")->value_of($i); } else { Carp::my_carp_bug("Can't figure out how to annotate " @@ -1561,7 +1568,18 @@ sub populate_char_info ($) { # appended to the name, do that. elsif ($annotate_char_type[$i] == $CP_IN_NAME) { $viacode[$i] .= sprintf("-%04X", $i); - $end = $perl_charname->containing_range($i)->end; + + # Do all these as groups of the same age, instead of individually, + # because their names are so meaningless, and there are typically + # large quantities of them. + my $Age = property_ref("Age"); + $age[$i] = $Age->value_of($i); + my $limit = $perl_charname->containing_range($i)->end; + $end = $i + 1; + while ($end <= $limit && $Age->value_of($end) == $age[$i]) { + $end++; + } + $end--; } # And here, has a name, but if it's a hangul syllable one, replace it with @@ -1574,8 +1592,12 @@ sub populate_char_info ($) { my $T = $TBase + $SIndex % $TCount; $viacode[$i] = "HANGUL SYLLABLE $Jamo{$L}$Jamo{$V}"; $viacode[$i] .= $Jamo{$T} if $T != $TBase; + $age[$i] = property_ref("Age")->value_of($i); $end = $perl_charname->containing_range($i)->end; } + else { + $age[$i] = property_ref("Age")->value_of($i); + } return if ! defined wantarray; return $i if ! defined $end; # If not a range, return the input @@ -6341,7 +6363,10 @@ END else { # Indent if not displaying code points $annotation = " " x 4; } - $annotation .= " $range_name" if $range_name; + if ($range_name) { + $annotation .= " $age[$i]" if $age[$i]; + $annotation .= " $range_name"; + } # Include the number of code points in the # range @@ -6418,7 +6443,7 @@ END } if ($include_cp) { - $annotation = sprintf "%04X", $i; + $annotation = sprintf "%04X %s", $i, $age[$i]; if ($use_adjustments) { $annotation .= " => $output_value"; } diff --git a/regcharclass.h b/regcharclass.h index f7cd2fde5b..f4b6298276 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -2514,7 +2514,7 @@ * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt - * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables + * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl * d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl |