diff options
author | Karl Williamson <khw@cpan.org> | 2017-12-03 11:09:17 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2017-12-03 20:03:04 -0700 |
commit | 22a1eafe5214cfc1f77e492e7842c1d6ff47e739 (patch) | |
tree | 390e3855ad0c3752b7ad9342b6dd0028059816e2 /lib/unicore | |
parent | 2ce94a867b15d96bd49eb8807d39df950f3a1087 (diff) | |
download | perl-22a1eafe5214cfc1f77e492e7842c1d6ff47e739.tar.gz |
perluniprops/mktables: Add Definition concept
A Definition specifies which code points a table matches. This step takes the
Description field of various properties and moves the portion that lists the
matched code points into the new Definition field. The only effect is a slight
change to the generated perluniprops.pod, in the order in which the information
about a given property appears.
Diffstat (limited to 'lib/unicore')
-rw-r--r-- | lib/unicore/mktables | 53 |
1 files changed, 34 insertions, 19 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 55da8fff94..13732d3496 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -7810,6 +7810,10 @@ use parent '-norequire', '_Base_Table'; # version. But manual intervention to decide what the actual behavior # should be may be required should this happen. The introductory comments # have more to say about this. +# +# 4) Definition. This is a string for human consumption that specifies the +# code points that this table matches. This is used only for the generated +# pod file. sub standardize { return main::standardize($_[0]); } sub trace { return main::trace(@_); } @@ -7854,6 +7858,10 @@ sub trace { return main::trace(@_); } # none. main::set_access('complement', \%complement, 'r'); + my %definition; + # Human readable string of the code points matched by this table + main::set_access('definition', \%definition, 'r', 's'); + sub new { my $class = shift; @@ -7870,6 +7878,7 @@ sub trace { return main::trace(@_); } my $initialize = delete $args{'Initialize'}; my $matches_all = delete $args{'Matches_All'} || 0; my $format = delete $args{'Format'}; + my $definition = delete $args{'Definition'} // ""; # Rest of parameters passed on. my $range_list = Range_List->new(Initialize => $initialize, @@ -7904,6 +7913,7 @@ sub trace { return main::trace(@_); } $leader{$addr} = $self; $parent{$addr} = $self; $complement{$addr} = 0; + $definition{$addr} = $definition; if (defined $format && $format ne $EMPTY_FORMAT) { Carp::my_carp_bug("'Format' must be '$EMPTY_FORMAT' in a match table instead of '$format'. 
Using '$EMPTY_FORMAT'"); @@ -9381,6 +9391,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } containing_range count default_map + definition delete_range description each_range @@ -14221,8 +14232,8 @@ sub compile_perl() { } my $Any = $perl->add_match_table('Any', - Description => "All Unicode code points: [\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]", - ); + Description => "All Unicode code points", + Definition => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]"); $Any->add_range(0, $MAX_UNICODE_CODEPOINT); $Any->add_alias('Unicode'); @@ -14235,7 +14246,7 @@ sub compile_perl() { ->set_equivalent_to(property_ref('ccc')->table('Above'), Related => 1); - my $ASCII = $perl->add_match_table('ASCII', Description => '[[:ASCII:]]'); + my $ASCII = $perl->add_match_table('ASCII'); if (defined $block) { # This is equivalent to the block if have it. my $Unicode_ASCII = $block->table('Basic_Latin'); if (defined $Unicode_ASCII && ! $Unicode_ASCII->is_empty) { @@ -14295,7 +14306,7 @@ sub compile_perl() { $Lower += $temp & $Assigned; } my $Posix_Lower = $perl->add_match_table("PosixLower", - Description => "[a-z]", + Definition => "[a-z]", Initialize => $Lower & $ASCII, ); @@ -14313,7 +14324,7 @@ sub compile_perl() { $Upper->add_range(0x24B6, 0x24CF); # Circled Latin upper case letters } my $Posix_Upper = $perl->add_match_table("PosixUpper", - Description => "[A-Z]", + Definition => "[A-Z]", Initialize => $Upper & $ASCII, ); @@ -14521,7 +14532,7 @@ sub compile_perl() { $Alpha->add_alias('Alphabetic'); } my $Posix_Alpha = $perl->add_match_table("PosixAlpha", - Description => "[A-Za-z]", + Definition => "[A-Za-z]", Initialize => $Alpha & $ASCII, ); $Posix_Upper->set_caseless_equivalent($Posix_Alpha); @@ -14532,7 +14543,7 @@ sub compile_perl() { Initialize => $Alpha + $gc->table('Decimal_Number'), ); $perl->add_match_table("PosixAlnum", - Description => "[A-Za-z0-9]", + Definition => "[A-Za-z0-9]", Initialize => $Alnum & $ASCII, ); @@ -14559,7 +14570,8 @@ sub 
compile_perl() { # This is a Perl extension, so the name doesn't begin with Posix. my $PerlWord = $perl->add_match_table('PosixWord', - Description => '\w, restricted to ASCII = [A-Za-z0-9_]', + Description => '\w, restricted to ASCII', + Definition => '[A-Za-z0-9_]', Initialize => $Word & $ASCII, ); $PerlWord->add_alias('PerlWord'); @@ -14576,7 +14588,7 @@ sub compile_perl() { ); $Blank->add_alias('HorizSpace'); # Another name for it. $perl->add_match_table("PosixBlank", - Description => "\\t and ' '", + Definition => "\\t and ' '", Initialize => $Blank & $ASCII, ); @@ -14602,7 +14614,7 @@ sub compile_perl() { $Space->add_alias('Space') if $v_version lt v4.1.0; my $Posix_space = $perl->add_match_table("PosixSpace", - Description => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)", + Definition => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)", Initialize => $Space & $ASCII, ); $Posix_space->add_alias('PerlSpace'); # A pre-existing synonym @@ -14611,8 +14623,8 @@ sub compile_perl() { Description => 'Control characters'); $Cntrl->set_equivalent_to($gc->table('Cc'), Related => 1); $perl->add_match_table("PosixCntrl", - Description => "ASCII control characters " - . "ACK, BEL, BS, CAN, CR, DC1, DC2," + Description => "ASCII control characters", + Definition => "ACK, BEL, BS, CAN, CR, DC1, DC2," . " DC3, DC4, DEL, DLE, ENQ, EOM," . " EOT, ESC, ETB, ETX, FF, FS, GS," . 
" HT, LF, NAK, NUL, RS, SI, SO," @@ -14640,7 +14652,7 @@ sub compile_perl() { Initialize => ~ ($Space + $controls), ); $perl->add_match_table("PosixGraph", - Description => + Definition => '[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~0-9A-Za-z]', Initialize => $Graph & $ASCII, ); @@ -14650,7 +14662,7 @@ sub compile_perl() { Initialize => $Blank + $Graph - $gc->table('Control'), ); $perl->add_match_table("PosixPrint", - Description => + Definition => '[- 0-9A-Za-z!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]', Initialize => $print & $ASCII, ); @@ -14666,7 +14678,7 @@ sub compile_perl() { Perl_Extension => 1 ); $perl->add_match_table('PosixPunct', Perl_Extension => 1, - Description => '[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]', + Definition => '[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]', Initialize => $ASCII & $XPosixPunct, ); @@ -14674,7 +14686,7 @@ sub compile_perl() { Description => '[0-9] + all other decimal digits'); $Digit->set_equivalent_to($gc->table('Decimal_Number'), Related => 1); my $PosixDigit = $perl->add_match_table("PosixDigit", - Description => '[0-9]', + Definition => '[0-9]', Initialize => $Digit & $ASCII, ); @@ -14689,7 +14701,7 @@ sub compile_perl() { ord('A') .. ord('F'), ord('a') .. 
ord('f'), 0xFF10..0xFF19, 0xFF21..0xFF26, 0xFF41..0xFF46]); - $Xdigit->add_description('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO'); + $Xdigit->set_definition('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO'); } # AHex was not present in early releases @@ -14703,7 +14715,7 @@ sub compile_perl() { $PosixXDigit->add_alias('AHex'); $PosixXDigit->add_alias('Ascii_Hex_Digit'); } - $PosixXDigit->add_description('[0-9A-Fa-f]'); + $PosixXDigit->set_definition('[0-9A-Fa-f]'); my $any_folds = $perl->add_match_table("_Perl_Any_Folds", Description => "Code points that particpate in some fold", @@ -16382,7 +16394,10 @@ sub make_re_pod_entries($) { if ($table_property != $perl && $table->perl_extension) { push @info, '(Perl extension)'; } - push @info, "($string_count)"; + my $definition = $table->definition // ""; + $definition = "" if $entry_for_first_alias; + $definition = ": $definition" if $definition; + push @info, "($string_count$definition)"; # Now, we have both the entry and info so add them to the # list of all the properties. |