diff options
author | Karl Williamson <khw@cpan.org> | 2019-02-01 11:22:15 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-02-05 11:44:29 -0700 |
commit | a2aeff5051e0dae6b97396d6a080fdb867a91ea4 (patch) | |
tree | 928d9fcf7e449e6e8b993f9b21ab86e75074a782 /regen | |
parent | e061e59351ce8074d899ffcfe7c4b029c3322928 (diff) | |
download | perl-a2aeff5051e0dae6b97396d6a080fdb867a91ea4.tar.gz |
regen/mk_invlists.pl: Create new inversion list
This will be used in a future commit.
Diffstat (limited to 'regen')
-rw-r--r-- | regen/mk_invlists.pl | 30 |
1 files changed, 30 insertions, 0 deletions
```diff
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index dd6a0321e0..55c4afb279 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -1092,6 +1092,35 @@ sub UpperLatin1 {
     return \@return;
 }
 
+sub _Perl_CCC_non0_non230 {
+
+    # Create an inversion list of code points with non-zero canonical
+    # combining class that also don't have 230 as the class number. This is
+    # part of a Unicode Standard rule
+
+    my @nonzeros = prop_invlist("ccc=0");
+    shift @nonzeros;    # Invert so is "ccc != 0"
+
+    my @return;
+
+    # Expand into list of code points, while excluding those with ccc == 230
+    for (my $i = 0; $i < @nonzeros; $i += 2) {
+        my $upper = ($i + 1) < @nonzeros
+                    ? $nonzeros[$i+1] - 1      # In range
+                    : $Unicode::UCD::MAX_CP;   # To infinity.
+        for my $j ($nonzeros[$i] .. $upper) {
+            my @ccc_names = prop_value_aliases("ccc", charprop($j, "ccc"));
+
+            # Final element in @ccc_names will be all numeric
+            push @return, $j if $ccc_names[-1] != 230;
+        }
+    }
+
+    @return = sort { $a <=> $b } @return;
+    @return = mk_invlist_from_sorted_cp_list(\@return);
+    return \@return;
+}
+
 sub output_table_common {
 
     # Common subroutine to actually output the generated rules table.
@@ -2319,6 +2348,7 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
                     Simple_Case_Folding
                     Case_Folding
                     &_Perl_IVCF
+                    &_Perl_CCC_non0_non230
                 );
                 # NOTE that the convention is that extra enum values come
                 # after the property name, separated by commas, with the enums
```