summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2019-02-01 11:22:15 -0700
committer Karl Williamson <khw@cpan.org> 2019-02-05 11:44:29 -0700
commit a2aeff5051e0dae6b97396d6a080fdb867a91ea4 (patch)
tree 928d9fcf7e449e6e8b993f9b21ab86e75074a782 /regen
parent e061e59351ce8074d899ffcfe7c4b029c3322928 (diff)
download perl-a2aeff5051e0dae6b97396d6a080fdb867a91ea4.tar.gz
regen/mk_invlists.pl: Create new inversion list
This will be used in a future commit.
Diffstat (limited to 'regen')
-rw-r--r-- regen/mk_invlists.pl 30
1 files changed, 30 insertions, 0 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index dd6a0321e0..55c4afb279 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -1092,6 +1092,35 @@ sub UpperLatin1 {
return \@return;
}
+sub _Perl_CCC_non0_non230 {
+ # Takes no arguments; returns a reference to an array (an inversion list).
+ # Create an inversion list of code points with non-zero canonical
+ # combining class that also don't have 230 as the class number. This is
+ # part of a Unicode Standard rule.
+
+ my @nonzeros = prop_invlist("ccc=0");
+ shift @nonzeros; # Invert so is "ccc != 0" (assumes the list began at 0)
+
+ my @return;
+
+ # Expand into list of code points, while excluding those with ccc == 230
+ for (my $i = 0; $i < @nonzeros; $i += 2) { # [$i] starts a range
+ my $upper = ($i + 1) < @nonzeros
+ ? $nonzeros[$i+1] - 1 # In range; [$i+1] is one past the range's end
+ : $Unicode::UCD::MAX_CP; # To infinity (no closing element).
+ for my $j ($nonzeros[$i] .. $upper) {
+ my @ccc_names = prop_value_aliases("ccc", charprop($j, "ccc"));
+ # @ccc_names now holds the aliases of the ccc value that $j has.
+ # Final element in @ccc_names will be all numeric, so '!=' is safe
+ push @return, $j if $ccc_names[-1] != 230;
+ }
+ }
+
+ @return = sort { $a <=> $b } @return; # Numeric order for the next call
+ @return = mk_invlist_from_sorted_cp_list(\@return); # Back to ranges
+ return \@return;
+}
+
sub output_table_common {
# Common subroutine to actually output the generated rules table.
@@ -2319,6 +2348,7 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
Simple_Case_Folding
Case_Folding
&_Perl_IVCF
+ &_Perl_CCC_non0_non230
);
# NOTE that the convention is that extra enum values come
# after the property name, separated by commas, with the enums