summaryrefslogtreecommitdiff
path: root/regen/mk_invlists.pl
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2019-11-24 21:21:39 -0700
committer: Karl Williamson <khw@cpan.org> 2019-11-26 19:38:15 -0700
commita405530a0bdfb812498536bb46a01f9dcbe8b5d7 (patch)
tree9927edf134a05bee7b8c9f3b208d7e2f48dc6ddd /regen/mk_invlists.pl
parenta8def808210e08cea0b7889fea7c5146b21af4ed (diff)
downloadperl-a405530a0bdfb812498536bb46a01f9dcbe8b5d7.tar.gz
Move data for PL_InBitmap to charclass_invlists.h
This makes it consistent with the other inversion lists for this sort of thing, and finishes the fix for GH #17154.
Diffstat (limited to 'regen/mk_invlists.pl')
-rw-r--r--regen/mk_invlists.pl31
1 files changed, 31 insertions, 0 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 8136f437e7..e14b5c30cb 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -61,6 +61,29 @@ my $max_hdr_len = 3; # In headings, how wide a name is allowed?
print $out_fh "/* See the generating file for comments */\n\n";
+print $out_fh <<'EOF';    # single-quoted heredoc: the C comment below is emitted verbatim, no interpolation
+/* This gives the number of code points that can be in the bitmap of an ANYOF
+ * node. The shift number must currently be one of: 8..12. It can't be less
+ * than 8 (256) because some code relies on it being at least that. Above 12
+ * (4096), and you start running into warnings that some data structure widths
+ * have been exceeded, though the test suite as of this writing still passes
+ * for up through 16, which is as high as anyone would ever want to go,
+ * encompassing all of the Unicode BMP, and thus including all the economically
+ * important world scripts. At 12 most of them are: including Arabic,
+ * Cyrillic, Greek, Hebrew, Indian subcontinent, Latin, and Thai; but not Han,
+ * Japanese, nor Korean. (The regarglen structure in regnodes.h is a U8, and
+ * the trie types TRIEC and AHOCORASICKC are larger than U8 for shift values
+ * above 12.) Be sure to benchmark before changing, as larger sizes do
+ * significantly slow down the test suite */
+
+EOF
+
+my $num_anyof_code_points = '(1 << 8)';    # held as a string so the expression text itself can be written into the #define
+
+print $out_fh "#define NUM_ANYOF_CODE_POINTS $num_anyof_code_points\n\n";
+
+$num_anyof_code_points = eval $num_anyof_code_points;    # string eval of the constant above: from here on the variable holds the number (256), which _Perl_InBitmap uses
+
# enums that should be made public
my %public_enums = (
_Perl_SCX => 1
@@ -1128,6 +1151,13 @@ sub _Perl_CCC_non0_non230 {
return \@return;
}
+sub _Perl_InBitmap {    # Returns a reference to the list describing code points 0 .. $num_anyof_code_points - 1 (those that can be in an ANYOF bitmap)
+ my @return;
+ push @return, $_ for 0 .. $num_anyof_code_points - 1;    # enumerate every code point below the limit, in ascending order
+ @return = mk_invlist_from_sorted_cp_list(\@return);    # helper not shown here; by its name it converts the sorted code point list to an inversion list -- confirm
+ return \@return;    # callers get an array reference
+}
+
sub output_table_common {
# Common subroutine to actually output the generated rules table.
@@ -2356,6 +2386,7 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
Case_Folding
&_Perl_IVCF
&_Perl_CCC_non0_non230
+ &_Perl_InBitmap
);
# NOTE that the convention is that extra enum values come
# after the property name, separated by commas, with the enums