summary | refs | log | tree | commit | diff
path: root/charclass_invlists.h
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2019-11-24 21:21:39 -0700
committer: Karl Williamson <khw@cpan.org> 2019-11-26 19:38:15 -0700
commit: a405530a0bdfb812498536bb46a01f9dcbe8b5d7 (patch)
tree: 9927edf134a05bee7b8c9f3b208d7e2f48dc6ddd /charclass_invlists.h
parent: a8def808210e08cea0b7889fea7c5146b21af4ed (diff)
download: perl-a405530a0bdfb812498536bb46a01f9dcbe8b5d7.tar.gz
Move data for PL_InBitmap to charclass_invlists.h
This makes it consistent with the other inversion lists for this sort of thing, and finishes the fix for GH #17154.
Diffstat (limited to 'charclass_invlists.h')
-rw-r--r-- charclass_invlists.h 27
1 files changed, 26 insertions, 1 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index fbad48d3ce..87cd5938fc 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -6,6 +6,22 @@
/* See the generating file for comments */
+/* This gives the number of code points that can be in the bitmap of an ANYOF
+ * node. The shift number must currently be in the range 8..12. It can't be
+ * less than 8 (256) because some code relies on it being at least that. Above
+ * 12 (4096), you start running into warnings that some data structure widths
+ * have been exceeded, though the test suite as of this writing still passes
+ * for values up through 16, which is as high as anyone would ever want to go,
+ * encompassing all of the Unicode BMP, and thus including all the economically
+ * important world scripts. At 12 most of them are included: Arabic,
+ * Cyrillic, Greek, Hebrew, Indian subcontinent, Latin, and Thai; but not Han,
+ * Japanese, nor Korean. (The regarglen structure in regnodes.h is a U8, and
+ * the trie types TRIEC and AHOCORASICKC are larger than U8 for shift values
+ * above 12.) Be sure to benchmark before changing, as larger sizes do
+ * significantly slow down the test suite. */
+
+#define NUM_ANYOF_CODE_POINTS (1 << 8) /* shift of 8: 256 code points */
+
#if (defined(PERL_IN_REGCOMP_C) && ! defined(PERL_IN_XSUB_RE))
@@ -29368,6 +29384,15 @@ static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */
#if (defined(PERL_IN_REGCOMP_C) && ! defined(PERL_IN_XSUB_RE))
+static const UV _Perl_InBitmap_invlist[] = { /* for all charsets; data for PL_InBitmap */
+ 2, /* Number of elements (the two code points at the end of this table) */
+ 148565664, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0x0, /* first element; NOTE(review): presumably the start of the in-bitmap range -- confirm against the inversion list format */
+ 0x100 /* second element; same value as NUM_ANYOF_CODE_POINTS (1 << 8) defined earlier in this header */
+};
+
# if 'A' == 65 /* ASCII/Latin1 */
static const UV _Perl_IVCF_invlist[] = { /* for ASCII/Latin1 */
@@ -395153,5 +395178,5 @@ static const U8 WB_table[23][23] = {
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl
- * 74442760b048f85cf5e9e87c3baffc94e861ba397dda0d33f4c22b40ef7efbe6 regen/mk_invlists.pl
+ * 65a08c53c038137b3293a52b3e73bce979d9859f237dea6d48ff7e1fb9efb4ef regen/mk_invlists.pl
* ex: set ro: */