summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- charclass_invlists.h 356
-rw-r--r-- embedvar.h 2
-rw-r--r-- perlapi.h 2
-rw-r--r-- perlvars.h 1
-rw-r--r-- regcomp.c 1
-rw-r--r-- regen/mk_invlists.pl 30
-rw-r--r-- uni_keywords.h 2
7 files changed, 392 insertions, 2 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index eb40f5eaf7..b1dd4430e3 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -18043,6 +18043,360 @@ static const int Lowercase_Mapping_invmap[] = { /* for EBCDIC 037 */
#if (defined(PERL_IN_REGCOMP_C) && ! defined(PERL_IN_XSUB_RE))
+static const UV _Perl_CCC_non0_non230_invlist[] = { /* for all charsets.
+ * Inversion list of the code points whose canonical combining class (ccc)
+ * is non-zero and is not 230.  regen/mk_invlists.pl contains a sub with
+ * the matching name, _Perl_CCC_non0_non230, that builds such a list, so
+ * this table is presumably generated output -- confirm before editing it
+ * by hand.
+ *
+ * Layout: three header entries (element count, version/type word, offset
+ * flag) followed by the list entries themselves in ascending order.  The
+ * count of 347 equals the number of hex entries that follow the header.
+ * NOTE(review): with the offset flag set to 1 the leading 0x0 entry looks
+ * like a placeholder, making 0x315 the first code point in the set --
+ * confirm against the code that reads these tables. */
+ 347, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0x0,
+ 0x315,
+ 0x33D,
+ 0x345,
+ 0x346,
+ 0x347,
+ 0x34A,
+ 0x34D,
+ 0x34F,
+ 0x353,
+ 0x357,
+ 0x358,
+ 0x35B,
+ 0x35C,
+ 0x363,
+ 0x591,
+ 0x592,
+ 0x596,
+ 0x597,
+ 0x59A,
+ 0x59C,
+ 0x5A2,
+ 0x5A8,
+ 0x5AA,
+ 0x5AB,
+ 0x5AD,
+ 0x5AF,
+ 0x5B0,
+ 0x5BE,
+ 0x5BF,
+ 0x5C0,
+ 0x5C1,
+ 0x5C3,
+ 0x5C5,
+ 0x5C6,
+ 0x5C7,
+ 0x5C8,
+ 0x618,
+ 0x61B,
+ 0x64B,
+ 0x653,
+ 0x655,
+ 0x657,
+ 0x65C,
+ 0x65D,
+ 0x65F,
+ 0x660,
+ 0x670,
+ 0x671,
+ 0x6E3,
+ 0x6E4,
+ 0x6EA,
+ 0x6EB,
+ 0x6ED,
+ 0x6EE,
+ 0x711,
+ 0x712,
+ 0x731,
+ 0x732,
+ 0x734,
+ 0x735,
+ 0x737,
+ 0x73A,
+ 0x73B,
+ 0x73D,
+ 0x73E,
+ 0x73F,
+ 0x742,
+ 0x743,
+ 0x744,
+ 0x745,
+ 0x746,
+ 0x747,
+ 0x748,
+ 0x749,
+ 0x7F2,
+ 0x7F3,
+ 0x7FD,
+ 0x7FE,
+ 0x859,
+ 0x85C,
+ 0x8D3,
+ 0x8D4,
+ 0x8E3,
+ 0x8E4,
+ 0x8E6,
+ 0x8E7,
+ 0x8E9,
+ 0x8EA,
+ 0x8ED,
+ 0x8F3,
+ 0x8F6,
+ 0x8F7,
+ 0x8F9,
+ 0x8FB,
+ 0x93C,
+ 0x93D,
+ 0x94D,
+ 0x94E,
+ 0x952,
+ 0x953,
+ 0x9BC,
+ 0x9BD,
+ 0x9CD,
+ 0x9CE,
+ 0xA3C,
+ 0xA3D,
+ 0xA4D,
+ 0xA4E,
+ 0xABC,
+ 0xABD,
+ 0xACD,
+ 0xACE,
+ 0xB3C,
+ 0xB3D,
+ 0xB4D,
+ 0xB4E,
+ 0xBCD,
+ 0xBCE,
+ 0xC4D,
+ 0xC4E,
+ 0xC55,
+ 0xC57,
+ 0xCBC,
+ 0xCBD,
+ 0xCCD,
+ 0xCCE,
+ 0xD3B,
+ 0xD3D,
+ 0xD4D,
+ 0xD4E,
+ 0xDCA,
+ 0xDCB,
+ 0xE38,
+ 0xE3B,
+ 0xE48,
+ 0xE4C,
+ 0xEB8,
+ 0xEBA,
+ 0xEC8,
+ 0xECC,
+ 0xF18,
+ 0xF1A,
+ 0xF35,
+ 0xF36,
+ 0xF37,
+ 0xF38,
+ 0xF39,
+ 0xF3A,
+ 0xF71,
+ 0xF73,
+ 0xF74,
+ 0xF75,
+ 0xF7A,
+ 0xF7E,
+ 0xF80,
+ 0xF81,
+ 0xF84,
+ 0xF85,
+ 0xFC6,
+ 0xFC7,
+ 0x1037,
+ 0x1038,
+ 0x1039,
+ 0x103B,
+ 0x108D,
+ 0x108E,
+ 0x1714,
+ 0x1715,
+ 0x1734,
+ 0x1735,
+ 0x17D2,
+ 0x17D3,
+ 0x18A9,
+ 0x18AA,
+ 0x1939,
+ 0x193A,
+ 0x193B,
+ 0x193C,
+ 0x1A18,
+ 0x1A19,
+ 0x1A60,
+ 0x1A61,
+ 0x1A7F,
+ 0x1A80,
+ 0x1AB5,
+ 0x1ABB,
+ 0x1ABD,
+ 0x1ABE,
+ 0x1B34,
+ 0x1B35,
+ 0x1B44,
+ 0x1B45,
+ 0x1B6C,
+ 0x1B6D,
+ 0x1BAA,
+ 0x1BAC,
+ 0x1BE6,
+ 0x1BE7,
+ 0x1BF2,
+ 0x1BF4,
+ 0x1C37,
+ 0x1C38,
+ 0x1CD4,
+ 0x1CDA,
+ 0x1CDC,
+ 0x1CE0,
+ 0x1CE2,
+ 0x1CE9,
+ 0x1CED,
+ 0x1CEE,
+ 0x1DC2,
+ 0x1DC3,
+ 0x1DCA,
+ 0x1DCB,
+ 0x1DCD,
+ 0x1DD1,
+ 0x1DF6,
+ 0x1DFA,
+ 0x1DFC,
+ 0x1DFE,
+ 0x1DFF,
+ 0x1E00,
+ 0x20D2,
+ 0x20D4,
+ 0x20D8,
+ 0x20DB,
+ 0x20E5,
+ 0x20E7,
+ 0x20E8,
+ 0x20E9,
+ 0x20EA,
+ 0x20F0,
+ 0x2D7F,
+ 0x2D80,
+ 0x302A,
+ 0x3030,
+ 0x3099,
+ 0x309B,
+ 0xA806,
+ 0xA807,
+ 0xA8C4,
+ 0xA8C5,
+ 0xA92B,
+ 0xA92E,
+ 0xA953,
+ 0xA954,
+ 0xA9B3,
+ 0xA9B4,
+ 0xA9C0,
+ 0xA9C1,
+ 0xAAB4,
+ 0xAAB5,
+ 0xAAF6,
+ 0xAAF7,
+ 0xABED,
+ 0xABEE,
+ 0xFB1E,
+ 0xFB1F,
+ 0xFE27,
+ 0xFE2E,
+ 0x101FD,
+ 0x101FE,
+ 0x102E0,
+ 0x102E1,
+ 0x10A0D,
+ 0x10A0E,
+ 0x10A39,
+ 0x10A3B,
+ 0x10A3F,
+ 0x10A40,
+ 0x10AE6,
+ 0x10AE7,
+ 0x10F46,
+ 0x10F48,
+ 0x10F4B,
+ 0x10F4C,
+ 0x10F4D,
+ 0x10F51,
+ 0x11046,
+ 0x11047,
+ 0x1107F,
+ 0x11080,
+ 0x110B9,
+ 0x110BB,
+ 0x11133,
+ 0x11135,
+ 0x11173,
+ 0x11174,
+ 0x111C0,
+ 0x111C1,
+ 0x111CA,
+ 0x111CB,
+ 0x11235,
+ 0x11237,
+ 0x112E9,
+ 0x112EB,
+ 0x1133B,
+ 0x1133D,
+ 0x1134D,
+ 0x1134E,
+ 0x11442,
+ 0x11443,
+ 0x11446,
+ 0x11447,
+ 0x114C2,
+ 0x114C4,
+ 0x115BF,
+ 0x115C1,
+ 0x1163F,
+ 0x11640,
+ 0x116B6,
+ 0x116B8,
+ 0x1172B,
+ 0x1172C,
+ 0x11839,
+ 0x1183B,
+ 0x11A34,
+ 0x11A35,
+ 0x11A47,
+ 0x11A48,
+ 0x11A99,
+ 0x11A9A,
+ 0x11C3F,
+ 0x11C40,
+ 0x11D42,
+ 0x11D43,
+ 0x11D44,
+ 0x11D46,
+ 0x11D97,
+ 0x11D98,
+ 0x16AF0,
+ 0x16AF5,
+ 0x1BC9E,
+ 0x1BC9F,
+ 0x1D165,
+ 0x1D16A,
+ 0x1D16D,
+ 0x1D173,
+ 0x1D17B,
+ 0x1D183,
+ 0x1D18A,
+ 0x1D18C,
+ 0x1E8D0,
+ 0x1E8D7,
+ 0x1E94A,
+ 0x1E94B
+};
+
# if 'A' == 65 /* ASCII/Latin1 */
static const UV _Perl_GCB_invlist[] = { /* for ASCII/Latin1 */
@@ -383696,5 +384050,5 @@ static const U8 WB_table[23][23] = {
* 7bd6bcbe3813e0cd55e0998053d182b7bc8c97dcfd0b85028e9f7f55af4ad61b lib/unicore/version
* 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 1fdcc4c0ed94008c13daeb934b40cbd9f5b2871201dce7a9f0530be4145ea026 regen/mk_invlists.pl
+ * 8ae37f2b5bbc7d215f63e8d1189754d83a16c6156fd353847f6fcced90c513d5 regen/mk_invlists.pl
* ex: set ro: */
diff --git a/embedvar.h b/embedvar.h
index 539fc5a32e..ad7aae8306 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -351,6 +351,8 @@
#define PL_GAboveLatin1 (my_vars->GAboveLatin1)
#define PL_Assigned_invlist (my_vars->GAssigned_invlist)
#define PL_GAssigned_invlist (my_vars->GAssigned_invlist)
+#define PL_CCC_non0_non230 (my_vars->GCCC_non0_non230) /* reaches the GCCC_non0_non230 member through my_vars */
+#define PL_GCCC_non0_non230 (my_vars->GCCC_non0_non230) /* G-prefixed spelling; expands to the same member */
#define PL_C_locale_obj (my_vars->GC_locale_obj)
#define PL_GC_locale_obj (my_vars->GC_locale_obj)
#define PL_GCB_invlist (my_vars->GGCB_invlist)
diff --git a/perlapi.h b/perlapi.h
index cb77694651..de4267aa75 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -103,6 +103,8 @@ END_EXTERN_C
#define PL_AboveLatin1 (*Perl_GAboveLatin1_ptr(NULL))
#undef PL_Assigned_invlist
#define PL_Assigned_invlist (*Perl_GAssigned_invlist_ptr(NULL))
+#undef PL_CCC_non0_non230 /* discard any earlier definition before redefining below */
+#define PL_CCC_non0_non230 (*Perl_GCCC_non0_non230_ptr(NULL)) /* dereferences the pointer returned by Perl_GCCC_non0_non230_ptr() */
#undef PL_C_locale_obj
#define PL_C_locale_obj (*Perl_GC_locale_obj_ptr(NULL))
#undef PL_GCB_invlist
diff --git a/perlvars.h b/perlvars.h
index e8064eb320..8a4ff6a47b 100644
--- a/perlvars.h
+++ b/perlvars.h
@@ -305,6 +305,7 @@ PERLVAR(G, utf8_charname_begin, SV *)
PERLVAR(G, utf8_charname_continue, SV *)
PERLVAR(G, utf8_mark, SV *)
PERLVAR(G, InBitmap, SV *)
+PERLVAR(G, CCC_non0_non230, SV *) /* Assigned in Perl_init_uniprops() (regcomp.c) from _Perl_CCC_non0_non230_invlist: code points whose canonical combining class is neither 0 nor 230 */
/* Everything that folds to a given character, for case insensitivity regex
* matching */
diff --git a/regcomp.c b/regcomp.c
index 66cc6e0268..4fa1224769 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -21947,6 +21947,7 @@ Perl_init_uniprops(pTHX)
PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
+ PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
#ifdef UNI_XIDC
/* The below are used only by deprecated functions. They could be removed */
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index dd6a0321e0..55c4afb279 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -1092,6 +1092,35 @@ sub UpperLatin1 {
return \@return;
}
+sub _Perl_CCC_non0_non230 {
+
+    # Returns a reference to an inversion list matching every code point
+    # whose canonical combining class (ccc) is non-zero and is not 230.  A
+    # Unicode Standard rule needs exactly this set.
+
+    # Dropping the first element of the "ccc=0" inversion list inverts it,
+    # giving the list for "ccc != 0"
+    my @ccc_not_0 = prop_invlist("ccc=0");
+    shift @ccc_not_0;
+
+    my @wanted_cps;
+
+    # The inverted list is consumed as pairs: the even-indexed element starts
+    # a range, and the element after it (when present) is one beyond the
+    # range's end.  Every code point in each range is examined individually
+    # so that those whose class is 230 can be left out.
+    my $pair = 0;
+    while ($pair < @ccc_not_0) {
+        my $first = $ccc_not_0[$pair];
+
+        my $last;
+        if ($pair + 1 < @ccc_not_0) {
+            $last = $ccc_not_0[$pair + 1] - 1;  # Range has an explicit end
+        }
+        else {
+            $last = $Unicode::UCD::MAX_CP;      # Range extends to infinity
+        }
+
+        foreach my $cp ($first .. $last) {
+            my @aliases = prop_value_aliases("ccc", charprop($cp, "ccc"));
+
+            # The last alias returned is the purely numeric form of the class
+            next if $aliases[-1] == 230;
+            push @wanted_cps, $cp;
+        }
+
+        $pair += 2;
+    }
+
+    my @sorted_cps = sort { $a <=> $b } @wanted_cps;
+    my @invlist = mk_invlist_from_sorted_cp_list(\@sorted_cps);
+    return \@invlist;
+}
+
sub output_table_common {
# Common subroutine to actually output the generated rules table.
@@ -2319,6 +2348,7 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
Simple_Case_Folding
Case_Folding
&_Perl_IVCF
+ &_Perl_CCC_non0_non230
);
# NOTE that the convention is that extra enum values come
# after the property name, separated by commas, with the enums
diff --git a/uni_keywords.h b/uni_keywords.h
index c2fceb5527..9d2b8816ae 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -6996,6 +6996,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 7bd6bcbe3813e0cd55e0998053d182b7bc8c97dcfd0b85028e9f7f55af4ad61b lib/unicore/version
* 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 1fdcc4c0ed94008c13daeb934b40cbd9f5b2871201dce7a9f0530be4145ea026 regen/mk_invlists.pl
+ * 8ae37f2b5bbc7d215f63e8d1189754d83a16c6156fd353847f6fcced90c513d5 regen/mk_invlists.pl
* c56b78df81e0f96632246052d71580b212546ca02ba4075158965e11d892f21e regen/mph.pl
* ex: set ro: */