summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-09-15 07:36:41 -0600
committerKarl Williamson <khw@cpan.org>2021-09-15 08:48:04 -0600
commitde524f25f5a90dad63fa962cb8585acd86c62a88 (patch)
treea739b7d284acb2f41fe23247925301a45cf68dba
parentaf56221a729795569e62d31e32db88f53f56572c (diff)
downloadperl-de524f25f5a90dad63fa962cb8585acd86c62a88.tar.gz
mktables: Split a Line Break equivalence class
This is used for \b{lb}, and the rule is changing in Unicode 14.0.
-rw-r--r--charclass_invlists.h470
-rw-r--r--lib/unicore/mktables17
-rw-r--r--lib/unicore/uni_keywords.pl4
-rw-r--r--regcharclass.h2
-rw-r--r--regen/mk_invlists.pl7
-rw-r--r--uni_keywords.h4
6 files changed, 418 insertions, 86 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index d2ab37df3c..3e34e8beec 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -38427,7 +38427,7 @@ static const I32 _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */
# if 'A' == 65 /* ASCII/Latin1 */
static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
- 2416, /* Number of elements */
+ 2467, /* Number of elements */
148565664, /* Version and data structure type */
0, /* 0 if the list starts at 0;
1 if it starts at the element beyond 0 */
@@ -40705,14 +40705,36 @@ static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
0x1ECB0,
0x1ECB1,
0x1F000,
+ 0x1F02C,
+ 0x1F030,
+ 0x1F094,
+ 0x1F0A0,
+ 0x1F0AF,
+ 0x1F0B1,
+ 0x1F0C0,
+ 0x1F0C1,
+ 0x1F0D0,
+ 0x1F0D1,
+ 0x1F0F6,
0x1F100,
0x1F10D,
0x1F110,
0x1F16D,
0x1F170,
0x1F1AD,
+ 0x1F1AE,
0x1F1E6,
0x1F200,
+ 0x1F203,
+ 0x1F210,
+ 0x1F23C,
+ 0x1F240,
+ 0x1F249,
+ 0x1F250,
+ 0x1F252,
+ 0x1F260,
+ 0x1F266,
+ 0x1F300,
0x1F385,
0x1F386,
0x1F39C,
@@ -40791,10 +40813,18 @@ static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
0x1F6C1,
0x1F6CC,
0x1F6CD,
+ 0x1F6D8,
+ 0x1F6E0,
+ 0x1F6ED,
+ 0x1F6F0,
+ 0x1F6FD,
0x1F700,
0x1F774,
0x1F780,
0x1F7D5,
+ 0x1F7D9,
+ 0x1F7E0,
+ 0x1F7EC,
0x1F800,
0x1F80C,
0x1F810,
@@ -40805,6 +40835,8 @@ static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
0x1F888,
0x1F890,
0x1F8AE,
+ 0x1F8B0,
+ 0x1F8B2,
0x1F900,
0x1F90C,
0x1F90D,
@@ -40820,18 +40852,37 @@ static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
0x1F93F,
0x1F977,
0x1F978,
+ 0x1F979,
+ 0x1F97A,
0x1F9B5,
0x1F9B7,
0x1F9B8,
0x1F9BA,
0x1F9BB,
0x1F9BC,
+ 0x1F9CC,
0x1F9CD,
0x1F9D0,
0x1F9D1,
0x1F9DE,
0x1FA00,
0x1FA54,
+ 0x1FA60,
+ 0x1FA6E,
+ 0x1FA70,
+ 0x1FA75,
+ 0x1FA78,
+ 0x1FA7B,
+ 0x1FA80,
+ 0x1FA87,
+ 0x1FA90,
+ 0x1FAA9,
+ 0x1FAB0,
+ 0x1FAB7,
+ 0x1FAC0,
+ 0x1FAC3,
+ 0x1FAD0,
+ 0x1FAD7,
0x1FB00,
0x1FBF0,
0x1FBFA,
@@ -40895,10 +40946,11 @@ typedef enum {
LB_Regional_Indicator = 34,
LB_Space = 35,
LB_Break_Symbols = 36,
- LB_Word_Joiner = 37,
- LB_ZWSpace = 38,
- LB_ZWJ = 39,
- LB_East_Asian_CP = 40
+ LB_Unassigned_Extended_Pictographic_Ideographic = 37,
+ LB_Word_Joiner = 38,
+ LB_ZWSpace = 39,
+ LB_ZWJ = 40,
+ LB_East_Asian_CP = 41
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
@@ -43176,14 +43228,36 @@ static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
LB_Postfix_Numeric,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Regional_Indicator,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
@@ -43262,20 +43336,30 @@ static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_E_Base,
LB_Ideographic,
@@ -43291,22 +43375,41 @@ static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Numeric,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
@@ -43332,7 +43435,7 @@ static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
&& '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21
static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
- 2428, /* Number of elements */
+ 2479, /* Number of elements */
148565664, /* Version and data structure type */
0, /* 0 if the list starts at 0;
1 if it starts at the element beyond 0 */
@@ -45622,14 +45725,36 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
0x1ECB0,
0x1ECB1,
0x1F000,
+ 0x1F02C,
+ 0x1F030,
+ 0x1F094,
+ 0x1F0A0,
+ 0x1F0AF,
+ 0x1F0B1,
+ 0x1F0C0,
+ 0x1F0C1,
+ 0x1F0D0,
+ 0x1F0D1,
+ 0x1F0F6,
0x1F100,
0x1F10D,
0x1F110,
0x1F16D,
0x1F170,
0x1F1AD,
+ 0x1F1AE,
0x1F1E6,
0x1F200,
+ 0x1F203,
+ 0x1F210,
+ 0x1F23C,
+ 0x1F240,
+ 0x1F249,
+ 0x1F250,
+ 0x1F252,
+ 0x1F260,
+ 0x1F266,
+ 0x1F300,
0x1F385,
0x1F386,
0x1F39C,
@@ -45708,10 +45833,18 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
0x1F6C1,
0x1F6CC,
0x1F6CD,
+ 0x1F6D8,
+ 0x1F6E0,
+ 0x1F6ED,
+ 0x1F6F0,
+ 0x1F6FD,
0x1F700,
0x1F774,
0x1F780,
0x1F7D5,
+ 0x1F7D9,
+ 0x1F7E0,
+ 0x1F7EC,
0x1F800,
0x1F80C,
0x1F810,
@@ -45722,6 +45855,8 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
0x1F888,
0x1F890,
0x1F8AE,
+ 0x1F8B0,
+ 0x1F8B2,
0x1F900,
0x1F90C,
0x1F90D,
@@ -45737,18 +45872,37 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
0x1F93F,
0x1F977,
0x1F978,
+ 0x1F979,
+ 0x1F97A,
0x1F9B5,
0x1F9B7,
0x1F9B8,
0x1F9BA,
0x1F9BB,
0x1F9BC,
+ 0x1F9CC,
0x1F9CD,
0x1F9D0,
0x1F9D1,
0x1F9DE,
0x1FA00,
0x1FA54,
+ 0x1FA60,
+ 0x1FA6E,
+ 0x1FA70,
+ 0x1FA75,
+ 0x1FA78,
+ 0x1FA7B,
+ 0x1FA80,
+ 0x1FA87,
+ 0x1FA90,
+ 0x1FAA9,
+ 0x1FAB0,
+ 0x1FAB7,
+ 0x1FAC0,
+ 0x1FAC3,
+ 0x1FAD0,
+ 0x1FAD7,
0x1FB00,
0x1FBF0,
0x1FBFA,
@@ -45815,10 +45969,11 @@ typedef enum {
LB_Regional_Indicator = 34,
LB_Space = 35,
LB_Break_Symbols = 36,
- LB_Word_Joiner = 37,
- LB_ZWSpace = 38,
- LB_ZWJ = 39,
- LB_East_Asian_CP = 40
+ LB_Unassigned_Extended_Pictographic_Ideographic = 37,
+ LB_Word_Joiner = 38,
+ LB_ZWSpace = 39,
+ LB_ZWJ = 40,
+ LB_East_Asian_CP = 41
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
@@ -48108,14 +48263,36 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
LB_Postfix_Numeric,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Regional_Indicator,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
@@ -48194,20 +48371,30 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_E_Base,
LB_Ideographic,
@@ -48223,22 +48410,41 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Numeric,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
@@ -48264,7 +48470,7 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
&& '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 37
static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
- 2428, /* Number of elements */
+ 2479, /* Number of elements */
148565664, /* Version and data structure type */
0, /* 0 if the list starts at 0;
1 if it starts at the element beyond 0 */
@@ -50554,14 +50760,36 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
0x1ECB0,
0x1ECB1,
0x1F000,
+ 0x1F02C,
+ 0x1F030,
+ 0x1F094,
+ 0x1F0A0,
+ 0x1F0AF,
+ 0x1F0B1,
+ 0x1F0C0,
+ 0x1F0C1,
+ 0x1F0D0,
+ 0x1F0D1,
+ 0x1F0F6,
0x1F100,
0x1F10D,
0x1F110,
0x1F16D,
0x1F170,
0x1F1AD,
+ 0x1F1AE,
0x1F1E6,
0x1F200,
+ 0x1F203,
+ 0x1F210,
+ 0x1F23C,
+ 0x1F240,
+ 0x1F249,
+ 0x1F250,
+ 0x1F252,
+ 0x1F260,
+ 0x1F266,
+ 0x1F300,
0x1F385,
0x1F386,
0x1F39C,
@@ -50640,10 +50868,18 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
0x1F6C1,
0x1F6CC,
0x1F6CD,
+ 0x1F6D8,
+ 0x1F6E0,
+ 0x1F6ED,
+ 0x1F6F0,
+ 0x1F6FD,
0x1F700,
0x1F774,
0x1F780,
0x1F7D5,
+ 0x1F7D9,
+ 0x1F7E0,
+ 0x1F7EC,
0x1F800,
0x1F80C,
0x1F810,
@@ -50654,6 +50890,8 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
0x1F888,
0x1F890,
0x1F8AE,
+ 0x1F8B0,
+ 0x1F8B2,
0x1F900,
0x1F90C,
0x1F90D,
@@ -50669,18 +50907,37 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
0x1F93F,
0x1F977,
0x1F978,
+ 0x1F979,
+ 0x1F97A,
0x1F9B5,
0x1F9B7,
0x1F9B8,
0x1F9BA,
0x1F9BB,
0x1F9BC,
+ 0x1F9CC,
0x1F9CD,
0x1F9D0,
0x1F9D1,
0x1F9DE,
0x1FA00,
0x1FA54,
+ 0x1FA60,
+ 0x1FA6E,
+ 0x1FA70,
+ 0x1FA75,
+ 0x1FA78,
+ 0x1FA7B,
+ 0x1FA80,
+ 0x1FA87,
+ 0x1FA90,
+ 0x1FAA9,
+ 0x1FAB0,
+ 0x1FAB7,
+ 0x1FAC0,
+ 0x1FAC3,
+ 0x1FAD0,
+ 0x1FAD7,
0x1FB00,
0x1FBF0,
0x1FBFA,
@@ -50747,10 +51004,11 @@ typedef enum {
LB_Regional_Indicator = 34,
LB_Space = 35,
LB_Break_Symbols = 36,
- LB_Word_Joiner = 37,
- LB_ZWSpace = 38,
- LB_ZWJ = 39,
- LB_East_Asian_CP = 40
+ LB_Unassigned_Extended_Pictographic_Ideographic = 37,
+ LB_Word_Joiner = 38,
+ LB_ZWSpace = 39,
+ LB_ZWJ = 40,
+ LB_East_Asian_CP = 41
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 037 */
@@ -53040,14 +53298,36 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 037 */
LB_Postfix_Numeric,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Regional_Indicator,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
@@ -53126,20 +53406,30 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 037 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
- LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_E_Base,
LB_Ideographic,
@@ -53155,22 +53445,41 @@ static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 037 */
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_E_Base,
LB_Ideographic,
LB_Alphabetic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
+ LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Numeric,
LB_Alphabetic,
- LB_Ideographic,
+ LB_Unassigned_Extended_Pictographic_Ideographic,
LB_Alphabetic,
LB_Ideographic,
LB_Alphabetic,
@@ -419752,51 +420061,52 @@ static const U8 GCB_table[17][17] = {
#define LB_RI_then_RI 15
#define LB_various_then_PO_or_PR 32
-static const U8 LB_table[41][41] = {
- /* 'EAO' stands for 'East_Asian_OP'; 'edg' stands for 'EDGE'; u stands for 'unused in this Unicode release (and the data in its row and
- * column are garbage) */
-/* AL B2 BA BB BK CB CL CM CP CR EAO EB edg EM EX GL H2 H3 HL HY ID IN IS JL JT JV LF NL NS NU OP PO PR QU RI SP SY WJ ZW ZWJ u */
-/* AL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* B2 */ { 1, 2, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* BA */ {14,14, 0,14, 0, 1, 2, 0, 2, 0, 14,14, 1,14, 2,14,14,14,14, 0,14,13, 2,14,14,14, 0, 0, 0,14,14,14,14, 0,14, 0, 2, 0, 0, 0, 2 },
-/* BB */ { 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2 },
-/* BK */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* CB */ { 1, 1, 1, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* CL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* CM */ { 3, 3, 3, 3, 0, 3, 3, 0, 3, 0, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3, 3 },
-/* CP */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 0, 1,33,33, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* CR */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* EAO*/ { 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2 },
-/* EB */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 0, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* edg*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-/* EM */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* EX */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* GL */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2 },
-/* H2 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* H3 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* HL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* HY */ {14,14, 0,14, 0, 1, 2, 0, 2, 0, 14,14, 1,14, 2,14,14,14,14, 0,14,13, 2,14,14,14, 0, 0, 0,13,14,14,14, 0,14, 0, 2, 0, 0, 0, 2 },
-/* ID */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* IN */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* IS */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* JL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* JT */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* JV */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* NL */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* NS */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* NU */ { 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
-/* OP */ { 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2 },
-/* PO */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* PR */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 10, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* QU */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2 },
-/* RI */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,15, 0, 2, 0, 0, 0, 2 },
-/* SP */ { 7, 7, 7, 7, 0, 7, 8, 7, 8, 0, 7, 7, 1, 7, 8, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 0, 8, 8, 0, 7, 8 },
-/* SY */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 0, 0, 0, 2 },
-/* WJ */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2 },
-/* ZW */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1 },
-/* ZWJ*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-/* u */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 2, 0, 0, 0, 2 }
+static const U8 LB_table[42][42] = {
+ /* 'EAO' stands for 'East_Asian_OP'; 'edg' stands for 'EDGE'; 'UEP' stands for 'Unassigned_Extended_Pictographic_Ideographic'; u stands for
+ * 'unused in this Unicode release' (and the data in its row and column are garbage) */
+/* AL B2 BA BB BK CB CL CM CP CR EAO EB edg EM EX GL H2 H3 HL HY ID IN IS JL JT JV LF NL NS NU OP PO PR QU RI SP SY UEP WJ ZW ZWJ u */
+/* AL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* B2 */ { 1, 2, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* BA */ {14,14, 0,14, 0, 1, 2, 0, 2, 0, 14,14, 1,14, 2,14,14,14,14, 0,14,13, 2,14,14,14, 0, 0, 0,14,14,14,14, 0,14, 0, 2, 14, 0, 0, 0, 2 },
+/* BB */ { 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2 },
+/* BK */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* CB */ { 1, 1, 1, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* CL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* CM */ { 3, 3, 3, 3, 0, 3, 3, 0, 3, 0, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 0, 3, 3 },
+/* CP */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 0, 1,33,33, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* CR */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* EAO*/ { 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2 },
+/* EB */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 0, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* edg*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+/* EM */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* EX */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* GL */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2 },
+/* H2 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* H3 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* HL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* HY */ {14,14, 0,14, 0, 1, 2, 0, 2, 0, 14,14, 1,14, 2,14,14,14,14, 0,14,13, 2,14,14,14, 0, 0, 0,13,14,14,14, 0,14, 0, 2, 14, 0, 0, 0, 2 },
+/* ID */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* IN */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* IS */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* JL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* JT */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* JV */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* NL */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* NS */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* NU */ { 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
+/* OP */ { 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2 },
+/* PO */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* PR */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 10, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 0, 0, 0, 0, 2 },
+/* QU */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2 },
+/* RI */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,15, 0, 2, 1, 0, 0, 0, 2 },
+/* SP */ { 7, 7, 7, 7, 0, 7, 8, 7, 8, 0, 7, 7, 1, 7, 8, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 0, 8, 7, 8, 0, 7, 8 },
+/* SY */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* UEP*/ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 0, 0, 2 },
+/* WJ */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2 },
+/* ZW */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
+/* ZWJ*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+/* u */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 2, 1, 0, 0, 0, 2 }
};
#define WB_NOBREAK 0
@@ -419900,9 +420210,9 @@ static const U8 WB_table[23][23] = {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables
+ * 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
- * 5eb9e6c825496cc9aa705e3cd33bc6d5a9657dcca16d4c4acc4824ff30b34a26 regen/mk_invlists.pl
+ * 4635ff74b13c8f059599be8d0b0e2aea19fefe6ddcbc4c7deef1a3096c91a0dd regen/mk_invlists.pl
* ex: set ro: */
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 04e9cf6c24..89a2e15ef5 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15175,6 +15175,8 @@ END
$perl_lb->set_default_map('Alphabetic', 'full_name'); # XX -> AL
my $ea = property_ref('East_Asian_Width');
+ my $Cn_EP;
+ $Cn_EP = $ep & $gc->table('Unassigned') if defined $ep;
for my $range ($perl_lb->ranges) {
my $value = standardize($range->value);
@@ -15202,6 +15204,21 @@ END
}
}
}
+ elsif (defined $ep && $value eq standardize('Ideographic')) {
+
+ # Unicode 14 adds a rule to not break lines before any potential
+ # EBase. They say that any unassigned code point that is ExtPict
+ # is potentially an EBase. In 14.0, all such ones are in the
+ # ExtPict=ID category. We must split that category for the
+ # pairwise rule table to work.
+ for my $i ($range->start .. $range->end) {
+ if ($Cn_EP->contains($i)) {
+ $perl_lb->add_map($i, $i,
+ 'Unassigned_Extended_Pictographic_Ideographic',
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ }
elsif ( defined $ea
&& ( $value eq standardize('Close_Parenthesis')
|| $value eq standardize('Open_Punctuation')))
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 0ea5d98fc6..51c63044f4 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1295,9 +1295,9 @@
# baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables
+# 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
# 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
-# 5eb9e6c825496cc9aa705e3cd33bc6d5a9657dcca16d4c4acc4824ff30b34a26 regen/mk_invlists.pl
+# 4635ff74b13c8f059599be8d0b0e2aea19fefe6ddcbc4c7deef1a3096c91a0dd regen/mk_invlists.pl
# ex: set ro:
diff --git a/regcharclass.h b/regcharclass.h
index 6f60941f55..0500d572bd 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -3762,7 +3762,7 @@
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables
+ * 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 1aa94679c695efd507b7e4491629dba1021b74c21a5324dfd3a582a5d654bd32 regen/regcharclass.pl
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index d58e63cba9..cdddbc5e08 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -1783,6 +1783,9 @@ sub output_LB_table() {
# PR × (ID | EB | EM)
$lb_table[$lb_enums{'Prefix_Numeric'}][$lb_enums{'Ideographic'}]
= $lb_actions{'LB_NOBREAK'};
+ $lb_table[$lb_enums{'Prefix_Numeric'}]
+ [$lb_enums{'Unassigned_Extended_Pictographic_Ideographic'}]
+ = $lb_actions{'LB_NOBREAK'};
$lb_table[$lb_enums{'Prefix_Numeric'}][$lb_enums{'E_Base'}]
= $lb_actions{'LB_NOBREAK'};
$lb_table[$lb_enums{'Prefix_Numeric'}][$lb_enums{'E_Modifier'}]
@@ -1791,6 +1794,8 @@ sub output_LB_table() {
# (ID | EB | EM) × PO
$lb_table[$lb_enums{'Ideographic'}][$lb_enums{'Postfix_Numeric'}]
= $lb_actions{'LB_NOBREAK'};
+ $lb_table[$lb_enums{'Unassigned_Extended_Pictographic_Ideographic'}]
+ [$lb_enums{'Postfix_Numeric'}] = $lb_actions{'LB_NOBREAK'};
$lb_table[$lb_enums{'E_Base'}][$lb_enums{'Postfix_Numeric'}]
= $lb_actions{'LB_NOBREAK'};
$lb_table[$lb_enums{'E_Modifier'}][$lb_enums{'Postfix_Numeric'}]
@@ -2462,7 +2467,7 @@ my @props;
push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
&UpperLatin1
_Perl_GCB,EDGE,E_Base,E_Base_GAZ,E_Modifier,Glue_After_Zwj,LV,Prepend,Regional_Indicator,SpacingMark,ZWJ,ExtPict_XX
- _Perl_LB,EDGE,Close_Parenthesis,Hebrew_Letter,Next_Line,Regional_Indicator,ZWJ,Contingent_Break,E_Base,E_Modifier,H2,H3,JL,JT,JV,Word_Joiner,East_Asian_CP,East_Asian_OP
+ _Perl_LB,EDGE,Close_Parenthesis,Hebrew_Letter,Next_Line,Regional_Indicator,ZWJ,Contingent_Break,E_Base,E_Modifier,H2,H3,JL,JT,JV,Word_Joiner,East_Asian_CP,East_Asian_OP,Unassigned_Extended_Pictographic_Ideographic
_Perl_SB,EDGE,SContinue,CR,Extend,LF
_Perl_WB,Perl_Tailored_HSpace,EDGE,UNKNOWN,CR,Double_Quote,E_Base,E_Base_GAZ,E_Modifier,Extend,Glue_After_Zwj,Hebrew_Letter,LF,MidNumLet,Newline,Regional_Indicator,Single_Quote,ZWJ,ExtPict_XX,ExtPict_LE
_Perl_SCX,Latin,Inherited,Unknown,Kore,Jpan,Hanb,INVALID
diff --git a/uni_keywords.h b/uni_keywords.h
index 862079859b..4141c56cc5 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7542,10 +7542,10 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables
+ * 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
- * 5eb9e6c825496cc9aa705e3cd33bc6d5a9657dcca16d4c4acc4824ff30b34a26 regen/mk_invlists.pl
+ * 4635ff74b13c8f059599be8d0b0e2aea19fefe6ddcbc4c7deef1a3096c91a0dd regen/mk_invlists.pl
* cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl
* ex: set ro: */