summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--charclass_invlists.h1183
-rw-r--r--lib/unicore/uni_keywords.pl2
-rw-r--r--regen/mk_invlists.pl190
-rw-r--r--uni_keywords.h2
4 files changed, 709 insertions, 668 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index d1402c357f..db5f37e8da 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -20359,19 +20359,19 @@ typedef enum {
GCB_Other = 0,
GCB_Control = 1,
GCB_CR = 2,
- GCB_Extend = 3,
- GCB_L = 4,
- GCB_LF = 5,
- GCB_LV = 6,
- GCB_LVT = 7,
- GCB_Prepend = 8,
- GCB_Regional_Indicator = 9,
- GCB_SpacingMark = 10,
- GCB_T = 11,
- GCB_V = 12,
- GCB_XPG_XX = 13,
- GCB_ZWJ = 14,
- GCB_EDGE = 15,
+ GCB_EDGE = 3,
+ GCB_Extend = 4,
+ GCB_L = 5,
+ GCB_LF = 6,
+ GCB_LV = 7,
+ GCB_LVT = 8,
+ GCB_Prepend = 9,
+ GCB_Regional_Indicator = 10,
+ GCB_SpacingMark = 11,
+ GCB_T = 12,
+ GCB_V = 13,
+ GCB_XPG_XX = 14,
+ GCB_ZWJ = 15,
GCB_E_Base = 16,
GCB_E_Base_GAZ = 16,
GCB_E_Modifier = 16,
@@ -23969,19 +23969,19 @@ typedef enum {
GCB_Other = 0,
GCB_Control = 1,
GCB_CR = 2,
- GCB_Extend = 3,
- GCB_L = 4,
- GCB_LF = 5,
- GCB_LV = 6,
- GCB_LVT = 7,
- GCB_Prepend = 8,
- GCB_Regional_Indicator = 9,
- GCB_SpacingMark = 10,
- GCB_T = 11,
- GCB_V = 12,
- GCB_XPG_XX = 13,
- GCB_ZWJ = 14,
- GCB_EDGE = 15,
+ GCB_EDGE = 3,
+ GCB_Extend = 4,
+ GCB_L = 5,
+ GCB_LF = 6,
+ GCB_LV = 7,
+ GCB_LVT = 8,
+ GCB_Prepend = 9,
+ GCB_Regional_Indicator = 10,
+ GCB_SpacingMark = 11,
+ GCB_T = 12,
+ GCB_V = 13,
+ GCB_XPG_XX = 14,
+ GCB_ZWJ = 15,
GCB_E_Base = 16,
GCB_E_Base_GAZ = 16,
GCB_E_Modifier = 16,
@@ -27580,19 +27580,19 @@ typedef enum {
GCB_Other = 0,
GCB_Control = 1,
GCB_CR = 2,
- GCB_Extend = 3,
- GCB_L = 4,
- GCB_LF = 5,
- GCB_LV = 6,
- GCB_LVT = 7,
- GCB_Prepend = 8,
- GCB_Regional_Indicator = 9,
- GCB_SpacingMark = 10,
- GCB_T = 11,
- GCB_V = 12,
- GCB_XPG_XX = 13,
- GCB_ZWJ = 14,
- GCB_EDGE = 15,
+ GCB_EDGE = 3,
+ GCB_Extend = 4,
+ GCB_L = 5,
+ GCB_LF = 6,
+ GCB_LV = 7,
+ GCB_LVT = 8,
+ GCB_Prepend = 9,
+ GCB_Regional_Indicator = 10,
+ GCB_SpacingMark = 11,
+ GCB_T = 12,
+ GCB_V = 13,
+ GCB_XPG_XX = 14,
+ GCB_ZWJ = 15,
GCB_E_Base = 16,
GCB_E_Base_GAZ = 16,
GCB_E_Modifier = 16,
@@ -40519,31 +40519,31 @@ static const UV _Perl_LB_invlist[] = { /* for ASCII/Latin1 */
typedef enum {
LB_Alphabetic = 0,
- LB_Break_After = 1,
- LB_Break_Before = 2,
- LB_Break_Both = 3,
- LB_Break_Symbols = 4,
- LB_Carriage_Return = 5,
- LB_Close_Parenthesis = 6,
- LB_Close_Punctuation = 7,
- LB_Combining_Mark = 8,
- LB_Contingent_Break = 9,
+ LB_Break_Both = 1,
+ LB_Break_After = 2,
+ LB_Break_Before = 3,
+ LB_Mandatory_Break = 4,
+ LB_Contingent_Break = 5,
+ LB_Close_Punctuation = 6,
+ LB_Combining_Mark = 7,
+ LB_Close_Parenthesis = 8,
+ LB_Carriage_Return = 9,
LB_E_Base = 10,
- LB_E_Modifier = 11,
- LB_Exclamation = 12,
- LB_Glue = 13,
- LB_H2 = 14,
- LB_H3 = 15,
- LB_Hebrew_Letter = 16,
- LB_Hyphen = 17,
- LB_Ideographic = 18,
- LB_Infix_Numeric = 19,
+ LB_EDGE = 11,
+ LB_E_Modifier = 12,
+ LB_Exclamation = 13,
+ LB_Glue = 14,
+ LB_H2 = 15,
+ LB_H3 = 16,
+ LB_Hebrew_Letter = 17,
+ LB_Hyphen = 18,
+ LB_Ideographic = 19,
LB_Inseparable = 20,
- LB_JL = 21,
- LB_JT = 22,
- LB_JV = 23,
- LB_Line_Feed = 24,
- LB_Mandatory_Break = 25,
+ LB_Infix_Numeric = 21,
+ LB_JL = 22,
+ LB_JT = 23,
+ LB_JV = 24,
+ LB_Line_Feed = 25,
LB_Next_Line = 26,
LB_Nonstarter = 27,
LB_Numeric = 28,
@@ -40553,10 +40553,10 @@ typedef enum {
LB_Quotation = 32,
LB_Regional_Indicator = 33,
LB_Space = 34,
- LB_Word_Joiner = 35,
- LB_ZWJ = 36,
+ LB_Break_Symbols = 35,
+ LB_Word_Joiner = 36,
LB_ZWSpace = 37,
- LB_EDGE = 38
+ LB_ZWJ = 38
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for ASCII/Latin1 */
@@ -45381,31 +45381,31 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 1047 */
typedef enum {
LB_Alphabetic = 0,
- LB_Break_After = 1,
- LB_Break_Before = 2,
- LB_Break_Both = 3,
- LB_Break_Symbols = 4,
- LB_Carriage_Return = 5,
- LB_Close_Parenthesis = 6,
- LB_Close_Punctuation = 7,
- LB_Combining_Mark = 8,
- LB_Contingent_Break = 9,
+ LB_Break_Both = 1,
+ LB_Break_After = 2,
+ LB_Break_Before = 3,
+ LB_Mandatory_Break = 4,
+ LB_Contingent_Break = 5,
+ LB_Close_Punctuation = 6,
+ LB_Combining_Mark = 7,
+ LB_Close_Parenthesis = 8,
+ LB_Carriage_Return = 9,
LB_E_Base = 10,
- LB_E_Modifier = 11,
- LB_Exclamation = 12,
- LB_Glue = 13,
- LB_H2 = 14,
- LB_H3 = 15,
- LB_Hebrew_Letter = 16,
- LB_Hyphen = 17,
- LB_Ideographic = 18,
- LB_Infix_Numeric = 19,
+ LB_EDGE = 11,
+ LB_E_Modifier = 12,
+ LB_Exclamation = 13,
+ LB_Glue = 14,
+ LB_H2 = 15,
+ LB_H3 = 16,
+ LB_Hebrew_Letter = 17,
+ LB_Hyphen = 18,
+ LB_Ideographic = 19,
LB_Inseparable = 20,
- LB_JL = 21,
- LB_JT = 22,
- LB_JV = 23,
- LB_Line_Feed = 24,
- LB_Mandatory_Break = 25,
+ LB_Infix_Numeric = 21,
+ LB_JL = 22,
+ LB_JT = 23,
+ LB_JV = 24,
+ LB_Line_Feed = 25,
LB_Next_Line = 26,
LB_Nonstarter = 27,
LB_Numeric = 28,
@@ -45415,10 +45415,10 @@ typedef enum {
LB_Quotation = 32,
LB_Regional_Indicator = 33,
LB_Space = 34,
- LB_Word_Joiner = 35,
- LB_ZWJ = 36,
+ LB_Break_Symbols = 35,
+ LB_Word_Joiner = 36,
LB_ZWSpace = 37,
- LB_EDGE = 38
+ LB_ZWJ = 38
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 1047 */
@@ -50255,31 +50255,31 @@ static const UV _Perl_LB_invlist[] = { /* for EBCDIC 037 */
typedef enum {
LB_Alphabetic = 0,
- LB_Break_After = 1,
- LB_Break_Before = 2,
- LB_Break_Both = 3,
- LB_Break_Symbols = 4,
- LB_Carriage_Return = 5,
- LB_Close_Parenthesis = 6,
- LB_Close_Punctuation = 7,
- LB_Combining_Mark = 8,
- LB_Contingent_Break = 9,
+ LB_Break_Both = 1,
+ LB_Break_After = 2,
+ LB_Break_Before = 3,
+ LB_Mandatory_Break = 4,
+ LB_Contingent_Break = 5,
+ LB_Close_Punctuation = 6,
+ LB_Combining_Mark = 7,
+ LB_Close_Parenthesis = 8,
+ LB_Carriage_Return = 9,
LB_E_Base = 10,
- LB_E_Modifier = 11,
- LB_Exclamation = 12,
- LB_Glue = 13,
- LB_H2 = 14,
- LB_H3 = 15,
- LB_Hebrew_Letter = 16,
- LB_Hyphen = 17,
- LB_Ideographic = 18,
- LB_Infix_Numeric = 19,
+ LB_EDGE = 11,
+ LB_E_Modifier = 12,
+ LB_Exclamation = 13,
+ LB_Glue = 14,
+ LB_H2 = 15,
+ LB_H3 = 16,
+ LB_Hebrew_Letter = 17,
+ LB_Hyphen = 18,
+ LB_Ideographic = 19,
LB_Inseparable = 20,
- LB_JL = 21,
- LB_JT = 22,
- LB_JV = 23,
- LB_Line_Feed = 24,
- LB_Mandatory_Break = 25,
+ LB_Infix_Numeric = 21,
+ LB_JL = 22,
+ LB_JT = 23,
+ LB_JV = 24,
+ LB_Line_Feed = 25,
LB_Next_Line = 26,
LB_Nonstarter = 27,
LB_Numeric = 28,
@@ -50289,10 +50289,10 @@ typedef enum {
LB_Quotation = 32,
LB_Regional_Indicator = 33,
LB_Space = 34,
- LB_Word_Joiner = 35,
- LB_ZWJ = 36,
+ LB_Break_Symbols = 35,
+ LB_Word_Joiner = 36,
LB_ZWSpace = 37,
- LB_EDGE = 38
+ LB_ZWJ = 38
} LB_enum;
static const LB_enum _Perl_LB_invmap[] = { /* for EBCDIC 037 */
@@ -55823,18 +55823,18 @@ typedef enum {
SB_ATerm = 1,
SB_Close = 2,
SB_CR = 3,
- SB_Extend = 4,
- SB_Format = 5,
- SB_LF = 6,
- SB_Lower = 7,
- SB_Numeric = 8,
- SB_OLetter = 9,
- SB_SContinue = 10,
- SB_Sep = 11,
- SB_Sp = 12,
- SB_STerm = 13,
- SB_Upper = 14,
- SB_EDGE = 15
+ SB_EDGE = 4,
+ SB_Extend = 5,
+ SB_Format = 6,
+ SB_LF = 7,
+ SB_Lower = 8,
+ SB_Numeric = 9,
+ SB_OLetter = 10,
+ SB_SContinue = 11,
+ SB_Sep = 12,
+ SB_Sp = 13,
+ SB_STerm = 14,
+ SB_Upper = 15
} SB_enum;
static const SB_enum _Perl_SB_invmap[] = { /* for ASCII/Latin1 */
@@ -62092,18 +62092,18 @@ typedef enum {
SB_ATerm = 1,
SB_Close = 2,
SB_CR = 3,
- SB_Extend = 4,
- SB_Format = 5,
- SB_LF = 6,
- SB_Lower = 7,
- SB_Numeric = 8,
- SB_OLetter = 9,
- SB_SContinue = 10,
- SB_Sep = 11,
- SB_Sp = 12,
- SB_STerm = 13,
- SB_Upper = 14,
- SB_EDGE = 15
+ SB_EDGE = 4,
+ SB_Extend = 5,
+ SB_Format = 6,
+ SB_LF = 7,
+ SB_Lower = 8,
+ SB_Numeric = 9,
+ SB_OLetter = 10,
+ SB_SContinue = 11,
+ SB_Sep = 12,
+ SB_Sp = 13,
+ SB_STerm = 14,
+ SB_Upper = 15
} SB_enum;
static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 1047 */
@@ -68381,18 +68381,18 @@ typedef enum {
SB_ATerm = 1,
SB_Close = 2,
SB_CR = 3,
- SB_Extend = 4,
- SB_Format = 5,
- SB_LF = 6,
- SB_Lower = 7,
- SB_Numeric = 8,
- SB_OLetter = 9,
- SB_SContinue = 10,
- SB_Sep = 11,
- SB_Sp = 12,
- SB_STerm = 13,
- SB_Upper = 14,
- SB_EDGE = 15
+ SB_EDGE = 4,
+ SB_Extend = 5,
+ SB_Format = 6,
+ SB_LF = 7,
+ SB_Lower = 8,
+ SB_Numeric = 9,
+ SB_OLetter = 10,
+ SB_SContinue = 11,
+ SB_Sep = 12,
+ SB_Sp = 13,
+ SB_STerm = 14,
+ SB_Upper = 15
} SB_enum;
static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 037 */
@@ -73242,106 +73242,106 @@ typedef enum {
SCX_Inherited = 54,
SCX_Inscriptional_Pahlavi = 55,
SCX_Inscriptional_Parthian = 56,
- SCX_Javanese = 57,
- SCX_Jpan = 58,
- SCX_Kaithi = 59,
- SCX_Kannada = 60,
- SCX_Katakana = 61,
- SCX_Kayah_Li = 62,
- SCX_Kharoshthi = 63,
- SCX_Khmer = 64,
- SCX_Khojki = 65,
- SCX_Khudawadi = 66,
- SCX_Kore = 67,
- SCX_Lao = 68,
- SCX_Latin = 69,
- SCX_Lepcha = 70,
- SCX_Limbu = 71,
- SCX_Linear_A = 72,
- SCX_Linear_B = 73,
- SCX_Lisu = 74,
- SCX_Lycian = 75,
- SCX_Lydian = 76,
- SCX_Mahajani = 77,
- SCX_Makasar = 78,
- SCX_Malayalam = 79,
- SCX_Mandaic = 80,
- SCX_Manichaean = 81,
- SCX_Marchen = 82,
- SCX_Masaram_Gondi = 83,
- SCX_Medefaidrin = 84,
- SCX_Meetei_Mayek = 85,
- SCX_Mende_Kikakui = 86,
- SCX_Meroitic_Cursive = 87,
- SCX_Meroitic_Hieroglyphs = 88,
- SCX_Miao = 89,
- SCX_Modi = 90,
- SCX_Mongolian = 91,
- SCX_Mro = 92,
- SCX_Multani = 93,
- SCX_Myanmar = 94,
- SCX_Nabataean = 95,
- SCX_Nandinagari = 96,
- SCX_New_Tai_Lue = 97,
- SCX_Newa = 98,
- SCX_Nko = 99,
- SCX_Nushu = 100,
- SCX_Nyiakeng_Puachue_Hmong = 101,
- SCX_Ogham = 102,
- SCX_Ol_Chiki = 103,
- SCX_Old_Hungarian = 104,
- SCX_Old_Italic = 105,
- SCX_Old_North_Arabian = 106,
- SCX_Old_Permic = 107,
- SCX_Old_Persian = 108,
- SCX_Old_Sogdian = 109,
- SCX_Old_South_Arabian = 110,
- SCX_Old_Turkic = 111,
- SCX_Oriya = 112,
- SCX_Osage = 113,
- SCX_Osmanya = 114,
- SCX_Pahawh_Hmong = 115,
- SCX_Palmyrene = 116,
- SCX_Pau_Cin_Hau = 117,
- SCX_Phags_Pa = 118,
- SCX_Phoenician = 119,
- SCX_Psalter_Pahlavi = 120,
- SCX_Rejang = 121,
- SCX_Runic = 122,
- SCX_Samaritan = 123,
- SCX_Saurashtra = 124,
- SCX_Sharada = 125,
- SCX_Shavian = 126,
- SCX_Siddham = 127,
- SCX_SignWriting = 128,
- SCX_Sinhala = 129,
- SCX_Sogdian = 130,
- SCX_Sora_Sompeng = 131,
- SCX_Soyombo = 132,
- SCX_Sundanese = 133,
- SCX_Syloti_Nagri = 134,
- SCX_Syriac = 135,
- SCX_Tagalog = 136,
- SCX_Tagbanwa = 137,
- SCX_Tai_Le = 138,
- SCX_Tai_Tham = 139,
- SCX_Tai_Viet = 140,
- SCX_Takri = 141,
- SCX_Tamil = 142,
- SCX_Tangut = 143,
- SCX_Telugu = 144,
- SCX_Thaana = 145,
- SCX_Thai = 146,
- SCX_Tibetan = 147,
- SCX_Tifinagh = 148,
- SCX_Tirhuta = 149,
- SCX_Ugaritic = 150,
- SCX_Vai = 151,
- SCX_Wancho = 152,
- SCX_Warang_Citi = 153,
- SCX_Yi = 154,
- SCX_Zanabazar_Square = 155,
- SCX_INVALID = 156,
+ SCX_INVALID = 57,
+ SCX_Javanese = 58,
+ SCX_Jpan = 59,
+ SCX_Kaithi = 60,
+ SCX_Kannada = 61,
+ SCX_Katakana = 62,
+ SCX_Kayah_Li = 63,
+ SCX_Kharoshthi = 64,
+ SCX_Khmer = 65,
+ SCX_Khojki = 66,
+ SCX_Khudawadi = 67,
+ SCX_Kore = 68,
+ SCX_Lao = 69,
+ SCX_Latin = 70,
+ SCX_Lepcha = 71,
+ SCX_Limbu = 72,
+ SCX_Linear_A = 73,
+ SCX_Linear_B = 74,
+ SCX_Lisu = 75,
+ SCX_Lycian = 76,
+ SCX_Lydian = 77,
+ SCX_Mahajani = 78,
+ SCX_Makasar = 79,
+ SCX_Malayalam = 80,
+ SCX_Mandaic = 81,
+ SCX_Manichaean = 82,
+ SCX_Marchen = 83,
+ SCX_Masaram_Gondi = 84,
+ SCX_Medefaidrin = 85,
+ SCX_Meetei_Mayek = 86,
+ SCX_Mende_Kikakui = 87,
+ SCX_Meroitic_Cursive = 88,
+ SCX_Meroitic_Hieroglyphs = 89,
+ SCX_Miao = 90,
+ SCX_Modi = 91,
+ SCX_Mongolian = 92,
+ SCX_Mro = 93,
+ SCX_Multani = 94,
+ SCX_Myanmar = 95,
+ SCX_Nabataean = 96,
+ SCX_Nandinagari = 97,
+ SCX_New_Tai_Lue = 98,
+ SCX_Newa = 99,
+ SCX_Nko = 100,
+ SCX_Nushu = 101,
+ SCX_Nyiakeng_Puachue_Hmong = 102,
+ SCX_Ogham = 103,
+ SCX_Ol_Chiki = 104,
+ SCX_Old_Hungarian = 105,
+ SCX_Old_Italic = 106,
+ SCX_Old_North_Arabian = 107,
+ SCX_Old_Permic = 108,
+ SCX_Old_Persian = 109,
+ SCX_Old_Sogdian = 110,
+ SCX_Old_South_Arabian = 111,
+ SCX_Old_Turkic = 112,
+ SCX_Oriya = 113,
+ SCX_Osage = 114,
+ SCX_Osmanya = 115,
+ SCX_Pahawh_Hmong = 116,
+ SCX_Palmyrene = 117,
+ SCX_Pau_Cin_Hau = 118,
+ SCX_Phags_Pa = 119,
+ SCX_Phoenician = 120,
+ SCX_Psalter_Pahlavi = 121,
+ SCX_Rejang = 122,
+ SCX_Runic = 123,
+ SCX_Samaritan = 124,
+ SCX_Saurashtra = 125,
+ SCX_Sharada = 126,
+ SCX_Shavian = 127,
+ SCX_Siddham = 128,
+ SCX_SignWriting = 129,
+ SCX_Sinhala = 130,
+ SCX_Sogdian = 131,
+ SCX_Sora_Sompeng = 132,
+ SCX_Soyombo = 133,
+ SCX_Sundanese = 134,
+ SCX_Syloti_Nagri = 135,
+ SCX_Syriac = 136,
+ SCX_Tagalog = 137,
+ SCX_Tagbanwa = 138,
+ SCX_Tai_Le = 139,
+ SCX_Tai_Tham = 140,
+ SCX_Tai_Viet = 141,
+ SCX_Takri = 142,
+ SCX_Tamil = 143,
+ SCX_Tangut = 144,
+ SCX_Telugu = 145,
+ SCX_Thaana = 146,
+ SCX_Thai = 147,
+ SCX_Tibetan = 148,
+ SCX_Tifinagh = 149,
+ SCX_Tirhuta = 150,
+ SCX_Ugaritic = 151,
+ SCX_Vai = 152,
+ SCX_Wancho = 153,
+ SCX_Warang_Citi = 154,
+ SCX_Yi = 155,
+ SCX_Zanabazar_Square = 156,
SCX_use_AUX_TABLE_1 = -1,
SCX_use_AUX_TABLE_2 = -2,
SCX_use_AUX_TABLE_3 = -3,
@@ -73993,6 +73993,7 @@ static const UV script_zeros[] = {
0, /* Inherited */
'0', /* Inscriptional_Pahlavi */
'0', /* Inscriptional_Parthian */
+ '0', /* INVALID */
0xa9d0, /* Javanese */
'0', /* Jpan */
0x966, /* Kaithi */
@@ -77498,106 +77499,106 @@ typedef enum {
SCX_Inherited = 54,
SCX_Inscriptional_Pahlavi = 55,
SCX_Inscriptional_Parthian = 56,
- SCX_Javanese = 57,
- SCX_Jpan = 58,
- SCX_Kaithi = 59,
- SCX_Kannada = 60,
- SCX_Katakana = 61,
- SCX_Kayah_Li = 62,
- SCX_Kharoshthi = 63,
- SCX_Khmer = 64,
- SCX_Khojki = 65,
- SCX_Khudawadi = 66,
- SCX_Kore = 67,
- SCX_Lao = 68,
- SCX_Latin = 69,
- SCX_Lepcha = 70,
- SCX_Limbu = 71,
- SCX_Linear_A = 72,
- SCX_Linear_B = 73,
- SCX_Lisu = 74,
- SCX_Lycian = 75,
- SCX_Lydian = 76,
- SCX_Mahajani = 77,
- SCX_Makasar = 78,
- SCX_Malayalam = 79,
- SCX_Mandaic = 80,
- SCX_Manichaean = 81,
- SCX_Marchen = 82,
- SCX_Masaram_Gondi = 83,
- SCX_Medefaidrin = 84,
- SCX_Meetei_Mayek = 85,
- SCX_Mende_Kikakui = 86,
- SCX_Meroitic_Cursive = 87,
- SCX_Meroitic_Hieroglyphs = 88,
- SCX_Miao = 89,
- SCX_Modi = 90,
- SCX_Mongolian = 91,
- SCX_Mro = 92,
- SCX_Multani = 93,
- SCX_Myanmar = 94,
- SCX_Nabataean = 95,
- SCX_Nandinagari = 96,
- SCX_New_Tai_Lue = 97,
- SCX_Newa = 98,
- SCX_Nko = 99,
- SCX_Nushu = 100,
- SCX_Nyiakeng_Puachue_Hmong = 101,
- SCX_Ogham = 102,
- SCX_Ol_Chiki = 103,
- SCX_Old_Hungarian = 104,
- SCX_Old_Italic = 105,
- SCX_Old_North_Arabian = 106,
- SCX_Old_Permic = 107,
- SCX_Old_Persian = 108,
- SCX_Old_Sogdian = 109,
- SCX_Old_South_Arabian = 110,
- SCX_Old_Turkic = 111,
- SCX_Oriya = 112,
- SCX_Osage = 113,
- SCX_Osmanya = 114,
- SCX_Pahawh_Hmong = 115,
- SCX_Palmyrene = 116,
- SCX_Pau_Cin_Hau = 117,
- SCX_Phags_Pa = 118,
- SCX_Phoenician = 119,
- SCX_Psalter_Pahlavi = 120,
- SCX_Rejang = 121,
- SCX_Runic = 122,
- SCX_Samaritan = 123,
- SCX_Saurashtra = 124,
- SCX_Sharada = 125,
- SCX_Shavian = 126,
- SCX_Siddham = 127,
- SCX_SignWriting = 128,
- SCX_Sinhala = 129,
- SCX_Sogdian = 130,
- SCX_Sora_Sompeng = 131,
- SCX_Soyombo = 132,
- SCX_Sundanese = 133,
- SCX_Syloti_Nagri = 134,
- SCX_Syriac = 135,
- SCX_Tagalog = 136,
- SCX_Tagbanwa = 137,
- SCX_Tai_Le = 138,
- SCX_Tai_Tham = 139,
- SCX_Tai_Viet = 140,
- SCX_Takri = 141,
- SCX_Tamil = 142,
- SCX_Tangut = 143,
- SCX_Telugu = 144,
- SCX_Thaana = 145,
- SCX_Thai = 146,
- SCX_Tibetan = 147,
- SCX_Tifinagh = 148,
- SCX_Tirhuta = 149,
- SCX_Ugaritic = 150,
- SCX_Vai = 151,
- SCX_Wancho = 152,
- SCX_Warang_Citi = 153,
- SCX_Yi = 154,
- SCX_Zanabazar_Square = 155,
- SCX_INVALID = 156,
+ SCX_INVALID = 57,
+ SCX_Javanese = 58,
+ SCX_Jpan = 59,
+ SCX_Kaithi = 60,
+ SCX_Kannada = 61,
+ SCX_Katakana = 62,
+ SCX_Kayah_Li = 63,
+ SCX_Kharoshthi = 64,
+ SCX_Khmer = 65,
+ SCX_Khojki = 66,
+ SCX_Khudawadi = 67,
+ SCX_Kore = 68,
+ SCX_Lao = 69,
+ SCX_Latin = 70,
+ SCX_Lepcha = 71,
+ SCX_Limbu = 72,
+ SCX_Linear_A = 73,
+ SCX_Linear_B = 74,
+ SCX_Lisu = 75,
+ SCX_Lycian = 76,
+ SCX_Lydian = 77,
+ SCX_Mahajani = 78,
+ SCX_Makasar = 79,
+ SCX_Malayalam = 80,
+ SCX_Mandaic = 81,
+ SCX_Manichaean = 82,
+ SCX_Marchen = 83,
+ SCX_Masaram_Gondi = 84,
+ SCX_Medefaidrin = 85,
+ SCX_Meetei_Mayek = 86,
+ SCX_Mende_Kikakui = 87,
+ SCX_Meroitic_Cursive = 88,
+ SCX_Meroitic_Hieroglyphs = 89,
+ SCX_Miao = 90,
+ SCX_Modi = 91,
+ SCX_Mongolian = 92,
+ SCX_Mro = 93,
+ SCX_Multani = 94,
+ SCX_Myanmar = 95,
+ SCX_Nabataean = 96,
+ SCX_Nandinagari = 97,
+ SCX_New_Tai_Lue = 98,
+ SCX_Newa = 99,
+ SCX_Nko = 100,
+ SCX_Nushu = 101,
+ SCX_Nyiakeng_Puachue_Hmong = 102,
+ SCX_Ogham = 103,
+ SCX_Ol_Chiki = 104,
+ SCX_Old_Hungarian = 105,
+ SCX_Old_Italic = 106,
+ SCX_Old_North_Arabian = 107,
+ SCX_Old_Permic = 108,
+ SCX_Old_Persian = 109,
+ SCX_Old_Sogdian = 110,
+ SCX_Old_South_Arabian = 111,
+ SCX_Old_Turkic = 112,
+ SCX_Oriya = 113,
+ SCX_Osage = 114,
+ SCX_Osmanya = 115,
+ SCX_Pahawh_Hmong = 116,
+ SCX_Palmyrene = 117,
+ SCX_Pau_Cin_Hau = 118,
+ SCX_Phags_Pa = 119,
+ SCX_Phoenician = 120,
+ SCX_Psalter_Pahlavi = 121,
+ SCX_Rejang = 122,
+ SCX_Runic = 123,
+ SCX_Samaritan = 124,
+ SCX_Saurashtra = 125,
+ SCX_Sharada = 126,
+ SCX_Shavian = 127,
+ SCX_Siddham = 128,
+ SCX_SignWriting = 129,
+ SCX_Sinhala = 130,
+ SCX_Sogdian = 131,
+ SCX_Sora_Sompeng = 132,
+ SCX_Soyombo = 133,
+ SCX_Sundanese = 134,
+ SCX_Syloti_Nagri = 135,
+ SCX_Syriac = 136,
+ SCX_Tagalog = 137,
+ SCX_Tagbanwa = 138,
+ SCX_Tai_Le = 139,
+ SCX_Tai_Tham = 140,
+ SCX_Tai_Viet = 141,
+ SCX_Takri = 142,
+ SCX_Tamil = 143,
+ SCX_Tangut = 144,
+ SCX_Telugu = 145,
+ SCX_Thaana = 146,
+ SCX_Thai = 147,
+ SCX_Tibetan = 148,
+ SCX_Tifinagh = 149,
+ SCX_Tirhuta = 150,
+ SCX_Ugaritic = 151,
+ SCX_Vai = 152,
+ SCX_Wancho = 153,
+ SCX_Warang_Citi = 154,
+ SCX_Yi = 155,
+ SCX_Zanabazar_Square = 156,
SCX_use_AUX_TABLE_1 = -1,
SCX_use_AUX_TABLE_2 = -2,
SCX_use_AUX_TABLE_3 = -3,
@@ -78252,6 +78253,7 @@ static const UV script_zeros[] = {
0, /* Inherited */
'0', /* Inscriptional_Pahlavi */
'0', /* Inscriptional_Parthian */
+ '0', /* INVALID */
0xa9d0, /* Javanese */
'0', /* Jpan */
0x966, /* Kaithi */
@@ -81779,106 +81781,106 @@ typedef enum {
SCX_Inherited = 54,
SCX_Inscriptional_Pahlavi = 55,
SCX_Inscriptional_Parthian = 56,
- SCX_Javanese = 57,
- SCX_Jpan = 58,
- SCX_Kaithi = 59,
- SCX_Kannada = 60,
- SCX_Katakana = 61,
- SCX_Kayah_Li = 62,
- SCX_Kharoshthi = 63,
- SCX_Khmer = 64,
- SCX_Khojki = 65,
- SCX_Khudawadi = 66,
- SCX_Kore = 67,
- SCX_Lao = 68,
- SCX_Latin = 69,
- SCX_Lepcha = 70,
- SCX_Limbu = 71,
- SCX_Linear_A = 72,
- SCX_Linear_B = 73,
- SCX_Lisu = 74,
- SCX_Lycian = 75,
- SCX_Lydian = 76,
- SCX_Mahajani = 77,
- SCX_Makasar = 78,
- SCX_Malayalam = 79,
- SCX_Mandaic = 80,
- SCX_Manichaean = 81,
- SCX_Marchen = 82,
- SCX_Masaram_Gondi = 83,
- SCX_Medefaidrin = 84,
- SCX_Meetei_Mayek = 85,
- SCX_Mende_Kikakui = 86,
- SCX_Meroitic_Cursive = 87,
- SCX_Meroitic_Hieroglyphs = 88,
- SCX_Miao = 89,
- SCX_Modi = 90,
- SCX_Mongolian = 91,
- SCX_Mro = 92,
- SCX_Multani = 93,
- SCX_Myanmar = 94,
- SCX_Nabataean = 95,
- SCX_Nandinagari = 96,
- SCX_New_Tai_Lue = 97,
- SCX_Newa = 98,
- SCX_Nko = 99,
- SCX_Nushu = 100,
- SCX_Nyiakeng_Puachue_Hmong = 101,
- SCX_Ogham = 102,
- SCX_Ol_Chiki = 103,
- SCX_Old_Hungarian = 104,
- SCX_Old_Italic = 105,
- SCX_Old_North_Arabian = 106,
- SCX_Old_Permic = 107,
- SCX_Old_Persian = 108,
- SCX_Old_Sogdian = 109,
- SCX_Old_South_Arabian = 110,
- SCX_Old_Turkic = 111,
- SCX_Oriya = 112,
- SCX_Osage = 113,
- SCX_Osmanya = 114,
- SCX_Pahawh_Hmong = 115,
- SCX_Palmyrene = 116,
- SCX_Pau_Cin_Hau = 117,
- SCX_Phags_Pa = 118,
- SCX_Phoenician = 119,
- SCX_Psalter_Pahlavi = 120,
- SCX_Rejang = 121,
- SCX_Runic = 122,
- SCX_Samaritan = 123,
- SCX_Saurashtra = 124,
- SCX_Sharada = 125,
- SCX_Shavian = 126,
- SCX_Siddham = 127,
- SCX_SignWriting = 128,
- SCX_Sinhala = 129,
- SCX_Sogdian = 130,
- SCX_Sora_Sompeng = 131,
- SCX_Soyombo = 132,
- SCX_Sundanese = 133,
- SCX_Syloti_Nagri = 134,
- SCX_Syriac = 135,
- SCX_Tagalog = 136,
- SCX_Tagbanwa = 137,
- SCX_Tai_Le = 138,
- SCX_Tai_Tham = 139,
- SCX_Tai_Viet = 140,
- SCX_Takri = 141,
- SCX_Tamil = 142,
- SCX_Tangut = 143,
- SCX_Telugu = 144,
- SCX_Thaana = 145,
- SCX_Thai = 146,
- SCX_Tibetan = 147,
- SCX_Tifinagh = 148,
- SCX_Tirhuta = 149,
- SCX_Ugaritic = 150,
- SCX_Vai = 151,
- SCX_Wancho = 152,
- SCX_Warang_Citi = 153,
- SCX_Yi = 154,
- SCX_Zanabazar_Square = 155,
- SCX_INVALID = 156,
+ SCX_INVALID = 57,
+ SCX_Javanese = 58,
+ SCX_Jpan = 59,
+ SCX_Kaithi = 60,
+ SCX_Kannada = 61,
+ SCX_Katakana = 62,
+ SCX_Kayah_Li = 63,
+ SCX_Kharoshthi = 64,
+ SCX_Khmer = 65,
+ SCX_Khojki = 66,
+ SCX_Khudawadi = 67,
+ SCX_Kore = 68,
+ SCX_Lao = 69,
+ SCX_Latin = 70,
+ SCX_Lepcha = 71,
+ SCX_Limbu = 72,
+ SCX_Linear_A = 73,
+ SCX_Linear_B = 74,
+ SCX_Lisu = 75,
+ SCX_Lycian = 76,
+ SCX_Lydian = 77,
+ SCX_Mahajani = 78,
+ SCX_Makasar = 79,
+ SCX_Malayalam = 80,
+ SCX_Mandaic = 81,
+ SCX_Manichaean = 82,
+ SCX_Marchen = 83,
+ SCX_Masaram_Gondi = 84,
+ SCX_Medefaidrin = 85,
+ SCX_Meetei_Mayek = 86,
+ SCX_Mende_Kikakui = 87,
+ SCX_Meroitic_Cursive = 88,
+ SCX_Meroitic_Hieroglyphs = 89,
+ SCX_Miao = 90,
+ SCX_Modi = 91,
+ SCX_Mongolian = 92,
+ SCX_Mro = 93,
+ SCX_Multani = 94,
+ SCX_Myanmar = 95,
+ SCX_Nabataean = 96,
+ SCX_Nandinagari = 97,
+ SCX_New_Tai_Lue = 98,
+ SCX_Newa = 99,
+ SCX_Nko = 100,
+ SCX_Nushu = 101,
+ SCX_Nyiakeng_Puachue_Hmong = 102,
+ SCX_Ogham = 103,
+ SCX_Ol_Chiki = 104,
+ SCX_Old_Hungarian = 105,
+ SCX_Old_Italic = 106,
+ SCX_Old_North_Arabian = 107,
+ SCX_Old_Permic = 108,
+ SCX_Old_Persian = 109,
+ SCX_Old_Sogdian = 110,
+ SCX_Old_South_Arabian = 111,
+ SCX_Old_Turkic = 112,
+ SCX_Oriya = 113,
+ SCX_Osage = 114,
+ SCX_Osmanya = 115,
+ SCX_Pahawh_Hmong = 116,
+ SCX_Palmyrene = 117,
+ SCX_Pau_Cin_Hau = 118,
+ SCX_Phags_Pa = 119,
+ SCX_Phoenician = 120,
+ SCX_Psalter_Pahlavi = 121,
+ SCX_Rejang = 122,
+ SCX_Runic = 123,
+ SCX_Samaritan = 124,
+ SCX_Saurashtra = 125,
+ SCX_Sharada = 126,
+ SCX_Shavian = 127,
+ SCX_Siddham = 128,
+ SCX_SignWriting = 129,
+ SCX_Sinhala = 130,
+ SCX_Sogdian = 131,
+ SCX_Sora_Sompeng = 132,
+ SCX_Soyombo = 133,
+ SCX_Sundanese = 134,
+ SCX_Syloti_Nagri = 135,
+ SCX_Syriac = 136,
+ SCX_Tagalog = 137,
+ SCX_Tagbanwa = 138,
+ SCX_Tai_Le = 139,
+ SCX_Tai_Tham = 140,
+ SCX_Tai_Viet = 141,
+ SCX_Takri = 142,
+ SCX_Tamil = 143,
+ SCX_Tangut = 144,
+ SCX_Telugu = 145,
+ SCX_Thaana = 146,
+ SCX_Thai = 147,
+ SCX_Tibetan = 148,
+ SCX_Tifinagh = 149,
+ SCX_Tirhuta = 150,
+ SCX_Ugaritic = 151,
+ SCX_Vai = 152,
+ SCX_Wancho = 153,
+ SCX_Warang_Citi = 154,
+ SCX_Yi = 155,
+ SCX_Zanabazar_Square = 156,
SCX_use_AUX_TABLE_1 = -1,
SCX_use_AUX_TABLE_2 = -2,
SCX_use_AUX_TABLE_3 = -3,
@@ -82533,6 +82535,7 @@ static const UV script_zeros[] = {
0, /* Inherited */
'0', /* Inscriptional_Pahlavi */
'0', /* Inscriptional_Parthian */
+ '0', /* INVALID */
0xa9d0, /* Javanese */
'0', /* Jpan */
0x966, /* Kaithi */
@@ -86174,27 +86177,27 @@ static const UV _Perl_WB_invlist[] = { /* for ASCII/Latin1 */
typedef enum {
WB_Other = 0,
- WB_ALetter = 1,
- WB_CR = 2,
- WB_Double_Quote = 3,
- WB_Extend = 4,
- WB_ExtendNumLet = 5,
+ WB_CR = 1,
+ WB_Double_Quote = 2,
+ WB_EDGE = 3,
+ WB_ExtendNumLet = 4,
+ WB_Extend = 5,
WB_Format = 6,
WB_Hebrew_Letter = 7,
- WB_Katakana = 8,
- WB_LF = 9,
- WB_MidLetter = 10,
- WB_MidNum = 11,
+ WB_Perl_Tailored_HSpace = 8,
+ WB_Katakana = 9,
+ WB_ALetter = 10,
+ WB_LF = 11,
WB_MidNumLet = 12,
- WB_Newline = 13,
- WB_Numeric = 14,
- WB_Perl_Tailored_HSpace = 15,
- WB_Regional_Indicator = 16,
- WB_Single_Quote = 17,
- WB_XPG_LE = 18,
- WB_XPG_XX = 19,
- WB_ZWJ = 20,
- WB_EDGE = 21,
+ WB_MidLetter = 13,
+ WB_MidNum = 14,
+ WB_Newline = 15,
+ WB_Numeric = 16,
+ WB_Regional_Indicator = 17,
+ WB_Single_Quote = 18,
+ WB_XPG_LE = 19,
+ WB_XPG_XX = 20,
+ WB_ZWJ = 21,
WB_E_Base = 22,
WB_E_Base_GAZ = 22,
WB_E_Modifier = 22,
@@ -89955,27 +89958,27 @@ static const UV _Perl_WB_invlist[] = { /* for EBCDIC 1047 */
typedef enum {
WB_Other = 0,
- WB_ALetter = 1,
- WB_CR = 2,
- WB_Double_Quote = 3,
- WB_Extend = 4,
- WB_ExtendNumLet = 5,
+ WB_CR = 1,
+ WB_Double_Quote = 2,
+ WB_EDGE = 3,
+ WB_ExtendNumLet = 4,
+ WB_Extend = 5,
WB_Format = 6,
WB_Hebrew_Letter = 7,
- WB_Katakana = 8,
- WB_LF = 9,
- WB_MidLetter = 10,
- WB_MidNum = 11,
+ WB_Perl_Tailored_HSpace = 8,
+ WB_Katakana = 9,
+ WB_ALetter = 10,
+ WB_LF = 11,
WB_MidNumLet = 12,
- WB_Newline = 13,
- WB_Numeric = 14,
- WB_Perl_Tailored_HSpace = 15,
- WB_Regional_Indicator = 16,
- WB_Single_Quote = 17,
- WB_XPG_LE = 18,
- WB_XPG_XX = 19,
- WB_ZWJ = 20,
- WB_EDGE = 21,
+ WB_MidLetter = 13,
+ WB_MidNum = 14,
+ WB_Newline = 15,
+ WB_Numeric = 16,
+ WB_Regional_Indicator = 17,
+ WB_Single_Quote = 18,
+ WB_XPG_LE = 19,
+ WB_XPG_XX = 20,
+ WB_ZWJ = 21,
WB_E_Base = 22,
WB_E_Base_GAZ = 22,
WB_E_Modifier = 22,
@@ -93755,27 +93758,27 @@ static const UV _Perl_WB_invlist[] = { /* for EBCDIC 037 */
typedef enum {
WB_Other = 0,
- WB_ALetter = 1,
- WB_CR = 2,
- WB_Double_Quote = 3,
- WB_Extend = 4,
- WB_ExtendNumLet = 5,
+ WB_CR = 1,
+ WB_Double_Quote = 2,
+ WB_EDGE = 3,
+ WB_ExtendNumLet = 4,
+ WB_Extend = 5,
WB_Format = 6,
WB_Hebrew_Letter = 7,
- WB_Katakana = 8,
- WB_LF = 9,
- WB_MidLetter = 10,
- WB_MidNum = 11,
+ WB_Perl_Tailored_HSpace = 8,
+ WB_Katakana = 9,
+ WB_ALetter = 10,
+ WB_LF = 11,
WB_MidNumLet = 12,
- WB_Newline = 13,
- WB_Numeric = 14,
- WB_Perl_Tailored_HSpace = 15,
- WB_Regional_Indicator = 16,
- WB_Single_Quote = 17,
- WB_XPG_LE = 18,
- WB_XPG_XX = 19,
- WB_ZWJ = 20,
- WB_EDGE = 21,
+ WB_MidLetter = 13,
+ WB_MidNum = 14,
+ WB_Newline = 15,
+ WB_Numeric = 16,
+ WB_Regional_Indicator = 17,
+ WB_Single_Quote = 18,
+ WB_XPG_LE = 19,
+ WB_XPG_XX = 20,
+ WB_ZWJ = 21,
WB_E_Base = 22,
WB_E_Base_GAZ = 22,
WB_E_Modifier = 22,
@@ -394993,24 +394996,24 @@ static const U8 GCB_table[17][17] = {
/* 'edg' stands for 'EDGE'; 'XPG' stands for 'XPG_XX'; u stands
* for 'unused in this Unicode release (and the data in its row
* and column are garbage) */
-/* XX CN CR EX L LF LV LVT PP RI SM T V XPG ZWJ edg u */
-/* XX */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
-/* CN */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* CR */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* EX */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 3 },
-/* L */ { 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1 },
-/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* LV */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1 },
-/* LVT*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1 },
-/* PP */ { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 },
-/* RI */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1 },
-/* SM */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
-/* T */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1 },
-/* V */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1 },
-/* XPG*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
-/* ZWJ*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 4, 0, 1, 0 },
-/* edg*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
-/* u */ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0 }
+/* XX CN CR edg EX L LF LV LVT PP RI SM T V XPG ZWJ u */
+/* XX */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 },
+/* CN */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* CR */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* edg*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* EX */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 3 },
+/* L */ { 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1 },
+/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* LV */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1 },
+/* LVT*/ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1 },
+/* PP */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+/* RI */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1 },
+/* SM */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 },
+/* T */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1 },
+/* V */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1 },
+/* XPG*/ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 },
+/* ZWJ*/ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 4, 0, 0 },
+/* u */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }
};
#define LB_NOBREAK 0
@@ -395026,46 +395029,46 @@ static const U8 GCB_table[17][17] = {
static const U8 LB_table[39][39] = {
/* 'edg' stands for 'EDGE' */
-/* AL BA BB B2 SY CR CP CL CM CB EB EM EX GL H2 H3 HL HY ID IS IN JL JT JV LF BK NL NS NU OP PO PR QU RI SP WJ ZWJ ZW edg */
-/* AL */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 },
-/* BA */ {14, 0,14,14, 2, 0, 2, 2, 0, 1,14,14, 2,14,14,14,14, 0,14, 2,14,14,14,14, 0, 0, 0, 0,14,14,14,14, 0,14, 0, 0, 0, 0, 1 },
-/* BB */ { 0, 0, 0, 0, 2, 0, 2, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
-/* B2 */ { 1, 0, 1, 2, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* SY */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0,12, 1,33,33, 0, 1, 0, 0, 0, 0, 1 },
-/* CR */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* CP */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 2, 0, 1,33,33, 0, 1, 0, 0, 0, 0, 1 },
-/* CL */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 0, 0, 0, 1 },
-/* CM */ { 3, 3, 3, 3, 3, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 1 },
-/* CB */ { 1, 1, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* EB */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 0, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* EM */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* EX */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* GL */ { 0, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
-/* H2 */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* H3 */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* HL */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 },
-/* HY */ {14, 0,14,14, 2, 0, 2, 2, 0, 1,14,14, 2,14,14,14,14, 0,14, 2,14,14,14,14, 0, 0, 0, 0,13,14,14,14, 0,14, 0, 0, 0, 0, 1 },
-/* ID */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* IS */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0,12, 1,33,33, 0, 1, 0, 0, 0, 0, 1 },
-/* IN */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* JL */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* JT */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* JV */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* BK */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* NL */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-/* NS */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* NU */ { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 },
-/* OP */ { 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 1 },
-/* PO */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* PR */ { 0, 0, 1, 1, 2, 0, 2, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 0, 0, 0, 1 },
-/* QU */ { 0, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
-/* RI */ { 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0,15, 0, 0, 0, 0, 1 },
-/* SP */ { 7, 7, 7, 7, 8, 0, 8, 8, 7, 7, 7, 7, 8, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 0, 8, 7, 0, 1 },
-/* WJ */ { 0, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
-/* ZWJ*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
-/* ZW */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
-/* edg*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+/* AL B2 BA BB BK CB CL CM CP CR EB edg EM EX GL H2 H3 HL HY ID IN IS JL JT JV LF NL NS NU OP PO PR QU RI SP SY WJ ZW ZWJ */
+/* AL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0 },
+/* B2 */ { 1, 2, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* BA */ {14,14, 0,14, 0, 1, 2, 0, 2, 0,14, 1,14, 2,14,14,14,14, 0,14,14, 2,14,14,14, 0, 0, 0,14,14,14,14, 0,14, 0, 2, 0, 0, 0 },
+/* BB */ { 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 },
+/* BK */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* CB */ { 1, 1, 1, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* CL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 0, 2, 1, 1,33,33, 0, 1, 0, 2, 0, 0, 0 },
+/* CM */ { 3, 3, 3, 3, 0, 3, 3, 0, 3, 0, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3 },
+/* CP */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 1, 2, 1, 1, 1, 0, 0, 2, 0, 1,33,33, 0, 1, 0, 2, 0, 0, 0 },
+/* CR */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* EB */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 0, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* edg*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+/* EM */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* EX */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* GL */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 },
+/* H2 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* H3 */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* HL */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0 },
+/* HY */ {14,14, 0,14, 0, 1, 2, 0, 2, 0,14, 1,14, 2,14,14,14,14, 0,14,14, 2,14,14,14, 0, 0, 0,13,14,14,14, 0,14, 0, 2, 0, 0, 0 },
+/* ID */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* IN */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* IS */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 0, 0, 0 },
+/* JL */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* JT */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* JV */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* LF */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* NL */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* NS */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* NU */ { 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 },
+/* OP */ { 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2 },
+/* PO */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* PR */ { 0, 1, 0, 1, 0, 1, 2, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0,10, 1, 1, 0, 1, 0, 2, 0, 0, 0 },
+/* QU */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0 },
+/* RI */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,15, 0, 2, 0, 0, 0 },
+/* SP */ { 7, 7, 7, 7, 0, 7, 8, 7, 8, 0, 7, 1, 7, 8, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 0, 8, 8, 0, 7 },
+/* SY */ { 1, 1, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 0, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0,12, 1,33,33, 0, 1, 0, 2, 0, 0, 0 },
+/* WJ */ { 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 },
+/* ZW */ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
+/* ZWJ*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
#define WB_NOBREAK 0
@@ -395085,30 +395088,30 @@ static const U8 WB_table[23][23] = {
* 'Perl_Tailored_HSpace'; 'XPG' stands for 'XPG_LE'; 'XPH' stands for 'XPG_XX'; u
* stands for 'unused in this Unicode release (and the data in its row and column
* are garbage) */
-/* XX LE CR DQ Ext EX FO HL KA LF ML MN MB NL NU hs RI SQ XPG XPH ZWJ edg u */
-/* XX */ { 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 },
-/* LE */ { 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 9, 1, 9, 1, 0, 1, 1, 9, 1, 1, 0, 1, 1 },
-/* CR */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
-/* DQ */ { 1, 1, 1, 1, 0, 1, 0, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 },
-/* Ext*/ { 3, 3, 1, 3, 0, 3, 0, 3, 3, 1, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 0, 1, 3 },
-/* EX */ { 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1 },
-/* FO */ { 3, 3, 1, 3, 0, 3, 0, 3, 3, 1, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 0, 1, 3 },
-/* HL */ { 1, 0, 1, 7, 0, 0, 0, 0, 1, 1, 9, 1, 9, 1, 0, 1, 1, 8, 0, 1, 0, 1, 1 },
-/* KA */ { 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 },
-/* LF */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
-/* ML */ { 1,11, 1, 1, 0, 1, 0,11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 0, 1, 1 },
-/* MN */ { 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,13, 1, 1, 1, 1, 1, 0, 1, 1 },
-/* MB */ { 1,11, 1, 1, 0, 1, 0,11, 1, 1, 1, 1, 1, 1,13, 1, 1, 1, 11, 1, 0, 1, 1 },
-/* NL */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
-/* NU */ { 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,15,15, 1, 0, 1, 1,15, 0, 1, 0, 1, 1 },
-/* hs */ { 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 1, 1 },
-/* RI */ { 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,16, 1, 1, 1, 0, 1, 1 },
-/* SQ */ { 1,11, 1, 1, 0, 1, 0,11, 1, 1, 1, 1, 1, 1,13, 1, 1, 1, 11, 1, 0, 1, 1 },
-/* XPG*/ { 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 9, 1, 9, 1, 0, 1, 1, 9, 0, 1, 0, 1, 1 },
-/* XPH*/ { 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 },
-/* ZWJ*/ { 3, 3, 1, 3, 0, 3, 0, 3, 3, 1, 3, 3, 3, 1, 3, 1, 3, 3, 0, 0, 0, 1, 0 },
-/* edg*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
-/* u */ { 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0 }
+/* XX CR DQ edg EX Ext FO HL hs KA LE LF MB ML MN NL NU RI SQ XPG XPH ZWJ u */
+/* XX */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
+/* CR */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+/* DQ */ { 1, 1, 1, 1, 1, 0, 0, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
+/* edg*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+/* EX */ { 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1 },
+/* Ext*/ { 3, 1, 3, 1, 3, 0, 0, 3, 1, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 3, 3, 0, 3 },
+/* FO */ { 3, 1, 3, 1, 3, 0, 0, 3, 1, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 3, 3, 0, 3 },
+/* HL */ { 1, 1, 7, 1, 0, 0, 0, 0, 1, 1, 0, 1, 9, 9, 1, 1, 0, 1, 8, 0, 1, 0, 1 },
+/* hs */ { 1, 0, 1, 1, 1, 0, 0, 1, 2, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1 },
+/* KA */ { 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
+/* LE */ { 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 9, 9, 1, 1, 0, 1, 9, 1, 1, 0, 1 },
+/* LF */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+/* MB */ { 1, 1, 1, 1, 1, 0, 0,11, 1, 1,11, 1, 1, 1, 1, 1,13, 1, 1, 11, 1, 0, 1 },
+/* ML */ { 1, 1, 1, 1, 1, 0, 0,11, 1, 1,11, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 0, 1 },
+/* MN */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,13, 1, 1, 1, 1, 0, 1 },
+/* NL */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+/* NU */ { 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,15, 1,15, 1, 0, 1,15, 0, 1, 0, 1 },
+/* RI */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,16, 1, 1, 1, 0, 1 },
+/* SQ */ { 1, 1, 1, 1, 1, 0, 0,11, 1, 1,11, 1, 1, 1, 1, 1,13, 1, 1, 11, 1, 0, 1 },
+/* XPG*/ { 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 9, 9, 1, 1, 0, 1, 9, 0, 1, 0, 1 },
+/* XPH*/ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
+/* ZWJ*/ { 3, 1, 3, 1, 3, 0, 0, 3, 1, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 0, 0, 0 },
+/* u */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }
};
#endif /* defined(PERL_IN_REGEXEC_C) */
@@ -395171,5 +395174,5 @@ static const U8 WB_table[23][23] = {
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * 5034a0379d7390c7150d79889d5020f0e09fd23a984bb0a3a12dc4d2d8511f5d regen/mk_invlists.pl
+ * e9a592b418a84dce6d70d67180ae0fdfc08496c4030e6f52e19cd45d0f27fbbd regen/mk_invlists.pl
* ex: set ro: */
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 07d22cc188..20ce838ab8 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1264,5 +1264,5 @@
# a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
# 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
-# 5034a0379d7390c7150d79889d5020f0e09fd23a984bb0a3a12dc4d2d8511f5d regen/mk_invlists.pl
+# e9a592b418a84dce6d70d67180ae0fdfc08496c4030e6f52e19cd45d0f27fbbd regen/mk_invlists.pl
# ex: set ro:
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index b4392b3205..a4bc575092 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -466,104 +466,136 @@ sub output_invmap ($$$$$$$) {
# one beyond the final used one
}
+ # These properties have extra tables written out for them that we want
+ # to make as compact and legible as possible. So we find short names
+ # for their property values. For non-official ones we will need to
+ # add a legend at the top of the table to say what the abbreviation
+ # stands for.
+ my $property_needs_table_re = qr/ ^ _Perl_ (?: GCB | LB | WB ) $ /x;
+
+ my %short_enum_name;
+ my %need_explanation; # For non-official abbreviations, we will need
+ # to explain what the one we come up with
+ # stands for
+ my $type = lc $prop_name;
+ if ($name =~ $property_needs_table_re) {
+ my @short_names; # List of already used abbreviations, so we
+ # don't duplicate
+ for my $enum (@enums) {
+ my $short_enum;
+ my $is_official_name = 0;
+
+ # Special case this wb property value to make the
+ # name more clear
+ if ($enum eq 'Perl_Tailored_HSpace') {
+ $short_enum = 'hs';
+ }
+ else {
+
+ # Use the official short name, if found.
+ ($short_enum) = prop_value_aliases($type, $enum);
+ if ( defined $short_enum) {
+ $is_official_name = 1;
+ }
+ else {
+ # But if there is no official name, use the name that
+ # came from the data (if any). Otherwise, the name
+ # had to come from the extras list. There are two
+ # types of values in that list.
+ #
+ # First are those enums that are not part of the
+ # property, but are defined by the code in this file.
+ # By convention these have all-caps names. We use the
+ # lowercased name for these.
+ #
+ # Second are enums that are needed to get the
+ # algorithms below to work and/or to get regexec.c to
+ # compile, but don't exist in all Unicode releases.
+ # These are handled outside this loop as
+ # 'unused_enums' (as they are unused they all get
+ # collapsed into a single column, and their names
+ # don't matter)
+ if (grep { $_ eq $enum } @input_enums) {
+ $short_enum = $enum
+ }
+ else {
+ $short_enum = lc $enum;
+ }
+ }
+
+ # If our short name is too long, shorten it to make sure
+ # it fits in the column heading, and remember that it is
+ # no longer an official name so we can later add a comment
+ # explaining the abbreviation in the generated file
+ if (length $short_enum > $max_hdr_len) {
+ # First try using just the uppercase letters of the name;
+ # if it is something like FooBar, FB is a better
+ # abbreviation than Foo. That's not the case if it is
+ # entirely lowercase.
+ my $uc = $short_enum;
+ $uc =~ s/[[:^upper:]]//g;
+ $short_enum = $uc if length $uc > 1
+ && length $uc < length $short_enum;
+
+ $short_enum = substr($short_enum, 0, $max_hdr_len);
+ $is_official_name = 0;
+ }
+ }
+
+ # If the name we are to display conflicts, try another.
+ if (grep { $_ eq $short_enum } @short_names) {
+ $is_official_name = 0;
+ do { # The increment operator on strings doesn't work on
+ # those containing an '_', so strip out all the
+ # underscores first.
+ $short_enum =~ s/_//g;
+ $short_enum++;
+ } while grep { $_ eq $short_enum } @short_names;
+ }
+
+ push @short_names, $short_enum;
+ $short_enum_name{$enum} = $short_enum;
+ $need_explanation{$enum} = $short_enum unless $is_official_name;
+ }
+ } # End of calculating short enum names for certain properties
+
# Assign a value to each element of the enum type we are creating.
# The default value always gets 0; the others are arbitrarily
- # assigned.
+ # assigned, but for the properties which have the extra table, it is
+ # in the order we have computed above so the rows and columns appear
+ # alphabetically by heading abbreviation.
my $enum_val = 0;
my $canonical_default = prop_value_aliases($prop_name, $default);
$default = $canonical_default if defined $canonical_default;
$enums{$default} = $enum_val++;
- for my $enum (@enums) {
+ for my $enum (sort { ($name =~ $property_needs_table_re)
+ ? lc $short_enum_name{$a}
+ cmp lc $short_enum_name{$b}
+ : lc $a cmp lc $b
+ } @enums)
+ {
$enums{$enum} = $enum_val++ unless exists $enums{$enum};
}
- # Calculate the data for the special tables output for these properties.
- if ($name =~ / ^ _Perl_ (?: GCB | LB | WB ) $ /x) {
+ # Now calculate the data for the special tables output for these
+ # properties.
+ if ($name =~ $property_needs_table_re) {
# The data includes the hashes %gcb_enums, %lb_enums, etc.
# Similarly we calculate column headings for the tables.
#
# We use string evals to allow the same code to work on
# all the tables
- my $type = lc $prop_name;
# Skip if we've already done this code, which populated
# this hash
if (eval "! \%${type}_enums") {
# For each enum in the type ...
- foreach my $enum (sort keys %enums) {
+ foreach my $enum (keys %enums) {
my $value = $enums{$enum};
- my $short;
- my $abbreviated_from;
-
- # Special case this wb property value to make the
- # name more clear
- if ($enum eq 'Perl_Tailored_HSpace') {
- $short = 'hs';
- $abbreviated_from = $enum;
- }
- else {
-
- # Use the official short name, if found.
- ($short) = prop_value_aliases($type, $enum);
-
- if (! defined $short) {
-
- # But if there is no official name, use the name
- # that came from the data (if any). Otherwise,
- # the name had to come from the extras list.
- # There are two types of values in that list.
- #
- # First are those enums that are not part of the
- # property, but are defined by this code. By
- # convention these have all-caps names. We use
- # the lowercased name for these.
- #
- # Second are enums that are needed to get the
- # algorithms below to work and/or to get regexec.c
- # to compile, but don't exist in all Unicode
- # releases. These are handled outside this loop
- # as 'unused_enums'
- if (grep { $_ eq $enum } @input_enums) {
- $short = $enum
- }
- else {
- $short = lc $enum;
- }
- }
- }
-
- # If our short name is too long, or we already
- # know that the name is an abbreviation, truncate
- # to make sure it's short enough, and remember
- # that we did this so we can later add a comment in the
- # generated file
- if ( $abbreviated_from
- || length $short > $max_hdr_len)
- {
- $short = substr($short, 0, $max_hdr_len);
- $abbreviated_from = $enum
- unless $abbreviated_from;
- # If the name we are to display conflicts, try
- # another.
- while (eval "exists
- \$${type}_abbreviations{$short}")
- {
- die $@ if $@;
-
- # The increment operator on strings doesn't work
- # on those containing an '_', so just use the
- # final portion.
- my @short = split '_', $short;
- $short[-1]++;
- $short = join "_", @short;
- }
-
- eval "\$${type}_abbreviations{$short} = '$enum'";
- die $@ if $@;
- }
+ my $short_enum = $short_enum_name{$enum};
# Remember the mapping from the property value
# (enum) name to its value.
@@ -573,8 +605,14 @@ sub output_invmap ($$$$$$$) {
# Remember the inverse mapping to the short name
# so that we can properly label the generated
# table's rows and columns
- eval "\$${type}_short_enums[$value] = '$short'";
+ eval "\$${type}_short_enums[$value] = '$short_enum'";
die $@ if $@;
+
+ # And note the abbreviations that need explanation
+ if ($need_explanation{$enum}) {
+ eval "\$${type}_abbreviations{$short_enum} = '$enum'";
+ die $@ if $@;
+ }
}
# Each unused enum has the same value. They all are collapsed
diff --git a/uni_keywords.h b/uni_keywords.h
index 10cdb62a4f..95b333b813 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7287,6 +7287,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * 5034a0379d7390c7150d79889d5020f0e09fd23a984bb0a3a12dc4d2d8511f5d regen/mk_invlists.pl
+ * e9a592b418a84dce6d70d67180ae0fdfc08496c4030e6f52e19cd45d0f27fbbd regen/mk_invlists.pl
* cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl
* ex: set ro: */