summaryrefslogtreecommitdiff
path: root/lib/unicore/In.pl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unicore/In.pl')
-rw-r--r--lib/unicore/In.pl680
1 files changed, 351 insertions, 329 deletions
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl
index 9e410f9420..cd872faac1 100644
--- a/lib/unicore/In.pl
+++ b/lib/unicore/In.pl
@@ -1,469 +1,491 @@
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.txt.
+# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
-%utf8::In = (
-'LATIN' => 0,
-'GREEK' => 1,
-'CYRILLIC' => 2,
-'ARMENIAN' => 3,
-'HEBREW' => 4,
-'ARABIC' => 5,
-'SYRIAC' => 6,
-'THAANA' => 7,
-'DEVANAGARI' => 8,
-'BENGALI' => 9,
-'GURMUKHI' => 10,
-'GUJARATI' => 11,
-'ORIYA' => 12,
-'TAMIL' => 13,
-'TELUGU' => 14,
-'KANNADA' => 15,
-'MALAYALAM' => 16,
-'SINHALA' => 17,
-'THAI' => 18,
-'LAO' => 19,
-'TIBETAN' => 20,
-'MYANMAR' => 21,
-'GEORGIAN' => 22,
-'HANGUL' => 23,
-'ETHIOPIC' => 24,
-'CHEROKEE' => 25,
-'CANADIAN-ABORIGINAL' => 26,
-'OGHAM' => 27,
-'RUNIC' => 28,
-'KHMER' => 29,
-'MONGOLIAN' => 30,
-'HIRAGANA' => 31,
-'KATAKANA' => 32,
-'BOPOMOFO' => 33,
-'HAN' => 34,
-'YI' => 35,
-'OLD-ITALIC' => 36,
-'GOTHIC' => 37,
-'DESERET' => 38,
-'INHERITED' => 39,
-'Basic Latin' => 40,
-'Latin-1 Supplement' => 41,
-'Latin Extended-A' => 42,
-'Latin Extended-B' => 43,
-'IPA Extensions' => 44,
-'Spacing Modifier Letters' => 45,
-'Combining Diacritical Marks' => 46,
-'Greek Block' => 47,
-'Cyrillic Block' => 48,
-'Armenian Block' => 49,
-'Hebrew Block' => 50,
-'Arabic Block' => 51,
-'Syriac Block' => 52,
-'Thaana Block' => 53,
-'Devanagari Block' => 54,
-'Bengali Block' => 55,
-'Gurmukhi Block' => 56,
-'Gujarati Block' => 57,
-'Oriya Block' => 58,
-'Tamil Block' => 59,
-'Telugu Block' => 60,
-'Kannada Block' => 61,
-'Malayalam Block' => 62,
-'Sinhala Block' => 63,
-'Thai Block' => 64,
-'Lao Block' => 65,
-'Tibetan Block' => 66,
-'Myanmar Block' => 67,
-'Georgian Block' => 68,
-'Hangul Jamo' => 69,
-'Ethiopic Block' => 70,
-'Cherokee Block' => 71,
-'Unified Canadian Aboriginal Syllabics' => 72,
-'Ogham Block' => 73,
-'Runic Block' => 74,
-'Khmer Block' => 75,
-'Mongolian Block' => 76,
-'Latin Extended Additional' => 77,
-'Greek Extended' => 78,
-'General Punctuation' => 79,
-'Superscripts and Subscripts' => 80,
-'Currency Symbols' => 81,
-'Combining Marks for Symbols' => 82,
-'Letterlike Symbols' => 83,
-'Number Forms' => 84,
-'Arrows' => 85,
-'Mathematical Operators' => 86,
-'Miscellaneous Technical' => 87,
-'Control Pictures' => 88,
-'Optical Character Recognition' => 89,
-'Enclosed Alphanumerics' => 90,
-'Box Drawing' => 91,
-'Block Elements' => 92,
-'Geometric Shapes' => 93,
-'Miscellaneous Symbols' => 94,
-'Dingbats' => 95,
-'Braille Patterns' => 96,
-'CJK Radicals Supplement' => 97,
-'Kangxi Radicals' => 98,
-'Ideographic Description Characters' => 99,
-'CJK Symbols and Punctuation' => 100,
-'Hiragana Block' => 101,
-'Katakana Block' => 102,
-'Bopomofo Block' => 103,
-'Hangul Compatibility Jamo' => 104,
-'Kanbun' => 105,
-'Bopomofo Extended' => 106,
-'Enclosed CJK Letters and Months' => 107,
-'CJK Compatibility' => 108,
-'CJK Unified Ideographs Extension A' => 109,
-'CJK Unified Ideographs' => 110,
-'Yi Syllables' => 111,
-'Yi Radicals' => 112,
-'Hangul Syllables' => 113,
-'High Surrogates' => 114,
-'High Private Use Surrogates' => 115,
-'Low Surrogates' => 116,
-'Private Use' => 117,
-'CJK Compatibility Ideographs' => 118,
-'Alphabetic Presentation Forms' => 119,
-'Arabic Presentation Forms-A' => 120,
-'Combining Half Marks' => 121,
-'CJK Compatibility Forms' => 122,
-'Small Form Variants' => 123,
-'Arabic Presentation Forms-B' => 124,
-'Specials' => 125,
-'Halfwidth and Fullwidth Forms' => 126,
-'Old Italic' => 127,
-'Gothic Block' => 128,
-'Deseret Block' => 129,
-'Byzantine Musical Symbols' => 130,
-'Musical Symbols' => 131,
-'Mathematical Alphanumeric Symbols' => 132,
-'CJK Unified Ideographs Extension B' => 133,
-'CJK Compatibility Ideographs Supplement' => 134,
-'Tags' => 135,
-'Common' => 136,
-'Any' => 137,
-'White_space' => 138,
-'Bidi_Control' => 139,
-'Join_Control' => 140,
-'Dash' => 141,
-'Hyphen' => 142,
-'Quotation_Mark' => 143,
-'Terminal_Punctuation' => 144,
-'Other_Math' => 145,
-'Hex_Digit' => 146,
-'ASCII_Hex_Digit' => 147,
-'Other_Alphabetic' => 148,
-'Ideographic' => 149,
-'Diacritic' => 150,
-'Extender' => 151,
-'Other_Lowercase' => 152,
-'Other_Uppercase' => 153,
-'Noncharacter_Code_Point' => 154,
-'Assigned' => 155,
-'Alphabetic' => 156,
-'Lowercase' => 157,
-'Uppercase' => 158,
-'Math' => 159,
-'Lampersand' => 160,
-'ID_Start' => 161,
-'ID_Continue' => 162,
+%utf8::In =
+(
+'ARABIC' => '16',
+'ARMENIAN' => '14',
+'ASCII_Hex_Digit' => '152',
+'Alphabetic' => '164',
+'Alphabetic Presentation Forms' => '129',
+'Any' => '171',
+'Arabic Block' => '62',
+'Arabic Presentation Forms-A' => '130',
+'Arabic Presentation Forms-B' => '134',
+'Armenian Block' => '60',
+'Arrows' => '96',
+'Assigned' => '163',
+'BENGALI' => '20',
+'BOPOMOFO' => '45',
+'Basic Latin' => '51',
+'Bengali Block' => '66',
+'Bidi_Control' => '159',
+'Block Elements' => '103',
+'Bopomofo Block' => '114',
+'Bopomofo Extended' => '117',
+'Box Drawing' => '102',
+'Braille Patterns' => '107',
+'Byzantine Musical Symbols' => '140',
+'CANADIAN-ABORIGINAL' => '37',
+'CHEROKEE' => '36',
+'CJK Compatibility' => '119',
+'CJK Compatibility Forms' => '132',
+'CJK Compatibility Ideographs' => '128',
+'CJK Compatibility Ideographs Supplement' => '144',
+'CJK Ideograph' => '1',
+'CJK Ideograph Extension A' => '0',
+'CJK Ideograph Extension B' => '7',
+'CJK Radicals Supplement' => '108',
+'CJK Symbols and Punctuation' => '111',
+'CJK Unified Ideographs' => '121',
+'CJK Unified Ideographs Extension A' => '120',
+'CJK Unified Ideographs Extension B' => '143',
+'CYRILLIC' => '13',
+'Cherokee Block' => '82',
+'Combining Diacritical Marks' => '57',
+'Combining Half Marks' => '131',
+'Combining Marks for Symbols' => '93',
+'Common' => '50',
+'Control Pictures' => '99',
+'Currency Symbols' => '92',
+'Cyrillic Block' => '59',
+'DESERET' => '49',
+'DEVANAGARI' => '19',
+'Dash' => '151',
+'Deseret Block' => '139',
+'Devanagari Block' => '65',
+'Diacritic' => '154',
+'Dingbats' => '106',
+'ETHIOPIC' => '35',
+'Enclosed Alphanumerics' => '101',
+'Enclosed CJK Letters and Months' => '118',
+'Ethiopic Block' => '81',
+'Extender' => '155',
+'GEORGIAN' => '33',
+'GOTHIC' => '48',
+'GREEK' => '11',
+'GUJARATI' => '22',
+'GURMUKHI' => '21',
+'General Punctuation' => '90',
+'Geometric Shapes' => '104',
+'Georgian Block' => '79',
+'Gothic Block' => '138',
+'Greek Block' => '58',
+'Greek Extended' => '89',
+'Gujarati Block' => '68',
+'Gurmukhi Block' => '67',
+'HAN' => '42',
+'HANGUL' => '34',
+'HEBREW' => '15',
+'HIRAGANA' => '43',
+'Halfwidth and Fullwidth Forms' => '136',
+'Hangul Compatibility Jamo' => '115',
+'Hangul Jamo' => '80',
+'Hangul Syllable' => '2',
+'Hangul Syllables' => '124',
+'Hebrew Block' => '61',
+'Hex_Digit' => '153',
+'High Private Use Surrogates' => '126',
+'High Surrogates' => '125',
+'Hiragana Block' => '112',
+'Hyphen' => '150',
+'ID_Continue' => '170',
+'ID_Start' => '169',
+'INHERITED' => '12',
+'IPA Extensions' => '55',
+'Ideographic' => '161',
+'Ideographic Description Characters' => '110',
+'Join_Control' => '158',
+'KANNADA' => '26',
+'KATAKANA' => '44',
+'KHMER' => '40',
+'Kanbun' => '116',
+'Kangxi Radicals' => '109',
+'Kannada Block' => '72',
+'Katakana Block' => '113',
+'Khmer Block' => '86',
+'LAO' => '30',
+'LATIN' => '10',
+'Lampersand' => '168',
+'Lao Block' => '76',
+'Latin Extended Additional' => '88',
+'Latin Extended-A' => '53',
+'Latin Extended-B' => '54',
+'Latin-1 Supplement' => '52',
+'Letterlike Symbols' => '94',
+'Low Surrogate' => '5',
+'Low Surrogates' => '127',
+'Lowercase' => '165',
+'MALAYALAM' => '27',
+'MONGOLIAN' => '41',
+'MYANMAR' => '32',
+'Malayalam Block' => '73',
+'Math' => '167',
+'Mathematical Alphanumeric Symbols' => '142',
+'Mathematical Operators' => '97',
+'Miscellaneous Symbols' => '105',
+'Miscellaneous Technical' => '98',
+'Mongolian Block' => '87',
+'Musical Symbols' => '141',
+'Myanmar Block' => '78',
+'Non Private Use High Surrogate' => '3',
+'Noncharacter_Code_Point' => '162',
+'Number Forms' => '95',
+'OGHAM' => '38',
+'OLD-ITALIC' => '47',
+'ORIYA' => '23',
+'Ogham Block' => '84',
+'Old Italic' => '137',
+'Optical Character Recognition' => '100',
+'Oriya Block' => '69',
+'Other_Alphabetic' => '157',
+'Other_Lowercase' => '156',
+'Other_Math' => '149',
+'Other_Uppercase' => '160',
+'Plane 15 Private Use' => '8',
+'Plane 16 Private Use' => '9',
+'Private Use' => '6',
+'Private Use High Surrogate' => '4',
+'Quotation_Mark' => '148',
+'RUNIC' => '39',
+'Runic Block' => '85',
+'SINHALA' => '28',
+'SYRIAC' => '17',
+'Sinhala Block' => '74',
+'Small Form Variants' => '133',
+'Spacing Modifier Letters' => '56',
+'Specials' => '135',
+'Superscripts and Subscripts' => '91',
+'Syriac Block' => '63',
+'TAMIL' => '24',
+'TELUGU' => '25',
+'THAANA' => '18',
+'THAI' => '29',
+'TIBETAN' => '31',
+'Tags' => '145',
+'Tamil Block' => '70',
+'Telugu Block' => '71',
+'Terminal_Punctuation' => '147',
+'Thaana Block' => '64',
+'Thai Block' => '75',
+'Tibetan Block' => '77',
+'Unified Canadian Aboriginal Syllabics' => '83',
+'Uppercase' => '166',
+'White_space' => '146',
+'YI' => '46',
+'Yi Radicals' => '123',
+'Yi Syllables' => '122',
);
-%utf8::InPat = (
+%utf8::InPat =
+(
'al' => {
- 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms',
- 'Alphabetic' => 'Alphabetic',
+ 'Alphabetic' => '164',
+ 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129',
},
'an' => {
- 'Any' => 'Any',
+ 'Any' => '171',
},
'ar' => {
- 'ARMENIAN' => 'ARMENIAN',
- 'ARABIC' => 'ARABIC',
- 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
- 'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block',
- 'Arrows' => 'Arrows',
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A',
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B',
+ 'ARABIC' => '16',
+ 'ARMENIAN' => '14',
+ 'Arabic(?:[-_]|\s+)?Block' => '62',
+ 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130',
+ 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134',
+ 'Armenian(?:[-_]|\s+)?Block' => '60',
+ 'Arrows' => '96',
},
'as' => {
- 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit',
- 'Assigned' => 'Assigned',
+ 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152',
+ 'Assigned' => '163',
},
'ba' => {
- 'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin',
+ 'Basic(?:[-_]|\s+)?Latin' => '51',
},
'be' => {
- 'BENGALI' => 'BENGALI',
- 'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block',
+ 'BENGALI' => '20',
+ 'Bengali(?:[-_]|\s+)?Block' => '66',
},
'bi' => {
- 'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control',
+ 'Bidi(?:[-_]|\s+)?Control' => '159',
},
'bl' => {
- 'Block(?:[-_]|\s+)?Elements' => 'Block Elements',
+ 'Block(?:[-_]|\s+)?Elements' => '103',
},
'bo' => {
- 'BOPOMOFO' => 'BOPOMOFO',
- 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
- 'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block',
- 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended',
+ 'BOPOMOFO' => '45',
+ 'Bopomofo(?:[-_]|\s+)?Block' => '114',
+ 'Bopomofo(?:[-_]|\s+)?Extended' => '117',
+ 'Box(?:[-_]|\s+)?Drawing' => '102',
},
'br' => {
- 'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns',
+ 'Braille(?:[-_]|\s+)?Patterns' => '107',
},
'by' => {
- 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols',
+ 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140',
},
'ca' => {
- 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
+ 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37',
},
'ch' => {
- 'CHEROKEE' => 'CHEROKEE',
- 'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block',
+ 'CHEROKEE' => '36',
+ 'Cherokee(?:[-_]|\s+)?Block' => '82',
},
'cj' => {
- 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement',
- 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation',
- 'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement',
+ 'CJK(?:[-_]|\s+)?Compatibility' => '119',
+ 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132',
+ 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128',
+ 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144',
+ 'CJK(?:[-_]|\s+)?Ideograph' => '1',
+ 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0',
+ 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7',
+ 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108',
+ 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111',
+ 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121',
+ 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120',
+ 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143',
},
'co' => {
- 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks',
- 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols',
- 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
- 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks',
- 'Common' => 'Common',
+ 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57',
+ 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131',
+ 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93',
+ 'Common' => '50',
+ 'Control(?:[-_]|\s+)?Pictures' => '99',
},
'cu' => {
- 'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols',
+ 'Currency(?:[-_]|\s+)?Symbols' => '92',
},
'cy' => {
- 'CYRILLIC' => 'CYRILLIC',
- 'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block',
+ 'CYRILLIC' => '13',
+ 'Cyrillic(?:[-_]|\s+)?Block' => '59',
},
'da' => {
- 'Dash' => 'Dash',
+ 'Dash' => '151',
},
'de' => {
- 'DEVANAGARI' => 'DEVANAGARI',
- 'DESERET' => 'DESERET',
- 'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block',
- 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
+ 'DESERET' => '49',
+ 'DEVANAGARI' => '19',
+ 'Deseret(?:[-_]|\s+)?Block' => '139',
+ 'Devanagari(?:[-_]|\s+)?Block' => '65',
},
'di' => {
- 'Dingbats' => 'Dingbats',
- 'Diacritic' => 'Diacritic',
+ 'Diacritic' => '154',
+ 'Dingbats' => '106',
},
'en' => {
- 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics',
- 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months',
+ 'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101',
+ 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118',
},
'et' => {
- 'ETHIOPIC' => 'ETHIOPIC',
- 'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block',
+ 'ETHIOPIC' => '35',
+ 'Ethiopic(?:[-_]|\s+)?Block' => '81',
},
'ex' => {
- 'Extender' => 'Extender',
+ 'Extender' => '155',
},
'ge' => {
- 'GEORGIAN' => 'GEORGIAN',
- 'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block',
- 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
- 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes',
+ 'GEORGIAN' => '33',
+ 'General(?:[-_]|\s+)?Punctuation' => '90',
+ 'Geometric(?:[-_]|\s+)?Shapes' => '104',
+ 'Georgian(?:[-_]|\s+)?Block' => '79',
},
'go' => {
- 'GOTHIC' => 'GOTHIC',
- 'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block',
+ 'GOTHIC' => '48',
+ 'Gothic(?:[-_]|\s+)?Block' => '138',
},
'gr' => {
- 'GREEK' => 'GREEK',
- 'Greek(?:[-_]|\s+)?Block' => 'Greek Block',
- 'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended',
+ 'GREEK' => '11',
+ 'Greek(?:[-_]|\s+)?Block' => '58',
+ 'Greek(?:[-_]|\s+)?Extended' => '89',
},
'gu' => {
- 'GURMUKHI' => 'GURMUKHI',
- 'GUJARATI' => 'GUJARATI',
- 'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block',
- 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
+ 'GUJARATI' => '22',
+ 'GURMUKHI' => '21',
+ 'Gujarati(?:[-_]|\s+)?Block' => '68',
+ 'Gurmukhi(?:[-_]|\s+)?Block' => '67',
},
'ha' => {
- 'HANGUL' => 'HANGUL',
- 'HAN' => 'HAN',
- 'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo',
- 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo',
- 'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables',
- 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
+ 'HAN' => '42',
+ 'HANGUL' => '34',
+ 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
+ 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115',
+ 'Hangul(?:[-_]|\s+)?Jamo' => '80',
+ 'Hangul(?:[-_]|\s+)?Syllable' => '2',
+ 'Hangul(?:[-_]|\s+)?Syllables' => '124',
},
'he' => {
- 'HEBREW' => 'HEBREW',
- 'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block',
- 'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit',
+ 'HEBREW' => '15',
+ 'Hebrew(?:[-_]|\s+)?Block' => '61',
+ 'Hex(?:[-_]|\s+)?Digit' => '153',
},
'hi' => {
- 'HIRAGANA' => 'HIRAGANA',
- 'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block',
- 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
- 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
+ 'HIRAGANA' => '43',
+ 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126',
+ 'High(?:[-_]|\s+)?Surrogates' => '125',
+ 'Hiragana(?:[-_]|\s+)?Block' => '112',
},
'hy' => {
- 'Hyphen' => 'Hyphen',
+ 'Hyphen' => '150',
},
'id' => {
- 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters',
- 'Ideographic' => 'Ideographic',
- 'ID(?:[-_]|\s+)?Start' => 'ID_Start',
- 'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
+ 'ID(?:[-_]|\s+)?Continue' => '170',
+ 'ID(?:[-_]|\s+)?Start' => '169',
+ 'Ideographic' => '161',
+ 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110',
},
'in' => {
- 'INHERITED' => 'INHERITED',
+ 'INHERITED' => '12',
},
'ip' => {
- 'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions',
+ 'IPA(?:[-_]|\s+)?Extensions' => '55',
},
'jo' => {
- 'Join(?:[-_]|\s+)?Control' => 'Join_Control',
+ 'Join(?:[-_]|\s+)?Control' => '158',
},
'ka' => {
- 'KANNADA' => 'KANNADA',
- 'KATAKANA' => 'KATAKANA',
- 'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block',
- 'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals',
- 'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block',
- 'Kanbun' => 'Kanbun',
+ 'KANNADA' => '26',
+ 'KATAKANA' => '44',
+ 'Kanbun' => '116',
+ 'Kangxi(?:[-_]|\s+)?Radicals' => '109',
+ 'Kannada(?:[-_]|\s+)?Block' => '72',
+ 'Katakana(?:[-_]|\s+)?Block' => '113',
},
'kh' => {
- 'KHMER' => 'KHMER',
- 'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block',
+ 'KHMER' => '40',
+ 'Khmer(?:[-_]|\s+)?Block' => '86',
},
'la' => {
- 'LATIN' => 'LATIN',
- 'LAO' => 'LAO',
- 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B',
- 'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional',
- 'Lampersand' => 'Lampersand',
+ 'LAO' => '30',
+ 'LATIN' => '10',
+ 'Lampersand' => '168',
+ 'Lao(?:[-_]|\s+)?Block' => '76',
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88',
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53',
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54',
+ 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52',
},
'le' => {
- 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols',
+ 'Letterlike(?:[-_]|\s+)?Symbols' => '94',
},
'lo' => {
- 'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates',
- 'Lowercase' => 'Lowercase',
+ 'Low(?:[-_]|\s+)?Surrogate' => '5',
+ 'Low(?:[-_]|\s+)?Surrogates' => '127',
+ 'Lowercase' => '165',
},
'ma' => {
- 'MALAYALAM' => 'MALAYALAM',
- 'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block',
- 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators',
- 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols',
- 'Math' => 'Math',
+ 'MALAYALAM' => '27',
+ 'Malayalam(?:[-_]|\s+)?Block' => '73',
+ 'Math' => '167',
+ 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142',
+ 'Mathematical(?:[-_]|\s+)?Operators' => '97',
},
'mi' => {
- 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical',
- 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols',
+ 'Miscellaneous(?:[-_]|\s+)?Symbols' => '105',
+ 'Miscellaneous(?:[-_]|\s+)?Technical' => '98',
},
'mo' => {
- 'MONGOLIAN' => 'MONGOLIAN',
- 'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block',
+ 'MONGOLIAN' => '41',
+ 'Mongolian(?:[-_]|\s+)?Block' => '87',
},
'mu' => {
- 'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols',
+ 'Musical(?:[-_]|\s+)?Symbols' => '141',
},
'my' => {
- 'MYANMAR' => 'MYANMAR',
- 'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block',
+ 'MYANMAR' => '32',
+ 'Myanmar(?:[-_]|\s+)?Block' => '78',
},
'no' => {
- 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point',
+ 'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3',
+ 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162',
},
'nu' => {
- 'Number(?:[-_]|\s+)?Forms' => 'Number Forms',
+ 'Number(?:[-_]|\s+)?Forms' => '95',
},
'og' => {
- 'OGHAM' => 'OGHAM',
- 'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block',
+ 'OGHAM' => '38',
+ 'Ogham(?:[-_]|\s+)?Block' => '84',
},
'ol' => {
- 'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC',
- 'Old(?:[-_]|\s+)?Italic' => 'Old Italic',
+ 'OLD(?:[-_]|\s+)?ITALIC' => '47',
+ 'Old(?:[-_]|\s+)?Italic' => '137',
},
'op' => {
- 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition',
+ 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100',
},
'or' => {
- 'ORIYA' => 'ORIYA',
- 'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block',
+ 'ORIYA' => '23',
+ 'Oriya(?:[-_]|\s+)?Block' => '69',
},
'ot' => {
- 'Other(?:[-_]|\s+)?Math' => 'Other_Math',
- 'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic',
- 'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase',
- 'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase',
+ 'Other(?:[-_]|\s+)?Alphabetic' => '157',
+ 'Other(?:[-_]|\s+)?Lowercase' => '156',
+ 'Other(?:[-_]|\s+)?Math' => '149',
+ 'Other(?:[-_]|\s+)?Uppercase' => '160',
+},
+'pl' => {
+ 'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8',
+ 'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9',
},
'pr' => {
- 'Private(?:[-_]|\s+)?Use' => 'Private Use',
+ 'Private(?:[-_]|\s+)?Use' => '6',
+ 'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4',
},
'qu' => {
- 'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark',
+ 'Quotation(?:[-_]|\s+)?Mark' => '148',
},
'ru' => {
- 'RUNIC' => 'RUNIC',
- 'Runic(?:[-_]|\s+)?Block' => 'Runic Block',
+ 'RUNIC' => '39',
+ 'Runic(?:[-_]|\s+)?Block' => '85',
},
'si' => {
- 'SINHALA' => 'SINHALA',
- 'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block',
+ 'SINHALA' => '28',
+ 'Sinhala(?:[-_]|\s+)?Block' => '74',
},
'sm' => {
- 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants',
+ 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133',
},
'sp' => {
- 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters',
- 'Specials' => 'Specials',
+ 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56',
+ 'Specials' => '135',
},
'su' => {
- 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts',
+ 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91',
},
'sy' => {
- 'SYRIAC' => 'SYRIAC',
- 'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block',
+ 'SYRIAC' => '17',
+ 'Syriac(?:[-_]|\s+)?Block' => '63',
},
'ta' => {
- 'TAMIL' => 'TAMIL',
- 'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block',
- 'Tags' => 'Tags',
+ 'TAMIL' => '24',
+ 'Tags' => '145',
+ 'Tamil(?:[-_]|\s+)?Block' => '70',
},
'te' => {
- 'TELUGU' => 'TELUGU',
- 'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block',
- 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation',
+ 'TELUGU' => '25',
+ 'Telugu(?:[-_]|\s+)?Block' => '71',
+ 'Terminal(?:[-_]|\s+)?Punctuation' => '147',
},
'th' => {
- 'THAANA' => 'THAANA',
- 'THAI' => 'THAI',
- 'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block',
- 'Thai(?:[-_]|\s+)?Block' => 'Thai Block',
+ 'THAANA' => '18',
+ 'THAI' => '29',
+ 'Thaana(?:[-_]|\s+)?Block' => '64',
+ 'Thai(?:[-_]|\s+)?Block' => '75',
},
'ti' => {
- 'TIBETAN' => 'TIBETAN',
- 'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block',
+ 'TIBETAN' => '31',
+ 'Tibetan(?:[-_]|\s+)?Block' => '77',
},
'un' => {
- 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
+ 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83',
},
'up' => {
- 'Uppercase' => 'Uppercase',
+ 'Uppercase' => '166',
},
'wh' => {
- 'White(?:[-_]|\s+)?space' => 'White_space',
+ 'White(?:[-_]|\s+)?space' => '146',
},
'yi' => {
- 'YI' => 'YI',
- 'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables',
- 'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals',
+ 'YI' => '46',
+ 'Yi(?:[-_]|\s+)?Radicals' => '123',
+ 'Yi(?:[-_]|\s+)?Syllables' => '122',
},
);