diff options
Diffstat (limited to 'lib/unicore/In.pl')
-rw-r--r-- | lib/unicore/In.pl | 680 |
1 files changed, 351 insertions, 329 deletions
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index 9e410f9420..cd872faac1 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -1,469 +1,491 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.txt. +# This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! -%utf8::In = ( -'LATIN' => 0, -'GREEK' => 1, -'CYRILLIC' => 2, -'ARMENIAN' => 3, -'HEBREW' => 4, -'ARABIC' => 5, -'SYRIAC' => 6, -'THAANA' => 7, -'DEVANAGARI' => 8, -'BENGALI' => 9, -'GURMUKHI' => 10, -'GUJARATI' => 11, -'ORIYA' => 12, -'TAMIL' => 13, -'TELUGU' => 14, -'KANNADA' => 15, -'MALAYALAM' => 16, -'SINHALA' => 17, -'THAI' => 18, -'LAO' => 19, -'TIBETAN' => 20, -'MYANMAR' => 21, -'GEORGIAN' => 22, -'HANGUL' => 23, -'ETHIOPIC' => 24, -'CHEROKEE' => 25, -'CANADIAN-ABORIGINAL' => 26, -'OGHAM' => 27, -'RUNIC' => 28, -'KHMER' => 29, -'MONGOLIAN' => 30, -'HIRAGANA' => 31, -'KATAKANA' => 32, -'BOPOMOFO' => 33, -'HAN' => 34, -'YI' => 35, -'OLD-ITALIC' => 36, -'GOTHIC' => 37, -'DESERET' => 38, -'INHERITED' => 39, -'Basic Latin' => 40, -'Latin-1 Supplement' => 41, -'Latin Extended-A' => 42, -'Latin Extended-B' => 43, -'IPA Extensions' => 44, -'Spacing Modifier Letters' => 45, -'Combining Diacritical Marks' => 46, -'Greek Block' => 47, -'Cyrillic Block' => 48, -'Armenian Block' => 49, -'Hebrew Block' => 50, -'Arabic Block' => 51, -'Syriac Block' => 52, -'Thaana Block' => 53, -'Devanagari Block' => 54, -'Bengali Block' => 55, -'Gurmukhi Block' => 56, -'Gujarati Block' => 57, -'Oriya Block' => 58, -'Tamil Block' => 59, -'Telugu Block' => 60, -'Kannada Block' => 61, -'Malayalam Block' => 62, -'Sinhala Block' => 63, -'Thai Block' => 64, -'Lao Block' => 65, -'Tibetan Block' => 66, -'Myanmar Block' => 67, -'Georgian Block' => 68, -'Hangul Jamo' => 69, -'Ethiopic Block' => 70, -'Cherokee Block' => 71, -'Unified Canadian Aboriginal Syllabics' => 72, -'Ogham Block' => 73, -'Runic Block' => 74, -'Khmer Block' => 75, -'Mongolian Block' => 76, -'Latin Extended Additional' => 77, -'Greek Extended' => 78, -'General Punctuation' => 79, -'Superscripts and Subscripts' => 80, -'Currency Symbols' => 81, -'Combining Marks for Symbols' => 82, -'Letterlike Symbols' => 83, -'Number Forms' => 84, -'Arrows' => 85, -'Mathematical Operators' => 86, -'Miscellaneous Technical' => 87, -'Control Pictures' => 88, -'Optical Character Recognition' => 89, -'Enclosed Alphanumerics' => 90, -'Box Drawing' => 91, -'Block Elements' => 92, -'Geometric Shapes' => 93, -'Miscellaneous Symbols' => 94, -'Dingbats' => 95, -'Braille Patterns' => 96, -'CJK Radicals Supplement' => 97, -'Kangxi Radicals' => 98, -'Ideographic Description Characters' => 99, -'CJK Symbols and Punctuation' => 100, -'Hiragana Block' => 101, -'Katakana Block' => 102, -'Bopomofo Block' => 103, -'Hangul Compatibility Jamo' => 104, -'Kanbun' => 105, -'Bopomofo Extended' => 106, -'Enclosed CJK Letters and Months' => 107, -'CJK Compatibility' => 108, -'CJK Unified Ideographs Extension A' => 109, -'CJK Unified Ideographs' => 110, -'Yi Syllables' => 111, -'Yi Radicals' => 112, -'Hangul Syllables' => 113, -'High Surrogates' => 114, -'High Private Use Surrogates' => 115, -'Low Surrogates' => 116, -'Private Use' => 117, -'CJK Compatibility Ideographs' => 118, -'Alphabetic Presentation Forms' => 119, -'Arabic Presentation Forms-A' => 120, -'Combining Half Marks' => 121, -'CJK Compatibility Forms' => 122, -'Small Form Variants' => 123, -'Arabic Presentation Forms-B' => 124, -'Specials' => 125, -'Halfwidth and Fullwidth Forms' => 126, -'Old Italic' => 127, -'Gothic Block' => 128, -'Deseret Block' => 129, -'Byzantine Musical Symbols' => 130, -'Musical Symbols' => 131, -'Mathematical Alphanumeric Symbols' => 132, -'CJK Unified Ideographs Extension B' => 133, -'CJK Compatibility Ideographs Supplement' => 134, -'Tags' => 135, -'Common' => 136, -'Any' => 137, -'White_space' => 138, -'Bidi_Control' => 139, -'Join_Control' => 140, -'Dash' => 141, -'Hyphen' => 142, -'Quotation_Mark' => 143, -'Terminal_Punctuation' => 144, -'Other_Math' => 145, -'Hex_Digit' => 146, -'ASCII_Hex_Digit' => 147, -'Other_Alphabetic' => 148, -'Ideographic' => 149, -'Diacritic' => 150, -'Extender' => 151, -'Other_Lowercase' => 152, -'Other_Uppercase' => 153, -'Noncharacter_Code_Point' => 154, -'Assigned' => 155, -'Alphabetic' => 156, -'Lowercase' => 157, -'Uppercase' => 158, -'Math' => 159, -'Lampersand' => 160, -'ID_Start' => 161, -'ID_Continue' => 162, +%utf8::In = +( +'ARABIC' => '16', +'ARMENIAN' => '14', +'ASCII_Hex_Digit' => '152', +'Alphabetic' => '164', +'Alphabetic Presentation Forms' => '129', +'Any' => '171', +'Arabic Block' => '62', +'Arabic Presentation Forms-A' => '130', +'Arabic Presentation Forms-B' => '134', +'Armenian Block' => '60', +'Arrows' => '96', +'Assigned' => '163', +'BENGALI' => '20', +'BOPOMOFO' => '45', +'Basic Latin' => '51', +'Bengali Block' => '66', +'Bidi_Control' => '159', +'Block Elements' => '103', +'Bopomofo Block' => '114', +'Bopomofo Extended' => '117', +'Box Drawing' => '102', +'Braille Patterns' => '107', +'Byzantine Musical Symbols' => '140', +'CANADIAN-ABORIGINAL' => '37', +'CHEROKEE' => '36', +'CJK Compatibility' => '119', +'CJK Compatibility Forms' => '132', +'CJK Compatibility Ideographs' => '128', +'CJK Compatibility Ideographs Supplement' => '144', +'CJK Ideograph' => '1', +'CJK Ideograph Extension A' => '0', +'CJK Ideograph Extension B' => '7', +'CJK Radicals Supplement' => '108', +'CJK Symbols and Punctuation' => '111', +'CJK Unified Ideographs' => '121', +'CJK Unified Ideographs Extension A' => '120', +'CJK Unified Ideographs Extension B' => '143', +'CYRILLIC' => '13', +'Cherokee Block' => '82', +'Combining Diacritical Marks' => '57', +'Combining Half Marks' => '131', +'Combining Marks for Symbols' => '93', +'Common' => '50', +'Control Pictures' => '99', +'Currency Symbols' => '92', +'Cyrillic Block' => '59', +'DESERET' => '49', +'DEVANAGARI' => '19', +'Dash' => '151', +'Deseret Block' => '139', +'Devanagari Block' => '65', +'Diacritic' => '154', +'Dingbats' => '106', +'ETHIOPIC' => '35', +'Enclosed Alphanumerics' => '101', +'Enclosed CJK Letters and Months' => '118', +'Ethiopic Block' => '81', +'Extender' => '155', +'GEORGIAN' => '33', +'GOTHIC' => '48', +'GREEK' => '11', +'GUJARATI' => '22', +'GURMUKHI' => '21', +'General Punctuation' => '90', +'Geometric Shapes' => '104', +'Georgian Block' => '79', +'Gothic Block' => '138', +'Greek Block' => '58', +'Greek Extended' => '89', +'Gujarati Block' => '68', +'Gurmukhi Block' => '67', +'HAN' => '42', +'HANGUL' => '34', +'HEBREW' => '15', +'HIRAGANA' => '43', +'Halfwidth and Fullwidth Forms' => '136', +'Hangul Compatibility Jamo' => '115', +'Hangul Jamo' => '80', +'Hangul Syllable' => '2', +'Hangul Syllables' => '124', +'Hebrew Block' => '61', +'Hex_Digit' => '153', +'High Private Use Surrogates' => '126', +'High Surrogates' => '125', +'Hiragana Block' => '112', +'Hyphen' => '150', +'ID_Continue' => '170', +'ID_Start' => '169', +'INHERITED' => '12', +'IPA Extensions' => '55', +'Ideographic' => '161', +'Ideographic Description Characters' => '110', +'Join_Control' => '158', +'KANNADA' => '26', +'KATAKANA' => '44', +'KHMER' => '40', +'Kanbun' => '116', +'Kangxi Radicals' => '109', +'Kannada Block' => '72', +'Katakana Block' => '113', +'Khmer Block' => '86', +'LAO' => '30', +'LATIN' => '10', +'Lampersand' => '168', +'Lao Block' => '76', +'Latin Extended Additional' => '88', +'Latin Extended-A' => '53', +'Latin Extended-B' => '54', +'Latin-1 Supplement' => '52', +'Letterlike Symbols' => '94', +'Low Surrogate' => '5', +'Low Surrogates' => '127', +'Lowercase' => '165', +'MALAYALAM' => '27', +'MONGOLIAN' => '41', +'MYANMAR' => '32', +'Malayalam Block' => '73', +'Math' => '167', +'Mathematical Alphanumeric Symbols' => '142', +'Mathematical Operators' => '97', +'Miscellaneous Symbols' => '105', +'Miscellaneous Technical' => '98', +'Mongolian Block' => '87', +'Musical Symbols' => '141', +'Myanmar Block' => '78', +'Non Private Use High Surrogate' => '3', +'Noncharacter_Code_Point' => '162', +'Number Forms' => '95', +'OGHAM' => '38', +'OLD-ITALIC' => '47', +'ORIYA' => '23', +'Ogham Block' => '84', +'Old Italic' => '137', +'Optical Character Recognition' => '100', +'Oriya Block' => '69', +'Other_Alphabetic' => '157', +'Other_Lowercase' => '156', +'Other_Math' => '149', +'Other_Uppercase' => '160', +'Plane 15 Private Use' => '8', +'Plane 16 Private Use' => '9', +'Private Use' => '6', +'Private Use High Surrogate' => '4', +'Quotation_Mark' => '148', +'RUNIC' => '39', +'Runic Block' => '85', +'SINHALA' => '28', +'SYRIAC' => '17', +'Sinhala Block' => '74', +'Small Form Variants' => '133', +'Spacing Modifier Letters' => '56', +'Specials' => '135', +'Superscripts and Subscripts' => '91', +'Syriac Block' => '63', +'TAMIL' => '24', +'TELUGU' => '25', +'THAANA' => '18', +'THAI' => '29', +'TIBETAN' => '31', +'Tags' => '145', +'Tamil Block' => '70', +'Telugu Block' => '71', +'Terminal_Punctuation' => '147', +'Thaana Block' => '64', +'Thai Block' => '75', +'Tibetan Block' => '77', +'Unified Canadian Aboriginal Syllabics' => '83', +'Uppercase' => '166', +'White_space' => '146', +'YI' => '46', +'Yi Radicals' => '123', +'Yi Syllables' => '122', ); -%utf8::InPat = ( +%utf8::InPat = +( 'al' => { - 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms', - 'Alphabetic' => 'Alphabetic', + 'Alphabetic' => '164', + 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129', }, 'an' => { - 'Any' => 'Any', + 'Any' => '171', }, 'ar' => { - 'ARMENIAN' => 'ARMENIAN', - 'ARABIC' => 'ARABIC', - 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block', - 'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block', - 'Arrows' => 'Arrows', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B', + 'ARABIC' => '16', + 'ARMENIAN' => '14', + 'Arabic(?:[-_]|\s+)?Block' => '62', + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130', + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134', + 'Armenian(?:[-_]|\s+)?Block' => '60', + 'Arrows' => '96', }, 'as' => { - 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit', - 'Assigned' => 'Assigned', + 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152', + 'Assigned' => '163', }, 'ba' => { - 'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin', + 'Basic(?:[-_]|\s+)?Latin' => '51', }, 'be' => { - 'BENGALI' => 'BENGALI', - 'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block', + 'BENGALI' => '20', + 'Bengali(?:[-_]|\s+)?Block' => '66', }, 'bi' => { - 'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control', + 'Bidi(?:[-_]|\s+)?Control' => '159', }, 'bl' => { - 'Block(?:[-_]|\s+)?Elements' => 'Block Elements', + 'Block(?:[-_]|\s+)?Elements' => '103', }, 'bo' => { - 'BOPOMOFO' => 'BOPOMOFO', - 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing', - 'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block', - 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended', + 'BOPOMOFO' => '45', + 'Bopomofo(?:[-_]|\s+)?Block' => '114', + 'Bopomofo(?:[-_]|\s+)?Extended' => '117', + 'Box(?:[-_]|\s+)?Drawing' => '102', }, 'br' => { - 'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns', + 'Braille(?:[-_]|\s+)?Patterns' => '107', }, 'by' => { - 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols', + 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140', }, 'ca' => { - 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL', + 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37', }, 'ch' => { - 'CHEROKEE' => 'CHEROKEE', - 'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block', + 'CHEROKEE' => '36', + 'Cherokee(?:[-_]|\s+)?Block' => '82', }, 'cj' => { - 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement', - 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation', - 'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement', + 'CJK(?:[-_]|\s+)?Compatibility' => '119', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144', + 'CJK(?:[-_]|\s+)?Ideograph' => '1', + 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0', + 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7', + 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108', + 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143', }, 'co' => { - 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks', - 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols', - 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures', - 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks', - 'Common' => 'Common', + 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57', + 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131', + 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93', + 'Common' => '50', + 'Control(?:[-_]|\s+)?Pictures' => '99', }, 'cu' => { - 'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols', + 'Currency(?:[-_]|\s+)?Symbols' => '92', }, 'cy' => { - 'CYRILLIC' => 'CYRILLIC', - 'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block', + 'CYRILLIC' => '13', + 'Cyrillic(?:[-_]|\s+)?Block' => '59', }, 'da' => { - 'Dash' => 'Dash', + 'Dash' => '151', }, 'de' => { - 'DEVANAGARI' => 'DEVANAGARI', - 'DESERET' => 'DESERET', - 'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block', - 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block', + 'DESERET' => '49', + 'DEVANAGARI' => '19', + 'Deseret(?:[-_]|\s+)?Block' => '139', + 'Devanagari(?:[-_]|\s+)?Block' => '65', }, 'di' => { - 'Dingbats' => 'Dingbats', - 'Diacritic' => 'Diacritic', + 'Diacritic' => '154', + 'Dingbats' => '106', }, 'en' => { - 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics', - 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months', + 'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101', + 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118', }, 'et' => { - 'ETHIOPIC' => 'ETHIOPIC', - 'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block', + 'ETHIOPIC' => '35', + 'Ethiopic(?:[-_]|\s+)?Block' => '81', }, 'ex' => { - 'Extender' => 'Extender', + 'Extender' => '155', }, 'ge' => { - 'GEORGIAN' => 'GEORGIAN', - 'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block', - 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation', - 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes', + 'GEORGIAN' => '33', + 'General(?:[-_]|\s+)?Punctuation' => '90', + 'Geometric(?:[-_]|\s+)?Shapes' => '104', + 'Georgian(?:[-_]|\s+)?Block' => '79', }, 'go' => { - 'GOTHIC' => 'GOTHIC', - 'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block', + 'GOTHIC' => '48', + 'Gothic(?:[-_]|\s+)?Block' => '138', }, 'gr' => { - 'GREEK' => 'GREEK', - 'Greek(?:[-_]|\s+)?Block' => 'Greek Block', - 'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended', + 'GREEK' => '11', + 'Greek(?:[-_]|\s+)?Block' => '58', + 'Greek(?:[-_]|\s+)?Extended' => '89', }, 'gu' => { - 'GURMUKHI' => 'GURMUKHI', - 'GUJARATI' => 'GUJARATI', - 'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block', - 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block', + 'GUJARATI' => '22', + 'GURMUKHI' => '21', + 'Gujarati(?:[-_]|\s+)?Block' => '68', + 'Gurmukhi(?:[-_]|\s+)?Block' => '67', }, 'ha' => { - 'HANGUL' => 'HANGUL', - 'HAN' => 'HAN', - 'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo', - 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo', - 'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables', - 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms', + 'HAN' => '42', + 'HANGUL' => '34', + 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136', + 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115', + 'Hangul(?:[-_]|\s+)?Jamo' => '80', + 'Hangul(?:[-_]|\s+)?Syllable' => '2', + 'Hangul(?:[-_]|\s+)?Syllables' => '124', }, 'he' => { - 'HEBREW' => 'HEBREW', - 'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block', - 'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit', + 'HEBREW' => '15', + 'Hebrew(?:[-_]|\s+)?Block' => '61', + 'Hex(?:[-_]|\s+)?Digit' => '153', }, 'hi' => { - 'HIRAGANA' => 'HIRAGANA', - 'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block', - 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates', - 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates', + 'HIRAGANA' => '43', + 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126', + 'High(?:[-_]|\s+)?Surrogates' => '125', + 'Hiragana(?:[-_]|\s+)?Block' => '112', }, 'hy' => { - 'Hyphen' => 'Hyphen', + 'Hyphen' => '150', }, 'id' => { - 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters', - 'Ideographic' => 'Ideographic', - 'ID(?:[-_]|\s+)?Start' => 'ID_Start', - 'ID(?:[-_]|\s+)?Continue' => 'ID_Continue', + 'ID(?:[-_]|\s+)?Continue' => '170', + 'ID(?:[-_]|\s+)?Start' => '169', + 'Ideographic' => '161', + 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110', }, 'in' => { - 'INHERITED' => 'INHERITED', + 'INHERITED' => '12', }, 'ip' => { - 'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions', + 'IPA(?:[-_]|\s+)?Extensions' => '55', }, 'jo' => { - 'Join(?:[-_]|\s+)?Control' => 'Join_Control', + 'Join(?:[-_]|\s+)?Control' => '158', }, 'ka' => { - 'KANNADA' => 'KANNADA', - 'KATAKANA' => 'KATAKANA', - 'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block', - 'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals', - 'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block', - 'Kanbun' => 'Kanbun', + 'KANNADA' => '26', + 'KATAKANA' => '44', + 'Kanbun' => '116', + 'Kangxi(?:[-_]|\s+)?Radicals' => '109', + 'Kannada(?:[-_]|\s+)?Block' => '72', + 'Katakana(?:[-_]|\s+)?Block' => '113', }, 'kh' => { - 'KHMER' => 'KHMER', - 'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block', + 'KHMER' => '40', + 'Khmer(?:[-_]|\s+)?Block' => '86', }, 'la' => { - 'LATIN' => 'LATIN', - 'LAO' => 'LAO', - 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B', - 'Lao(?:[-_]|\s+)?Block' => 'Lao Block', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional', - 'Lampersand' => 'Lampersand', + 'LAO' => '30', + 'LATIN' => '10', + 'Lampersand' => '168', + 'Lao(?:[-_]|\s+)?Block' => '76', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54', + 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52', }, 'le' => { - 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols', + 'Letterlike(?:[-_]|\s+)?Symbols' => '94', }, 'lo' => { - 'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates', - 'Lowercase' => 'Lowercase', + 'Low(?:[-_]|\s+)?Surrogate' => '5', + 'Low(?:[-_]|\s+)?Surrogates' => '127', + 'Lowercase' => '165', }, 'ma' => { - 'MALAYALAM' => 'MALAYALAM', - 'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block', - 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators', - 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols', - 'Math' => 'Math', + 'MALAYALAM' => '27', + 'Malayalam(?:[-_]|\s+)?Block' => '73', + 'Math' => '167', + 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142', + 'Mathematical(?:[-_]|\s+)?Operators' => '97', }, 'mi' => { - 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical', - 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols', + 'Miscellaneous(?:[-_]|\s+)?Symbols' => '105', + 'Miscellaneous(?:[-_]|\s+)?Technical' => '98', }, 'mo' => { - 'MONGOLIAN' => 'MONGOLIAN', - 'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block', + 'MONGOLIAN' => '41', + 'Mongolian(?:[-_]|\s+)?Block' => '87', }, 'mu' => { - 'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols', + 'Musical(?:[-_]|\s+)?Symbols' => '141', }, 'my' => { - 'MYANMAR' => 'MYANMAR', - 'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block', + 'MYANMAR' => '32', + 'Myanmar(?:[-_]|\s+)?Block' => '78', }, 'no' => { - 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point', + 'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3', + 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162', }, 'nu' => { - 'Number(?:[-_]|\s+)?Forms' => 'Number Forms', + 'Number(?:[-_]|\s+)?Forms' => '95', }, 'og' => { - 'OGHAM' => 'OGHAM', - 'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block', + 'OGHAM' => '38', + 'Ogham(?:[-_]|\s+)?Block' => '84', }, 'ol' => { - 'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC', - 'Old(?:[-_]|\s+)?Italic' => 'Old Italic', + 'OLD(?:[-_]|\s+)?ITALIC' => '47', + 'Old(?:[-_]|\s+)?Italic' => '137', }, 'op' => { - 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition', + 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100', }, 'or' => { - 'ORIYA' => 'ORIYA', - 'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block', + 'ORIYA' => '23', + 'Oriya(?:[-_]|\s+)?Block' => '69', }, 'ot' => { - 'Other(?:[-_]|\s+)?Math' => 'Other_Math', - 'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic', - 'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase', - 'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase', + 'Other(?:[-_]|\s+)?Alphabetic' => '157', + 'Other(?:[-_]|\s+)?Lowercase' => '156', + 'Other(?:[-_]|\s+)?Math' => '149', + 'Other(?:[-_]|\s+)?Uppercase' => '160', +}, +'pl' => { + 'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8', + 'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9', }, 'pr' => { - 'Private(?:[-_]|\s+)?Use' => 'Private Use', + 'Private(?:[-_]|\s+)?Use' => '6', + 'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4', }, 'qu' => { - 'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark', + 'Quotation(?:[-_]|\s+)?Mark' => '148', }, 'ru' => { - 'RUNIC' => 'RUNIC', - 'Runic(?:[-_]|\s+)?Block' => 'Runic Block', + 'RUNIC' => '39', + 'Runic(?:[-_]|\s+)?Block' => '85', }, 'si' => { - 'SINHALA' => 'SINHALA', - 'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block', + 'SINHALA' => '28', + 'Sinhala(?:[-_]|\s+)?Block' => '74', }, 'sm' => { - 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants', + 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133', }, 'sp' => { - 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters', - 'Specials' => 'Specials', + 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56', + 'Specials' => '135', }, 'su' => { - 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts', + 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91', }, 'sy' => { - 'SYRIAC' => 'SYRIAC', - 'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block', + 'SYRIAC' => '17', + 'Syriac(?:[-_]|\s+)?Block' => '63', }, 'ta' => { - 'TAMIL' => 'TAMIL', - 'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block', - 'Tags' => 'Tags', + 'TAMIL' => '24', + 'Tags' => '145', + 'Tamil(?:[-_]|\s+)?Block' => '70', }, 'te' => { - 'TELUGU' => 'TELUGU', - 'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block', - 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation', + 'TELUGU' => '25', + 'Telugu(?:[-_]|\s+)?Block' => '71', + 'Terminal(?:[-_]|\s+)?Punctuation' => '147', }, 'th' => { - 'THAANA' => 'THAANA', - 'THAI' => 'THAI', - 'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block', - 'Thai(?:[-_]|\s+)?Block' => 'Thai Block', + 'THAANA' => '18', + 'THAI' => '29', + 'Thaana(?:[-_]|\s+)?Block' => '64', + 'Thai(?:[-_]|\s+)?Block' => '75', }, 'ti' => { - 'TIBETAN' => 'TIBETAN', - 'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block', + 'TIBETAN' => '31', + 'Tibetan(?:[-_]|\s+)?Block' => '77', }, 'un' => { - 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics', + 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83', }, 'up' => { - 'Uppercase' => 'Uppercase', + 'Uppercase' => '166', }, 'wh' => { - 'White(?:[-_]|\s+)?space' => 'White_space', + 'White(?:[-_]|\s+)?space' => '146', }, 'yi' => { - 'YI' => 'YI', - 'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables', - 'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals', + 'YI' => '46', + 'Yi(?:[-_]|\s+)?Radicals' => '123', + 'Yi(?:[-_]|\s+)?Syllables' => '122', }, ); |