summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-29 04:57:42 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-29 04:57:42 +0000
commit: d9efae67d76cc4acd8980b711b5bebc7142b5319 (patch)
tree: 85511ac1926809c78e0399fa9cde4033552336cb
parent: e8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff)
download: perl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz
Allow for more flexibility in the \p{In...} names: case no longer
matters, and any space or dash in a name can be matched by a space, a dash, an underscore, or nothing at all. (This may be going too far on leniency.)
p4raw-id: //depot/perl@12264
-rw-r--r--  lib/unicore/Blocks.pl    198
-rw-r--r--  lib/unicore/In.pl        552
-rw-r--r--  lib/unicore/Scripts.pl   880
-rwxr-xr-x  lib/unicore/mktables.PL   88
-rw-r--r--  lib/utf8_heavy.pl         17
-rw-r--r--  pod/perlunicode.pod        7
-rwxr-xr-x  t/op/pat.t                14
7 files changed, 1045 insertions, 711 deletions
diff --git a/lib/unicore/Blocks.pl b/lib/unicore/Blocks.pl
index e45026a996..83c275709e 100644
--- a/lib/unicore/Blocks.pl
+++ b/lib/unicore/Blocks.pl
@@ -2,103 +2,103 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0000 007F Basic Latin # BasicLatin In/40.pl
-0080 00FF Latin-1 Supplement # Latin1Supplement In/41.pl
-0100 017F Latin Extended-A # LatinExtendedA In/42.pl
-0180 024F Latin Extended-B # LatinExtendedB In/43.pl
-0250 02AF IPA Extensions # IPAExtensions In/44.pl
-02B0 02FF Spacing Modifier Letters # SpacingModifierLetters In/45.pl
-0300 036F Combining Diacritical Marks # CombiningDiacriticalMarks In/46.pl
-0370 03FF Greek # GreekBlock In/47.pl
-0400 04FF Cyrillic # CyrillicBlock In/48.pl
-0530 058F Armenian # ArmenianBlock In/49.pl
-0590 05FF Hebrew # HebrewBlock In/50.pl
-0600 06FF Arabic # ArabicBlock In/51.pl
-0700 074F Syriac # SyriacBlock In/52.pl
-0780 07BF Thaana # ThaanaBlock In/53.pl
-0900 097F Devanagari # DevanagariBlock In/54.pl
-0980 09FF Bengali # BengaliBlock In/55.pl
-0A00 0A7F Gurmukhi # GurmukhiBlock In/56.pl
-0A80 0AFF Gujarati # GujaratiBlock In/57.pl
-0B00 0B7F Oriya # OriyaBlock In/58.pl
-0B80 0BFF Tamil # TamilBlock In/59.pl
-0C00 0C7F Telugu # TeluguBlock In/60.pl
-0C80 0CFF Kannada # KannadaBlock In/61.pl
-0D00 0D7F Malayalam # MalayalamBlock In/62.pl
-0D80 0DFF Sinhala # SinhalaBlock In/63.pl
-0E00 0E7F Thai # ThaiBlock In/64.pl
-0E80 0EFF Lao # LaoBlock In/65.pl
-0F00 0FFF Tibetan # TibetanBlock In/66.pl
-1000 109F Myanmar # MyanmarBlock In/67.pl
-10A0 10FF Georgian # GeorgianBlock In/68.pl
-1100 11FF Hangul Jamo # HangulJamo In/69.pl
-1200 137F Ethiopic # EthiopicBlock In/70.pl
-13A0 13FF Cherokee # CherokeeBlock In/71.pl
-1400 167F Unified Canadian Aboriginal Syllabics # UnifiedCanadianAboriginalSyllabics In/72.pl
-1680 169F Ogham # OghamBlock In/73.pl
-16A0 16FF Runic # RunicBlock In/74.pl
-1780 17FF Khmer # KhmerBlock In/75.pl
-1800 18AF Mongolian # MongolianBlock In/76.pl
-1E00 1EFF Latin Extended Additional # LatinExtendedAdditional In/77.pl
-1F00 1FFF Greek Extended # GreekExtended In/78.pl
-2000 206F General Punctuation # GeneralPunctuation In/79.pl
-2070 209F Superscripts and Subscripts # SuperscriptsandSubscripts In/80.pl
-20A0 20CF Currency Symbols # CurrencySymbols In/81.pl
-20D0 20FF Combining Marks for Symbols # CombiningMarksforSymbols In/82.pl
-2100 214F Letterlike Symbols # LetterlikeSymbols In/83.pl
-2150 218F Number Forms # NumberForms In/84.pl
-2190 21FF Arrows # Arrows In/85.pl
-2200 22FF Mathematical Operators # MathematicalOperators In/86.pl
-2300 23FF Miscellaneous Technical # MiscellaneousTechnical In/87.pl
-2400 243F Control Pictures # ControlPictures In/88.pl
-2440 245F Optical Character Recognition # OpticalCharacterRecognition In/89.pl
-2460 24FF Enclosed Alphanumerics # EnclosedAlphanumerics In/90.pl
-2500 257F Box Drawing # BoxDrawing In/91.pl
-2580 259F Block Elements # BlockElements In/92.pl
-25A0 25FF Geometric Shapes # GeometricShapes In/93.pl
-2600 26FF Miscellaneous Symbols # MiscellaneousSymbols In/94.pl
-2700 27BF Dingbats # Dingbats In/95.pl
-2800 28FF Braille Patterns # BraillePatterns In/96.pl
-2E80 2EFF CJK Radicals Supplement # CJKRadicalsSupplement In/97.pl
-2F00 2FDF Kangxi Radicals # KangxiRadicals In/98.pl
-2FF0 2FFF Ideographic Description Characters # IdeographicDescriptionCharacters In/99.pl
-3000 303F CJK Symbols and Punctuation # CJKSymbolsandPunctuation In/100.pl
-3040 309F Hiragana # HiraganaBlock In/101.pl
-30A0 30FF Katakana # KatakanaBlock In/102.pl
-3100 312F Bopomofo # BopomofoBlock In/103.pl
-3130 318F Hangul Compatibility Jamo # HangulCompatibilityJamo In/104.pl
-3190 319F Kanbun # Kanbun In/105.pl
-31A0 31BF Bopomofo Extended # BopomofoExtended In/106.pl
-3200 32FF Enclosed CJK Letters and Months # EnclosedCJKLettersandMonths In/107.pl
-3300 33FF CJK Compatibility # CJKCompatibility In/108.pl
-3400 4DB5 CJK Unified Ideographs Extension A # CJKUnifiedIdeographsExtensionA In/109.pl
-4E00 9FFF CJK Unified Ideographs # CJKUnifiedIdeographs In/110.pl
-A000 A48F Yi Syllables # YiSyllables In/111.pl
-A490 A4CF Yi Radicals # YiRadicals In/112.pl
-AC00 D7A3 Hangul Syllables # HangulSyllables In/113.pl
-D800 DB7F High Surrogates # HighSurrogates In/114.pl
-DB80 DBFF High Private Use Surrogates # HighPrivateUseSurrogates In/115.pl
-DC00 DFFF Low Surrogates # LowSurrogates In/116.pl
-E000 F8FF Private Use # PrivateUse In/117.pl
-F900 FAFF CJK Compatibility Ideographs # CJKCompatibilityIdeographs In/118.pl
-FB00 FB4F Alphabetic Presentation Forms # AlphabeticPresentationForms In/119.pl
-FB50 FDFF Arabic Presentation Forms-A # ArabicPresentationFormsA In/120.pl
-FE20 FE2F Combining Half Marks # CombiningHalfMarks In/121.pl
-FE30 FE4F CJK Compatibility Forms # CJKCompatibilityForms In/122.pl
-FE50 FE6F Small Form Variants # SmallFormVariants In/123.pl
-FE70 FEFE Arabic Presentation Forms-B # ArabicPresentationFormsB In/124.pl
-FEFF FEFF Specials # Specials In/125.pl
-FF00 FFEF Halfwidth and Fullwidth Forms # HalfwidthandFullwidthForms In/126.pl
-FFF0 FFFD Specials # Specials In/125.pl
-10300 1032F Old Italic # OldItalicBlock In/127.pl
-10330 1034F Gothic # GothicBlock In/128.pl
-10400 1044F Deseret # DeseretBlock In/129.pl
-1D000 1D0FF Byzantine Musical Symbols # ByzantineMusicalSymbols In/130.pl
-1D100 1D1FF Musical Symbols # MusicalSymbols In/131.pl
-1D400 1D7FF Mathematical Alphanumeric Symbols # MathematicalAlphanumericSymbols In/132.pl
-20000 2A6D6 CJK Unified Ideographs Extension B # CJKUnifiedIdeographsExtensionB In/133.pl
-2F800 2FA1F CJK Compatibility Ideographs Supplement # CJKCompatibilityIdeographsSupplement In/134.pl
-E0000 E007F Tags # Tags In/135.pl
-F0000 FFFFD Private Use # PrivateUse In/117.pl
-100000 10FFFD Private Use # PrivateUse In/117.pl
+0000 007F Basic Latin # In/40.pl
+0080 00FF Latin-1 Supplement # In/41.pl
+0100 017F Latin Extended-A # In/42.pl
+0180 024F Latin Extended-B # In/43.pl
+0250 02AF IPA Extensions # In/44.pl
+02B0 02FF Spacing Modifier Letters # In/45.pl
+0300 036F Combining Diacritical Marks # In/46.pl
+0370 03FF Greek # In/47.pl
+0400 04FF Cyrillic # In/48.pl
+0530 058F Armenian # In/49.pl
+0590 05FF Hebrew # In/50.pl
+0600 06FF Arabic # In/51.pl
+0700 074F Syriac # In/52.pl
+0780 07BF Thaana # In/53.pl
+0900 097F Devanagari # In/54.pl
+0980 09FF Bengali # In/55.pl
+0A00 0A7F Gurmukhi # In/56.pl
+0A80 0AFF Gujarati # In/57.pl
+0B00 0B7F Oriya # In/58.pl
+0B80 0BFF Tamil # In/59.pl
+0C00 0C7F Telugu # In/60.pl
+0C80 0CFF Kannada # In/61.pl
+0D00 0D7F Malayalam # In/62.pl
+0D80 0DFF Sinhala # In/63.pl
+0E00 0E7F Thai # In/64.pl
+0E80 0EFF Lao # In/65.pl
+0F00 0FFF Tibetan # In/66.pl
+1000 109F Myanmar # In/67.pl
+10A0 10FF Georgian # In/68.pl
+1100 11FF Hangul Jamo # In/69.pl
+1200 137F Ethiopic # In/70.pl
+13A0 13FF Cherokee # In/71.pl
+1400 167F Unified Canadian Aboriginal Syllabics # In/72.pl
+1680 169F Ogham # In/73.pl
+16A0 16FF Runic # In/74.pl
+1780 17FF Khmer # In/75.pl
+1800 18AF Mongolian # In/76.pl
+1E00 1EFF Latin Extended Additional # In/77.pl
+1F00 1FFF Greek Extended # In/78.pl
+2000 206F General Punctuation # In/79.pl
+2070 209F Superscripts and Subscripts # In/80.pl
+20A0 20CF Currency Symbols # In/81.pl
+20D0 20FF Combining Marks for Symbols # In/82.pl
+2100 214F Letterlike Symbols # In/83.pl
+2150 218F Number Forms # In/84.pl
+2190 21FF Arrows # In/85.pl
+2200 22FF Mathematical Operators # In/86.pl
+2300 23FF Miscellaneous Technical # In/87.pl
+2400 243F Control Pictures # In/88.pl
+2440 245F Optical Character Recognition # In/89.pl
+2460 24FF Enclosed Alphanumerics # In/90.pl
+2500 257F Box Drawing # In/91.pl
+2580 259F Block Elements # In/92.pl
+25A0 25FF Geometric Shapes # In/93.pl
+2600 26FF Miscellaneous Symbols # In/94.pl
+2700 27BF Dingbats # In/95.pl
+2800 28FF Braille Patterns # In/96.pl
+2E80 2EFF CJK Radicals Supplement # In/97.pl
+2F00 2FDF Kangxi Radicals # In/98.pl
+2FF0 2FFF Ideographic Description Characters # In/99.pl
+3000 303F CJK Symbols and Punctuation # In/100.pl
+3040 309F Hiragana # In/101.pl
+30A0 30FF Katakana # In/102.pl
+3100 312F Bopomofo # In/103.pl
+3130 318F Hangul Compatibility Jamo # In/104.pl
+3190 319F Kanbun # In/105.pl
+31A0 31BF Bopomofo Extended # In/106.pl
+3200 32FF Enclosed CJK Letters and Months # In/107.pl
+3300 33FF CJK Compatibility # In/108.pl
+3400 4DB5 CJK Unified Ideographs Extension A # In/109.pl
+4E00 9FFF CJK Unified Ideographs # In/110.pl
+A000 A48F Yi Syllables # In/111.pl
+A490 A4CF Yi Radicals # In/112.pl
+AC00 D7A3 Hangul Syllables # In/113.pl
+D800 DB7F High Surrogates # In/114.pl
+DB80 DBFF High Private Use Surrogates # In/115.pl
+DC00 DFFF Low Surrogates # In/116.pl
+E000 F8FF Private Use # In/117.pl
+F900 FAFF CJK Compatibility Ideographs # In/118.pl
+FB00 FB4F Alphabetic Presentation Forms # In/119.pl
+FB50 FDFF Arabic Presentation Forms-A # In/120.pl
+FE20 FE2F Combining Half Marks # In/121.pl
+FE30 FE4F CJK Compatibility Forms # In/122.pl
+FE50 FE6F Small Form Variants # In/123.pl
+FE70 FEFE Arabic Presentation Forms-B # In/124.pl
+FEFF FEFF Specials # In/125.pl
+FF00 FFEF Halfwidth and Fullwidth Forms # In/126.pl
+FFF0 FFFD Specials # In/125.pl
+10300 1032F Old Italic # In/127.pl
+10330 1034F Gothic # In/128.pl
+10400 1044F Deseret # In/129.pl
+1D000 1D0FF Byzantine Musical Symbols # In/130.pl
+1D100 1D1FF Musical Symbols # In/131.pl
+1D400 1D7FF Mathematical Alphanumeric Symbols # In/132.pl
+20000 2A6D6 CJK Unified Ideographs Extension B # In/133.pl
+2F800 2FA1F CJK Compatibility Ideographs Supplement # In/134.pl
+E0000 E007F Tags # In/135.pl
+F0000 FFFFD Private Use # In/117.pl
+100000 10FFFD Private Use # In/117.pl
END
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl
index a6c24199a7..c11445c26b 100644
--- a/lib/unicore/In.pl
+++ b/lib/unicore/In.pl
@@ -2,140 +2,420 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
%utf8::In = (
-'Latin' => 0,
-'Greek' => 1,
-'Cyrillic' => 2,
-'Armenian' => 3,
-'Hebrew' => 4,
-'Arabic' => 5,
-'Syriac' => 6,
-'Thaana' => 7,
-'Devanagari' => 8,
-'Bengali' => 9,
-'Gurmukhi' => 10,
-'Gujarati' => 11,
-'Oriya' => 12,
-'Tamil' => 13,
-'Telugu' => 14,
-'Kannada' => 15,
-'Malayalam' => 16,
-'Sinhala' => 17,
-'Thai' => 18,
-'Lao' => 19,
-'Tibetan' => 20,
-'Myanmar' => 21,
-'Georgian' => 22,
-'Hangul' => 23,
-'Ethiopic' => 24,
-'Cherokee' => 25,
-'CanadianAboriginal' => 26,
-'Ogham' => 27,
-'Runic' => 28,
-'Khmer' => 29,
-'Mongolian' => 30,
-'Hiragana' => 31,
-'Katakana' => 32,
-'Bopomofo' => 33,
-'Han' => 34,
-'Yi' => 35,
-'OldItalic' => 36,
-'Gothic' => 37,
-'Deseret' => 38,
-'Inherited' => 39,
-'BasicLatin' => 40,
-'Latin1Supplement' => 41,
-'LatinExtendedA' => 42,
-'LatinExtendedB' => 43,
-'IPAExtensions' => 44,
-'SpacingModifierLetters' => 45,
-'CombiningDiacriticalMarks' => 46,
-'GreekBlock' => 47,
-'CyrillicBlock' => 48,
-'ArmenianBlock' => 49,
-'HebrewBlock' => 50,
-'ArabicBlock' => 51,
-'SyriacBlock' => 52,
-'ThaanaBlock' => 53,
-'DevanagariBlock' => 54,
-'BengaliBlock' => 55,
-'GurmukhiBlock' => 56,
-'GujaratiBlock' => 57,
-'OriyaBlock' => 58,
-'TamilBlock' => 59,
-'TeluguBlock' => 60,
-'KannadaBlock' => 61,
-'MalayalamBlock' => 62,
-'SinhalaBlock' => 63,
-'ThaiBlock' => 64,
-'LaoBlock' => 65,
-'TibetanBlock' => 66,
-'MyanmarBlock' => 67,
-'GeorgianBlock' => 68,
-'HangulJamo' => 69,
-'EthiopicBlock' => 70,
-'CherokeeBlock' => 71,
-'UnifiedCanadianAboriginalSyllabics' => 72,
-'OghamBlock' => 73,
-'RunicBlock' => 74,
-'KhmerBlock' => 75,
-'MongolianBlock' => 76,
-'LatinExtendedAdditional' => 77,
-'GreekExtended' => 78,
-'GeneralPunctuation' => 79,
-'SuperscriptsandSubscripts' => 80,
-'CurrencySymbols' => 81,
-'CombiningMarksforSymbols' => 82,
-'LetterlikeSymbols' => 83,
-'NumberForms' => 84,
-'Arrows' => 85,
-'MathematicalOperators' => 86,
-'MiscellaneousTechnical' => 87,
-'ControlPictures' => 88,
-'OpticalCharacterRecognition' => 89,
-'EnclosedAlphanumerics' => 90,
-'BoxDrawing' => 91,
-'BlockElements' => 92,
-'GeometricShapes' => 93,
-'MiscellaneousSymbols' => 94,
-'Dingbats' => 95,
-'BraillePatterns' => 96,
-'CJKRadicalsSupplement' => 97,
-'KangxiRadicals' => 98,
-'IdeographicDescriptionCharacters' => 99,
-'CJKSymbolsandPunctuation' => 100,
-'HiraganaBlock' => 101,
-'KatakanaBlock' => 102,
-'BopomofoBlock' => 103,
-'HangulCompatibilityJamo' => 104,
-'Kanbun' => 105,
-'BopomofoExtended' => 106,
-'EnclosedCJKLettersandMonths' => 107,
-'CJKCompatibility' => 108,
-'CJKUnifiedIdeographsExtensionA' => 109,
-'CJKUnifiedIdeographs' => 110,
-'YiSyllables' => 111,
-'YiRadicals' => 112,
-'HangulSyllables' => 113,
-'HighSurrogates' => 114,
-'HighPrivateUseSurrogates' => 115,
-'LowSurrogates' => 116,
-'PrivateUse' => 117,
-'CJKCompatibilityIdeographs' => 118,
-'AlphabeticPresentationForms' => 119,
-'ArabicPresentationFormsA' => 120,
-'CombiningHalfMarks' => 121,
-'CJKCompatibilityForms' => 122,
-'SmallFormVariants' => 123,
-'ArabicPresentationFormsB' => 124,
-'Specials' => 125,
-'HalfwidthandFullwidthForms' => 126,
-'OldItalicBlock' => 127,
-'GothicBlock' => 128,
-'DeseretBlock' => 129,
-'ByzantineMusicalSymbols' => 130,
-'MusicalSymbols' => 131,
-'MathematicalAlphanumericSymbols' => 132,
-'CJKUnifiedIdeographsExtensionB' => 133,
-'CJKCompatibilityIdeographsSupplement' => 134,
-'Tags' => 135,
+'LATIN' => 0,
+'GREEK' => 1,
+'CYRILLIC' => 2,
+'ARMENIAN' => 3,
+'HEBREW' => 4,
+'ARABIC' => 5,
+'SYRIAC' => 6,
+'THAANA' => 7,
+'DEVANAGARI' => 8,
+'BENGALI' => 9,
+'GURMUKHI' => 10,
+'GUJARATI' => 11,
+'ORIYA' => 12,
+'TAMIL' => 13,
+'TELUGU' => 14,
+'KANNADA' => 15,
+'MALAYALAM' => 16,
+'SINHALA' => 17,
+'THAI' => 18,
+'LAO' => 19,
+'TIBETAN' => 20,
+'MYANMAR' => 21,
+'GEORGIAN' => 22,
+'HANGUL' => 23,
+'ETHIOPIC' => 24,
+'CHEROKEE' => 25,
+'CANADIAN-ABORIGINAL' => 26,
+'OGHAM' => 27,
+'RUNIC' => 28,
+'KHMER' => 29,
+'MONGOLIAN' => 30,
+'HIRAGANA' => 31,
+'KATAKANA' => 32,
+'BOPOMOFO' => 33,
+'HAN' => 34,
+'YI' => 35,
+'OLD-ITALIC' => 36,
+'GOTHIC' => 37,
+'DESERET' => 38,
+'INHERITED' => 39,
+'Basic Latin' => 40,
+'Latin-1 Supplement' => 41,
+'Latin Extended-A' => 42,
+'Latin Extended-B' => 43,
+'IPA Extensions' => 44,
+'Spacing Modifier Letters' => 45,
+'Combining Diacritical Marks' => 46,
+'Greek Block' => 47,
+'Cyrillic Block' => 48,
+'Armenian Block' => 49,
+'Hebrew Block' => 50,
+'Arabic Block' => 51,
+'Syriac Block' => 52,
+'Thaana Block' => 53,
+'Devanagari Block' => 54,
+'Bengali Block' => 55,
+'Gurmukhi Block' => 56,
+'Gujarati Block' => 57,
+'Oriya Block' => 58,
+'Tamil Block' => 59,
+'Telugu Block' => 60,
+'Kannada Block' => 61,
+'Malayalam Block' => 62,
+'Sinhala Block' => 63,
+'Thai Block' => 64,
+'Lao Block' => 65,
+'Tibetan Block' => 66,
+'Myanmar Block' => 67,
+'Georgian Block' => 68,
+'Hangul Jamo' => 69,
+'Ethiopic Block' => 70,
+'Cherokee Block' => 71,
+'Unified Canadian Aboriginal Syllabics' => 72,
+'Ogham Block' => 73,
+'Runic Block' => 74,
+'Khmer Block' => 75,
+'Mongolian Block' => 76,
+'Latin Extended Additional' => 77,
+'Greek Extended' => 78,
+'General Punctuation' => 79,
+'Superscripts and Subscripts' => 80,
+'Currency Symbols' => 81,
+'Combining Marks for Symbols' => 82,
+'Letterlike Symbols' => 83,
+'Number Forms' => 84,
+'Arrows' => 85,
+'Mathematical Operators' => 86,
+'Miscellaneous Technical' => 87,
+'Control Pictures' => 88,
+'Optical Character Recognition' => 89,
+'Enclosed Alphanumerics' => 90,
+'Box Drawing' => 91,
+'Block Elements' => 92,
+'Geometric Shapes' => 93,
+'Miscellaneous Symbols' => 94,
+'Dingbats' => 95,
+'Braille Patterns' => 96,
+'CJK Radicals Supplement' => 97,
+'Kangxi Radicals' => 98,
+'Ideographic Description Characters' => 99,
+'CJK Symbols and Punctuation' => 100,
+'Hiragana Block' => 101,
+'Katakana Block' => 102,
+'Bopomofo Block' => 103,
+'Hangul Compatibility Jamo' => 104,
+'Kanbun' => 105,
+'Bopomofo Extended' => 106,
+'Enclosed CJK Letters and Months' => 107,
+'CJK Compatibility' => 108,
+'CJK Unified Ideographs Extension A' => 109,
+'CJK Unified Ideographs' => 110,
+'Yi Syllables' => 111,
+'Yi Radicals' => 112,
+'Hangul Syllables' => 113,
+'High Surrogates' => 114,
+'High Private Use Surrogates' => 115,
+'Low Surrogates' => 116,
+'Private Use' => 117,
+'CJK Compatibility Ideographs' => 118,
+'Alphabetic Presentation Forms' => 119,
+'Arabic Presentation Forms-A' => 120,
+'Combining Half Marks' => 121,
+'CJK Compatibility Forms' => 122,
+'Small Form Variants' => 123,
+'Arabic Presentation Forms-B' => 124,
+'Specials' => 125,
+'Halfwidth and Fullwidth Forms' => 126,
+'Old Italic' => 127,
+'Gothic Block' => 128,
+'Deseret Block' => 129,
+'Byzantine Musical Symbols' => 130,
+'Musical Symbols' => 131,
+'Mathematical Alphanumeric Symbols' => 132,
+'CJK Unified Ideographs Extension B' => 133,
+'CJK Compatibility Ideographs Supplement' => 134,
+'Tags' => 135,
+);
+%utf8::InPat = (
+'alp' => {
+ 'Alphabetic[- _]?Presentation[- _]?Forms' => 'Alphabetic Presentation Forms',
+},
+'ara' => {
+ 'ARABIC' => 'ARABIC',
+ 'Arabic[- _]?Block' => 'Arabic Block',
+ 'Arabic[- _]?Presentation[- _]?Forms[- _]?A' => 'Arabic Presentation Forms-A',
+ 'Arabic[- _]?Presentation[- _]?Forms[- _]?B' => 'Arabic Presentation Forms-B',
+},
+'arm' => {
+ 'ARMENIAN' => 'ARMENIAN',
+ 'Armenian[- _]?Block' => 'Armenian Block',
+},
+'arr' => {
+ 'Arrows' => 'Arrows',
+},
+'bas' => {
+ 'Basic[- _]?Latin' => 'Basic Latin',
+},
+'ben' => {
+ 'BENGALI' => 'BENGALI',
+ 'Bengali[- _]?Block' => 'Bengali Block',
+},
+'blo' => {
+ 'Block[- _]?Elements' => 'Block Elements',
+},
+'bop' => {
+ 'BOPOMOFO' => 'BOPOMOFO',
+ 'Bopomofo[- _]?Block' => 'Bopomofo Block',
+ 'Bopomofo[- _]?Extended' => 'Bopomofo Extended',
+},
+'box' => {
+ 'Box[- _]?Drawing' => 'Box Drawing',
+},
+'bra' => {
+ 'Braille[- _]?Patterns' => 'Braille Patterns',
+},
+'byz' => {
+ 'Byzantine[- _]?Musical[- _]?Symbols' => 'Byzantine Musical Symbols',
+},
+'can' => {
+ 'CANADIAN[- _]?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
+},
+'che' => {
+ 'CHEROKEE' => 'CHEROKEE',
+ 'Cherokee[- _]?Block' => 'Cherokee Block',
+},
+'cjk' => {
+ 'CJK[- _]?Radicals[- _]?Supplement' => 'CJK Radicals Supplement',
+ 'CJK[- _]?Symbols[- _]?and[- _]?Punctuation' => 'CJK Symbols and Punctuation',
+ 'CJK[- _]?Compatibility' => 'CJK Compatibility',
+ 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?A' => 'CJK Unified Ideographs Extension A',
+ 'CJK[- _]?Unified[- _]?Ideographs' => 'CJK Unified Ideographs',
+ 'CJK[- _]?Compatibility[- _]?Ideographs' => 'CJK Compatibility Ideographs',
+ 'CJK[- _]?Compatibility[- _]?Forms' => 'CJK Compatibility Forms',
+ 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?B' => 'CJK Unified Ideographs Extension B',
+ 'CJK[- _]?Compatibility[- _]?Ideographs[- _]?Supplement' => 'CJK Compatibility Ideographs Supplement',
+},
+'com' => {
+ 'Combining[- _]?Diacritical[- _]?Marks' => 'Combining Diacritical Marks',
+ 'Combining[- _]?Marks[- _]?for[- _]?Symbols' => 'Combining Marks for Symbols',
+ 'Combining[- _]?Half[- _]?Marks' => 'Combining Half Marks',
+},
+'con' => {
+ 'Control[- _]?Pictures' => 'Control Pictures',
+},
+'cur' => {
+ 'Currency[- _]?Symbols' => 'Currency Symbols',
+},
+'cyr' => {
+ 'CYRILLIC' => 'CYRILLIC',
+ 'Cyrillic[- _]?Block' => 'Cyrillic Block',
+},
+'des' => {
+ 'DESERET' => 'DESERET',
+ 'Deseret[- _]?Block' => 'Deseret Block',
+},
+'dev' => {
+ 'DEVANAGARI' => 'DEVANAGARI',
+ 'Devanagari[- _]?Block' => 'Devanagari Block',
+},
+'din' => {
+ 'Dingbats' => 'Dingbats',
+},
+'enc' => {
+ 'Enclosed[- _]?Alphanumerics' => 'Enclosed Alphanumerics',
+ 'Enclosed[- _]?CJK[- _]?Letters[- _]?and[- _]?Months' => 'Enclosed CJK Letters and Months',
+},
+'eth' => {
+ 'ETHIOPIC' => 'ETHIOPIC',
+ 'Ethiopic[- _]?Block' => 'Ethiopic Block',
+},
+'gen' => {
+ 'General[- _]?Punctuation' => 'General Punctuation',
+},
+'geo' => {
+ 'GEORGIAN' => 'GEORGIAN',
+ 'Georgian[- _]?Block' => 'Georgian Block',
+ 'Geometric[- _]?Shapes' => 'Geometric Shapes',
+},
+'got' => {
+ 'GOTHIC' => 'GOTHIC',
+ 'Gothic[- _]?Block' => 'Gothic Block',
+},
+'gre' => {
+ 'GREEK' => 'GREEK',
+ 'Greek[- _]?Block' => 'Greek Block',
+ 'Greek[- _]?Extended' => 'Greek Extended',
+},
+'guj' => {
+ 'GUJARATI' => 'GUJARATI',
+ 'Gujarati[- _]?Block' => 'Gujarati Block',
+},
+'gur' => {
+ 'GURMUKHI' => 'GURMUKHI',
+ 'Gurmukhi[- _]?Block' => 'Gurmukhi Block',
+},
+'hal' => {
+ 'Halfwidth[- _]?and[- _]?Fullwidth[- _]?Forms' => 'Halfwidth and Fullwidth Forms',
+},
+'han' => {
+ 'HANGUL' => 'HANGUL',
+ 'HAN' => 'HAN',
+ 'Hangul[- _]?Jamo' => 'Hangul Jamo',
+ 'Hangul[- _]?Compatibility[- _]?Jamo' => 'Hangul Compatibility Jamo',
+ 'Hangul[- _]?Syllables' => 'Hangul Syllables',
+},
+'heb' => {
+ 'HEBREW' => 'HEBREW',
+ 'Hebrew[- _]?Block' => 'Hebrew Block',
+},
+'hig' => {
+ 'High[- _]?Surrogates' => 'High Surrogates',
+ 'High[- _]?Private[- _]?Use[- _]?Surrogates' => 'High Private Use Surrogates',
+},
+'hir' => {
+ 'HIRAGANA' => 'HIRAGANA',
+ 'Hiragana[- _]?Block' => 'Hiragana Block',
+},
+'ide' => {
+ 'Ideographic[- _]?Description[- _]?Characters' => 'Ideographic Description Characters',
+},
+'inh' => {
+ 'INHERITED' => 'INHERITED',
+},
+'ipa' => {
+ 'IPA[- _]?Extensions' => 'IPA Extensions',
+},
+'kan' => {
+ 'KANNADA' => 'KANNADA',
+ 'Kannada[- _]?Block' => 'Kannada Block',
+ 'Kangxi[- _]?Radicals' => 'Kangxi Radicals',
+ 'Kanbun' => 'Kanbun',
+},
+'kat' => {
+ 'KATAKANA' => 'KATAKANA',
+ 'Katakana[- _]?Block' => 'Katakana Block',
+},
+'khm' => {
+ 'KHMER' => 'KHMER',
+ 'Khmer[- _]?Block' => 'Khmer Block',
+},
+'lao' => {
+ 'LAO' => 'LAO',
+ 'Lao[- _]?Block' => 'Lao Block',
+},
+'lat' => {
+ 'LATIN' => 'LATIN',
+ 'Latin[- _]?1[- _]?Supplement' => 'Latin-1 Supplement',
+ 'Latin[- _]?Extended[- _]?A' => 'Latin Extended-A',
+ 'Latin[- _]?Extended[- _]?B' => 'Latin Extended-B',
+ 'Latin[- _]?Extended[- _]?Additional' => 'Latin Extended Additional',
+},
+'let' => {
+ 'Letterlike[- _]?Symbols' => 'Letterlike Symbols',
+},
+'low' => {
+ 'Low[- _]?Surrogates' => 'Low Surrogates',
+},
+'mal' => {
+ 'MALAYALAM' => 'MALAYALAM',
+ 'Malayalam[- _]?Block' => 'Malayalam Block',
+},
+'mat' => {
+ 'Mathematical[- _]?Operators' => 'Mathematical Operators',
+ 'Mathematical[- _]?Alphanumeric[- _]?Symbols' => 'Mathematical Alphanumeric Symbols',
+},
+'mis' => {
+ 'Miscellaneous[- _]?Technical' => 'Miscellaneous Technical',
+ 'Miscellaneous[- _]?Symbols' => 'Miscellaneous Symbols',
+},
+'mon' => {
+ 'MONGOLIAN' => 'MONGOLIAN',
+ 'Mongolian[- _]?Block' => 'Mongolian Block',
+},
+'mus' => {
+ 'Musical[- _]?Symbols' => 'Musical Symbols',
+},
+'mya' => {
+ 'MYANMAR' => 'MYANMAR',
+ 'Myanmar[- _]?Block' => 'Myanmar Block',
+},
+'num' => {
+ 'Number[- _]?Forms' => 'Number Forms',
+},
+'ogh' => {
+ 'OGHAM' => 'OGHAM',
+ 'Ogham[- _]?Block' => 'Ogham Block',
+},
+'old' => {
+ 'OLD[- _]?ITALIC' => 'OLD-ITALIC',
+ 'Old[- _]?Italic' => 'Old Italic',
+},
+'opt' => {
+ 'Optical[- _]?Character[- _]?Recognition' => 'Optical Character Recognition',
+},
+'ori' => {
+ 'ORIYA' => 'ORIYA',
+ 'Oriya[- _]?Block' => 'Oriya Block',
+},
+'pri' => {
+ 'Private[- _]?Use' => 'Private Use',
+},
+'run' => {
+ 'RUNIC' => 'RUNIC',
+ 'Runic[- _]?Block' => 'Runic Block',
+},
+'sin' => {
+ 'SINHALA' => 'SINHALA',
+ 'Sinhala[- _]?Block' => 'Sinhala Block',
+},
+'sma' => {
+ 'Small[- _]?Form[- _]?Variants' => 'Small Form Variants',
+},
+'spa' => {
+ 'Spacing[- _]?Modifier[- _]?Letters' => 'Spacing Modifier Letters',
+},
+'spe' => {
+ 'Specials' => 'Specials',
+},
+'sup' => {
+ 'Superscripts[- _]?and[- _]?Subscripts' => 'Superscripts and Subscripts',
+},
+'syr' => {
+ 'SYRIAC' => 'SYRIAC',
+ 'Syriac[- _]?Block' => 'Syriac Block',
+},
+'tag' => {
+ 'Tags' => 'Tags',
+},
+'tam' => {
+ 'TAMIL' => 'TAMIL',
+ 'Tamil[- _]?Block' => 'Tamil Block',
+},
+'tel' => {
+ 'TELUGU' => 'TELUGU',
+ 'Telugu[- _]?Block' => 'Telugu Block',
+},
+'tha' => {
+ 'THAANA' => 'THAANA',
+ 'THAI' => 'THAI',
+ 'Thaana[- _]?Block' => 'Thaana Block',
+ 'Thai[- _]?Block' => 'Thai Block',
+},
+'tib' => {
+ 'TIBETAN' => 'TIBETAN',
+ 'Tibetan[- _]?Block' => 'Tibetan Block',
+},
+'uni' => {
+ 'Unified[- _]?Canadian[- _]?Aboriginal[- _]?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
+},
+'yi' => {
+ 'YI' => 'YI',
+},
+'yi ' => {
+ 'Yi[- _]?Syllables' => 'Yi Syllables',
+ 'Yi[- _]?Radicals' => 'Yi Radicals',
+},
);
diff --git a/lib/unicore/Scripts.pl b/lib/unicore/Scripts.pl
index ed0168e086..b924f3ab5f 100644
--- a/lib/unicore/Scripts.pl
+++ b/lib/unicore/Scripts.pl
@@ -2,444 +2,444 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0041 005A LATIN # Latin In/0.pl
-0061 007A LATIN # Latin In/0.pl
-00AA LATIN # Latin In/0.pl
-00BA LATIN # Latin In/0.pl
-00C0 00D6 LATIN # Latin In/0.pl
-00D8 00F6 LATIN # Latin In/0.pl
-00F8 01BA LATIN # Latin In/0.pl
-01BB LATIN # Latin In/0.pl
-01BC 01BF LATIN # Latin In/0.pl
-01C0 01C3 LATIN # Latin In/0.pl
-01C4 021F LATIN # Latin In/0.pl
-0222 0233 LATIN # Latin In/0.pl
-0250 02AD LATIN # Latin In/0.pl
-02B0 02B8 LATIN # Latin In/0.pl
-02E0 02E4 LATIN # Latin In/0.pl
-1E00 1E9B LATIN # Latin In/0.pl
-1EA0 1EF9 LATIN # Latin In/0.pl
-207F LATIN # Latin In/0.pl
-212A 212B LATIN # Latin In/0.pl
-FB00 FB06 LATIN # Latin In/0.pl
-FF21 FF3A LATIN # Latin In/0.pl
-FF41 FF5A LATIN # Latin In/0.pl
-00B5 GREEK # Greek In/1.pl
-037A GREEK # Greek In/1.pl
-0386 GREEK # Greek In/1.pl
-0388 038A GREEK # Greek In/1.pl
-038C GREEK # Greek In/1.pl
-038E 03A1 GREEK # Greek In/1.pl
-03A3 03CE GREEK # Greek In/1.pl
-03D0 03D7 GREEK # Greek In/1.pl
-03DA 03F5 GREEK # Greek In/1.pl
-1F00 1F15 GREEK # Greek In/1.pl
-1F18 1F1D GREEK # Greek In/1.pl
-1F20 1F45 GREEK # Greek In/1.pl
-1F48 1F4D GREEK # Greek In/1.pl
-1F50 1F57 GREEK # Greek In/1.pl
-1F59 GREEK # Greek In/1.pl
-1F5B GREEK # Greek In/1.pl
-1F5D GREEK # Greek In/1.pl
-1F5F 1F7D GREEK # Greek In/1.pl
-1F80 1FB4 GREEK # Greek In/1.pl
-1FB6 1FBC GREEK # Greek In/1.pl
-1FBE GREEK # Greek In/1.pl
-1FC2 1FC4 GREEK # Greek In/1.pl
-1FC6 1FCC GREEK # Greek In/1.pl
-1FD0 1FD3 GREEK # Greek In/1.pl
-1FD6 1FDB GREEK # Greek In/1.pl
-1FE0 1FEC GREEK # Greek In/1.pl
-1FF2 1FF4 GREEK # Greek In/1.pl
-1FF6 1FFC GREEK # Greek In/1.pl
-2126 GREEK # Greek In/1.pl
-0400 0481 CYRILLIC # Cyrillic In/2.pl
-0483 0486 CYRILLIC # Cyrillic In/2.pl
-048C 04C4 CYRILLIC # Cyrillic In/2.pl
-04C7 04C8 CYRILLIC # Cyrillic In/2.pl
-04CB 04CC CYRILLIC # Cyrillic In/2.pl
-04D0 04F5 CYRILLIC # Cyrillic In/2.pl
-04F8 04F9 CYRILLIC # Cyrillic In/2.pl
-0531 0556 ARMENIAN # Armenian In/3.pl
-0559 ARMENIAN # Armenian In/3.pl
-0561 0587 ARMENIAN # Armenian In/3.pl
-FB13 FB17 ARMENIAN # Armenian In/3.pl
-05D0 05EA HEBREW # Hebrew In/4.pl
-05F0 05F2 HEBREW # Hebrew In/4.pl
-FB1D HEBREW # Hebrew In/4.pl
-FB1F FB28 HEBREW # Hebrew In/4.pl
-FB2A FB36 HEBREW # Hebrew In/4.pl
-FB38 FB3C HEBREW # Hebrew In/4.pl
-FB3E HEBREW # Hebrew In/4.pl
-FB40 FB41 HEBREW # Hebrew In/4.pl
-FB43 FB44 HEBREW # Hebrew In/4.pl
-FB46 FB4F HEBREW # Hebrew In/4.pl
-0621 063A ARABIC # Arabic In/5.pl
-0641 064A ARABIC # Arabic In/5.pl
-0671 06D3 ARABIC # Arabic In/5.pl
-06D5 ARABIC # Arabic In/5.pl
-06E5 06E6 ARABIC # Arabic In/5.pl
-06FA 06FC ARABIC # Arabic In/5.pl
-FB50 FBB1 ARABIC # Arabic In/5.pl
-FBD3 FD3D ARABIC # Arabic In/5.pl
-FD50 FD8F ARABIC # Arabic In/5.pl
-FD92 FDC7 ARABIC # Arabic In/5.pl
-FDF0 FDFB ARABIC # Arabic In/5.pl
-FE70 FE72 ARABIC # Arabic In/5.pl
-FE74 ARABIC # Arabic In/5.pl
-FE76 FEFC ARABIC # Arabic In/5.pl
-0710 SYRIAC # Syriac In/6.pl
-0711 SYRIAC # Syriac In/6.pl
-0712 072C SYRIAC # Syriac In/6.pl
-0730 074A SYRIAC # Syriac In/6.pl
-0780 07A5 THAANA # Thaana In/7.pl
-07A6 07B0 THAANA # Thaana In/7.pl
-0901 0902 DEVANAGARI # Devanagari In/8.pl
-0903 DEVANAGARI # Devanagari In/8.pl
-0905 0939 DEVANAGARI # Devanagari In/8.pl
-093C DEVANAGARI # Devanagari In/8.pl
-093D DEVANAGARI # Devanagari In/8.pl
-093E 0940 DEVANAGARI # Devanagari In/8.pl
-0941 0948 DEVANAGARI # Devanagari In/8.pl
-0949 094C DEVANAGARI # Devanagari In/8.pl
-094D DEVANAGARI # Devanagari In/8.pl
-0950 DEVANAGARI # Devanagari In/8.pl
-0951 0954 DEVANAGARI # Devanagari In/8.pl
-0958 0961 DEVANAGARI # Devanagari In/8.pl
-0962 0963 DEVANAGARI # Devanagari In/8.pl
-0966 096F DEVANAGARI # Devanagari In/8.pl
-0981 BENGALI # Bengali In/9.pl
-0985 098C BENGALI # Bengali In/9.pl
-098F 0990 BENGALI # Bengali In/9.pl
-0993 09A8 BENGALI # Bengali In/9.pl
-09AA 09B0 BENGALI # Bengali In/9.pl
-09B2 BENGALI # Bengali In/9.pl
-09B6 09B9 BENGALI # Bengali In/9.pl
-09BC BENGALI # Bengali In/9.pl
-09BE 09C0 BENGALI # Bengali In/9.pl
-09C1 09C4 BENGALI # Bengali In/9.pl
-09C7 09C8 BENGALI # Bengali In/9.pl
-09CB 09CC BENGALI # Bengali In/9.pl
-09CD BENGALI # Bengali In/9.pl
-09D7 BENGALI # Bengali In/9.pl
-09DC 09DD BENGALI # Bengali In/9.pl
-09DF 09E1 BENGALI # Bengali In/9.pl
-09E2 09E3 BENGALI # Bengali In/9.pl
-09E6 09EF BENGALI # Bengali In/9.pl
-09F0 09F1 BENGALI # Bengali In/9.pl
-0A02 GURMUKHI # Gurmukhi In/10.pl
-0A05 0A0A GURMUKHI # Gurmukhi In/10.pl
-0A0F 0A10 GURMUKHI # Gurmukhi In/10.pl
-0A13 0A28 GURMUKHI # Gurmukhi In/10.pl
-0A2A 0A30 GURMUKHI # Gurmukhi In/10.pl
-0A32 0A33 GURMUKHI # Gurmukhi In/10.pl
-0A35 0A36 GURMUKHI # Gurmukhi In/10.pl
-0A38 0A39 GURMUKHI # Gurmukhi In/10.pl
-0A3C GURMUKHI # Gurmukhi In/10.pl
-0A3E 0A40 GURMUKHI # Gurmukhi In/10.pl
-0A41 0A42 GURMUKHI # Gurmukhi In/10.pl
-0A47 0A48 GURMUKHI # Gurmukhi In/10.pl
-0A4B 0A4D GURMUKHI # Gurmukhi In/10.pl
-0A59 0A5C GURMUKHI # Gurmukhi In/10.pl
-0A5E GURMUKHI # Gurmukhi In/10.pl
-0A66 0A6F GURMUKHI # Gurmukhi In/10.pl
-0A70 0A71 GURMUKHI # Gurmukhi In/10.pl
-0A72 0A74 GURMUKHI # Gurmukhi In/10.pl
-0A81 0A82 GUJARATI # Gujarati In/11.pl
-0A83 GUJARATI # Gujarati In/11.pl
-0A85 0A8B GUJARATI # Gujarati In/11.pl
-0A8D GUJARATI # Gujarati In/11.pl
-0A8F 0A91 GUJARATI # Gujarati In/11.pl
-0A93 0AA8 GUJARATI # Gujarati In/11.pl
-0AAA 0AB0 GUJARATI # Gujarati In/11.pl
-0AB2 0AB3 GUJARATI # Gujarati In/11.pl
-0AB5 0AB9 GUJARATI # Gujarati In/11.pl
-0ABC GUJARATI # Gujarati In/11.pl
-0ABD GUJARATI # Gujarati In/11.pl
-0ABE 0AC0 GUJARATI # Gujarati In/11.pl
-0AC1 0AC5 GUJARATI # Gujarati In/11.pl
-0AC7 0AC8 GUJARATI # Gujarati In/11.pl
-0AC9 GUJARATI # Gujarati In/11.pl
-0ACB 0ACC GUJARATI # Gujarati In/11.pl
-0ACD GUJARATI # Gujarati In/11.pl
-0AD0 GUJARATI # Gujarati In/11.pl
-0AE0 GUJARATI # Gujarati In/11.pl
-0AE6 0AEF GUJARATI # Gujarati In/11.pl
-0B01 ORIYA # Oriya In/12.pl
-0B02 0B03 ORIYA # Oriya In/12.pl
-0B05 0B0C ORIYA # Oriya In/12.pl
-0B0F 0B10 ORIYA # Oriya In/12.pl
-0B13 0B28 ORIYA # Oriya In/12.pl
-0B2A 0B30 ORIYA # Oriya In/12.pl
-0B32 0B33 ORIYA # Oriya In/12.pl
-0B36 0B39 ORIYA # Oriya In/12.pl
-0B3C ORIYA # Oriya In/12.pl
-0B3D ORIYA # Oriya In/12.pl
-0B3E ORIYA # Oriya In/12.pl
-0B3F ORIYA # Oriya In/12.pl
-0B40 ORIYA # Oriya In/12.pl
-0B41 0B43 ORIYA # Oriya In/12.pl
-0B47 0B48 ORIYA # Oriya In/12.pl
-0B4B 0B4C ORIYA # Oriya In/12.pl
-0B4D ORIYA # Oriya In/12.pl
-0B56 ORIYA # Oriya In/12.pl
-0B57 ORIYA # Oriya In/12.pl
-0B5C 0B5D ORIYA # Oriya In/12.pl
-0B5F 0B61 ORIYA # Oriya In/12.pl
-0B66 0B6F ORIYA # Oriya In/12.pl
-0B82 TAMIL # Tamil In/13.pl
-0B83 TAMIL # Tamil In/13.pl
-0B85 0B8A TAMIL # Tamil In/13.pl
-0B8E 0B90 TAMIL # Tamil In/13.pl
-0B92 0B95 TAMIL # Tamil In/13.pl
-0B99 0B9A TAMIL # Tamil In/13.pl
-0B9C TAMIL # Tamil In/13.pl
-0B9E 0B9F TAMIL # Tamil In/13.pl
-0BA3 0BA4 TAMIL # Tamil In/13.pl
-0BA8 0BAA TAMIL # Tamil In/13.pl
-0BAE 0BB5 TAMIL # Tamil In/13.pl
-0BB7 0BB9 TAMIL # Tamil In/13.pl
-0BBE 0BBF TAMIL # Tamil In/13.pl
-0BC0 TAMIL # Tamil In/13.pl
-0BC1 0BC2 TAMIL # Tamil In/13.pl
-0BC6 0BC8 TAMIL # Tamil In/13.pl
-0BCA 0BCC TAMIL # Tamil In/13.pl
-0BCD TAMIL # Tamil In/13.pl
-0BD7 TAMIL # Tamil In/13.pl
-0BE7 0BEF TAMIL # Tamil In/13.pl
-0BF0 0BF2 TAMIL # Tamil In/13.pl
-0C01 0C03 TELUGU # Telugu In/14.pl
-0C05 0C0C TELUGU # Telugu In/14.pl
-0C0E 0C10 TELUGU # Telugu In/14.pl
-0C12 0C28 TELUGU # Telugu In/14.pl
-0C2A 0C33 TELUGU # Telugu In/14.pl
-0C35 0C39 TELUGU # Telugu In/14.pl
-0C3E 0C40 TELUGU # Telugu In/14.pl
-0C41 0C44 TELUGU # Telugu In/14.pl
-0C46 0C48 TELUGU # Telugu In/14.pl
-0C4A 0C4D TELUGU # Telugu In/14.pl
-0C55 0C56 TELUGU # Telugu In/14.pl
-0C60 0C61 TELUGU # Telugu In/14.pl
-0C66 0C6F TELUGU # Telugu In/14.pl
-0C82 0C83 KANNADA # Kannada In/15.pl
-0C85 0C8C KANNADA # Kannada In/15.pl
-0C8E 0C90 KANNADA # Kannada In/15.pl
-0C92 0CA8 KANNADA # Kannada In/15.pl
-0CAA 0CB3 KANNADA # Kannada In/15.pl
-0CB5 0CB9 KANNADA # Kannada In/15.pl
-0CBE KANNADA # Kannada In/15.pl
-0CBF KANNADA # Kannada In/15.pl
-0CC0 0CC4 KANNADA # Kannada In/15.pl
-0CC6 KANNADA # Kannada In/15.pl
-0CC7 0CC8 KANNADA # Kannada In/15.pl
-0CCA 0CCB KANNADA # Kannada In/15.pl
-0CCC 0CCD KANNADA # Kannada In/15.pl
-0CD5 0CD6 KANNADA # Kannada In/15.pl
-0CDE KANNADA # Kannada In/15.pl
-0CE0 0CE1 KANNADA # Kannada In/15.pl
-0CE6 0CEF KANNADA # Kannada In/15.pl
-0D02 0D03 MALAYALAM # Malayalam In/16.pl
-0D05 0D0C MALAYALAM # Malayalam In/16.pl
-0D0E 0D10 MALAYALAM # Malayalam In/16.pl
-0D12 0D28 MALAYALAM # Malayalam In/16.pl
-0D2A 0D39 MALAYALAM # Malayalam In/16.pl
-0D3E 0D40 MALAYALAM # Malayalam In/16.pl
-0D41 0D43 MALAYALAM # Malayalam In/16.pl
-0D46 0D48 MALAYALAM # Malayalam In/16.pl
-0D4A 0D4C MALAYALAM # Malayalam In/16.pl
-0D4D MALAYALAM # Malayalam In/16.pl
-0D57 MALAYALAM # Malayalam In/16.pl
-0D60 0D61 MALAYALAM # Malayalam In/16.pl
-0D66 0D6F MALAYALAM # Malayalam In/16.pl
-0D82 0D83 SINHALA # Sinhala In/17.pl
-0D85 0D96 SINHALA # Sinhala In/17.pl
-0D9A 0DB1 SINHALA # Sinhala In/17.pl
-0DB3 0DBB SINHALA # Sinhala In/17.pl
-0DBD SINHALA # Sinhala In/17.pl
-0DC0 0DC6 SINHALA # Sinhala In/17.pl
-0DCA SINHALA # Sinhala In/17.pl
-0DCF 0DD1 SINHALA # Sinhala In/17.pl
-0DD2 0DD4 SINHALA # Sinhala In/17.pl
-0DD6 SINHALA # Sinhala In/17.pl
-0DD8 0DDF SINHALA # Sinhala In/17.pl
-0DF2 0DF3 SINHALA # Sinhala In/17.pl
-0E01 0E30 THAI # Thai In/18.pl
-0E31 THAI # Thai In/18.pl
-0E32 0E33 THAI # Thai In/18.pl
-0E34 0E3A THAI # Thai In/18.pl
-0E40 0E45 THAI # Thai In/18.pl
-0E46 THAI # Thai In/18.pl
-0E47 0E4E THAI # Thai In/18.pl
-0E50 0E59 THAI # Thai In/18.pl
-0E81 0E82 LAO # Lao In/19.pl
-0E84 LAO # Lao In/19.pl
-0E87 0E88 LAO # Lao In/19.pl
-0E8A LAO # Lao In/19.pl
-0E8D LAO # Lao In/19.pl
-0E94 0E97 LAO # Lao In/19.pl
-0E99 0E9F LAO # Lao In/19.pl
-0EA1 0EA3 LAO # Lao In/19.pl
-0EA5 LAO # Lao In/19.pl
-0EA7 LAO # Lao In/19.pl
-0EAA 0EAB LAO # Lao In/19.pl
-0EAD 0EB0 LAO # Lao In/19.pl
-0EB1 LAO # Lao In/19.pl
-0EB2 0EB3 LAO # Lao In/19.pl
-0EB4 0EB9 LAO # Lao In/19.pl
-0EBB 0EBC LAO # Lao In/19.pl
-0EBD LAO # Lao In/19.pl
-0EC0 0EC4 LAO # Lao In/19.pl
-0EC6 LAO # Lao In/19.pl
-0EC8 0ECD LAO # Lao In/19.pl
-0ED0 0ED9 LAO # Lao In/19.pl
-0EDC 0EDD LAO # Lao In/19.pl
-0F00 TIBETAN # Tibetan In/20.pl
-0F18 0F19 TIBETAN # Tibetan In/20.pl
-0F20 0F29 TIBETAN # Tibetan In/20.pl
-0F2A 0F33 TIBETAN # Tibetan In/20.pl
-0F35 TIBETAN # Tibetan In/20.pl
-0F37 TIBETAN # Tibetan In/20.pl
-0F39 TIBETAN # Tibetan In/20.pl
-0F40 0F47 TIBETAN # Tibetan In/20.pl
-0F49 0F6A TIBETAN # Tibetan In/20.pl
-0F71 0F7E TIBETAN # Tibetan In/20.pl
-0F7F TIBETAN # Tibetan In/20.pl
-0F80 0F84 TIBETAN # Tibetan In/20.pl
-0F86 0F87 TIBETAN # Tibetan In/20.pl
-0F88 0F8B TIBETAN # Tibetan In/20.pl
-0F90 0F97 TIBETAN # Tibetan In/20.pl
-0F99 0FBC TIBETAN # Tibetan In/20.pl
-0FC6 TIBETAN # Tibetan In/20.pl
-1000 1021 MYANMAR # Myanmar In/21.pl
-1023 1027 MYANMAR # Myanmar In/21.pl
-1029 102A MYANMAR # Myanmar In/21.pl
-102C MYANMAR # Myanmar In/21.pl
-102D 1030 MYANMAR # Myanmar In/21.pl
-1031 MYANMAR # Myanmar In/21.pl
-1032 MYANMAR # Myanmar In/21.pl
-1036 1037 MYANMAR # Myanmar In/21.pl
-1038 MYANMAR # Myanmar In/21.pl
-1039 MYANMAR # Myanmar In/21.pl
-1040 1049 MYANMAR # Myanmar In/21.pl
-1050 1055 MYANMAR # Myanmar In/21.pl
-1056 1057 MYANMAR # Myanmar In/21.pl
-1058 1059 MYANMAR # Myanmar In/21.pl
-10A0 10C5 GEORGIAN # Georgian In/22.pl
-10D0 10F6 GEORGIAN # Georgian In/22.pl
-1100 1159 HANGUL # Hangul In/23.pl
-115F 11A2 HANGUL # Hangul In/23.pl
-11A8 11F9 HANGUL # Hangul In/23.pl
-3131 318E HANGUL # Hangul In/23.pl
-AC00 D7A3 HANGUL # Hangul In/23.pl
-FFA0 FFBE HANGUL # Hangul In/23.pl
-FFC2 FFC7 HANGUL # Hangul In/23.pl
-FFCA FFCF HANGUL # Hangul In/23.pl
-FFD2 FFD7 HANGUL # Hangul In/23.pl
-FFDA FFDC HANGUL # Hangul In/23.pl
-1200 1206 ETHIOPIC # Ethiopic In/24.pl
-1208 1246 ETHIOPIC # Ethiopic In/24.pl
-1248 ETHIOPIC # Ethiopic In/24.pl
-124A 124D ETHIOPIC # Ethiopic In/24.pl
-1250 1256 ETHIOPIC # Ethiopic In/24.pl
-1258 ETHIOPIC # Ethiopic In/24.pl
-125A 125D ETHIOPIC # Ethiopic In/24.pl
-1260 1286 ETHIOPIC # Ethiopic In/24.pl
-1288 ETHIOPIC # Ethiopic In/24.pl
-128A 128D ETHIOPIC # Ethiopic In/24.pl
-1290 12AE ETHIOPIC # Ethiopic In/24.pl
-12B0 ETHIOPIC # Ethiopic In/24.pl
-12B2 12B5 ETHIOPIC # Ethiopic In/24.pl
-12B8 12BE ETHIOPIC # Ethiopic In/24.pl
-12C0 ETHIOPIC # Ethiopic In/24.pl
-12C2 12C5 ETHIOPIC # Ethiopic In/24.pl
-12C8 12CE ETHIOPIC # Ethiopic In/24.pl
-12D0 12D6 ETHIOPIC # Ethiopic In/24.pl
-12D8 12EE ETHIOPIC # Ethiopic In/24.pl
-12F0 130E ETHIOPIC # Ethiopic In/24.pl
-1310 ETHIOPIC # Ethiopic In/24.pl
-1312 1315 ETHIOPIC # Ethiopic In/24.pl
-1318 131E ETHIOPIC # Ethiopic In/24.pl
-1320 1346 ETHIOPIC # Ethiopic In/24.pl
-1348 135A ETHIOPIC # Ethiopic In/24.pl
-1369 1371 ETHIOPIC # Ethiopic In/24.pl
-1372 137C ETHIOPIC # Ethiopic In/24.pl
-13A0 13F4 CHEROKEE # Cherokee In/25.pl
-1401 166C CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl
-166F 1676 CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl
-1681 169A OGHAM # Ogham In/27.pl
-16A0 16EA RUNIC # Runic In/28.pl
-16EE 16F0 RUNIC # Runic In/28.pl
-1780 17B3 KHMER # Khmer In/29.pl
-17B4 17B6 KHMER # Khmer In/29.pl
-17B7 17BD KHMER # Khmer In/29.pl
-17BE 17C5 KHMER # Khmer In/29.pl
-17C6 KHMER # Khmer In/29.pl
-17C7 17C8 KHMER # Khmer In/29.pl
-17C9 17D3 KHMER # Khmer In/29.pl
-17E0 17E9 KHMER # Khmer In/29.pl
-1810 1819 MONGOLIAN # Mongolian In/30.pl
-1820 1842 MONGOLIAN # Mongolian In/30.pl
-1843 MONGOLIAN # Mongolian In/30.pl
-1844 1877 MONGOLIAN # Mongolian In/30.pl
-1880 18A8 MONGOLIAN # Mongolian In/30.pl
-18A9 MONGOLIAN # Mongolian In/30.pl
-3041 3094 HIRAGANA # Hiragana In/31.pl
-309D 309E HIRAGANA # Hiragana In/31.pl
-30A1 30FA KATAKANA # Katakana In/32.pl
-30FD 30FE KATAKANA # Katakana In/32.pl
-FF66 FF6F KATAKANA # Katakana In/32.pl
-FF71 FF9D KATAKANA # Katakana In/32.pl
-3105 312C BOPOMOFO # Bopomofo In/33.pl
-31A0 31B7 BOPOMOFO # Bopomofo In/33.pl
-2E80 2E99 HAN # Han In/34.pl
-2E9B 2EF3 HAN # Han In/34.pl
-2F00 2FD5 HAN # Han In/34.pl
-3005 HAN # Han In/34.pl
-3007 HAN # Han In/34.pl
-3021 3029 HAN # Han In/34.pl
-3038 303A HAN # Han In/34.pl
-3400 4DB5 HAN # Han In/34.pl
-4E00 9FA5 HAN # Han In/34.pl
-F900 FA2D HAN # Han In/34.pl
-20000 2A6D6 HAN # Han In/34.pl
-2F800 2FA1D HAN # Han In/34.pl
-A000 A48C YI # Yi In/35.pl
-A490 A4A1 YI # Yi In/35.pl
-A4A4 A4B3 YI # Yi In/35.pl
-A4B5 A4C0 YI # Yi In/35.pl
-A4C2 A4C4 YI # Yi In/35.pl
-A4C6 YI # Yi In/35.pl
-10300 1031E OLD-ITALIC # OldItalic In/36.pl
-10330 10349 GOTHIC # Gothic In/37.pl
-1034A GOTHIC # Gothic In/37.pl
-10400 10425 DESERET # Deseret In/38.pl
-10428 1044D DESERET # Deseret In/38.pl
-0300 034E INHERITED # Inherited In/39.pl
-0360 0362 INHERITED # Inherited In/39.pl
-0488 0489 INHERITED # Inherited In/39.pl
-0591 05A1 INHERITED # Inherited In/39.pl
-05A3 05B9 INHERITED # Inherited In/39.pl
-05BB 05BD INHERITED # Inherited In/39.pl
-05BF INHERITED # Inherited In/39.pl
-05C1 05C2 INHERITED # Inherited In/39.pl
-05C4 INHERITED # Inherited In/39.pl
-064B 0655 INHERITED # Inherited In/39.pl
-0670 INHERITED # Inherited In/39.pl
-06D6 06DC INHERITED # Inherited In/39.pl
-06DD 06DE INHERITED # Inherited In/39.pl
-06DF 06E4 INHERITED # Inherited In/39.pl
-06E7 06E8 INHERITED # Inherited In/39.pl
-06EA 06ED INHERITED # Inherited In/39.pl
-20D0 20DC INHERITED # Inherited In/39.pl
-20DD 20E0 INHERITED # Inherited In/39.pl
-20E1 INHERITED # Inherited In/39.pl
-20E2 20E3 INHERITED # Inherited In/39.pl
-302A 302F INHERITED # Inherited In/39.pl
-3099 309A INHERITED # Inherited In/39.pl
-FB1E INHERITED # Inherited In/39.pl
-FE20 FE23 INHERITED # Inherited In/39.pl
-1D167 1D169 INHERITED # Inherited In/39.pl
-1D17B 1D182 INHERITED # Inherited In/39.pl
-1D185 1D18B INHERITED # Inherited In/39.pl
-1D1AA 1D1AD INHERITED # Inherited In/39.pl
+0041 005A LATIN # In/0.pl
+0061 007A LATIN # In/0.pl
+00AA LATIN # In/0.pl
+00BA LATIN # In/0.pl
+00C0 00D6 LATIN # In/0.pl
+00D8 00F6 LATIN # In/0.pl
+00F8 01BA LATIN # In/0.pl
+01BB LATIN # In/0.pl
+01BC 01BF LATIN # In/0.pl
+01C0 01C3 LATIN # In/0.pl
+01C4 021F LATIN # In/0.pl
+0222 0233 LATIN # In/0.pl
+0250 02AD LATIN # In/0.pl
+02B0 02B8 LATIN # In/0.pl
+02E0 02E4 LATIN # In/0.pl
+1E00 1E9B LATIN # In/0.pl
+1EA0 1EF9 LATIN # In/0.pl
+207F LATIN # In/0.pl
+212A 212B LATIN # In/0.pl
+FB00 FB06 LATIN # In/0.pl
+FF21 FF3A LATIN # In/0.pl
+FF41 FF5A LATIN # In/0.pl
+00B5 GREEK # In/1.pl
+037A GREEK # In/1.pl
+0386 GREEK # In/1.pl
+0388 038A GREEK # In/1.pl
+038C GREEK # In/1.pl
+038E 03A1 GREEK # In/1.pl
+03A3 03CE GREEK # In/1.pl
+03D0 03D7 GREEK # In/1.pl
+03DA 03F5 GREEK # In/1.pl
+1F00 1F15 GREEK # In/1.pl
+1F18 1F1D GREEK # In/1.pl
+1F20 1F45 GREEK # In/1.pl
+1F48 1F4D GREEK # In/1.pl
+1F50 1F57 GREEK # In/1.pl
+1F59 GREEK # In/1.pl
+1F5B GREEK # In/1.pl
+1F5D GREEK # In/1.pl
+1F5F 1F7D GREEK # In/1.pl
+1F80 1FB4 GREEK # In/1.pl
+1FB6 1FBC GREEK # In/1.pl
+1FBE GREEK # In/1.pl
+1FC2 1FC4 GREEK # In/1.pl
+1FC6 1FCC GREEK # In/1.pl
+1FD0 1FD3 GREEK # In/1.pl
+1FD6 1FDB GREEK # In/1.pl
+1FE0 1FEC GREEK # In/1.pl
+1FF2 1FF4 GREEK # In/1.pl
+1FF6 1FFC GREEK # In/1.pl
+2126 GREEK # In/1.pl
+0400 0481 CYRILLIC # In/2.pl
+0483 0486 CYRILLIC # In/2.pl
+048C 04C4 CYRILLIC # In/2.pl
+04C7 04C8 CYRILLIC # In/2.pl
+04CB 04CC CYRILLIC # In/2.pl
+04D0 04F5 CYRILLIC # In/2.pl
+04F8 04F9 CYRILLIC # In/2.pl
+0531 0556 ARMENIAN # In/3.pl
+0559 ARMENIAN # In/3.pl
+0561 0587 ARMENIAN # In/3.pl
+FB13 FB17 ARMENIAN # In/3.pl
+05D0 05EA HEBREW # In/4.pl
+05F0 05F2 HEBREW # In/4.pl
+FB1D HEBREW # In/4.pl
+FB1F FB28 HEBREW # In/4.pl
+FB2A FB36 HEBREW # In/4.pl
+FB38 FB3C HEBREW # In/4.pl
+FB3E HEBREW # In/4.pl
+FB40 FB41 HEBREW # In/4.pl
+FB43 FB44 HEBREW # In/4.pl
+FB46 FB4F HEBREW # In/4.pl
+0621 063A ARABIC # In/5.pl
+0641 064A ARABIC # In/5.pl
+0671 06D3 ARABIC # In/5.pl
+06D5 ARABIC # In/5.pl
+06E5 06E6 ARABIC # In/5.pl
+06FA 06FC ARABIC # In/5.pl
+FB50 FBB1 ARABIC # In/5.pl
+FBD3 FD3D ARABIC # In/5.pl
+FD50 FD8F ARABIC # In/5.pl
+FD92 FDC7 ARABIC # In/5.pl
+FDF0 FDFB ARABIC # In/5.pl
+FE70 FE72 ARABIC # In/5.pl
+FE74 ARABIC # In/5.pl
+FE76 FEFC ARABIC # In/5.pl
+0710 SYRIAC # In/6.pl
+0711 SYRIAC # In/6.pl
+0712 072C SYRIAC # In/6.pl
+0730 074A SYRIAC # In/6.pl
+0780 07A5 THAANA # In/7.pl
+07A6 07B0 THAANA # In/7.pl
+0901 0902 DEVANAGARI # In/8.pl
+0903 DEVANAGARI # In/8.pl
+0905 0939 DEVANAGARI # In/8.pl
+093C DEVANAGARI # In/8.pl
+093D DEVANAGARI # In/8.pl
+093E 0940 DEVANAGARI # In/8.pl
+0941 0948 DEVANAGARI # In/8.pl
+0949 094C DEVANAGARI # In/8.pl
+094D DEVANAGARI # In/8.pl
+0950 DEVANAGARI # In/8.pl
+0951 0954 DEVANAGARI # In/8.pl
+0958 0961 DEVANAGARI # In/8.pl
+0962 0963 DEVANAGARI # In/8.pl
+0966 096F DEVANAGARI # In/8.pl
+0981 BENGALI # In/9.pl
+0985 098C BENGALI # In/9.pl
+098F 0990 BENGALI # In/9.pl
+0993 09A8 BENGALI # In/9.pl
+09AA 09B0 BENGALI # In/9.pl
+09B2 BENGALI # In/9.pl
+09B6 09B9 BENGALI # In/9.pl
+09BC BENGALI # In/9.pl
+09BE 09C0 BENGALI # In/9.pl
+09C1 09C4 BENGALI # In/9.pl
+09C7 09C8 BENGALI # In/9.pl
+09CB 09CC BENGALI # In/9.pl
+09CD BENGALI # In/9.pl
+09D7 BENGALI # In/9.pl
+09DC 09DD BENGALI # In/9.pl
+09DF 09E1 BENGALI # In/9.pl
+09E2 09E3 BENGALI # In/9.pl
+09E6 09EF BENGALI # In/9.pl
+09F0 09F1 BENGALI # In/9.pl
+0A02 GURMUKHI # In/10.pl
+0A05 0A0A GURMUKHI # In/10.pl
+0A0F 0A10 GURMUKHI # In/10.pl
+0A13 0A28 GURMUKHI # In/10.pl
+0A2A 0A30 GURMUKHI # In/10.pl
+0A32 0A33 GURMUKHI # In/10.pl
+0A35 0A36 GURMUKHI # In/10.pl
+0A38 0A39 GURMUKHI # In/10.pl
+0A3C GURMUKHI # In/10.pl
+0A3E 0A40 GURMUKHI # In/10.pl
+0A41 0A42 GURMUKHI # In/10.pl
+0A47 0A48 GURMUKHI # In/10.pl
+0A4B 0A4D GURMUKHI # In/10.pl
+0A59 0A5C GURMUKHI # In/10.pl
+0A5E GURMUKHI # In/10.pl
+0A66 0A6F GURMUKHI # In/10.pl
+0A70 0A71 GURMUKHI # In/10.pl
+0A72 0A74 GURMUKHI # In/10.pl
+0A81 0A82 GUJARATI # In/11.pl
+0A83 GUJARATI # In/11.pl
+0A85 0A8B GUJARATI # In/11.pl
+0A8D GUJARATI # In/11.pl
+0A8F 0A91 GUJARATI # In/11.pl
+0A93 0AA8 GUJARATI # In/11.pl
+0AAA 0AB0 GUJARATI # In/11.pl
+0AB2 0AB3 GUJARATI # In/11.pl
+0AB5 0AB9 GUJARATI # In/11.pl
+0ABC GUJARATI # In/11.pl
+0ABD GUJARATI # In/11.pl
+0ABE 0AC0 GUJARATI # In/11.pl
+0AC1 0AC5 GUJARATI # In/11.pl
+0AC7 0AC8 GUJARATI # In/11.pl
+0AC9 GUJARATI # In/11.pl
+0ACB 0ACC GUJARATI # In/11.pl
+0ACD GUJARATI # In/11.pl
+0AD0 GUJARATI # In/11.pl
+0AE0 GUJARATI # In/11.pl
+0AE6 0AEF GUJARATI # In/11.pl
+0B01 ORIYA # In/12.pl
+0B02 0B03 ORIYA # In/12.pl
+0B05 0B0C ORIYA # In/12.pl
+0B0F 0B10 ORIYA # In/12.pl
+0B13 0B28 ORIYA # In/12.pl
+0B2A 0B30 ORIYA # In/12.pl
+0B32 0B33 ORIYA # In/12.pl
+0B36 0B39 ORIYA # In/12.pl
+0B3C ORIYA # In/12.pl
+0B3D ORIYA # In/12.pl
+0B3E ORIYA # In/12.pl
+0B3F ORIYA # In/12.pl
+0B40 ORIYA # In/12.pl
+0B41 0B43 ORIYA # In/12.pl
+0B47 0B48 ORIYA # In/12.pl
+0B4B 0B4C ORIYA # In/12.pl
+0B4D ORIYA # In/12.pl
+0B56 ORIYA # In/12.pl
+0B57 ORIYA # In/12.pl
+0B5C 0B5D ORIYA # In/12.pl
+0B5F 0B61 ORIYA # In/12.pl
+0B66 0B6F ORIYA # In/12.pl
+0B82 TAMIL # In/13.pl
+0B83 TAMIL # In/13.pl
+0B85 0B8A TAMIL # In/13.pl
+0B8E 0B90 TAMIL # In/13.pl
+0B92 0B95 TAMIL # In/13.pl
+0B99 0B9A TAMIL # In/13.pl
+0B9C TAMIL # In/13.pl
+0B9E 0B9F TAMIL # In/13.pl
+0BA3 0BA4 TAMIL # In/13.pl
+0BA8 0BAA TAMIL # In/13.pl
+0BAE 0BB5 TAMIL # In/13.pl
+0BB7 0BB9 TAMIL # In/13.pl
+0BBE 0BBF TAMIL # In/13.pl
+0BC0 TAMIL # In/13.pl
+0BC1 0BC2 TAMIL # In/13.pl
+0BC6 0BC8 TAMIL # In/13.pl
+0BCA 0BCC TAMIL # In/13.pl
+0BCD TAMIL # In/13.pl
+0BD7 TAMIL # In/13.pl
+0BE7 0BEF TAMIL # In/13.pl
+0BF0 0BF2 TAMIL # In/13.pl
+0C01 0C03 TELUGU # In/14.pl
+0C05 0C0C TELUGU # In/14.pl
+0C0E 0C10 TELUGU # In/14.pl
+0C12 0C28 TELUGU # In/14.pl
+0C2A 0C33 TELUGU # In/14.pl
+0C35 0C39 TELUGU # In/14.pl
+0C3E 0C40 TELUGU # In/14.pl
+0C41 0C44 TELUGU # In/14.pl
+0C46 0C48 TELUGU # In/14.pl
+0C4A 0C4D TELUGU # In/14.pl
+0C55 0C56 TELUGU # In/14.pl
+0C60 0C61 TELUGU # In/14.pl
+0C66 0C6F TELUGU # In/14.pl
+0C82 0C83 KANNADA # In/15.pl
+0C85 0C8C KANNADA # In/15.pl
+0C8E 0C90 KANNADA # In/15.pl
+0C92 0CA8 KANNADA # In/15.pl
+0CAA 0CB3 KANNADA # In/15.pl
+0CB5 0CB9 KANNADA # In/15.pl
+0CBE KANNADA # In/15.pl
+0CBF KANNADA # In/15.pl
+0CC0 0CC4 KANNADA # In/15.pl
+0CC6 KANNADA # In/15.pl
+0CC7 0CC8 KANNADA # In/15.pl
+0CCA 0CCB KANNADA # In/15.pl
+0CCC 0CCD KANNADA # In/15.pl
+0CD5 0CD6 KANNADA # In/15.pl
+0CDE KANNADA # In/15.pl
+0CE0 0CE1 KANNADA # In/15.pl
+0CE6 0CEF KANNADA # In/15.pl
+0D02 0D03 MALAYALAM # In/16.pl
+0D05 0D0C MALAYALAM # In/16.pl
+0D0E 0D10 MALAYALAM # In/16.pl
+0D12 0D28 MALAYALAM # In/16.pl
+0D2A 0D39 MALAYALAM # In/16.pl
+0D3E 0D40 MALAYALAM # In/16.pl
+0D41 0D43 MALAYALAM # In/16.pl
+0D46 0D48 MALAYALAM # In/16.pl
+0D4A 0D4C MALAYALAM # In/16.pl
+0D4D MALAYALAM # In/16.pl
+0D57 MALAYALAM # In/16.pl
+0D60 0D61 MALAYALAM # In/16.pl
+0D66 0D6F MALAYALAM # In/16.pl
+0D82 0D83 SINHALA # In/17.pl
+0D85 0D96 SINHALA # In/17.pl
+0D9A 0DB1 SINHALA # In/17.pl
+0DB3 0DBB SINHALA # In/17.pl
+0DBD SINHALA # In/17.pl
+0DC0 0DC6 SINHALA # In/17.pl
+0DCA SINHALA # In/17.pl
+0DCF 0DD1 SINHALA # In/17.pl
+0DD2 0DD4 SINHALA # In/17.pl
+0DD6 SINHALA # In/17.pl
+0DD8 0DDF SINHALA # In/17.pl
+0DF2 0DF3 SINHALA # In/17.pl
+0E01 0E30 THAI # In/18.pl
+0E31 THAI # In/18.pl
+0E32 0E33 THAI # In/18.pl
+0E34 0E3A THAI # In/18.pl
+0E40 0E45 THAI # In/18.pl
+0E46 THAI # In/18.pl
+0E47 0E4E THAI # In/18.pl
+0E50 0E59 THAI # In/18.pl
+0E81 0E82 LAO # In/19.pl
+0E84 LAO # In/19.pl
+0E87 0E88 LAO # In/19.pl
+0E8A LAO # In/19.pl
+0E8D LAO # In/19.pl
+0E94 0E97 LAO # In/19.pl
+0E99 0E9F LAO # In/19.pl
+0EA1 0EA3 LAO # In/19.pl
+0EA5 LAO # In/19.pl
+0EA7 LAO # In/19.pl
+0EAA 0EAB LAO # In/19.pl
+0EAD 0EB0 LAO # In/19.pl
+0EB1 LAO # In/19.pl
+0EB2 0EB3 LAO # In/19.pl
+0EB4 0EB9 LAO # In/19.pl
+0EBB 0EBC LAO # In/19.pl
+0EBD LAO # In/19.pl
+0EC0 0EC4 LAO # In/19.pl
+0EC6 LAO # In/19.pl
+0EC8 0ECD LAO # In/19.pl
+0ED0 0ED9 LAO # In/19.pl
+0EDC 0EDD LAO # In/19.pl
+0F00 TIBETAN # In/20.pl
+0F18 0F19 TIBETAN # In/20.pl
+0F20 0F29 TIBETAN # In/20.pl
+0F2A 0F33 TIBETAN # In/20.pl
+0F35 TIBETAN # In/20.pl
+0F37 TIBETAN # In/20.pl
+0F39 TIBETAN # In/20.pl
+0F40 0F47 TIBETAN # In/20.pl
+0F49 0F6A TIBETAN # In/20.pl
+0F71 0F7E TIBETAN # In/20.pl
+0F7F TIBETAN # In/20.pl
+0F80 0F84 TIBETAN # In/20.pl
+0F86 0F87 TIBETAN # In/20.pl
+0F88 0F8B TIBETAN # In/20.pl
+0F90 0F97 TIBETAN # In/20.pl
+0F99 0FBC TIBETAN # In/20.pl
+0FC6 TIBETAN # In/20.pl
+1000 1021 MYANMAR # In/21.pl
+1023 1027 MYANMAR # In/21.pl
+1029 102A MYANMAR # In/21.pl
+102C MYANMAR # In/21.pl
+102D 1030 MYANMAR # In/21.pl
+1031 MYANMAR # In/21.pl
+1032 MYANMAR # In/21.pl
+1036 1037 MYANMAR # In/21.pl
+1038 MYANMAR # In/21.pl
+1039 MYANMAR # In/21.pl
+1040 1049 MYANMAR # In/21.pl
+1050 1055 MYANMAR # In/21.pl
+1056 1057 MYANMAR # In/21.pl
+1058 1059 MYANMAR # In/21.pl
+10A0 10C5 GEORGIAN # In/22.pl
+10D0 10F6 GEORGIAN # In/22.pl
+1100 1159 HANGUL # In/23.pl
+115F 11A2 HANGUL # In/23.pl
+11A8 11F9 HANGUL # In/23.pl
+3131 318E HANGUL # In/23.pl
+AC00 D7A3 HANGUL # In/23.pl
+FFA0 FFBE HANGUL # In/23.pl
+FFC2 FFC7 HANGUL # In/23.pl
+FFCA FFCF HANGUL # In/23.pl
+FFD2 FFD7 HANGUL # In/23.pl
+FFDA FFDC HANGUL # In/23.pl
+1200 1206 ETHIOPIC # In/24.pl
+1208 1246 ETHIOPIC # In/24.pl
+1248 ETHIOPIC # In/24.pl
+124A 124D ETHIOPIC # In/24.pl
+1250 1256 ETHIOPIC # In/24.pl
+1258 ETHIOPIC # In/24.pl
+125A 125D ETHIOPIC # In/24.pl
+1260 1286 ETHIOPIC # In/24.pl
+1288 ETHIOPIC # In/24.pl
+128A 128D ETHIOPIC # In/24.pl
+1290 12AE ETHIOPIC # In/24.pl
+12B0 ETHIOPIC # In/24.pl
+12B2 12B5 ETHIOPIC # In/24.pl
+12B8 12BE ETHIOPIC # In/24.pl
+12C0 ETHIOPIC # In/24.pl
+12C2 12C5 ETHIOPIC # In/24.pl
+12C8 12CE ETHIOPIC # In/24.pl
+12D0 12D6 ETHIOPIC # In/24.pl
+12D8 12EE ETHIOPIC # In/24.pl
+12F0 130E ETHIOPIC # In/24.pl
+1310 ETHIOPIC # In/24.pl
+1312 1315 ETHIOPIC # In/24.pl
+1318 131E ETHIOPIC # In/24.pl
+1320 1346 ETHIOPIC # In/24.pl
+1348 135A ETHIOPIC # In/24.pl
+1369 1371 ETHIOPIC # In/24.pl
+1372 137C ETHIOPIC # In/24.pl
+13A0 13F4 CHEROKEE # In/25.pl
+1401 166C CANADIAN-ABORIGINAL # In/26.pl
+166F 1676 CANADIAN-ABORIGINAL # In/26.pl
+1681 169A OGHAM # In/27.pl
+16A0 16EA RUNIC # In/28.pl
+16EE 16F0 RUNIC # In/28.pl
+1780 17B3 KHMER # In/29.pl
+17B4 17B6 KHMER # In/29.pl
+17B7 17BD KHMER # In/29.pl
+17BE 17C5 KHMER # In/29.pl
+17C6 KHMER # In/29.pl
+17C7 17C8 KHMER # In/29.pl
+17C9 17D3 KHMER # In/29.pl
+17E0 17E9 KHMER # In/29.pl
+1810 1819 MONGOLIAN # In/30.pl
+1820 1842 MONGOLIAN # In/30.pl
+1843 MONGOLIAN # In/30.pl
+1844 1877 MONGOLIAN # In/30.pl
+1880 18A8 MONGOLIAN # In/30.pl
+18A9 MONGOLIAN # In/30.pl
+3041 3094 HIRAGANA # In/31.pl
+309D 309E HIRAGANA # In/31.pl
+30A1 30FA KATAKANA # In/32.pl
+30FD 30FE KATAKANA # In/32.pl
+FF66 FF6F KATAKANA # In/32.pl
+FF71 FF9D KATAKANA # In/32.pl
+3105 312C BOPOMOFO # In/33.pl
+31A0 31B7 BOPOMOFO # In/33.pl
+2E80 2E99 HAN # In/34.pl
+2E9B 2EF3 HAN # In/34.pl
+2F00 2FD5 HAN # In/34.pl
+3005 HAN # In/34.pl
+3007 HAN # In/34.pl
+3021 3029 HAN # In/34.pl
+3038 303A HAN # In/34.pl
+3400 4DB5 HAN # In/34.pl
+4E00 9FA5 HAN # In/34.pl
+F900 FA2D HAN # In/34.pl
+20000 2A6D6 HAN # In/34.pl
+2F800 2FA1D HAN # In/34.pl
+A000 A48C YI # In/35.pl
+A490 A4A1 YI # In/35.pl
+A4A4 A4B3 YI # In/35.pl
+A4B5 A4C0 YI # In/35.pl
+A4C2 A4C4 YI # In/35.pl
+A4C6 YI # In/35.pl
+10300 1031E OLD-ITALIC # In/36.pl
+10330 10349 GOTHIC # In/37.pl
+1034A GOTHIC # In/37.pl
+10400 10425 DESERET # In/38.pl
+10428 1044D DESERET # In/38.pl
+0300 034E INHERITED # In/39.pl
+0360 0362 INHERITED # In/39.pl
+0488 0489 INHERITED # In/39.pl
+0591 05A1 INHERITED # In/39.pl
+05A3 05B9 INHERITED # In/39.pl
+05BB 05BD INHERITED # In/39.pl
+05BF INHERITED # In/39.pl
+05C1 05C2 INHERITED # In/39.pl
+05C4 INHERITED # In/39.pl
+064B 0655 INHERITED # In/39.pl
+0670 INHERITED # In/39.pl
+06D6 06DC INHERITED # In/39.pl
+06DD 06DE INHERITED # In/39.pl
+06DF 06E4 INHERITED # In/39.pl
+06E7 06E8 INHERITED # In/39.pl
+06EA 06ED INHERITED # In/39.pl
+20D0 20DC INHERITED # In/39.pl
+20DD 20E0 INHERITED # In/39.pl
+20E1 INHERITED # In/39.pl
+20E2 20E3 INHERITED # In/39.pl
+302A 302F INHERITED # In/39.pl
+3099 309A INHERITED # In/39.pl
+FB1E INHERITED # In/39.pl
+FE20 FE23 INHERITED # In/39.pl
+1D167 1D169 INHERITED # In/39.pl
+1D17B 1D182 INHERITED # In/39.pl
+1D185 1D18B INHERITED # In/39.pl
+1D1AA 1D1AD INHERITED # In/39.pl
END
diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL
index f86ff696d1..642c66fc72 100755
--- a/lib/unicore/mktables.PL
+++ b/lib/unicore/mktables.PL
@@ -231,7 +231,8 @@ mkdir "To", 0755;
# This is not written for speed...
-my %InId;
+my %InIdScript;
+my %InIdBlock;
my $InId = 0;
foreach $file (@todo) {
@@ -258,9 +259,6 @@ END
close OUT;
}
-# Do Scripts before Blocks so that in case of naming conflicts
-# the more natural one (Script) wins over the artificial one (Block).
-
print "Scripts\n";
open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
@@ -281,13 +279,11 @@ while (<UD>) {
chomp;
($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i;
if ($name) {
- my $InName = lc($name);
- $InName =~ s/\b(\w)/uc($1)/ge;
- $InName =~ s/\W+//g;
+ my $InName = $name;
my $id;
- unless (exists $InId{$InName}) {
+ unless (exists $InIdScript{$InName}) {
print "\t$InName\n";
- $id = $Scripts{$InName} = $InId{$InName} = $InId++;
+ $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++;
open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
print SCRIPT <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
@@ -297,10 +293,10 @@ return <<'END';
EOH
close(SCRIPT);
} else {
- $id = $InId{$InName};
+ $id = $InIdScript{$InName};
}
$last = "" unless defined $last;
- print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+ print OUT "$code\t$last\t$name\t# In/$id.pl\n";
open(SCRIPT, ">>In/$id.pl");
print SCRIPT <<END;
$code $last
@@ -309,7 +305,7 @@ END
}
}
-for my $id (values %InId) {
+for my $id (values %InIdScript) {
open(SCRIPT, ">>In/$id.pl");
print SCRIPT <<END2;
END
@@ -339,22 +335,18 @@ while (<UD>) {
next if /^#/;
next if /^$/;
chomp;
- ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i;
if ($name) {
my $InName = $name;
- $InName =~ s/\W+//g;
print "\t$InName\n";
my $id;
# TODO: only the first one of Private Use blocks qualifies
- unless (exists $InId{$InName}) {
- $InId{$InName} = $InId++;
- } elsif (exists $Scripts{$InName}) {
- $InName .= 'Block';
- $InId{$InName} = $InId++;
+ unless (exists $InIdBlock{$InName}) {
+ $InIdBlock{$InName} = $InId++;
}
- $id = $InId{$InName};
+ $id = $InIdBlock{$InName};
open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
- print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+ print OUT "$code\t$last\t$name\t# In/$id.pl\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -381,9 +373,57 @@ print INID <<EOH;
%utf8::In = (
EOH
-# Order doesn't matter but let's prettyprint anyway.
-foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
- printf INID "%-40s => %3d,\n", "'$in'", $InId{$in};
+# Emit the body of %utf8::In (canonical \p{In...} name => In/N.pl id) and
+# then the whole of %utf8::InPat (lookup prefix => { pattern => name }).
+#
+# Invert the name => id maps so that entries can be visited in id order.
+my %InIdScriptById = reverse %InIdScript;
+my %InIdBlockById  = reverse %InIdBlock;
+
+my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById;
+my @InIdBlockById  = sort { $a <=> $b } keys %InIdBlockById;
+
+# %InId:       final In-name => id, as written into %utf8::In.
+# %InIdLcName: lowercased name => id; consulted only to notice a block
+#              whose name collides case-insensitively with a name that
+#              has already been registered.
+my %InId;
+my %InIdLcName;
+
+# Scripts are registered first and always keep their own name.
+for my $id (@InIdScriptById) {
+    my $name = $InIdScriptById{$id};
+    $InId{$name} = $id;
+    $InIdLcName{lc($name)} = $id;
+}
+
+# A block whose name is already taken gets " Block" appended.
+for my $id (@InIdBlockById) {
+    my $name   = $InIdBlockById{$id};
+    my $lcname = lc($name);
+    if (exists $InIdLcName{$lcname}) {
+        $InId{"$name Block"} = $id;
+    } else {
+        $InId{$name} = $id;
+    }
+    $InIdLcName{$lcname} = $id;
+}
+
+# Order doesn't matter but let's prettyprint anyway.
+my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId;
+
+my %InIdPrefix;
+
+foreach my $in (@InId) {
+    # Any space or dash in the name may be written by the user as a
+    # space, dash, underbar, or nothing at all.
+    my $inpat = $in;
+    $inpat =~ s/([- ])/[- _]?/g;
+
+    # The pattern is looked up by the first three characters of the
+    # lowercased name as the user spelled it.  When a separator falls
+    # inside that window the spelling changes the prefix ("Yi Syllables"
+    # may arrive as "yi ", "yi-", "yi_" or "yis"), so file the entry
+    # under every prefix that a tolerated spelling can produce.
+    my %prefix = ('' => 1);
+    for my $ch (split //, lc($in)) {
+        my @alt = $ch =~ /^[- ]$/ ? ('', '-', ' ', '_') : ($ch);
+        my %next;
+        for my $p (keys %prefix) {
+            if (length($p) >= 3) {
+                $next{$p} = 1;
+            } else {
+                $next{$p . $_} = 1 for @alt;
+            }
+        }
+        %prefix = %next;
+    }
+    push @{$InIdPrefix{$_}}, [ $in, $inpat ] for sort keys %prefix;
+
+    printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
+}
+
+print INID ");\n";
+
+print INID <<EOH;
+%utf8::InPat = (
+EOH
+
+# Plain print, not printf: the strings carry interpolated data and must
+# not be interpreted as a format.
+foreach my $prefix (sort keys %InIdPrefix) {
+    print INID "'$prefix' => {\n";
+    foreach my $ininpat (@{$InIdPrefix{$prefix}}) {
+        my ($in, $inpat) = @$ininpat;
+        print INID "\t'$inpat' => '$in',\n";
+    }
+    print INID "},\n";
}
print INID ");\n";
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index a90e24ce71..e8cf0cc4ab 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -26,11 +26,20 @@ sub SWASHNEW {
while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
my $encoding = $enc{$caller} || "unicore";
(my $file = $type) =~ s!::!/!g;
- if ($file =~ /^In(.+)/) {
+ if ($file =~ /^In[- ]?(.+)/i) {
my $In = $1;
defined %utf8::In || do "$encoding/In.pl";
- if (exists $utf8::In{$In}) {
- $file = "$encoding/In/$utf8::In{$In}";
+ my $prefix = substr(lc($In), 0, 3);
+ if (exists $utf8::InPat{$prefix}) {
+ for my $k (keys %{$utf8::InPat{$prefix}}) {
+ if ($In =~ /^$k$/i) {
+ $In = $utf8::InPat{$prefix}->{$k};
+ if (exists $utf8::In{$In}) {
+ $file = "$encoding/In/$utf8::In{$In}";
+ last;
+ }
+ }
+ }
}
} else {
$file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
@@ -43,7 +52,7 @@ sub SWASHNEW {
|| do "$file.pl"
|| do "$encoding/$file.pl"
|| do "$encoding/Is/${type}.pl"
- || croak("Can't find $encoding character property \"$type\"");
+ || croak("Can't find Unicode character property \"$type\"");
}
$| = 1;
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 63ad011546..f27173cded 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -169,9 +169,10 @@ character with the Unicode uppercase property, while C<\p{M}> matches
any mark character. Single letter properties may omit the brackets,
so that can be written C<\pM> also. Many predefined character classes
are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The
-names of the C<In> classes are the official Unicode script and block
-names but with all non-alphanumeric characters removed, for example
-the block name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
+recommended names of the C<In> classes are the official Unicode script
+and block names but with all non-alphanumeric characters removed, for
+example the block name C<"Latin-1 Supplement"> becomes
+C<\p{InLatin1Supplement}>.
Here is the list as of Unicode 3.1.0 (the two-letter classes) and
as defined by Perl (the one-letter classes) (in Unicode materials
diff --git a/t/op/pat.t b/t/op/pat.t
index 2042f398d5..f5a2eddced 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..715\n";
+print "1..716\n";
BEGIN {
chdir 't' if -d 't';
@@ -2121,9 +2121,13 @@ sub ok ($$) {
}
{
- # high bit bug -- japhy
- my $x = "ab\200d";
- $x =~ /.*?\200/ or print "not ";
- print "ok 715\n";
+ # high bit bug -- japhy
+ my $x = "ab\200d";
+ $x =~ /.*?\200/ or print "not ";
+ print "ok 715\n";
}
+{ # \p{In...} name lookup is lenient: U+0080 must match despite odd case and -/_ separators
+ print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/;
+ print "ok 716\n";
+}