diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
commit | d9efae67d76cc4acd8980b711b5bebc7142b5319 (patch) | |
tree | 85511ac1926809c78e0399fa9cde4033552336cb | |
parent | e8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff) | |
download | perl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz |
Allow for more flexibility in the \p{In...} names, now
case doesn't matter, and any space or dash can be
matched by any space, dash, underbar, or empty.
(may be going too far on leniency)
p4raw-id: //depot/perl@12264
-rw-r--r-- | lib/unicore/Blocks.pl | 198 | ||||
-rw-r--r-- | lib/unicore/In.pl | 552 | ||||
-rw-r--r-- | lib/unicore/Scripts.pl | 880 | ||||
-rwxr-xr-x | lib/unicore/mktables.PL | 88 | ||||
-rw-r--r-- | lib/utf8_heavy.pl | 17 | ||||
-rw-r--r-- | pod/perlunicode.pod | 7 | ||||
-rwxr-xr-x | t/op/pat.t | 14 |
7 files changed, 1045 insertions, 711 deletions
diff --git a/lib/unicore/Blocks.pl b/lib/unicore/Blocks.pl index e45026a996..83c275709e 100644 --- a/lib/unicore/Blocks.pl +++ b/lib/unicore/Blocks.pl @@ -2,103 +2,103 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0000 007F Basic Latin # BasicLatin In/40.pl -0080 00FF Latin-1 Supplement # Latin1Supplement In/41.pl -0100 017F Latin Extended-A # LatinExtendedA In/42.pl -0180 024F Latin Extended-B # LatinExtendedB In/43.pl -0250 02AF IPA Extensions # IPAExtensions In/44.pl -02B0 02FF Spacing Modifier Letters # SpacingModifierLetters In/45.pl -0300 036F Combining Diacritical Marks # CombiningDiacriticalMarks In/46.pl -0370 03FF Greek # GreekBlock In/47.pl -0400 04FF Cyrillic # CyrillicBlock In/48.pl -0530 058F Armenian # ArmenianBlock In/49.pl -0590 05FF Hebrew # HebrewBlock In/50.pl -0600 06FF Arabic # ArabicBlock In/51.pl -0700 074F Syriac # SyriacBlock In/52.pl -0780 07BF Thaana # ThaanaBlock In/53.pl -0900 097F Devanagari # DevanagariBlock In/54.pl -0980 09FF Bengali # BengaliBlock In/55.pl -0A00 0A7F Gurmukhi # GurmukhiBlock In/56.pl -0A80 0AFF Gujarati # GujaratiBlock In/57.pl -0B00 0B7F Oriya # OriyaBlock In/58.pl -0B80 0BFF Tamil # TamilBlock In/59.pl -0C00 0C7F Telugu # TeluguBlock In/60.pl -0C80 0CFF Kannada # KannadaBlock In/61.pl -0D00 0D7F Malayalam # MalayalamBlock In/62.pl -0D80 0DFF Sinhala # SinhalaBlock In/63.pl -0E00 0E7F Thai # ThaiBlock In/64.pl -0E80 0EFF Lao # LaoBlock In/65.pl -0F00 0FFF Tibetan # TibetanBlock In/66.pl -1000 109F Myanmar # MyanmarBlock In/67.pl -10A0 10FF Georgian # GeorgianBlock In/68.pl -1100 11FF Hangul Jamo # HangulJamo In/69.pl -1200 137F Ethiopic # EthiopicBlock In/70.pl -13A0 13FF Cherokee # CherokeeBlock In/71.pl -1400 167F Unified Canadian Aboriginal Syllabics # UnifiedCanadianAboriginalSyllabics In/72.pl -1680 169F Ogham # OghamBlock In/73.pl -16A0 16FF Runic # RunicBlock In/74.pl -1780 17FF Khmer # KhmerBlock In/75.pl -1800 18AF Mongolian # MongolianBlock In/76.pl -1E00 1EFF Latin Extended Additional # LatinExtendedAdditional In/77.pl -1F00 1FFF Greek Extended # GreekExtended In/78.pl -2000 206F General Punctuation # GeneralPunctuation In/79.pl -2070 209F Superscripts and Subscripts # SuperscriptsandSubscripts In/80.pl -20A0 20CF Currency Symbols # CurrencySymbols In/81.pl -20D0 20FF Combining Marks for Symbols # CombiningMarksforSymbols In/82.pl -2100 214F Letterlike Symbols # LetterlikeSymbols In/83.pl -2150 218F Number Forms # NumberForms In/84.pl -2190 21FF Arrows # Arrows In/85.pl -2200 22FF Mathematical Operators # MathematicalOperators In/86.pl -2300 23FF Miscellaneous Technical # MiscellaneousTechnical In/87.pl -2400 243F Control Pictures # ControlPictures In/88.pl -2440 245F Optical Character Recognition # OpticalCharacterRecognition In/89.pl -2460 24FF Enclosed Alphanumerics # EnclosedAlphanumerics In/90.pl -2500 257F Box Drawing # BoxDrawing In/91.pl -2580 259F Block Elements # BlockElements In/92.pl -25A0 25FF Geometric Shapes # GeometricShapes In/93.pl -2600 26FF Miscellaneous Symbols # MiscellaneousSymbols In/94.pl -2700 27BF Dingbats # Dingbats In/95.pl -2800 28FF Braille Patterns # BraillePatterns In/96.pl -2E80 2EFF CJK Radicals Supplement # CJKRadicalsSupplement In/97.pl -2F00 2FDF Kangxi Radicals # KangxiRadicals In/98.pl -2FF0 2FFF Ideographic Description Characters # IdeographicDescriptionCharacters In/99.pl -3000 303F CJK Symbols and Punctuation # CJKSymbolsandPunctuation In/100.pl -3040 309F Hiragana # HiraganaBlock In/101.pl -30A0 30FF Katakana # KatakanaBlock In/102.pl -3100 312F Bopomofo # BopomofoBlock In/103.pl -3130 318F Hangul Compatibility Jamo # HangulCompatibilityJamo In/104.pl -3190 319F Kanbun # Kanbun In/105.pl -31A0 31BF Bopomofo Extended # BopomofoExtended In/106.pl -3200 32FF Enclosed CJK Letters and Months # EnclosedCJKLettersandMonths In/107.pl -3300 33FF CJK Compatibility # CJKCompatibility In/108.pl -3400 4DB5 CJK Unified Ideographs Extension A # CJKUnifiedIdeographsExtensionA In/109.pl -4E00 9FFF CJK Unified Ideographs # CJKUnifiedIdeographs In/110.pl -A000 A48F Yi Syllables # YiSyllables In/111.pl -A490 A4CF Yi Radicals # YiRadicals In/112.pl -AC00 D7A3 Hangul Syllables # HangulSyllables In/113.pl -D800 DB7F High Surrogates # HighSurrogates In/114.pl -DB80 DBFF High Private Use Surrogates # HighPrivateUseSurrogates In/115.pl -DC00 DFFF Low Surrogates # LowSurrogates In/116.pl -E000 F8FF Private Use # PrivateUse In/117.pl -F900 FAFF CJK Compatibility Ideographs # CJKCompatibilityIdeographs In/118.pl -FB00 FB4F Alphabetic Presentation Forms # AlphabeticPresentationForms In/119.pl -FB50 FDFF Arabic Presentation Forms-A # ArabicPresentationFormsA In/120.pl -FE20 FE2F Combining Half Marks # CombiningHalfMarks In/121.pl -FE30 FE4F CJK Compatibility Forms # CJKCompatibilityForms In/122.pl -FE50 FE6F Small Form Variants # SmallFormVariants In/123.pl -FE70 FEFE Arabic Presentation Forms-B # ArabicPresentationFormsB In/124.pl -FEFF FEFF Specials # Specials In/125.pl -FF00 FFEF Halfwidth and Fullwidth Forms # HalfwidthandFullwidthForms In/126.pl -FFF0 FFFD Specials # Specials In/125.pl -10300 1032F Old Italic # OldItalicBlock In/127.pl -10330 1034F Gothic # GothicBlock In/128.pl -10400 1044F Deseret # DeseretBlock In/129.pl -1D000 1D0FF Byzantine Musical Symbols # ByzantineMusicalSymbols In/130.pl -1D100 1D1FF Musical Symbols # MusicalSymbols In/131.pl -1D400 1D7FF Mathematical Alphanumeric Symbols # MathematicalAlphanumericSymbols In/132.pl -20000 2A6D6 CJK Unified Ideographs Extension B # CJKUnifiedIdeographsExtensionB In/133.pl -2F800 2FA1F CJK Compatibility Ideographs Supplement # CJKCompatibilityIdeographsSupplement In/134.pl -E0000 E007F Tags # Tags In/135.pl -F0000 FFFFD Private Use # PrivateUse In/117.pl -100000 10FFFD Private Use # PrivateUse In/117.pl +0000 007F Basic Latin # In/40.pl +0080 00FF Latin-1 Supplement # In/41.pl +0100 017F Latin Extended-A # In/42.pl +0180 024F Latin Extended-B # In/43.pl +0250 02AF IPA Extensions # In/44.pl +02B0 02FF Spacing Modifier Letters # In/45.pl +0300 036F Combining Diacritical Marks # In/46.pl +0370 03FF Greek # In/47.pl +0400 04FF Cyrillic # In/48.pl +0530 058F Armenian # In/49.pl +0590 05FF Hebrew # In/50.pl +0600 06FF Arabic # In/51.pl +0700 074F Syriac # In/52.pl +0780 07BF Thaana # In/53.pl +0900 097F Devanagari # In/54.pl +0980 09FF Bengali # In/55.pl +0A00 0A7F Gurmukhi # In/56.pl +0A80 0AFF Gujarati # In/57.pl +0B00 0B7F Oriya # In/58.pl +0B80 0BFF Tamil # In/59.pl +0C00 0C7F Telugu # In/60.pl +0C80 0CFF Kannada # In/61.pl +0D00 0D7F Malayalam # In/62.pl +0D80 0DFF Sinhala # In/63.pl +0E00 0E7F Thai # In/64.pl +0E80 0EFF Lao # In/65.pl +0F00 0FFF Tibetan # In/66.pl +1000 109F Myanmar # In/67.pl +10A0 10FF Georgian # In/68.pl +1100 11FF Hangul Jamo # In/69.pl +1200 137F Ethiopic # In/70.pl +13A0 13FF Cherokee # In/71.pl +1400 167F Unified Canadian Aboriginal Syllabics # In/72.pl +1680 169F Ogham # In/73.pl +16A0 16FF Runic # In/74.pl +1780 17FF Khmer # In/75.pl +1800 18AF Mongolian # In/76.pl +1E00 1EFF Latin Extended Additional # In/77.pl +1F00 1FFF Greek Extended # In/78.pl +2000 206F General Punctuation # In/79.pl +2070 209F Superscripts and Subscripts # In/80.pl +20A0 20CF Currency Symbols # In/81.pl +20D0 20FF Combining Marks for Symbols # In/82.pl +2100 214F Letterlike Symbols # In/83.pl +2150 218F Number Forms # In/84.pl +2190 21FF Arrows # In/85.pl +2200 22FF Mathematical Operators # In/86.pl +2300 23FF Miscellaneous Technical # In/87.pl +2400 243F Control Pictures # In/88.pl +2440 245F Optical Character Recognition # In/89.pl +2460 24FF Enclosed Alphanumerics # In/90.pl +2500 257F Box Drawing # In/91.pl +2580 259F Block Elements # In/92.pl +25A0 25FF Geometric Shapes # In/93.pl +2600 26FF Miscellaneous Symbols # In/94.pl +2700 27BF Dingbats # In/95.pl +2800 28FF Braille Patterns # In/96.pl +2E80 2EFF CJK Radicals Supplement # In/97.pl +2F00 2FDF Kangxi Radicals # In/98.pl +2FF0 2FFF Ideographic Description Characters # In/99.pl +3000 303F CJK Symbols and Punctuation # In/100.pl +3040 309F Hiragana # In/101.pl +30A0 30FF Katakana # In/102.pl +3100 312F Bopomofo # In/103.pl +3130 318F Hangul Compatibility Jamo # In/104.pl +3190 319F Kanbun # In/105.pl +31A0 31BF Bopomofo Extended # In/106.pl +3200 32FF Enclosed CJK Letters and Months # In/107.pl +3300 33FF CJK Compatibility # In/108.pl +3400 4DB5 CJK Unified Ideographs Extension A # In/109.pl +4E00 9FFF CJK Unified Ideographs # In/110.pl +A000 A48F Yi Syllables # In/111.pl +A490 A4CF Yi Radicals # In/112.pl +AC00 D7A3 Hangul Syllables # In/113.pl +D800 DB7F High Surrogates # In/114.pl +DB80 DBFF High Private Use Surrogates # In/115.pl +DC00 DFFF Low Surrogates # In/116.pl +E000 F8FF Private Use # In/117.pl +F900 FAFF CJK Compatibility Ideographs # In/118.pl +FB00 FB4F Alphabetic Presentation Forms # In/119.pl +FB50 FDFF Arabic Presentation Forms-A # In/120.pl +FE20 FE2F Combining Half Marks # In/121.pl +FE30 FE4F CJK Compatibility Forms # In/122.pl +FE50 FE6F Small Form Variants # In/123.pl +FE70 FEFE Arabic Presentation Forms-B # In/124.pl +FEFF FEFF Specials # In/125.pl +FF00 FFEF Halfwidth and Fullwidth Forms # In/126.pl +FFF0 FFFD Specials # In/125.pl +10300 1032F Old Italic # In/127.pl +10330 1034F Gothic # In/128.pl +10400 1044F Deseret # In/129.pl +1D000 1D0FF Byzantine Musical Symbols # In/130.pl +1D100 1D1FF Musical Symbols # In/131.pl +1D400 1D7FF Mathematical Alphanumeric Symbols # In/132.pl +20000 2A6D6 CJK Unified Ideographs Extension B # In/133.pl +2F800 2FA1F CJK Compatibility Ideographs Supplement # In/134.pl +E0000 E007F Tags # In/135.pl +F0000 FFFFD Private Use # In/117.pl +100000 10FFFD Private Use # In/117.pl END diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index a6c24199a7..c11445c26b 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -2,140 +2,420 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! %utf8::In = ( -'Latin' => 0, -'Greek' => 1, -'Cyrillic' => 2, -'Armenian' => 3, -'Hebrew' => 4, -'Arabic' => 5, -'Syriac' => 6, -'Thaana' => 7, -'Devanagari' => 8, -'Bengali' => 9, -'Gurmukhi' => 10, -'Gujarati' => 11, -'Oriya' => 12, -'Tamil' => 13, -'Telugu' => 14, -'Kannada' => 15, -'Malayalam' => 16, -'Sinhala' => 17, -'Thai' => 18, -'Lao' => 19, -'Tibetan' => 20, -'Myanmar' => 21, -'Georgian' => 22, -'Hangul' => 23, -'Ethiopic' => 24, -'Cherokee' => 25, -'CanadianAboriginal' => 26, -'Ogham' => 27, -'Runic' => 28, -'Khmer' => 29, -'Mongolian' => 30, -'Hiragana' => 31, -'Katakana' => 32, -'Bopomofo' => 33, -'Han' => 34, -'Yi' => 35, -'OldItalic' => 36, -'Gothic' => 37, -'Deseret' => 38, -'Inherited' => 39, -'BasicLatin' => 40, -'Latin1Supplement' => 41, -'LatinExtendedA' => 42, -'LatinExtendedB' => 43, -'IPAExtensions' => 44, -'SpacingModifierLetters' => 45, -'CombiningDiacriticalMarks' => 46, -'GreekBlock' => 47, -'CyrillicBlock' => 48, -'ArmenianBlock' => 49, -'HebrewBlock' => 50, -'ArabicBlock' => 51, -'SyriacBlock' => 52, -'ThaanaBlock' => 53, -'DevanagariBlock' => 54, -'BengaliBlock' => 55, -'GurmukhiBlock' => 56, -'GujaratiBlock' => 57, -'OriyaBlock' => 58, -'TamilBlock' => 59, -'TeluguBlock' => 60, -'KannadaBlock' => 61, -'MalayalamBlock' => 62, -'SinhalaBlock' => 63, -'ThaiBlock' => 64, -'LaoBlock' => 65, -'TibetanBlock' => 66, -'MyanmarBlock' => 67, -'GeorgianBlock' => 68, -'HangulJamo' => 69, -'EthiopicBlock' => 70, -'CherokeeBlock' => 71, -'UnifiedCanadianAboriginalSyllabics' => 72, -'OghamBlock' => 73, -'RunicBlock' => 74, -'KhmerBlock' => 75, -'MongolianBlock' => 76, -'LatinExtendedAdditional' => 77, -'GreekExtended' => 78, -'GeneralPunctuation' => 79, -'SuperscriptsandSubscripts' => 80, -'CurrencySymbols' => 81, -'CombiningMarksforSymbols' => 82, -'LetterlikeSymbols' => 83, -'NumberForms' => 84, -'Arrows' => 85, -'MathematicalOperators' => 86, -'MiscellaneousTechnical' => 87, -'ControlPictures' => 88, -'OpticalCharacterRecognition' => 89, -'EnclosedAlphanumerics' => 90, -'BoxDrawing' => 91, -'BlockElements' => 92, -'GeometricShapes' => 93, -'MiscellaneousSymbols' => 94, -'Dingbats' => 95, -'BraillePatterns' => 96, -'CJKRadicalsSupplement' => 97, -'KangxiRadicals' => 98, -'IdeographicDescriptionCharacters' => 99, -'CJKSymbolsandPunctuation' => 100, -'HiraganaBlock' => 101, -'KatakanaBlock' => 102, -'BopomofoBlock' => 103, -'HangulCompatibilityJamo' => 104, -'Kanbun' => 105, -'BopomofoExtended' => 106, -'EnclosedCJKLettersandMonths' => 107, -'CJKCompatibility' => 108, -'CJKUnifiedIdeographsExtensionA' => 109, -'CJKUnifiedIdeographs' => 110, -'YiSyllables' => 111, -'YiRadicals' => 112, -'HangulSyllables' => 113, -'HighSurrogates' => 114, -'HighPrivateUseSurrogates' => 115, -'LowSurrogates' => 116, -'PrivateUse' => 117, -'CJKCompatibilityIdeographs' => 118, -'AlphabeticPresentationForms' => 119, -'ArabicPresentationFormsA' => 120, -'CombiningHalfMarks' => 121, -'CJKCompatibilityForms' => 122, -'SmallFormVariants' => 123, -'ArabicPresentationFormsB' => 124, -'Specials' => 125, -'HalfwidthandFullwidthForms' => 126, -'OldItalicBlock' => 127, -'GothicBlock' => 128, -'DeseretBlock' => 129, -'ByzantineMusicalSymbols' => 130, -'MusicalSymbols' => 131, -'MathematicalAlphanumericSymbols' => 132, -'CJKUnifiedIdeographsExtensionB' => 133, -'CJKCompatibilityIdeographsSupplement' => 134, -'Tags' => 135, +'LATIN' => 0, +'GREEK' => 1, +'CYRILLIC' => 2, +'ARMENIAN' => 3, +'HEBREW' => 4, +'ARABIC' => 5, +'SYRIAC' => 6, +'THAANA' => 7, +'DEVANAGARI' => 8, +'BENGALI' => 9, +'GURMUKHI' => 10, +'GUJARATI' => 11, +'ORIYA' => 12, +'TAMIL' => 13, +'TELUGU' => 14, +'KANNADA' => 15, +'MALAYALAM' => 16, +'SINHALA' => 17, +'THAI' => 18, +'LAO' => 19, +'TIBETAN' => 20, +'MYANMAR' => 21, +'GEORGIAN' => 22, +'HANGUL' => 23, +'ETHIOPIC' => 24, +'CHEROKEE' => 25, +'CANADIAN-ABORIGINAL' => 26, +'OGHAM' => 27, +'RUNIC' => 28, +'KHMER' => 29, +'MONGOLIAN' => 30, +'HIRAGANA' => 31, +'KATAKANA' => 32, +'BOPOMOFO' => 33, +'HAN' => 34, +'YI' => 35, +'OLD-ITALIC' => 36, +'GOTHIC' => 37, +'DESERET' => 38, +'INHERITED' => 39, +'Basic Latin' => 40, +'Latin-1 Supplement' => 41, +'Latin Extended-A' => 42, +'Latin Extended-B' => 43, +'IPA Extensions' => 44, +'Spacing Modifier Letters' => 45, +'Combining Diacritical Marks' => 46, +'Greek Block' => 47, +'Cyrillic Block' => 48, +'Armenian Block' => 49, +'Hebrew Block' => 50, +'Arabic Block' => 51, +'Syriac Block' => 52, +'Thaana Block' => 53, +'Devanagari Block' => 54, +'Bengali Block' => 55, +'Gurmukhi Block' => 56, +'Gujarati Block' => 57, +'Oriya Block' => 58, +'Tamil Block' => 59, +'Telugu Block' => 60, +'Kannada Block' => 61, +'Malayalam Block' => 62, +'Sinhala Block' => 63, +'Thai Block' => 64, +'Lao Block' => 65, +'Tibetan Block' => 66, +'Myanmar Block' => 67, +'Georgian Block' => 68, +'Hangul Jamo' => 69, +'Ethiopic Block' => 70, +'Cherokee Block' => 71, +'Unified Canadian Aboriginal Syllabics' => 72, +'Ogham Block' => 73, +'Runic Block' => 74, +'Khmer Block' => 75, +'Mongolian Block' => 76, +'Latin Extended Additional' => 77, +'Greek Extended' => 78, +'General Punctuation' => 79, +'Superscripts and Subscripts' => 80, +'Currency Symbols' => 81, +'Combining Marks for Symbols' => 82, +'Letterlike Symbols' => 83, +'Number Forms' => 84, +'Arrows' => 85, +'Mathematical Operators' => 86, +'Miscellaneous Technical' => 87, +'Control Pictures' => 88, +'Optical Character Recognition' => 89, +'Enclosed Alphanumerics' => 90, +'Box Drawing' => 91, +'Block Elements' => 92, +'Geometric Shapes' => 93, +'Miscellaneous Symbols' => 94, +'Dingbats' => 95, +'Braille Patterns' => 96, +'CJK Radicals Supplement' => 97, +'Kangxi Radicals' => 98, +'Ideographic Description Characters' => 99, +'CJK Symbols and Punctuation' => 100, +'Hiragana Block' => 101, +'Katakana Block' => 102, +'Bopomofo Block' => 103, +'Hangul Compatibility Jamo' => 104, +'Kanbun' => 105, +'Bopomofo Extended' => 106, +'Enclosed CJK Letters and Months' => 107, +'CJK Compatibility' => 108, +'CJK Unified Ideographs Extension A' => 109, +'CJK Unified Ideographs' => 110, +'Yi Syllables' => 111, +'Yi Radicals' => 112, +'Hangul Syllables' => 113, +'High Surrogates' => 114, +'High Private Use Surrogates' => 115, +'Low Surrogates' => 116, +'Private Use' => 117, +'CJK Compatibility Ideographs' => 118, +'Alphabetic Presentation Forms' => 119, +'Arabic Presentation Forms-A' => 120, +'Combining Half Marks' => 121, +'CJK Compatibility Forms' => 122, +'Small Form Variants' => 123, +'Arabic Presentation Forms-B' => 124, +'Specials' => 125, +'Halfwidth and Fullwidth Forms' => 126, +'Old Italic' => 127, +'Gothic Block' => 128, +'Deseret Block' => 129, +'Byzantine Musical Symbols' => 130, +'Musical Symbols' => 131, +'Mathematical Alphanumeric Symbols' => 132, +'CJK Unified Ideographs Extension B' => 133, +'CJK Compatibility Ideographs Supplement' => 134, +'Tags' => 135, +); +%utf8::InPat = ( +'alp' => { + 'Alphabetic[- _]?Presentation[- _]?Forms' => 'Alphabetic Presentation Forms', +}, +'ara' => { + 'ARABIC' => 'ARABIC', + 'Arabic[- _]?Block' => 'Arabic Block', + 'Arabic[- _]?Presentation[- _]?Forms[- _]?A' => 'Arabic Presentation Forms-A', + 'Arabic[- _]?Presentation[- _]?Forms[- _]?B' => 'Arabic Presentation Forms-B', +}, +'arm' => { + 'ARMENIAN' => 'ARMENIAN', + 'Armenian[- _]?Block' => 'Armenian Block', +}, +'arr' => { + 'Arrows' => 'Arrows', +}, +'bas' => { + 'Basic[- _]?Latin' => 'Basic Latin', +}, +'ben' => { + 'BENGALI' => 'BENGALI', + 'Bengali[- _]?Block' => 'Bengali Block', +}, +'blo' => { + 'Block[- _]?Elements' => 'Block Elements', +}, +'bop' => { + 'BOPOMOFO' => 'BOPOMOFO', + 'Bopomofo[- _]?Block' => 'Bopomofo Block', + 'Bopomofo[- _]?Extended' => 'Bopomofo Extended', +}, +'box' => { + 'Box[- _]?Drawing' => 'Box Drawing', +}, +'bra' => { + 'Braille[- _]?Patterns' => 'Braille Patterns', +}, +'byz' => { + 'Byzantine[- _]?Musical[- _]?Symbols' => 'Byzantine Musical Symbols', +}, +'can' => { + 'CANADIAN[- _]?ABORIGINAL' => 'CANADIAN-ABORIGINAL', +}, +'che' => { + 'CHEROKEE' => 'CHEROKEE', + 'Cherokee[- _]?Block' => 'Cherokee Block', +}, +'cjk' => { + 'CJK[- _]?Radicals[- _]?Supplement' => 'CJK Radicals Supplement', + 'CJK[- _]?Symbols[- _]?and[- _]?Punctuation' => 'CJK Symbols and Punctuation', + 'CJK[- _]?Compatibility' => 'CJK Compatibility', + 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?A' => 'CJK Unified Ideographs Extension A', + 'CJK[- _]?Unified[- _]?Ideographs' => 'CJK Unified Ideographs', + 'CJK[- _]?Compatibility[- _]?Ideographs' => 'CJK Compatibility Ideographs', + 'CJK[- _]?Compatibility[- _]?Forms' => 'CJK Compatibility Forms', + 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?B' => 'CJK Unified Ideographs Extension B', + 'CJK[- _]?Compatibility[- _]?Ideographs[- _]?Supplement' => 'CJK Compatibility Ideographs Supplement', +}, +'com' => { + 'Combining[- _]?Diacritical[- _]?Marks' => 'Combining Diacritical Marks', + 'Combining[- _]?Marks[- _]?for[- _]?Symbols' => 'Combining Marks for Symbols', + 'Combining[- _]?Half[- _]?Marks' => 'Combining Half Marks', +}, +'con' => { + 'Control[- _]?Pictures' => 'Control Pictures', +}, +'cur' => { + 'Currency[- _]?Symbols' => 'Currency Symbols', +}, +'cyr' => { + 'CYRILLIC' => 'CYRILLIC', + 'Cyrillic[- _]?Block' => 'Cyrillic Block', +}, +'des' => { + 'DESERET' => 'DESERET', + 'Deseret[- _]?Block' => 'Deseret Block', +}, +'dev' => { + 'DEVANAGARI' => 'DEVANAGARI', + 'Devanagari[- _]?Block' => 'Devanagari Block', +}, +'din' => { + 'Dingbats' => 'Dingbats', +}, +'enc' => { + 'Enclosed[- _]?Alphanumerics' => 'Enclosed Alphanumerics', + 'Enclosed[- _]?CJK[- _]?Letters[- _]?and[- _]?Months' => 'Enclosed CJK Letters and Months', +}, +'eth' => { + 'ETHIOPIC' => 'ETHIOPIC', + 'Ethiopic[- _]?Block' => 'Ethiopic Block', +}, +'gen' => { + 'General[- _]?Punctuation' => 'General Punctuation', +}, +'geo' => { + 'GEORGIAN' => 'GEORGIAN', + 'Georgian[- _]?Block' => 'Georgian Block', + 'Geometric[- _]?Shapes' => 'Geometric Shapes', +}, +'got' => { + 'GOTHIC' => 'GOTHIC', + 'Gothic[- _]?Block' => 'Gothic Block', +}, +'gre' => { + 'GREEK' => 'GREEK', + 'Greek[- _]?Block' => 'Greek Block', + 'Greek[- _]?Extended' => 'Greek Extended', +}, +'guj' => { + 'GUJARATI' => 'GUJARATI', + 'Gujarati[- _]?Block' => 'Gujarati Block', +}, +'gur' => { + 'GURMUKHI' => 'GURMUKHI', + 'Gurmukhi[- _]?Block' => 'Gurmukhi Block', +}, +'hal' => { + 'Halfwidth[- _]?and[- _]?Fullwidth[- _]?Forms' => 'Halfwidth and Fullwidth Forms', +}, +'han' => { + 'HANGUL' => 'HANGUL', + 'HAN' => 'HAN', + 'Hangul[- _]?Jamo' => 'Hangul Jamo', + 'Hangul[- _]?Compatibility[- _]?Jamo' => 'Hangul Compatibility Jamo', + 'Hangul[- _]?Syllables' => 'Hangul Syllables', +}, +'heb' => { + 'HEBREW' => 'HEBREW', + 'Hebrew[- _]?Block' => 'Hebrew Block', +}, +'hig' => { + 'High[- _]?Surrogates' => 'High Surrogates', + 'High[- _]?Private[- _]?Use[- _]?Surrogates' => 'High Private Use Surrogates', +}, +'hir' => { + 'HIRAGANA' => 'HIRAGANA', + 'Hiragana[- _]?Block' => 'Hiragana Block', +}, +'ide' => { + 'Ideographic[- _]?Description[- _]?Characters' => 'Ideographic Description Characters', +}, +'inh' => { + 'INHERITED' => 'INHERITED', +}, +'ipa' => { + 'IPA[- _]?Extensions' => 'IPA Extensions', +}, +'kan' => { + 'KANNADA' => 'KANNADA', + 'Kannada[- _]?Block' => 'Kannada Block', + 'Kangxi[- _]?Radicals' => 'Kangxi Radicals', + 'Kanbun' => 'Kanbun', +}, +'kat' => { + 'KATAKANA' => 'KATAKANA', + 'Katakana[- _]?Block' => 'Katakana Block', +}, +'khm' => { + 'KHMER' => 'KHMER', + 'Khmer[- _]?Block' => 'Khmer Block', +}, +'lao' => { + 'LAO' => 'LAO', + 'Lao[- _]?Block' => 'Lao Block', +}, +'lat' => { + 'LATIN' => 'LATIN', + 'Latin[- _]?1[- _]?Supplement' => 'Latin-1 Supplement', + 'Latin[- _]?Extended[- _]?A' => 'Latin Extended-A', + 'Latin[- _]?Extended[- _]?B' => 'Latin Extended-B', + 'Latin[- _]?Extended[- _]?Additional' => 'Latin Extended Additional', +}, +'let' => { + 'Letterlike[- _]?Symbols' => 'Letterlike Symbols', +}, +'low' => { + 'Low[- _]?Surrogates' => 'Low Surrogates', +}, +'mal' => { + 'MALAYALAM' => 'MALAYALAM', + 'Malayalam[- _]?Block' => 'Malayalam Block', +}, +'mat' => { + 'Mathematical[- _]?Operators' => 'Mathematical Operators', + 'Mathematical[- _]?Alphanumeric[- _]?Symbols' => 'Mathematical Alphanumeric Symbols', +}, +'mis' => { + 'Miscellaneous[- _]?Technical' => 'Miscellaneous Technical', + 'Miscellaneous[- _]?Symbols' => 'Miscellaneous Symbols', +}, +'mon' => { + 'MONGOLIAN' => 'MONGOLIAN', + 'Mongolian[- _]?Block' => 'Mongolian Block', +}, +'mus' => { + 'Musical[- _]?Symbols' => 'Musical Symbols', +}, +'mya' => { + 'MYANMAR' => 'MYANMAR', + 'Myanmar[- _]?Block' => 'Myanmar Block', +}, +'num' => { + 'Number[- _]?Forms' => 'Number Forms', +}, +'ogh' => { + 'OGHAM' => 'OGHAM', + 'Ogham[- _]?Block' => 'Ogham Block', +}, +'old' => { + 'OLD[- _]?ITALIC' => 'OLD-ITALIC', + 'Old[- _]?Italic' => 'Old Italic', +}, +'opt' => { + 'Optical[- _]?Character[- _]?Recognition' => 'Optical Character Recognition', +}, +'ori' => { + 'ORIYA' => 'ORIYA', + 'Oriya[- _]?Block' => 'Oriya Block', +}, +'pri' => { + 'Private[- _]?Use' => 'Private Use', +}, +'run' => { + 'RUNIC' => 'RUNIC', + 'Runic[- _]?Block' => 'Runic Block', +}, +'sin' => { + 'SINHALA' => 'SINHALA', + 'Sinhala[- _]?Block' => 'Sinhala Block', +}, +'sma' => { + 'Small[- _]?Form[- _]?Variants' => 'Small Form Variants', +}, +'spa' => { + 'Spacing[- _]?Modifier[- _]?Letters' => 'Spacing Modifier Letters', +}, +'spe' => { + 'Specials' => 'Specials', +}, +'sup' => { + 'Superscripts[- _]?and[- _]?Subscripts' => 'Superscripts and Subscripts', +}, +'syr' => { + 'SYRIAC' => 'SYRIAC', + 'Syriac[- _]?Block' => 'Syriac Block', +}, +'tag' => { + 'Tags' => 'Tags', +}, +'tam' => { + 'TAMIL' => 'TAMIL', + 'Tamil[- _]?Block' => 'Tamil Block', +}, +'tel' => { + 'TELUGU' => 'TELUGU', + 'Telugu[- _]?Block' => 'Telugu Block', +}, +'tha' => { + 'THAANA' => 'THAANA', + 'THAI' => 'THAI', + 'Thaana[- _]?Block' => 'Thaana Block', + 'Thai[- _]?Block' => 'Thai Block', +}, +'tib' => { + 'TIBETAN' => 'TIBETAN', + 'Tibetan[- _]?Block' => 'Tibetan Block', +}, +'uni' => { + 'Unified[- _]?Canadian[- _]?Aboriginal[- _]?Syllabics' => 'Unified Canadian Aboriginal Syllabics', +}, +'yi' => { + 'YI' => 'YI', +}, +'yi ' => { + 'Yi[- _]?Syllables' => 'Yi Syllables', + 'Yi[- _]?Radicals' => 'Yi Radicals', +}, ); diff --git a/lib/unicore/Scripts.pl b/lib/unicore/Scripts.pl index ed0168e086..b924f3ab5f 100644 --- a/lib/unicore/Scripts.pl +++ b/lib/unicore/Scripts.pl @@ -2,444 +2,444 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0041 005A LATIN # Latin In/0.pl -0061 007A LATIN # Latin In/0.pl -00AA LATIN # Latin In/0.pl -00BA LATIN # Latin In/0.pl -00C0 00D6 LATIN # Latin In/0.pl -00D8 00F6 LATIN # Latin In/0.pl -00F8 01BA LATIN # Latin In/0.pl -01BB LATIN # Latin In/0.pl -01BC 01BF LATIN # Latin In/0.pl -01C0 01C3 LATIN # Latin In/0.pl -01C4 021F LATIN # Latin In/0.pl -0222 0233 LATIN # Latin In/0.pl -0250 02AD LATIN # Latin In/0.pl -02B0 02B8 LATIN # Latin In/0.pl -02E0 02E4 LATIN # Latin In/0.pl -1E00 1E9B LATIN # Latin In/0.pl -1EA0 1EF9 LATIN # Latin In/0.pl -207F LATIN # Latin In/0.pl -212A 212B LATIN # Latin In/0.pl -FB00 FB06 LATIN # Latin In/0.pl -FF21 FF3A LATIN # Latin In/0.pl -FF41 FF5A LATIN # Latin In/0.pl -00B5 GREEK # Greek In/1.pl -037A GREEK # Greek In/1.pl -0386 GREEK # Greek In/1.pl -0388 038A GREEK # Greek In/1.pl -038C GREEK # Greek In/1.pl -038E 03A1 GREEK # Greek In/1.pl -03A3 03CE GREEK # Greek In/1.pl -03D0 03D7 GREEK # Greek In/1.pl -03DA 03F5 GREEK # Greek In/1.pl -1F00 1F15 GREEK # Greek In/1.pl -1F18 1F1D GREEK # Greek In/1.pl -1F20 1F45 GREEK # Greek In/1.pl -1F48 1F4D GREEK # Greek In/1.pl -1F50 1F57 GREEK # Greek In/1.pl -1F59 GREEK # Greek In/1.pl -1F5B GREEK # Greek In/1.pl -1F5D GREEK # Greek In/1.pl -1F5F 1F7D GREEK # Greek In/1.pl -1F80 1FB4 GREEK # Greek In/1.pl -1FB6 1FBC GREEK # Greek In/1.pl -1FBE GREEK # Greek In/1.pl -1FC2 1FC4 GREEK # Greek In/1.pl -1FC6 1FCC GREEK # Greek In/1.pl -1FD0 1FD3 GREEK # Greek In/1.pl -1FD6 1FDB GREEK # Greek In/1.pl -1FE0 1FEC GREEK # Greek In/1.pl -1FF2 1FF4 GREEK # Greek In/1.pl -1FF6 1FFC GREEK # Greek In/1.pl -2126 GREEK # Greek In/1.pl -0400 0481 CYRILLIC # Cyrillic In/2.pl -0483 0486 CYRILLIC # Cyrillic In/2.pl -048C 04C4 CYRILLIC # Cyrillic In/2.pl -04C7 04C8 CYRILLIC # Cyrillic In/2.pl -04CB 04CC CYRILLIC # Cyrillic In/2.pl -04D0 04F5 CYRILLIC # Cyrillic In/2.pl -04F8 04F9 CYRILLIC # Cyrillic In/2.pl -0531 0556 ARMENIAN # Armenian In/3.pl -0559 ARMENIAN # Armenian In/3.pl -0561 0587 ARMENIAN # Armenian In/3.pl -FB13 FB17 ARMENIAN # Armenian In/3.pl -05D0 05EA HEBREW # Hebrew In/4.pl -05F0 05F2 HEBREW # Hebrew In/4.pl -FB1D HEBREW # Hebrew In/4.pl -FB1F FB28 HEBREW # Hebrew In/4.pl -FB2A FB36 HEBREW # Hebrew In/4.pl -FB38 FB3C HEBREW # Hebrew In/4.pl -FB3E HEBREW # Hebrew In/4.pl -FB40 FB41 HEBREW # Hebrew In/4.pl -FB43 FB44 HEBREW # Hebrew In/4.pl -FB46 FB4F HEBREW # Hebrew In/4.pl -0621 063A ARABIC # Arabic In/5.pl -0641 064A ARABIC # Arabic In/5.pl -0671 06D3 ARABIC # Arabic In/5.pl -06D5 ARABIC # Arabic In/5.pl -06E5 06E6 ARABIC # Arabic In/5.pl -06FA 06FC ARABIC # Arabic In/5.pl -FB50 FBB1 ARABIC # Arabic In/5.pl -FBD3 FD3D ARABIC # Arabic In/5.pl -FD50 FD8F ARABIC # Arabic In/5.pl -FD92 FDC7 ARABIC # Arabic In/5.pl -FDF0 FDFB ARABIC # Arabic In/5.pl -FE70 FE72 ARABIC # Arabic In/5.pl -FE74 ARABIC # Arabic In/5.pl -FE76 FEFC ARABIC # Arabic In/5.pl -0710 SYRIAC # Syriac In/6.pl -0711 SYRIAC # Syriac In/6.pl -0712 072C SYRIAC # Syriac In/6.pl -0730 074A SYRIAC # Syriac In/6.pl -0780 07A5 THAANA # Thaana In/7.pl -07A6 07B0 THAANA # Thaana In/7.pl -0901 0902 DEVANAGARI # Devanagari In/8.pl -0903 DEVANAGARI # Devanagari In/8.pl -0905 0939 DEVANAGARI # Devanagari In/8.pl -093C DEVANAGARI # Devanagari In/8.pl -093D DEVANAGARI # Devanagari In/8.pl -093E 0940 DEVANAGARI # Devanagari In/8.pl -0941 0948 DEVANAGARI # Devanagari In/8.pl -0949 094C DEVANAGARI # Devanagari In/8.pl -094D DEVANAGARI # Devanagari In/8.pl -0950 DEVANAGARI # Devanagari In/8.pl -0951 0954 DEVANAGARI # Devanagari In/8.pl -0958 0961 DEVANAGARI # Devanagari In/8.pl -0962 0963 DEVANAGARI # Devanagari In/8.pl -0966 096F DEVANAGARI # Devanagari In/8.pl -0981 BENGALI # Bengali In/9.pl -0985 098C BENGALI # Bengali In/9.pl -098F 0990 BENGALI # Bengali In/9.pl -0993 09A8 BENGALI # Bengali In/9.pl -09AA 09B0 BENGALI # Bengali In/9.pl -09B2 BENGALI # Bengali In/9.pl -09B6 09B9 BENGALI # Bengali In/9.pl -09BC BENGALI # Bengali In/9.pl -09BE 09C0 BENGALI # Bengali In/9.pl -09C1 09C4 BENGALI # Bengali In/9.pl -09C7 09C8 BENGALI # Bengali In/9.pl -09CB 09CC BENGALI # Bengali In/9.pl -09CD BENGALI # Bengali In/9.pl -09D7 BENGALI # Bengali In/9.pl -09DC 09DD BENGALI # Bengali In/9.pl -09DF 09E1 BENGALI # Bengali In/9.pl -09E2 09E3 BENGALI # Bengali In/9.pl -09E6 09EF BENGALI # Bengali In/9.pl -09F0 09F1 BENGALI # Bengali In/9.pl -0A02 GURMUKHI # Gurmukhi In/10.pl -0A05 0A0A GURMUKHI # Gurmukhi In/10.pl -0A0F 0A10 GURMUKHI # Gurmukhi In/10.pl -0A13 0A28 GURMUKHI # Gurmukhi In/10.pl -0A2A 0A30 GURMUKHI # Gurmukhi In/10.pl -0A32 0A33 GURMUKHI # Gurmukhi In/10.pl -0A35 0A36 GURMUKHI # Gurmukhi In/10.pl -0A38 0A39 GURMUKHI # Gurmukhi In/10.pl -0A3C GURMUKHI # Gurmukhi In/10.pl -0A3E 0A40 GURMUKHI # Gurmukhi In/10.pl -0A41 0A42 GURMUKHI # Gurmukhi In/10.pl -0A47 0A48 GURMUKHI # Gurmukhi In/10.pl -0A4B 0A4D GURMUKHI # Gurmukhi In/10.pl -0A59 0A5C GURMUKHI # Gurmukhi In/10.pl -0A5E GURMUKHI # Gurmukhi In/10.pl -0A66 0A6F GURMUKHI # Gurmukhi In/10.pl -0A70 0A71 GURMUKHI # Gurmukhi In/10.pl -0A72 0A74 GURMUKHI # Gurmukhi In/10.pl -0A81 0A82 GUJARATI # Gujarati In/11.pl -0A83 GUJARATI # Gujarati In/11.pl -0A85 0A8B GUJARATI # Gujarati In/11.pl -0A8D GUJARATI # Gujarati In/11.pl -0A8F 0A91 GUJARATI # Gujarati In/11.pl -0A93 0AA8 GUJARATI # Gujarati In/11.pl -0AAA 0AB0 GUJARATI # Gujarati In/11.pl -0AB2 0AB3 GUJARATI # Gujarati In/11.pl -0AB5 0AB9 GUJARATI # Gujarati In/11.pl -0ABC GUJARATI # Gujarati In/11.pl -0ABD GUJARATI # Gujarati In/11.pl -0ABE 0AC0 GUJARATI # Gujarati In/11.pl -0AC1 0AC5 GUJARATI # Gujarati In/11.pl -0AC7 0AC8 GUJARATI # Gujarati In/11.pl -0AC9 GUJARATI # Gujarati In/11.pl -0ACB 0ACC GUJARATI # Gujarati In/11.pl -0ACD GUJARATI # Gujarati In/11.pl -0AD0 GUJARATI # Gujarati In/11.pl -0AE0 GUJARATI # Gujarati In/11.pl -0AE6 0AEF GUJARATI # Gujarati In/11.pl -0B01 ORIYA # Oriya In/12.pl -0B02 0B03 ORIYA # Oriya In/12.pl -0B05 0B0C ORIYA # Oriya In/12.pl -0B0F 0B10 ORIYA # Oriya In/12.pl -0B13 0B28 ORIYA # Oriya In/12.pl -0B2A 0B30 ORIYA # Oriya In/12.pl -0B32 0B33 ORIYA # Oriya In/12.pl -0B36 0B39 ORIYA # Oriya In/12.pl -0B3C ORIYA # Oriya In/12.pl -0B3D ORIYA # Oriya In/12.pl -0B3E ORIYA # Oriya In/12.pl -0B3F ORIYA # Oriya In/12.pl -0B40 ORIYA # Oriya In/12.pl -0B41 0B43 ORIYA # Oriya In/12.pl -0B47 0B48 ORIYA # Oriya In/12.pl -0B4B 0B4C ORIYA # Oriya In/12.pl -0B4D ORIYA # Oriya In/12.pl -0B56 ORIYA # Oriya In/12.pl -0B57 ORIYA # Oriya In/12.pl -0B5C 0B5D ORIYA # Oriya In/12.pl -0B5F 0B61 ORIYA # Oriya In/12.pl -0B66 0B6F ORIYA # Oriya In/12.pl -0B82 TAMIL # Tamil In/13.pl -0B83 TAMIL # Tamil In/13.pl -0B85 0B8A TAMIL # Tamil In/13.pl -0B8E 0B90 TAMIL # Tamil In/13.pl -0B92 0B95 TAMIL # Tamil In/13.pl -0B99 0B9A TAMIL # Tamil In/13.pl -0B9C TAMIL # Tamil In/13.pl -0B9E 0B9F TAMIL # Tamil In/13.pl -0BA3 0BA4 TAMIL # Tamil In/13.pl -0BA8 0BAA TAMIL # Tamil In/13.pl -0BAE 0BB5 TAMIL # Tamil In/13.pl -0BB7 0BB9 TAMIL # Tamil In/13.pl -0BBE 0BBF TAMIL # Tamil In/13.pl -0BC0 TAMIL # Tamil In/13.pl -0BC1 0BC2 TAMIL # Tamil In/13.pl -0BC6 0BC8 TAMIL # Tamil In/13.pl -0BCA 0BCC TAMIL # Tamil In/13.pl -0BCD TAMIL # Tamil In/13.pl -0BD7 TAMIL # Tamil In/13.pl -0BE7 0BEF TAMIL # Tamil In/13.pl -0BF0 0BF2 TAMIL # Tamil In/13.pl -0C01 0C03 TELUGU # Telugu In/14.pl -0C05 0C0C TELUGU # Telugu In/14.pl -0C0E 0C10 TELUGU # Telugu In/14.pl -0C12 0C28 TELUGU # Telugu In/14.pl -0C2A 0C33 TELUGU # Telugu In/14.pl -0C35 0C39 TELUGU # Telugu In/14.pl -0C3E 0C40 TELUGU # Telugu In/14.pl -0C41 0C44 TELUGU # Telugu In/14.pl -0C46 0C48 TELUGU # Telugu In/14.pl -0C4A 0C4D TELUGU # Telugu In/14.pl -0C55 0C56 TELUGU # Telugu In/14.pl -0C60 0C61 TELUGU # Telugu In/14.pl -0C66 0C6F TELUGU # Telugu In/14.pl -0C82 0C83 KANNADA # Kannada In/15.pl -0C85 0C8C KANNADA # Kannada In/15.pl -0C8E 0C90 KANNADA # Kannada In/15.pl -0C92 0CA8 KANNADA # Kannada In/15.pl -0CAA 0CB3 KANNADA # Kannada In/15.pl -0CB5 0CB9 KANNADA # Kannada In/15.pl -0CBE KANNADA # Kannada In/15.pl -0CBF KANNADA # Kannada In/15.pl -0CC0 0CC4 KANNADA # Kannada In/15.pl -0CC6 KANNADA # Kannada In/15.pl -0CC7 0CC8 KANNADA # Kannada In/15.pl -0CCA 0CCB KANNADA # Kannada In/15.pl -0CCC 0CCD KANNADA # Kannada In/15.pl -0CD5 0CD6 KANNADA # Kannada In/15.pl -0CDE KANNADA # Kannada In/15.pl -0CE0 0CE1 KANNADA # Kannada In/15.pl -0CE6 0CEF KANNADA # Kannada In/15.pl -0D02 0D03 MALAYALAM # Malayalam In/16.pl -0D05 0D0C MALAYALAM # Malayalam In/16.pl -0D0E 0D10 MALAYALAM # Malayalam In/16.pl -0D12 0D28 MALAYALAM # Malayalam In/16.pl -0D2A 0D39 MALAYALAM # Malayalam In/16.pl -0D3E 0D40 MALAYALAM # Malayalam In/16.pl -0D41 0D43 MALAYALAM # Malayalam In/16.pl -0D46 0D48 MALAYALAM # Malayalam In/16.pl -0D4A 0D4C MALAYALAM # Malayalam In/16.pl -0D4D MALAYALAM # Malayalam In/16.pl -0D57 MALAYALAM # Malayalam In/16.pl -0D60 0D61 MALAYALAM # Malayalam In/16.pl -0D66 0D6F MALAYALAM # Malayalam In/16.pl -0D82 0D83 SINHALA # Sinhala In/17.pl -0D85 0D96 SINHALA # Sinhala In/17.pl -0D9A 0DB1 SINHALA # Sinhala In/17.pl -0DB3 0DBB SINHALA # Sinhala In/17.pl -0DBD SINHALA # Sinhala In/17.pl -0DC0 0DC6 SINHALA # Sinhala In/17.pl -0DCA SINHALA # Sinhala In/17.pl -0DCF 0DD1 SINHALA # Sinhala In/17.pl -0DD2 0DD4 SINHALA # Sinhala In/17.pl -0DD6 SINHALA # Sinhala In/17.pl -0DD8 0DDF SINHALA # Sinhala In/17.pl -0DF2 0DF3 SINHALA # Sinhala In/17.pl -0E01 0E30 THAI # Thai In/18.pl -0E31 THAI # Thai In/18.pl -0E32 0E33 THAI # Thai In/18.pl -0E34 0E3A THAI # Thai In/18.pl -0E40 0E45 THAI # Thai In/18.pl -0E46 THAI # Thai In/18.pl -0E47 0E4E THAI # Thai In/18.pl -0E50 0E59 THAI # Thai In/18.pl -0E81 0E82 LAO # Lao In/19.pl -0E84 LAO # Lao In/19.pl -0E87 0E88 LAO # Lao In/19.pl -0E8A LAO # Lao In/19.pl -0E8D LAO # Lao In/19.pl -0E94 0E97 LAO # Lao In/19.pl -0E99 0E9F LAO # Lao In/19.pl -0EA1 0EA3 LAO # Lao In/19.pl -0EA5 LAO # Lao In/19.pl -0EA7 LAO # Lao In/19.pl -0EAA 0EAB LAO # Lao In/19.pl -0EAD 0EB0 LAO # Lao In/19.pl -0EB1 LAO # Lao In/19.pl -0EB2 0EB3 LAO # Lao In/19.pl -0EB4 0EB9 LAO # Lao In/19.pl -0EBB 0EBC LAO # Lao In/19.pl -0EBD LAO # Lao In/19.pl -0EC0 0EC4 LAO # Lao In/19.pl -0EC6 LAO # Lao In/19.pl -0EC8 0ECD LAO # Lao In/19.pl -0ED0 0ED9 LAO # Lao In/19.pl -0EDC 0EDD LAO # Lao In/19.pl -0F00 TIBETAN # Tibetan In/20.pl -0F18 0F19 TIBETAN # Tibetan In/20.pl -0F20 0F29 TIBETAN # Tibetan In/20.pl -0F2A 0F33 TIBETAN # Tibetan In/20.pl -0F35 TIBETAN # Tibetan In/20.pl -0F37 TIBETAN # Tibetan In/20.pl -0F39 TIBETAN # Tibetan In/20.pl -0F40 0F47 TIBETAN # Tibetan In/20.pl -0F49 0F6A TIBETAN # Tibetan In/20.pl -0F71 0F7E TIBETAN # Tibetan In/20.pl -0F7F TIBETAN # Tibetan In/20.pl -0F80 0F84 TIBETAN # Tibetan In/20.pl -0F86 0F87 TIBETAN # Tibetan In/20.pl -0F88 0F8B TIBETAN # Tibetan In/20.pl -0F90 0F97 TIBETAN # Tibetan In/20.pl -0F99 0FBC TIBETAN # Tibetan In/20.pl -0FC6 TIBETAN # Tibetan In/20.pl -1000 1021 MYANMAR # Myanmar In/21.pl -1023 1027 MYANMAR # Myanmar In/21.pl -1029 102A MYANMAR # Myanmar In/21.pl -102C MYANMAR # Myanmar In/21.pl -102D 1030 MYANMAR # Myanmar In/21.pl -1031 MYANMAR # Myanmar In/21.pl -1032 MYANMAR # Myanmar In/21.pl -1036 1037 MYANMAR # Myanmar In/21.pl -1038 MYANMAR # Myanmar In/21.pl -1039 MYANMAR # Myanmar In/21.pl -1040 1049 MYANMAR # Myanmar In/21.pl -1050 1055 MYANMAR # Myanmar In/21.pl -1056 1057 MYANMAR # Myanmar In/21.pl -1058 1059 MYANMAR # Myanmar In/21.pl -10A0 10C5 GEORGIAN # Georgian In/22.pl -10D0 10F6 GEORGIAN # Georgian In/22.pl -1100 1159 HANGUL # Hangul In/23.pl -115F 11A2 HANGUL # Hangul In/23.pl -11A8 11F9 HANGUL # Hangul In/23.pl -3131 318E HANGUL # Hangul In/23.pl -AC00 D7A3 HANGUL # Hangul In/23.pl -FFA0 FFBE HANGUL # Hangul In/23.pl -FFC2 FFC7 HANGUL # Hangul In/23.pl -FFCA FFCF HANGUL # Hangul In/23.pl -FFD2 FFD7 HANGUL # Hangul In/23.pl -FFDA FFDC HANGUL # Hangul In/23.pl -1200 1206 ETHIOPIC # Ethiopic In/24.pl -1208 1246 ETHIOPIC # Ethiopic In/24.pl -1248 ETHIOPIC # Ethiopic In/24.pl -124A 124D ETHIOPIC # Ethiopic In/24.pl -1250 1256 ETHIOPIC # Ethiopic In/24.pl -1258 ETHIOPIC # Ethiopic In/24.pl -125A 125D ETHIOPIC # Ethiopic In/24.pl -1260 1286 ETHIOPIC # Ethiopic In/24.pl -1288 ETHIOPIC # Ethiopic In/24.pl -128A 128D ETHIOPIC # Ethiopic In/24.pl -1290 12AE ETHIOPIC # Ethiopic In/24.pl -12B0 ETHIOPIC # Ethiopic In/24.pl -12B2 12B5 ETHIOPIC # Ethiopic In/24.pl -12B8 12BE ETHIOPIC # Ethiopic In/24.pl -12C0 ETHIOPIC # Ethiopic In/24.pl -12C2 12C5 ETHIOPIC # Ethiopic In/24.pl -12C8 12CE ETHIOPIC # Ethiopic In/24.pl -12D0 12D6 ETHIOPIC # Ethiopic In/24.pl -12D8 12EE ETHIOPIC # Ethiopic In/24.pl -12F0 130E ETHIOPIC # Ethiopic In/24.pl -1310 ETHIOPIC # Ethiopic In/24.pl -1312 1315 ETHIOPIC # Ethiopic In/24.pl -1318 131E ETHIOPIC # Ethiopic In/24.pl -1320 1346 ETHIOPIC # Ethiopic In/24.pl -1348 135A ETHIOPIC # Ethiopic In/24.pl -1369 1371 ETHIOPIC # Ethiopic In/24.pl -1372 137C ETHIOPIC # Ethiopic In/24.pl -13A0 13F4 CHEROKEE # Cherokee In/25.pl -1401 166C CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl -166F 1676 CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl -1681 169A OGHAM # Ogham In/27.pl -16A0 16EA RUNIC # Runic In/28.pl -16EE 16F0 RUNIC # Runic In/28.pl -1780 17B3 KHMER # Khmer In/29.pl -17B4 17B6 KHMER # Khmer In/29.pl -17B7 17BD KHMER # Khmer In/29.pl -17BE 17C5 KHMER # Khmer In/29.pl -17C6 KHMER # Khmer In/29.pl -17C7 17C8 KHMER # Khmer In/29.pl -17C9 17D3 KHMER # Khmer In/29.pl -17E0 17E9 KHMER # Khmer In/29.pl -1810 1819 MONGOLIAN # Mongolian In/30.pl -1820 1842 MONGOLIAN # Mongolian In/30.pl -1843 MONGOLIAN # Mongolian In/30.pl -1844 1877 MONGOLIAN # Mongolian In/30.pl -1880 18A8 MONGOLIAN # Mongolian In/30.pl -18A9 MONGOLIAN # Mongolian In/30.pl -3041 3094 HIRAGANA # Hiragana In/31.pl -309D 309E HIRAGANA # Hiragana In/31.pl -30A1 30FA KATAKANA # Katakana In/32.pl -30FD 30FE KATAKANA # Katakana In/32.pl -FF66 FF6F KATAKANA # Katakana In/32.pl -FF71 FF9D KATAKANA # Katakana In/32.pl -3105 312C BOPOMOFO # Bopomofo In/33.pl -31A0 31B7 BOPOMOFO # Bopomofo In/33.pl -2E80 2E99 HAN # Han In/34.pl -2E9B 2EF3 HAN # Han In/34.pl -2F00 2FD5 HAN # Han In/34.pl -3005 HAN # Han In/34.pl -3007 HAN # Han In/34.pl -3021 3029 HAN # Han In/34.pl -3038 303A HAN # Han In/34.pl -3400 4DB5 HAN # Han In/34.pl -4E00 9FA5 HAN # Han In/34.pl -F900 FA2D HAN # Han In/34.pl -20000 2A6D6 HAN # Han In/34.pl -2F800 2FA1D HAN # Han In/34.pl -A000 A48C YI # Yi In/35.pl -A490 A4A1 YI # Yi In/35.pl -A4A4 A4B3 YI # Yi In/35.pl -A4B5 A4C0 YI # Yi In/35.pl -A4C2 A4C4 YI # Yi In/35.pl -A4C6 YI # Yi In/35.pl -10300 1031E OLD-ITALIC # OldItalic In/36.pl -10330 10349 GOTHIC # Gothic In/37.pl -1034A GOTHIC # Gothic In/37.pl -10400 10425 DESERET # Deseret In/38.pl -10428 1044D DESERET # Deseret In/38.pl -0300 034E INHERITED # Inherited In/39.pl -0360 0362 INHERITED # Inherited In/39.pl -0488 0489 INHERITED # Inherited In/39.pl -0591 05A1 INHERITED # Inherited In/39.pl -05A3 05B9 INHERITED # Inherited In/39.pl -05BB 05BD INHERITED # Inherited In/39.pl -05BF INHERITED # Inherited In/39.pl -05C1 05C2 INHERITED # Inherited In/39.pl -05C4 INHERITED # Inherited In/39.pl -064B 0655 INHERITED # Inherited In/39.pl -0670 INHERITED # Inherited In/39.pl -06D6 06DC INHERITED # Inherited In/39.pl -06DD 06DE INHERITED # Inherited In/39.pl -06DF 06E4 INHERITED # Inherited In/39.pl -06E7 06E8 INHERITED # Inherited In/39.pl -06EA 06ED INHERITED # Inherited In/39.pl -20D0 20DC INHERITED # Inherited In/39.pl -20DD 20E0 INHERITED # Inherited In/39.pl -20E1 INHERITED # Inherited In/39.pl -20E2 20E3 INHERITED # Inherited In/39.pl -302A 302F INHERITED # Inherited In/39.pl -3099 309A INHERITED # Inherited In/39.pl -FB1E INHERITED # Inherited In/39.pl -FE20 FE23 INHERITED # Inherited In/39.pl -1D167 1D169 INHERITED # Inherited In/39.pl -1D17B 1D182 INHERITED # Inherited In/39.pl -1D185 1D18B INHERITED # Inherited In/39.pl -1D1AA 1D1AD INHERITED # Inherited In/39.pl +0041 005A LATIN # In/0.pl +0061 007A LATIN # In/0.pl +00AA LATIN # In/0.pl +00BA LATIN # In/0.pl +00C0 00D6 LATIN # In/0.pl +00D8 00F6 LATIN # In/0.pl +00F8 01BA LATIN # In/0.pl +01BB LATIN # In/0.pl +01BC 01BF LATIN # In/0.pl +01C0 01C3 LATIN # In/0.pl +01C4 021F LATIN # In/0.pl +0222 0233 LATIN # In/0.pl +0250 02AD LATIN # In/0.pl +02B0 02B8 LATIN # In/0.pl +02E0 02E4 LATIN # In/0.pl +1E00 1E9B LATIN # In/0.pl +1EA0 1EF9 LATIN # In/0.pl +207F LATIN # In/0.pl +212A 212B LATIN # In/0.pl +FB00 FB06 LATIN # In/0.pl +FF21 FF3A LATIN # In/0.pl +FF41 FF5A LATIN # In/0.pl +00B5 GREEK # In/1.pl +037A GREEK # In/1.pl +0386 GREEK # In/1.pl +0388 038A GREEK # In/1.pl +038C GREEK # In/1.pl +038E 03A1 GREEK # In/1.pl +03A3 03CE GREEK # In/1.pl +03D0 03D7 GREEK # In/1.pl +03DA 03F5 GREEK # In/1.pl +1F00 1F15 GREEK # In/1.pl +1F18 1F1D GREEK # In/1.pl +1F20 1F45 GREEK # In/1.pl +1F48 1F4D GREEK # In/1.pl +1F50 1F57 GREEK # In/1.pl +1F59 GREEK # In/1.pl +1F5B GREEK # In/1.pl +1F5D GREEK # In/1.pl +1F5F 1F7D GREEK # In/1.pl +1F80 1FB4 GREEK # In/1.pl +1FB6 1FBC GREEK # In/1.pl +1FBE GREEK # In/1.pl +1FC2 1FC4 GREEK # In/1.pl +1FC6 1FCC GREEK # In/1.pl +1FD0 1FD3 GREEK # In/1.pl +1FD6 1FDB GREEK # In/1.pl +1FE0 1FEC GREEK # In/1.pl +1FF2 1FF4 GREEK # In/1.pl +1FF6 1FFC GREEK # In/1.pl +2126 GREEK # In/1.pl +0400 0481 CYRILLIC # In/2.pl +0483 0486 CYRILLIC # In/2.pl +048C 04C4 CYRILLIC # In/2.pl +04C7 04C8 CYRILLIC # In/2.pl +04CB 04CC CYRILLIC # In/2.pl +04D0 04F5 CYRILLIC # In/2.pl +04F8 04F9 CYRILLIC # In/2.pl +0531 0556 ARMENIAN # In/3.pl +0559 ARMENIAN # In/3.pl +0561 0587 ARMENIAN # In/3.pl +FB13 FB17 ARMENIAN # In/3.pl +05D0 05EA HEBREW # In/4.pl +05F0 05F2 HEBREW # In/4.pl +FB1D HEBREW # In/4.pl +FB1F FB28 HEBREW # In/4.pl +FB2A FB36 HEBREW # In/4.pl +FB38 FB3C HEBREW # In/4.pl +FB3E HEBREW # In/4.pl +FB40 FB41 HEBREW # In/4.pl +FB43 FB44 HEBREW # In/4.pl +FB46 FB4F HEBREW # In/4.pl +0621 063A ARABIC # In/5.pl +0641 064A ARABIC # In/5.pl +0671 06D3 ARABIC # In/5.pl +06D5 ARABIC # In/5.pl +06E5 06E6 ARABIC # In/5.pl +06FA 06FC ARABIC # In/5.pl +FB50 FBB1 ARABIC # In/5.pl +FBD3 FD3D ARABIC # In/5.pl +FD50 FD8F ARABIC # In/5.pl +FD92 FDC7 ARABIC # In/5.pl +FDF0 FDFB ARABIC # In/5.pl +FE70 FE72 ARABIC # In/5.pl +FE74 ARABIC # In/5.pl +FE76 FEFC ARABIC # In/5.pl +0710 SYRIAC # In/6.pl +0711 SYRIAC # In/6.pl +0712 072C SYRIAC # In/6.pl +0730 074A SYRIAC # In/6.pl +0780 07A5 THAANA # In/7.pl +07A6 07B0 THAANA # In/7.pl +0901 0902 DEVANAGARI # In/8.pl +0903 DEVANAGARI # In/8.pl +0905 0939 DEVANAGARI # In/8.pl +093C DEVANAGARI # In/8.pl +093D DEVANAGARI # In/8.pl +093E 0940 DEVANAGARI # In/8.pl +0941 0948 DEVANAGARI # In/8.pl +0949 094C DEVANAGARI # In/8.pl +094D DEVANAGARI # In/8.pl +0950 DEVANAGARI # In/8.pl +0951 0954 DEVANAGARI # In/8.pl +0958 0961 DEVANAGARI # In/8.pl +0962 0963 DEVANAGARI # In/8.pl +0966 096F DEVANAGARI # In/8.pl +0981 BENGALI # In/9.pl +0985 098C BENGALI # In/9.pl +098F 0990 BENGALI # In/9.pl +0993 09A8 BENGALI # In/9.pl +09AA 09B0 BENGALI # In/9.pl +09B2 BENGALI # In/9.pl +09B6 09B9 BENGALI # In/9.pl +09BC BENGALI # In/9.pl +09BE 09C0 BENGALI # In/9.pl +09C1 09C4 BENGALI # In/9.pl +09C7 09C8 BENGALI # In/9.pl +09CB 09CC BENGALI # In/9.pl +09CD BENGALI # In/9.pl +09D7 BENGALI # In/9.pl +09DC 09DD BENGALI # In/9.pl +09DF 09E1 BENGALI # In/9.pl +09E2 09E3 BENGALI # In/9.pl +09E6 09EF BENGALI # In/9.pl +09F0 09F1 BENGALI # In/9.pl +0A02 GURMUKHI # In/10.pl +0A05 0A0A GURMUKHI # In/10.pl +0A0F 0A10 GURMUKHI # In/10.pl +0A13 0A28 GURMUKHI # In/10.pl +0A2A 0A30 GURMUKHI # In/10.pl +0A32 0A33 GURMUKHI # In/10.pl +0A35 0A36 GURMUKHI # In/10.pl +0A38 0A39 GURMUKHI # In/10.pl +0A3C GURMUKHI # In/10.pl +0A3E 0A40 GURMUKHI # In/10.pl +0A41 0A42 GURMUKHI # In/10.pl +0A47 0A48 GURMUKHI # In/10.pl +0A4B 0A4D GURMUKHI # In/10.pl +0A59 0A5C GURMUKHI # In/10.pl +0A5E GURMUKHI # In/10.pl +0A66 0A6F GURMUKHI # In/10.pl +0A70 0A71 GURMUKHI # In/10.pl +0A72 0A74 GURMUKHI # In/10.pl +0A81 0A82 GUJARATI # In/11.pl +0A83 GUJARATI # In/11.pl +0A85 0A8B GUJARATI # In/11.pl +0A8D GUJARATI # In/11.pl +0A8F 0A91 GUJARATI # In/11.pl +0A93 0AA8 GUJARATI # In/11.pl +0AAA 0AB0 GUJARATI # In/11.pl +0AB2 0AB3 GUJARATI # In/11.pl +0AB5 0AB9 GUJARATI # In/11.pl +0ABC GUJARATI # In/11.pl +0ABD GUJARATI # In/11.pl +0ABE 0AC0 GUJARATI # In/11.pl +0AC1 0AC5 GUJARATI # In/11.pl +0AC7 0AC8 GUJARATI # In/11.pl +0AC9 GUJARATI # In/11.pl +0ACB 0ACC GUJARATI # In/11.pl +0ACD GUJARATI # In/11.pl +0AD0 GUJARATI # In/11.pl +0AE0 GUJARATI # In/11.pl +0AE6 0AEF GUJARATI # In/11.pl +0B01 ORIYA # In/12.pl +0B02 0B03 ORIYA # In/12.pl +0B05 0B0C ORIYA # In/12.pl +0B0F 0B10 ORIYA # In/12.pl +0B13 0B28 ORIYA # In/12.pl +0B2A 0B30 ORIYA # In/12.pl +0B32 0B33 ORIYA # In/12.pl +0B36 0B39 ORIYA # In/12.pl +0B3C ORIYA # In/12.pl +0B3D ORIYA # In/12.pl +0B3E ORIYA # In/12.pl +0B3F ORIYA # In/12.pl +0B40 ORIYA # In/12.pl +0B41 0B43 ORIYA # In/12.pl +0B47 0B48 ORIYA # In/12.pl +0B4B 0B4C ORIYA # In/12.pl +0B4D ORIYA # In/12.pl +0B56 ORIYA # In/12.pl +0B57 ORIYA # In/12.pl +0B5C 0B5D ORIYA # In/12.pl +0B5F 0B61 ORIYA # In/12.pl +0B66 0B6F ORIYA # In/12.pl +0B82 TAMIL # In/13.pl +0B83 TAMIL # In/13.pl +0B85 0B8A TAMIL # In/13.pl +0B8E 0B90 TAMIL # In/13.pl +0B92 0B95 TAMIL # In/13.pl +0B99 0B9A TAMIL # In/13.pl +0B9C TAMIL # In/13.pl +0B9E 0B9F TAMIL # In/13.pl +0BA3 0BA4 TAMIL # In/13.pl +0BA8 0BAA TAMIL # In/13.pl +0BAE 0BB5 TAMIL # In/13.pl +0BB7 0BB9 TAMIL # In/13.pl +0BBE 0BBF TAMIL # In/13.pl +0BC0 TAMIL # In/13.pl +0BC1 0BC2 TAMIL # In/13.pl +0BC6 0BC8 TAMIL # In/13.pl +0BCA 0BCC TAMIL # In/13.pl +0BCD TAMIL # In/13.pl +0BD7 TAMIL # In/13.pl +0BE7 0BEF TAMIL # In/13.pl +0BF0 0BF2 TAMIL # In/13.pl +0C01 0C03 TELUGU # In/14.pl +0C05 0C0C TELUGU # In/14.pl +0C0E 0C10 TELUGU # In/14.pl +0C12 0C28 TELUGU # In/14.pl +0C2A 0C33 TELUGU # In/14.pl +0C35 0C39 TELUGU # In/14.pl +0C3E 0C40 TELUGU # In/14.pl +0C41 0C44 TELUGU # In/14.pl +0C46 0C48 TELUGU # In/14.pl +0C4A 0C4D TELUGU # In/14.pl +0C55 0C56 TELUGU # In/14.pl +0C60 0C61 TELUGU # In/14.pl +0C66 0C6F TELUGU # In/14.pl +0C82 0C83 KANNADA # In/15.pl +0C85 0C8C KANNADA # In/15.pl +0C8E 0C90 KANNADA # In/15.pl +0C92 0CA8 KANNADA # In/15.pl +0CAA 0CB3 KANNADA # In/15.pl +0CB5 0CB9 KANNADA # In/15.pl +0CBE KANNADA # In/15.pl +0CBF KANNADA # In/15.pl +0CC0 0CC4 KANNADA # In/15.pl +0CC6 KANNADA # In/15.pl +0CC7 0CC8 KANNADA # In/15.pl +0CCA 0CCB KANNADA # In/15.pl +0CCC 0CCD KANNADA # In/15.pl +0CD5 0CD6 KANNADA # In/15.pl +0CDE KANNADA # In/15.pl +0CE0 0CE1 KANNADA # In/15.pl +0CE6 0CEF KANNADA # In/15.pl +0D02 0D03 MALAYALAM # In/16.pl +0D05 0D0C MALAYALAM # In/16.pl +0D0E 0D10 MALAYALAM # In/16.pl +0D12 0D28 MALAYALAM # In/16.pl +0D2A 0D39 MALAYALAM # In/16.pl +0D3E 0D40 MALAYALAM # In/16.pl +0D41 0D43 MALAYALAM # In/16.pl +0D46 0D48 MALAYALAM # In/16.pl +0D4A 0D4C MALAYALAM # In/16.pl +0D4D MALAYALAM # In/16.pl +0D57 MALAYALAM # In/16.pl +0D60 0D61 MALAYALAM # In/16.pl +0D66 0D6F MALAYALAM # In/16.pl +0D82 0D83 SINHALA # In/17.pl +0D85 0D96 SINHALA # In/17.pl +0D9A 0DB1 SINHALA # In/17.pl +0DB3 0DBB SINHALA # In/17.pl +0DBD SINHALA # In/17.pl +0DC0 0DC6 SINHALA # In/17.pl +0DCA SINHALA # In/17.pl +0DCF 0DD1 SINHALA # In/17.pl +0DD2 0DD4 SINHALA # In/17.pl +0DD6 SINHALA # In/17.pl +0DD8 0DDF SINHALA # In/17.pl +0DF2 0DF3 SINHALA # In/17.pl +0E01 0E30 THAI # In/18.pl +0E31 THAI # In/18.pl +0E32 0E33 THAI # In/18.pl +0E34 0E3A THAI # In/18.pl +0E40 0E45 THAI # In/18.pl +0E46 THAI # In/18.pl +0E47 0E4E THAI # In/18.pl +0E50 0E59 THAI # In/18.pl +0E81 0E82 LAO # In/19.pl +0E84 LAO # In/19.pl +0E87 0E88 LAO # In/19.pl +0E8A LAO # In/19.pl +0E8D LAO # In/19.pl +0E94 0E97 LAO # In/19.pl +0E99 0E9F LAO # In/19.pl +0EA1 0EA3 LAO # In/19.pl +0EA5 LAO # In/19.pl +0EA7 LAO # In/19.pl +0EAA 0EAB LAO # In/19.pl +0EAD 0EB0 LAO # In/19.pl +0EB1 LAO # In/19.pl +0EB2 0EB3 LAO # In/19.pl +0EB4 0EB9 LAO # In/19.pl +0EBB 0EBC LAO # In/19.pl +0EBD LAO # In/19.pl +0EC0 0EC4 LAO # In/19.pl +0EC6 LAO # In/19.pl +0EC8 0ECD LAO # In/19.pl +0ED0 0ED9 LAO # In/19.pl +0EDC 0EDD LAO # In/19.pl +0F00 TIBETAN # In/20.pl +0F18 0F19 TIBETAN # In/20.pl +0F20 0F29 TIBETAN # In/20.pl +0F2A 0F33 TIBETAN # In/20.pl +0F35 TIBETAN # In/20.pl +0F37 TIBETAN # In/20.pl +0F39 TIBETAN # In/20.pl +0F40 0F47 TIBETAN # In/20.pl +0F49 0F6A TIBETAN # In/20.pl +0F71 0F7E TIBETAN # In/20.pl +0F7F TIBETAN # In/20.pl +0F80 0F84 TIBETAN # In/20.pl +0F86 0F87 TIBETAN # In/20.pl +0F88 0F8B TIBETAN # In/20.pl +0F90 0F97 TIBETAN # In/20.pl +0F99 0FBC TIBETAN # In/20.pl +0FC6 TIBETAN # In/20.pl +1000 1021 MYANMAR # In/21.pl +1023 1027 MYANMAR # In/21.pl +1029 102A MYANMAR # In/21.pl +102C MYANMAR # In/21.pl +102D 1030 MYANMAR # In/21.pl +1031 MYANMAR # In/21.pl +1032 MYANMAR # In/21.pl +1036 1037 MYANMAR # In/21.pl +1038 MYANMAR # In/21.pl +1039 MYANMAR # In/21.pl +1040 1049 MYANMAR # In/21.pl +1050 1055 MYANMAR # In/21.pl +1056 1057 MYANMAR # In/21.pl +1058 1059 MYANMAR # In/21.pl +10A0 10C5 GEORGIAN # In/22.pl +10D0 10F6 GEORGIAN # In/22.pl +1100 1159 HANGUL # In/23.pl +115F 11A2 HANGUL # In/23.pl +11A8 11F9 HANGUL # In/23.pl +3131 318E HANGUL # In/23.pl +AC00 D7A3 HANGUL # In/23.pl +FFA0 FFBE HANGUL # In/23.pl +FFC2 FFC7 HANGUL # In/23.pl +FFCA FFCF HANGUL # In/23.pl +FFD2 FFD7 HANGUL # In/23.pl +FFDA FFDC HANGUL # In/23.pl +1200 1206 ETHIOPIC # In/24.pl +1208 1246 ETHIOPIC # In/24.pl +1248 ETHIOPIC # In/24.pl +124A 124D ETHIOPIC # In/24.pl +1250 1256 ETHIOPIC # In/24.pl +1258 ETHIOPIC # In/24.pl +125A 125D ETHIOPIC # In/24.pl +1260 1286 ETHIOPIC # In/24.pl +1288 ETHIOPIC # In/24.pl +128A 128D ETHIOPIC # In/24.pl +1290 12AE ETHIOPIC # In/24.pl +12B0 ETHIOPIC # In/24.pl +12B2 12B5 ETHIOPIC # In/24.pl +12B8 12BE ETHIOPIC # In/24.pl +12C0 ETHIOPIC # In/24.pl +12C2 12C5 ETHIOPIC # In/24.pl +12C8 12CE ETHIOPIC # In/24.pl +12D0 12D6 ETHIOPIC # In/24.pl +12D8 12EE ETHIOPIC # In/24.pl +12F0 130E ETHIOPIC # In/24.pl +1310 ETHIOPIC # In/24.pl +1312 1315 ETHIOPIC # In/24.pl +1318 131E ETHIOPIC # In/24.pl +1320 1346 ETHIOPIC # In/24.pl +1348 135A ETHIOPIC # In/24.pl +1369 1371 ETHIOPIC # In/24.pl +1372 137C ETHIOPIC # In/24.pl +13A0 13F4 CHEROKEE # In/25.pl +1401 166C CANADIAN-ABORIGINAL # In/26.pl +166F 1676 CANADIAN-ABORIGINAL # In/26.pl +1681 169A OGHAM # In/27.pl +16A0 16EA RUNIC # In/28.pl +16EE 16F0 RUNIC # In/28.pl +1780 17B3 KHMER # In/29.pl +17B4 17B6 KHMER # In/29.pl +17B7 17BD KHMER # In/29.pl +17BE 17C5 KHMER # In/29.pl +17C6 KHMER # In/29.pl +17C7 17C8 KHMER # In/29.pl +17C9 17D3 KHMER # In/29.pl +17E0 17E9 KHMER # In/29.pl +1810 1819 MONGOLIAN # In/30.pl +1820 1842 MONGOLIAN # In/30.pl +1843 MONGOLIAN # In/30.pl +1844 1877 MONGOLIAN # In/30.pl +1880 18A8 MONGOLIAN # In/30.pl +18A9 MONGOLIAN # In/30.pl +3041 3094 HIRAGANA # In/31.pl +309D 309E HIRAGANA # In/31.pl +30A1 30FA KATAKANA # In/32.pl +30FD 30FE KATAKANA # In/32.pl +FF66 FF6F KATAKANA # In/32.pl +FF71 FF9D KATAKANA # In/32.pl +3105 312C BOPOMOFO # In/33.pl +31A0 31B7 BOPOMOFO # In/33.pl +2E80 2E99 HAN # In/34.pl +2E9B 2EF3 HAN # In/34.pl +2F00 2FD5 HAN # In/34.pl +3005 HAN # In/34.pl +3007 HAN # In/34.pl +3021 3029 HAN # In/34.pl +3038 303A HAN # In/34.pl +3400 4DB5 HAN # In/34.pl +4E00 9FA5 HAN # In/34.pl +F900 FA2D HAN # In/34.pl +20000 2A6D6 HAN # In/34.pl +2F800 2FA1D HAN # In/34.pl +A000 A48C YI # In/35.pl +A490 A4A1 YI # In/35.pl +A4A4 A4B3 YI # In/35.pl +A4B5 A4C0 YI # In/35.pl +A4C2 A4C4 YI # In/35.pl +A4C6 YI # In/35.pl +10300 1031E OLD-ITALIC # In/36.pl +10330 10349 GOTHIC # In/37.pl +1034A GOTHIC # In/37.pl +10400 10425 DESERET # In/38.pl +10428 1044D DESERET # In/38.pl +0300 034E INHERITED # In/39.pl +0360 0362 INHERITED # In/39.pl +0488 0489 INHERITED # In/39.pl +0591 05A1 INHERITED # In/39.pl +05A3 05B9 INHERITED # In/39.pl +05BB 05BD INHERITED # In/39.pl +05BF INHERITED # In/39.pl +05C1 05C2 INHERITED # In/39.pl +05C4 INHERITED # In/39.pl +064B 0655 INHERITED # In/39.pl +0670 INHERITED # In/39.pl +06D6 06DC INHERITED # In/39.pl +06DD 06DE INHERITED # In/39.pl +06DF 06E4 INHERITED # In/39.pl +06E7 06E8 INHERITED # In/39.pl +06EA 06ED INHERITED # In/39.pl +20D0 20DC INHERITED # In/39.pl +20DD 20E0 INHERITED # In/39.pl +20E1 INHERITED # In/39.pl +20E2 20E3 INHERITED # In/39.pl +302A 302F INHERITED # In/39.pl +3099 309A INHERITED # In/39.pl +FB1E INHERITED # In/39.pl +FE20 FE23 INHERITED # In/39.pl +1D167 1D169 INHERITED # In/39.pl +1D17B 1D182 INHERITED # In/39.pl +1D185 1D18B INHERITED # In/39.pl +1D1AA 1D1AD INHERITED # In/39.pl END diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL index f86ff696d1..642c66fc72 100755 --- a/lib/unicore/mktables.PL +++ b/lib/unicore/mktables.PL @@ -231,7 +231,8 @@ mkdir "To", 0755; # This is not written for speed... -my %InId; +my %InIdScript; +my %InIdBlock; my $InId = 0; foreach $file (@todo) { @@ -258,9 +259,6 @@ END close OUT; } -# Do Scripts before Blocks so that in case of naming conflicts -# the more natural one (Script) wins over the artificial one (Block). - print "Scripts\n"; open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n"; open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n"; @@ -281,13 +279,11 @@ while (<UD>) { chomp; ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i; if ($name) { - my $InName = lc($name); - $InName =~ s/\b(\w)/uc($1)/ge; - $InName =~ s/\W+//g; + my $InName = $name; my $id; - unless (exists $InId{$InName}) { + unless (exists $InIdScript{$InName}) { print "\t$InName\n"; - $id = $Scripts{$InName} = $InId{$InName} = $InId++; + $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++; open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n"; print SCRIPT <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! @@ -297,10 +293,10 @@ return <<'END'; EOH close(SCRIPT); } else { - $id = $InId{$InName}; + $id = $InIdScript{$InName}; } $last = "" unless defined $last; - print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; + print OUT "$code\t$last\t$name\t# In/$id.pl\n"; open(SCRIPT, ">>In/$id.pl"); print SCRIPT <<END; $code $last @@ -309,7 +305,7 @@ END } } -for my $id (values %InId) { +for my $id (values %InIdScript) { open(SCRIPT, ">>In/$id.pl"); print SCRIPT <<END2; END @@ -339,22 +335,18 @@ while (<UD>) { next if /^#/; next if /^$/; chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i; if ($name) { my $InName = $name; - $InName =~ s/\W+//g; print "\t$InName\n"; my $id; # TODO: only the first one of Private Use blocks qualifies - unless (exists $InId{$InName}) { - $InId{$InName} = $InId++; - } elsif (exists $Scripts{$InName}) { - $InName .= 'Block'; - $InId{$InName} = $InId++; + unless (exists $InIdBlock{$InName}) { + $InIdBlock{$InName} = $InId++; } - $id = $InId{$InName}; + $id = $InIdBlock{$InName}; open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; + print OUT "$code\t$last\t$name\t# In/$id.pl\n"; print BLOCK <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by $0 from e.g. $UnicodeData. @@ -381,9 +373,57 @@ print INID <<EOH; %utf8::In = ( EOH -# Order doesn't matter but let's prettyprint anyway. -foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) { - printf INID "%-40s => %3d,\n", "'$in'", $InId{$in}; +my %InIdScriptById = reverse %InIdScript; +my %InIdBlockById = reverse %InIdBlock; + +my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById; +my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById; + +my %InId; +my %IdIdLcName; + +for my $id (@InIdScriptById) { + my $name = $InIdScriptById{$id}; + my $lcname = lc($name); + $InId{$name} = $id; + $IdIdLcName{$lcname} = $id; +} + +for my $id (@InIdBlockById) { + my $name = $InIdBlockById{$id}; + my $lcname = lc($name); + if (exists $IdIdLcName{$lcname}) { + $InId{"$name Block"} = $id; + } else { + $InId{$name} = $id; + } + $IdIdLcName{$lcname} = $id; +} + +my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId; + +my %InIdPrefix; + +foreach my $in (@InId) { + my $inpat = $in; + $inpat =~ s/([- ])/[- _]?/g; + push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ]; + printf INID "%-45s => %3d,\n", "'$in'", $InId{$in}; +} + +print INID ");\n"; + +print INID <<EOH; +%utf8::InPat = ( +EOH + +foreach my $prefix (sort keys %InIdPrefix) { + printf INID "'$prefix' => {\n"; + foreach my $ininpat (@{$InIdPrefix{$prefix}}) { + my ($in, $inpat) = @$ininpat; + printf INID "\t'$inpat' => '$in',\n"; + } + printf INID "},\n"; } print INID ");\n"; diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index a90e24ce71..e8cf0cc4ab 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -26,11 +26,20 @@ sub SWASHNEW { while (($caller = caller($i)) eq __PACKAGE__) { $i++ } my $encoding = $enc{$caller} || "unicore"; (my $file = $type) =~ s!::!/!g; - if ($file =~ /^In(.+)/) { + if ($file =~ /^In[- ]?(.+)/i) { my $In = $1; defined %utf8::In || do "$encoding/In.pl"; - if (exists $utf8::In{$In}) { - $file = "$encoding/In/$utf8::In{$In}"; + my $prefix = substr(lc($In), 0, 3); + if (exists $utf8::InPat{$prefix}) { + for my $k (keys %{$utf8::InPat{$prefix}}) { + if ($In =~ /^$k$/i) { + $In = $utf8::InPat{$prefix}->{$k}; + if (exists $utf8::In{$In}) { + $file = "$encoding/In/$utf8::In{$In}"; + last; + } + } + } } } else { $file =~ s#^(Is|To)([A-Z].*)#$1/$2#; @@ -43,7 +52,7 @@ sub SWASHNEW { || do "$file.pl" || do "$encoding/$file.pl" || do "$encoding/Is/${type}.pl" - || croak("Can't find $encoding character property \"$type\""); + || croak("Can't find Unicode character property \"$type\""); } $| = 1; diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 63ad011546..f27173cded 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -169,9 +169,10 @@ character with the Unicode uppercase property, while C<\p{M}> matches any mark character. Single letter properties may omit the brackets, so that can be written C<\pM> also. Many predefined character classes are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The -names of the C<In> classes are the official Unicode script and block -names but with all non-alphanumeric characters removed, for example -the block name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>. +recommended names of the C<In> classes are the official Unicode script +and block names but with all non-alphanumeric characters removed, for +example the block name C<"Latin-1 Supplement"> becomes +C<\p{InLatin1Supplement}>. Here is the list as of Unicode 3.1.0 (the two-letter classes) and as defined by Perl (the one-letter classes) (in Unicode materials diff --git a/t/op/pat.t b/t/op/pat.t index 2042f398d5..f5a2eddced 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..715\n"; +print "1..716\n"; BEGIN { chdir 't' if -d 't'; @@ -2121,9 +2121,13 @@ sub ok ($$) { } { - # high bit bug -- japhy - my $x = "ab\200d"; - $x =~ /.*?\200/ or print "not "; - print "ok 715\n"; + # high bit bug -- japhy + my $x = "ab\200d"; + $x =~ /.*?\200/ or print "not "; + print "ok 715\n"; } +{ + print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; + print "ok 716\n"; +} |