diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
commit | d9efae67d76cc4acd8980b711b5bebc7142b5319 (patch) | |
tree | 85511ac1926809c78e0399fa9cde4033552336cb /lib/unicore/In.pl | |
parent | e8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff) | |
download | perl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz |
Allow for more flexibility in the \p{In...} names, now
case doesn't matter, and any space or dash can be
matched by any space, dash, underbar, or empty.
(may be going too far on leniency)
p4raw-id: //depot/perl@12264
Diffstat (limited to 'lib/unicore/In.pl')
-rw-r--r-- | lib/unicore/In.pl | 552 |
1 files changed, 416 insertions, 136 deletions
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index a6c24199a7..c11445c26b 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -2,140 +2,420 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! %utf8::In = ( -'Latin' => 0, -'Greek' => 1, -'Cyrillic' => 2, -'Armenian' => 3, -'Hebrew' => 4, -'Arabic' => 5, -'Syriac' => 6, -'Thaana' => 7, -'Devanagari' => 8, -'Bengali' => 9, -'Gurmukhi' => 10, -'Gujarati' => 11, -'Oriya' => 12, -'Tamil' => 13, -'Telugu' => 14, -'Kannada' => 15, -'Malayalam' => 16, -'Sinhala' => 17, -'Thai' => 18, -'Lao' => 19, -'Tibetan' => 20, -'Myanmar' => 21, -'Georgian' => 22, -'Hangul' => 23, -'Ethiopic' => 24, -'Cherokee' => 25, -'CanadianAboriginal' => 26, -'Ogham' => 27, -'Runic' => 28, -'Khmer' => 29, -'Mongolian' => 30, -'Hiragana' => 31, -'Katakana' => 32, -'Bopomofo' => 33, -'Han' => 34, -'Yi' => 35, -'OldItalic' => 36, -'Gothic' => 37, -'Deseret' => 38, -'Inherited' => 39, -'BasicLatin' => 40, -'Latin1Supplement' => 41, -'LatinExtendedA' => 42, -'LatinExtendedB' => 43, -'IPAExtensions' => 44, -'SpacingModifierLetters' => 45, -'CombiningDiacriticalMarks' => 46, -'GreekBlock' => 47, -'CyrillicBlock' => 48, -'ArmenianBlock' => 49, -'HebrewBlock' => 50, -'ArabicBlock' => 51, -'SyriacBlock' => 52, -'ThaanaBlock' => 53, -'DevanagariBlock' => 54, -'BengaliBlock' => 55, -'GurmukhiBlock' => 56, -'GujaratiBlock' => 57, -'OriyaBlock' => 58, -'TamilBlock' => 59, -'TeluguBlock' => 60, -'KannadaBlock' => 61, -'MalayalamBlock' => 62, -'SinhalaBlock' => 63, -'ThaiBlock' => 64, -'LaoBlock' => 65, -'TibetanBlock' => 66, -'MyanmarBlock' => 67, -'GeorgianBlock' => 68, -'HangulJamo' => 69, -'EthiopicBlock' => 70, -'CherokeeBlock' => 71, -'UnifiedCanadianAboriginalSyllabics' => 72, -'OghamBlock' => 73, -'RunicBlock' => 74, -'KhmerBlock' => 75, -'MongolianBlock' => 76, -'LatinExtendedAdditional' => 77, -'GreekExtended' => 78, -'GeneralPunctuation' => 79, -'SuperscriptsandSubscripts' => 80, -'CurrencySymbols' => 81, -'CombiningMarksforSymbols' => 82, -'LetterlikeSymbols' => 83, -'NumberForms' => 84, -'Arrows' => 85, -'MathematicalOperators' => 86, -'MiscellaneousTechnical' => 87, -'ControlPictures' => 88, -'OpticalCharacterRecognition' => 89, -'EnclosedAlphanumerics' => 90, -'BoxDrawing' => 91, -'BlockElements' => 92, -'GeometricShapes' => 93, -'MiscellaneousSymbols' => 94, -'Dingbats' => 95, -'BraillePatterns' => 96, -'CJKRadicalsSupplement' => 97, -'KangxiRadicals' => 98, -'IdeographicDescriptionCharacters' => 99, -'CJKSymbolsandPunctuation' => 100, -'HiraganaBlock' => 101, -'KatakanaBlock' => 102, -'BopomofoBlock' => 103, -'HangulCompatibilityJamo' => 104, -'Kanbun' => 105, -'BopomofoExtended' => 106, -'EnclosedCJKLettersandMonths' => 107, -'CJKCompatibility' => 108, -'CJKUnifiedIdeographsExtensionA' => 109, -'CJKUnifiedIdeographs' => 110, -'YiSyllables' => 111, -'YiRadicals' => 112, -'HangulSyllables' => 113, -'HighSurrogates' => 114, -'HighPrivateUseSurrogates' => 115, -'LowSurrogates' => 116, -'PrivateUse' => 117, -'CJKCompatibilityIdeographs' => 118, -'AlphabeticPresentationForms' => 119, -'ArabicPresentationFormsA' => 120, -'CombiningHalfMarks' => 121, -'CJKCompatibilityForms' => 122, -'SmallFormVariants' => 123, -'ArabicPresentationFormsB' => 124, -'Specials' => 125, -'HalfwidthandFullwidthForms' => 126, -'OldItalicBlock' => 127, -'GothicBlock' => 128, -'DeseretBlock' => 129, -'ByzantineMusicalSymbols' => 130, -'MusicalSymbols' => 131, -'MathematicalAlphanumericSymbols' => 132, -'CJKUnifiedIdeographsExtensionB' => 133, -'CJKCompatibilityIdeographsSupplement' => 134, -'Tags' => 135, +'LATIN' => 0, +'GREEK' => 1, +'CYRILLIC' => 2, +'ARMENIAN' => 3, +'HEBREW' => 4, +'ARABIC' => 5, +'SYRIAC' => 6, +'THAANA' => 7, +'DEVANAGARI' => 8, +'BENGALI' => 9, +'GURMUKHI' => 10, +'GUJARATI' => 11, +'ORIYA' => 12, +'TAMIL' => 13, +'TELUGU' => 14, +'KANNADA' => 15, +'MALAYALAM' => 16, +'SINHALA' => 17, +'THAI' => 18, +'LAO' => 19, +'TIBETAN' => 20, +'MYANMAR' => 21, +'GEORGIAN' => 22, +'HANGUL' => 23, +'ETHIOPIC' => 24, +'CHEROKEE' => 25, +'CANADIAN-ABORIGINAL' => 26, +'OGHAM' => 27, +'RUNIC' => 28, +'KHMER' => 29, +'MONGOLIAN' => 30, +'HIRAGANA' => 31, +'KATAKANA' => 32, +'BOPOMOFO' => 33, +'HAN' => 34, +'YI' => 35, +'OLD-ITALIC' => 36, +'GOTHIC' => 37, +'DESERET' => 38, +'INHERITED' => 39, +'Basic Latin' => 40, +'Latin-1 Supplement' => 41, +'Latin Extended-A' => 42, +'Latin Extended-B' => 43, +'IPA Extensions' => 44, +'Spacing Modifier Letters' => 45, +'Combining Diacritical Marks' => 46, +'Greek Block' => 47, +'Cyrillic Block' => 48, +'Armenian Block' => 49, +'Hebrew Block' => 50, +'Arabic Block' => 51, +'Syriac Block' => 52, +'Thaana Block' => 53, +'Devanagari Block' => 54, +'Bengali Block' => 55, +'Gurmukhi Block' => 56, +'Gujarati Block' => 57, +'Oriya Block' => 58, +'Tamil Block' => 59, +'Telugu Block' => 60, +'Kannada Block' => 61, +'Malayalam Block' => 62, +'Sinhala Block' => 63, +'Thai Block' => 64, +'Lao Block' => 65, +'Tibetan Block' => 66, +'Myanmar Block' => 67, +'Georgian Block' => 68, +'Hangul Jamo' => 69, +'Ethiopic Block' => 70, +'Cherokee Block' => 71, +'Unified Canadian Aboriginal Syllabics' => 72, +'Ogham Block' => 73, +'Runic Block' => 74, +'Khmer Block' => 75, +'Mongolian Block' => 76, +'Latin Extended Additional' => 77, +'Greek Extended' => 78, +'General Punctuation' => 79, +'Superscripts and Subscripts' => 80, +'Currency Symbols' => 81, +'Combining Marks for Symbols' => 82, +'Letterlike Symbols' => 83, +'Number Forms' => 84, +'Arrows' => 85, +'Mathematical Operators' => 86, +'Miscellaneous Technical' => 87, +'Control Pictures' => 88, +'Optical Character Recognition' => 89, +'Enclosed Alphanumerics' => 90, +'Box Drawing' => 91, +'Block Elements' => 92, +'Geometric Shapes' => 93, +'Miscellaneous Symbols' => 94, +'Dingbats' => 95, +'Braille Patterns' => 96, +'CJK Radicals Supplement' => 97, +'Kangxi Radicals' => 98, +'Ideographic Description Characters' => 99, +'CJK Symbols and Punctuation' => 100, +'Hiragana Block' => 101, +'Katakana Block' => 102, +'Bopomofo Block' => 103, +'Hangul Compatibility Jamo' => 104, +'Kanbun' => 105, +'Bopomofo Extended' => 106, +'Enclosed CJK Letters and Months' => 107, +'CJK Compatibility' => 108, +'CJK Unified Ideographs Extension A' => 109, +'CJK Unified Ideographs' => 110, +'Yi Syllables' => 111, +'Yi Radicals' => 112, +'Hangul Syllables' => 113, +'High Surrogates' => 114, +'High Private Use Surrogates' => 115, +'Low Surrogates' => 116, +'Private Use' => 117, +'CJK Compatibility Ideographs' => 118, +'Alphabetic Presentation Forms' => 119, +'Arabic Presentation Forms-A' => 120, +'Combining Half Marks' => 121, +'CJK Compatibility Forms' => 122, +'Small Form Variants' => 123, +'Arabic Presentation Forms-B' => 124, +'Specials' => 125, +'Halfwidth and Fullwidth Forms' => 126, +'Old Italic' => 127, +'Gothic Block' => 128, +'Deseret Block' => 129, +'Byzantine Musical Symbols' => 130, +'Musical Symbols' => 131, +'Mathematical Alphanumeric Symbols' => 132, +'CJK Unified Ideographs Extension B' => 133, +'CJK Compatibility Ideographs Supplement' => 134, +'Tags' => 135, +); +%utf8::InPat = ( +'alp' => { + 'Alphabetic[- _]?Presentation[- _]?Forms' => 'Alphabetic Presentation Forms', +}, +'ara' => { + 'ARABIC' => 'ARABIC', + 'Arabic[- _]?Block' => 'Arabic Block', + 'Arabic[- _]?Presentation[- _]?Forms[- _]?A' => 'Arabic Presentation Forms-A', + 'Arabic[- _]?Presentation[- _]?Forms[- _]?B' => 'Arabic Presentation Forms-B', +}, +'arm' => { + 'ARMENIAN' => 'ARMENIAN', + 'Armenian[- _]?Block' => 'Armenian Block', +}, +'arr' => { + 'Arrows' => 'Arrows', +}, +'bas' => { + 'Basic[- _]?Latin' => 'Basic Latin', +}, +'ben' => { + 'BENGALI' => 'BENGALI', + 'Bengali[- _]?Block' => 'Bengali Block', +}, +'blo' => { + 'Block[- _]?Elements' => 'Block Elements', +}, +'bop' => { + 'BOPOMOFO' => 'BOPOMOFO', + 'Bopomofo[- _]?Block' => 'Bopomofo Block', + 'Bopomofo[- _]?Extended' => 'Bopomofo Extended', +}, +'box' => { + 'Box[- _]?Drawing' => 'Box Drawing', +}, +'bra' => { + 'Braille[- _]?Patterns' => 'Braille Patterns', +}, +'byz' => { + 'Byzantine[- _]?Musical[- _]?Symbols' => 'Byzantine Musical Symbols', +}, +'can' => { + 'CANADIAN[- _]?ABORIGINAL' => 'CANADIAN-ABORIGINAL', +}, +'che' => { + 'CHEROKEE' => 'CHEROKEE', + 'Cherokee[- _]?Block' => 'Cherokee Block', +}, +'cjk' => { + 'CJK[- _]?Radicals[- _]?Supplement' => 'CJK Radicals Supplement', + 'CJK[- _]?Symbols[- _]?and[- _]?Punctuation' => 'CJK Symbols and Punctuation', + 'CJK[- _]?Compatibility' => 'CJK Compatibility', + 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?A' => 'CJK Unified Ideographs Extension A', + 'CJK[- _]?Unified[- _]?Ideographs' => 'CJK Unified Ideographs', + 'CJK[- _]?Compatibility[- _]?Ideographs' => 'CJK Compatibility Ideographs', + 'CJK[- _]?Compatibility[- _]?Forms' => 'CJK Compatibility Forms', + 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?B' => 'CJK Unified Ideographs Extension B', + 'CJK[- _]?Compatibility[- _]?Ideographs[- _]?Supplement' => 'CJK Compatibility Ideographs Supplement', +}, +'com' => { + 'Combining[- _]?Diacritical[- _]?Marks' => 'Combining Diacritical Marks', + 'Combining[- _]?Marks[- _]?for[- _]?Symbols' => 'Combining Marks for Symbols', + 'Combining[- _]?Half[- _]?Marks' => 'Combining Half Marks', +}, +'con' => { + 'Control[- _]?Pictures' => 'Control Pictures', +}, +'cur' => { + 'Currency[- _]?Symbols' => 'Currency Symbols', +}, +'cyr' => { + 'CYRILLIC' => 'CYRILLIC', + 'Cyrillic[- _]?Block' => 'Cyrillic Block', +}, +'des' => { + 'DESERET' => 'DESERET', + 'Deseret[- _]?Block' => 'Deseret Block', +}, +'dev' => { + 'DEVANAGARI' => 'DEVANAGARI', + 'Devanagari[- _]?Block' => 'Devanagari Block', +}, +'din' => { + 'Dingbats' => 'Dingbats', +}, +'enc' => { + 'Enclosed[- _]?Alphanumerics' => 'Enclosed Alphanumerics', + 'Enclosed[- _]?CJK[- _]?Letters[- _]?and[- _]?Months' => 'Enclosed CJK Letters and Months', +}, +'eth' => { + 'ETHIOPIC' => 'ETHIOPIC', + 'Ethiopic[- _]?Block' => 'Ethiopic Block', +}, +'gen' => { + 'General[- _]?Punctuation' => 'General Punctuation', +}, +'geo' => { + 'GEORGIAN' => 'GEORGIAN', + 'Georgian[- _]?Block' => 'Georgian Block', + 'Geometric[- _]?Shapes' => 'Geometric Shapes', +}, +'got' => { + 'GOTHIC' => 'GOTHIC', + 'Gothic[- _]?Block' => 'Gothic Block', +}, +'gre' => { + 'GREEK' => 'GREEK', + 'Greek[- _]?Block' => 'Greek Block', + 'Greek[- _]?Extended' => 'Greek Extended', +}, +'guj' => { + 'GUJARATI' => 'GUJARATI', + 'Gujarati[- _]?Block' => 'Gujarati Block', +}, +'gur' => { + 'GURMUKHI' => 'GURMUKHI', + 'Gurmukhi[- _]?Block' => 'Gurmukhi Block', +}, +'hal' => { + 'Halfwidth[- _]?and[- _]?Fullwidth[- _]?Forms' => 'Halfwidth and Fullwidth Forms', +}, +'han' => { + 'HANGUL' => 'HANGUL', + 'HAN' => 'HAN', + 'Hangul[- _]?Jamo' => 'Hangul Jamo', + 'Hangul[- _]?Compatibility[- _]?Jamo' => 'Hangul Compatibility Jamo', + 'Hangul[- _]?Syllables' => 'Hangul Syllables', +}, +'heb' => { + 'HEBREW' => 'HEBREW', + 'Hebrew[- _]?Block' => 'Hebrew Block', +}, +'hig' => { + 'High[- _]?Surrogates' => 'High Surrogates', + 'High[- _]?Private[- _]?Use[- _]?Surrogates' => 'High Private Use Surrogates', +}, +'hir' => { + 'HIRAGANA' => 'HIRAGANA', + 'Hiragana[- _]?Block' => 'Hiragana Block', +}, +'ide' => { + 'Ideographic[- _]?Description[- _]?Characters' => 'Ideographic Description Characters', +}, +'inh' => { + 'INHERITED' => 'INHERITED', +}, +'ipa' => { + 'IPA[- _]?Extensions' => 'IPA Extensions', +}, +'kan' => { + 'KANNADA' => 'KANNADA', + 'Kannada[- _]?Block' => 'Kannada Block', + 'Kangxi[- _]?Radicals' => 'Kangxi Radicals', + 'Kanbun' => 'Kanbun', +}, +'kat' => { + 'KATAKANA' => 'KATAKANA', + 'Katakana[- _]?Block' => 'Katakana Block', +}, +'khm' => { + 'KHMER' => 'KHMER', + 'Khmer[- _]?Block' => 'Khmer Block', +}, +'lao' => { + 'LAO' => 'LAO', + 'Lao[- _]?Block' => 'Lao Block', +}, +'lat' => { + 'LATIN' => 'LATIN', + 'Latin[- _]?1[- _]?Supplement' => 'Latin-1 Supplement', + 'Latin[- _]?Extended[- _]?A' => 'Latin Extended-A', + 'Latin[- _]?Extended[- _]?B' => 'Latin Extended-B', + 'Latin[- _]?Extended[- _]?Additional' => 'Latin Extended Additional', +}, +'let' => { + 'Letterlike[- _]?Symbols' => 'Letterlike Symbols', +}, +'low' => { + 'Low[- _]?Surrogates' => 'Low Surrogates', +}, +'mal' => { + 'MALAYALAM' => 'MALAYALAM', + 'Malayalam[- _]?Block' => 'Malayalam Block', +}, +'mat' => { + 'Mathematical[- _]?Operators' => 'Mathematical Operators', + 'Mathematical[- _]?Alphanumeric[- _]?Symbols' => 'Mathematical Alphanumeric Symbols', +}, +'mis' => { + 'Miscellaneous[- _]?Technical' => 'Miscellaneous Technical', + 'Miscellaneous[- _]?Symbols' => 'Miscellaneous Symbols', +}, +'mon' => { + 'MONGOLIAN' => 'MONGOLIAN', + 'Mongolian[- _]?Block' => 'Mongolian Block', +}, +'mus' => { + 'Musical[- _]?Symbols' => 'Musical Symbols', +}, +'mya' => { + 'MYANMAR' => 'MYANMAR', + 'Myanmar[- _]?Block' => 'Myanmar Block', +}, +'num' => { + 'Number[- _]?Forms' => 'Number Forms', +}, +'ogh' => { + 'OGHAM' => 'OGHAM', + 'Ogham[- _]?Block' => 'Ogham Block', +}, +'old' => { + 'OLD[- _]?ITALIC' => 'OLD-ITALIC', + 'Old[- _]?Italic' => 'Old Italic', +}, +'opt' => { + 'Optical[- _]?Character[- _]?Recognition' => 'Optical Character Recognition', +}, +'ori' => { + 'ORIYA' => 'ORIYA', + 'Oriya[- _]?Block' => 'Oriya Block', +}, +'pri' => { + 'Private[- _]?Use' => 'Private Use', +}, +'run' => { + 'RUNIC' => 'RUNIC', + 'Runic[- _]?Block' => 'Runic Block', +}, +'sin' => { + 'SINHALA' => 'SINHALA', + 'Sinhala[- _]?Block' => 'Sinhala Block', +}, +'sma' => { + 'Small[- _]?Form[- _]?Variants' => 'Small Form Variants', +}, +'spa' => { + 'Spacing[- _]?Modifier[- _]?Letters' => 'Spacing Modifier Letters', +}, +'spe' => { + 'Specials' => 'Specials', +}, +'sup' => { + 'Superscripts[- _]?and[- _]?Subscripts' => 'Superscripts and Subscripts', +}, +'syr' => { + 'SYRIAC' => 'SYRIAC', + 'Syriac[- _]?Block' => 'Syriac Block', +}, +'tag' => { + 'Tags' => 'Tags', +}, +'tam' => { + 'TAMIL' => 'TAMIL', + 'Tamil[- _]?Block' => 'Tamil Block', +}, +'tel' => { + 'TELUGU' => 'TELUGU', + 'Telugu[- _]?Block' => 'Telugu Block', +}, +'tha' => { + 'THAANA' => 'THAANA', + 'THAI' => 'THAI', + 'Thaana[- _]?Block' => 'Thaana Block', + 'Thai[- _]?Block' => 'Thai Block', +}, +'tib' => { + 'TIBETAN' => 'TIBETAN', + 'Tibetan[- _]?Block' => 'Tibetan Block', +}, +'uni' => { + 'Unified[- _]?Canadian[- _]?Aboriginal[- _]?Syllabics' => 'Unified Canadian Aboriginal Syllabics', +}, +'yi' => { + 'YI' => 'YI', +}, +'yi ' => { + 'Yi[- _]?Syllables' => 'Yi Syllables', + 'Yi[- _]?Radicals' => 'Yi Radicals', +}, ); |