summary | refs | log | tree | commit | diff
path: root/lib/unicore/In.pl
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-09-29 04:57:42 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-09-29 04:57:42 +0000
commit d9efae67d76cc4acd8980b711b5bebc7142b5319 (patch)
tree 85511ac1926809c78e0399fa9cde4033552336cb /lib/unicore/In.pl
parent e8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff)
download perl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz
Allow for more flexibility in the \p{In...} names: case no longer
matters, and any space or dash in a name can be matched by a space,
dash, underscore, or nothing at all. (may be going too far on leniency)

p4raw-id: //depot/perl@12264
Diffstat (limited to 'lib/unicore/In.pl')
-rw-r--r-- lib/unicore/In.pl 552
1 files changed, 416 insertions, 136 deletions
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl
index a6c24199a7..c11445c26b 100644
--- a/lib/unicore/In.pl
+++ b/lib/unicore/In.pl
@@ -2,140 +2,420 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
%utf8::In = (
-'Latin' => 0,
-'Greek' => 1,
-'Cyrillic' => 2,
-'Armenian' => 3,
-'Hebrew' => 4,
-'Arabic' => 5,
-'Syriac' => 6,
-'Thaana' => 7,
-'Devanagari' => 8,
-'Bengali' => 9,
-'Gurmukhi' => 10,
-'Gujarati' => 11,
-'Oriya' => 12,
-'Tamil' => 13,
-'Telugu' => 14,
-'Kannada' => 15,
-'Malayalam' => 16,
-'Sinhala' => 17,
-'Thai' => 18,
-'Lao' => 19,
-'Tibetan' => 20,
-'Myanmar' => 21,
-'Georgian' => 22,
-'Hangul' => 23,
-'Ethiopic' => 24,
-'Cherokee' => 25,
-'CanadianAboriginal' => 26,
-'Ogham' => 27,
-'Runic' => 28,
-'Khmer' => 29,
-'Mongolian' => 30,
-'Hiragana' => 31,
-'Katakana' => 32,
-'Bopomofo' => 33,
-'Han' => 34,
-'Yi' => 35,
-'OldItalic' => 36,
-'Gothic' => 37,
-'Deseret' => 38,
-'Inherited' => 39,
-'BasicLatin' => 40,
-'Latin1Supplement' => 41,
-'LatinExtendedA' => 42,
-'LatinExtendedB' => 43,
-'IPAExtensions' => 44,
-'SpacingModifierLetters' => 45,
-'CombiningDiacriticalMarks' => 46,
-'GreekBlock' => 47,
-'CyrillicBlock' => 48,
-'ArmenianBlock' => 49,
-'HebrewBlock' => 50,
-'ArabicBlock' => 51,
-'SyriacBlock' => 52,
-'ThaanaBlock' => 53,
-'DevanagariBlock' => 54,
-'BengaliBlock' => 55,
-'GurmukhiBlock' => 56,
-'GujaratiBlock' => 57,
-'OriyaBlock' => 58,
-'TamilBlock' => 59,
-'TeluguBlock' => 60,
-'KannadaBlock' => 61,
-'MalayalamBlock' => 62,
-'SinhalaBlock' => 63,
-'ThaiBlock' => 64,
-'LaoBlock' => 65,
-'TibetanBlock' => 66,
-'MyanmarBlock' => 67,
-'GeorgianBlock' => 68,
-'HangulJamo' => 69,
-'EthiopicBlock' => 70,
-'CherokeeBlock' => 71,
-'UnifiedCanadianAboriginalSyllabics' => 72,
-'OghamBlock' => 73,
-'RunicBlock' => 74,
-'KhmerBlock' => 75,
-'MongolianBlock' => 76,
-'LatinExtendedAdditional' => 77,
-'GreekExtended' => 78,
-'GeneralPunctuation' => 79,
-'SuperscriptsandSubscripts' => 80,
-'CurrencySymbols' => 81,
-'CombiningMarksforSymbols' => 82,
-'LetterlikeSymbols' => 83,
-'NumberForms' => 84,
-'Arrows' => 85,
-'MathematicalOperators' => 86,
-'MiscellaneousTechnical' => 87,
-'ControlPictures' => 88,
-'OpticalCharacterRecognition' => 89,
-'EnclosedAlphanumerics' => 90,
-'BoxDrawing' => 91,
-'BlockElements' => 92,
-'GeometricShapes' => 93,
-'MiscellaneousSymbols' => 94,
-'Dingbats' => 95,
-'BraillePatterns' => 96,
-'CJKRadicalsSupplement' => 97,
-'KangxiRadicals' => 98,
-'IdeographicDescriptionCharacters' => 99,
-'CJKSymbolsandPunctuation' => 100,
-'HiraganaBlock' => 101,
-'KatakanaBlock' => 102,
-'BopomofoBlock' => 103,
-'HangulCompatibilityJamo' => 104,
-'Kanbun' => 105,
-'BopomofoExtended' => 106,
-'EnclosedCJKLettersandMonths' => 107,
-'CJKCompatibility' => 108,
-'CJKUnifiedIdeographsExtensionA' => 109,
-'CJKUnifiedIdeographs' => 110,
-'YiSyllables' => 111,
-'YiRadicals' => 112,
-'HangulSyllables' => 113,
-'HighSurrogates' => 114,
-'HighPrivateUseSurrogates' => 115,
-'LowSurrogates' => 116,
-'PrivateUse' => 117,
-'CJKCompatibilityIdeographs' => 118,
-'AlphabeticPresentationForms' => 119,
-'ArabicPresentationFormsA' => 120,
-'CombiningHalfMarks' => 121,
-'CJKCompatibilityForms' => 122,
-'SmallFormVariants' => 123,
-'ArabicPresentationFormsB' => 124,
-'Specials' => 125,
-'HalfwidthandFullwidthForms' => 126,
-'OldItalicBlock' => 127,
-'GothicBlock' => 128,
-'DeseretBlock' => 129,
-'ByzantineMusicalSymbols' => 130,
-'MusicalSymbols' => 131,
-'MathematicalAlphanumericSymbols' => 132,
-'CJKUnifiedIdeographsExtensionB' => 133,
-'CJKCompatibilityIdeographsSupplement' => 134,
-'Tags' => 135,
+'LATIN' => 0,
+'GREEK' => 1,
+'CYRILLIC' => 2,
+'ARMENIAN' => 3,
+'HEBREW' => 4,
+'ARABIC' => 5,
+'SYRIAC' => 6,
+'THAANA' => 7,
+'DEVANAGARI' => 8,
+'BENGALI' => 9,
+'GURMUKHI' => 10,
+'GUJARATI' => 11,
+'ORIYA' => 12,
+'TAMIL' => 13,
+'TELUGU' => 14,
+'KANNADA' => 15,
+'MALAYALAM' => 16,
+'SINHALA' => 17,
+'THAI' => 18,
+'LAO' => 19,
+'TIBETAN' => 20,
+'MYANMAR' => 21,
+'GEORGIAN' => 22,
+'HANGUL' => 23,
+'ETHIOPIC' => 24,
+'CHEROKEE' => 25,
+'CANADIAN-ABORIGINAL' => 26,
+'OGHAM' => 27,
+'RUNIC' => 28,
+'KHMER' => 29,
+'MONGOLIAN' => 30,
+'HIRAGANA' => 31,
+'KATAKANA' => 32,
+'BOPOMOFO' => 33,
+'HAN' => 34,
+'YI' => 35,
+'OLD-ITALIC' => 36,
+'GOTHIC' => 37,
+'DESERET' => 38,
+'INHERITED' => 39,
+'Basic Latin' => 40,
+'Latin-1 Supplement' => 41,
+'Latin Extended-A' => 42,
+'Latin Extended-B' => 43,
+'IPA Extensions' => 44,
+'Spacing Modifier Letters' => 45,
+'Combining Diacritical Marks' => 46,
+'Greek Block' => 47,
+'Cyrillic Block' => 48,
+'Armenian Block' => 49,
+'Hebrew Block' => 50,
+'Arabic Block' => 51,
+'Syriac Block' => 52,
+'Thaana Block' => 53,
+'Devanagari Block' => 54,
+'Bengali Block' => 55,
+'Gurmukhi Block' => 56,
+'Gujarati Block' => 57,
+'Oriya Block' => 58,
+'Tamil Block' => 59,
+'Telugu Block' => 60,
+'Kannada Block' => 61,
+'Malayalam Block' => 62,
+'Sinhala Block' => 63,
+'Thai Block' => 64,
+'Lao Block' => 65,
+'Tibetan Block' => 66,
+'Myanmar Block' => 67,
+'Georgian Block' => 68,
+'Hangul Jamo' => 69,
+'Ethiopic Block' => 70,
+'Cherokee Block' => 71,
+'Unified Canadian Aboriginal Syllabics' => 72,
+'Ogham Block' => 73,
+'Runic Block' => 74,
+'Khmer Block' => 75,
+'Mongolian Block' => 76,
+'Latin Extended Additional' => 77,
+'Greek Extended' => 78,
+'General Punctuation' => 79,
+'Superscripts and Subscripts' => 80,
+'Currency Symbols' => 81,
+'Combining Marks for Symbols' => 82,
+'Letterlike Symbols' => 83,
+'Number Forms' => 84,
+'Arrows' => 85,
+'Mathematical Operators' => 86,
+'Miscellaneous Technical' => 87,
+'Control Pictures' => 88,
+'Optical Character Recognition' => 89,
+'Enclosed Alphanumerics' => 90,
+'Box Drawing' => 91,
+'Block Elements' => 92,
+'Geometric Shapes' => 93,
+'Miscellaneous Symbols' => 94,
+'Dingbats' => 95,
+'Braille Patterns' => 96,
+'CJK Radicals Supplement' => 97,
+'Kangxi Radicals' => 98,
+'Ideographic Description Characters' => 99,
+'CJK Symbols and Punctuation' => 100,
+'Hiragana Block' => 101,
+'Katakana Block' => 102,
+'Bopomofo Block' => 103,
+'Hangul Compatibility Jamo' => 104,
+'Kanbun' => 105,
+'Bopomofo Extended' => 106,
+'Enclosed CJK Letters and Months' => 107,
+'CJK Compatibility' => 108,
+'CJK Unified Ideographs Extension A' => 109,
+'CJK Unified Ideographs' => 110,
+'Yi Syllables' => 111,
+'Yi Radicals' => 112,
+'Hangul Syllables' => 113,
+'High Surrogates' => 114,
+'High Private Use Surrogates' => 115,
+'Low Surrogates' => 116,
+'Private Use' => 117,
+'CJK Compatibility Ideographs' => 118,
+'Alphabetic Presentation Forms' => 119,
+'Arabic Presentation Forms-A' => 120,
+'Combining Half Marks' => 121,
+'CJK Compatibility Forms' => 122,
+'Small Form Variants' => 123,
+'Arabic Presentation Forms-B' => 124,
+'Specials' => 125,
+'Halfwidth and Fullwidth Forms' => 126,
+'Old Italic' => 127,
+'Gothic Block' => 128,
+'Deseret Block' => 129,
+'Byzantine Musical Symbols' => 130,
+'Musical Symbols' => 131,
+'Mathematical Alphanumeric Symbols' => 132,
+'CJK Unified Ideographs Extension B' => 133,
+'CJK Compatibility Ideographs Supplement' => 134,
+'Tags' => 135,
+);
+%utf8::InPat = (
+'alp' => {
+ 'Alphabetic[- _]?Presentation[- _]?Forms' => 'Alphabetic Presentation Forms',
+},
+'ara' => {
+ 'ARABIC' => 'ARABIC',
+ 'Arabic[- _]?Block' => 'Arabic Block',
+ 'Arabic[- _]?Presentation[- _]?Forms[- _]?A' => 'Arabic Presentation Forms-A',
+ 'Arabic[- _]?Presentation[- _]?Forms[- _]?B' => 'Arabic Presentation Forms-B',
+},
+'arm' => {
+ 'ARMENIAN' => 'ARMENIAN',
+ 'Armenian[- _]?Block' => 'Armenian Block',
+},
+'arr' => {
+ 'Arrows' => 'Arrows',
+},
+'bas' => {
+ 'Basic[- _]?Latin' => 'Basic Latin',
+},
+'ben' => {
+ 'BENGALI' => 'BENGALI',
+ 'Bengali[- _]?Block' => 'Bengali Block',
+},
+'blo' => {
+ 'Block[- _]?Elements' => 'Block Elements',
+},
+'bop' => {
+ 'BOPOMOFO' => 'BOPOMOFO',
+ 'Bopomofo[- _]?Block' => 'Bopomofo Block',
+ 'Bopomofo[- _]?Extended' => 'Bopomofo Extended',
+},
+'box' => {
+ 'Box[- _]?Drawing' => 'Box Drawing',
+},
+'bra' => {
+ 'Braille[- _]?Patterns' => 'Braille Patterns',
+},
+'byz' => {
+ 'Byzantine[- _]?Musical[- _]?Symbols' => 'Byzantine Musical Symbols',
+},
+'can' => {
+ 'CANADIAN[- _]?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
+},
+'che' => {
+ 'CHEROKEE' => 'CHEROKEE',
+ 'Cherokee[- _]?Block' => 'Cherokee Block',
+},
+'cjk' => {
+ 'CJK[- _]?Radicals[- _]?Supplement' => 'CJK Radicals Supplement',
+ 'CJK[- _]?Symbols[- _]?and[- _]?Punctuation' => 'CJK Symbols and Punctuation',
+ 'CJK[- _]?Compatibility' => 'CJK Compatibility',
+ 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?A' => 'CJK Unified Ideographs Extension A',
+ 'CJK[- _]?Unified[- _]?Ideographs' => 'CJK Unified Ideographs',
+ 'CJK[- _]?Compatibility[- _]?Ideographs' => 'CJK Compatibility Ideographs',
+ 'CJK[- _]?Compatibility[- _]?Forms' => 'CJK Compatibility Forms',
+ 'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?B' => 'CJK Unified Ideographs Extension B',
+ 'CJK[- _]?Compatibility[- _]?Ideographs[- _]?Supplement' => 'CJK Compatibility Ideographs Supplement',
+},
+'com' => {
+ 'Combining[- _]?Diacritical[- _]?Marks' => 'Combining Diacritical Marks',
+ 'Combining[- _]?Marks[- _]?for[- _]?Symbols' => 'Combining Marks for Symbols',
+ 'Combining[- _]?Half[- _]?Marks' => 'Combining Half Marks',
+},
+'con' => {
+ 'Control[- _]?Pictures' => 'Control Pictures',
+},
+'cur' => {
+ 'Currency[- _]?Symbols' => 'Currency Symbols',
+},
+'cyr' => {
+ 'CYRILLIC' => 'CYRILLIC',
+ 'Cyrillic[- _]?Block' => 'Cyrillic Block',
+},
+'des' => {
+ 'DESERET' => 'DESERET',
+ 'Deseret[- _]?Block' => 'Deseret Block',
+},
+'dev' => {
+ 'DEVANAGARI' => 'DEVANAGARI',
+ 'Devanagari[- _]?Block' => 'Devanagari Block',
+},
+'din' => {
+ 'Dingbats' => 'Dingbats',
+},
+'enc' => {
+ 'Enclosed[- _]?Alphanumerics' => 'Enclosed Alphanumerics',
+ 'Enclosed[- _]?CJK[- _]?Letters[- _]?and[- _]?Months' => 'Enclosed CJK Letters and Months',
+},
+'eth' => {
+ 'ETHIOPIC' => 'ETHIOPIC',
+ 'Ethiopic[- _]?Block' => 'Ethiopic Block',
+},
+'gen' => {
+ 'General[- _]?Punctuation' => 'General Punctuation',
+},
+'geo' => {
+ 'GEORGIAN' => 'GEORGIAN',
+ 'Georgian[- _]?Block' => 'Georgian Block',
+ 'Geometric[- _]?Shapes' => 'Geometric Shapes',
+},
+'got' => {
+ 'GOTHIC' => 'GOTHIC',
+ 'Gothic[- _]?Block' => 'Gothic Block',
+},
+'gre' => {
+ 'GREEK' => 'GREEK',
+ 'Greek[- _]?Block' => 'Greek Block',
+ 'Greek[- _]?Extended' => 'Greek Extended',
+},
+'guj' => {
+ 'GUJARATI' => 'GUJARATI',
+ 'Gujarati[- _]?Block' => 'Gujarati Block',
+},
+'gur' => {
+ 'GURMUKHI' => 'GURMUKHI',
+ 'Gurmukhi[- _]?Block' => 'Gurmukhi Block',
+},
+'hal' => {
+ 'Halfwidth[- _]?and[- _]?Fullwidth[- _]?Forms' => 'Halfwidth and Fullwidth Forms',
+},
+'han' => {
+ 'HANGUL' => 'HANGUL',
+ 'HAN' => 'HAN',
+ 'Hangul[- _]?Jamo' => 'Hangul Jamo',
+ 'Hangul[- _]?Compatibility[- _]?Jamo' => 'Hangul Compatibility Jamo',
+ 'Hangul[- _]?Syllables' => 'Hangul Syllables',
+},
+'heb' => {
+ 'HEBREW' => 'HEBREW',
+ 'Hebrew[- _]?Block' => 'Hebrew Block',
+},
+'hig' => {
+ 'High[- _]?Surrogates' => 'High Surrogates',
+ 'High[- _]?Private[- _]?Use[- _]?Surrogates' => 'High Private Use Surrogates',
+},
+'hir' => {
+ 'HIRAGANA' => 'HIRAGANA',
+ 'Hiragana[- _]?Block' => 'Hiragana Block',
+},
+'ide' => {
+ 'Ideographic[- _]?Description[- _]?Characters' => 'Ideographic Description Characters',
+},
+'inh' => {
+ 'INHERITED' => 'INHERITED',
+},
+'ipa' => {
+ 'IPA[- _]?Extensions' => 'IPA Extensions',
+},
+'kan' => {
+ 'KANNADA' => 'KANNADA',
+ 'Kannada[- _]?Block' => 'Kannada Block',
+ 'Kangxi[- _]?Radicals' => 'Kangxi Radicals',
+ 'Kanbun' => 'Kanbun',
+},
+'kat' => {
+ 'KATAKANA' => 'KATAKANA',
+ 'Katakana[- _]?Block' => 'Katakana Block',
+},
+'khm' => {
+ 'KHMER' => 'KHMER',
+ 'Khmer[- _]?Block' => 'Khmer Block',
+},
+'lao' => {
+ 'LAO' => 'LAO',
+ 'Lao[- _]?Block' => 'Lao Block',
+},
+'lat' => {
+ 'LATIN' => 'LATIN',
+ 'Latin[- _]?1[- _]?Supplement' => 'Latin-1 Supplement',
+ 'Latin[- _]?Extended[- _]?A' => 'Latin Extended-A',
+ 'Latin[- _]?Extended[- _]?B' => 'Latin Extended-B',
+ 'Latin[- _]?Extended[- _]?Additional' => 'Latin Extended Additional',
+},
+'let' => {
+ 'Letterlike[- _]?Symbols' => 'Letterlike Symbols',
+},
+'low' => {
+ 'Low[- _]?Surrogates' => 'Low Surrogates',
+},
+'mal' => {
+ 'MALAYALAM' => 'MALAYALAM',
+ 'Malayalam[- _]?Block' => 'Malayalam Block',
+},
+'mat' => {
+ 'Mathematical[- _]?Operators' => 'Mathematical Operators',
+ 'Mathematical[- _]?Alphanumeric[- _]?Symbols' => 'Mathematical Alphanumeric Symbols',
+},
+'mis' => {
+ 'Miscellaneous[- _]?Technical' => 'Miscellaneous Technical',
+ 'Miscellaneous[- _]?Symbols' => 'Miscellaneous Symbols',
+},
+'mon' => {
+ 'MONGOLIAN' => 'MONGOLIAN',
+ 'Mongolian[- _]?Block' => 'Mongolian Block',
+},
+'mus' => {
+ 'Musical[- _]?Symbols' => 'Musical Symbols',
+},
+'mya' => {
+ 'MYANMAR' => 'MYANMAR',
+ 'Myanmar[- _]?Block' => 'Myanmar Block',
+},
+'num' => {
+ 'Number[- _]?Forms' => 'Number Forms',
+},
+'ogh' => {
+ 'OGHAM' => 'OGHAM',
+ 'Ogham[- _]?Block' => 'Ogham Block',
+},
+'old' => {
+ 'OLD[- _]?ITALIC' => 'OLD-ITALIC',
+ 'Old[- _]?Italic' => 'Old Italic',
+},
+'opt' => {
+ 'Optical[- _]?Character[- _]?Recognition' => 'Optical Character Recognition',
+},
+'ori' => {
+ 'ORIYA' => 'ORIYA',
+ 'Oriya[- _]?Block' => 'Oriya Block',
+},
+'pri' => {
+ 'Private[- _]?Use' => 'Private Use',
+},
+'run' => {
+ 'RUNIC' => 'RUNIC',
+ 'Runic[- _]?Block' => 'Runic Block',
+},
+'sin' => {
+ 'SINHALA' => 'SINHALA',
+ 'Sinhala[- _]?Block' => 'Sinhala Block',
+},
+'sma' => {
+ 'Small[- _]?Form[- _]?Variants' => 'Small Form Variants',
+},
+'spa' => {
+ 'Spacing[- _]?Modifier[- _]?Letters' => 'Spacing Modifier Letters',
+},
+'spe' => {
+ 'Specials' => 'Specials',
+},
+'sup' => {
+ 'Superscripts[- _]?and[- _]?Subscripts' => 'Superscripts and Subscripts',
+},
+'syr' => {
+ 'SYRIAC' => 'SYRIAC',
+ 'Syriac[- _]?Block' => 'Syriac Block',
+},
+'tag' => {
+ 'Tags' => 'Tags',
+},
+'tam' => {
+ 'TAMIL' => 'TAMIL',
+ 'Tamil[- _]?Block' => 'Tamil Block',
+},
+'tel' => {
+ 'TELUGU' => 'TELUGU',
+ 'Telugu[- _]?Block' => 'Telugu Block',
+},
+'tha' => {
+ 'THAANA' => 'THAANA',
+ 'THAI' => 'THAI',
+ 'Thaana[- _]?Block' => 'Thaana Block',
+ 'Thai[- _]?Block' => 'Thai Block',
+},
+'tib' => {
+ 'TIBETAN' => 'TIBETAN',
+ 'Tibetan[- _]?Block' => 'Tibetan Block',
+},
+'uni' => {
+ 'Unified[- _]?Canadian[- _]?Aboriginal[- _]?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
+},
+'yi' => {
+ 'YI' => 'YI',
+},
+'yi ' => {
+ 'Yi[- _]?Syllables' => 'Yi Syllables',
+ 'Yi[- _]?Radicals' => 'Yi Radicals',
+},
);