summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/unicore/In.pl220
-rw-r--r--lib/unicore/In/136.pl692
-rw-r--r--lib/unicore/In/155.pl19
-rwxr-xr-xlib/unicore/mktables.PL26
-rw-r--r--lib/utf8_heavy.pl2
5 files changed, 466 insertions, 493 deletions
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl
index 3a98027f97..9e410f9420 100644
--- a/lib/unicore/In.pl
+++ b/lib/unicore/In.pl
@@ -167,67 +167,59 @@
'ID_Continue' => 162,
);
%utf8::InPat = (
-'alp' => {
+'al' => {
'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms',
'Alphabetic' => 'Alphabetic',
},
-'any' => {
+'an' => {
'Any' => 'Any',
},
-'ara' => {
+'ar' => {
+ 'ARMENIAN' => 'ARMENIAN',
'ARABIC' => 'ARABIC',
+ 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block',
+ 'Arrows' => 'Arrows',
'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A',
'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B',
},
-'arm' => {
- 'ARMENIAN' => 'ARMENIAN',
- 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
-},
-'arr' => {
- 'Arrows' => 'Arrows',
-},
-'asc' => {
+'as' => {
'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit',
-},
-'ass' => {
'Assigned' => 'Assigned',
},
-'bas' => {
+'ba' => {
'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin',
},
-'ben' => {
+'be' => {
'BENGALI' => 'BENGALI',
'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block',
},
-'bid' => {
+'bi' => {
'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control',
},
-'blo' => {
+'bl' => {
'Block(?:[-_]|\s+)?Elements' => 'Block Elements',
},
-'bop' => {
+'bo' => {
'BOPOMOFO' => 'BOPOMOFO',
+ 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block',
'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended',
},
-'box' => {
- 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
-},
-'bra' => {
+'br' => {
'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns',
},
-'byz' => {
+'by' => {
'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols',
},
-'can' => {
+'ca' => {
'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
},
-'che' => {
+'ch' => {
'CHEROKEE' => 'CHEROKEE',
'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block',
},
-'cjk' => {
+'cj' => {
'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement',
'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation',
'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility',
@@ -238,273 +230,239 @@
'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B',
'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement',
},
-'com' => {
+'co' => {
'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks',
'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols',
+ 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks',
'Common' => 'Common',
},
-'con' => {
- 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
-},
-'cur' => {
+'cu' => {
'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols',
},
-'cyr' => {
+'cy' => {
'CYRILLIC' => 'CYRILLIC',
'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block',
},
-'das' => {
+'da' => {
'Dash' => 'Dash',
},
-'des' => {
- 'DESERET' => 'DESERET',
- 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
-},
-'dev' => {
+'de' => {
'DEVANAGARI' => 'DEVANAGARI',
+ 'DESERET' => 'DESERET',
'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block',
+ 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
},
-'dia' => {
- 'Diacritic' => 'Diacritic',
-},
-'din' => {
+'di' => {
'Dingbats' => 'Dingbats',
+ 'Diacritic' => 'Diacritic',
},
-'enc' => {
+'en' => {
'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics',
'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months',
},
-'eth' => {
+'et' => {
'ETHIOPIC' => 'ETHIOPIC',
'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block',
},
-'ext' => {
+'ex' => {
'Extender' => 'Extender',
},
-'gen' => {
- 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
-},
-'geo' => {
+'ge' => {
'GEORGIAN' => 'GEORGIAN',
'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block',
+ 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes',
},
-'got' => {
+'go' => {
'GOTHIC' => 'GOTHIC',
'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block',
},
-'gre' => {
+'gr' => {
'GREEK' => 'GREEK',
'Greek(?:[-_]|\s+)?Block' => 'Greek Block',
'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended',
},
-'guj' => {
- 'GUJARATI' => 'GUJARATI',
- 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
-},
-'gur' => {
+'gu' => {
'GURMUKHI' => 'GURMUKHI',
+ 'GUJARATI' => 'GUJARATI',
'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block',
+ 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
},
-'hal' => {
- 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
-},
-'han' => {
+'ha' => {
'HANGUL' => 'HANGUL',
'HAN' => 'HAN',
'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo',
'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo',
'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables',
+ 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
},
-'heb' => {
+'he' => {
'HEBREW' => 'HEBREW',
'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block',
-},
-'hex' => {
'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit',
},
-'hig' => {
- 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
- 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
-},
-'hir' => {
+'hi' => {
'HIRAGANA' => 'HIRAGANA',
'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block',
+ 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
+ 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
},
-'hyp' => {
+'hy' => {
'Hyphen' => 'Hyphen',
},
-'id_' => {
- 'ID(?:[-_]|\s+)?Start' => 'ID_Start',
- 'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
-},
-'ide' => {
+'id' => {
'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters',
'Ideographic' => 'Ideographic',
+ 'ID(?:[-_]|\s+)?Start' => 'ID_Start',
+ 'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
},
-'inh' => {
+'in' => {
'INHERITED' => 'INHERITED',
},
-'ipa' => {
+'ip' => {
'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions',
},
-'joi' => {
+'jo' => {
'Join(?:[-_]|\s+)?Control' => 'Join_Control',
},
-'kan' => {
+'ka' => {
'KANNADA' => 'KANNADA',
+ 'KATAKANA' => 'KATAKANA',
'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block',
'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals',
- 'Kanbun' => 'Kanbun',
-},
-'kat' => {
- 'KATAKANA' => 'KATAKANA',
'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block',
+ 'Kanbun' => 'Kanbun',
},
-'khm' => {
+'kh' => {
'KHMER' => 'KHMER',
'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block',
},
-'lam' => {
- 'Lampersand' => 'Lampersand',
-},
-'lao' => {
- 'LAO' => 'LAO',
- 'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
-},
-'lat' => {
+'la' => {
'LATIN' => 'LATIN',
+ 'LAO' => 'LAO',
'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B',
+ 'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional',
+ 'Lampersand' => 'Lampersand',
},
-'let' => {
+'le' => {
'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols',
},
-'low' => {
+'lo' => {
'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates',
'Lowercase' => 'Lowercase',
},
-'mal' => {
+'ma' => {
'MALAYALAM' => 'MALAYALAM',
'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block',
-},
-'mat' => {
'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators',
'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols',
'Math' => 'Math',
},
-'mis' => {
+'mi' => {
'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical',
'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols',
},
-'mon' => {
+'mo' => {
'MONGOLIAN' => 'MONGOLIAN',
'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block',
},
-'mus' => {
+'mu' => {
'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols',
},
-'mya' => {
+'my' => {
'MYANMAR' => 'MYANMAR',
'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block',
},
-'non' => {
+'no' => {
'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point',
},
-'num' => {
+'nu' => {
'Number(?:[-_]|\s+)?Forms' => 'Number Forms',
},
-'ogh' => {
+'og' => {
'OGHAM' => 'OGHAM',
'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block',
},
-'old' => {
+'ol' => {
'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC',
'Old(?:[-_]|\s+)?Italic' => 'Old Italic',
},
-'opt' => {
+'op' => {
'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition',
},
-'ori' => {
+'or' => {
'ORIYA' => 'ORIYA',
'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block',
},
-'oth' => {
+'ot' => {
'Other(?:[-_]|\s+)?Math' => 'Other_Math',
'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic',
'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase',
'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase',
},
-'pri' => {
+'pr' => {
'Private(?:[-_]|\s+)?Use' => 'Private Use',
},
-'quo' => {
+'qu' => {
'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark',
},
-'run' => {
+'ru' => {
'RUNIC' => 'RUNIC',
'Runic(?:[-_]|\s+)?Block' => 'Runic Block',
},
-'sin' => {
+'si' => {
'SINHALA' => 'SINHALA',
'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block',
},
-'sma' => {
+'sm' => {
'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants',
},
-'spa' => {
+'sp' => {
'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters',
-},
-'spe' => {
'Specials' => 'Specials',
},
-'sup' => {
+'su' => {
'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts',
},
-'syr' => {
+'sy' => {
'SYRIAC' => 'SYRIAC',
'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block',
},
-'tag' => {
- 'Tags' => 'Tags',
-},
-'tam' => {
+'ta' => {
'TAMIL' => 'TAMIL',
'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block',
+ 'Tags' => 'Tags',
},
-'tel' => {
+'te' => {
'TELUGU' => 'TELUGU',
'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block',
-},
-'ter' => {
'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation',
},
-'tha' => {
+'th' => {
'THAANA' => 'THAANA',
'THAI' => 'THAI',
'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block',
'Thai(?:[-_]|\s+)?Block' => 'Thai Block',
},
-'tib' => {
+'ti' => {
'TIBETAN' => 'TIBETAN',
'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block',
},
-'uni' => {
+'un' => {
'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
},
-'upp' => {
+'up' => {
'Uppercase' => 'Uppercase',
},
-'whi' => {
+'wh' => {
'White(?:[-_]|\s+)?space' => 'White_space',
},
'yi' => {
'YI' => 'YI',
-},
-'yi ' => {
'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables',
'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals',
},
diff --git a/lib/unicore/In/136.pl b/lib/unicore/In/136.pl
index ecf758691c..d3f2067de9 100644
--- a/lib/unicore/In/136.pl
+++ b/lib/unicore/In/136.pl
@@ -2,350 +2,350 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0000 0040 Common # In/136.pl
-005b 0060 Common # In/136.pl
-007b 00a9 Common # In/136.pl
-00ab 00b4 Common # In/136.pl
-00b6 00b9 Common # In/136.pl
-00bb 00bf Common # In/136.pl
-00d7 Common # In/136.pl
-00f7 Common # In/136.pl
-0220 0221 Common # In/136.pl
-0234 024f Common # In/136.pl
-02ae 02af Common # In/136.pl
-02b9 02df Common # In/136.pl
-02e5 02ff Common # In/136.pl
-034f 035f Common # In/136.pl
-0363 0379 Common # In/136.pl
-037b 0385 Common # In/136.pl
-0387 Common # In/136.pl
-038b Common # In/136.pl
-038d Common # In/136.pl
-03a2 Common # In/136.pl
-03cf Common # In/136.pl
-03d8 03d9 Common # In/136.pl
-03f6 03ff Common # In/136.pl
-0482 Common # In/136.pl
-0487 Common # In/136.pl
-048a 048b Common # In/136.pl
-04c5 04c6 Common # In/136.pl
-04c9 04ca Common # In/136.pl
-04cd 04cf Common # In/136.pl
-04f6 04f7 Common # In/136.pl
-04fa 0530 Common # In/136.pl
-0557 0558 Common # In/136.pl
-055a 0560 Common # In/136.pl
-0588 0590 Common # In/136.pl
-05a2 Common # In/136.pl
-05ba Common # In/136.pl
-05be Common # In/136.pl
-05c0 Common # In/136.pl
-05c3 Common # In/136.pl
-05c5 05cf Common # In/136.pl
-05eb 05ef Common # In/136.pl
-05f3 0620 Common # In/136.pl
-063b 0640 Common # In/136.pl
-0656 066f Common # In/136.pl
-06d4 Common # In/136.pl
-06e9 Common # In/136.pl
-06ee 06f9 Common # In/136.pl
-06fd 070f Common # In/136.pl
-072d 072f Common # In/136.pl
-074b 077f Common # In/136.pl
-07b1 0900 Common # In/136.pl
-0904 Common # In/136.pl
-093a 093b Common # In/136.pl
-094e 094f Common # In/136.pl
-0955 0957 Common # In/136.pl
-0964 0965 Common # In/136.pl
-0970 0980 Common # In/136.pl
-0982 0984 Common # In/136.pl
-098d 098e Common # In/136.pl
-0991 0992 Common # In/136.pl
-09a9 Common # In/136.pl
-09b1 Common # In/136.pl
-09b3 09b5 Common # In/136.pl
-09ba 09bb Common # In/136.pl
-09bd Common # In/136.pl
-09c5 09c6 Common # In/136.pl
-09c9 09ca Common # In/136.pl
-09ce 09d6 Common # In/136.pl
-09d8 09db Common # In/136.pl
-09de Common # In/136.pl
-09e4 09e5 Common # In/136.pl
-09f2 0a01 Common # In/136.pl
-0a03 0a04 Common # In/136.pl
-0a0b 0a0e Common # In/136.pl
-0a11 0a12 Common # In/136.pl
-0a29 Common # In/136.pl
-0a31 Common # In/136.pl
-0a34 Common # In/136.pl
-0a37 Common # In/136.pl
-0a3a 0a3b Common # In/136.pl
-0a3d Common # In/136.pl
-0a43 0a46 Common # In/136.pl
-0a49 0a4a Common # In/136.pl
-0a4e 0a58 Common # In/136.pl
-0a5d Common # In/136.pl
-0a5f 0a65 Common # In/136.pl
-0a75 0a80 Common # In/136.pl
-0a84 Common # In/136.pl
-0a8c Common # In/136.pl
-0a8e Common # In/136.pl
-0a92 Common # In/136.pl
-0aa9 Common # In/136.pl
-0ab1 Common # In/136.pl
-0ab4 Common # In/136.pl
-0aba 0abb Common # In/136.pl
-0ac6 Common # In/136.pl
-0aca Common # In/136.pl
-0ace 0acf Common # In/136.pl
-0ad1 0adf Common # In/136.pl
-0ae1 0ae5 Common # In/136.pl
-0af0 0b00 Common # In/136.pl
-0b04 Common # In/136.pl
-0b0d 0b0e Common # In/136.pl
-0b11 0b12 Common # In/136.pl
-0b29 Common # In/136.pl
-0b31 Common # In/136.pl
-0b34 0b35 Common # In/136.pl
-0b3a 0b3b Common # In/136.pl
-0b44 0b46 Common # In/136.pl
-0b49 0b4a Common # In/136.pl
-0b4e 0b55 Common # In/136.pl
-0b58 0b5b Common # In/136.pl
-0b5e Common # In/136.pl
-0b62 0b65 Common # In/136.pl
-0b70 0b81 Common # In/136.pl
-0b84 Common # In/136.pl
-0b8b 0b8d Common # In/136.pl
-0b91 Common # In/136.pl
-0b96 0b98 Common # In/136.pl
-0b9b Common # In/136.pl
-0b9d Common # In/136.pl
-0ba0 0ba2 Common # In/136.pl
-0ba5 0ba7 Common # In/136.pl
-0bab 0bad Common # In/136.pl
-0bb6 Common # In/136.pl
-0bba 0bbd Common # In/136.pl
-0bc3 0bc5 Common # In/136.pl
-0bc9 Common # In/136.pl
-0bce 0bd6 Common # In/136.pl
-0bd8 0be6 Common # In/136.pl
-0bf3 0c00 Common # In/136.pl
-0c04 Common # In/136.pl
-0c0d Common # In/136.pl
-0c11 Common # In/136.pl
-0c29 Common # In/136.pl
-0c34 Common # In/136.pl
-0c3a 0c3d Common # In/136.pl
-0c45 Common # In/136.pl
-0c49 Common # In/136.pl
-0c4e 0c54 Common # In/136.pl
-0c57 0c5f Common # In/136.pl
-0c62 0c65 Common # In/136.pl
-0c70 0c81 Common # In/136.pl
-0c84 Common # In/136.pl
-0c8d Common # In/136.pl
-0c91 Common # In/136.pl
-0ca9 Common # In/136.pl
-0cb4 Common # In/136.pl
-0cba 0cbd Common # In/136.pl
-0cc5 Common # In/136.pl
-0cc9 Common # In/136.pl
-0cce 0cd4 Common # In/136.pl
-0cd7 0cdd Common # In/136.pl
-0cdf Common # In/136.pl
-0ce2 0ce5 Common # In/136.pl
-0cf0 0d01 Common # In/136.pl
-0d04 Common # In/136.pl
-0d0d Common # In/136.pl
-0d11 Common # In/136.pl
-0d29 Common # In/136.pl
-0d3a 0d3d Common # In/136.pl
-0d44 0d45 Common # In/136.pl
-0d49 Common # In/136.pl
-0d4e 0d56 Common # In/136.pl
-0d58 0d5f Common # In/136.pl
-0d62 0d65 Common # In/136.pl
-0d70 0d81 Common # In/136.pl
-0d84 Common # In/136.pl
-0d97 0d99 Common # In/136.pl
-0db2 Common # In/136.pl
-0dbc Common # In/136.pl
-0dbe 0dbf Common # In/136.pl
-0dc7 0dc9 Common # In/136.pl
-0dcb 0dce Common # In/136.pl
-0dd5 Common # In/136.pl
-0dd7 Common # In/136.pl
-0de0 0df1 Common # In/136.pl
-0df4 0e00 Common # In/136.pl
-0e3b 0e3f Common # In/136.pl
-0e4f Common # In/136.pl
-0e5a 0e80 Common # In/136.pl
-0e83 Common # In/136.pl
-0e85 0e86 Common # In/136.pl
-0e89 Common # In/136.pl
-0e8b 0e8c Common # In/136.pl
-0e8e 0e93 Common # In/136.pl
-0e98 Common # In/136.pl
-0ea0 Common # In/136.pl
-0ea4 Common # In/136.pl
-0ea6 Common # In/136.pl
-0ea8 0ea9 Common # In/136.pl
-0eac Common # In/136.pl
-0eba Common # In/136.pl
-0ebe 0ebf Common # In/136.pl
-0ec5 Common # In/136.pl
-0ec7 Common # In/136.pl
-0ece 0ecf Common # In/136.pl
-0eda 0edb Common # In/136.pl
-0ede 0eff Common # In/136.pl
-0f01 0f17 Common # In/136.pl
-0f1a 0f1f Common # In/136.pl
-0f34 Common # In/136.pl
-0f36 Common # In/136.pl
-0f38 Common # In/136.pl
-0f3a 0f3f Common # In/136.pl
-0f48 Common # In/136.pl
-0f6b 0f70 Common # In/136.pl
-0f85 Common # In/136.pl
-0f8c 0f8f Common # In/136.pl
-0f98 Common # In/136.pl
-0fbd 0fc5 Common # In/136.pl
-0fc7 0fff Common # In/136.pl
-1022 Common # In/136.pl
-1028 Common # In/136.pl
-102b Common # In/136.pl
-1033 1035 Common # In/136.pl
-103a 103f Common # In/136.pl
-104a 104f Common # In/136.pl
-105a 109f Common # In/136.pl
-10c6 10cf Common # In/136.pl
-10f7 10ff Common # In/136.pl
-115a 115e Common # In/136.pl
-11a3 11a7 Common # In/136.pl
-11fa 11ff Common # In/136.pl
-1207 Common # In/136.pl
-1247 Common # In/136.pl
-1249 Common # In/136.pl
-124e 124f Common # In/136.pl
-1257 Common # In/136.pl
-1259 Common # In/136.pl
-125e 125f Common # In/136.pl
-1287 Common # In/136.pl
-1289 Common # In/136.pl
-128e 128f Common # In/136.pl
-12af Common # In/136.pl
-12b1 Common # In/136.pl
-12b6 12b7 Common # In/136.pl
-12bf Common # In/136.pl
-12c1 Common # In/136.pl
-12c6 12c7 Common # In/136.pl
-12cf Common # In/136.pl
-12d7 Common # In/136.pl
-12ef Common # In/136.pl
-130f Common # In/136.pl
-1311 Common # In/136.pl
-1316 1317 Common # In/136.pl
-131f Common # In/136.pl
-1347 Common # In/136.pl
-135b 1368 Common # In/136.pl
-137d 139f Common # In/136.pl
-13f5 1400 Common # In/136.pl
-166d 166e Common # In/136.pl
-1677 1680 Common # In/136.pl
-169b 169f Common # In/136.pl
-16eb 16ed Common # In/136.pl
-16f1 177f Common # In/136.pl
-17d4 17df Common # In/136.pl
-17ea 180f Common # In/136.pl
-181a 181f Common # In/136.pl
-1878 187f Common # In/136.pl
-18aa 1dff Common # In/136.pl
-1e9c 1e9f Common # In/136.pl
-1efa 1eff Common # In/136.pl
-1f16 1f17 Common # In/136.pl
-1f1e 1f1f Common # In/136.pl
-1f46 1f47 Common # In/136.pl
-1f4e 1f4f Common # In/136.pl
-1f58 Common # In/136.pl
-1f5a Common # In/136.pl
-1f5c Common # In/136.pl
-1f5e Common # In/136.pl
-1f7e 1f7f Common # In/136.pl
-1fb5 Common # In/136.pl
-1fbd Common # In/136.pl
-1fbf 1fc1 Common # In/136.pl
-1fc5 Common # In/136.pl
-1fcd 1fcf Common # In/136.pl
-1fd4 1fd5 Common # In/136.pl
-1fdc 1fdf Common # In/136.pl
-1fed 1ff1 Common # In/136.pl
-1ff5 Common # In/136.pl
-1ffd 207e Common # In/136.pl
-2080 20cf Common # In/136.pl
-20e4 2125 Common # In/136.pl
-2127 2129 Common # In/136.pl
-212c 2e7f Common # In/136.pl
-2e9a Common # In/136.pl
-2ef4 2eff Common # In/136.pl
-2fd6 3004 Common # In/136.pl
-3006 Common # In/136.pl
-3008 3020 Common # In/136.pl
-3030 3037 Common # In/136.pl
-303b 3040 Common # In/136.pl
-3095 3098 Common # In/136.pl
-309b 309c Common # In/136.pl
-309f 30a0 Common # In/136.pl
-30fb 30fc Common # In/136.pl
-30ff 3104 Common # In/136.pl
-312d 3130 Common # In/136.pl
-318f 319f Common # In/136.pl
-31b8 33ff Common # In/136.pl
-4db6 4dff Common # In/136.pl
-9fa6 9fff Common # In/136.pl
-a48d a48f Common # In/136.pl
-a4a2 a4a3 Common # In/136.pl
-a4b4 Common # In/136.pl
-a4c1 Common # In/136.pl
-a4c5 Common # In/136.pl
-a4c7 abff Common # In/136.pl
-d7a4 f8ff Common # In/136.pl
-fa2e faff Common # In/136.pl
-fb07 fb12 Common # In/136.pl
-fb18 fb1c Common # In/136.pl
-fb29 Common # In/136.pl
-fb37 Common # In/136.pl
-fb3d Common # In/136.pl
-fb3f Common # In/136.pl
-fb42 Common # In/136.pl
-fb45 Common # In/136.pl
-fbb2 fbd2 Common # In/136.pl
-fd3e fd4f Common # In/136.pl
-fd90 fd91 Common # In/136.pl
-fdc8 fdef Common # In/136.pl
-fdfc fe1f Common # In/136.pl
-fe24 fe6f Common # In/136.pl
-fe73 Common # In/136.pl
-fe75 Common # In/136.pl
-fefd ff20 Common # In/136.pl
-ff3b ff40 Common # In/136.pl
-ff5b ff65 Common # In/136.pl
-ff70 Common # In/136.pl
-ff9e ff9f Common # In/136.pl
-ffbf ffc1 Common # In/136.pl
-ffc8 ffc9 Common # In/136.pl
-ffd0 ffd1 Common # In/136.pl
-ffd8 ffd9 Common # In/136.pl
-ffdd 102ff Common # In/136.pl
-1031f 1032f Common # In/136.pl
-1034b 103ff Common # In/136.pl
-10426 10427 Common # In/136.pl
-1044e 1d166 Common # In/136.pl
-1d16a 1d17a Common # In/136.pl
-1d183 1d184 Common # In/136.pl
-1d18c 1d1a9 Common # In/136.pl
-1d1ae 1ffff Common # In/136.pl
-2a6d7 2f7ff Common # In/136.pl
+0000 0040
+005b 0060
+007b 00a9
+00ab 00b4
+00b6 00b9
+00bb 00bf
+00d7
+00f7
+0220 0221
+0234 024f
+02ae 02af
+02b9 02df
+02e5 02ff
+034f 035f
+0363 0379
+037b 0385
+0387
+038b
+038d
+03a2
+03cf
+03d8 03d9
+03f6 03ff
+0482
+0487
+048a 048b
+04c5 04c6
+04c9 04ca
+04cd 04cf
+04f6 04f7
+04fa 0530
+0557 0558
+055a 0560
+0588 0590
+05a2
+05ba
+05be
+05c0
+05c3
+05c5 05cf
+05eb 05ef
+05f3 0620
+063b 0640
+0656 066f
+06d4
+06e9
+06ee 06f9
+06fd 070f
+072d 072f
+074b 077f
+07b1 0900
+0904
+093a 093b
+094e 094f
+0955 0957
+0964 0965
+0970 0980
+0982 0984
+098d 098e
+0991 0992
+09a9
+09b1
+09b3 09b5
+09ba 09bb
+09bd
+09c5 09c6
+09c9 09ca
+09ce 09d6
+09d8 09db
+09de
+09e4 09e5
+09f2 0a01
+0a03 0a04
+0a0b 0a0e
+0a11 0a12
+0a29
+0a31
+0a34
+0a37
+0a3a 0a3b
+0a3d
+0a43 0a46
+0a49 0a4a
+0a4e 0a58
+0a5d
+0a5f 0a65
+0a75 0a80
+0a84
+0a8c
+0a8e
+0a92
+0aa9
+0ab1
+0ab4
+0aba 0abb
+0ac6
+0aca
+0ace 0acf
+0ad1 0adf
+0ae1 0ae5
+0af0 0b00
+0b04
+0b0d 0b0e
+0b11 0b12
+0b29
+0b31
+0b34 0b35
+0b3a 0b3b
+0b44 0b46
+0b49 0b4a
+0b4e 0b55
+0b58 0b5b
+0b5e
+0b62 0b65
+0b70 0b81
+0b84
+0b8b 0b8d
+0b91
+0b96 0b98
+0b9b
+0b9d
+0ba0 0ba2
+0ba5 0ba7
+0bab 0bad
+0bb6
+0bba 0bbd
+0bc3 0bc5
+0bc9
+0bce 0bd6
+0bd8 0be6
+0bf3 0c00
+0c04
+0c0d
+0c11
+0c29
+0c34
+0c3a 0c3d
+0c45
+0c49
+0c4e 0c54
+0c57 0c5f
+0c62 0c65
+0c70 0c81
+0c84
+0c8d
+0c91
+0ca9
+0cb4
+0cba 0cbd
+0cc5
+0cc9
+0cce 0cd4
+0cd7 0cdd
+0cdf
+0ce2 0ce5
+0cf0 0d01
+0d04
+0d0d
+0d11
+0d29
+0d3a 0d3d
+0d44 0d45
+0d49
+0d4e 0d56
+0d58 0d5f
+0d62 0d65
+0d70 0d81
+0d84
+0d97 0d99
+0db2
+0dbc
+0dbe 0dbf
+0dc7 0dc9
+0dcb 0dce
+0dd5
+0dd7
+0de0 0df1
+0df4 0e00
+0e3b 0e3f
+0e4f
+0e5a 0e80
+0e83
+0e85 0e86
+0e89
+0e8b 0e8c
+0e8e 0e93
+0e98
+0ea0
+0ea4
+0ea6
+0ea8 0ea9
+0eac
+0eba
+0ebe 0ebf
+0ec5
+0ec7
+0ece 0ecf
+0eda 0edb
+0ede 0eff
+0f01 0f17
+0f1a 0f1f
+0f34
+0f36
+0f38
+0f3a 0f3f
+0f48
+0f6b 0f70
+0f85
+0f8c 0f8f
+0f98
+0fbd 0fc5
+0fc7 0fff
+1022
+1028
+102b
+1033 1035
+103a 103f
+104a 104f
+105a 109f
+10c6 10cf
+10f7 10ff
+115a 115e
+11a3 11a7
+11fa 11ff
+1207
+1247
+1249
+124e 124f
+1257
+1259
+125e 125f
+1287
+1289
+128e 128f
+12af
+12b1
+12b6 12b7
+12bf
+12c1
+12c6 12c7
+12cf
+12d7
+12ef
+130f
+1311
+1316 1317
+131f
+1347
+135b 1368
+137d 139f
+13f5 1400
+166d 166e
+1677 1680
+169b 169f
+16eb 16ed
+16f1 177f
+17d4 17df
+17ea 180f
+181a 181f
+1878 187f
+18aa 1dff
+1e9c 1e9f
+1efa 1eff
+1f16 1f17
+1f1e 1f1f
+1f46 1f47
+1f4e 1f4f
+1f58
+1f5a
+1f5c
+1f5e
+1f7e 1f7f
+1fb5
+1fbd
+1fbf 1fc1
+1fc5
+1fcd 1fcf
+1fd4 1fd5
+1fdc 1fdf
+1fed 1ff1
+1ff5
+1ffd 207e
+2080 20cf
+20e4 2125
+2127 2129
+212c 2e7f
+2e9a
+2ef4 2eff
+2fd6 3004
+3006
+3008 3020
+3030 3037
+303b 3040
+3095 3098
+309b 309c
+309f 30a0
+30fb 30fc
+30ff 3104
+312d 3130
+318f 319f
+31b8 33ff
+4db6 4dff
+9fa6 9fff
+a48d a48f
+a4a2 a4a3
+a4b4
+a4c1
+a4c5
+a4c7 abff
+d7a4 f8ff
+fa2e faff
+fb07 fb12
+fb18 fb1c
+fb29
+fb37
+fb3d
+fb3f
+fb42
+fb45
+fbb2 fbd2
+fd3e fd4f
+fd90 fd91
+fdc8 fdef
+fdfc fe1f
+fe24 fe6f
+fe73
+fe75
+fefd ff20
+ff3b ff40
+ff5b ff65
+ff70
+ff9e ff9f
+ffbf ffc1
+ffc8 ffc9
+ffd0 ffd1
+ffd8 ffd9
+ffdd 102ff
+1031f 1032f
+1034b 103ff
+10426 10427
+1044e 1d166
+1d16a 1d17a
+1d183 1d184
+1d18c 1d1a9
+1d1ae 1ffff
+2a6d7 2f7ff
END
diff --git a/lib/unicore/In/155.pl b/lib/unicore/In/155.pl
index c4c37445be..5dca0dc48a 100644
--- a/lib/unicore/In/155.pl
+++ b/lib/unicore/In/155.pl
@@ -2,5 +2,22 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0000 10ffff Common # In/136.pl
+0000 fdcf
+fdf0 fffd
+10000 1fffd
+20000 2fffd
+30000 3fffd
+40000 4fffd
+50000 5fffd
+60000 6fffd
+70000 7fffd
+80000 8fffd
+90000 9fffd
+a0000 afffd
+b0000 bfffd
+c0000 cfffd
+d0000 dfffd
+e0000 efffd
+f0000 ffffd
+100000 10ffff
END
diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL
index 500e49ff58..e281edd972 100755
--- a/lib/unicore/mktables.PL
+++ b/lib/unicore/mktables.PL
@@ -380,12 +380,12 @@ close OUT;
my $first;
-sub flushzerorange {
- my ($scriptname, $scriptid, $i) = @_;
+sub flush_zero_range {
+ my ($i) = @_;
if (defined $first) {
my $last = $i - 1;
$last = $last == $first ? "" : sprintf("%04x", $last);
- printf SCRIPT "%04x\t$last\t$scriptname\t# In/$scriptid.pl\n", $first;
+ printf SCRIPT "%04x\t$last\n", $first;
printf "\t\t%04x..$last\n", $first;
undef $first;
}
@@ -404,12 +404,12 @@ EOH
undef $first;
for my $i (0..$lastlast) {
if (vec($ScriptsVec, $i, 1)) {
- defined $first && flushzerorange('Common', $CommonId, $i);
+ defined $first && flush_zero_range($i);
} else {
$first = $i unless defined $first;
}
}
-flushzerorange('Common', $CommonId, $lastlast+1);
+flush_zero_range($lastlast+1);
print SCRIPT "END\n";
close(SCRIPT);
@@ -459,18 +459,16 @@ EOH
}
$id = $InIdScript{$InName};
print PROP "\L$code\t\L$last\n";
- if ($InName eq 'Cn') {
+ if ($InName eq 'Noncharacter_Code_Point') {
my $firsti = hex($code);
my $lasti = $last ? hex($last) : $firsti;
- for my $I ($firsti..$hexi) {
+ for my $i ($firsti..$lasti) {
vec($CnVec, $i, 1) = 1;
}
}
}
}
-print PROP <<EOH;
-END
-EOH
+print PROP "END\n";
print "\tAssigned\n";
my $AssignedId = $Scripts{Assigned} = $InIdScript{Assigned} = $InId++;
@@ -485,14 +483,13 @@ EOH
undef $first;
for my $i (0..hex($UnicodeLastHex)) {
if (vec($CnVec, $i, 1)) {
- defined $first && flushzerorange('Assigned', $AssignedId, $i);
+ defined $first && flush_zero_range($i);
} else {
$first = $i unless defined $first;
}
}
-flushzerorange('Common', $CommonId, hex($UnicodeLastHex)+1);
+flush_zero_range(hex($UnicodeLastHex)+1);
print SCRIPT "END\n";
-close(SCRIPT);
#
# \p{Alphabetic} is \pL and \p{Other_Alphabetic}
@@ -706,7 +703,8 @@ my %InIdPrefix;
foreach my $in (@InId) {
my $inpat = $in;
$inpat =~ s/([- _])/(?:[-_]|\\s+)?/g;
- push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ];
+ my $inprefix = lc(substr($in, 0, 2));
+ push @{$InIdPrefix{$inprefix}}, [ $in, $inpat ];
printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
}
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index f73b9eb88f..06b2266b11 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -28,7 +28,7 @@ sub SWASHNEW {
$type = 'Lampersand' if $type =~ /^(?:Is)?L&$/;
- my $inprefix = substr(lc($type), 0, 3);
+ my $inprefix = substr(lc($type), 0, 2);
if (exists $utf8::InPat{$inprefix}) {
my $In = $type;
for my $k (keys %{$utf8::InPat{$inprefix}}) {