diff options
257 files changed, 1863 insertions, 2858 deletions
@@ -707,9 +707,11 @@ lib/timelocal.pl Perl library supporting inverse of localtime, gmtime lib/unicode/ArabLink.pl Unicode character database lib/unicode/ArabLnkGrp.pl Unicode character database lib/unicode/ArabShap.txt Unicode character database +lib/unicode/BidiMirr.txt Unicode character database lib/unicode/Bidirectional.pl Unicode character database lib/unicode/Block.pl Unicode character database lib/unicode/Blocks.txt Unicode character database +lib/unicode/CaseFold.txt Unicode character database lib/unicode/Category.pl Unicode character database lib/unicode/CombiningClass.pl Unicode character database lib/unicode/CompExcl.txt Unicode character database @@ -954,17 +956,18 @@ lib/unicode/Name.pl Unicode character database lib/unicode/Names.txt Unicode character database lib/unicode/NamesList.html Unicode character database lib/unicode/Number.pl Unicode character database -lib/unicode/Props.txt Unicode character database +lib/unicode/PropList.txt Unicode character database lib/unicode/README.Ethiopic Unicode character database +lib/unicode/README.perl Unicode character database lib/unicode/ReadMe.txt Unicode character database info lib/unicode/SpecCase.txt Unicode character database lib/unicode/To/Digit.pl Unicode character database lib/unicode/To/Lower.pl Unicode character database lib/unicode/To/Title.pl Unicode character database lib/unicode/To/Upper.pl Unicode character database -lib/unicode/UCD300.html Unicode character database -lib/unicode/Unicode.300 Unicode character database -lib/unicode/Unicode3.html Unicode character database +lib/unicode/UCD301.html Unicode character database +lib/unicode/UCDFF301.html Unicode character database +lib/unicode/Unicode.301 Unicode character database lib/unicode/mktables.PL Unicode character database generator lib/unicode/syllables.txt Unicode character database lib/utf8.pm Pragma to control Unicode support diff --git a/lib/unicode/ArabLink.pl b/lib/unicode/ArabLink.pl index fd5ed8a6b1..2ad1871bac 100644 --- 
a/lib/unicode/ArabLink.pl +++ b/lib/unicode/ArabLink.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0622 0625 R @@ -12,10 +12,9 @@ return <<'END'; 0633 063a D 0640 C 0641 0647 D -0648 0649 R -064a D -0671 U -0672 0673 R +0648 R +0649 064a D +0671 0673 R 0674 U 0675 0677 R 0678 0687 D diff --git a/lib/unicode/ArabLnkGrp.pl b/lib/unicode/ArabLnkGrp.pl index 61f30d4348..1581a04897 100644 --- a/lib/unicode/ArabLnkGrp.pl +++ b/lib/unicode/ArabLnkGrp.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0622 0623 ALEF @@ -27,8 +27,7 @@ return <<'END'; 0647 HEH 0648 WAW 0649 064a YEH -0671 <no shaping> -0672 0673 ALEF +0671 0673 ALEF 0674 <no shaping> 0675 ALEF 0676 0677 WAW diff --git a/lib/unicode/ArabShap.txt b/lib/unicode/ArabShap.txt index 6092d6223c..9b60290e62 100644 --- a/lib/unicode/ArabShap.txt +++ b/lib/unicode/ArabShap.txt @@ -1,5 +1,32 @@ -# Unicode; Schematic Name; Link; Link Group +# ArabicShaping-3.txt +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# This file defines the shaping classes for Arabic and Syriac +# positional shaping, repeating in machine readable form the +# information printed in Tables 8-6, 8-7, 8-8, 8-10, 8-11, and +# 8-13 of The Unicode Standard, Version 3.0. +# +# See sections 8.2 and 8.3 of The Unicode Standard, Version 3.0 +# for more information. +# +# Each line contains four fields, separated by a semicolon. +# +# The first field gives the code point, in 4-digit hexadecimal +# form, of an Arabic or Syriac character. 
+# The second field gives a short schematic name for that character, +# abbreviated from the normative Unicode character name. +# The third field defines the joining type: R right-joining, +# D dual-joining, U non-joining +# The fourth field defines the joining group. +# +# ############################################################# + +# Unicode; Schematic Name; Joining Type; Joining Group + # Arabic characters + 0622; MADDA ON ALEF; R; ALEF 0623; HAMZA ON ALEF; R; ALEF 0624; HAMZA ON WAW; R; WAW @@ -34,9 +61,9 @@ 0646; NOON; D; NOON 0647; HEH; D; HEH 0648; WAW; R; WAW -0649; ALEF MAKSURA; R; YEH +0649; ALEF MAKSURA; D; YEH 064A; YEH; D; YEH -0671; HAMZAT WASL ON ALEF; U; <no shaping> +0671; HAMZAT WASL ON ALEF; R; ALEF 0672; WAVY HAMZA ON ALEF; R; ALEF 0673; WAVY HAMZA UNDER ALEF; R; ALEF 0674; HIGH HAMZA; U; <no shaping> @@ -139,7 +166,9 @@ 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN 06FB; DAD WITH DOT BELOW; D; SAD 06FC; GHAIN WITH DOT BELOW; D; AIN + # Syriac characters + 0710; ALAPH; R; ALAPH 0712; BETH; D; BETH 0713; GAMAL; D; GAMAL diff --git a/lib/unicode/BidiMirr.txt b/lib/unicode/BidiMirr.txt new file mode 100644 index 0000000000..8ac5be98a1 --- /dev/null +++ b/lib/unicode/BidiMirr.txt @@ -0,0 +1,238 @@ +# BidiMirroring-1.txt +# +# This file is an informative supplement to the UnicodeData file. It +# lists characters that have the mirrored property +# where there is another Unicode character that typically has a glyph +# that is the mirror image of the original character's glyph. +# The repertoire covered by the file is Unicode 3.0.1. +# +# The file contains a list of lines with mappings from one code point +# to another one for character-based mirroring. +# Note that for "real" mirroring, a rendering engine needs to select +# appropriate alternative glyphs, and that many Unicode characters do not +# have a mirror-image Unicode character. +# +# Each mapping line contains two fields, separated by a semicolon (';'). 
+# Each of the two fields contains a code point represented as a +# variable-length hexadecimal value with 4 to 6 digits. +# A comment indicates where the characters are "BEST FIT" mirroring. +# +# Code points with the "mirrored" property but no appropriate mirrors are +# listed as comments at the end of the file. +# +# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm, +# at http://www.unicode.org/unicode/reports/tr9/ +# +# Please address any comments to <errata@unicode.org>. +# Note that this is an archival address: messages will be checked, +# but do not expect an immediate response. +# +# This file was originally created by Markus Scherer +# +# ############################################################ + +0028; 0029 # LEFT PARENTHESIS +0029; 0028 # RIGHT PARENTHESIS +003C; 003E # LESS-THAN SIGN +003E; 003C # GREATER-THAN SIGN +005B; 005D # LEFT SQUARE BRACKET +005D; 005B # RIGHT SQUARE BRACKET +007B; 007D # LEFT CURLY BRACKET +007D; 007B # RIGHT CURLY BRACKET +00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2045; 2046 # LEFT SQUARE BRACKET WITH QUILL +2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E # SUBSCRIPT LEFT PARENTHESIS +208E; 208D # SUBSCRIPT RIGHT PARENTHESIS +2208; 220B # ELEMENT OF +2209; 220C # NOT AN ELEMENT OF +220A; 220D # SMALL ELEMENT OF +220B; 2208 # CONTAINS AS MEMBER +220C; 2209 # DOES NOT CONTAIN AS MEMBER +220D; 220A # SMALL CONTAINS AS MEMBER +223C; 223D # TILDE OPERATOR +223D; 223C # REVERSED TILDE +2243; 22CD # ASYMPTOTICALLY EQUAL TO +2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO +2254; 2255 # COLON EQUALS +2255; 2254 # EQUALS COLON +2264; 2265 # LESS-THAN OR EQUAL TO +2265; 2264 # 
GREATER-THAN OR EQUAL TO +2266; 2267 # LESS-THAN OVER EQUAL TO +2267; 2266 # GREATER-THAN OVER EQUAL TO +2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO +2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO +226A; 226B # MUCH LESS-THAN +226B; 226A # MUCH GREATER-THAN +226E; 226F # [BEST FIT] NOT LESS-THAN +226F; 226E # [BEST FIT] NOT GREATER-THAN +2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO +2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO +2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO +2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO +2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO +2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO +2276; 2277 # LESS-THAN OR GREATER-THAN +2277; 2276 # GREATER-THAN OR LESS-THAN +2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN +2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN +227A; 227B # PRECEDES +227B; 227A # SUCCEEDS +227C; 227D # PRECEDES OR EQUAL TO +227D; 227C # SUCCEEDS OR EQUAL TO +227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO +227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO +2280; 2281 # [BEST FIT] DOES NOT PRECEDE +2281; 2280 # [BEST FIT] DOES NOT SUCCEED +2282; 2283 # SUBSET OF +2283; 2282 # SUPERSET OF +2284; 2285 # [BEST FIT] NOT A SUBSET OF +2285; 2284 # [BEST FIT] NOT A SUPERSET OF +2286; 2287 # SUBSET OF OR EQUAL TO +2287; 2286 # SUPERSET OF OR EQUAL TO +2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO +2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO +228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO +228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO +228F; 2290 # SQUARE IMAGE OF +2290; 228F # SQUARE ORIGINAL OF +2291; 2292 # SQUARE IMAGE OF OR EQUAL TO +2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO +22A2; 22A3 # RIGHT TACK +22A3; 22A2 # LEFT TACK +22B0; 22B1 # PRECEDES UNDER RELATION +22B1; 22B0 # SUCCEEDS UNDER RELATION +22B2; 22B3 # NORMAL SUBGROUP OF +22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP +22B4; 22B5 # NORMAL SUBGROUP OF OR 
EQUAL TO +22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6; 22B7 # ORIGINAL OF +22B7; 22B6 # IMAGE OF +22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB; 22CC # LEFT SEMIDIRECT PRODUCT +22CC; 22CB # RIGHT SEMIDIRECT PRODUCT +22CD; 2243 # REVERSED TILDE EQUALS +22D0; 22D1 # DOUBLE SUBSET +22D1; 22D0 # DOUBLE SUPERSET +22D6; 22D7 # LESS-THAN WITH DOT +22D7; 22D6 # GREATER-THAN WITH DOT +22D8; 22D9 # VERY MUCH LESS-THAN +22D9; 22D8 # VERY MUCH GREATER-THAN +22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN +22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN +22DC; 22DD # EQUAL TO OR LESS-THAN +22DD; 22DC # EQUAL TO OR GREATER-THAN +22DE; 22DF # EQUAL TO OR PRECEDES +22DF; 22DE # EQUAL TO OR SUCCEEDS +22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL +22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL +22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO +22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO +22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO +22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO +22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO +22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO +22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO +22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO +22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF +22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP +22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO +22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS +22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS +2308; 2309 # LEFT CEILING +2309; 2308 # RIGHT CEILING +230A; 230B # LEFT FLOOR +230B; 230A # RIGHT FLOOR +2329; 232A # LEFT-POINTING ANGLE BRACKET +232A; 2329 # RIGHT-POINTING ANGLE BRACKET +3008; 3009 # LEFT ANGLE BRACKET +3009; 3008 # RIGHT ANGLE BRACKET +300A; 300B # LEFT DOUBLE ANGLE BRACKET +300B; 300A # RIGHT DOUBLE ANGLE BRACKET 
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET +300D; 300C # [BEST FIT] RIGHT CORNER BRACKET +300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET +300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET +3010; 3011 # LEFT BLACK LENTICULAR BRACKET +3011; 3010 # RIGHT BLACK LENTICULAR BRACKET +3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET +3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET +3016; 3017 # LEFT WHITE LENTICULAR BRACKET +3017; 3016 # RIGHT WHITE LENTICULAR BRACKET +3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B # LEFT WHITE SQUARE BRACKET +301B; 301A # RIGHT WHITE SQUARE BRACKET + +# The following characters have no appropriate mirroring character + +# 2201; COMPLEMENT +# 2202; PARTIAL DIFFERENTIAL +# 2203; THERE EXISTS +# 2204; THERE DOES NOT EXIST +# 2211; N-ARY SUMMATION +# 2215; DIVISION SLASH +# 2216; SET MINUS +# 221A; SQUARE ROOT +# 221B; CUBE ROOT +# 221C; FOURTH ROOT +# 221D; PROPORTIONAL TO +# 221F; RIGHT ANGLE +# 2220; ANGLE +# 2221; MEASURED ANGLE +# 2222; SPHERICAL ANGLE +# 2224; DOES NOT DIVIDE +# 2226; NOT PARALLEL TO +# 222B; INTEGRAL +# 222C; DOUBLE INTEGRAL +# 222D; TRIPLE INTEGRAL +# 222E; CONTOUR INTEGRAL +# 222F; SURFACE INTEGRAL +# 2230; VOLUME INTEGRAL +# 2231; CLOCKWISE INTEGRAL +# 2232; CLOCKWISE CONTOUR INTEGRAL +# 2233; ANTICLOCKWISE CONTOUR INTEGRAL +# 2239; EXCESS +# 223B; HOMOTHETIC +# 223E; INVERTED LAZY S +# 223F; SINE WAVE +# 2240; WREATH PRODUCT +# 2241; NOT TILDE +# 2242; MINUS TILDE +# 2244; NOT ASYMPTOTICALLY EQUAL TO +# 2245; APPROXIMATELY EQUAL TO +# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +# 2248; ALMOST EQUAL TO +# 2249; NOT ALMOST EQUAL TO +# 224A; ALMOST EQUAL OR EQUAL TO +# 224B; TRIPLE TILDE +# 224C; ALL EQUAL TO +# 225F; QUESTIONED EQUAL TO +# 2260; NOT EQUAL TO +# 2262; NOT IDENTICAL TO +# 228C; MULTISET +# 2298; CIRCLED DIVISION SLASH +# 22A6; ASSERTION +# 22A7; MODELS +# 22A8; TRUE +# 
22A9; FORCES +# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE +# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22AC; DOES NOT PROVE +# 22AD; NOT TRUE +# 22AE; DOES NOT FORCE +# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22B8; MULTIMAP +# 22BE; RIGHT ANGLE WITH ARC +# 22BF; RIGHT TRIANGLE +# 2320; TOP HALF INTEGRAL +# 2321; BOTTOM HALF INTEGRAL diff --git a/lib/unicode/Bidirectional.pl b/lib/unicode/Bidirectional.pl index 73898b8399..3cc2d0aafd 100644 --- a/lib/unicode/Bidirectional.pl +++ b/lib/unicode/Bidirectional.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 BN @@ -635,4 +635,6 @@ ffe5 ffe6 ET ffe8 ffee ON fff9 fffb BN fffc fffd ON +f0000 ffffd L +100000 10fffd L END diff --git a/lib/unicode/Block.pl b/lib/unicode/Block.pl index ee680b724d..2b5bfce3e1 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/Block.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007F Basic Latin diff --git a/lib/unicode/CaseFold.txt b/lib/unicode/CaseFold.txt new file mode 100644 index 0000000000..6529c412f2 --- /dev/null +++ b/lib/unicode/CaseFold.txt @@ -0,0 +1,821 @@ +# CaseFolding-2.txt +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to this mapping, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# For information on case folding, see +# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# +# These are informative character properties. 
+# +# Send comments to mark@unicode.org +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status is: +# L (for Lowercase) if the case mapping matches the standard 1-1 lowercase mapping +# E (for exception) if it does not. +# +# The mapping may consist of multiple characters. +# If so, they are separated by spaces. +# +# ================================================================= + +0041; L; 0061; #LATIN CAPITAL LETTER A +0042; L; 0062; #LATIN CAPITAL LETTER B +0043; L; 0063; #LATIN CAPITAL LETTER C +0044; L; 0064; #LATIN CAPITAL LETTER D +0045; L; 0065; #LATIN CAPITAL LETTER E +0046; L; 0066; #LATIN CAPITAL LETTER F +0047; L; 0067; #LATIN CAPITAL LETTER G +0048; L; 0068; #LATIN CAPITAL LETTER H +0049; L; 0069; #LATIN CAPITAL LETTER I +004A; L; 006A; #LATIN CAPITAL LETTER J +004B; L; 006B; #LATIN CAPITAL LETTER K +004C; L; 006C; #LATIN CAPITAL LETTER L +004D; L; 006D; #LATIN CAPITAL LETTER M +004E; L; 006E; #LATIN CAPITAL LETTER N +004F; L; 006F; #LATIN CAPITAL LETTER O +0050; L; 0070; #LATIN CAPITAL LETTER P +0051; L; 0071; #LATIN CAPITAL LETTER Q +0052; L; 0072; #LATIN CAPITAL LETTER R +0053; L; 0073; #LATIN CAPITAL LETTER S +0054; L; 0074; #LATIN CAPITAL LETTER T +0055; L; 0075; #LATIN CAPITAL LETTER U +0056; L; 0076; #LATIN CAPITAL LETTER V +0057; L; 0077; #LATIN CAPITAL LETTER W +0058; L; 0078; #LATIN CAPITAL LETTER X +0059; L; 0079; #LATIN CAPITAL LETTER Y +005A; L; 007A; #LATIN CAPITAL LETTER Z +00B5; E; 03BC; #MICRO SIGN +00C0; L; 00E0; #LATIN CAPITAL LETTER A WITH GRAVE +00C1; L; 00E1; #LATIN CAPITAL LETTER A WITH ACUTE +00C2; L; 00E2; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; L; 00E3; #LATIN CAPITAL LETTER A WITH TILDE +00C4; L; 00E4; #LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; L; 00E5; 
#LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; L; 00E6; #LATIN CAPITAL LETTER AE +00C7; L; 00E7; #LATIN CAPITAL LETTER C WITH CEDILLA +00C8; L; 00E8; #LATIN CAPITAL LETTER E WITH GRAVE +00C9; L; 00E9; #LATIN CAPITAL LETTER E WITH ACUTE +00CA; L; 00EA; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; L; 00EB; #LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; L; 00EC; #LATIN CAPITAL LETTER I WITH GRAVE +00CD; L; 00ED; #LATIN CAPITAL LETTER I WITH ACUTE +00CE; L; 00EE; #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; L; 00EF; #LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; L; 00F0; #LATIN CAPITAL LETTER ETH +00D1; L; 00F1; #LATIN CAPITAL LETTER N WITH TILDE +00D2; L; 00F2; #LATIN CAPITAL LETTER O WITH GRAVE +00D3; L; 00F3; #LATIN CAPITAL LETTER O WITH ACUTE +00D4; L; 00F4; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; L; 00F5; #LATIN CAPITAL LETTER O WITH TILDE +00D6; L; 00F6; #LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; L; 00F8; #LATIN CAPITAL LETTER O WITH STROKE +00D9; L; 00F9; #LATIN CAPITAL LETTER U WITH GRAVE +00DA; L; 00FA; #LATIN CAPITAL LETTER U WITH ACUTE +00DB; L; 00FB; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; L; 00FC; #LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; L; 00FD; #LATIN CAPITAL LETTER Y WITH ACUTE +00DE; L; 00FE; #LATIN CAPITAL LETTER THORN +00DF; E; 0073 0073; #LATIN SMALL LETTER SHARP S +0100; L; 0101; #LATIN CAPITAL LETTER A WITH MACRON +0102; L; 0103; #LATIN CAPITAL LETTER A WITH BREVE +0104; L; 0105; #LATIN CAPITAL LETTER A WITH OGONEK +0106; L; 0107; #LATIN CAPITAL LETTER C WITH ACUTE +0108; L; 0109; #LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; L; 010B; #LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; L; 010D; #LATIN CAPITAL LETTER C WITH CARON +010E; L; 010F; #LATIN CAPITAL LETTER D WITH CARON +0110; L; 0111; #LATIN CAPITAL LETTER D WITH STROKE +0112; L; 0113; #LATIN CAPITAL LETTER E WITH MACRON +0114; L; 0115; #LATIN CAPITAL LETTER E WITH BREVE +0116; L; 0117; #LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; L; 0119; #LATIN CAPITAL LETTER E WITH 
OGONEK +011A; L; 011B; #LATIN CAPITAL LETTER E WITH CARON +011C; L; 011D; #LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; L; 011F; #LATIN CAPITAL LETTER G WITH BREVE +0120; L; 0121; #LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; L; 0123; #LATIN CAPITAL LETTER G WITH CEDILLA +0124; L; 0125; #LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; L; 0127; #LATIN CAPITAL LETTER H WITH STROKE +0128; L; 0129; #LATIN CAPITAL LETTER I WITH TILDE +012A; L; 012B; #LATIN CAPITAL LETTER I WITH MACRON +012C; L; 012D; #LATIN CAPITAL LETTER I WITH BREVE +012E; L; 012F; #LATIN CAPITAL LETTER I WITH OGONEK +0130; L; 0069; #LATIN CAPITAL LETTER I WITH DOT ABOVE +0131; E; 0069; #LATIN SMALL LETTER DOTLESS I +0132; L; 0133; #LATIN CAPITAL LIGATURE IJ +0134; L; 0135; #LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; L; 0137; #LATIN CAPITAL LETTER K WITH CEDILLA +0139; L; 013A; #LATIN CAPITAL LETTER L WITH ACUTE +013B; L; 013C; #LATIN CAPITAL LETTER L WITH CEDILLA +013D; L; 013E; #LATIN CAPITAL LETTER L WITH CARON +013F; L; 0140; #LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; L; 0142; #LATIN CAPITAL LETTER L WITH STROKE +0143; L; 0144; #LATIN CAPITAL LETTER N WITH ACUTE +0145; L; 0146; #LATIN CAPITAL LETTER N WITH CEDILLA +0147; L; 0148; #LATIN CAPITAL LETTER N WITH CARON +0149; E; 02BC 006E; #LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; L; 014B; #LATIN CAPITAL LETTER ENG +014C; L; 014D; #LATIN CAPITAL LETTER O WITH MACRON +014E; L; 014F; #LATIN CAPITAL LETTER O WITH BREVE +0150; L; 0151; #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; L; 0153; #LATIN CAPITAL LIGATURE OE +0154; L; 0155; #LATIN CAPITAL LETTER R WITH ACUTE +0156; L; 0157; #LATIN CAPITAL LETTER R WITH CEDILLA +0158; L; 0159; #LATIN CAPITAL LETTER R WITH CARON +015A; L; 015B; #LATIN CAPITAL LETTER S WITH ACUTE +015C; L; 015D; #LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; L; 015F; #LATIN CAPITAL LETTER S WITH CEDILLA +0160; L; 0161; #LATIN CAPITAL LETTER S WITH CARON +0162; L; 0163; #LATIN CAPITAL LETTER T WITH CEDILLA +0164; L; 
0165; #LATIN CAPITAL LETTER T WITH CARON +0166; L; 0167; #LATIN CAPITAL LETTER T WITH STROKE +0168; L; 0169; #LATIN CAPITAL LETTER U WITH TILDE +016A; L; 016B; #LATIN CAPITAL LETTER U WITH MACRON +016C; L; 016D; #LATIN CAPITAL LETTER U WITH BREVE +016E; L; 016F; #LATIN CAPITAL LETTER U WITH RING ABOVE +0170; L; 0171; #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; L; 0173; #LATIN CAPITAL LETTER U WITH OGONEK +0174; L; 0175; #LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; L; 0177; #LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; L; 00FF; #LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; L; 017A; #LATIN CAPITAL LETTER Z WITH ACUTE +017B; L; 017C; #LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; L; 017E; #LATIN CAPITAL LETTER Z WITH CARON +017F; E; 0073; #LATIN SMALL LETTER LONG S +0181; L; 0253; #LATIN CAPITAL LETTER B WITH HOOK +0182; L; 0183; #LATIN CAPITAL LETTER B WITH TOPBAR +0184; L; 0185; #LATIN CAPITAL LETTER TONE SIX +0186; L; 0254; #LATIN CAPITAL LETTER OPEN O +0187; L; 0188; #LATIN CAPITAL LETTER C WITH HOOK +0189; L; 0256; #LATIN CAPITAL LETTER AFRICAN D +018A; L; 0257; #LATIN CAPITAL LETTER D WITH HOOK +018B; L; 018C; #LATIN CAPITAL LETTER D WITH TOPBAR +018E; L; 01DD; #LATIN CAPITAL LETTER REVERSED E +018F; L; 0259; #LATIN CAPITAL LETTER SCHWA +0190; L; 025B; #LATIN CAPITAL LETTER OPEN E +0191; L; 0192; #LATIN CAPITAL LETTER F WITH HOOK +0193; L; 0260; #LATIN CAPITAL LETTER G WITH HOOK +0194; L; 0263; #LATIN CAPITAL LETTER GAMMA +0196; L; 0269; #LATIN CAPITAL LETTER IOTA +0197; L; 0268; #LATIN CAPITAL LETTER I WITH STROKE +0198; L; 0199; #LATIN CAPITAL LETTER K WITH HOOK +019C; L; 026F; #LATIN CAPITAL LETTER TURNED M +019D; L; 0272; #LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; L; 0275; #LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; L; 01A1; #LATIN CAPITAL LETTER O WITH HORN +01A2; L; 01A3; #LATIN CAPITAL LETTER OI +01A4; L; 01A5; #LATIN CAPITAL LETTER P WITH HOOK +01A6; L; 0280; #LATIN LETTER YR +01A7; L; 01A8; #LATIN CAPITAL LETTER TONE TWO +01A9; L; 0283; 
#LATIN CAPITAL LETTER ESH +01AC; L; 01AD; #LATIN CAPITAL LETTER T WITH HOOK +01AE; L; 0288; #LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; L; 01B0; #LATIN CAPITAL LETTER U WITH HORN +01B1; L; 028A; #LATIN CAPITAL LETTER UPSILON +01B2; L; 028B; #LATIN CAPITAL LETTER V WITH HOOK +01B3; L; 01B4; #LATIN CAPITAL LETTER Y WITH HOOK +01B5; L; 01B6; #LATIN CAPITAL LETTER Z WITH STROKE +01B7; L; 0292; #LATIN CAPITAL LETTER EZH +01B8; L; 01B9; #LATIN CAPITAL LETTER EZH REVERSED +01BC; L; 01BD; #LATIN CAPITAL LETTER TONE FIVE +01C4; L; 01C6; #LATIN CAPITAL LETTER DZ WITH CARON +01C5; L; 01C6; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; L; 01C9; #LATIN CAPITAL LETTER LJ +01C8; L; 01C9; #LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; L; 01CC; #LATIN CAPITAL LETTER NJ +01CB; L; 01CC; #LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; L; 01CE; #LATIN CAPITAL LETTER A WITH CARON +01CF; L; 01D0; #LATIN CAPITAL LETTER I WITH CARON +01D1; L; 01D2; #LATIN CAPITAL LETTER O WITH CARON +01D3; L; 01D4; #LATIN CAPITAL LETTER U WITH CARON +01D5; L; 01D6; #LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; L; 01D8; #LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; L; 01DA; #LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; L; 01DC; #LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; L; 01DF; #LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; L; 01E1; #LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; L; 01E3; #LATIN CAPITAL LETTER AE WITH MACRON +01E4; L; 01E5; #LATIN CAPITAL LETTER G WITH STROKE +01E6; L; 01E7; #LATIN CAPITAL LETTER G WITH CARON +01E8; L; 01E9; #LATIN CAPITAL LETTER K WITH CARON +01EA; L; 01EB; #LATIN CAPITAL LETTER O WITH OGONEK +01EC; L; 01ED; #LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; L; 01EF; #LATIN CAPITAL LETTER EZH WITH CARON +01F0; E; 006A 030C; #LATIN SMALL LETTER J WITH CARON +01F1; L; 01F3; #LATIN CAPITAL LETTER DZ +01F2; L; 01F3; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; L; 01F5; #LATIN 
CAPITAL LETTER G WITH ACUTE +01F6; L; 0195; #LATIN CAPITAL LETTER HWAIR +01F7; L; 01BF; #LATIN CAPITAL LETTER WYNN +01F8; L; 01F9; #LATIN CAPITAL LETTER N WITH GRAVE +01FA; L; 01FB; #LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; L; 01FD; #LATIN CAPITAL LETTER AE WITH ACUTE +01FE; L; 01FF; #LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; L; 0201; #LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; L; 0203; #LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; L; 0205; #LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; L; 0207; #LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; L; 0209; #LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; L; 020B; #LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; L; 020D; #LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; L; 020F; #LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; L; 0211; #LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; L; 0213; #LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; L; 0215; #LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; L; 0217; #LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; L; 0219; #LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; L; 021B; #LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; L; 021D; #LATIN CAPITAL LETTER YOGH +021E; L; 021F; #LATIN CAPITAL LETTER H WITH CARON +0222; L; 0223; #LATIN CAPITAL LETTER OU +0224; L; 0225; #LATIN CAPITAL LETTER Z WITH HOOK +0226; L; 0227; #LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; L; 0229; #LATIN CAPITAL LETTER E WITH CEDILLA +022A; L; 022B; #LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; L; 022D; #LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; L; 022F; #LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; L; 0231; #LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; L; 0233; #LATIN CAPITAL LETTER Y WITH MACRON +0345; E; 03B9; #COMBINING GREEK YPOGEGRAMMENI +0386; L; 03AC; #GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; L; 03AD; #GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; L; 03AE; #GREEK CAPITAL LETTER ETA WITH TONOS +038A; L; 03AF; #GREEK 
CAPITAL LETTER IOTA WITH TONOS +038C; L; 03CC; #GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; L; 03CD; #GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; L; 03CE; #GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; L; 03B1; #GREEK CAPITAL LETTER ALPHA +0392; L; 03B2; #GREEK CAPITAL LETTER BETA +0393; L; 03B3; #GREEK CAPITAL LETTER GAMMA +0394; L; 03B4; #GREEK CAPITAL LETTER DELTA +0395; L; 03B5; #GREEK CAPITAL LETTER EPSILON +0396; L; 03B6; #GREEK CAPITAL LETTER ZETA +0397; L; 03B7; #GREEK CAPITAL LETTER ETA +0398; L; 03B8; #GREEK CAPITAL LETTER THETA +0399; L; 03B9; #GREEK CAPITAL LETTER IOTA +039A; L; 03BA; #GREEK CAPITAL LETTER KAPPA +039B; L; 03BB; #GREEK CAPITAL LETTER LAMDA +039C; L; 03BC; #GREEK CAPITAL LETTER MU +039D; L; 03BD; #GREEK CAPITAL LETTER NU +039E; L; 03BE; #GREEK CAPITAL LETTER XI +039F; L; 03BF; #GREEK CAPITAL LETTER OMICRON +03A0; L; 03C0; #GREEK CAPITAL LETTER PI +03A1; L; 03C1; #GREEK CAPITAL LETTER RHO +03A3; E; 03C2; #GREEK CAPITAL LETTER SIGMA +03A4; L; 03C4; #GREEK CAPITAL LETTER TAU +03A5; L; 03C5; #GREEK CAPITAL LETTER UPSILON +03A6; L; 03C6; #GREEK CAPITAL LETTER PHI +03A7; L; 03C7; #GREEK CAPITAL LETTER CHI +03A8; L; 03C8; #GREEK CAPITAL LETTER PSI +03A9; L; 03C9; #GREEK CAPITAL LETTER OMEGA +03AA; L; 03CA; #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; L; 03CB; #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; L; 03C2; #GREEK SMALL LETTER FINAL SIGMA +03C3; E; 03C2; #GREEK SMALL LETTER SIGMA +03D0; E; 03B2; #GREEK BETA SYMBOL +03D1; E; 03B8; #GREEK THETA SYMBOL +03D5; E; 03C6; #GREEK PHI SYMBOL +03D6; E; 03C0; #GREEK PI SYMBOL +03DA; L; 03DB; #GREEK LETTER STIGMA +03DC; L; 03DD; #GREEK LETTER DIGAMMA +03DE; L; 03DF; #GREEK LETTER KOPPA +03E0; L; 03E1; #GREEK LETTER SAMPI +03E2; L; 03E3; #COPTIC CAPITAL LETTER SHEI +03E4; L; 03E5; #COPTIC CAPITAL LETTER FEI +03E6; L; 03E7; #COPTIC 
CAPITAL LETTER KHEI +03E8; L; 03E9; #COPTIC CAPITAL LETTER HORI +03EA; L; 03EB; #COPTIC CAPITAL LETTER GANGIA +03EC; L; 03ED; #COPTIC CAPITAL LETTER SHIMA +03EE; L; 03EF; #COPTIC CAPITAL LETTER DEI +03F0; E; 03BA; #GREEK KAPPA SYMBOL +03F1; E; 03C1; #GREEK RHO SYMBOL +03F2; E; 03C2; #GREEK LUNATE SIGMA SYMBOL +0400; L; 0450; #CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; L; 0451; #CYRILLIC CAPITAL LETTER IO +0402; L; 0452; #CYRILLIC CAPITAL LETTER DJE +0403; L; 0453; #CYRILLIC CAPITAL LETTER GJE +0404; L; 0454; #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; L; 0455; #CYRILLIC CAPITAL LETTER DZE +0406; L; 0456; #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; L; 0457; #CYRILLIC CAPITAL LETTER YI +0408; L; 0458; #CYRILLIC CAPITAL LETTER JE +0409; L; 0459; #CYRILLIC CAPITAL LETTER LJE +040A; L; 045A; #CYRILLIC CAPITAL LETTER NJE +040B; L; 045B; #CYRILLIC CAPITAL LETTER TSHE +040C; L; 045C; #CYRILLIC CAPITAL LETTER KJE +040D; L; 045D; #CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; L; 045E; #CYRILLIC CAPITAL LETTER SHORT U +040F; L; 045F; #CYRILLIC CAPITAL LETTER DZHE +0410; L; 0430; #CYRILLIC CAPITAL LETTER A +0411; L; 0431; #CYRILLIC CAPITAL LETTER BE +0412; L; 0432; #CYRILLIC CAPITAL LETTER VE +0413; L; 0433; #CYRILLIC CAPITAL LETTER GHE +0414; L; 0434; #CYRILLIC CAPITAL LETTER DE +0415; L; 0435; #CYRILLIC CAPITAL LETTER IE +0416; L; 0436; #CYRILLIC CAPITAL LETTER ZHE +0417; L; 0437; #CYRILLIC CAPITAL LETTER ZE +0418; L; 0438; #CYRILLIC CAPITAL LETTER I +0419; L; 0439; #CYRILLIC CAPITAL LETTER SHORT I +041A; L; 043A; #CYRILLIC CAPITAL LETTER KA +041B; L; 043B; #CYRILLIC CAPITAL LETTER EL +041C; L; 043C; #CYRILLIC CAPITAL LETTER EM +041D; L; 043D; #CYRILLIC CAPITAL LETTER EN +041E; L; 043E; #CYRILLIC CAPITAL LETTER O +041F; L; 043F; #CYRILLIC CAPITAL LETTER PE +0420; L; 0440; #CYRILLIC CAPITAL LETTER ER +0421; L; 0441; #CYRILLIC CAPITAL LETTER ES +0422; L; 0442; #CYRILLIC CAPITAL LETTER TE +0423; L; 0443; #CYRILLIC CAPITAL LETTER U +0424; L; 0444; #CYRILLIC CAPITAL 
LETTER EF +0425; L; 0445; #CYRILLIC CAPITAL LETTER HA +0426; L; 0446; #CYRILLIC CAPITAL LETTER TSE +0427; L; 0447; #CYRILLIC CAPITAL LETTER CHE +0428; L; 0448; #CYRILLIC CAPITAL LETTER SHA +0429; L; 0449; #CYRILLIC CAPITAL LETTER SHCHA +042A; L; 044A; #CYRILLIC CAPITAL LETTER HARD SIGN +042B; L; 044B; #CYRILLIC CAPITAL LETTER YERU +042C; L; 044C; #CYRILLIC CAPITAL LETTER SOFT SIGN +042D; L; 044D; #CYRILLIC CAPITAL LETTER E +042E; L; 044E; #CYRILLIC CAPITAL LETTER YU +042F; L; 044F; #CYRILLIC CAPITAL LETTER YA +0460; L; 0461; #CYRILLIC CAPITAL LETTER OMEGA +0462; L; 0463; #CYRILLIC CAPITAL LETTER YAT +0464; L; 0465; #CYRILLIC CAPITAL LETTER IOTIFIED E +0466; L; 0467; #CYRILLIC CAPITAL LETTER LITTLE YUS +0468; L; 0469; #CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; L; 046B; #CYRILLIC CAPITAL LETTER BIG YUS +046C; L; 046D; #CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; L; 046F; #CYRILLIC CAPITAL LETTER KSI +0470; L; 0471; #CYRILLIC CAPITAL LETTER PSI +0472; L; 0473; #CYRILLIC CAPITAL LETTER FITA +0474; L; 0475; #CYRILLIC CAPITAL LETTER IZHITSA +0476; L; 0477; #CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; L; 0479; #CYRILLIC CAPITAL LETTER UK +047A; L; 047B; #CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; L; 047D; #CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; L; 047F; #CYRILLIC CAPITAL LETTER OT +0480; L; 0481; #CYRILLIC CAPITAL LETTER KOPPA +048C; L; 048D; #CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; L; 048F; #CYRILLIC CAPITAL LETTER ER WITH TICK +0490; L; 0491; #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; L; 0493; #CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; L; 0495; #CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; L; 0497; #CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; L; 0499; #CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; L; 049B; #CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; L; 049D; #CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; L; 049F; #CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; L; 04A1; #CYRILLIC 
CAPITAL LETTER BASHKIR KA +04A2; L; 04A3; #CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; L; 04A5; #CYRILLIC CAPITAL LIGATURE EN GHE +04A6; L; 04A7; #CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; L; 04A9; #CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; L; 04AB; #CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; L; 04AD; #CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; L; 04AF; #CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; L; 04B1; #CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; L; 04B3; #CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; L; 04B5; #CYRILLIC CAPITAL LIGATURE TE TSE +04B6; L; 04B7; #CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; L; 04B9; #CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; L; 04BB; #CYRILLIC CAPITAL LETTER SHHA +04BC; L; 04BD; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; L; 04BF; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C1; L; 04C2; #CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; L; 04C4; #CYRILLIC CAPITAL LETTER KA WITH HOOK +04C7; L; 04C8; #CYRILLIC CAPITAL LETTER EN WITH HOOK +04CB; L; 04CC; #CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04D0; L; 04D1; #CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; L; 04D3; #CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; L; 04D5; #CYRILLIC CAPITAL LIGATURE A IE +04D6; L; 04D7; #CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; L; 04D9; #CYRILLIC CAPITAL LETTER SCHWA +04DA; L; 04DB; #CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; L; 04DD; #CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; L; 04DF; #CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; L; 04E1; #CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; L; 04E3; #CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; L; 04E5; #CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; L; 04E7; #CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; L; 04E9; #CYRILLIC CAPITAL LETTER BARRED O +04EA; L; 04EB; #CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; L; 04ED; #CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; L; 04EF; #CYRILLIC CAPITAL LETTER U WITH 
MACRON +04F0; L; 04F1; #CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; L; 04F3; #CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; L; 04F5; #CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F8; L; 04F9; #CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +0531; L; 0561; #ARMENIAN CAPITAL LETTER AYB +0532; L; 0562; #ARMENIAN CAPITAL LETTER BEN +0533; L; 0563; #ARMENIAN CAPITAL LETTER GIM +0534; L; 0564; #ARMENIAN CAPITAL LETTER DA +0535; L; 0565; #ARMENIAN CAPITAL LETTER ECH +0536; L; 0566; #ARMENIAN CAPITAL LETTER ZA +0537; L; 0567; #ARMENIAN CAPITAL LETTER EH +0538; L; 0568; #ARMENIAN CAPITAL LETTER ET +0539; L; 0569; #ARMENIAN CAPITAL LETTER TO +053A; L; 056A; #ARMENIAN CAPITAL LETTER ZHE +053B; L; 056B; #ARMENIAN CAPITAL LETTER INI +053C; L; 056C; #ARMENIAN CAPITAL LETTER LIWN +053D; L; 056D; #ARMENIAN CAPITAL LETTER XEH +053E; L; 056E; #ARMENIAN CAPITAL LETTER CA +053F; L; 056F; #ARMENIAN CAPITAL LETTER KEN +0540; L; 0570; #ARMENIAN CAPITAL LETTER HO +0541; L; 0571; #ARMENIAN CAPITAL LETTER JA +0542; L; 0572; #ARMENIAN CAPITAL LETTER GHAD +0543; L; 0573; #ARMENIAN CAPITAL LETTER CHEH +0544; L; 0574; #ARMENIAN CAPITAL LETTER MEN +0545; L; 0575; #ARMENIAN CAPITAL LETTER YI +0546; L; 0576; #ARMENIAN CAPITAL LETTER NOW +0547; L; 0577; #ARMENIAN CAPITAL LETTER SHA +0548; L; 0578; #ARMENIAN CAPITAL LETTER VO +0549; L; 0579; #ARMENIAN CAPITAL LETTER CHA +054A; L; 057A; #ARMENIAN CAPITAL LETTER PEH +054B; L; 057B; #ARMENIAN CAPITAL LETTER JHEH +054C; L; 057C; #ARMENIAN CAPITAL LETTER RA +054D; L; 057D; #ARMENIAN CAPITAL LETTER SEH +054E; L; 057E; #ARMENIAN CAPITAL LETTER VEW +054F; L; 057F; #ARMENIAN CAPITAL LETTER TIWN +0550; L; 0580; #ARMENIAN CAPITAL LETTER REH +0551; L; 0581; #ARMENIAN CAPITAL LETTER CO +0552; L; 0582; #ARMENIAN CAPITAL LETTER YIWN +0553; L; 0583; #ARMENIAN CAPITAL LETTER PIWR +0554; L; 0584; #ARMENIAN CAPITAL LETTER KEH +0555; L; 0585; #ARMENIAN CAPITAL LETTER OH +0556; L; 0586; #ARMENIAN CAPITAL LETTER FEH +0587; E; 0565 0582; #ARMENIAN SMALL LIGATURE 
ECH YIWN +1E00; L; 1E01; #LATIN CAPITAL LETTER A WITH RING BELOW +1E02; L; 1E03; #LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; L; 1E05; #LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; L; 1E07; #LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; L; 1E09; #LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; L; 1E0B; #LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; L; 1E0D; #LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; L; 1E0F; #LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; L; 1E11; #LATIN CAPITAL LETTER D WITH CEDILLA +1E12; L; 1E13; #LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; L; 1E15; #LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; L; 1E17; #LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; L; 1E19; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; L; 1E1B; #LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; L; 1E1D; #LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; L; 1E1F; #LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; L; 1E21; #LATIN CAPITAL LETTER G WITH MACRON +1E22; L; 1E23; #LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; L; 1E25; #LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; L; 1E27; #LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; L; 1E29; #LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; L; 1E2B; #LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; L; 1E2D; #LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; L; 1E2F; #LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; L; 1E31; #LATIN CAPITAL LETTER K WITH ACUTE +1E32; L; 1E33; #LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; L; 1E35; #LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; L; 1E37; #LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; L; 1E39; #LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; L; 1E3B; #LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; L; 1E3D; #LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; L; 1E3F; #LATIN CAPITAL LETTER M WITH ACUTE +1E40; L; 1E41; #LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; L; 1E43; #LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; L; 1E45; #LATIN CAPITAL LETTER N WITH DOT 
ABOVE +1E46; L; 1E47; #LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; L; 1E49; #LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; L; 1E4B; #LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; L; 1E4D; #LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; L; 1E4F; #LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; L; 1E51; #LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; L; 1E53; #LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; L; 1E55; #LATIN CAPITAL LETTER P WITH ACUTE +1E56; L; 1E57; #LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; L; 1E59; #LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; L; 1E5B; #LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; L; 1E5D; #LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; L; 1E5F; #LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; L; 1E61; #LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; L; 1E63; #LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; L; 1E65; #LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; L; 1E67; #LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; L; 1E69; #LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; L; 1E6B; #LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; L; 1E6D; #LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; L; 1E6F; #LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; L; 1E71; #LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; L; 1E73; #LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; L; 1E75; #LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; L; 1E77; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; L; 1E79; #LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; L; 1E7B; #LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; L; 1E7D; #LATIN CAPITAL LETTER V WITH TILDE +1E7E; L; 1E7F; #LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; L; 1E81; #LATIN CAPITAL LETTER W WITH GRAVE +1E82; L; 1E83; #LATIN CAPITAL LETTER W WITH ACUTE +1E84; L; 1E85; #LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; L; 1E87; #LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; L; 1E89; #LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; L; 1E8B; 
#LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; L; 1E8D; #LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; L; 1E8F; #LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; L; 1E91; #LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; L; 1E93; #LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; L; 1E95; #LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; E; 0068 0331; #LATIN SMALL LETTER H WITH LINE BELOW +1E97; E; 0074 0308; #LATIN SMALL LETTER T WITH DIAERESIS +1E98; E; 0077 030A; #LATIN SMALL LETTER W WITH RING ABOVE +1E99; E; 0079 030A; #LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; E; 0061 02BE; #LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; E; 1E61; #LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA0; L; 1EA1; #LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; L; 1EA3; #LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; L; 1EA5; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; L; 1EA7; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; L; 1EA9; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; L; 1EAB; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; L; 1EAD; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; L; 1EAF; #LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; L; 1EB1; #LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; L; 1EB3; #LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; L; 1EB5; #LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; L; 1EB7; #LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; L; 1EB9; #LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; L; 1EBB; #LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; L; 1EBD; #LATIN CAPITAL LETTER E WITH TILDE +1EBE; L; 1EBF; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; L; 1EC1; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; L; 1EC3; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; L; 1EC5; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; L; 1EC7; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; L; 1EC9; #LATIN CAPITAL LETTER I WITH HOOK ABOVE 
+1ECA; L; 1ECB; #LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; L; 1ECD; #LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; L; 1ECF; #LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; L; 1ED1; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; L; 1ED3; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; L; 1ED5; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; L; 1ED7; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; L; 1ED9; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; L; 1EDB; #LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; L; 1EDD; #LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; L; 1EDF; #LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; L; 1EE1; #LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; L; 1EE3; #LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; L; 1EE5; #LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; L; 1EE7; #LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; L; 1EE9; #LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; L; 1EEB; #LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; L; 1EED; #LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; L; 1EEF; #LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; L; 1EF1; #LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; L; 1EF3; #LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; L; 1EF5; #LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; L; 1EF7; #LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; L; 1EF9; #LATIN CAPITAL LETTER Y WITH TILDE +1F08; L; 1F00; #GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; L; 1F01; #GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; L; 1F02; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; L; 1F03; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; L; 1F04; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; L; 1F05; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; L; 1F06; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; L; 1F07; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; L; 1F10; #GREEK CAPITAL LETTER 
EPSILON WITH PSILI +1F19; L; 1F11; #GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; L; 1F12; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; L; 1F13; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; L; 1F14; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; L; 1F15; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; L; 1F20; #GREEK CAPITAL LETTER ETA WITH PSILI +1F29; L; 1F21; #GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; L; 1F22; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; L; 1F23; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; L; 1F24; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; L; 1F25; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; L; 1F26; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; L; 1F27; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; L; 1F30; #GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; L; 1F31; #GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; L; 1F32; #GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; L; 1F33; #GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; L; 1F34; #GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; L; 1F35; #GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; L; 1F36; #GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; L; 1F37; #GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; L; 1F40; #GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; L; 1F41; #GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; L; 1F42; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; L; 1F43; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; L; 1F44; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; L; 1F45; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; E; 03C5 0313; #GREEK SMALL LETTER UPSILON WITH PSILI +1F52; E; 03C5 0313 0300; #GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; E; 03C5 0313 0301; #GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; E; 03C5 0313 0342; #GREEK SMALL LETTER UPSILON WITH PSILI AND 
PERISPOMENI +1F59; L; 1F51; #GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; L; 1F53; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; L; 1F55; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; L; 1F57; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; L; 1F60; #GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; L; 1F61; #GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; L; 1F62; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; L; 1F63; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; L; 1F64; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; L; 1F65; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; L; 1F66; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; L; 1F67; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; E; 1F00 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; E; 1F01 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; E; 1F02 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; E; 1F03 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; E; 1F04 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; E; 1F05 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; E; 1F06 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; E; 1F07 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; E; 1F00 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; E; 1F01 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; E; 1F02 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; E; 1F03 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; E; 1F04 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; E; 1F05 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; E; 1F06 03B9; 
#GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; E; 1F07 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; E; 1F20 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; E; 1F21 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; E; 1F22 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; E; 1F23 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; E; 1F24 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; E; 1F25 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; E; 1F26 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; E; 1F27 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; E; 1F20 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; E; 1F21 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; E; 1F22 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; E; 1F23 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; E; 1F24 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; E; 1F25 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; E; 1F26 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; E; 1F27 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; E; 1F60 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; E; 1F61 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; E; 1F62 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; E; 1F63 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; E; 1F64 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; E; 1F65 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND 
YPOGEGRAMMENI +1FA6; E; 1F66 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; E; 1F67 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; E; 1F60 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; E; 1F61 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; E; 1F62 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; E; 1F63 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; E; 1F64 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; E; 1F65 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; E; 1F66 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; E; 1F67 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; E; 1F70 03B9; #GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; E; 03B1 03B9; #GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; E; 03AC 03B9; #GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; E; 03B1 0342; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; E; 03B1 0342 03B9; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; L; 1FB0; #GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; L; 1FB1; #GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; L; 1F70; #GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; L; 1F71; #GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; E; 03B1 03B9; #GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; E; 03B9; #GREEK PROSGEGRAMMENI +1FC2; E; 1F74 03B9; #GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; E; 03B7 03B9; #GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; E; 03AE 03B9; #GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; E; 03B7 0342; #GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; E; 03B7 0342 03B9; #GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; L; 1F72; #GREEK CAPITAL LETTER EPSILON 
WITH VARIA +1FC9; L; 1F73; #GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; L; 1F74; #GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; L; 1F75; #GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; E; 03B7 03B9; #GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; E; 03B9 0308 0300; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; E; 03B9 0342; #GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; E; 03B9 0308 0342; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; L; 1FD0; #GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; L; 1FD1; #GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; L; 1F76; #GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; L; 1F77; #GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; E; 03C5 0308 0300; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; E; 03C1 0313; #GREEK SMALL LETTER RHO WITH PSILI +1FE6; E; 03C5 0342; #GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; E; 03C5 0308 0342; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; L; 1FE0; #GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; L; 1FE1; #GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; L; 1F7A; #GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; L; 1F7B; #GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; L; 1FE5; #GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; E; 1F7C 03B9; #GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; E; 03C9 03B9; #GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; E; 03CE 03B9; #GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; E; 03C9 0342; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; E; 03C9 0342 03B9; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; L; 1F78; #GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; L; 1F79; #GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; L; 1F7C; #GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; L; 1F7D; #GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; E; 
03C9 03B9; #GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; L; 03C9; #OHM SIGN +212A; L; 006B; #KELVIN SIGN +212B; L; 00E5; #ANGSTROM SIGN +2160; L; 2170; #ROMAN NUMERAL ONE +2161; L; 2171; #ROMAN NUMERAL TWO +2162; L; 2172; #ROMAN NUMERAL THREE +2163; L; 2173; #ROMAN NUMERAL FOUR +2164; L; 2174; #ROMAN NUMERAL FIVE +2165; L; 2175; #ROMAN NUMERAL SIX +2166; L; 2176; #ROMAN NUMERAL SEVEN +2167; L; 2177; #ROMAN NUMERAL EIGHT +2168; L; 2178; #ROMAN NUMERAL NINE +2169; L; 2179; #ROMAN NUMERAL TEN +216A; L; 217A; #ROMAN NUMERAL ELEVEN +216B; L; 217B; #ROMAN NUMERAL TWELVE +216C; L; 217C; #ROMAN NUMERAL FIFTY +216D; L; 217D; #ROMAN NUMERAL ONE HUNDRED +216E; L; 217E; #ROMAN NUMERAL FIVE HUNDRED +216F; L; 217F; #ROMAN NUMERAL ONE THOUSAND +24B6; L; 24D0; #CIRCLED LATIN CAPITAL LETTER A +24B7; L; 24D1; #CIRCLED LATIN CAPITAL LETTER B +24B8; L; 24D2; #CIRCLED LATIN CAPITAL LETTER C +24B9; L; 24D3; #CIRCLED LATIN CAPITAL LETTER D +24BA; L; 24D4; #CIRCLED LATIN CAPITAL LETTER E +24BB; L; 24D5; #CIRCLED LATIN CAPITAL LETTER F +24BC; L; 24D6; #CIRCLED LATIN CAPITAL LETTER G +24BD; L; 24D7; #CIRCLED LATIN CAPITAL LETTER H +24BE; L; 24D8; #CIRCLED LATIN CAPITAL LETTER I +24BF; L; 24D9; #CIRCLED LATIN CAPITAL LETTER J +24C0; L; 24DA; #CIRCLED LATIN CAPITAL LETTER K +24C1; L; 24DB; #CIRCLED LATIN CAPITAL LETTER L +24C2; L; 24DC; #CIRCLED LATIN CAPITAL LETTER M +24C3; L; 24DD; #CIRCLED LATIN CAPITAL LETTER N +24C4; L; 24DE; #CIRCLED LATIN CAPITAL LETTER O +24C5; L; 24DF; #CIRCLED LATIN CAPITAL LETTER P +24C6; L; 24E0; #CIRCLED LATIN CAPITAL LETTER Q +24C7; L; 24E1; #CIRCLED LATIN CAPITAL LETTER R +24C8; L; 24E2; #CIRCLED LATIN CAPITAL LETTER S +24C9; L; 24E3; #CIRCLED LATIN CAPITAL LETTER T +24CA; L; 24E4; #CIRCLED LATIN CAPITAL LETTER U +24CB; L; 24E5; #CIRCLED LATIN CAPITAL LETTER V +24CC; L; 24E6; #CIRCLED LATIN CAPITAL LETTER W +24CD; L; 24E7; #CIRCLED LATIN CAPITAL LETTER X +24CE; L; 24E8; #CIRCLED LATIN CAPITAL LETTER Y +24CF; L; 24E9; #CIRCLED LATIN CAPITAL LETTER Z 
+FB00; E; 0066 0066; #LATIN SMALL LIGATURE FF +FB01; E; 0066 0069; #LATIN SMALL LIGATURE FI +FB02; E; 0066 006C; #LATIN SMALL LIGATURE FL +FB03; E; 0066 0066 0069; #LATIN SMALL LIGATURE FFI +FB04; E; 0066 0066 006C; #LATIN SMALL LIGATURE FFL +FB05; E; 0073 0074; #LATIN SMALL LIGATURE LONG S T +FB06; E; 0073 0074; #LATIN SMALL LIGATURE ST +FB13; E; 0574 0576; #ARMENIAN SMALL LIGATURE MEN NOW +FB14; E; 0574 0565; #ARMENIAN SMALL LIGATURE MEN ECH +FB15; E; 0574 056B; #ARMENIAN SMALL LIGATURE MEN INI +FB16; E; 057E 0576; #ARMENIAN SMALL LIGATURE VEW NOW +FB17; E; 0574 056D; #ARMENIAN SMALL LIGATURE MEN XEH +FF21; L; FF41; #FULLWIDTH LATIN CAPITAL LETTER A +FF22; L; FF42; #FULLWIDTH LATIN CAPITAL LETTER B +FF23; L; FF43; #FULLWIDTH LATIN CAPITAL LETTER C +FF24; L; FF44; #FULLWIDTH LATIN CAPITAL LETTER D +FF25; L; FF45; #FULLWIDTH LATIN CAPITAL LETTER E +FF26; L; FF46; #FULLWIDTH LATIN CAPITAL LETTER F +FF27; L; FF47; #FULLWIDTH LATIN CAPITAL LETTER G +FF28; L; FF48; #FULLWIDTH LATIN CAPITAL LETTER H +FF29; L; FF49; #FULLWIDTH LATIN CAPITAL LETTER I +FF2A; L; FF4A; #FULLWIDTH LATIN CAPITAL LETTER J +FF2B; L; FF4B; #FULLWIDTH LATIN CAPITAL LETTER K +FF2C; L; FF4C; #FULLWIDTH LATIN CAPITAL LETTER L +FF2D; L; FF4D; #FULLWIDTH LATIN CAPITAL LETTER M +FF2E; L; FF4E; #FULLWIDTH LATIN CAPITAL LETTER N +FF2F; L; FF4F; #FULLWIDTH LATIN CAPITAL LETTER O +FF30; L; FF50; #FULLWIDTH LATIN CAPITAL LETTER P +FF31; L; FF51; #FULLWIDTH LATIN CAPITAL LETTER Q +FF32; L; FF52; #FULLWIDTH LATIN CAPITAL LETTER R +FF33; L; FF53; #FULLWIDTH LATIN CAPITAL LETTER S +FF34; L; FF54; #FULLWIDTH LATIN CAPITAL LETTER T +FF35; L; FF55; #FULLWIDTH LATIN CAPITAL LETTER U +FF36; L; FF56; #FULLWIDTH LATIN CAPITAL LETTER V +FF37; L; FF57; #FULLWIDTH LATIN CAPITAL LETTER W +FF38; L; FF58; #FULLWIDTH LATIN CAPITAL LETTER X +FF39; L; FF59; #FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; L; FF5A; #FULLWIDTH LATIN CAPITAL LETTER Z + + diff --git a/lib/unicode/Category.pl b/lib/unicode/Category.pl index 
bffd1169be..9c81514c58 100644 --- a/lib/unicode/Category.pl +++ b/lib/unicode/Category.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f Cc @@ -1503,4 +1503,6 @@ ffe9 ffec Sm ffed ffee So fff9 fffb Cf fffc fffd So +f0000 ffffd Co +100000 10fffd Co END diff --git a/lib/unicode/CombiningClass.pl b/lib/unicode/CombiningClass.pl index a40949830c..628b9c63db 100644 --- a/lib/unicode/CombiningClass.pl +++ b/lib/unicode/CombiningClass.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 0314 230 diff --git a/lib/unicode/CompExcl.txt b/lib/unicode/CompExcl.txt index 5ea46afc63..53f846729d 100644 --- a/lib/unicode/CompExcl.txt +++ b/lib/unicode/CompExcl.txt @@ -1,3 +1,5 @@ +# CompositionExclusions-2.txt +# # Composition Exclusions # This file lists the characters from the UTR #15 Composition Exclusion Table. # @@ -133,8 +135,8 @@ FB4E # HEBREW LETTER PE WITH RAFE # (4) Non-Starter Decompositions # These characters can be derived from the UnicodeData file # by including all characters whose canonical decomposition consists -# of a sequence of characters, the first of which has a canonical -# class of zero. +# of a sequence of characters, the first of which has a non-zero +# combining class. # These characters are simply quoted here for reference. # 0344 COMBINING GREEK DIALYTIKA TONOS diff --git a/lib/unicode/Decomposition.pl b/lib/unicode/Decomposition.pl index ecc30b205e..1fe29cd157 100644 --- a/lib/unicode/Decomposition.pl +++ b/lib/unicode/Decomposition.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a0 <noBreak> 0020 diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/AlphabeticPresentationForms.pl index c42e944a3c..a85b9cabec 100644 --- a/lib/unicode/In/AlphabeticPresentationForms.pl +++ b/lib/unicode/In/AlphabeticPresentationForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FB00 FB4F diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/Arabic.pl index 5010ab73de..5fbbbfa028 100644 --- a/lib/unicode/In/Arabic.pl +++ b/lib/unicode/In/Arabic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0600 06FF diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/ArabicPresentationForms-A.pl index 6edd74d755..62521bb1b1 100644 --- a/lib/unicode/In/ArabicPresentationForms-A.pl +++ b/lib/unicode/In/ArabicPresentationForms-A.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FB50 FDFF diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/ArabicPresentationForms-B.pl index 964073931e..6b2d44742b 100644 --- a/lib/unicode/In/ArabicPresentationForms-B.pl +++ b/lib/unicode/In/ArabicPresentationForms-B.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; FE70 FEFE diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/Armenian.pl index 19b74acd71..d4736a7506 100644 --- a/lib/unicode/In/Armenian.pl +++ b/lib/unicode/In/Armenian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0530 058F diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/Arrows.pl index 7ce44183a1..a7ef468593 100644 --- a/lib/unicode/In/Arrows.pl +++ b/lib/unicode/In/Arrows.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2190 21FF diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/BasicLatin.pl index 39987f16ec..36d6456fa6 100644 --- a/lib/unicode/In/BasicLatin.pl +++ b/lib/unicode/In/BasicLatin.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007F diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/Bengali.pl index c0a47d30d1..07dc6ac102 100644 --- a/lib/unicode/In/Bengali.pl +++ b/lib/unicode/In/Bengali.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0980 09FF diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/BlockElements.pl index e96e64faa0..495629b938 100644 --- a/lib/unicode/In/BlockElements.pl +++ b/lib/unicode/In/BlockElements.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2580 259F diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/Bopomofo.pl index 553560670c..3dbf73a236 100644 --- a/lib/unicode/In/Bopomofo.pl +++ b/lib/unicode/In/Bopomofo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3100 312F diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/BopomofoExtended.pl index d0ee43a437..f2ca6de96d 100644 --- a/lib/unicode/In/BopomofoExtended.pl +++ b/lib/unicode/In/BopomofoExtended.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 31A0 31BF diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/BoxDrawing.pl index d580199b7f..a3cd897498 100644 --- a/lib/unicode/In/BoxDrawing.pl +++ b/lib/unicode/In/BoxDrawing.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2500 257F diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/BraillePatterns.pl index e5c9e4ca70..58afc05a20 100644 --- a/lib/unicode/In/BraillePatterns.pl +++ b/lib/unicode/In/BraillePatterns.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2800 28FF diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/CJKCompatibility.pl index 07ab8edfd4..793520f4eb 100644 --- a/lib/unicode/In/CJKCompatibility.pl +++ b/lib/unicode/In/CJKCompatibility.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3300 33FF diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/CJKCompatibilityForms.pl index 122ccd7ad6..a9ba270122 100644 --- a/lib/unicode/In/CJKCompatibilityForms.pl +++ b/lib/unicode/In/CJKCompatibilityForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE30 FE4F diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/CJKCompatibilityIdeographs.pl index 59c8e5dd5b..d841bc5482 100644 --- a/lib/unicode/In/CJKCompatibilityIdeographs.pl +++ b/lib/unicode/In/CJKCompatibilityIdeographs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; F900 FAFF diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/CJKRadicalsSupplement.pl index d4c0c82bb6..2d1370799a 100644 --- a/lib/unicode/In/CJKRadicalsSupplement.pl +++ b/lib/unicode/In/CJKRadicalsSupplement.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2E80 2EFF diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/CJKSymbolsandPunctuation.pl index 24ecc37b67..ca525ae383 100644 --- a/lib/unicode/In/CJKSymbolsandPunctuation.pl +++ b/lib/unicode/In/CJKSymbolsandPunctuation.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3000 303F diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/CJKUnifiedIdeographs.pl index 351cf74a82..729f4c6315 100644 --- a/lib/unicode/In/CJKUnifiedIdeographs.pl +++ b/lib/unicode/In/CJKUnifiedIdeographs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 4E00 9FFF diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl index 012f54c824..e92f091938 100644 --- a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl +++ b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3400 4DB5 diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/Cherokee.pl index 10cae1a652..1e9ad746d3 100644 --- a/lib/unicode/In/Cherokee.pl +++ b/lib/unicode/In/Cherokee.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 13A0 13FF diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl b/lib/unicode/In/CombiningDiacriticalMarks.pl index a32f974bfb..d3a45d4cd0 100644 --- a/lib/unicode/In/CombiningDiacriticalMarks.pl +++ b/lib/unicode/In/CombiningDiacriticalMarks.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 036F diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/CombiningHalfMarks.pl index 100471bdbb..4f0a5731a7 100644 --- a/lib/unicode/In/CombiningHalfMarks.pl +++ b/lib/unicode/In/CombiningHalfMarks.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE20 FE2F diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/CombiningMarksforSymbols.pl index f45e7e0490..9dde706cc3 100644 --- a/lib/unicode/In/CombiningMarksforSymbols.pl +++ b/lib/unicode/In/CombiningMarksforSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 20D0 20FF diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/ControlPictures.pl index 77a759f1a0..78113e8c55 100644 --- a/lib/unicode/In/ControlPictures.pl +++ b/lib/unicode/In/ControlPictures.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2400 243F diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/CurrencySymbols.pl index 567ae97da3..8cbc1600e9 100644 --- a/lib/unicode/In/CurrencySymbols.pl +++ b/lib/unicode/In/CurrencySymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 20A0 20CF diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/Cyrillic.pl index 9ca104c7db..f057731818 100644 --- a/lib/unicode/In/Cyrillic.pl +++ b/lib/unicode/In/Cyrillic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0400 04FF diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/Devanagari.pl index 61372b58ab..c99eff18ec 100644 --- a/lib/unicode/In/Devanagari.pl +++ b/lib/unicode/In/Devanagari.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0900 097F diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/Dingbats.pl index 0f820ca711..1bbb102999 100644 --- a/lib/unicode/In/Dingbats.pl +++ b/lib/unicode/In/Dingbats.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2700 27BF diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl b/lib/unicode/In/EnclosedAlphanumerics.pl index de52aa8d99..46b4cf5589 100644 --- a/lib/unicode/In/EnclosedAlphanumerics.pl +++ b/lib/unicode/In/EnclosedAlphanumerics.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2460 24FF diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/EnclosedCJKLettersandMonths.pl index e4de0e0261..da5a7a1ecb 100644 --- a/lib/unicode/In/EnclosedCJKLettersandMonths.pl +++ b/lib/unicode/In/EnclosedCJKLettersandMonths.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3200 32FF diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/Ethiopic.pl index 13c309050a..5b472c47c5 100644 --- a/lib/unicode/In/Ethiopic.pl +++ b/lib/unicode/In/Ethiopic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1200 137F diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/GeneralPunctuation.pl index 81c76992dc..aa82c30bd7 100644 --- a/lib/unicode/In/GeneralPunctuation.pl +++ b/lib/unicode/In/GeneralPunctuation.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2000 206F diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/GeometricShapes.pl index 170422d2d0..6cf8ea72f7 100644 --- a/lib/unicode/In/GeometricShapes.pl +++ b/lib/unicode/In/GeometricShapes.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 25A0 25FF diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/Georgian.pl index 773ed1562a..493f57053e 100644 --- a/lib/unicode/In/Georgian.pl +++ b/lib/unicode/In/Georgian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 10A0 10FF diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/Greek.pl index ff753d19b4..ac4bbee588 100644 --- a/lib/unicode/In/Greek.pl +++ b/lib/unicode/In/Greek.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0370 03FF diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/GreekExtended.pl index b8f02e7f0a..acd43be814 100644 --- a/lib/unicode/In/GreekExtended.pl +++ b/lib/unicode/In/GreekExtended.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1F00 1FFF diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/Gujarati.pl index ff6c6503bb..0e3c8e98ce 100644 --- a/lib/unicode/In/Gujarati.pl +++ b/lib/unicode/In/Gujarati.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0A80 0AFF diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/Gurmukhi.pl index b888df6941..32ff23943b 100644 --- a/lib/unicode/In/Gurmukhi.pl +++ b/lib/unicode/In/Gurmukhi.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0A00 0A7F diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/HalfwidthandFullwidthForms.pl index e45265393f..fd3ba327f6 100644 --- a/lib/unicode/In/HalfwidthandFullwidthForms.pl +++ b/lib/unicode/In/HalfwidthandFullwidthForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FF00 FFEF diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/HangulCompatibilityJamo.pl index c15379fafc..744e57270f 100644 --- a/lib/unicode/In/HangulCompatibilityJamo.pl +++ b/lib/unicode/In/HangulCompatibilityJamo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3130 318F diff --git a/lib/unicode/In/HangulJamo.pl b/lib/unicode/In/HangulJamo.pl index c329b54c34..a1d1c67708 100644 --- a/lib/unicode/In/HangulJamo.pl +++ b/lib/unicode/In/HangulJamo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1100 11FF diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/HangulSyllables.pl index 7d91a363f5..80cd4a4420 100644 --- a/lib/unicode/In/HangulSyllables.pl +++ b/lib/unicode/In/HangulSyllables.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; AC00 D7A3 diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/Hebrew.pl index abe7b9ede4..2e29a28236 100644 --- a/lib/unicode/In/Hebrew.pl +++ b/lib/unicode/In/Hebrew.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0590 05FF diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/HighPrivateUseSurrogates.pl index 6ed7ac96fd..0e1320d7ee 100644 --- a/lib/unicode/In/HighPrivateUseSurrogates.pl +++ b/lib/unicode/In/HighPrivateUseSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; DB80 DBFF diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/HighSurrogates.pl index 924a0c9bdb..6acc6c4503 100644 --- a/lib/unicode/In/HighSurrogates.pl +++ b/lib/unicode/In/HighSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; D800 DB7F diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/Hiragana.pl index 7a65302188..5905fe9b28 100644 --- a/lib/unicode/In/Hiragana.pl +++ b/lib/unicode/In/Hiragana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3040 309F diff --git a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/IPAExtensions.pl index 20906d6300..5365373c1b 100644 --- a/lib/unicode/In/IPAExtensions.pl +++ b/lib/unicode/In/IPAExtensions.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0250 02AF diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/IdeographicDescriptionCharacters.pl index 4baae881a1..dafb5b4fe3 100644 --- a/lib/unicode/In/IdeographicDescriptionCharacters.pl +++ b/lib/unicode/In/IdeographicDescriptionCharacters.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2FF0 2FFF diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/Kanbun.pl index 57d6bd21f4..9ad03a661b 100644 --- a/lib/unicode/In/Kanbun.pl +++ b/lib/unicode/In/Kanbun.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3190 319F diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/KangxiRadicals.pl index d26fd6c774..165398c9e9 100644 --- a/lib/unicode/In/KangxiRadicals.pl +++ b/lib/unicode/In/KangxiRadicals.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2F00 2FDF diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/Kannada.pl index 109197a6f7..a679445f3f 100644 --- a/lib/unicode/In/Kannada.pl +++ b/lib/unicode/In/Kannada.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0C80 0CFF diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/Katakana.pl index 93bd5a03fa..2976d25822 100644 --- a/lib/unicode/In/Katakana.pl +++ b/lib/unicode/In/Katakana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 30A0 30FF diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/Khmer.pl index f3e86851b3..6a85224223 100644 --- a/lib/unicode/In/Khmer.pl +++ b/lib/unicode/In/Khmer.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1780 17FF diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/Lao.pl index 41ff11f805..fdddd86766 100644 --- a/lib/unicode/In/Lao.pl +++ b/lib/unicode/In/Lao.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0E80 0EFF diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/Latin-1Supplement.pl index 1b252eb23e..6a901fba1c 100644 --- a/lib/unicode/In/Latin-1Supplement.pl +++ b/lib/unicode/In/Latin-1Supplement.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0080 00FF diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/LatinExtended-A.pl index b8be987db0..a042350176 100644 --- a/lib/unicode/In/LatinExtended-A.pl +++ b/lib/unicode/In/LatinExtended-A.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0100 017F diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/LatinExtended-B.pl index b9aff43f3d..b7106c6d37 100644 --- a/lib/unicode/In/LatinExtended-B.pl +++ b/lib/unicode/In/LatinExtended-B.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0180 024F diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/LatinExtendedAdditional.pl index d309e90814..e17cc3de95 100644 --- a/lib/unicode/In/LatinExtendedAdditional.pl +++ b/lib/unicode/In/LatinExtendedAdditional.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1E00 1EFF diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/LetterlikeSymbols.pl index 1768740d42..c2249a7b94 100644 --- a/lib/unicode/In/LetterlikeSymbols.pl +++ b/lib/unicode/In/LetterlikeSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2100 214F diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/LowSurrogates.pl index 752b264e81..025bd13950 100644 --- a/lib/unicode/In/LowSurrogates.pl +++ b/lib/unicode/In/LowSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; DC00 DFFF diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/Malayalam.pl index 8fb57cdb10..5a01d40927 100644 --- a/lib/unicode/In/Malayalam.pl +++ b/lib/unicode/In/Malayalam.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0D00 0D7F diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/MathematicalOperators.pl index 055f19e590..8b45e1881c 100644 --- a/lib/unicode/In/MathematicalOperators.pl +++ b/lib/unicode/In/MathematicalOperators.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2200 22FF diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/MiscellaneousSymbols.pl index 9dcdd26954..cc5b02fdc2 100644 --- a/lib/unicode/In/MiscellaneousSymbols.pl +++ b/lib/unicode/In/MiscellaneousSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2600 26FF diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/MiscellaneousTechnical.pl index 370c00f320..a1058a0c6d 100644 --- a/lib/unicode/In/MiscellaneousTechnical.pl +++ b/lib/unicode/In/MiscellaneousTechnical.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2300 23FF diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/Mongolian.pl index 394014d496..98a4914ce6 100644 --- a/lib/unicode/In/Mongolian.pl +++ b/lib/unicode/In/Mongolian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1800 18AF diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/Myanmar.pl index 4b3f3181b0..3aa2f41410 100644 --- a/lib/unicode/In/Myanmar.pl +++ b/lib/unicode/In/Myanmar.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1000 109F diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/NumberForms.pl index d33ece0bbc..2a606a6bf7 100644 --- a/lib/unicode/In/NumberForms.pl +++ b/lib/unicode/In/NumberForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2150 218F diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/Ogham.pl index e097d90c77..de320a9172 100644 --- a/lib/unicode/In/Ogham.pl +++ b/lib/unicode/In/Ogham.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1680 169F diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/OpticalCharacterRecognition.pl index be1d981c7c..7f0aff830e 100644 --- a/lib/unicode/In/OpticalCharacterRecognition.pl +++ b/lib/unicode/In/OpticalCharacterRecognition.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2440 245F diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/Oriya.pl index 5a680f6743..771a245757 100644 --- a/lib/unicode/In/Oriya.pl +++ b/lib/unicode/In/Oriya.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0B00 0B7F diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl index 0c118f4fe4..0b0c00407d 100644 --- a/lib/unicode/In/PrivateUse.pl +++ b/lib/unicode/In/PrivateUse.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; E000 F8FF diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/Runic.pl index 0bd42df80c..52ca7aa4fb 100644 --- a/lib/unicode/In/Runic.pl +++ b/lib/unicode/In/Runic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 16A0 16FF diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/Sinhala.pl index 37e007c057..5a892fd3c1 100644 --- a/lib/unicode/In/Sinhala.pl +++ b/lib/unicode/In/Sinhala.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0D80 0DFF diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/SmallFormVariants.pl index 736415e67e..148e6e85b8 100644 --- a/lib/unicode/In/SmallFormVariants.pl +++ b/lib/unicode/In/SmallFormVariants.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE50 FE6F diff --git a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/SpacingModifierLetters.pl index 6e9cdf0b53..0e31fea4b4 100644 --- a/lib/unicode/In/SpacingModifierLetters.pl +++ b/lib/unicode/In/SpacingModifierLetters.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02B0 02FF diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/Specials.pl index f9f730f840..03f69a3b8e 100644 --- a/lib/unicode/In/Specials.pl +++ b/lib/unicode/In/Specials.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FFF0 FFFD diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/SuperscriptsandSubscripts.pl index efcec0b841..b0f90cd67b 100644 --- a/lib/unicode/In/SuperscriptsandSubscripts.pl +++ b/lib/unicode/In/SuperscriptsandSubscripts.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2070 209F diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/Syriac.pl index 7c81fb6f32..f85f1a9fd7 100644 --- a/lib/unicode/In/Syriac.pl +++ b/lib/unicode/In/Syriac.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0700 074F diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/Tamil.pl index e65ed2fa19..71fa923d6f 100644 --- a/lib/unicode/In/Tamil.pl +++ b/lib/unicode/In/Tamil.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0B80 0BFF diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/Telugu.pl index d5ed2368c2..ff09b1ed87 100644 --- a/lib/unicode/In/Telugu.pl +++ b/lib/unicode/In/Telugu.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0C00 0C7F diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/Thaana.pl index 361bd4d4b4..f88768c924 100644 --- a/lib/unicode/In/Thaana.pl +++ b/lib/unicode/In/Thaana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0780 07BF diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/Thai.pl index 3376de4e18..e77c0c512f 100644 --- a/lib/unicode/In/Thai.pl +++ b/lib/unicode/In/Thai.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. 
# Any changes made here will be lost! return <<'END'; 0E00 0E7F diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/Tibetan.pl index 50837ad8bc..35436b3b14 100644 --- a/lib/unicode/In/Tibetan.pl +++ b/lib/unicode/In/Tibetan.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0F00 0FFF diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl index ad4eb27866..83c6a78cca 100644 --- a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl +++ b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1400 167F diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/YiRadicals.pl index f25c6954ff..7350871cb2 100644 --- a/lib/unicode/In/YiRadicals.pl +++ b/lib/unicode/In/YiRadicals.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; A490 A4CF diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/YiSyllables.pl index f4e3a8bcbc..baa038eb30 100644 --- a/lib/unicode/In/YiSyllables.pl +++ b/lib/unicode/In/YiSyllables.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; A000 A48F diff --git a/lib/unicode/Is/ASCII.pl b/lib/unicode/Is/ASCII.pl index 63f95ae7dd..1434a55d96 100644 --- a/lib/unicode/Is/ASCII.pl +++ b/lib/unicode/Is/ASCII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007f diff --git a/lib/unicode/Is/Alnum.pl b/lib/unicode/Is/Alnum.pl index 203860bac1..94f9a5c621 100644 --- a/lib/unicode/Is/Alnum.pl +++ b/lib/unicode/Is/Alnum.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Alpha.pl b/lib/unicode/Is/Alpha.pl index 90020b8fb6..de5046f9d4 100644 --- a/lib/unicode/Is/Alpha.pl +++ b/lib/unicode/Is/Alpha.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/BidiAL.pl b/lib/unicode/Is/BidiAL.pl index e04f2f562d..a8d43b816c 100644 --- a/lib/unicode/Is/BidiAL.pl +++ b/lib/unicode/Is/BidiAL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 061b diff --git a/lib/unicode/Is/BidiAN.pl b/lib/unicode/Is/BidiAN.pl index 4a71ae532d..4519c6d51d 100644 --- a/lib/unicode/Is/BidiAN.pl +++ b/lib/unicode/Is/BidiAN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0660 0669 diff --git a/lib/unicode/Is/BidiB.pl b/lib/unicode/Is/BidiB.pl index e4ba16567a..33bdb45944 100644 --- a/lib/unicode/Is/BidiB.pl +++ b/lib/unicode/Is/BidiB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000a diff --git a/lib/unicode/Is/BidiBN.pl b/lib/unicode/Is/BidiBN.pl index 795a4a9f40..75b8871a59 100644 --- a/lib/unicode/Is/BidiBN.pl +++ b/lib/unicode/Is/BidiBN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 diff --git a/lib/unicode/Is/BidiCS.pl b/lib/unicode/Is/BidiCS.pl index f8d037d118..e217653843 100644 --- a/lib/unicode/Is/BidiCS.pl +++ b/lib/unicode/Is/BidiCS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002c diff --git a/lib/unicode/Is/BidiEN.pl b/lib/unicode/Is/BidiEN.pl index d63270aecf..113de87454 100644 --- a/lib/unicode/Is/BidiEN.pl +++ b/lib/unicode/Is/BidiEN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/BidiES.pl b/lib/unicode/Is/BidiES.pl index 5a1a36a6d8..d1cd305f54 100644 --- a/lib/unicode/Is/BidiES.pl +++ b/lib/unicode/Is/BidiES.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. 
# Any changes made here will be lost! return <<'END'; 002f diff --git a/lib/unicode/Is/BidiET.pl b/lib/unicode/Is/BidiET.pl index 5e7af2bbf4..0a66fa8346 100644 --- a/lib/unicode/Is/BidiET.pl +++ b/lib/unicode/Is/BidiET.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0023 0025 diff --git a/lib/unicode/Is/BidiL.pl b/lib/unicode/Is/BidiL.pl index 8dc4ca87c0..a08d8b8900 100644 --- a/lib/unicode/Is/BidiL.pl +++ b/lib/unicode/Is/BidiL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a @@ -320,4 +320,6 @@ ffc2 ffc7 ffca ffcf ffd2 ffd7 ffda ffdc +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/BidiLRE.pl b/lib/unicode/Is/BidiLRE.pl index ef2a6e462f..25704212c2 100644 --- a/lib/unicode/Is/BidiLRE.pl +++ b/lib/unicode/Is/BidiLRE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202a diff --git a/lib/unicode/Is/BidiLRO.pl b/lib/unicode/Is/BidiLRO.pl index e9958c4b81..721a22311e 100644 --- a/lib/unicode/Is/BidiLRO.pl +++ b/lib/unicode/Is/BidiLRO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202d diff --git a/lib/unicode/Is/BidiNSM.pl b/lib/unicode/Is/BidiNSM.pl index 191bc052a9..a5c3af4b34 100644 --- a/lib/unicode/Is/BidiNSM.pl +++ b/lib/unicode/Is/BidiNSM.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/BidiON.pl b/lib/unicode/Is/BidiON.pl index bde00ff123..ec0f18ff1d 100644 --- a/lib/unicode/Is/BidiON.pl +++ b/lib/unicode/Is/BidiON.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0022 diff --git a/lib/unicode/Is/BidiPDF.pl b/lib/unicode/Is/BidiPDF.pl index 4a3eedd564..20e3d9d31b 100644 --- a/lib/unicode/Is/BidiPDF.pl +++ b/lib/unicode/Is/BidiPDF.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202c diff --git a/lib/unicode/Is/BidiR.pl b/lib/unicode/Is/BidiR.pl index fccc1f6d6e..9f776ae53f 100644 --- a/lib/unicode/Is/BidiR.pl +++ b/lib/unicode/Is/BidiR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 05be diff --git a/lib/unicode/Is/BidiRLE.pl b/lib/unicode/Is/BidiRLE.pl index d789246ddb..eaf7ab7a23 100644 --- a/lib/unicode/Is/BidiRLE.pl +++ b/lib/unicode/Is/BidiRLE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202b diff --git a/lib/unicode/Is/BidiRLO.pl b/lib/unicode/Is/BidiRLO.pl index fcb81acc93..8974dd955a 100644 --- a/lib/unicode/Is/BidiRLO.pl +++ b/lib/unicode/Is/BidiRLO.pl @@ -1,5 +1,5 @@ # !!!!!!! 
DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202e diff --git a/lib/unicode/Is/BidiS.pl b/lib/unicode/Is/BidiS.pl index b28b3310ea..ac2655d6ed 100644 --- a/lib/unicode/Is/BidiS.pl +++ b/lib/unicode/Is/BidiS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 diff --git a/lib/unicode/Is/BidiWS.pl b/lib/unicode/Is/BidiWS.pl index 25d8b8f6aa..ebd24e546e 100644 --- a/lib/unicode/Is/BidiWS.pl +++ b/lib/unicode/Is/BidiWS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000c diff --git a/lib/unicode/Is/C.pl b/lib/unicode/Is/C.pl index 0db83c4bf3..51e4ede067 100644 --- a/lib/unicode/Is/C.pl +++ b/lib/unicode/Is/C.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f @@ -15,4 +15,6 @@ dc00 dfff e000 f8ff feff fff9 fffb +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Cc.pl b/lib/unicode/Is/Cc.pl index d7184e3151..6b97adc9eb 100644 --- a/lib/unicode/Is/Cc.pl +++ b/lib/unicode/Is/Cc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0000 001f diff --git a/lib/unicode/Is/Cf.pl b/lib/unicode/Is/Cf.pl index 896c3e6cd6..cef5ac448b 100644 --- a/lib/unicode/Is/Cf.pl +++ b/lib/unicode/Is/Cf.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 070f diff --git a/lib/unicode/Is/Cn.pl b/lib/unicode/Is/Cn.pl index 3c686154c1..fb75e8769c 100644 --- a/lib/unicode/Is/Cn.pl +++ b/lib/unicode/Is/Cn.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0220 0221 @@ -356,4 +356,18 @@ ffd8 ffd9 ffdd ffdf ffe7 ffef fff8 +10000 1fffd +20000 2fffd +30000 3fffd +40000 4fffd +50000 5fffd +60000 6fffd +70000 7fffd +80000 8fffd +90000 9fffd +a0000 afffd +b0000 bfffd +c0000 cfffd +d0000 dfffd +e0000 efffd END diff --git a/lib/unicode/Is/Cntrl.pl b/lib/unicode/Is/Cntrl.pl index 0db83c4bf3..51e4ede067 100644 --- a/lib/unicode/Is/Cntrl.pl +++ b/lib/unicode/Is/Cntrl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f @@ -15,4 +15,6 @@ dc00 dfff e000 f8ff feff fff9 fffb +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Co.pl b/lib/unicode/Is/Co.pl index c456d33aea..d077fd2bd3 100644 --- a/lib/unicode/Is/Co.pl +++ b/lib/unicode/Is/Co.pl @@ -1,6 +1,8 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; e000 f8ff +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Cs.pl b/lib/unicode/Is/Cs.pl index 8888fb5f3c..33e1daca88 100644 --- a/lib/unicode/Is/Cs.pl +++ b/lib/unicode/Is/Cs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; d800 db7f diff --git a/lib/unicode/Is/DCcircle.pl b/lib/unicode/Is/DCcircle.pl index 4c47b28b26..82c9edcd2b 100644 --- a/lib/unicode/Is/DCcircle.pl +++ b/lib/unicode/Is/DCcircle.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2460 2473 diff --git a/lib/unicode/Is/DCcompat.pl b/lib/unicode/Is/DCcompat.pl index 75d25695f3..5ae2b6a9f4 100644 --- a/lib/unicode/Is/DCcompat.pl +++ b/lib/unicode/Is/DCcompat.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a8 diff --git a/lib/unicode/Is/DCfinal.pl b/lib/unicode/Is/DCfinal.pl index 33fbf6aff8..3c81bcc6c4 100644 --- a/lib/unicode/Is/DCfinal.pl +++ b/lib/unicode/Is/DCfinal.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb51 diff --git a/lib/unicode/Is/DCfont.pl b/lib/unicode/Is/DCfont.pl index c72234b3bf..7feff18b22 100644 --- a/lib/unicode/Is/DCfont.pl +++ b/lib/unicode/Is/DCfont.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. 
Unicode.301. # Any changes made here will be lost! return <<'END'; 2102 diff --git a/lib/unicode/Is/DCfraction.pl b/lib/unicode/Is/DCfraction.pl index fc2dd6755d..abac9d9cfa 100644 --- a/lib/unicode/Is/DCfraction.pl +++ b/lib/unicode/Is/DCfraction.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00bc 00be diff --git a/lib/unicode/Is/DCinital.pl b/lib/unicode/Is/DCinital.pl index 2c9cf47e7d..8778a75ed5 100644 --- a/lib/unicode/Is/DCinital.pl +++ b/lib/unicode/Is/DCinital.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb55 diff --git a/lib/unicode/Is/DCinitial.pl b/lib/unicode/Is/DCinitial.pl index 0145b7dd71..c6d7802eaf 100644 --- a/lib/unicode/Is/DCinitial.pl +++ b/lib/unicode/Is/DCinitial.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb54 diff --git a/lib/unicode/Is/DCisolated.pl b/lib/unicode/Is/DCisolated.pl index cc8541eb7b..e4e24f786a 100644 --- a/lib/unicode/Is/DCisolated.pl +++ b/lib/unicode/Is/DCisolated.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb50 diff --git a/lib/unicode/Is/DCnarrow.pl b/lib/unicode/Is/DCnarrow.pl index 9417de1bbd..7887452105 100644 --- a/lib/unicode/Is/DCnarrow.pl +++ b/lib/unicode/Is/DCnarrow.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. 
Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; ff61 ffbe diff --git a/lib/unicode/Is/DCnoBreak.pl b/lib/unicode/Is/DCnoBreak.pl index 1fd9e8735b..18c01059ed 100644 --- a/lib/unicode/Is/DCnoBreak.pl +++ b/lib/unicode/Is/DCnoBreak.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a0 diff --git a/lib/unicode/Is/DCsmall.pl b/lib/unicode/Is/DCsmall.pl index f6c8069163..3a37931b56 100644 --- a/lib/unicode/Is/DCsmall.pl +++ b/lib/unicode/Is/DCsmall.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fe50 fe52 diff --git a/lib/unicode/Is/DCsquare.pl b/lib/unicode/Is/DCsquare.pl index b55fdd9c6a..f27993d6b8 100644 --- a/lib/unicode/Is/DCsquare.pl +++ b/lib/unicode/Is/DCsquare.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3300 3357 diff --git a/lib/unicode/Is/DCsub.pl b/lib/unicode/Is/DCsub.pl index 98c4dfa87e..f709a228c2 100644 --- a/lib/unicode/Is/DCsub.pl +++ b/lib/unicode/Is/DCsub.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2080 208e diff --git a/lib/unicode/Is/DCsuper.pl b/lib/unicode/Is/DCsuper.pl index 865a26dd92..1e6a0c5feb 100644 --- a/lib/unicode/Is/DCsuper.pl +++ b/lib/unicode/Is/DCsuper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00aa diff --git a/lib/unicode/Is/DCvertical.pl b/lib/unicode/Is/DCvertical.pl index 5d55483606..33b9feb724 100644 --- a/lib/unicode/Is/DCvertical.pl +++ b/lib/unicode/Is/DCvertical.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fe30 fe44 diff --git a/lib/unicode/Is/DCwide.pl b/lib/unicode/Is/DCwide.pl index 09dae19629..afe1e06b7d 100644 --- a/lib/unicode/Is/DCwide.pl +++ b/lib/unicode/Is/DCwide.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3000 diff --git a/lib/unicode/Is/DecoCanon.pl b/lib/unicode/Is/DecoCanon.pl index c5a59f6596..57c167b5f8 100644 --- a/lib/unicode/Is/DecoCanon.pl +++ b/lib/unicode/Is/DecoCanon.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00c0 00c5 diff --git a/lib/unicode/Is/DecoCompat.pl b/lib/unicode/Is/DecoCompat.pl index 43d34fc110..940d956f9f 100644 --- a/lib/unicode/Is/DecoCompat.pl +++ b/lib/unicode/Is/DecoCompat.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 00a0 diff --git a/lib/unicode/Is/Digit.pl b/lib/unicode/Is/Digit.pl index 2ab8156d77..259bb891f6 100644 --- a/lib/unicode/Is/Digit.pl +++ b/lib/unicode/Is/Digit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Graph.pl b/lib/unicode/Is/Graph.pl index 156f1711af..40d35067f6 100644 --- a/lib/unicode/Is/Graph.pl +++ b/lib/unicode/Is/Graph.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 007e diff --git a/lib/unicode/Is/L.pl b/lib/unicode/Is/L.pl index c32f83049c..bfe2c27412 100644 --- a/lib/unicode/Is/L.pl +++ b/lib/unicode/Is/L.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/LbrkAI.pl b/lib/unicode/Is/LbrkAI.pl index a15f76014e..297f5cf1fa 100644 --- a/lib/unicode/Is/LbrkAI.pl +++ b/lib/unicode/Is/LbrkAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a1 diff --git a/lib/unicode/Is/LbrkAL.pl b/lib/unicode/Is/LbrkAL.pl index c705dc8a3d..c1fc416662 100644 --- a/lib/unicode/Is/LbrkAL.pl +++ b/lib/unicode/Is/LbrkAL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0023 diff --git a/lib/unicode/Is/LbrkB2.pl b/lib/unicode/Is/LbrkB2.pl index 527e4c8977..a1410da25f 100644 --- a/lib/unicode/Is/LbrkB2.pl +++ b/lib/unicode/Is/LbrkB2.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2014 diff --git a/lib/unicode/Is/LbrkBA.pl b/lib/unicode/Is/LbrkBA.pl index 053369bccc..93ec04efb4 100644 --- a/lib/unicode/Is/LbrkBA.pl +++ b/lib/unicode/Is/LbrkBA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 diff --git a/lib/unicode/Is/LbrkBB.pl b/lib/unicode/Is/LbrkBB.pl index 5d3952a25d..b9bc7e8e35 100644 --- a/lib/unicode/Is/LbrkBB.pl +++ b/lib/unicode/Is/LbrkBB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02c8 diff --git a/lib/unicode/Is/LbrkBK.pl b/lib/unicode/Is/LbrkBK.pl index 2c314fb0ba..17bb5514c5 100644 --- a/lib/unicode/Is/LbrkBK.pl +++ b/lib/unicode/Is/LbrkBK.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000c diff --git a/lib/unicode/Is/LbrkCB.pl b/lib/unicode/Is/LbrkCB.pl index 95f8b803dc..8da9eba019 100644 --- a/lib/unicode/Is/LbrkCB.pl +++ b/lib/unicode/Is/LbrkCB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; fffc diff --git a/lib/unicode/Is/LbrkCL.pl b/lib/unicode/Is/LbrkCL.pl index a73130a6bd..353c59490b 100644 --- a/lib/unicode/Is/LbrkCL.pl +++ b/lib/unicode/Is/LbrkCL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0029 diff --git a/lib/unicode/Is/LbrkCM.pl b/lib/unicode/Is/LbrkCM.pl index 3d0f3474f8..87c7833f51 100644 --- a/lib/unicode/Is/LbrkCM.pl +++ b/lib/unicode/Is/LbrkCM.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 diff --git a/lib/unicode/Is/LbrkCR.pl b/lib/unicode/Is/LbrkCR.pl index c61a527902..2548269791 100644 --- a/lib/unicode/Is/LbrkCR.pl +++ b/lib/unicode/Is/LbrkCR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000d diff --git a/lib/unicode/Is/LbrkEX.pl b/lib/unicode/Is/LbrkEX.pl index d847092dce..173bf58d55 100644 --- a/lib/unicode/Is/LbrkEX.pl +++ b/lib/unicode/Is/LbrkEX.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 diff --git a/lib/unicode/Is/LbrkGL.pl b/lib/unicode/Is/LbrkGL.pl index b03a627c2b..49bb985c12 100644 --- a/lib/unicode/Is/LbrkGL.pl +++ b/lib/unicode/Is/LbrkGL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 00a0 diff --git a/lib/unicode/Is/LbrkHY.pl b/lib/unicode/Is/LbrkHY.pl index 6989bc8631..71e5886869 100644 --- a/lib/unicode/Is/LbrkHY.pl +++ b/lib/unicode/Is/LbrkHY.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002d diff --git a/lib/unicode/Is/LbrkID.pl b/lib/unicode/Is/LbrkID.pl index 0b1cc8453f..6e6100c191 100644 --- a/lib/unicode/Is/LbrkID.pl +++ b/lib/unicode/Is/LbrkID.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1100 1159 diff --git a/lib/unicode/Is/LbrkIN.pl b/lib/unicode/Is/LbrkIN.pl index 825198d12c..1758673e4b 100644 --- a/lib/unicode/Is/LbrkIN.pl +++ b/lib/unicode/Is/LbrkIN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2024 2026 diff --git a/lib/unicode/Is/LbrkIS.pl b/lib/unicode/Is/LbrkIS.pl index afa01a8004..32dcfb0f5b 100644 --- a/lib/unicode/Is/LbrkIS.pl +++ b/lib/unicode/Is/LbrkIS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002c diff --git a/lib/unicode/Is/LbrkLF.pl b/lib/unicode/Is/LbrkLF.pl index dcb5490eb2..9b845aed6f 100644 --- a/lib/unicode/Is/LbrkLF.pl +++ b/lib/unicode/Is/LbrkLF.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 000a diff --git a/lib/unicode/Is/LbrkNS.pl b/lib/unicode/Is/LbrkNS.pl index af9f3371c9..b7ff279c1d 100644 --- a/lib/unicode/Is/LbrkNS.pl +++ b/lib/unicode/Is/LbrkNS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0e5a 0e5b diff --git a/lib/unicode/Is/LbrkNU.pl b/lib/unicode/Is/LbrkNU.pl index 5c55d221b4..eb51418f68 100644 --- a/lib/unicode/Is/LbrkNU.pl +++ b/lib/unicode/Is/LbrkNU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/LbrkOP.pl b/lib/unicode/Is/LbrkOP.pl index a7dee379eb..90f18098b7 100644 --- a/lib/unicode/Is/LbrkOP.pl +++ b/lib/unicode/Is/LbrkOP.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 diff --git a/lib/unicode/Is/LbrkPO.pl b/lib/unicode/Is/LbrkPO.pl index cdfb56ea17..07f7bf74d2 100644 --- a/lib/unicode/Is/LbrkPO.pl +++ b/lib/unicode/Is/LbrkPO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0025 diff --git a/lib/unicode/Is/LbrkPR.pl b/lib/unicode/Is/LbrkPR.pl index c2d20da481..03466c912a 100644 --- a/lib/unicode/Is/LbrkPR.pl +++ b/lib/unicode/Is/LbrkPR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0024 diff --git a/lib/unicode/Is/LbrkQU.pl b/lib/unicode/Is/LbrkQU.pl index 46a6ee3a07..35dac981c5 100644 --- a/lib/unicode/Is/LbrkQU.pl +++ b/lib/unicode/Is/LbrkQU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0022 diff --git a/lib/unicode/Is/LbrkSA.pl b/lib/unicode/Is/LbrkSA.pl index bae4ced946..4539e093d0 100644 --- a/lib/unicode/Is/LbrkSA.pl +++ b/lib/unicode/Is/LbrkSA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0e01 0e30 diff --git a/lib/unicode/Is/LbrkSG.pl b/lib/unicode/Is/LbrkSG.pl index 8888fb5f3c..33e1daca88 100644 --- a/lib/unicode/Is/LbrkSG.pl +++ b/lib/unicode/Is/LbrkSG.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; d800 db7f diff --git a/lib/unicode/Is/LbrkSP.pl b/lib/unicode/Is/LbrkSP.pl index e786a0c935..2153e128dc 100644 --- a/lib/unicode/Is/LbrkSP.pl +++ b/lib/unicode/Is/LbrkSP.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0020 diff --git a/lib/unicode/Is/LbrkSY.pl b/lib/unicode/Is/LbrkSY.pl index d2a33aeacc..ce65fe1d85 100644 --- a/lib/unicode/Is/LbrkSY.pl +++ b/lib/unicode/Is/LbrkSY.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 002f diff --git a/lib/unicode/Is/LbrkXX.pl b/lib/unicode/Is/LbrkXX.pl index ec287c456a..c3b32ac61d 100644 --- a/lib/unicode/Is/LbrkXX.pl +++ b/lib/unicode/Is/LbrkXX.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; END diff --git a/lib/unicode/Is/LbrkZW.pl b/lib/unicode/Is/LbrkZW.pl index 96d8e99efc..63c9dcf489 100644 --- a/lib/unicode/Is/LbrkZW.pl +++ b/lib/unicode/Is/LbrkZW.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 200b diff --git a/lib/unicode/Is/Ll.pl b/lib/unicode/Is/Ll.pl index 28147943e8..03dafcc742 100644 --- a/lib/unicode/Is/Ll.pl +++ b/lib/unicode/Is/Ll.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a diff --git a/lib/unicode/Is/Lm.pl b/lib/unicode/Is/Lm.pl index 4380afe18e..23a3c55d4a 100644 --- a/lib/unicode/Is/Lm.pl +++ b/lib/unicode/Is/Lm.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02b0 02b8 diff --git a/lib/unicode/Is/Lo.pl b/lib/unicode/Is/Lo.pl index 78fab4cd0e..d82c6bbdaf 100644 --- a/lib/unicode/Is/Lo.pl +++ b/lib/unicode/Is/Lo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 01bb diff --git a/lib/unicode/Is/Lower.pl b/lib/unicode/Is/Lower.pl index 28147943e8..03dafcc742 100644 --- a/lib/unicode/Is/Lower.pl +++ b/lib/unicode/Is/Lower.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a diff --git a/lib/unicode/Is/Lt.pl b/lib/unicode/Is/Lt.pl index 809c37a1f2..b19755ca8e 100644 --- a/lib/unicode/Is/Lt.pl +++ b/lib/unicode/Is/Lt.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 01c5 diff --git a/lib/unicode/Is/Lu.pl b/lib/unicode/Is/Lu.pl index 8dde2742d0..07dee4834c 100644 --- a/lib/unicode/Is/Lu.pl +++ b/lib/unicode/Is/Lu.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/M.pl b/lib/unicode/Is/M.pl index 9367775a82..e3ef7f3dfa 100644 --- a/lib/unicode/Is/M.pl +++ b/lib/unicode/Is/M.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/Mc.pl b/lib/unicode/Is/Mc.pl index 937d8d4005..a76d66c9b3 100644 --- a/lib/unicode/Is/Mc.pl +++ b/lib/unicode/Is/Mc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0903 diff --git a/lib/unicode/Is/Me.pl b/lib/unicode/Is/Me.pl index 00f446d87d..23ef860d8e 100644 --- a/lib/unicode/Is/Me.pl +++ b/lib/unicode/Is/Me.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0488 0489 diff --git a/lib/unicode/Is/Mirrored.pl b/lib/unicode/Is/Mirrored.pl index e2c55a6443..d324f506a7 100644 --- a/lib/unicode/Is/Mirrored.pl +++ b/lib/unicode/Is/Mirrored.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 0029 diff --git a/lib/unicode/Is/Mn.pl b/lib/unicode/Is/Mn.pl index aba40afa57..803e038d97 100644 --- a/lib/unicode/Is/Mn.pl +++ b/lib/unicode/Is/Mn.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/N.pl b/lib/unicode/Is/N.pl index 1291f2713f..8667e774b4 100644 --- a/lib/unicode/Is/N.pl +++ b/lib/unicode/Is/N.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Nd.pl b/lib/unicode/Is/Nd.pl index 2ab8156d77..259bb891f6 100644 --- a/lib/unicode/Is/Nd.pl +++ b/lib/unicode/Is/Nd.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Nl.pl b/lib/unicode/Is/Nl.pl index 8f1af469bb..bdeefd5761 100644 --- a/lib/unicode/Is/Nl.pl +++ b/lib/unicode/Is/Nl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2160 2183 diff --git a/lib/unicode/Is/No.pl b/lib/unicode/Is/No.pl index 6a57dc5f89..13cac3b0e8 100644 --- a/lib/unicode/Is/No.pl +++ b/lib/unicode/Is/No.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00b2 00b3 diff --git a/lib/unicode/Is/P.pl b/lib/unicode/Is/P.pl index 8fd1e8e183..97330ecd48 100644 --- a/lib/unicode/Is/P.pl +++ b/lib/unicode/Is/P.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/Pc.pl b/lib/unicode/Is/Pc.pl index 342efac344..e14874d011 100644 --- a/lib/unicode/Is/Pc.pl +++ b/lib/unicode/Is/Pc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 005f diff --git a/lib/unicode/Is/Pd.pl b/lib/unicode/Is/Pd.pl index 58997ca7e9..b4a2ffbe8f 100644 --- a/lib/unicode/Is/Pd.pl +++ b/lib/unicode/Is/Pd.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 002d diff --git a/lib/unicode/Is/Pe.pl b/lib/unicode/Is/Pe.pl index 8879191c34..2b5bd3eeb9 100644 --- a/lib/unicode/Is/Pe.pl +++ b/lib/unicode/Is/Pe.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0029 diff --git a/lib/unicode/Is/Pf.pl b/lib/unicode/Is/Pf.pl index 166c64bbb6..b27a4f6851 100644 --- a/lib/unicode/Is/Pf.pl +++ b/lib/unicode/Is/Pf.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00bb diff --git a/lib/unicode/Is/Pi.pl b/lib/unicode/Is/Pi.pl index 7f2243d5d8..dbbae44957 100644 --- a/lib/unicode/Is/Pi.pl +++ b/lib/unicode/Is/Pi.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00ab diff --git a/lib/unicode/Is/Po.pl b/lib/unicode/Is/Po.pl index e6b8b02520..849ee17867 100644 --- a/lib/unicode/Is/Po.pl +++ b/lib/unicode/Is/Po.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/Print.pl b/lib/unicode/Is/Print.pl index 9560586065..c3adba6c5c 100644 --- a/lib/unicode/Is/Print.pl +++ b/lib/unicode/Is/Print.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0020 007e diff --git a/lib/unicode/Is/Ps.pl b/lib/unicode/Is/Ps.pl index a7dee379eb..90f18098b7 100644 --- a/lib/unicode/Is/Ps.pl +++ b/lib/unicode/Is/Ps.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 diff --git a/lib/unicode/Is/Punct.pl b/lib/unicode/Is/Punct.pl index 9e8684d6fc..9e088bab85 100644 --- a/lib/unicode/Is/Punct.pl +++ b/lib/unicode/Is/Punct.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/S.pl b/lib/unicode/Is/S.pl index 8851766e9f..a304e17ff5 100644 --- a/lib/unicode/Is/S.pl +++ b/lib/unicode/Is/S.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0024 diff --git a/lib/unicode/Is/Sc.pl b/lib/unicode/Is/Sc.pl index 5776bd6a57..adeb3e4336 100644 --- a/lib/unicode/Is/Sc.pl +++ b/lib/unicode/Is/Sc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0024 diff --git a/lib/unicode/Is/Sk.pl b/lib/unicode/Is/Sk.pl index b5f6e591a7..52f88ae004 100644 --- a/lib/unicode/Is/Sk.pl +++ b/lib/unicode/Is/Sk.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 005e diff --git a/lib/unicode/Is/Sm.pl b/lib/unicode/Is/Sm.pl index ae9424cc62..540da63e64 100644 --- a/lib/unicode/Is/Sm.pl +++ b/lib/unicode/Is/Sm.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002b diff --git a/lib/unicode/Is/So.pl b/lib/unicode/Is/So.pl index 4e9dfc2b5e..3caf617b66 100644 --- a/lib/unicode/Is/So.pl +++ b/lib/unicode/Is/So.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a6 00a7 diff --git a/lib/unicode/Is/Space.pl b/lib/unicode/Is/Space.pl index 701329ff82..1625dce03b 100644 --- a/lib/unicode/Is/Space.pl +++ b/lib/unicode/Is/Space.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 000d diff --git a/lib/unicode/Is/SylA.pl b/lib/unicode/Is/SylA.pl index be1107822d..6a3fc47eb9 100644 --- a/lib/unicode/Is/SylA.pl +++ b/lib/unicode/Is/SylA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1203 diff --git a/lib/unicode/Is/SylAA.pl b/lib/unicode/Is/SylAA.pl index 45d6692de7..6d1bd6dadd 100644 --- a/lib/unicode/Is/SylAA.pl +++ b/lib/unicode/Is/SylAA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 140b diff --git a/lib/unicode/Is/SylAAI.pl b/lib/unicode/Is/SylAAI.pl index a8b03d4c6c..83134b6a16 100644 --- a/lib/unicode/Is/SylAAI.pl +++ b/lib/unicode/Is/SylAAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1402 diff --git a/lib/unicode/Is/SylAI.pl b/lib/unicode/Is/SylAI.pl index b70d793bc6..e639bd0438 100644 --- a/lib/unicode/Is/SylAI.pl +++ b/lib/unicode/Is/SylAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 141c diff --git a/lib/unicode/Is/SylC.pl b/lib/unicode/Is/SylC.pl index e2a1601dd3..fb8b08e300 100644 --- a/lib/unicode/Is/SylC.pl +++ b/lib/unicode/Is/SylC.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1205 diff --git a/lib/unicode/Is/SylE.pl b/lib/unicode/Is/SylE.pl index b3c3e60437..d762748c69 100644 --- a/lib/unicode/Is/SylE.pl +++ b/lib/unicode/Is/SylE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1204 diff --git a/lib/unicode/Is/SylEE.pl b/lib/unicode/Is/SylEE.pl index 0a22f78f65..9f8ff07dc1 100644 --- a/lib/unicode/Is/SylEE.pl +++ b/lib/unicode/Is/SylEE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1408 diff --git a/lib/unicode/Is/SylI.pl b/lib/unicode/Is/SylI.pl index f80790ce44..29bc70f4fa 100644 --- a/lib/unicode/Is/SylI.pl +++ b/lib/unicode/Is/SylI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1202 diff --git a/lib/unicode/Is/SylII.pl b/lib/unicode/Is/SylII.pl index 4516d7a32a..2dcd49f24e 100644 --- a/lib/unicode/Is/SylII.pl +++ b/lib/unicode/Is/SylII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1404 diff --git a/lib/unicode/Is/SylN.pl b/lib/unicode/Is/SylN.pl index 215463fb7f..d7d90c7a0d 100644 --- a/lib/unicode/Is/SylN.pl +++ b/lib/unicode/Is/SylN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3093 diff --git a/lib/unicode/Is/SylO.pl b/lib/unicode/Is/SylO.pl index a0a6f7dd01..2c795f0291 100644 --- a/lib/unicode/Is/SylO.pl +++ b/lib/unicode/Is/SylO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1206 diff --git a/lib/unicode/Is/SylOO.pl b/lib/unicode/Is/SylOO.pl index 12280534b1..27c8032275 100644 --- a/lib/unicode/Is/SylOO.pl +++ b/lib/unicode/Is/SylOO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1406 1407 diff --git a/lib/unicode/Is/SylU.pl b/lib/unicode/Is/SylU.pl index c458382f25..117d981ee6 100644 --- a/lib/unicode/Is/SylU.pl +++ b/lib/unicode/Is/SylU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1201 diff --git a/lib/unicode/Is/SylV.pl b/lib/unicode/Is/SylV.pl index b6e76f81b9..e5a39ed654 100644 --- a/lib/unicode/Is/SylV.pl +++ b/lib/unicode/Is/SylV.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1200 diff --git a/lib/unicode/Is/SylWA.pl b/lib/unicode/Is/SylWA.pl index 9bb529ed01..39e94caabe 100644 --- a/lib/unicode/Is/SylWA.pl +++ b/lib/unicode/Is/SylWA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 120f diff --git a/lib/unicode/Is/SylWAA.pl b/lib/unicode/Is/SylWAA.pl index 5f3b784d0c..cd560eb8a9 100644 --- a/lib/unicode/Is/SylWAA.pl +++ b/lib/unicode/Is/SylWAA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1419 141b diff --git a/lib/unicode/Is/SylWC.pl b/lib/unicode/Is/SylWC.pl index 3ad968c505..4272b8934f 100644 --- a/lib/unicode/Is/SylWC.pl +++ b/lib/unicode/Is/SylWC.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 124d diff --git a/lib/unicode/Is/SylWE.pl b/lib/unicode/Is/SylWE.pl index 9e32c0e602..c4c5ba99ae 100644 --- a/lib/unicode/Is/SylWE.pl +++ b/lib/unicode/Is/SylWE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 124c diff --git a/lib/unicode/Is/SylWEE.pl b/lib/unicode/Is/SylWEE.pl index c4bccb5240..d3160290fe 100644 --- a/lib/unicode/Is/SylWEE.pl +++ b/lib/unicode/Is/SylWEE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 15d9 diff --git a/lib/unicode/Is/SylWI.pl b/lib/unicode/Is/SylWI.pl index 4cd6c6789c..c914b07a56 100644 --- a/lib/unicode/Is/SylWI.pl +++ b/lib/unicode/Is/SylWI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 124a diff --git a/lib/unicode/Is/SylWII.pl b/lib/unicode/Is/SylWII.pl index bd68aeadf5..c990e437e6 100644 --- a/lib/unicode/Is/SylWII.pl +++ b/lib/unicode/Is/SylWII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1410 1411 diff --git a/lib/unicode/Is/SylWO.pl b/lib/unicode/Is/SylWO.pl index 7676564130..a73cbdd0ec 100644 --- a/lib/unicode/Is/SylWO.pl +++ b/lib/unicode/Is/SylWO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1412 1413 diff --git a/lib/unicode/Is/SylWOO.pl b/lib/unicode/Is/SylWOO.pl index 0ab766a553..6e92f850a7 100644 --- a/lib/unicode/Is/SylWOO.pl +++ b/lib/unicode/Is/SylWOO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1414 1416 diff --git a/lib/unicode/Is/SylWU.pl b/lib/unicode/Is/SylWU.pl index 76af7aefad..d165f41d72 100644 --- a/lib/unicode/Is/SylWU.pl +++ b/lib/unicode/Is/SylWU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 15d6 diff --git a/lib/unicode/Is/SylWV.pl b/lib/unicode/Is/SylWV.pl index 8bd8849042..6a06ae9087 100644 --- a/lib/unicode/Is/SylWV.pl +++ b/lib/unicode/Is/SylWV.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1248 diff --git a/lib/unicode/Is/Upper.pl b/lib/unicode/Is/Upper.pl index 4fda655dc4..16f875241d 100644 --- a/lib/unicode/Is/Upper.pl +++ b/lib/unicode/Is/Upper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/Word.pl b/lib/unicode/Is/Word.pl index 2f13b382af..1c76c60b78 100644 --- a/lib/unicode/Is/Word.pl +++ b/lib/unicode/Is/Word.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/XDigit.pl b/lib/unicode/Is/XDigit.pl index e55682500b..b26a3b4074 100644 --- a/lib/unicode/Is/XDigit.pl +++ b/lib/unicode/Is/XDigit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Z.pl b/lib/unicode/Is/Z.pl index 22a9792d4f..03416c0265 100644 --- a/lib/unicode/Is/Z.pl +++ b/lib/unicode/Is/Z.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0020 diff --git a/lib/unicode/Is/Zl.pl b/lib/unicode/Is/Zl.pl index 0989e1d920..5f127ce33a 100644 --- a/lib/unicode/Is/Zl.pl +++ b/lib/unicode/Is/Zl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2028 diff --git a/lib/unicode/Is/Zp.pl b/lib/unicode/Is/Zp.pl index 3b23446fe9..4e38303e72 100644 --- a/lib/unicode/Is/Zp.pl +++ b/lib/unicode/Is/Zp.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2029 diff --git a/lib/unicode/Is/Zs.pl b/lib/unicode/Is/Zs.pl index db18055ea4..56cf9e4662 100644 --- a/lib/unicode/Is/Zs.pl +++ b/lib/unicode/Is/Zs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0020 diff --git a/lib/unicode/Jamo.txt b/lib/unicode/Jamo.txt index 6910ab924e..ea288f0397 100644 --- a/lib/unicode/Jamo.txt +++ b/lib/unicode/Jamo.txt @@ -1,69 +1,91 @@ -#Value; Short Name; Unicode Name -U+1100; G; HANGUL CHOSEONG KIYEOK -U+1101; GG; HANGUL CHOSEONG SSANGKIYEOK -U+1102; N; HANGUL CHOSEONG NIEUN -U+1103; D; HANGUL CHOSEONG TIKEUT -U+1104; DD; HANGUL CHOSEONG SSANGTIKEUT -U+1105; R; HANGUL CHOSEONG RIEUL -U+1106; M; HANGUL CHOSEONG MIEUM -U+1107; B; HANGUL CHOSEONG PIEUP -U+1108; BB; HANGUL CHOSEONG SSANGPIEUP -U+1109; S; HANGUL CHOSEONG SIOS -U+110A; SS; HANGUL CHOSEONG SSANGSIOS -U+110B; ; HANGUL CHOSEONG IEUNG -U+110C; J; HANGUL CHOSEONG CIEUC -U+110D; JJ; HANGUL CHOSEONG SSANGCIEUC -U+110E; C; HANGUL CHOSEONG CHIEUCH -U+110F; K; HANGUL CHOSEONG KHIEUKH -U+1110; T; HANGUL CHOSEONG THIEUTH -U+1111; P; HANGUL CHOSEONG PHIEUPH -U+1112; H; HANGUL CHOSEONG HIEUH -U+1161; A; HANGUL JUNGSEONG A -U+1162; AE; HANGUL JUNGSEONG AE -U+1163; YA; HANGUL JUNGSEONG YA -U+1164; YAE; HANGUL JUNGSEONG YAE -U+1165; EO; HANGUL JUNGSEONG EO -U+1166; E; HANGUL JUNGSEONG E -U+1167; YEO; HANGUL JUNGSEONG YEO -U+1168; YE; HANGUL JUNGSEONG YE -U+1169; O; HANGUL JUNGSEONG O -U+116A; WA; HANGUL JUNGSEONG WA -U+116B; WAE; HANGUL JUNGSEONG WAE -U+116C; OE; HANGUL JUNGSEONG OE -U+116D; YO; HANGUL JUNGSEONG YO -U+116E; U; HANGUL JUNGSEONG U -U+116F; WEO; HANGUL JUNGSEONG WEO -U+1170; WE; HANGUL JUNGSEONG WE -U+1171; WI; HANGUL JUNGSEONG WI -U+1172; YU; HANGUL JUNGSEONG YU -U+1173; EU; HANGUL JUNGSEONG EU -U+1174; YI; HANGUL JUNGSEONG YI -U+1175; I; HANGUL JUNGSEONG I -U+11A8; G; HANGUL JONGSEONG KIYEOK -U+11A9; GG; HANGUL JONGSEONG SSANGKIYEOK -U+11AA; GS; HANGUL JONGSEONG KIYEOK-SIOS -U+11AB; N; HANGUL JONGSEONG NIEUN -U+11AC; NJ; HANGUL JONGSEONG NIEUN-CIEUC -U+11AD; NH; HANGUL JONGSEONG NIEUN-HIEUH -U+11AE; D; HANGUL JONGSEONG TIKEUT -U+11AF; L; HANGUL JONGSEONG RIEUL -U+11B0; LG; HANGUL JONGSEONG RIEUL-KIYEOK -U+11B1; LM; HANGUL JONGSEONG RIEUL-MIEUM -U+11B2; 
LB; HANGUL JONGSEONG RIEUL-PIEUP -U+11B3; LS; HANGUL JONGSEONG RIEUL-SIOS -U+11B4; LT; HANGUL JONGSEONG RIEUL-THIEUTH -U+11B5; LP; HANGUL JONGSEONG RIEUL-PHIEUPH -U+11B6; LH; HANGUL JONGSEONG RIEUL-HIEUH -U+11B7; M; HANGUL JONGSEONG MIEUM -U+11B8; B; HANGUL JONGSEONG PIEUP -U+11B9; BS; HANGUL JONGSEONG PIEUP-SIOS -U+11BA; S; HANGUL JONGSEONG SIOS -U+11BB; SS; HANGUL JONGSEONG SSANGSIOS -U+11BC; NG; HANGUL JONGSEONG IEUNG -U+11BD; J; HANGUL JONGSEONG CIEUC -U+11BE; C; HANGUL JONGSEONG CHIEUCH -U+11BF; K; HANGUL JONGSEONG KHIEUKH -U+11C0; T; HANGUL JONGSEONG THIEUTH -U+11C1; P; HANGUL JONGSEONG PHIEUPH -U+11C2; H; HANGUL JONGSEONG HIEUH +# Jamo-3.txt +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# This file defines the Jamo Short Name property, repeating +# in machine readable form the information printed in Table 4-4 +# of The Unicode Standard, Version 3.0. +# +# See sections 3.11 and 4.4 of The Unicode Standard, Version 3.0 +# for more information. +# +# Each line contains two fields, separated by a semicolon. +# +# The first field gives the code point, in 4-digit hexadecimal +# form, of a combining jamo character that participates in +# the algorithmic determination Hangul syllable character names. +# The second field gives the Jamo Short Name as a one-, two-, +# or three-character ASCII string (or in one case, for U+110B, +# the null string). 
+# +# ############################################################# + +1100; G # HANGUL CHOSEONG KIYEOK +1101; GG # HANGUL CHOSEONG SSANGKIYEOK +1102; N # HANGUL CHOSEONG NIEUN +1103; D # HANGUL CHOSEONG TIKEUT +1104; DD # HANGUL CHOSEONG SSANGTIKEUT +1105; R # HANGUL CHOSEONG RIEUL +1106; M # HANGUL CHOSEONG MIEUM +1107; B # HANGUL CHOSEONG PIEUP +1108; BB # HANGUL CHOSEONG SSANGPIEUP +1109; S # HANGUL CHOSEONG SIOS +110A; SS # HANGUL CHOSEONG SSANGSIOS +110B; # HANGUL CHOSEONG IEUNG +110C; J # HANGUL CHOSEONG CIEUC +110D; JJ # HANGUL CHOSEONG SSANGCIEUC +110E; C # HANGUL CHOSEONG CHIEUCH +110F; K # HANGUL CHOSEONG KHIEUKH +1110; T # HANGUL CHOSEONG THIEUTH +1111; P # HANGUL CHOSEONG PHIEUPH +1112; H # HANGUL CHOSEONG HIEUH +1161; A # HANGUL JUNGSEONG A +1162; AE # HANGUL JUNGSEONG AE +1163; YA # HANGUL JUNGSEONG YA +1164; YAE # HANGUL JUNGSEONG YAE +1165; EO # HANGUL JUNGSEONG EO +1166; E # HANGUL JUNGSEONG E +1167; YEO # HANGUL JUNGSEONG YEO +1168; YE # HANGUL JUNGSEONG YE +1169; O # HANGUL JUNGSEONG O +116A; WA # HANGUL JUNGSEONG WA +116B; WAE # HANGUL JUNGSEONG WAE +116C; OE # HANGUL JUNGSEONG OE +116D; YO # HANGUL JUNGSEONG YO +116E; U # HANGUL JUNGSEONG U +116F; WEO # HANGUL JUNGSEONG WEO +1170; WE # HANGUL JUNGSEONG WE +1171; WI # HANGUL JUNGSEONG WI +1172; YU # HANGUL JUNGSEONG YU +1173; EU # HANGUL JUNGSEONG EU +1174; YI # HANGUL JUNGSEONG YI +1175; I # HANGUL JUNGSEONG I +11A8; G # HANGUL JONGSEONG KIYEOK +11A9; GG # HANGUL JONGSEONG SSANGKIYEOK +11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS +11AB; N # HANGUL JONGSEONG NIEUN +11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC +11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH +11AE; D # HANGUL JONGSEONG TIKEUT +11AF; L # HANGUL JONGSEONG RIEUL +11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK +11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM +11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP +11B3; LS # HANGUL JONGSEONG RIEUL-SIOS +11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH +11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH +11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH 
+11B7; M # HANGUL JONGSEONG MIEUM +11B8; B # HANGUL JONGSEONG PIEUP +11B9; BS # HANGUL JONGSEONG PIEUP-SIOS +11BA; S # HANGUL JONGSEONG SIOS +11BB; SS # HANGUL JONGSEONG SSANGSIOS +11BC; NG # HANGUL JONGSEONG IEUNG +11BD; J # HANGUL JONGSEONG CIEUC +11BE; C # HANGUL JONGSEONG CHIEUCH +11BF; K # HANGUL JONGSEONG KHIEUKH +11C0; T # HANGUL JONGSEONG THIEUTH +11C1; P # HANGUL JONGSEONG PHIEUPH +11C2; H # HANGUL JONGSEONG HIEUH diff --git a/lib/unicode/JamoShort.pl b/lib/unicode/JamoShort.pl index 760bcba03e..19cd4290c6 100644 --- a/lib/unicode/JamoShort.pl +++ b/lib/unicode/JamoShort.pl @@ -1,72 +1,72 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; -1100 G -1101 GG -1102 N -1103 D -1104 DD -1105 R -1106 M -1107 B -1108 BB -1109 S -110a SS -110b -110c J -110d JJ -110e C -110f K -1110 T -1111 P -1112 H -1161 A -1162 AE -1163 YA -1164 YAE -1165 EO -1166 E -1167 YEO -1168 YE -1169 O -116a WA -116b WAE -116c OE -116d YO -116e U -116f WEO -1170 WE -1171 WI -1172 YU -1173 EU -1174 YI -1175 I -11a8 G -11a9 GG -11aa GS -11ab N -11ac NJ -11ad NH -11ae D -11af L -11b0 LG -11b1 LM -11b2 LB -11b3 LS -11b4 LT -11b5 LP -11b6 LH -11b7 M -11b8 B -11b9 BS -11ba S -11bb SS -11bc NG -11bd J -11be C -11bf K -11c0 T -11c1 P -11c2 H +1100 G # HANGUL CHOSEONG KIYEOK +1101 GG # HANGUL CHOSEONG SSANGKIYEOK +1102 N # HANGUL CHOSEONG NIEUN +1103 D # HANGUL CHOSEONG TIKEUT +1104 DD # HANGUL CHOSEONG SSANGTIKEUT +1105 R # HANGUL CHOSEONG RIEUL +1106 M # HANGUL CHOSEONG MIEUM +1107 B # HANGUL CHOSEONG PIEUP +1108 BB # HANGUL CHOSEONG SSANGPIEUP +1109 S # HANGUL CHOSEONG SIOS +110a SS # HANGUL CHOSEONG SSANGSIOS +110b # HANGUL CHOSEONG IEUNG +110c J # HANGUL CHOSEONG CIEUC +110d JJ # HANGUL CHOSEONG SSANGCIEUC +110e C # HANGUL CHOSEONG CHIEUCH +110f K # HANGUL CHOSEONG KHIEUKH +1110 T # HANGUL CHOSEONG THIEUTH +1111 P # HANGUL 
CHOSEONG PHIEUPH +1112 H # HANGUL CHOSEONG HIEUH +1161 A # HANGUL JUNGSEONG A +1162 AE # HANGUL JUNGSEONG AE +1163 YA # HANGUL JUNGSEONG YA +1164 YAE # HANGUL JUNGSEONG YAE +1165 EO # HANGUL JUNGSEONG EO +1166 E # HANGUL JUNGSEONG E +1167 YEO # HANGUL JUNGSEONG YEO +1168 YE # HANGUL JUNGSEONG YE +1169 O # HANGUL JUNGSEONG O +116a WA # HANGUL JUNGSEONG WA +116b WAE # HANGUL JUNGSEONG WAE +116c OE # HANGUL JUNGSEONG OE +116d YO # HANGUL JUNGSEONG YO +116e U # HANGUL JUNGSEONG U +116f WEO # HANGUL JUNGSEONG WEO +1170 WE # HANGUL JUNGSEONG WE +1171 WI # HANGUL JUNGSEONG WI +1172 YU # HANGUL JUNGSEONG YU +1173 EU # HANGUL JUNGSEONG EU +1174 YI # HANGUL JUNGSEONG YI +1175 I # HANGUL JUNGSEONG I +11a8 G # HANGUL JONGSEONG KIYEOK +11a9 GG # HANGUL JONGSEONG SSANGKIYEOK +11aa GS # HANGUL JONGSEONG KIYEOK-SIOS +11ab N # HANGUL JONGSEONG NIEUN +11ac NJ # HANGUL JONGSEONG NIEUN-CIEUC +11ad NH # HANGUL JONGSEONG NIEUN-HIEUH +11ae D # HANGUL JONGSEONG TIKEUT +11af L # HANGUL JONGSEONG RIEUL +11b0 LG # HANGUL JONGSEONG RIEUL-KIYEOK +11b1 LM # HANGUL JONGSEONG RIEUL-MIEUM +11b2 LB # HANGUL JONGSEONG RIEUL-PIEUP +11b3 LS # HANGUL JONGSEONG RIEUL-SIOS +11b4 LT # HANGUL JONGSEONG RIEUL-THIEUTH +11b5 LP # HANGUL JONGSEONG RIEUL-PHIEUPH +11b6 LH # HANGUL JONGSEONG RIEUL-HIEUH +11b7 M # HANGUL JONGSEONG MIEUM +11b8 B # HANGUL JONGSEONG PIEUP +11b9 BS # HANGUL JONGSEONG PIEUP-SIOS +11ba S # HANGUL JONGSEONG SIOS +11bb SS # HANGUL JONGSEONG SSANGSIOS +11bc NG # HANGUL JONGSEONG IEUNG +11bd J # HANGUL JONGSEONG CIEUC +11be C # HANGUL JONGSEONG CHIEUCH +11bf K # HANGUL JONGSEONG KHIEUKH +11c0 T # HANGUL JONGSEONG THIEUTH +11c1 P # HANGUL JONGSEONG PHIEUPH +11c2 H # HANGUL JONGSEONG HIEUH END diff --git a/lib/unicode/Name.pl b/lib/unicode/Name.pl index ef8979f0d1..f5c4c56f21 100644 --- a/lib/unicode/Name.pl +++ b/lib/unicode/Name.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f <control> @@ -10549,4 +10549,6 @@ fffa INTERLINEAR ANNOTATION SEPARATOR fffb INTERLINEAR ANNOTATION TERMINATOR fffc OBJECT REPLACEMENT CHARACTER fffd REPLACEMENT CHARACTER +f0000 ffffd <Plane 15 Private Use, First> +100000 10fffd <Plane 16 Private Use, First> END diff --git a/lib/unicode/Number.pl b/lib/unicode/Number.pl index b9e596f88c..1f5c2c84c7 100644 --- a/lib/unicode/Number.pl +++ b/lib/unicode/Number.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0 @@ -165,6 +165,16 @@ return <<'END'; 0f27 7 0f28 8 0f29 9 +0f2a 1/2 +0f2b 3/2 +0f2c 5/2 +0f2d 7/2 +0f2e 9/2 +0f2f 11/2 +0f30 13/2 +0f31 15/2 +0f32 17/2 +0f33 -1/2 1040 0 1041 1 1042 2 @@ -385,6 +395,20 @@ return <<'END'; 3038 10 3039 20 303a 30 +3192 1 +3193 2 +3194 3 +3195 4 +3220 1 +3221 2 +3222 3 +3223 4 +3224 5 +3225 6 +3226 7 +3227 8 +3228 9 +3229 10 3280 1 3281 2 3282 3 diff --git a/lib/unicode/Props.txt b/lib/unicode/PropList.txt index b3548e164d..e47f127987 100644 --- a/lib/unicode/Props.txt +++ b/lib/unicode/PropList.txt @@ -1,4 +1,4 @@ -Property dump: UnicodeData-3.0.0.txt +Property dump: UnicodeData-3.0.1.txt ******************************************* @@ -192,89 +192,6 @@ FF61..FF65 (5 chars) ******************************************* -Property dump for: 0x00800000 (Delimiter) - -0000 -0009..000D (5 chars) -001C..0023 (8 chars) -0028..0029 (2 chars) -002C -002E..002F (2 chars) -003A..003B (2 chars) -003F -005B..005D (3 chars) -007B..007D (3 chars) -0085 -00A0..00A1 (2 chars) -00A6..00A7 (2 chars) -00AB -00B6 -00BB -00BF -037E -0387 -055C..055E (3 chars) -0589 -060C -061B -061F -066B..066C (2 chars) -06D4 -0700..0709 (10 chars) -070B..070D (3 chars) -0964..0965 (2 chars) -0DF4 -0E5A..0E5B (2 
chars) -0F0B -0F0D..0F12 (6 chars) -0F3A..0F3D (4 chars) -104A..104B (2 chars) -10FB -1361..1368 (8 chars) -166D..166E (2 chars) -1680 -169B..169C (2 chars) -16EB..16ED (3 chars) -17D4..17D6 (3 chars) -17DA -1802..1805 (4 chars) -1808..1809 (2 chars) -2000..200B (12 chars) -2016 -2018..201F (8 chars) -2028..2029 (2 chars) -202F -2039..203A (2 chars) -203C..203D (2 chars) -2045..2046 (2 chars) -2048..2049 (2 chars) -207D..207E (2 chars) -208D..208E (2 chars) -2329..232A (2 chars) -3000..3002 (3 chars) -3008..3011 (10 chars) -3014..301B (8 chars) -301D..301F (3 chars) -30FB -FD3E..FD3F (2 chars) -FE35..FE44 (16 chars) -FE50..FE52 (3 chars) -FE54..FE57 (4 chars) -FE59..FE5F (7 chars) -FE68 -FF01..FF03 (3 chars) -FF08..FF09 (2 chars) -FF0C -FF0E..FF0F (2 chars) -FF1A..FF1B (2 chars) -FF1F -FF3B..FF3D (3 chars) -FF5B..FF5D (3 chars) -FF61..FF65 (5 chars) -FFE4 - -******************************************* - Property dump for: 0x80000003 (Line Separator) 2028 @@ -591,7 +508,6 @@ Property dump for: 0x20000004 (Combining) 20D0..20E3 (20 chars) 302A..302F (6 chars) 3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -689,7 +605,6 @@ Property dump for: 0x20040000 (Non-spacing) 20D0..20E3 (20 chars) 302A..302F (6 chars) 3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -1161,7 +1076,6 @@ Property dump for: 0x20000001 (Alphabetic) 0DF2..0DF3 (2 chars) 0E01..0E3A (58 chars) 0E40..0E45 (6 chars) -0E47 0E4D 0E81..0E82 (2 chars) 0E84 @@ -1355,7 +1269,7 @@ Property dump for: 0x20010000 (Diacritic) 0CCD 0D4D 0DCA -0E48..0E4C (5 chars) +0E47..0E4C (6 chars) 0E4E 0EC8..0ECC (5 chars) 0F18..0F19 (2 chars) @@ -2077,8 +1991,7 @@ Property dump for: 0x01000000 (Bidi: Left-to-Right) 4E00..9FA5 (20902 chars) A000..A48C (1165 chars) AC00..D7A3 (11172 chars) -D800..F7FF (8192 chars) -F900..FA2D (302 chars) +D800..FA2D (8750 chars) FB00..FB06 (7 chars) FB13..FB17 (5 chars) FF21..FF3A (26 chars) @@ -2088,6 +2001,8 @@ FFC2..FFC7 (6 chars) FFCA..FFCF (6 
chars) FFD2..FFD7 (6 chars) FFDA..FFDC (3 chars) +F0000..FFFFD (65534 chars) +100000..10FFFD (65534 chars) ******************************************* @@ -2331,7 +2246,6 @@ Property dump for: 0x0C000000 (Bidi: Non-spacing Mark) 20D0..20E3 (20 chars) 302A..302F (6 chars) 3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -2483,6 +2397,8 @@ FFFC..FFFD (2 chars) Property dump for: 0x80000005 (Private Use) E000..F8FF (6400 chars) +F0000..FFFFD (65534 chars) +100000..10FFFD (65534 chars) ******************************************* @@ -3281,6 +3197,28 @@ DB80..DBFF (128 chars) ******************************************* +Property dump for: 0x8000000A (Not a Character) + +FFFE..FFFF (2 chars) +1FFFE..1FFFF (2 chars) +2FFFE..2FFFF (2 chars) +3FFFE..3FFFF (2 chars) +4FFFE..4FFFF (2 chars) +5FFFE..5FFFF (2 chars) +6FFFE..6FFFF (2 chars) +7FFFE..7FFFF (2 chars) +8FFFE..8FFFF (2 chars) +9FFFE..9FFFF (2 chars) +AFFFE..AFFFF (2 chars) +BFFFE..BFFFF (2 chars) +CFFFE..CFFFF (2 chars) +DFFFE..DFFFF (2 chars) +EFFFE..EFFFF (2 chars) +FFFFE..FFFFF (2 chars) +10FFFE..10FFFF (2 chars) + +******************************************* + Property dump for: 0x00000000 (Unassigned Code Value) 0220..0221 (2 chars) @@ -3637,4 +3575,18 @@ FFD8..FFD9 (2 chars) FFDD..FFDF (3 chars) FFE7 FFEF..FFF8 (10 chars) +10000..1FFFD (65534 chars) +20000..2FFFD (65534 chars) +30000..3FFFD (65534 chars) +40000..4FFFD (65534 chars) +50000..5FFFD (65534 chars) +60000..6FFFD (65534 chars) +70000..7FFFD (65534 chars) +80000..8FFFD (65534 chars) +90000..9FFFD (65534 chars) +A0000..AFFFD (65534 chars) +B0000..BFFFD (65534 chars) +C0000..CFFFD (65534 chars) +D0000..DFFFD (65534 chars) +E0000..EFFFD (65534 chars) diff --git a/lib/unicode/README.perl b/lib/unicode/README.perl new file mode 100644 index 0000000000..2294bd3cc4 --- /dev/null +++ b/lib/unicode/README.perl @@ -0,0 +1,37 @@ +The *.txt files were copied 30 Aug 2000 from + + http://www.unicode.org/Public/UNIDATA/ + +and most of them were 
renamed to better fit 8.3 filename limitations, +by which the Perl distribution tries to live. + + www.unicode.org Perl distribution + + ArabicShaping.txt ArabShap.txt + BidiMirroring.txt BidiMirr.txt + Blocks.txt Blocks.txt + CaseFolding.txt CaseFold.txt + CompositionExclusions.txt CompExcl.txt + EastAsianWidth.txt EAWidth.txt (0) + Index.txt Index.txt + Jamo.txt Jamo.txt + LineBreak.txt LineBrk.txt (0) + NamesList.html NamesList.html (0) + NamesList.txt Names.txt + PropList.txt PropList.txt + ReadMe.txt ReadMe.txt + SpecialCasing.txt SpecCase.txt + UnicodeCharacterDatabase.html UCD301.html + UnicodeData.html UCDFF301.html + UnicodeData.txt Unicode.301 + +The two big files, NormalizationTest.txt (1.7MB) and Unihan.txt (15.8MB) +were not copied for space considerations. The files marked with (0) had +not been updated since Unicode 3.0.0 (10 Sep 1999) + +The *.pl files are generated from these files by the 'mktables.PL' script. + +While the files have been renamed the links in the html files haven't. + +-- +jhi@iki.fi diff --git a/lib/unicode/ReadMe.txt b/lib/unicode/ReadMe.txt index c2c4aee6a5..b8a643ca27 100644 --- a/lib/unicode/ReadMe.txt +++ b/lib/unicode/ReadMe.txt @@ -1,45 +1,13 @@ -June 23, 1999 +August 30, 2000 -This directory contains the initial release for Unicode 3.0. +This directory contains the first update release for Unicode 3.0. This release consists of corrections and additions to the -Unicode Character Database, to match the publication of -The Unicode Standard, Version 3.0. +Unicode Character Database for the Unicode Standard, +Version 3.0.1. Detailed documentation of the files constituting the Unicode Character Database (contributory data files for the standard itself) can now be found in UnicodeCharacterDatabase.html. 
--------------------------------------------------------------------------- -NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE - -The files have been copied from - - ftp://ftp.unicode.org/Public/3.0-Update/ - -and most of them have been renamed to better fit 8.3 filename limitations. - -long name at unicode.org short name latest '#' ------------------------- ---------- ---------- -ArabicShaping-#.txt ArabShap.txt 2 -Blocks-#.txt Blocks.txt 3 -CompositionExclusions-#.txt CompExcl.txt 1 -EastAsianWidth-#.txt EAWidth.txt 3 -Index-#.txt Index.txt 3.0.0 -Jamo-#.txt Jamo.txt 2 -LineBreak-#.txt LineBrk.txt 5 -NamesList-#.txt Names.txt 3.0.0 -NamesList-#.html NamesList.html 1 -PropList-#.txt Props.txt 3.0.0 -SpecialCasing-#.txt SpecCase.txt 2 -UnicodeData-#.txt Unicode.300 3.0.0 -UnicodeData-#.html Unicode3.html 3.0.0 -UnicodeCharacterDatabase-#.html UCD300.html 3.0.0 - -The *.pl files are generated from these files by the 'mktables.PL' script. - -While the files have been renamed the links in the html files haven't. - --- -jhi@iki.fi diff --git a/lib/unicode/SpecCase.txt b/lib/unicode/SpecCase.txt index af002ef4cf..94662d384f 100644 --- a/lib/unicode/SpecCase.txt +++ b/lib/unicode/SpecCase.txt @@ -1,4 +1,4 @@ -# SpecialCasing-2.txt +# SpecialCasing-3.txt # # Special Casing Properties # @@ -26,26 +26,33 @@ # <upper> := <code_point_list> # <code_point_list> := <code_point> (<s>+ <code_point>)* # <code_point> := <hex><hex><hex><hex> -# <hex> := [0-1A-Fa-f] +# <hex> := [0-9A-Fa-f] # <s> := <space> # -# <condition_list> := <locale>? (<s>+ <context>)* -# <locale> := <ISO_3166_code> ( "_" <ISO_639_code> )? ( "_" <variant> )? +# <condition_list> := <locale>? (<s>+ <context>)* <sep> +# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> )? ( "_" <variant> )? 
# <ISO_3166_code> := 2-letter country code, # as in http://www.unicode.org/unicode/onlinedat/countries.html # <ISO_639_code> := 2-letter code, # as in http://www.unicode.org/unicode/onlinedat/languages.html -# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN" +# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN" | "AFTER_i" +# +# A condition list overrides the normal behavior if all of the listed conditions are true. +# Case distinctions in the condition list are not significant. # -# A condition list overrides the normal behavior if any of the listed conditions is true. # FINAL: The letter is not followed by a letter of category L* (e.g. Ll, Lt, Lu, Lm, or Lo). # MODERN: The mapping is only used for modern text. +# AFTER_i: The last base character was "i" 0069 +# # Conditions preceded by "NON_" represent the negation of the condition # # New contexts may be added in the future. -# Parsers of this file must be prepared to deal with that situation. # Additional whitespace around elements is optional. Blank lines are ignored in parsing. # On any line, all text following "#" is a comment, and are ignored in parsing. 
+# +# Parsers of this file must be prepared to deal future additions to this format: +# * Additional contexts +# * Additional fields # ================================================================================ # ================================================================================ @@ -76,7 +83,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # No corresponding uppercase precomposed character -0149; 0149; 02BC 006E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON @@ -199,7 +206,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA -# Note: the following cases are not included, since they would normalize in lowercasing +# Note: the following cases are not included, since they would case-fold in lowercasing # 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA # 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA @@ -208,12 +215,16 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # Locale-sensitive mappings # ================================================================================ +# Lithuanian + +0307; 0307; ; ; lt AFTER_i; # Remove DOT ABOVE after "i" with upper or titlecase + # Turkish -0049; 0131; 0049; 0049; TR; # LATIN CAPITAL LETTER I -0069; 0069; 0130; 0130; TR; # LATIN SMALL LETTER I +0049; 0131; 0049; 0049; tr; # LATIN CAPITAL LETTER I +0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I # Note: the following cases are already in the UnicodeData file. 
-# 0131; 0131; 0049; 0049; TR; # LATIN SMALL LETTER DOTLESS I -# 0130; 0069; 0130; 0130; TR; # LATIN CAPITAL LETTER I WITH DOT ABOVE +# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I +# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE diff --git a/lib/unicode/To/Digit.pl b/lib/unicode/To/Digit.pl index a96bc1c1a6..4bace1e662 100644 --- a/lib/unicode/To/Digit.pl +++ b/lib/unicode/To/Digit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 0000 diff --git a/lib/unicode/To/Lower.pl b/lib/unicode/To/Lower.pl index a78a7e4492..89755b7c3a 100644 --- a/lib/unicode/To/Lower.pl +++ b/lib/unicode/To/Lower.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a 0061 diff --git a/lib/unicode/To/Title.pl b/lib/unicode/To/Title.pl index d8f5c048d4..cadeaf909b 100644 --- a/lib/unicode/To/Title.pl +++ b/lib/unicode/To/Title.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a 0041 diff --git a/lib/unicode/To/Upper.pl b/lib/unicode/To/Upper.pl index 1fc7637753..d6c03d34bd 100644 --- a/lib/unicode/To/Upper.pl +++ b/lib/unicode/To/Upper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0061 007a 0041 diff --git a/lib/unicode/UCD300.html b/lib/unicode/UCD301.html index 113d311f01..284349e264 100644 --- a/lib/unicode/UCD300.html +++ b/lib/unicode/UCD301.html @@ -4,342 +4,198 @@ <html> - - <head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <meta http-equiv="Content-Language" content="en-us"> - <meta name="GENERATOR" content="Microsoft FrontPage 4.0"> - <meta name="ProgId" content="FrontPage.Editor.Document"> - <link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css"> - <title>Unicode Character Database</title> - </head> - - <body> - - -<h1>UNICODE CHARACTER DATABASE<br> -Version 3.0.0</h1> - +<h1>UNICODE CHARACTER DATABASE<br> +Version 3.0.1</h1> <table border="1" cellspacing="2" cellpadding="0" height="87" width="100%"> - <tr> - <td valign="TOP" width="144">Revision</td> - - <td valign="TOP">3.0.0</td> - + <td valign="TOP">3.0.1</td> </tr> - <tr> - <td valign="TOP" width="144">Authors</td> - <td valign="TOP">Mark Davis and Ken Whistler</td> - </tr> - <tr> - <td valign="TOP" width="144">Date</td> - - <td valign="TOP">1999-09-11</td> - + <td valign="TOP">2000-08-17</td> </tr> - <tr> - <td valign="TOP" width="144">This Version</td> - - <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> - + <td valign="TOP"><a + href="http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html">http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html</a></td> </tr> - <tr> - <td valign="TOP" width="144">Previous Version</td> - - <td valign="TOP">n/a</td> - + <td valign="TOP"><a + href="http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> </tr> - <tr> - <td valign="TOP" width="144">Latest Version</td> - - <td 
valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> - + <td valign="TOP"><a + href="http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html">http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html</a></td> </tr> - </table> - -<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p> - -<h2>Disclaimer</h2> - -<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims - -are made as to fitness for any particular purpose. No warranties of any kind are - -expressed or implied. The recipient agrees to determine applicability of - -information provided. If this file has been purchased on magnetic or optical - -media from Unicode, Inc., the sole remedy for any claim will be exchange of - -defective media within 90 days of receipt.</p> - -<p>This disclaimer is applicable for all other data files accompanying the - -Unicode Character Database, some of which have been compiled by the Unicode - -Consortium, and some of which have been supplied by other sources.</p> - -<h2>Limitations on Rights to Redistribute This Data</h2> - -<p>Recipient is granted the right to make copies in any form for internal - -distribution and to freely use the information supplied in the creation of - -products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode - -Character Database can be redistributed to third parties or other organizations - -(whether for profit or not) as long as this notice and the disclaimer notice are - -retained. Information can be extracted from these files and used in - -documentation or programs, as long as there is an accompanying notice indicating - -the source.</p> - -<h2>Introduction</h2> - -<p>The Unicode Character Database is a set of files that define the Unicode - -character properties and internal mappings. 
For more information about character - -properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The - -Unicode Standard</a></i>.</p> - -<p>The Unicode Character Database has been updated to reflect Version 3.0 of the - -Unicode Standard, with many characters added to those published in Version 2.0. - -A number of corrections have also been made to case mappings or other errors in - -the database noted since the publication of Version 2.0. Normative bidirectional - -properties have also been modified to reflect decisions of the Unicode Technical - -Committee.</p> - -<p>For more information on versions of the Unicode Standard and how to reference - -them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p> - -<h2>Conformance</h2> - -<p>Character properties may be either normative or informative. <i>Normative</i> - -means that implementations that claim conformance to the Unicode Standard (at a - -particular version) and which make use of a particular property or field must - -follow the specifications of the standard for that property or field in order to - -be conformant. The term <i>normative</i> when applied to a property or field of - -the Unicode Character Database, does <i>not</i> mean that the value of that - -field will never change. Corrections and extensions to the standard in the - -future may require minor changes to normative values, even though the Unicode - -Technical Committee strives to minimize such changes. An<i> informative </i>property - -or field is strongly recommended, but a conformant implementation is free to use - -or change such values as it may require while still being conformant to the - -standard. Particular implementations may choose to override the properties and - -mappings that are not normative. 
In that case, it is up to the implementer to - -establish a protocol to convey that information.</p> - -<h2>Files</h2> - -<p>The following summarizes the files in the Unicode Character Database. For - -more information about these files, see the referenced technical report or - -section of Unicode Standard, Version 3.0.</p> - -<p><b>UnicodeData.txt (Chapter 4)</b> - -<ul> - - <li>The main file in the Unicode Character Database.</li> - - <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>. - - This file also characterizes which properties are normative and which are - - informative.</li> - -</ul> - -<p><b>PropList.txt (Chapter 4)</b> - -<ul> - - <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i> - - and <i>Mathematical</i>, among others.</li> - -</ul> - -<p><b>SpecialCasing.txt (Chapter 4)</b> - -<ul> - - <li>List of informative special casing properties, including one-to-many - - mappings such as SHARP S => "SS", and locale-specific mappings, - - such as for Turkish <i>dotless i</i>.</li> - -</ul> - -<p><b>Blocks.txt (Chapter 14)</b> - -<ul> - - <li>List of normative block names.</li> - -</ul> - -<p><b>Jamo.txt (Chapter 4)</b> - -<ul> - - <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names - - algorithmically.</li> - -</ul> - -<p><b>ArabicShaping.txt (Section 8.2)</b> - -<ul> - - <li>Basic Arabic and Syriac character shaping properties, such as initial, - - medial and final shapes. These properties are normative for minimal shaping - - of Arabic and Syriac. </li> - -</ul> - -<p><b>NamesList.txt (Chapter 14)</b> - -<ul> - - <li>This file duplicates some of the material in the UnicodeData file, and - - adds informative annotations uses in the character charts, as printed in the - - Unicode Standard. </li> - - <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches - - the appropriate version of the book. 
Changes in the Unicode Character - - Database since then may not be reflected in these files, since they are - - primarily of archival interest.</li> - -</ul> - -<p><b>Index.txt (Chapter 14)</b> - -<ul> - - <li>Informative index to Unicode characters, as printed in the Unicode - - Standard</li> - - <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches - - the appropriate version of the book. Changes in the Unicode Character - - Database since then may not be reflected in these files, since they are - - primarily of archival interest.</li> - -</ul> - -<p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15 - -Unicode Normalization Forms</a>)</b> - -<ul> - - <li>Normative properties for normalization.</li> - -</ul> - -<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR - -#14: Line Breaking Properties</a>)</b> - -<ul> - - <li>Normative and informative properties for line breaking. To see which - - properties are informative and which are normative, consult UTR#14.</li> - -</ul> - -<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR - -#11: East Asian Character Width</a>)</b> - -<ul> - - <li>Informative properties for determining the choice of wide vs. narrow - - glyphs in East Asian contexts.</li> - -</ul> - -<p><b>diffXvY.txt</b> - -<ul> - - <li>Mechanically-generated informative files containing accumulated - - differences between successive versions of UnicodeData.txt</li> - -</ul> - - - -</body> - - - -</html> - +<p align="center">Copyright © 1995-2000 Unicode, Inc. All Rights reserved.</p> +<h2>Disclaimer</h2> +<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims +are made as to fitness for any particular purpose. No warranties of any kind are +expressed or implied. The recipient agrees to determine applicability of +information provided. 
If this file has been purchased on magnetic or optical +media from Unicode, Inc., the sole remedy for any claim will be exchange of +defective media within 90 days of receipt.</p> +<p>This disclaimer is applicable for all other data files accompanying the +Unicode Character Database, some of which have been compiled by the Unicode +Consortium, and some of which have been supplied by other sources.</p> +<h2>Limitations on Rights to Redistribute This Data</h2> +<p>Recipient is granted the right to make copies in any form for internal +distribution and to freely use the information supplied in the creation of +products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode +Character Database can be redistributed to third parties or other organizations +(whether for profit or not) as long as this notice and the disclaimer notice are +retained. Information can be extracted from these files and used in +documentation or programs, as long as there is an accompanying notice indicating +the source.</p> +<h2>Introduction</h2> +<p>The Unicode Character Database is a set of files that define the Unicode +character properties and internal mappings. For more information about character +properties and mappings, see <i><a +href="http://www.unicode.org/unicode/uni2book/u2.html">The Unicode Standard</a></i>.</p> +<p>The Unicode Character Database has been updated to reflect Version 3.0 of the +Unicode Standard, with many characters added to those published in Version 2.0. +A number of corrections have also been made to case mappings or other errors in +the database noted since the publication of Version 2.0. 
Normative bidirectional +properties have also been modified to reflect decisions of the Unicode Technical +Committee.</p> +<p>For more information on versions of the Unicode Standard and how to reference +them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p> +<h2>Conformance</h2> +<p>Character properties may be either normative or informative. <i>Normative</i> +means that implementations that claim conformance to the Unicode Standard (at a +particular version) and which make use of a particular property or field must +follow the specifications of the standard for that property or field in order to +be conformant. The term <i>normative</i> when applied to a property or field of +the Unicode Character Database, does <i>not</i> mean that the value of that +field will never change. Corrections and extensions to the standard in the +future may require minor changes to normative values, even though the Unicode +Technical Committee strives to minimize such changes. An<i> informative </i>property +or field is strongly recommended, but a conformant implementation is free to use +or change such values as it may require while still being conformant to the +standard. Particular implementations may choose to override the properties and +mappings that are not normative. In that case, it is up to the implementer to +establish a protocol to convey that information.</p> +<h2>Files</h2> +<p>The following summarizes the files in the Unicode Character Database. 
For +more information about these files, see the referenced technical report(s) or +section of Unicode Standard, Version 3.0.</p> +<p><b>UnicodeData.txt (Chapter 4, <a +href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>, <a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b> +<ul> + <li>The main file in the Unicode Character Database.</li> + <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>. + This file also characterizes which properties are normative and which are + informative.</li> +</ul> +<p><b>PropList.txt (Chapter 4)</b> +<ul> + <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i> + and <i>Mathematical</i>, among others.</li> +</ul> +<p><b>SpecialCasing.txt (Chapter 4, <a +href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>)</b> +<ul> + <li>List of informative special casing properties, including one-to-many + mappings such as SHARP S => "SS", and locale-specific mappings, + such as for Turkish <i>dotless i</i>.</li> +</ul> +<p><b>Blocks.txt (Chapter 14)</b> +<ul> + <li>List of normative block names.</li> +</ul> +<p><b>Jamo.txt (Chapter 4)</b> +<ul> + <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names + algorithmically.</li> +</ul> +<p><b>ArabicShaping.txt (Section 8.2)</b> +<ul> + <li>Basic Arabic and Syriac character shaping properties, such as initial, + medial and final shapes. These properties are normative for minimal shaping + of Arabic and Syriac.</li> +</ul> +<p><b>NamesList.txt (Chapter 14)</b> +<ul> + <li>This file duplicates some of the material in the UnicodeData file, and + adds informative annotations uses in the character charts, as printed in the + Unicode Standard.</li> + <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches + the appropriate version of the book. 
Changes in the Unicode Character + Database since then may not be reflected in these files, since they are + primarily of archival interest.</li> +</ul> +<p><b>Index.txt (Chapter 14)</b> +<ul> + <li>Informative index to Unicode characters, as printed in the Unicode + Standard</li> + <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches + the appropriate version of the book. Changes in the Unicode Character + Database since then may not be reflected in these files, since they are + primarily of archival interest.</li> +</ul> +<p><b>CompositionExclusions.txt (<a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b> +<ul> + <li>Normative properties for normalization.</li> +</ul> +<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UAX +#14: Line Breaking Properties</a>)</b> +<ul> + <li>Normative and informative properties for line breaking. To see which + properties are informative and which are normative, consult UAX #14.</li> +</ul> +<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UAX +#11: East Asian Character Width</a>)</b> +<ul> + <li>Informative properties for determining the choice of wide vs. 
narrow + glyphs in East Asian contexts.</li> +</ul> +<p><b>BidiMirroring.txt</b><b> (<a +href="http://www.unicode.org/unicode/reports/tr9/">UAX #9: The +Bidirectional Algorithm</a>)</b></p> +<ul> + <li>Informative properties for substituting characters in an implementation of + bidirectional mirroring.</li> +</ul> +<p><b>CaseFolding.txt (<a href="http://www.unicode.org/unicode/reports/tr21/">UTR +#21: Case Mappings</a>)</b></p> +<ul> + <li>Informative file mapping characters to their case-folded form.</li> +</ul> +<p><b>NormalizationTest.txt (<a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b></p> +<ul> + <li>Normative test file for conformance to Unicode Normalization Forms.</li> +</ul> +<p><b>diffXvY.txt</b> +<ul> + <li>Mechanically-generated informative files containing accumulated + differences between successive versions of UnicodeData.txt</li> +</ul> + +</body> + +</html> diff --git a/lib/unicode/Unicode.300 b/lib/unicode/Unicode.301 index 6a54d3d74e..6804a75065 100644 --- a/lib/unicode/Unicode.300 +++ b/lib/unicode/Unicode.301 @@ -130,7 +130,7 @@ 0081;<control>;Cc;0;BN;;;;;N;;;;; 0082;<control>;Cc;0;BN;;;;;N;BREAK PERMITTED HERE;;;; 0083;<control>;Cc;0;BN;;;;;N;NO BREAK HERE;;;; -0084;<control>;Cc;0;BN;;;;;N;INDEX;;;; +0084;<control>;Cc;0;BN;;;;;N;;;;; 0085;<control>;Cc;0;B;;;;;N;NEXT LINE;;;; 0086;<control>;Cc;0;BN;;;;;N;START OF SELECTED AREA;;;; 0087;<control>;Cc;0;BN;;;;;N;END OF SELECTED AREA;;;; @@ -420,7 +420,7 @@ 01A3;LATIN SMALL LETTER OI;Ll;0;L;;;;;N;LATIN SMALL LETTER O I;gha;01A2;;01A2 01A4;LATIN CAPITAL LETTER P WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER P HOOK;;;01A5; 01A5;LATIN SMALL LETTER P WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER P HOOK;;01A4;;01A4 -01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;;;0280; +01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;*;;0280; 01A7;LATIN CAPITAL LETTER TONE TWO;Lu;0;L;;;;;N;;;;01A8; 01A8;LATIN SMALL LETTER TONE TWO;Ll;0;L;;;;;N;;;01A7;;01A7 
01A9;LATIN CAPITAL LETTER ESH;Lu;0;L;;;;;N;;;;0283; @@ -608,7 +608,7 @@ 027D;LATIN SMALL LETTER R WITH TAIL;Ll;0;L;;;;;N;LATIN SMALL LETTER R HOOK;;;; 027E;LATIN SMALL LETTER R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER FISHHOOK R;;;; 027F;LATIN SMALL LETTER REVERSED R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER REVERSED FISHHOOK R;;;; -0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;;01A6;;01A6 +0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;*;01A6;;01A6 0281;LATIN LETTER SMALL CAPITAL INVERTED R;Ll;0;L;;;;;N;;;;; 0282;LATIN SMALL LETTER S WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER S HOOK;;;; 0283;LATIN SMALL LETTER ESH;Ll;0;L;;;;;N;;;01A9;;01A9 @@ -2639,16 +2639,16 @@ 0F27;TIBETAN DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 0F28;TIBETAN DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 0F29;TIBETAN DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; -0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;;N;;;;; -0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;;N;;;;; -0F2C;TIBETAN DIGIT HALF THREE;No;0;L;;;;;N;;;;; -0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;;N;;;;; -0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;;N;;;;; -0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;;N;;;;; -0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;;N;;;;; -0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;;N;;;;; -0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;;N;;;;; -0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;;N;;;;; +0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;1/2;N;;;;; +0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;3/2;N;;;;; +0F2C;TIBETAN DIGIT HALF THREE;No;0;L;;;;5/2;N;;;;; +0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;7/2;N;;;;; +0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;9/2;N;;;;; +0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;11/2;N;;;;; +0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;13/2;N;;;;; +0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;15/2;N;;;;; +0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;17/2;N;;;;; +0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;-1/2;N;;;;; 0F34;TIBETAN MARK BSDUS RTAGS;So;0;L;;;;;N;;du ta;;; 0F35;TIBETAN MARK NGAS BZUNG NYI ZLA;Mn;220;NSM;;;;;N;TIBETAN HONORIFIC UNDER RING;nge zung nyi da;;; 0F36;TIBETAN MARK CARET 
-DZUD RTAGS BZHI MIG CAN;So;0;L;;;;;N;;dzu ta shi mig chen;;; @@ -2789,7 +2789,7 @@ 0FCA;TIBETAN SYMBOL NOR BU NYIS -KHYIL;So;0;L;;;;;N;;norbu nyi khyi;;; 0FCB;TIBETAN SYMBOL NOR BU GSUM -KHYIL;So;0;L;;;;;N;;norbu sum khyi;;; 0FCC;TIBETAN SYMBOL NOR BU BZHI -KHYIL;So;0;L;;;;;N;;norbu shi khyi;;; -0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;;;; +0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;dena sum;;; 1000;MYANMAR LETTER KA;Lo;0;L;;;;;N;;;;; 1001;MYANMAR LETTER KHA;Lo;0;L;;;;;N;;;;; 1002;MYANMAR LETTER GA;Lo;0;L;;;;;N;;;;; @@ -7518,10 +7518,10 @@ 318E;HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;; 3190;IDEOGRAPHIC ANNOTATION LINKING MARK;So;0;L;;;;;N;KANBUN TATETEN;Kanbun Tateten;;; 3191;IDEOGRAPHIC ANNOTATION REVERSE MARK;So;0;L;;;;;N;KAERITEN RE;Kaeriten;;; -3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;;N;KAERITEN ITI;Kaeriten;;; -3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;;N;KAERITEN NI;Kaeriten;;; -3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;;N;KAERITEN SAN;Kaeriten;;; -3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;;N;KAERITEN SI;Kaeriten;;; +3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;1;N;KAERITEN ITI;Kaeriten;;; +3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;2;N;KAERITEN NI;Kaeriten;;; +3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;3;N;KAERITEN SAN;Kaeriten;;; +3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;4;N;KAERITEN SI;Kaeriten;;; 3196;IDEOGRAPHIC ANNOTATION TOP MARK;So;0;L;<super> 4E0A;;;;N;KAERITEN ZYOU;Kaeriten;;; 3197;IDEOGRAPHIC ANNOTATION MIDDLE MARK;So;0;L;<super> 4E2D;;;;N;KAERITEN TYUU;Kaeriten;;; 3198;IDEOGRAPHIC ANNOTATION BOTTOM MARK;So;0;L;<super> 4E0B;;;;N;KAERITEN GE;Kaeriten;;; @@ -7585,16 +7585,16 @@ 321A;PARENTHESIZED HANGUL PHIEUPH A;So;0;L;<compat> 0028 1111 1161 0029;;;;N;PARENTHESIZED HANGUL PA;;;; 321B;PARENTHESIZED HANGUL HIEUH A;So;0;L;<compat> 0028 1112 1161 0029;;;;N;PARENTHESIZED 
HANGUL HA;;;; 321C;PARENTHESIZED HANGUL CIEUC U;So;0;L;<compat> 0028 110C 116E 0029;;;;N;PARENTHESIZED HANGUL JU;;;; -3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;;N;;;;; -3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;;N;;;;; -3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;;N;;;;; -3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;;N;;;;; -3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;;N;;;;; -3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;;N;;;;; -3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;;N;;;;; -3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;;N;;;;; -3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;;N;;;;; -3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;;N;;;;; +3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;1;N;;;;; +3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;2;N;;;;; +3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;3;N;;;;; +3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;4;N;;;;; +3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;5;N;;;;; +3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;6;N;;;;; +3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;7;N;;;;; +3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;8;N;;;;; +3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;9;N;;;;; +3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;10;N;;;;; 322A;PARENTHESIZED IDEOGRAPH MOON;So;0;L;<compat> 0028 6708 0029;;;;N;;;;; 322B;PARENTHESIZED IDEOGRAPH FIRE;So;0;L;<compat> 0028 706B 0029;;;;N;;;;; 322C;PARENTHESIZED IDEOGRAPH WATER;So;0;L;<compat> 0028 6C34 0029;;;;N;;;;; @@ -10615,3 +10615,7 @@ FFFA;INTERLINEAR ANNOTATION SEPARATOR;Cf;0;BN;;;;;N;;;;; FFFB;INTERLINEAR ANNOTATION 
TERMINATOR;Cf;0;BN;;;;;N;;;;; FFFC;OBJECT REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; +F0000;<Plane 15 Private Use, First>;Co;0;L;;;;;N;;;;; +FFFFD;<Plane 15 Private Use, Last>;Co;0;L;;;;;N;;;;; +100000;<Plane 16 Private Use, First>;Co;0;L;;;;;N;;;;; +10FFFD;<Plane 16 Private Use, Last>;Co;0;L;;;;;N;;;;; diff --git a/lib/unicode/Unicode3.html b/lib/unicode/Unicode3.html deleted file mode 100644 index a08a25ec75..0000000000 --- a/lib/unicode/Unicode3.html +++ /dev/null @@ -1,1988 +0,0 @@ -<html> - - - -<head> - -<meta NAME="GENERATOR" CONTENT="Microsoft FrontPage 4.0"> - -<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> - -<link REL="stylesheet" HREF="http://www.unicode.org/unicode.css" TYPE="text/css"> - -<title>UnicodeData File Format</title> - -</head> - - - -<body> - - - -<h1>UnicodeData File Format<br> -Version 3.0.0</h1> - - - -<table BORDER="1" CELLSPACING="2" CELLPADDING="0" HEIGHT="87" WIDTH="100%"> - - <tr> - - <td VALIGN="TOP" width="144">Revision</td> - - <td VALIGN="TOP">3.0.0</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Authors</td> - - <td VALIGN="TOP">Mark Davis and Ken Whistler</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Date</td> - - <td VALIGN="TOP">1999-09-12</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">This Version</td> - - <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Previous Version</td> - - <td VALIGN="TOP">n/a</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Latest Version</td> - - <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td> - - </tr> - -</table> - - - -<p align="center">Copyright © 1995-1999 Unicode, Inc. 
All Rights reserved.<br> - -<i>For more information, including Disclamer and Limitations, see <a HREF="UnicodeCharacterDatabase-3.0.0.html">UnicodeCharacterDatabase-3.0.0.html</a> </i></p> - - - -<p>This document describes the format of the UnicodeData.txt file, which is one of the - -files in the Unicode Character Database. The document is divided into the following - -sections: - - - -<ul> - - <li><a HREF="#Field Formats">Field Formats</a> <ul> - - <li><a HREF="#General Category">General Category</a> </li> - - <li><a HREF="#Bidirectional Category">Bidirectional Category</a> </li> - - <li><a HREF="#Character Decomposition">Character Decomposition Mapping</a> </li> - - <li><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </li> - - <li><a HREF="#Decompositions and Normalization">Decompositions and Normalization</a> </li> - - <li><a HREF="#Case Mappings">Case Mappings</a> </li> - - </ul> - - </li> - - <li><a HREF="#Property Invariants">Property Invariants</a> </li> - - <li><a HREF="#Modification History">Modification History</a> </li> - -</ul> - - - -<p><b>Warning: </b>the information in this file does not completely describe the use and - -interpretation of Unicode character properties and behavior. It must be used in - -conjunction with the data in the other files in the Unicode Character Database, and relies - -on the notation and definitions supplied in <i><a href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html"> The Unicode -Standard</a></i>. All chapter references - -are to Version 3.0 of the standard.</p> - - - -<h2><a NAME="Field Formats"></a>Field Formats</h2> - - - -<p>The file consists of lines containing fields terminated by semicolons. Each line - -represents the data for one encoded character in the Unicode Standard. Every encoded - -character has a data entry, with the exception of certain special ranges, as detailed - -below. 
- - - -<ul> - - <li>There are six special ranges of characters that are represented only by their start and - - end characters, since the properties in the file are uniform, except for code values - - (which are all sequential and assigned). </li> - - <li>The names of CJK ideograph characters and the names and decompositions of Hangul - - syllable characters are algorithmically derivable. (See the Unicode Standard and <a - - HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode Technical Report #15</a> for - - more information). </li> - - <li>Surrogate code values and private use characters have no names. </li> - - <li>The Private Use character outside of the BMP (U+F0000..U+FFFFD, U+100000..U+10FFFD) are - - not listed. These correspond to surrogate pairs where the first surrogate is in the High - - Surrogate Private Use section. </li> - -</ul> - - - -<p>The exact ranges represented by start and end characters are: - - - -<ul> - - <li>CJK Ideographs Extension A (U+3400 - U+4DB5) </li> - - <li>CJK Ideographs (U+4E00 - U+9FA5) </li> - - <li>Hangul Syllables (U+AC00 - U+D7A3) </li> - - <li>Non-Private Use High Surrogates (U+D800 - U+DB7F) </li> - - <li>Private Use High Surrogates (U+DB80 - U+DBFF) </li> - - <li>Low Surrogates (U+DC00 - U+DFFF) </li> - - <li>The Private Use Area (U+E000 - U+F8FF) </li> - -</ul> - - - -<p>The following table describes the format and meaning of each field in a data entry in - -the UnicodeData file. 
Fields which contain normative information are so indicated.</p> - - - -<table BORDER="1" CELLSPACING="2" CELLPADDING="2"> - - <tr> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Field</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Name</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Status</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Explanation</th> - - </tr> - - <tr> - - <th VALIGN="top">0</th> - - <td VALIGN="top">Code value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">Code value in 4-digit hexadecimal format.</td> - - </tr> - - <tr> - - <th VALIGN="top">1</th> - - <td VALIGN="top">Character name</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">These names match exactly the names published in Chapter 14 of the - - Unicode Standard, Version 3.0.</td> - - </tr> - - <tr> - - <th VALIGN="top">2</th> - - <td VALIGN="top"><a HREF="#General Category">General Category</a> </td> - - <td VALIGN="top">normative / informative<br> - - (see below)</td> - - <td VALIGN="top">This is a useful breakdown into various "character types" which - - can be used as a default categorization in implementations. See below for a brief - - explanation.</td> - - </tr> - - <tr> - - <th VALIGN="top">3</th> - - <td VALIGN="top"><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">The classes used for the Canonical Ordering Algorithm in the Unicode - - Standard. These classes are also printed in Chapter 4 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">4</th> - - <td VALIGN="top"><a HREF="#Bidirectional Category">Bidirectional Category</a> </td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">See the list below for an explanation of the abbreviations used in this - - field. These are the categories required by the Bidirectional Behavior Algorithm in the - - Unicode Standard. 
These categories are summarized in Chapter 3 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">5</th> - - <td VALIGN="top"><a HREF="#Character Decomposition">Character Decomposition - Mapping</a></td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">In the Unicode Standard, not all of the mappings are full (maximal) - - decompositions. Recursive application of look-up for decompositions will, in all cases, - - lead to a maximal decomposition. The decomposition mappings match exactly the - - decomposition mappings published with the character names in the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">6</th> - - <td VALIGN="top">Decimal digit value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. If the character has the decimal digit property, - - as specified in Chapter 4 of the Unicode Standard, the value of that digit is represented - - with an integer value in this field</td> - - </tr> - - <tr> - - <th VALIGN="top">7</th> - - <td VALIGN="top">Digit value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. If the character represents a digit, not - - necessarily a decimal digit, the value is here. This covers digits which do not form - - decimal radix forms, such as the compatibility superscript digits</td> - - </tr> - - <tr> - - <th VALIGN="top">8</th> - - <td VALIGN="top">Numeric value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. If the character has the numeric property, as - - specified in Chapter 4 of the Unicode Standard, the value of that character is represented - - with an integer or rational number in this field. 
This includes fractions as, e.g., - - "1/5" for U+2155 VULGAR FRACTION ONE FIFTH Also included are numerical values - - for compatibility characters such as circled numbers.</td> - - </tr> - - <tr> - - <th VALIGN="top">8</th> - - <td VALIGN="top">Mirrored</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">If the character has been identified as a "mirrored" character - - in bidirectional text, this field has the value "Y"; otherwise "N". - - The list of mirrored characters is also printed in Chapter 4 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">10</th> - - <td VALIGN="top">Unicode 1.0 Name</td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">This is the old name as published in Unicode 1.0. This name is only - - provided when it is significantly different from the Unicode 3.0 name for the character.</td> - - </tr> - - <tr> - - <th VALIGN="top">11</th> - - <td VALIGN="top">10646 comment field</td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">This is the ISO 10646 comment field. It is in parantheses in the 10646 - - names list.</td> - - </tr> - - <tr> - - <th VALIGN="top">12</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Uppercase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Upper case equivalent mapping. If a character is part of an alphabet with - - case distinctions, and has an upper case equivalent, then the upper case equivalent is in - - this field. See the explanation below on case distinctions. These mappings are always - - one-to-one, not one-to-many or many-to-one. 
This field is informative.</td> - - </tr> - - <tr> - - <th VALIGN="top">13</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Lowercase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Similar to Uppercase mapping</td> - - </tr> - - <tr> - - <th VALIGN="top">14</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Titlecase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Similar to Uppercase mapping</td> - - </tr> - -</table> - - - -<h3><a NAME="General Category"></a>General Category</h3> - - - -<p>The values in this field are abbreviations for the following. Some of the values are - -normative, and some are informative. For more information, see the Unicode Standard.</p> - - - -<p><b>Note:</b> the standard does not assign information to control characters (except for - -certain cases in the Bidirectional Algorithm). Implementations will generally also assign - -categories to certain control characters, notably CR and LF, according to platform - -conventions.</p> - - - -<h4>Normative Categories</h4> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Abbr.</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lu</td> - - <td>Letter, Uppercase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Ll</td> - - <td>Letter, Lowercase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lt</td> - - <td>Letter, Titlecase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Mn</td> - - <td>Mark, Non-Spacing</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Mc</td> - - <td>Mark, Spacing Combining</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Me</td> - - <td>Mark, Enclosing</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Nd</td> - - <td>Number, Decimal Digit</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Nl</td> - - <td>Number, Letter</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">No</td> - - <td>Number, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zs</td> - - 
<td>Separator, Space</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zl</td> - - <td>Separator, Line</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zp</td> - - <td>Separator, Paragraph</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cc</td> - - <td>Other, Control</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cf</td> - - <td>Other, Format</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cs</td> - - <td>Other, Surrogate</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Co</td> - - <td>Other, Private Use</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cn</td> - - <td>Other, Not Assigned (no characters in the file have this property)</td> - - </tr> - -</table> - - - -<h4>Informative Categories</h4> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Abbr.</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lm</td> - - <td>Letter, Modifier</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lo</td> - - <td>Letter, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pc</td> - - <td>Punctuation, Connector</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pd</td> - - <td>Punctuation, Dash</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Ps</td> - - <td>Punctuation, Open</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pe</td> - - <td>Punctuation, Close</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pi</td> - - <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pf</td> - - <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Po</td> - - <td>Punctuation, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sm</td> - - <td>Symbol, Math</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sc</td> - - <td>Symbol, Currency</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sk</td> - - <td>Symbol, Modifier</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">So</td> - - <td>Symbol, Other</td> - - 
</tr> - -</table> - - - -<h3><a NAME="Bidirectional Category"></a>Bidirectional Category</h3> - - - -<p>Please refer to Chapter 3 for an explanation of the algorithm for Bidirectional - -Behavior and an explanation of the significance of these categories. An up-to-date version - -can be found on <a HREF="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical - -Report #9: The Bidirectional Algorithm</a>. These values are normative.</p> - - - -<table BORDER="0" CELLPADDING="2"> - - <tr> - - <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Type</th> - - <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>L</b></td> - - <td VALIGN="TOP">Left-to-Right</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>LRE</b></td> - - <td VALIGN="TOP">Left-to-Right Embedding</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>LRO</b></td> - - <td VALIGN="TOP">Left-to-Right Override</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>R</b></td> - - <td VALIGN="TOP">Right-to-Left</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>AL</b></td> - - <td VALIGN="TOP">Right-to-Left Arabic</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>RLE</b></td> - - <td VALIGN="TOP">Right-to-Left Embedding</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>RLO</b></td> - - <td VALIGN="TOP">Right-to-Left Override</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>PDF</b></td> - - <td VALIGN="TOP">Pop Directional Format</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>EN</b></td> - - <td VALIGN="TOP">European Number</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ES</b></td> - - <td VALIGN="TOP">European Number Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ET</b></td> - - <td VALIGN="TOP">European Number Terminator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>AN</b></td> - - <td VALIGN="TOP">Arabic Number</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>CS</b></td> - - <td VALIGN="TOP">Common Number Separator</td> - - </tr> - - <tr> - - <td 
VALIGN="TOP"><b>NSM</b></td> - - <td VALIGN="TOP">Non-Spacing Mark</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>BN</b></td> - - <td VALIGN="TOP">Boundary Neutral</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>B</b></td> - - <td VALIGN="TOP">Paragraph Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>S</b></td> - - <td VALIGN="TOP">Segment Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>WS</b></td> - - <td VALIGN="TOP">Whitespace</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ON</b></td> - - <td VALIGN="TOP">Other Neutrals</td> - - </tr> - -</table> - - - -<h3><a NAME="Character Decomposition"></a>Character Decomposition Mapping</h3> - - - -<p>The decomposition is a normative property of a character. The tags supplied with - -certain decomposition mappings generally indicate formatting information. Where no such - -tag is given, the mapping is designated as canonical. Conversely, the presence of a - -formatting tag also indicates that the mapping is a compatibility mapping and not a - -canonical mapping. In the absence of other formatting information in a compatibility - -mapping, the tag is used to distinguish it from canonical mappings.</p> - - - -<p>In some instances a canonical mapping or a compatibility mapping may consist of a - -single character. For a canonical mapping, this indicates that the character is a - -canonical equivalent of another single character. For a compatibility mapping, this - -indicates that the character is a compatibility equivalent of another single character. - -The compatibility formatting tags used are:</p> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th>Tag</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="CENTER"><font> </td> - - <td>A font variant (e.g. 
a blackletter form).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><noBreak> </td> - - <td>A no-break version of a space or hyphen.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><initial> </td> - - <td>An initial presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><medial> </td> - - <td>A medial presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><final> </td> - - <td>A final presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><isolated> </td> - - <td>An isolated presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><circle> </td> - - <td>An encircled form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><super> </td> - - <td>A superscript form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><sub> </td> - - <td>A subscript form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><vertical> </td> - - <td>A vertical layout presentation form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><wide> </td> - - <td>A wide (or zenkaku) compatibility character.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><narrow> </td> - - <td>A narrow (or hankaku) compatibility character.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><small> </td> - - <td>A small variant form (CNS compatibility).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><square> </td> - - <td>A CJK squared font variant.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><fraction> </td> - - <td>A vulgar fraction form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><compat> </td> - - <td>Otherwise unspecified compatibility character.</td> - - </tr> - -</table> - - - -<p><b>Reminder: </b>There is a difference between decomposition and decomposition mapping. 
- -The decomposition mappings are defined in the UnicodeData, while the decomposition (also - -termed "full decomposition") is defined in Chapter 3 to use those mappings -<i> - -recursively.</i> - - - -<ul> - - <li>The canonical decomposition is formed by recursively applying the canonical mappings, - - then applying the canonical reordering algorithm. </li> - - <li>The compatibility decomposition is formed by recursively applying the canonical <em>and</em> - - compatibility mappings, then applying the canonical reordering algorithm. </li> - -</ul> - - - -<h3><a NAME="Canonical Combining Classes"></a>Canonical Combining Classes</h3> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Value</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="RIGHT">0:</td> - - <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">1:</td> - - <td>Overlays and interior</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">7:</td> - - <td>Nuktas</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">8:</td> - - <td>Hiragana/Katakana voicing marks</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">9:</td> - - <td>Viramas</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">10:</td> - - <td>Start of fixed position classes</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">199:</td> - - <td>End of fixed position classes</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">200:</td> - - <td>Below left attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">202:</td> - - <td>Below attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">204:</td> - - <td>Below right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">208:</td> - - <td>Left attached (reordrant around single base character)</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">210:</td> - - <td>Right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">212:</td> - - <td>Above left attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">214:</td> - - 
<td>Above attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">216:</td> - - <td>Above right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">218:</td> - - <td>Below left</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">220:</td> - - <td>Below</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">222:</td> - - <td>Below right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">224:</td> - - <td>Left (reordrant around single base character)</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">226:</td> - - <td>Right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">228:</td> - - <td>Above left</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">230:</td> - - <td>Above</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">232:</td> - - <td>Above right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">233:</td> - - <td>Double below</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">234:</td> - - <td>Double above</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">240:</td> - - <td>Below (iota subscript)</td> - - </tr> - -</table> - - - -<p><strong>Note: </strong>some of the combining classes in this list do not currently have - -members but are specified here for completeness.</p> - - - -<h3><a NAME="Decompositions and Normalization"></a>Decompositions and Normalization</h3> - - - -<p>Decomposition is specified in Chapter 3. <a href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Technical Report #15: - -Normalization Forms</i></a> specifies the interaction between decomposition and normalization. The - -most up-to-date version is found on <a HREF="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>. 
- -That report specifies how the decompositions defined in UnicodeData.txt are used to derive - -normalized forms of Unicode text.</p> - - - -<p>Note that as of the 2.1.9 update of the Unicode Character Database, the decompositions - -in the UnicodeData.txt file can be used to recursively derive the full decomposition in - -canonical order, without the need to separately apply canonical reordering. However, - -canonical reordering of combining character sequences must still be applied in - -decomposition when normalizing source text which contains any combining marks.</p> - - - -<h3><a NAME="Case Mappings"></a>Case Mappings</h3> - - - -<p>The case mapping is an informative, default mapping. Case itself, on the other hand, - -has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A is normatively - -uppercase, but its lowercase mapping the 0061 LATIN SMALL LETTER A is informative. The - -reason for this is that case can be considered to be an inherent property of a particular - -character (and is usually, but not always, derivable from the presence of the terms - -"CAPITAL" or "SMALL" in the character name), but case mappings between - -characters are occasionally influenced by local conventions. For example, certain - -languages, such as Turkish, German, French, or Greek may have small deviations from the - -default mappings listed in UnicodeData.</p> - - - -<p>In addition to uppercase and lowercase, because of the inclusion of certain composite - -characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, there is a third case, - -called <i>titlecase</i>, which is used where the first letter of a word is to be - -capitalized (e.g. UPPERCASE, Titlecase, lowercase). An example of such a titlecase letter - -is 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</p> - - - -<p>The uppercase, titlecase and lowercase fields are only included for characters that - -have a single corresponding character of that type. 
Composite characters (such as - -"339D SQUARE CM") that do not have a single corresponding character of that type - -can be cased by decomposition.</p> - - - -<p>For compatibility with existing parsers, UnicodeData only contains case mappings for - -characters where they are one-to-one mappings; it also omits information about - -context-sensitive case mappings. Information about these special cases can be found in a - -separate data file, SpecialCasing.txt, - -which has been added starting with the 2.1.8 update to the Unicode data files. - -SpecialCasing.txt contains additional informative case mappings that are either not - -one-to-one or which are context-sensitive.</p> - - - -<h2><a NAME="Property Invariants"></a>Property Invariants</h2> - - - -<p>Values in UnicodeData.txt are subject to correction as errors are found; however, some - -characteristics of the categories themselves can be considered invariants. Applications - -may wish to take these invariants into account when choosing how to implement character - -properties. The following is a partial list of known invariants for the Unicode Character - -Database.</p> - - - -<h4>Database Fields</h4> - - - -<ul> - - <li>The number of fields in UnicodeData.txt is fixed. </li> - - <li>The order of the fields is also fixed. <ul> - - <li>Any additional information about character properties to be added in the future will - - appear in separate data tables, rather than being added on to the existing table or by - - subdivision or reinterpretation of existing fields. </li> - - </ul> - - </li> - -</ul> - - - -<h4>General Category</h4> - - - -<ul> - - <li>There will never be more than 32 General Category values. <ul> - - <li>It is very unlikely that the Unicode Technical Committee will subdivide the General - - Category partition any further, since that can cause implementations to misbehave. 
Because - - the General Category is limited to 32 values, 5 bits can be used to represent the - - information, and a 32-bit integer can be used as a bitmask to represent arbitrary sets of - - categories. </li> - - </ul> - - </li> - -</ul> - - - -<h4>Combining Classes</h4> - - - -<ul> - - <li>Combining classes are limited to the values 0 to 255. <ul> - - <li>In practice, there are far fewer than 256 values used. Implementations may take - - advantage of this fact for compression, since only the ordering of the non-zero values - - matters for the Canonical Reordering Algorithm. It is possible for up to 256 values to be - - used in the future; however, UTC decisions in the future may restrict the number of values - - to 128, since this has implementation advantages. [Signed bytes can be used without - - widening to ints in Java, for example.] </li> - - </ul> - - </li> - - <li>All characters other than those of General Category M* have the combining class 0. <ul> - - <li>Currently, all characters other than those of General Category Mn have the value 0. - - However, some characters of General Category Me or Mc may be given non-zero values in the - - future. </li> - - <li>The precise values above the value 0 are not invariant--only the relative ordering is - - considered normative. For example, it is not guaranteed in future versions that the class - - of U+05B4 will be precisely 14. </li> - - </ul> - - </li> - -</ul> - - - -<h4>Case</h4> - - - -<ul> - - <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters with an Upper, - - Lower, or Titlecase mapping are cased characters. <ul> - - <li>However, characters with the General Categories of Lu, Ll, or Lt may not always have - - case mappings, and case mappings may vary by locale. (See - - ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt). </li> - - </ul> - - </li> - -</ul> - - - -<h4>Canonical Decomposition</h4> - - - -<ul> - - <li>Canonical mappings are always in canonical order. 
</li> - - <li>Canonical mappings have only the first of a pair possibly further decomposing. </li> - - <li>Canonical decompositions are "transparent" to other character data: <ul> - - <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a)))</tt> </li> - - <li><tt>Category(a) = Category(principal(canonicalDecomposition(a)))</tt> </li> - - <li><tt>CombiningClass(a) = CombiningClass(principal(canonicalDecomposition(a)))</tt><br> - - where principal(a) is the first character not of type Mn, or the first character if all - - characters are of type Mn. </li> - - </ul> - - </li> - - <li>However, because there are sometimes missing case pairs, and because of some legacy - - characters, it is only generally true that: <ul> - - <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt> </li> - - <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt> </li> - - <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt> </li> - - </ul> - - </li> - -</ul> - - - -<h2><a NAME="Modification History"></a>Modification History</h2> - - - -<p>This section provides a summary of the changes between update versions of the Unicode - -Standard.</p> - - - -<h3><a href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0"> Unicode 3.0.0</a></h3> - - - -<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new characters and - -a number of property changes. These are summarized in Appendix D of <em>The Unicode - -Standard, Version 3.0.</em></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode 2.1.9</a> </h3> - - - -<p>Modifications made for Version 2.1.9 of UnicodeData.txt include: - - - -<ul> - - <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR. 
</li> - - <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE </li> - - <li>Corrected combining class for U+0F35 and U+0F37 to 220. </li> - - <li>Corrected combining class for U+0F71 to 129. </li> - - <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR. </li> - - <li>Added decompositions for several Greek symbol letters: U+03D0..U+03D2, U+03D5, - - U+03D6, U+03F0..U+03F2. </li> - - <li>Removed decompositions from the conjoining jamo block: U+1100..U+11F8. </li> - - <li>Changes to decomposition mappings for some Tibetan vowels for consistency in - - normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81) </li> - - <li>Updated the decomposition mappings for several Vietnamese characters with two diacritics - - (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, U+1ED9), so that the recursive - - decomposition can be generated directly in canonically reordered form (not a normative - - change). </li> - - <li>Updated the decomposition mappings for several Arabic compatibility characters involving - - shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin characters (U+1E1C, U+1E1D), so - - that the decompositions are generated directly in canonically reordered form (not a - - normative change). </li> - - <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, U+2028 LINE - - SEPARATOR. </li> - - <li>Changed BIDI category for extenders of General Category Lm: U+3005, U+3021..U+3035, - - U+FF9E, U+FF9F. </li> - - <li>Changed General Category and BIDI category for the Greek numeral signs: U+0374, U+0375. </li> - - <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL. </li> - - <li>Added Unicode 1.0 names for many Tibetan characters (informative). 
</li> - -</ul> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode 2.1.8</a> </h3> - - - -<p>Modifications made for Version 2.1.8 of UnicodeData.txt include: - - - -<ul> - - <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that - - decompositions involving iota subscript are derivable directly in canonically reordered - - form; this also has a bearing on simplification of casing of polytonic Greek. </li> - - <li>Changes in decompositions related to Greek tonos. These result from the clarification - - that monotonic Greek "tonos" should be equated with U+0301 COMBINING ACUTE, - - rather than with U+030D COMBINING VERTICAL LINE ABOVE. (All Greek characters in the Greek - - block involving "tonos"; some Greek characters in the polytonic Greek in the - - 1FXX block.) </li> - - <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0) </li> - - <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These changes - - simplify normalization. </li> - - <li>Removed canonical decomposition for Latin Candrabindu. (U+0310) </li> - - <li>Corrected error in canonical decomposition for U+1FF4. </li> - - <li>Added compatibility decompositions to clarify collation tables. (U+2100, U+2101, U+2105, - - U+2106, U+1E9A) </li> - - <li>A series of general category changes to assist the convergence of Unicode definition - - of identifier with ISO TR 10176: <ul> - - <li>So > Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B </li> - - <li>Po > Lo: U+0E2F, U+0EAF, U+3006 </li> - - <li>Lm > Sk: U+309B, U+309C </li> - - <li>Po > Pc: U+30FB, U+FF65 </li> - - <li>Ps/Pe > Mn: U+0F3E, U+0F3F </li> - - </ul> - - </li> - - <li>A series of bidi property changes for consistency. 
<ul> - - <li>L > ET: U+09F2, U+09F3 </li> - - <li>ON > L: U+3007 </li> - - <li>L > ON: U+0F3A..U+0F3D, U+037E, U+0387 </li> - - </ul> - - </li> - - <li>Add case mapping: U+01A6 <-> U+0280 </li> - - <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, U+203A. </li> - - <li>Changes to combining class values. Most Indic fixed position class non-spacing marks - - were changed to combining class 0. This fixes some inconsistencies in how canonical - - reordering would apply to Indic scripts, including Tibetan. Indic interacting top/bottom - - fixed position classes were merged into single (non-zero) classes as part of this change. - - Tibetan subjoined consonants are changed from combining class 6 to combining class 0. Thai - - pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari stress marks into generic - - above and below combining classes (U+0951, U+0952). </li> - - <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, etc., scattered - - positions to U+FA29) </li> - -</ul> - - - -<h3>Version 2.1.7</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3>Version 2.1.6</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode 2.1.5</a> </h3> - - - -<p>Modifications made for Version 2.1.5 of UnicodeData.txt include: - - - -<ul> - - <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation weighting will - - automatically result from the canonical equivalences. 
</li> - - <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, U+04E0, U+04E1, - - U+04E8, U+04E9 (the implication being that no canonical equivalence is claimed between - - these 8 characters and similar Latin letters), and updated 4 canonical decompositions for - - U+04DB, U+04DC, U+04EA, U+04EB to reflect the implied difference in the base character. </li> - - <li>Added Pi, and Pf categories and assigned the relevant quotation marks to those - - categories, based on the Unicode Technical Corrigendum on Quotation Characters. </li> - - <li>Updating of many bidi properties, following the advice of the ad hoc committee on bidi, - - and to make the bidi properties of compatibility characters more consistent. </li> - - <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, U+0F88..U+0F8B to make - - them non-combining, reflecting the combined opinion of Tibetan experts. </li> - - <li>Added case mapping for U+03F2. </li> - - <li>Corrected case mapping for U+0275. </li> - - <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. U+03F2. </li> - - <li>Corrected compatibility label for U+2121. </li> - - <li>Add specific entries for all the CJK compatibility ideographs, U+F900..U+FA2D, so the - - canonical decomposition for each (the URO character it is equivalent to) can be carried in - - the database. </li> - -</ul> - - - -<h3>Version 2.1.4</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3>Version 2.1.3</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode 2.1.2</a> </h3> - - - -<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the Unicode - -Standard, Version 2.1 (from Version 2.0) include: - - - -<ul> - - <li>Added two characters (U+20AC and U+FFFC). 
</li> - - <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007. </li> - - <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, U+03C2, U+1E9B. </li> - - <li>Changed combining order class for U+0F71. </li> - - <li>Corrected canonical decompositions for U+0F73, U+1FBE. </li> - - <li>Changed decomposition for U+FB1F from compatibility to canonical. </li> - - <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB. </li> - - <li>Corrected compatibility decompositions for U+2469, U+246A, U+3358. </li> - -</ul> - - - -<h3>Version 2.1.1</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode 2.0.0</a> </h3> - - - -<p>The modifications made in updating UnicodeData.txt for the Unicode - -Standard, Version 2.0 include: - - - -<ul> - - <li>Fixed decompositions with TONOS to use correct NSM: 030D. </li> - - <li>Removed old Hangul Syllables; mapping to new characters are in a separate table. </li> - - <li>Marked compatibility decompositions with additional tags. </li> - - <li>Changed old tag names for clarity. </li> - - <li>Revision of decompositions to use first-level decomposition, instead of maximal - - decomposition. </li> - - <li>Correction of all known errors in decompositions from earlier versions. </li> - - <li>Added control code names (as old Unicode names). </li> - - <li>Added Hangul Jamo decompositions. </li> - - <li>Added Number category to match properties list in book. </li> - - <li>Fixed categories of Koranic Arabic marks. </li> - - <li>Fixed categories of precomposed characters to match decomposition where possible. </li> - - <li>Added Hebrew cantillation marks and the Tibetan script. </li> - - <li>Added place holders for ranges such as CJK Ideographic Area and the Private Use Area. 
</li> - - <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of mistakes in the - - database. </li> - -</ul> - -</body> - -</html> - diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 23801399d7..608a3259d3 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -2,9 +2,9 @@ use bytes; -$UnicodeData = "Unicode.300"; +$UnicodeData = "Unicode.301"; $SyllableData = "syllables.txt"; -$PropData = "Props.txt"; +$PropData = "PropList.txt"; # Note: we try to keep filenames unique within first 8 chars. Using |