diff options
author | Karl <khw@karl.(none)> | 2009-01-26 19:31:42 -0700 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2009-01-27 08:17:46 +0100 |
commit | b6922eda63cca3fefe5f447d156d10f6b75bd8eb (patch) | |
tree | 4bf47166019364151abbfead15cc450c506db8fd /lib/unicore | |
parent | 3127de7dcb470aaa3997c4164384a5bfb3162ffe (diff) | |
download | perl-b6922eda63cca3fefe5f447d156d10f6b75bd8eb.tar.gz |
Change to use 5.1 Unicode file versions
Diffstat (limited to 'lib/unicore')
-rw-r--r-- | lib/unicore/NamedSqProv.txt | 393 | ||||
-rw-r--r-- | lib/unicore/PropValueAliases.txt | 508 | ||||
-rw-r--r-- | lib/unicore/README.perl | 10 | ||||
-rw-r--r-- | lib/unicore/mktables | 48 | ||||
-rw-r--r-- | lib/unicore/mktables.lst | 52 |
5 files changed, 868 insertions, 143 deletions
diff --git a/lib/unicore/NamedSqProv.txt b/lib/unicore/NamedSqProv.txt index 155fcc92dc..726806e54b 100644 --- a/lib/unicore/NamedSqProv.txt +++ b/lib/unicore/NamedSqProv.txt @@ -1,8 +1,8 @@ -# NamedSequencesProv-5.0.0.txt -# Date: 2006-05-23, 11:33 PST [KW] +# NamedSequencesProv-5.1.0.txt +# Date: 2008-02-14, 12:17 PST [KW] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2008 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UCD.html # @@ -17,7 +17,341 @@ # Note: The order of entries in this file is not significant. # However, entries are generally in script order corresponding # to block order in the Unicode Standard, to make it easier -# to find entries in the list. +# to find entries currently in the list. + +# ================================================ + +# Provisional entries for NamedSequences.txt. + +# Proposed additions for Tamil. 2007-10-19 +# +# These Tamil named sequences have been accepted for a future +# version of the Unicode Standard. 2008-02-08 +# +# All of the Tamil named sequences will be moved to +# the approved NamedSequences.txt, once the required +# synchronization with the normative clause of ISO/IEC 10646 +# has been completed, through the ISO ballotting process. +# +# A visual display of the Tamil named sequences is available +# in the documentation for Unicode 5.1. See: +# http://www.unicode.org/versions/Unicode5.1.0/ + +TAMIL CONSONANT K; 0B95 0BCD +TAMIL CONSONANT NG; 0B99 0BCD +TAMIL CONSONANT C; 0B9A 0BCD +TAMIL CONSONANT NY; 0B9E 0BCD +TAMIL CONSONANT TT; 0B9F 0BCD +TAMIL CONSONANT NN; 0BA3 0BCD +TAMIL CONSONANT T; 0BA4 0BCD +TAMIL CONSONANT N; 0BA8 0BCD +TAMIL CONSONANT P; 0BAA 0BCD +TAMIL CONSONANT M; 0BAE 0BCD +TAMIL CONSONANT Y; 0BAF 0BCD +TAMIL CONSONANT R; 0BB0 0BCD +TAMIL CONSONANT L; 0BB2 0BCD +TAMIL CONSONANT V; 0BB5 0BCD +TAMIL CONSONANT LLL;0BB4 0BCD +TAMIL CONSONANT LL; 0BB3 0BCD +TAMIL CONSONANT RR; 0BB1 0BCD +TAMIL CONSONANT NNN;0BA9 0BCD +TAMIL CONSONANT J; 0B9C 0BCD +TAMIL CONSONANT SH; 0BB6 0BCD +TAMIL CONSONANT SS; 0BB7 0BCD +TAMIL CONSONANT S; 0BB8 0BCD +TAMIL CONSONANT H; 0BB9 0BCD +TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD + +TAMIL SYLLABLE KAA; 0B95 0BBE +TAMIL SYLLABLE KI; 0B95 0BBF +TAMIL SYLLABLE KII; 0B95 0BC0 +TAMIL SYLLABLE KU; 0B95 0BC1 +TAMIL SYLLABLE KUU; 0B95 0BC2 +TAMIL SYLLABLE KE; 0B95 0BC6 +TAMIL SYLLABLE KEE; 0B95 0BC7 +TAMIL SYLLABLE KAI; 0B95 0BC8 +TAMIL SYLLABLE KO; 0B95 0BCA +TAMIL SYLLABLE KOO; 0B95 0BCB +TAMIL SYLLABLE KAU; 0B95 0BCC + +TAMIL SYLLABLE NGAA; 0B99 0BBE +TAMIL SYLLABLE NGI; 0B99 0BBF +TAMIL SYLLABLE NGII; 0B99 0BC0 +TAMIL SYLLABLE NGU; 0B99 0BC1 +TAMIL SYLLABLE NGUU; 0B99 0BC2 +TAMIL SYLLABLE NGE; 0B99 0BC6 +TAMIL SYLLABLE NGEE; 0B99 0BC7 +TAMIL SYLLABLE NGAI; 0B99 0BC8 +TAMIL SYLLABLE NGO; 0B99 0BCA +TAMIL SYLLABLE NGOO; 0B99 0BCB +TAMIL SYLLABLE NGAU; 0B99 0BCC + +TAMIL SYLLABLE CAA; 0B9A 0BBE +TAMIL SYLLABLE CI; 0B9A 0BBF +TAMIL SYLLABLE CII; 0B9A 0BC0 +TAMIL SYLLABLE CU; 0B9A 0BC1 +TAMIL SYLLABLE CUU; 0B9A 0BC2 +TAMIL SYLLABLE CE; 0B9A 0BC6 +TAMIL SYLLABLE CEE; 0B9A 0BC7 +TAMIL SYLLABLE CAI; 0B9A 0BC8 +TAMIL SYLLABLE CO; 0B9A 0BCA +TAMIL SYLLABLE COO; 0B9A 0BCB +TAMIL SYLLABLE CAU; 0B9A 0BCC + +TAMIL SYLLABLE NYAA; 0B9E 0BBE +TAMIL SYLLABLE NYI; 0B9E 0BBF +TAMIL SYLLABLE NYII; 0B9E 0BC0 +TAMIL SYLLABLE NYU; 0B9E 0BC1 +TAMIL SYLLABLE NYUU; 0B9E 0BC2 +TAMIL SYLLABLE NYE; 0B9E 0BC6 +TAMIL SYLLABLE NYEE; 0B9E 0BC7 +TAMIL SYLLABLE NYAI; 0B9E 0BC8 +TAMIL SYLLABLE NYO; 0B9E 0BCA +TAMIL SYLLABLE NYOO; 0B9E 0BCB +TAMIL SYLLABLE NYAU; 0B9E 0BCC + +TAMIL SYLLABLE TTAA; 0B9F 0BBE +TAMIL SYLLABLE TTI; 0B9F 0BBF +TAMIL SYLLABLE TTII; 0B9F 0BC0 +TAMIL SYLLABLE TTU; 0B9F 0BC1 +TAMIL SYLLABLE TTUU; 0B9F 0BC2 +TAMIL SYLLABLE TTE; 0B9F 0BC6 +TAMIL SYLLABLE TTEE; 0B9F 0BC7 +TAMIL SYLLABLE TTAI; 0B9F 0BC8 +TAMIL SYLLABLE TTO; 0B9F 0BCA +TAMIL SYLLABLE TTOO; 0B9F 0BCB +TAMIL SYLLABLE TTAU; 0B9F 0BCC + +TAMIL SYLLABLE NNAA; 0BA3 0BBE +TAMIL SYLLABLE NNI; 0BA3 0BBF +TAMIL SYLLABLE NNII; 0BA3 0BC0 +TAMIL SYLLABLE NNU; 0BA3 0BC1 +TAMIL SYLLABLE NNUU; 0BA3 0BC2 +TAMIL SYLLABLE NNE; 0BA3 0BC6 +TAMIL SYLLABLE NNEE; 0BA3 0BC7 +TAMIL SYLLABLE NNAI; 0BA3 0BC8 +TAMIL SYLLABLE NNO; 0BA3 0BCA +TAMIL SYLLABLE NNOO; 0BA3 0BCB +TAMIL SYLLABLE NNAU; 0BA3 0BCC + +TAMIL SYLLABLE TAA; 0BA4 0BBE +TAMIL SYLLABLE TI; 0BA4 0BBF +TAMIL SYLLABLE TII; 0BA4 0BC0 +TAMIL SYLLABLE TU; 0BA4 0BC1 +TAMIL SYLLABLE TUU; 0BA4 0BC2 +TAMIL SYLLABLE TE; 0BA4 0BC6 +TAMIL SYLLABLE TEE; 0BA4 0BC7 +TAMIL SYLLABLE TAI; 0BA4 0BC8 +TAMIL SYLLABLE TO; 0BA4 0BCA +TAMIL SYLLABLE TOO; 0BA4 0BCB +TAMIL SYLLABLE TAU; 0BA4 0BCC + +TAMIL SYLLABLE NAA; 0BA8 0BBE +TAMIL SYLLABLE NI; 0BA8 0BBF +TAMIL SYLLABLE NII; 0BA8 0BC0 +TAMIL SYLLABLE NU; 0BA8 0BC1 +TAMIL SYLLABLE NUU; 0BA8 0BC2 +TAMIL SYLLABLE NE; 0BA8 0BC6 +TAMIL SYLLABLE NEE; 0BA8 0BC7 +TAMIL SYLLABLE NAI; 0BA8 0BC8 +TAMIL SYLLABLE NO; 0BA8 0BCA +TAMIL SYLLABLE NOO; 0BA8 0BCB +TAMIL SYLLABLE NAU; 0BA8 0BCC + +TAMIL SYLLABLE PAA; 0BAA 0BBE +TAMIL SYLLABLE PI; 0BAA 0BBF +TAMIL SYLLABLE PII; 0BAA 0BC0 +TAMIL SYLLABLE PU; 0BAA 0BC1 +TAMIL SYLLABLE PUU; 0BAA 0BC2 +TAMIL SYLLABLE PE; 0BAA 0BC6 +TAMIL SYLLABLE PEE; 0BAA 0BC7 +TAMIL SYLLABLE PAI; 0BAA 0BC8 +TAMIL SYLLABLE PO; 0BAA 0BCA +TAMIL SYLLABLE POO; 0BAA 0BCB +TAMIL SYLLABLE PAU; 0BAA 0BCC + +TAMIL SYLLABLE MAA; 0BAE 0BBE +TAMIL SYLLABLE MI; 0BAE 0BBF +TAMIL SYLLABLE MII; 0BAE 0BC0 +TAMIL SYLLABLE MU; 0BAE 0BC1 +TAMIL SYLLABLE MUU; 0BAE 0BC2 +TAMIL SYLLABLE ME; 0BAE 0BC6 +TAMIL SYLLABLE MEE; 0BAE 0BC7 +TAMIL SYLLABLE MAI; 0BAE 0BC8 +TAMIL SYLLABLE MO; 0BAE 0BCA +TAMIL SYLLABLE MOO; 0BAE 0BCB +TAMIL SYLLABLE MAU; 0BAE 0BCC + +TAMIL SYLLABLE YAA; 0BAF 0BBE +TAMIL SYLLABLE YI; 0BAF 0BBF +TAMIL SYLLABLE YII; 0BAF 0BC0 +TAMIL SYLLABLE YU; 0BAF 0BC1 +TAMIL SYLLABLE YUU; 0BAF 0BC2 +TAMIL SYLLABLE YE; 0BAF 0BC6 +TAMIL SYLLABLE YEE; 0BAF 0BC7 +TAMIL SYLLABLE YAI; 0BAF 0BC8 +TAMIL SYLLABLE YO; 0BAF 0BCA +TAMIL SYLLABLE YOO; 0BAF 0BCB +TAMIL SYLLABLE YAU; 0BAF 0BCC + +TAMIL SYLLABLE RAA; 0BB0 0BBE +TAMIL SYLLABLE RI; 0BB0 0BBF +TAMIL SYLLABLE RII; 0BB0 0BC0 +TAMIL SYLLABLE RU; 0BB0 0BC1 +TAMIL SYLLABLE RUU; 0BB0 0BC2 +TAMIL SYLLABLE RE; 0BB0 0BC6 +TAMIL SYLLABLE REE; 0BB0 0BC7 +TAMIL SYLLABLE RAI; 0BB0 0BC8 +TAMIL SYLLABLE RO; 0BB0 0BCA +TAMIL SYLLABLE ROO; 0BB0 0BCB +TAMIL SYLLABLE RAU; 0BB0 0BCC + +TAMIL SYLLABLE LAA; 0BB2 0BBE +TAMIL SYLLABLE LI; 0BB2 0BBF +TAMIL SYLLABLE LII; 0BB2 0BC0 +TAMIL SYLLABLE LU; 0BB2 0BC1 +TAMIL SYLLABLE LUU; 0BB2 0BC2 +TAMIL SYLLABLE LE; 0BB2 0BC6 +TAMIL SYLLABLE LEE; 0BB2 0BC7 +TAMIL SYLLABLE LAI; 0BB2 0BC8 +TAMIL SYLLABLE LO; 0BB2 0BCA +TAMIL SYLLABLE LOO; 0BB2 0BCB +TAMIL SYLLABLE LAU; 0BB2 0BCC + +TAMIL SYLLABLE VAA; 0BB5 0BBE +TAMIL SYLLABLE VI; 0BB5 0BBF +TAMIL SYLLABLE VII; 0BB5 0BC0 +TAMIL SYLLABLE VU; 0BB5 0BC1 +TAMIL SYLLABLE VUU; 0BB5 0BC2 +TAMIL SYLLABLE VE; 0BB5 0BC6 +TAMIL SYLLABLE VEE; 0BB5 0BC7 +TAMIL SYLLABLE VAI; 0BB5 0BC8 +TAMIL SYLLABLE VO; 0BB5 0BCA +TAMIL SYLLABLE VOO; 0BB5 0BCB +TAMIL SYLLABLE VAU; 0BB5 0BCC + +TAMIL SYLLABLE LLLAA; 0BB4 0BBE +TAMIL SYLLABLE LLLI; 0BB4 0BBF +TAMIL SYLLABLE LLLII; 0BB4 0BC0 +TAMIL SYLLABLE LLLU; 0BB4 0BC1 +TAMIL SYLLABLE LLLUU; 0BB4 0BC2 +TAMIL SYLLABLE LLLE; 0BB4 0BC6 +TAMIL SYLLABLE LLLEE; 0BB4 0BC7 +TAMIL SYLLABLE LLLAI; 0BB4 0BC8 +TAMIL SYLLABLE LLLO; 0BB4 0BCA +TAMIL SYLLABLE LLLOO; 0BB4 0BCB +TAMIL SYLLABLE LLLAU; 0BB4 0BCC + +TAMIL SYLLABLE LLAA; 0BB3 0BBE +TAMIL SYLLABLE LLI; 0BB3 0BBF +TAMIL SYLLABLE LLII; 0BB3 0BC0 +TAMIL SYLLABLE LLU; 0BB3 0BC1 +TAMIL SYLLABLE LLUU; 0BB3 0BC2 +TAMIL SYLLABLE LLE; 0BB3 0BC6 +TAMIL SYLLABLE LLEE; 0BB3 0BC7 +TAMIL SYLLABLE LLAI; 0BB3 0BC8 +TAMIL SYLLABLE LLO; 0BB3 0BCA +TAMIL SYLLABLE LLOO; 0BB3 0BCB +TAMIL SYLLABLE LLAU; 0BB3 0BCC + +TAMIL SYLLABLE RRAA; 0BB1 0BBE +TAMIL SYLLABLE RRI; 0BB1 0BBF +TAMIL SYLLABLE RRII; 0BB1 0BC0 +TAMIL SYLLABLE RRU; 0BB1 0BC1 +TAMIL SYLLABLE RRUU; 0BB1 0BC2 +TAMIL SYLLABLE RRE; 0BB1 0BC6 +TAMIL SYLLABLE RREE; 0BB1 0BC7 +TAMIL SYLLABLE RRAI; 0BB1 0BC8 +TAMIL SYLLABLE RRO; 0BB1 0BCA +TAMIL SYLLABLE RROO; 0BB1 0BCB +TAMIL SYLLABLE RRAU; 0BB1 0BCC + +TAMIL SYLLABLE NNNAA; 0BA9 0BBE +TAMIL SYLLABLE NNNI; 0BA9 0BBF +TAMIL SYLLABLE NNNII; 0BA9 0BC0 +TAMIL SYLLABLE NNNU; 0BA9 0BC1 +TAMIL SYLLABLE NNNUU; 0BA9 0BC2 +TAMIL SYLLABLE NNNE; 0BA9 0BC6 +TAMIL SYLLABLE NNNEE; 0BA9 0BC7 +TAMIL SYLLABLE NNNAI; 0BA9 0BC8 +TAMIL SYLLABLE NNNO; 0BA9 0BCA +TAMIL SYLLABLE NNNOO; 0BA9 0BCB +TAMIL SYLLABLE NNNAU; 0BA9 0BCC + +TAMIL SYLLABLE JAA; 0B9C 0BBE +TAMIL SYLLABLE JI; 0B9C 0BBF +TAMIL SYLLABLE JII; 0B9C 0BC0 +TAMIL SYLLABLE JU; 0B9C 0BC1 +TAMIL SYLLABLE JUU; 0B9C 0BC2 +TAMIL SYLLABLE JE; 0B9C 0BC6 +TAMIL SYLLABLE JEE; 0B9C 0BC7 +TAMIL SYLLABLE JAI; 0B9C 0BC8 +TAMIL SYLLABLE JO; 0B9C 0BCA +TAMIL SYLLABLE JOO; 0B9C 0BCB +TAMIL SYLLABLE JAU; 0B9C 0BCC + +TAMIL SYLLABLE SHAA; 0BB6 0BBE +TAMIL SYLLABLE SHI; 0BB6 0BBF +TAMIL SYLLABLE SHII; 0BB6 0BC0 +TAMIL SYLLABLE SHU; 0BB6 0BC1 +TAMIL SYLLABLE SHUU; 0BB6 0BC2 +TAMIL SYLLABLE SHE; 0BB6 0BC6 +TAMIL SYLLABLE SHEE; 0BB6 0BC7 +TAMIL SYLLABLE SHAI; 0BB6 0BC8 +TAMIL SYLLABLE SHO; 0BB6 0BCA +TAMIL SYLLABLE SHOO; 0BB6 0BCB +TAMIL SYLLABLE SHAU; 0BB6 0BCC + +TAMIL SYLLABLE SSAA; 0BB7 0BBE +TAMIL SYLLABLE SSI; 0BB7 0BBF +TAMIL SYLLABLE SSII; 0BB7 0BC0 +TAMIL SYLLABLE SSU; 0BB7 0BC1 +TAMIL SYLLABLE SSUU; 0BB7 0BC2 +TAMIL SYLLABLE SSE; 0BB7 0BC6 +TAMIL SYLLABLE SSEE; 0BB7 0BC7 +TAMIL SYLLABLE SSAI; 0BB7 0BC8 +TAMIL SYLLABLE SSO; 0BB7 0BCA +TAMIL SYLLABLE SSOO; 0BB7 0BCB +TAMIL SYLLABLE SSAU; 0BB7 0BCC + +TAMIL SYLLABLE SAA; 0BB8 0BBE +TAMIL SYLLABLE SI; 0BB8 0BBF +TAMIL SYLLABLE SII; 0BB8 0BC0 +TAMIL SYLLABLE SU; 0BB8 0BC1 +TAMIL SYLLABLE SUU; 0BB8 0BC2 +TAMIL SYLLABLE SE; 0BB8 0BC6 +TAMIL SYLLABLE SEE; 0BB8 0BC7 +TAMIL SYLLABLE SAI; 0BB8 0BC8 +TAMIL SYLLABLE SO; 0BB8 0BCA +TAMIL SYLLABLE SOO; 0BB8 0BCB +TAMIL SYLLABLE SAU; 0BB8 0BCC + +TAMIL SYLLABLE HAA; 0BB9 0BBE +TAMIL SYLLABLE HI; 0BB9 0BBF +TAMIL SYLLABLE HII; 0BB9 0BC0 +TAMIL SYLLABLE HU; 0BB9 0BC1 +TAMIL SYLLABLE HUU; 0BB9 0BC2 +TAMIL SYLLABLE HE; 0BB9 0BC6 +TAMIL SYLLABLE HEE; 0BB9 0BC7 +TAMIL SYLLABLE HAI; 0BB9 0BC8 +TAMIL SYLLABLE HO; 0BB9 0BCA +TAMIL SYLLABLE HOO; 0BB9 0BCB +TAMIL SYLLABLE HAU; 0BB9 0BCC + +TAMIL SYLLABLE KSSA; 0B95 0BCD 0BB7 +TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE +TAMIL SYLLABLE KSSI; 0B95 0BCD 0BB7 0BBF +TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0 +TAMIL SYLLABLE KSSU; 0B95 0BCD 0BB7 0BC1 +TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2 +TAMIL SYLLABLE KSSE; 0B95 0BCD 0BB7 0BC6 +TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7 +TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8 +TAMIL SYLLABLE KSSO; 0B95 0BCD 0BB7 0BCA +TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB +TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC + +TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0 # ================================================ @@ -33,52 +367,7 @@ # LATIN SMALL LETTER A WITH ACUTE AND OGONEK;00E1 0328 # # This entry was removed because the sequence was not in NFC, -# as required. It will be replaced with the NFC version of -# the sequence, based on the Lithuanian additions proposed -# here for a future version of the standard. - -# ================================================ - -# Provisional entries for NamedSequences.txt. +# as required. It was replaced with the NFC version of +# the sequence, based on the Lithuanian additions accepted +# for Unicode 5.0. -# Proposed additions for Lithuanian. 2006-05-18 - -LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301 -LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301 -LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303 -LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303 -LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301 -LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301 -LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303 -LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303 -LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301 -LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301 -LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303 -LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303 -LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300 -LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303 -LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301 -LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301 -LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303 -LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303 -LATIN CAPITAL LETTER J WITH TILDE;004A 0303 -LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303 -LATIN CAPITAL LETTER L WITH TILDE;004C 0303 -LATIN SMALL LETTER L WITH TILDE;006C 0303 -LATIN CAPITAL LETTER M WITH TILDE;004D 0303 -LATIN SMALL LETTER M WITH TILDE;006D 0303 -LATIN CAPITAL LETTER R WITH TILDE;0052 0303 -LATIN SMALL LETTER R WITH TILDE;0072 0303 -LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301 -LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301 -LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303 -LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303 -LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301 -LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301 -LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303 -LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303 - -# Proposed additions for Tamil. - -TAMIL LETTER KSSA;0B95 0BCD 0BB7 -TAMIL LETTER SHRII;0BB6 0BCD 0BB0 0BC0 diff --git a/lib/unicore/PropValueAliases.txt b/lib/unicore/PropValueAliases.txt index 60546fb0b5..61f2238916 100644 --- a/lib/unicore/PropValueAliases.txt +++ b/lib/unicore/PropValueAliases.txt @@ -1,8 +1,8 @@ -# PropertyValueAliases-5.0.0.txt -# Date: 2006-03-03, 08:23:34 GMT [MD] +# PropertyValueAliases-5.1.0.txt +# Date: 2008-03-03, 21:58:08 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2008 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UCD.html # @@ -55,6 +55,11 @@ # ================================================ +# ASCII_Hex_Digit (AHex) + +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True + # Age (age) age; n/a ; 1.1 @@ -66,8 +71,14 @@ age; n/a ; 3.2 age; n/a ; 4.0 age; n/a ; 4.1 age; n/a ; 5.0 +age; n/a ; 5.1 age; n/a ; unassigned +# Alphabetic (Alpha) + +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True + # Bidi_Class (bc) bc ; AL ; Arabic_Letter @@ -90,20 +101,35 @@ bc ; RLO ; Right_To_Left_Override bc ; S ; Segment_Separator bc ; WS ; White_Space +# Bidi_Control (Bidi_C) + +Bidi_C; N ; No ; F ; False +Bidi_C; Y ; Yes ; T ; True + +# Bidi_Mirrored (Bidi_M) + +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True + +# Bidi_Mirroring_Glyph (bmg) + +# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; <none> + # Block (blk) blk; n/a ; Aegean_Numbers blk; n/a ; Alphabetic_Presentation_Forms blk; n/a ; Ancient_Greek_Musical_Notation blk; n/a ; Ancient_Greek_Numbers +blk; n/a ; Ancient_Symbols blk; n/a ; Arabic -blk; n/a ; Arabic_Presentation_Forms-A -blk; n/a ; Arabic_Presentation_Forms-B +blk; n/a ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; n/a ; Arabic_Presentation_Forms_B blk; n/a ; Arabic_Supplement blk; n/a ; Armenian blk; n/a ; Arrows blk; n/a ; Balinese -blk; n/a ; Basic_Latin +blk; n/a ; Basic_Latin ; ASCII blk; n/a ; Bengali blk; n/a ; Block_Elements blk; n/a ; Bopomofo @@ -113,6 +139,8 @@ blk; n/a ; Braille_Patterns blk; n/a ; Buginese blk; n/a ; Buhid blk; n/a ; Byzantine_Musical_Symbols +blk; n/a ; Carian +blk; n/a ; Cham blk; n/a ; Cherokee blk; n/a ; CJK_Compatibility blk; n/a ; CJK_Compatibility_Forms @@ -120,28 +148,31 @@ blk; n/a ; CJK_Compatibility_Ideographs blk; n/a ; CJK_Compatibility_Ideographs_Supplement blk; n/a ; CJK_Radicals_Supplement blk; n/a ; CJK_Strokes -blk; n/a ; CJK_Symbols_and_Punctuation +blk; n/a ; CJK_Symbols_And_Punctuation blk; n/a ; CJK_Unified_Ideographs blk; n/a ; CJK_Unified_Ideographs_Extension_A blk; n/a ; CJK_Unified_Ideographs_Extension_B blk; n/a ; Combining_Diacritical_Marks -blk; n/a ; Combining_Diacritical_Marks_for_Symbols +blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols blk; n/a ; Combining_Diacritical_Marks_Supplement blk; n/a ; Combining_Half_Marks blk; n/a ; Control_Pictures blk; n/a ; Coptic blk; n/a ; Counting_Rod_Numerals blk; n/a ; Cuneiform -blk; n/a ; Cuneiform_Numbers_and_Punctuation +blk; n/a ; Cuneiform_Numbers_And_Punctuation blk; n/a ; Currency_Symbols blk; n/a ; Cypriot_Syllabary blk; n/a ; Cyrillic +blk; n/a ; Cyrillic_Extended_A +blk; n/a ; Cyrillic_Extended_B blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary blk; n/a ; Deseret blk; n/a ; Devanagari blk; n/a ; Dingbats +blk; n/a ; Domino_Tiles blk; n/a ; Enclosed_Alphanumerics -blk; n/a ; Enclosed_CJK_Letters_and_Months +blk; n/a ; Enclosed_CJK_Letters_And_Months blk; n/a ; Ethiopic blk; n/a ; Ethiopic_Extended blk; n/a ; Ethiopic_Supplement @@ -151,11 +182,11 @@ blk; n/a ; Georgian blk; n/a ; Georgian_Supplement blk; n/a ; Glagolitic blk; n/a ; Gothic -blk; n/a ; Greek_and_Coptic +blk; n/a ; Greek_And_Coptic ; Greek blk; n/a ; Greek_Extended blk; n/a ; Gujarati blk; n/a ; Gurmukhi -blk; n/a ; Halfwidth_and_Fullwidth_Forms +blk; n/a ; Halfwidth_And_Fullwidth_Forms blk; n/a ; Hangul_Compatibility_Jamo blk; n/a ; Hangul_Jamo blk; n/a ; Hangul_Syllables @@ -171,28 +202,33 @@ blk; n/a ; Kangxi_Radicals blk; n/a ; Kannada blk; n/a ; Katakana blk; n/a ; Katakana_Phonetic_Extensions +blk; n/a ; Kayah_Li blk; n/a ; Kharoshthi blk; n/a ; Khmer blk; n/a ; Khmer_Symbols blk; n/a ; Lao -blk; n/a ; Latin-1_Supplement -blk; n/a ; Latin_Extended-A -blk; n/a ; Latin_Extended-B -blk; n/a ; Latin_Extended-C -blk; n/a ; Latin_Extended-D +blk; n/a ; Latin_1_Supplement ; Latin_1 +blk; n/a ; Latin_Extended_A blk; n/a ; Latin_Extended_Additional +blk; n/a ; Latin_Extended_B +blk; n/a ; Latin_Extended_C +blk; n/a ; Latin_Extended_D +blk; n/a ; Lepcha blk; n/a ; Letterlike_Symbols blk; n/a ; Limbu blk; n/a ; Linear_B_Ideograms blk; n/a ; Linear_B_Syllabary blk; n/a ; Low_Surrogates +blk; n/a ; Lycian +blk; n/a ; Lydian +blk; n/a ; Mahjong_Tiles blk; n/a ; Malayalam blk; n/a ; Mathematical_Alphanumeric_Symbols blk; n/a ; Mathematical_Operators -blk; n/a ; Miscellaneous_Mathematical_Symbols-A -blk; n/a ; Miscellaneous_Mathematical_Symbols-B +blk; n/a ; Miscellaneous_Mathematical_Symbols_A +blk; n/a ; Miscellaneous_Mathematical_Symbols_B blk; n/a ; Miscellaneous_Symbols -blk; n/a ; Miscellaneous_Symbols_and_Arrows +blk; n/a ; Miscellaneous_Symbols_And_Arrows blk; n/a ; Miscellaneous_Technical blk; n/a ; Modifier_Tone_Letters blk; n/a ; Mongolian @@ -203,29 +239,34 @@ blk; n/a ; NKo blk; n/a ; No_Block blk; n/a ; Number_Forms blk; n/a ; Ogham +blk; n/a ; Ol_Chiki blk; n/a ; Old_Italic blk; n/a ; Old_Persian blk; n/a ; Optical_Character_Recognition blk; n/a ; Oriya blk; n/a ; Osmanya -blk; n/a ; Phags-pa +blk; n/a ; Phags_Pa +blk; n/a ; Phaistos_Disc blk; n/a ; Phoenician blk; n/a ; Phonetic_Extensions blk; n/a ; Phonetic_Extensions_Supplement -blk; n/a ; Private_Use_Area +blk; n/a ; Private_Use_Area ; Private_Use +blk; n/a ; Rejang blk; n/a ; Runic +blk; n/a ; Saurashtra blk; n/a ; Shavian blk; n/a ; Sinhala blk; n/a ; Small_Form_Variants blk; n/a ; Spacing_Modifier_Letters blk; n/a ; Specials -blk; n/a ; Superscripts_and_Subscripts -blk; n/a ; Supplemental_Arrows-A -blk; n/a ; Supplemental_Arrows-B +blk; n/a ; Sundanese +blk; n/a ; Superscripts_And_Subscripts +blk; n/a ; Supplemental_Arrows_A +blk; n/a ; Supplemental_Arrows_B blk; n/a ; Supplemental_Mathematical_Operators blk; n/a ; Supplemental_Punctuation -blk; n/a ; Supplementary_Private_Use_Area-A -blk; n/a ; Supplementary_Private_Use_Area-B +blk; n/a ; Supplementary_Private_Use_Area_A +blk; n/a ; Supplementary_Private_Use_Area_B blk; n/a ; Syloti_Nagri blk; n/a ; Syriac blk; n/a ; Tagalog @@ -240,7 +281,8 @@ blk; n/a ; Thai blk; n/a ; Tibetan blk; n/a ; Tifinagh blk; n/a ; Ugaritic -blk; n/a ; Unified_Canadian_Aboriginal_Syllabics +blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; n/a ; Vai blk; n/a ; Variation_Selectors blk; n/a ; Variation_Selectors_Supplement blk; n/a ; Vertical_Forms @@ -270,26 +312,59 @@ ccc; 233; DB ; Double_Below ccc; 234; DA ; Double_Above ccc; 240; IS ; Iota_Subscript +# Case_Folding (cf) + +# @missing: 0000..10FFFF; Case_Folding; <code point> + +# Composition_Exclusion (CE) + +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True + +# Dash (Dash) + +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True + +# Decomposition_Mapping (dm) + +# @missing: 0000..10FFFF; Decomposition_Mapping; <code point> + # Decomposition_Type (dt) -dt ; can ; Canonical -dt ; com ; Compat -dt ; enc ; Circle -dt ; fin ; Final -dt ; font ; Font -dt ; fra ; Fraction -dt ; init ; Initial -dt ; iso ; Isolated -dt ; med ; Medial -dt ; nar ; Narrow -dt ; nb ; Nobreak -dt ; none ; None -dt ; sml ; Small -dt ; sqr ; Square -dt ; sub ; Sub -dt ; sup ; Super -dt ; vert ; Vertical -dt ; wide ; Wide +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; wide + +# Default_Ignorable_Code_Point (DI) + +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True + +# Deprecated (Dep) + +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True + +# Diacritic (Dia) + +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True # East_Asian_Width (ea) @@ -300,6 +375,40 @@ ea ; N ; Neutral ea ; Na ; Narrow ea ; W ; Wide +# Expands_On_NFC (XO_NFC) + +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True + +# Expands_On_NFD (XO_NFD) + +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True + +# Expands_On_NFKC (XO_NFKC) + +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True + +# Expands_On_NFKD (XO_NFKD) + +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True + +# Extender (Ext) + +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True + +# FC_NFKC_Closure (FC_NFKC) + +# @missing: 0000..10FFFF; FC_NFKC_Closure; <code point> + +# Full_Composition_Exclusion (Comp_Ex) + +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True + # General_Category (gc) gc ; C ; Other # Cc | Cf | Cn | Co | Cs @@ -341,6 +450,11 @@ gc ; Zl ; Line_Separator gc ; Zp ; Paragraph_Separator gc ; Zs ; Space_Separator +# Grapheme_Base (Gr_Base) + +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True + # Grapheme_Cluster_Break (GCB) GCB; CN ; Control @@ -350,10 +464,22 @@ GCB; L ; L GCB; LF ; LF GCB; LV ; LV GCB; LVT ; LVT +GCB; PP ; Prepend +GCB; SM ; SpacingMark GCB; T ; T GCB; V ; V GCB; XX ; Other +# Grapheme_Extend (Gr_Ext) + +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True + +# Grapheme_Link (Gr_Link) + +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True + # Hangul_Syllable_Type (hst) hst; L ; Leading_Jamo @@ -363,6 +489,106 @@ hst; NA ; Not_Applicable hst; T ; Trailing_Jamo hst; V ; Vowel_Jamo +# Hex_Digit (Hex) + +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True + +# Hyphen (Hyphen) + +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True + +# IDS_Binary_Operator (IDSB) + +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True + +# IDS_Trinary_Operator (IDST) + +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True + +# ID_Continue (IDC) + +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True + +# ID_Start (IDS) + +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True + +# ISO_Comment (isc) + +# @missing: 0000..10FFFF; ISO_Comment; <none> + +# Ideographic (Ideo) + +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Jamo_Short_Name (JSN) + +# @missing: 0000..10FFFF; Jamo_Short_Name; <none> +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU + +# Join_Control (Join_C) + +Join_C; N ; No ; F ; False +Join_C; Y ; Yes ; T ; True + # Joining_Group (jg) jg ; n/a ; Ain @@ -370,6 +596,7 @@ jg ; n/a ; Alaph jg ; n/a ; Alef jg ; n/a ; Beh jg ; n/a ; Beth +jg ; n/a ; Burushaski_Yeh_Barree jg ; n/a ; Dal jg ; n/a ; Dalath_Rish jg ; n/a ; E @@ -468,6 +695,25 @@ lb ; WJ ; Word_Joiner lb ; XX ; Unknown lb ; ZW ; ZWSpace +# Logical_Order_Exception (LOE) + +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True + +# Lowercase (Lower) + +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True + +# Lowercase_Mapping (lc) + +# @missing: 0000..10FFFF; Lowercase_Mapping; <code point> + +# Math (Math) + +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True + # NFC_Quick_Check (NFC_QC) NFC_QC; M ; Maybe @@ -490,6 +736,15 @@ NFKC_QC; Y ; Yes NFKD_QC; N ; No NFKD_QC; Y ; Yes +# Name (na) + +# @missing: 0000..10FFFF; Name; <none> + +# Noncharacter_Code_Point (NChar) + +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True + # Numeric_Type (nt) nt ; De ; Decimal @@ -497,6 +752,75 @@ nt ; Di ; Digit nt ; None ; None nt ; Nu ; Numeric +# Numeric_Value (nv) + +# @missing: 0000..10FFFF; Numeric_Value; NaN + +# Other_Alphabetic (OAlpha) + +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True + +# Other_Default_Ignorable_Code_Point (ODI) + +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True + +# Other_Grapheme_Extend (OGr_Ext) + +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True + +# Other_ID_Continue (OIDC) + +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True + +# Other_ID_Start (OIDS) + +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True + +# Other_Lowercase (OLower) + +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True + +# Other_Math (OMath) + +OMath; N ; No ; F ; False +OMath; Y ; Yes ; T ; True + +# Other_Uppercase (OUpper) + +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True + +# Pattern_Syntax (Pat_Syn) + +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True + +# Pattern_White_Space (Pat_WS) + +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True + +# Quotation_Mark (QMark) + +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True + +# Radical (Radical) + +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True + +# STerm (STerm) + +STerm; N ; No ; F ; False +STerm; Y ; Yes ; T ; True + # Script (sc) sc ; Arab ; Arabic @@ -508,6 +832,8 @@ sc ; Brai ; Braille sc ; Bugi ; Buginese sc ; Buhd ; Buhid sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; Cham ; Cham sc ; Cher ; Cherokee sc ; Copt ; Coptic ; Qaac sc ; Cprt ; Cypriot @@ -528,27 +854,35 @@ sc ; Hebr ; Hebrew sc ; Hira ; Hiragana sc ; Hrkt ; Katakana_Or_Hiragana sc ; Ital ; Old_Italic +sc ; Kali ; Kayah_Li sc ; Kana ; Katakana sc ; Khar ; Kharoshthi sc ; Khmr ; Khmer sc ; Knda ; Kannada sc ; Laoo ; Lao sc ; Latn ; Latin +sc ; Lepc ; Lepcha sc ; Limb ; Limbu sc ; Linb ; Linear_B +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian sc ; Mlym ; Malayalam sc ; Mong ; Mongolian sc ; Mymr ; Myanmar sc ; Nkoo ; Nko sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki sc ; Orya ; Oriya sc ; Osma ; Osmanya sc ; Phag ; Phags_Pa sc ; Phnx ; Phoenician sc ; Qaai ; Inherited +sc ; Rjng ; Rejang sc ; Runr ; Runic +sc ; Saur ; Saurashtra sc ; Shaw ; Shavian sc ; Sinh ; Sinhala +sc ; Sund ; Sundanese sc ; Sylo ; Syloti_Nagri sc ; Syrc ; Syriac sc ; Tagb ; Tagbanwa @@ -562,6 +896,7 @@ sc ; Thaa ; Thaana sc ; Thai ; Thai sc ; Tibt ; Tibetan sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai sc ; Xpeo ; Old_Persian sc ; Xsux ; Cuneiform sc ; Yiii ; Yi @@ -572,25 +907,106 @@ sc ; Zzzz ; Unknown SB ; AT ; ATerm SB ; CL ; Close +SB ; CR ; CR +SB ; EX ; Extend SB ; FO ; Format SB ; LE ; OLetter +SB ; LF ; LF SB ; LO ; Lower SB ; NU ; Numeric +SB ; SC ; SContinue SB ; SE ; Sep SB ; SP ; Sp SB ; ST ; STerm SB ; UP ; Upper SB ; XX ; Other +# Simple_Case_Folding (scf) + +# @missing: 0000..10FFFF; Simple_Case_Folding; <code point> + +# Simple_Lowercase_Mapping (slc) + +# @missing: 0000..10FFFF; Simple_Lowercase_Mapping; <code point> + +# Simple_Titlecase_Mapping (stc) + +# @missing: 0000..10FFFF; Simple_Titlecase_Mapping; <code point> + +# Simple_Uppercase_Mapping (suc) + +# @missing: 0000..10FFFF; Simple_Uppercase_Mapping; <code point> + +# Soft_Dotted (SD) + +SD ; N ; No ; F ; False +SD ; Y ; Yes ; T ; True + +# Terminal_Punctuation (Term) + +Term; N ; No ; F ; False +Term; Y ; Yes ; T ; True + +# Titlecase_Mapping (tc) + +# @missing: 0000..10FFFF; Titlecase_Mapping; <code point> + +# Unicode_1_Name (na1) + +# @missing: 0000..10FFFF; Unicode_1_Name; <none> + +# Unicode_Radical_Stroke (URS) + +# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none> + +# Unified_Ideograph (UIdeo) + +UIdeo; N ; No ; F ; False +UIdeo; Y ; Yes ; T ; True + +# Uppercase (Upper) + +Upper; N ; No ; F ; False +Upper; Y ; Yes ; T ; True + +# Uppercase_Mapping (uc) + +# @missing: 0000..10FFFF; Uppercase_Mapping; <code point> + +# Variation_Selector (VS) + +VS ; N ; No ; F ; False +VS ; Y ; Yes ; T ; True + +# White_Space (WSpace) + +WSpace; N ; No ; F ; False +WSpace; Y ; Yes ; T ; True + # Word_Break (WB) +WB ; CR ; CR WB ; EX ; ExtendNumLet +WB ; Extend ; Extend WB ; FO ; Format WB ; KA ; Katakana WB ; LE ; ALetter +WB ; LF ; LF +WB ; MB ; MidNumLet WB ; ML ; MidLetter WB ; MN ; MidNum +WB ; NL ; Newline WB ; NU ; Numeric WB ; XX ; Other +# XID_Continue (XIDC) + +XIDC; N ; No ; F ; False +XIDC; Y ; Yes ; T ; True + +# XID_Start (XIDS) + +XIDS; N ; No ; F ; False +XIDS; Y ; Yes ; T ; True + # EOF diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl index 731c4399d6..509d31a986 100644 --- a/lib/unicore/README.perl +++ b/lib/unicore/README.perl @@ -1,8 +1,9 @@ The *.txt files were copied from - http://www.unicode.org/Public/5.0.0/ucd -as of Unicode 5.0.0 (July 2006). + http://www.unicode.org/Public/5.1.0/ucd + +as of Unicode 5.1.0 (March 2008). The two big files, NormalizationTest.txt (2 MB) and Unihan.txt (28 MB, 5.8 MB zip) were not included due to space considerations. Also NOT @@ -14,7 +15,7 @@ included were any *.html files and the Derived*.txt files or any files from subdirectories. -To be 8.3-friendly, the lib/unicore/PropertyValueAliases.txt was +To be 8.3 filesystem friendly, the lib/unicore/PropertyValueAliases.txt was renamed to be lib/unicore/PropValueAliases.txt and the lib/unicore/NamedSequencesProv.txt was renamed to be lib/unicore/NamedSqProv.txt, since otherwise they would have @@ -30,6 +31,9 @@ the build process. FOR PUMPKINS +The files are inter-related. If you take the latest UnicodeData.txt, for example, +but leave the older versions of other files, there can be subtle problems. + The *.pl files are generated from the *.txt files by the mktables script, more recently done during the Perl build process, but if you want to try the old manual way: diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 41c206cc9a..d15ed91310 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -67,7 +67,7 @@ while (@ARGV) " -w : Write files regardless\n", " -maketest : Make test script\n", " -makelist : Rewrite the file list based on current setup\n", - " -L filelist : Use this file list, (defaults to $0)\n", + " -L filelist : Use this file list, (defaults to $0.lst)\n", " -C dir : Change to this directory before proceeding\n", " -check A B : Executes only if A and B are the same\n"; } @@ -161,6 +161,13 @@ my $HEADER=<<"EOF"; EOF +my $INTERNAL_ONLY=<<"EOF"; +# This file is for internal use by the Perl program only. The format and even +# name or existence of this file are subject to change without notice. Don't +# use it directly. + +EOF + sub force_unlink { my $filename = shift; return unless -e $filename; @@ -505,7 +512,7 @@ sub Table::Append ## ## Given a code point range starting value and ending value (and name), -## Add the range to teh Table. +## Add the range to the Table. ## ## NOTE: Code points must be added in strictly ascending numeric order. ## @@ -612,6 +619,10 @@ sub Table::Write my $comment = shift; my @OUT = $HEADER; + + # files in subdirectories are internal-use-only + push @OUT, $INTERNAL_ONLY if ref $filename; + if (defined $comment) { $comment =~ s/\s+\Z//; $comment =~ s/^/# /gm; @@ -817,8 +828,8 @@ sub UnicodeData_Txt() my %DC; my %Bidi; my %Number; - $DC{can} = Table->New(); - $DC{com} = Table->New(); + $DC{Can} = Table->New(); + $DC{Com} = Table->New(); ## Initialize Broken Perl-generated categories ## (Categories from UnicodeData.txt are auto-initialized in gencat) @@ -902,8 +913,11 @@ sub UnicodeData_Txt() my $isspace = ($cat =~ /Zs|Zl|Zp/ && - $code != 0x200B) # 200B is ZWSP which is for line break control - # and therefore it is not part of "space" even while it is "Zs". + $code != 0x200B) # 200B is ZWSP which is for line break control + # and therefore it is not part of "space" even + # while it is "Zs" in some versions of Unicode. + # In 5.1 it is Cf, so this line is no longer + # necessary. || $code == 0x0009 # 0009: HORIZONTAL TAB || $code == 0x000A # 000A: LINE FEED || $code == 0x000B # 000B: VERTICAL TAB @@ -984,7 +998,7 @@ sub UnicodeData_Txt() my ($hexcode, ## code point in hex (e.g. "0041") $name, ## character name (e.g. "LATIN CAPITAL LETTER A") $cat, ## category (e.g. "Lu") - $comb, ## Canonical combining class (e.t. "230") + $comb, ## Canonical combining class (e.g. "230") $bidi, ## directional category (e.g. "L") $deco, ## decomposition mapping $decimal, ## decimal digit value @@ -1061,14 +1075,16 @@ sub UnicodeData_Txt() if ($deco =~/^<(\w+)>/) { my $dshort = $PVA_reverse{dt}{ucfirst lc $1}; - $DC{com}->Append($code); - - $DC{$dshort} ||= Table->New(); - $DC{$dshort}->Append($code); + $DC{Com}->Append($code); + $dshort = $PVA_reverse{dt}{lc $1} unless $dshort ne ""; + die "No reverse for $1'" unless $dshort ne ""; + #$dshort = lc $dshort; # use lower case only + $DC{$dshort} ||= Table->New(); + $DC{$dshort}->Append($code); } else { - $DC{can}->Append($code); + $DC{Can}->Append($code); } } } @@ -2094,8 +2110,8 @@ sub SpecialCasing_txt() my @OUT = ( - $HEADER, "\n", - "# The key UTF-8 _bytes_, the value UTF-8 (speed hack)\n", + $HEADER, $INTERNAL_ONLY, "\n", + "# The key: UTF-8 _bytes_, the value: UTF-8 (speed hack)\n", "%utf8::ToSpec$case =\n(\n", ); @@ -2159,8 +2175,8 @@ sub CaseFolding_txt() my @OUT = ( - $HEADER, "\n", - "# The ke UTF-8 _bytes_, the value UTF-8 (speed hack)\n", + $HEADER, $INTERNAL_ONLY, "\n", + "# The key: UTF-8 _bytes_, the value: UTF-8 (speed hack)\n", "%utf8::ToSpecFold =\n(\n", ); for my $code (sort { $a <=> $b } keys %Fold) { diff --git a/lib/unicore/mktables.lst b/lib/unicore/mktables.lst index 90182b9ec0..f4c55bb64e 100644 --- a/lib/unicore/mktables.lst +++ b/lib/unicore/mktables.lst @@ -1,7 +1,7 @@ # # mktables.lst -- File list for mktables. # -# Autogenerated on Sun Jan 18 18:35:22 2009 +# Autogenerated on Mon Jan 26 17:57:26 2009 # # - First section is input files # (mktables itself is automatically included) @@ -92,23 +92,23 @@ lib/ccc/NR.pl lib/ccc/OV.pl lib/ccc/R.pl lib/ccc/VR.pl -lib/dt/can.pl -lib/dt/com.pl -lib/dt/enc.pl -lib/dt/fin.pl -lib/dt/font.pl -lib/dt/fra.pl -lib/dt/init.pl -lib/dt/iso.pl -lib/dt/med.pl -lib/dt/nar.pl -lib/dt/nb.pl -lib/dt/sml.pl -lib/dt/sqr.pl -lib/dt/sub.pl -lib/dt/sup.pl -lib/dt/vert.pl -lib/dt/wide.pl +lib/dt/Can.pl +lib/dt/Com.pl +lib/dt/Enc.pl +lib/dt/Fin.pl +lib/dt/Font.pl +lib/dt/Fra.pl +lib/dt/Init.pl +lib/dt/Iso.pl +lib/dt/Med.pl +lib/dt/Nar.pl +lib/dt/Nb.pl +lib/dt/Sml.pl +lib/dt/Sqr.pl +lib/dt/Sub.pl +lib/dt/Sup.pl +lib/dt/Vert.pl +lib/dt/Wide.pl lib/ea/A.pl lib/ea/F.pl lib/ea/H.pl @@ -136,7 +136,7 @@ lib/gc_sc/Bugi.pl lib/gc_sc/Buhd.pl lib/gc_sc/C.pl lib/gc_sc/Canadian.pl -lib/gc_sc/Carian.pl +lib/gc_sc/Cari.pl lib/gc_sc/Cc.pl lib/gc_sc/Cf.pl lib/gc_sc/Cham.pl @@ -369,7 +369,7 @@ lib/gc_sc/LC.pl lib/gc_sc/LOE.pl lib/gc_sc/Laoo.pl lib/gc_sc/Latn.pl -lib/gc_sc/Lepcha.pl +lib/gc_sc/Lepc.pl lib/gc_sc/Limb.pl lib/gc_sc/LinearB.pl lib/gc_sc/Ll.pl @@ -380,8 +380,8 @@ lib/gc_sc/Lower.pl lib/gc_sc/Lowercas.pl lib/gc_sc/Lt.pl lib/gc_sc/Lu.pl -lib/gc_sc/Lycian.pl -lib/gc_sc/Lydian.pl +lib/gc_sc/Lyci.pl +lib/gc_sc/Lydi.pl lib/gc_sc/M.pl lib/gc_sc/Math.pl lib/gc_sc/Mc.pl @@ -454,12 +454,12 @@ lib/gc_sc/Qaai.pl lib/gc_sc/Quotatio.pl lib/gc_sc/Radical.pl lib/gc_sc/Radical2.pl -lib/gc_sc/Rejang.pl +lib/gc_sc/Rjng.pl lib/gc_sc/Runr.pl lib/gc_sc/S.pl lib/gc_sc/SD.pl lib/gc_sc/STerm.pl -lib/gc_sc/Saurasht.pl +lib/gc_sc/Saur.pl lib/gc_sc/Sc.pl lib/gc_sc/Shaw.pl lib/gc_sc/Sinh.pl @@ -470,7 +470,7 @@ lib/gc_sc/SoftDott.pl lib/gc_sc/Space.pl lib/gc_sc/SpacePer.pl lib/gc_sc/Sterm2.pl -lib/gc_sc/Sundanes.pl +lib/gc_sc/Sund.pl lib/gc_sc/SylotiNa.pl lib/gc_sc/Syrc.pl lib/gc_sc/Tagb.pl @@ -491,7 +491,7 @@ lib/gc_sc/UnifiedI.pl lib/gc_sc/Upper.pl lib/gc_sc/Uppercas.pl lib/gc_sc/VS.pl -lib/gc_sc/Vai.pl +lib/gc_sc/Vaii.pl lib/gc_sc/Variatio.pl lib/gc_sc/VertSpac.pl lib/gc_sc/WSpace.pl |