summaryrefslogtreecommitdiff
path: root/lib/unicore
diff options
context:
space:
mode:
authorKarl <khw@karl.(none)>2009-01-26 19:31:42 -0700
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2009-01-27 08:17:46 +0100
commitb6922eda63cca3fefe5f447d156d10f6b75bd8eb (patch)
tree4bf47166019364151abbfead15cc450c506db8fd /lib/unicore
parent3127de7dcb470aaa3997c4164384a5bfb3162ffe (diff)
downloadperl-b6922eda63cca3fefe5f447d156d10f6b75bd8eb.tar.gz
Change to use 5.1 Unicode file versions
Diffstat (limited to 'lib/unicore')
-rw-r--r--lib/unicore/NamedSqProv.txt393
-rw-r--r--lib/unicore/PropValueAliases.txt508
-rw-r--r--lib/unicore/README.perl10
-rw-r--r--lib/unicore/mktables48
-rw-r--r--lib/unicore/mktables.lst52
5 files changed, 868 insertions, 143 deletions
diff --git a/lib/unicore/NamedSqProv.txt b/lib/unicore/NamedSqProv.txt
index 155fcc92dc..726806e54b 100644
--- a/lib/unicore/NamedSqProv.txt
+++ b/lib/unicore/NamedSqProv.txt
@@ -1,8 +1,8 @@
-# NamedSequencesProv-5.0.0.txt
-# Date: 2006-05-23, 11:33 PST [KW]
+# NamedSequencesProv-5.1.0.txt
+# Date: 2008-02-14, 12:17 PST [KW]
#
# Unicode Character Database
-# Copyright (c) 1991-2006 Unicode, Inc.
+# Copyright (c) 1991-2008 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
#
@@ -17,7 +17,341 @@
# Note: The order of entries in this file is not significant.
# However, entries are generally in script order corresponding
# to block order in the Unicode Standard, to make it easier
-# to find entries in the list.
+# to find entries currently in the list.
+
+# ================================================
+
+# Provisional entries for NamedSequences.txt.
+
+# Proposed additions for Tamil. 2007-10-19
+#
+# These Tamil named sequences have been accepted for a future
+# version of the Unicode Standard. 2008-02-08
+#
+# All of the Tamil named sequences will be moved to
+# the approved NamedSequences.txt, once the required
+# synchronization with the normative clause of ISO/IEC 10646
+# has been completed, through the ISO ballotting process.
+#
+# A visual display of the Tamil named sequences is available
+# in the documentation for Unicode 5.1. See:
+# http://www.unicode.org/versions/Unicode5.1.0/
+
+TAMIL CONSONANT K; 0B95 0BCD
+TAMIL CONSONANT NG; 0B99 0BCD
+TAMIL CONSONANT C; 0B9A 0BCD
+TAMIL CONSONANT NY; 0B9E 0BCD
+TAMIL CONSONANT TT; 0B9F 0BCD
+TAMIL CONSONANT NN; 0BA3 0BCD
+TAMIL CONSONANT T; 0BA4 0BCD
+TAMIL CONSONANT N; 0BA8 0BCD
+TAMIL CONSONANT P; 0BAA 0BCD
+TAMIL CONSONANT M; 0BAE 0BCD
+TAMIL CONSONANT Y; 0BAF 0BCD
+TAMIL CONSONANT R; 0BB0 0BCD
+TAMIL CONSONANT L; 0BB2 0BCD
+TAMIL CONSONANT V; 0BB5 0BCD
+TAMIL CONSONANT LLL;0BB4 0BCD
+TAMIL CONSONANT LL; 0BB3 0BCD
+TAMIL CONSONANT RR; 0BB1 0BCD
+TAMIL CONSONANT NNN;0BA9 0BCD
+TAMIL CONSONANT J; 0B9C 0BCD
+TAMIL CONSONANT SH; 0BB6 0BCD
+TAMIL CONSONANT SS; 0BB7 0BCD
+TAMIL CONSONANT S; 0BB8 0BCD
+TAMIL CONSONANT H; 0BB9 0BCD
+TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD
+
+TAMIL SYLLABLE KAA; 0B95 0BBE
+TAMIL SYLLABLE KI; 0B95 0BBF
+TAMIL SYLLABLE KII; 0B95 0BC0
+TAMIL SYLLABLE KU; 0B95 0BC1
+TAMIL SYLLABLE KUU; 0B95 0BC2
+TAMIL SYLLABLE KE; 0B95 0BC6
+TAMIL SYLLABLE KEE; 0B95 0BC7
+TAMIL SYLLABLE KAI; 0B95 0BC8
+TAMIL SYLLABLE KO; 0B95 0BCA
+TAMIL SYLLABLE KOO; 0B95 0BCB
+TAMIL SYLLABLE KAU; 0B95 0BCC
+
+TAMIL SYLLABLE NGAA; 0B99 0BBE
+TAMIL SYLLABLE NGI; 0B99 0BBF
+TAMIL SYLLABLE NGII; 0B99 0BC0
+TAMIL SYLLABLE NGU; 0B99 0BC1
+TAMIL SYLLABLE NGUU; 0B99 0BC2
+TAMIL SYLLABLE NGE; 0B99 0BC6
+TAMIL SYLLABLE NGEE; 0B99 0BC7
+TAMIL SYLLABLE NGAI; 0B99 0BC8
+TAMIL SYLLABLE NGO; 0B99 0BCA
+TAMIL SYLLABLE NGOO; 0B99 0BCB
+TAMIL SYLLABLE NGAU; 0B99 0BCC
+
+TAMIL SYLLABLE CAA; 0B9A 0BBE
+TAMIL SYLLABLE CI; 0B9A 0BBF
+TAMIL SYLLABLE CII; 0B9A 0BC0
+TAMIL SYLLABLE CU; 0B9A 0BC1
+TAMIL SYLLABLE CUU; 0B9A 0BC2
+TAMIL SYLLABLE CE; 0B9A 0BC6
+TAMIL SYLLABLE CEE; 0B9A 0BC7
+TAMIL SYLLABLE CAI; 0B9A 0BC8
+TAMIL SYLLABLE CO; 0B9A 0BCA
+TAMIL SYLLABLE COO; 0B9A 0BCB
+TAMIL SYLLABLE CAU; 0B9A 0BCC
+
+TAMIL SYLLABLE NYAA; 0B9E 0BBE
+TAMIL SYLLABLE NYI; 0B9E 0BBF
+TAMIL SYLLABLE NYII; 0B9E 0BC0
+TAMIL SYLLABLE NYU; 0B9E 0BC1
+TAMIL SYLLABLE NYUU; 0B9E 0BC2
+TAMIL SYLLABLE NYE; 0B9E 0BC6
+TAMIL SYLLABLE NYEE; 0B9E 0BC7
+TAMIL SYLLABLE NYAI; 0B9E 0BC8
+TAMIL SYLLABLE NYO; 0B9E 0BCA
+TAMIL SYLLABLE NYOO; 0B9E 0BCB
+TAMIL SYLLABLE NYAU; 0B9E 0BCC
+
+TAMIL SYLLABLE TTAA; 0B9F 0BBE
+TAMIL SYLLABLE TTI; 0B9F 0BBF
+TAMIL SYLLABLE TTII; 0B9F 0BC0
+TAMIL SYLLABLE TTU; 0B9F 0BC1
+TAMIL SYLLABLE TTUU; 0B9F 0BC2
+TAMIL SYLLABLE TTE; 0B9F 0BC6
+TAMIL SYLLABLE TTEE; 0B9F 0BC7
+TAMIL SYLLABLE TTAI; 0B9F 0BC8
+TAMIL SYLLABLE TTO; 0B9F 0BCA
+TAMIL SYLLABLE TTOO; 0B9F 0BCB
+TAMIL SYLLABLE TTAU; 0B9F 0BCC
+
+TAMIL SYLLABLE NNAA; 0BA3 0BBE
+TAMIL SYLLABLE NNI; 0BA3 0BBF
+TAMIL SYLLABLE NNII; 0BA3 0BC0
+TAMIL SYLLABLE NNU; 0BA3 0BC1
+TAMIL SYLLABLE NNUU; 0BA3 0BC2
+TAMIL SYLLABLE NNE; 0BA3 0BC6
+TAMIL SYLLABLE NNEE; 0BA3 0BC7
+TAMIL SYLLABLE NNAI; 0BA3 0BC8
+TAMIL SYLLABLE NNO; 0BA3 0BCA
+TAMIL SYLLABLE NNOO; 0BA3 0BCB
+TAMIL SYLLABLE NNAU; 0BA3 0BCC
+
+TAMIL SYLLABLE TAA; 0BA4 0BBE
+TAMIL SYLLABLE TI; 0BA4 0BBF
+TAMIL SYLLABLE TII; 0BA4 0BC0
+TAMIL SYLLABLE TU; 0BA4 0BC1
+TAMIL SYLLABLE TUU; 0BA4 0BC2
+TAMIL SYLLABLE TE; 0BA4 0BC6
+TAMIL SYLLABLE TEE; 0BA4 0BC7
+TAMIL SYLLABLE TAI; 0BA4 0BC8
+TAMIL SYLLABLE TO; 0BA4 0BCA
+TAMIL SYLLABLE TOO; 0BA4 0BCB
+TAMIL SYLLABLE TAU; 0BA4 0BCC
+
+TAMIL SYLLABLE NAA; 0BA8 0BBE
+TAMIL SYLLABLE NI; 0BA8 0BBF
+TAMIL SYLLABLE NII; 0BA8 0BC0
+TAMIL SYLLABLE NU; 0BA8 0BC1
+TAMIL SYLLABLE NUU; 0BA8 0BC2
+TAMIL SYLLABLE NE; 0BA8 0BC6
+TAMIL SYLLABLE NEE; 0BA8 0BC7
+TAMIL SYLLABLE NAI; 0BA8 0BC8
+TAMIL SYLLABLE NO; 0BA8 0BCA
+TAMIL SYLLABLE NOO; 0BA8 0BCB
+TAMIL SYLLABLE NAU; 0BA8 0BCC
+
+TAMIL SYLLABLE PAA; 0BAA 0BBE
+TAMIL SYLLABLE PI; 0BAA 0BBF
+TAMIL SYLLABLE PII; 0BAA 0BC0
+TAMIL SYLLABLE PU; 0BAA 0BC1
+TAMIL SYLLABLE PUU; 0BAA 0BC2
+TAMIL SYLLABLE PE; 0BAA 0BC6
+TAMIL SYLLABLE PEE; 0BAA 0BC7
+TAMIL SYLLABLE PAI; 0BAA 0BC8
+TAMIL SYLLABLE PO; 0BAA 0BCA
+TAMIL SYLLABLE POO; 0BAA 0BCB
+TAMIL SYLLABLE PAU; 0BAA 0BCC
+
+TAMIL SYLLABLE MAA; 0BAE 0BBE
+TAMIL SYLLABLE MI; 0BAE 0BBF
+TAMIL SYLLABLE MII; 0BAE 0BC0
+TAMIL SYLLABLE MU; 0BAE 0BC1
+TAMIL SYLLABLE MUU; 0BAE 0BC2
+TAMIL SYLLABLE ME; 0BAE 0BC6
+TAMIL SYLLABLE MEE; 0BAE 0BC7
+TAMIL SYLLABLE MAI; 0BAE 0BC8
+TAMIL SYLLABLE MO; 0BAE 0BCA
+TAMIL SYLLABLE MOO; 0BAE 0BCB
+TAMIL SYLLABLE MAU; 0BAE 0BCC
+
+TAMIL SYLLABLE YAA; 0BAF 0BBE
+TAMIL SYLLABLE YI; 0BAF 0BBF
+TAMIL SYLLABLE YII; 0BAF 0BC0
+TAMIL SYLLABLE YU; 0BAF 0BC1
+TAMIL SYLLABLE YUU; 0BAF 0BC2
+TAMIL SYLLABLE YE; 0BAF 0BC6
+TAMIL SYLLABLE YEE; 0BAF 0BC7
+TAMIL SYLLABLE YAI; 0BAF 0BC8
+TAMIL SYLLABLE YO; 0BAF 0BCA
+TAMIL SYLLABLE YOO; 0BAF 0BCB
+TAMIL SYLLABLE YAU; 0BAF 0BCC
+
+TAMIL SYLLABLE RAA; 0BB0 0BBE
+TAMIL SYLLABLE RI; 0BB0 0BBF
+TAMIL SYLLABLE RII; 0BB0 0BC0
+TAMIL SYLLABLE RU; 0BB0 0BC1
+TAMIL SYLLABLE RUU; 0BB0 0BC2
+TAMIL SYLLABLE RE; 0BB0 0BC6
+TAMIL SYLLABLE REE; 0BB0 0BC7
+TAMIL SYLLABLE RAI; 0BB0 0BC8
+TAMIL SYLLABLE RO; 0BB0 0BCA
+TAMIL SYLLABLE ROO; 0BB0 0BCB
+TAMIL SYLLABLE RAU; 0BB0 0BCC
+
+TAMIL SYLLABLE LAA; 0BB2 0BBE
+TAMIL SYLLABLE LI; 0BB2 0BBF
+TAMIL SYLLABLE LII; 0BB2 0BC0
+TAMIL SYLLABLE LU; 0BB2 0BC1
+TAMIL SYLLABLE LUU; 0BB2 0BC2
+TAMIL SYLLABLE LE; 0BB2 0BC6
+TAMIL SYLLABLE LEE; 0BB2 0BC7
+TAMIL SYLLABLE LAI; 0BB2 0BC8
+TAMIL SYLLABLE LO; 0BB2 0BCA
+TAMIL SYLLABLE LOO; 0BB2 0BCB
+TAMIL SYLLABLE LAU; 0BB2 0BCC
+
+TAMIL SYLLABLE VAA; 0BB5 0BBE
+TAMIL SYLLABLE VI; 0BB5 0BBF
+TAMIL SYLLABLE VII; 0BB5 0BC0
+TAMIL SYLLABLE VU; 0BB5 0BC1
+TAMIL SYLLABLE VUU; 0BB5 0BC2
+TAMIL SYLLABLE VE; 0BB5 0BC6
+TAMIL SYLLABLE VEE; 0BB5 0BC7
+TAMIL SYLLABLE VAI; 0BB5 0BC8
+TAMIL SYLLABLE VO; 0BB5 0BCA
+TAMIL SYLLABLE VOO; 0BB5 0BCB
+TAMIL SYLLABLE VAU; 0BB5 0BCC
+
+TAMIL SYLLABLE LLLAA; 0BB4 0BBE
+TAMIL SYLLABLE LLLI; 0BB4 0BBF
+TAMIL SYLLABLE LLLII; 0BB4 0BC0
+TAMIL SYLLABLE LLLU; 0BB4 0BC1
+TAMIL SYLLABLE LLLUU; 0BB4 0BC2
+TAMIL SYLLABLE LLLE; 0BB4 0BC6
+TAMIL SYLLABLE LLLEE; 0BB4 0BC7
+TAMIL SYLLABLE LLLAI; 0BB4 0BC8
+TAMIL SYLLABLE LLLO; 0BB4 0BCA
+TAMIL SYLLABLE LLLOO; 0BB4 0BCB
+TAMIL SYLLABLE LLLAU; 0BB4 0BCC
+
+TAMIL SYLLABLE LLAA; 0BB3 0BBE
+TAMIL SYLLABLE LLI; 0BB3 0BBF
+TAMIL SYLLABLE LLII; 0BB3 0BC0
+TAMIL SYLLABLE LLU; 0BB3 0BC1
+TAMIL SYLLABLE LLUU; 0BB3 0BC2
+TAMIL SYLLABLE LLE; 0BB3 0BC6
+TAMIL SYLLABLE LLEE; 0BB3 0BC7
+TAMIL SYLLABLE LLAI; 0BB3 0BC8
+TAMIL SYLLABLE LLO; 0BB3 0BCA
+TAMIL SYLLABLE LLOO; 0BB3 0BCB
+TAMIL SYLLABLE LLAU; 0BB3 0BCC
+
+TAMIL SYLLABLE RRAA; 0BB1 0BBE
+TAMIL SYLLABLE RRI; 0BB1 0BBF
+TAMIL SYLLABLE RRII; 0BB1 0BC0
+TAMIL SYLLABLE RRU; 0BB1 0BC1
+TAMIL SYLLABLE RRUU; 0BB1 0BC2
+TAMIL SYLLABLE RRE; 0BB1 0BC6
+TAMIL SYLLABLE RREE; 0BB1 0BC7
+TAMIL SYLLABLE RRAI; 0BB1 0BC8
+TAMIL SYLLABLE RRO; 0BB1 0BCA
+TAMIL SYLLABLE RROO; 0BB1 0BCB
+TAMIL SYLLABLE RRAU; 0BB1 0BCC
+
+TAMIL SYLLABLE NNNAA; 0BA9 0BBE
+TAMIL SYLLABLE NNNI; 0BA9 0BBF
+TAMIL SYLLABLE NNNII; 0BA9 0BC0
+TAMIL SYLLABLE NNNU; 0BA9 0BC1
+TAMIL SYLLABLE NNNUU; 0BA9 0BC2
+TAMIL SYLLABLE NNNE; 0BA9 0BC6
+TAMIL SYLLABLE NNNEE; 0BA9 0BC7
+TAMIL SYLLABLE NNNAI; 0BA9 0BC8
+TAMIL SYLLABLE NNNO; 0BA9 0BCA
+TAMIL SYLLABLE NNNOO; 0BA9 0BCB
+TAMIL SYLLABLE NNNAU; 0BA9 0BCC
+
+TAMIL SYLLABLE JAA; 0B9C 0BBE
+TAMIL SYLLABLE JI; 0B9C 0BBF
+TAMIL SYLLABLE JII; 0B9C 0BC0
+TAMIL SYLLABLE JU; 0B9C 0BC1
+TAMIL SYLLABLE JUU; 0B9C 0BC2
+TAMIL SYLLABLE JE; 0B9C 0BC6
+TAMIL SYLLABLE JEE; 0B9C 0BC7
+TAMIL SYLLABLE JAI; 0B9C 0BC8
+TAMIL SYLLABLE JO; 0B9C 0BCA
+TAMIL SYLLABLE JOO; 0B9C 0BCB
+TAMIL SYLLABLE JAU; 0B9C 0BCC
+
+TAMIL SYLLABLE SHAA; 0BB6 0BBE
+TAMIL SYLLABLE SHI; 0BB6 0BBF
+TAMIL SYLLABLE SHII; 0BB6 0BC0
+TAMIL SYLLABLE SHU; 0BB6 0BC1
+TAMIL SYLLABLE SHUU; 0BB6 0BC2
+TAMIL SYLLABLE SHE; 0BB6 0BC6
+TAMIL SYLLABLE SHEE; 0BB6 0BC7
+TAMIL SYLLABLE SHAI; 0BB6 0BC8
+TAMIL SYLLABLE SHO; 0BB6 0BCA
+TAMIL SYLLABLE SHOO; 0BB6 0BCB
+TAMIL SYLLABLE SHAU; 0BB6 0BCC
+
+TAMIL SYLLABLE SSAA; 0BB7 0BBE
+TAMIL SYLLABLE SSI; 0BB7 0BBF
+TAMIL SYLLABLE SSII; 0BB7 0BC0
+TAMIL SYLLABLE SSU; 0BB7 0BC1
+TAMIL SYLLABLE SSUU; 0BB7 0BC2
+TAMIL SYLLABLE SSE; 0BB7 0BC6
+TAMIL SYLLABLE SSEE; 0BB7 0BC7
+TAMIL SYLLABLE SSAI; 0BB7 0BC8
+TAMIL SYLLABLE SSO; 0BB7 0BCA
+TAMIL SYLLABLE SSOO; 0BB7 0BCB
+TAMIL SYLLABLE SSAU; 0BB7 0BCC
+
+TAMIL SYLLABLE SAA; 0BB8 0BBE
+TAMIL SYLLABLE SI; 0BB8 0BBF
+TAMIL SYLLABLE SII; 0BB8 0BC0
+TAMIL SYLLABLE SU; 0BB8 0BC1
+TAMIL SYLLABLE SUU; 0BB8 0BC2
+TAMIL SYLLABLE SE; 0BB8 0BC6
+TAMIL SYLLABLE SEE; 0BB8 0BC7
+TAMIL SYLLABLE SAI; 0BB8 0BC8
+TAMIL SYLLABLE SO; 0BB8 0BCA
+TAMIL SYLLABLE SOO; 0BB8 0BCB
+TAMIL SYLLABLE SAU; 0BB8 0BCC
+
+TAMIL SYLLABLE HAA; 0BB9 0BBE
+TAMIL SYLLABLE HI; 0BB9 0BBF
+TAMIL SYLLABLE HII; 0BB9 0BC0
+TAMIL SYLLABLE HU; 0BB9 0BC1
+TAMIL SYLLABLE HUU; 0BB9 0BC2
+TAMIL SYLLABLE HE; 0BB9 0BC6
+TAMIL SYLLABLE HEE; 0BB9 0BC7
+TAMIL SYLLABLE HAI; 0BB9 0BC8
+TAMIL SYLLABLE HO; 0BB9 0BCA
+TAMIL SYLLABLE HOO; 0BB9 0BCB
+TAMIL SYLLABLE HAU; 0BB9 0BCC
+
+TAMIL SYLLABLE KSSA; 0B95 0BCD 0BB7
+TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE
+TAMIL SYLLABLE KSSI; 0B95 0BCD 0BB7 0BBF
+TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0
+TAMIL SYLLABLE KSSU; 0B95 0BCD 0BB7 0BC1
+TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2
+TAMIL SYLLABLE KSSE; 0B95 0BCD 0BB7 0BC6
+TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7
+TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8
+TAMIL SYLLABLE KSSO; 0B95 0BCD 0BB7 0BCA
+TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB
+TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC
+
+TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0
# ================================================
@@ -33,52 +367,7 @@
# LATIN SMALL LETTER A WITH ACUTE AND OGONEK;00E1 0328
#
# This entry was removed because the sequence was not in NFC,
-# as required. It will be replaced with the NFC version of
-# the sequence, based on the Lithuanian additions proposed
-# here for a future version of the standard.
-
-# ================================================
-
-# Provisional entries for NamedSequences.txt.
+# as required. It was replaced with the NFC version of
+# the sequence, based on the Lithuanian additions accepted
+# for Unicode 5.0.
-# Proposed additions for Lithuanian. 2006-05-18
-
-LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301
-LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301
-LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303
-LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303
-LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301
-LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301
-LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303
-LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303
-LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301
-LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301
-LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303
-LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303
-LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300
-LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303
-LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301
-LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301
-LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303
-LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303
-LATIN CAPITAL LETTER J WITH TILDE;004A 0303
-LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303
-LATIN CAPITAL LETTER L WITH TILDE;004C 0303
-LATIN SMALL LETTER L WITH TILDE;006C 0303
-LATIN CAPITAL LETTER M WITH TILDE;004D 0303
-LATIN SMALL LETTER M WITH TILDE;006D 0303
-LATIN CAPITAL LETTER R WITH TILDE;0052 0303
-LATIN SMALL LETTER R WITH TILDE;0072 0303
-LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301
-LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301
-LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303
-LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303
-LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301
-LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301
-LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303
-LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303
-
-# Proposed additions for Tamil.
-
-TAMIL LETTER KSSA;0B95 0BCD 0BB7
-TAMIL LETTER SHRII;0BB6 0BCD 0BB0 0BC0
diff --git a/lib/unicore/PropValueAliases.txt b/lib/unicore/PropValueAliases.txt
index 60546fb0b5..61f2238916 100644
--- a/lib/unicore/PropValueAliases.txt
+++ b/lib/unicore/PropValueAliases.txt
@@ -1,8 +1,8 @@
-# PropertyValueAliases-5.0.0.txt
-# Date: 2006-03-03, 08:23:34 GMT [MD]
+# PropertyValueAliases-5.1.0.txt
+# Date: 2008-03-03, 21:58:08 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2006 Unicode, Inc.
+# Copyright (c) 1991-2008 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
#
@@ -55,6 +55,11 @@
# ================================================
+# ASCII_Hex_Digit (AHex)
+
+AHex; N ; No ; F ; False
+AHex; Y ; Yes ; T ; True
+
# Age (age)
age; n/a ; 1.1
@@ -66,8 +71,14 @@ age; n/a ; 3.2
age; n/a ; 4.0
age; n/a ; 4.1
age; n/a ; 5.0
+age; n/a ; 5.1
age; n/a ; unassigned
+# Alphabetic (Alpha)
+
+Alpha; N ; No ; F ; False
+Alpha; Y ; Yes ; T ; True
+
# Bidi_Class (bc)
bc ; AL ; Arabic_Letter
@@ -90,20 +101,35 @@ bc ; RLO ; Right_To_Left_Override
bc ; S ; Segment_Separator
bc ; WS ; White_Space
+# Bidi_Control (Bidi_C)
+
+Bidi_C; N ; No ; F ; False
+Bidi_C; Y ; Yes ; T ; True
+
+# Bidi_Mirrored (Bidi_M)
+
+Bidi_M; N ; No ; F ; False
+Bidi_M; Y ; Yes ; T ; True
+
+# Bidi_Mirroring_Glyph (bmg)
+
+# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; <none>
+
# Block (blk)
blk; n/a ; Aegean_Numbers
blk; n/a ; Alphabetic_Presentation_Forms
blk; n/a ; Ancient_Greek_Musical_Notation
blk; n/a ; Ancient_Greek_Numbers
+blk; n/a ; Ancient_Symbols
blk; n/a ; Arabic
-blk; n/a ; Arabic_Presentation_Forms-A
-blk; n/a ; Arabic_Presentation_Forms-B
+blk; n/a ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A
+blk; n/a ; Arabic_Presentation_Forms_B
blk; n/a ; Arabic_Supplement
blk; n/a ; Armenian
blk; n/a ; Arrows
blk; n/a ; Balinese
-blk; n/a ; Basic_Latin
+blk; n/a ; Basic_Latin ; ASCII
blk; n/a ; Bengali
blk; n/a ; Block_Elements
blk; n/a ; Bopomofo
@@ -113,6 +139,8 @@ blk; n/a ; Braille_Patterns
blk; n/a ; Buginese
blk; n/a ; Buhid
blk; n/a ; Byzantine_Musical_Symbols
+blk; n/a ; Carian
+blk; n/a ; Cham
blk; n/a ; Cherokee
blk; n/a ; CJK_Compatibility
blk; n/a ; CJK_Compatibility_Forms
@@ -120,28 +148,31 @@ blk; n/a ; CJK_Compatibility_Ideographs
blk; n/a ; CJK_Compatibility_Ideographs_Supplement
blk; n/a ; CJK_Radicals_Supplement
blk; n/a ; CJK_Strokes
-blk; n/a ; CJK_Symbols_and_Punctuation
+blk; n/a ; CJK_Symbols_And_Punctuation
blk; n/a ; CJK_Unified_Ideographs
blk; n/a ; CJK_Unified_Ideographs_Extension_A
blk; n/a ; CJK_Unified_Ideographs_Extension_B
blk; n/a ; Combining_Diacritical_Marks
-blk; n/a ; Combining_Diacritical_Marks_for_Symbols
+blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
blk; n/a ; Combining_Diacritical_Marks_Supplement
blk; n/a ; Combining_Half_Marks
blk; n/a ; Control_Pictures
blk; n/a ; Coptic
blk; n/a ; Counting_Rod_Numerals
blk; n/a ; Cuneiform
-blk; n/a ; Cuneiform_Numbers_and_Punctuation
+blk; n/a ; Cuneiform_Numbers_And_Punctuation
blk; n/a ; Currency_Symbols
blk; n/a ; Cypriot_Syllabary
blk; n/a ; Cyrillic
+blk; n/a ; Cyrillic_Extended_A
+blk; n/a ; Cyrillic_Extended_B
blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary
blk; n/a ; Deseret
blk; n/a ; Devanagari
blk; n/a ; Dingbats
+blk; n/a ; Domino_Tiles
blk; n/a ; Enclosed_Alphanumerics
-blk; n/a ; Enclosed_CJK_Letters_and_Months
+blk; n/a ; Enclosed_CJK_Letters_And_Months
blk; n/a ; Ethiopic
blk; n/a ; Ethiopic_Extended
blk; n/a ; Ethiopic_Supplement
@@ -151,11 +182,11 @@ blk; n/a ; Georgian
blk; n/a ; Georgian_Supplement
blk; n/a ; Glagolitic
blk; n/a ; Gothic
-blk; n/a ; Greek_and_Coptic
+blk; n/a ; Greek_And_Coptic ; Greek
blk; n/a ; Greek_Extended
blk; n/a ; Gujarati
blk; n/a ; Gurmukhi
-blk; n/a ; Halfwidth_and_Fullwidth_Forms
+blk; n/a ; Halfwidth_And_Fullwidth_Forms
blk; n/a ; Hangul_Compatibility_Jamo
blk; n/a ; Hangul_Jamo
blk; n/a ; Hangul_Syllables
@@ -171,28 +202,33 @@ blk; n/a ; Kangxi_Radicals
blk; n/a ; Kannada
blk; n/a ; Katakana
blk; n/a ; Katakana_Phonetic_Extensions
+blk; n/a ; Kayah_Li
blk; n/a ; Kharoshthi
blk; n/a ; Khmer
blk; n/a ; Khmer_Symbols
blk; n/a ; Lao
-blk; n/a ; Latin-1_Supplement
-blk; n/a ; Latin_Extended-A
-blk; n/a ; Latin_Extended-B
-blk; n/a ; Latin_Extended-C
-blk; n/a ; Latin_Extended-D
+blk; n/a ; Latin_1_Supplement ; Latin_1
+blk; n/a ; Latin_Extended_A
blk; n/a ; Latin_Extended_Additional
+blk; n/a ; Latin_Extended_B
+blk; n/a ; Latin_Extended_C
+blk; n/a ; Latin_Extended_D
+blk; n/a ; Lepcha
blk; n/a ; Letterlike_Symbols
blk; n/a ; Limbu
blk; n/a ; Linear_B_Ideograms
blk; n/a ; Linear_B_Syllabary
blk; n/a ; Low_Surrogates
+blk; n/a ; Lycian
+blk; n/a ; Lydian
+blk; n/a ; Mahjong_Tiles
blk; n/a ; Malayalam
blk; n/a ; Mathematical_Alphanumeric_Symbols
blk; n/a ; Mathematical_Operators
-blk; n/a ; Miscellaneous_Mathematical_Symbols-A
-blk; n/a ; Miscellaneous_Mathematical_Symbols-B
+blk; n/a ; Miscellaneous_Mathematical_Symbols_A
+blk; n/a ; Miscellaneous_Mathematical_Symbols_B
blk; n/a ; Miscellaneous_Symbols
-blk; n/a ; Miscellaneous_Symbols_and_Arrows
+blk; n/a ; Miscellaneous_Symbols_And_Arrows
blk; n/a ; Miscellaneous_Technical
blk; n/a ; Modifier_Tone_Letters
blk; n/a ; Mongolian
@@ -203,29 +239,34 @@ blk; n/a ; NKo
blk; n/a ; No_Block
blk; n/a ; Number_Forms
blk; n/a ; Ogham
+blk; n/a ; Ol_Chiki
blk; n/a ; Old_Italic
blk; n/a ; Old_Persian
blk; n/a ; Optical_Character_Recognition
blk; n/a ; Oriya
blk; n/a ; Osmanya
-blk; n/a ; Phags-pa
+blk; n/a ; Phags_Pa
+blk; n/a ; Phaistos_Disc
blk; n/a ; Phoenician
blk; n/a ; Phonetic_Extensions
blk; n/a ; Phonetic_Extensions_Supplement
-blk; n/a ; Private_Use_Area
+blk; n/a ; Private_Use_Area ; Private_Use
+blk; n/a ; Rejang
blk; n/a ; Runic
+blk; n/a ; Saurashtra
blk; n/a ; Shavian
blk; n/a ; Sinhala
blk; n/a ; Small_Form_Variants
blk; n/a ; Spacing_Modifier_Letters
blk; n/a ; Specials
-blk; n/a ; Superscripts_and_Subscripts
-blk; n/a ; Supplemental_Arrows-A
-blk; n/a ; Supplemental_Arrows-B
+blk; n/a ; Sundanese
+blk; n/a ; Superscripts_And_Subscripts
+blk; n/a ; Supplemental_Arrows_A
+blk; n/a ; Supplemental_Arrows_B
blk; n/a ; Supplemental_Mathematical_Operators
blk; n/a ; Supplemental_Punctuation
-blk; n/a ; Supplementary_Private_Use_Area-A
-blk; n/a ; Supplementary_Private_Use_Area-B
+blk; n/a ; Supplementary_Private_Use_Area_A
+blk; n/a ; Supplementary_Private_Use_Area_B
blk; n/a ; Syloti_Nagri
blk; n/a ; Syriac
blk; n/a ; Tagalog
@@ -240,7 +281,8 @@ blk; n/a ; Thai
blk; n/a ; Tibetan
blk; n/a ; Tifinagh
blk; n/a ; Ugaritic
-blk; n/a ; Unified_Canadian_Aboriginal_Syllabics
+blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
+blk; n/a ; Vai
blk; n/a ; Variation_Selectors
blk; n/a ; Variation_Selectors_Supplement
blk; n/a ; Vertical_Forms
@@ -270,26 +312,59 @@ ccc; 233; DB ; Double_Below
ccc; 234; DA ; Double_Above
ccc; 240; IS ; Iota_Subscript
+# Case_Folding (cf)
+
+# @missing: 0000..10FFFF; Case_Folding; <code point>
+
+# Composition_Exclusion (CE)
+
+CE ; N ; No ; F ; False
+CE ; Y ; Yes ; T ; True
+
+# Dash (Dash)
+
+Dash; N ; No ; F ; False
+Dash; Y ; Yes ; T ; True
+
+# Decomposition_Mapping (dm)
+
+# @missing: 0000..10FFFF; Decomposition_Mapping; <code point>
+
# Decomposition_Type (dt)
-dt ; can ; Canonical
-dt ; com ; Compat
-dt ; enc ; Circle
-dt ; fin ; Final
-dt ; font ; Font
-dt ; fra ; Fraction
-dt ; init ; Initial
-dt ; iso ; Isolated
-dt ; med ; Medial
-dt ; nar ; Narrow
-dt ; nb ; Nobreak
-dt ; none ; None
-dt ; sml ; Small
-dt ; sqr ; Square
-dt ; sub ; Sub
-dt ; sup ; Super
-dt ; vert ; Vertical
-dt ; wide ; Wide
+dt ; Can ; Canonical ; can
+dt ; Com ; Compat ; com
+dt ; Enc ; Circle ; enc
+dt ; Fin ; Final ; fin
+dt ; Font ; font
+dt ; Fra ; Fraction ; fra
+dt ; Init ; Initial ; init
+dt ; Iso ; Isolated ; iso
+dt ; Med ; Medial ; med
+dt ; Nar ; Narrow ; nar
+dt ; Nb ; Nobreak ; nb
+dt ; None ; none
+dt ; Sml ; Small ; sml
+dt ; Sqr ; Square ; sqr
+dt ; Sub ; sub
+dt ; Sup ; Super ; sup
+dt ; Vert ; Vertical ; vert
+dt ; Wide ; wide
+
+# Default_Ignorable_Code_Point (DI)
+
+DI ; N ; No ; F ; False
+DI ; Y ; Yes ; T ; True
+
+# Deprecated (Dep)
+
+Dep; N ; No ; F ; False
+Dep; Y ; Yes ; T ; True
+
+# Diacritic (Dia)
+
+Dia; N ; No ; F ; False
+Dia; Y ; Yes ; T ; True
# East_Asian_Width (ea)
@@ -300,6 +375,40 @@ ea ; N ; Neutral
ea ; Na ; Narrow
ea ; W ; Wide
+# Expands_On_NFC (XO_NFC)
+
+XO_NFC; N ; No ; F ; False
+XO_NFC; Y ; Yes ; T ; True
+
+# Expands_On_NFD (XO_NFD)
+
+XO_NFD; N ; No ; F ; False
+XO_NFD; Y ; Yes ; T ; True
+
+# Expands_On_NFKC (XO_NFKC)
+
+XO_NFKC; N ; No ; F ; False
+XO_NFKC; Y ; Yes ; T ; True
+
+# Expands_On_NFKD (XO_NFKD)
+
+XO_NFKD; N ; No ; F ; False
+XO_NFKD; Y ; Yes ; T ; True
+
+# Extender (Ext)
+
+Ext; N ; No ; F ; False
+Ext; Y ; Yes ; T ; True
+
+# FC_NFKC_Closure (FC_NFKC)
+
+# @missing: 0000..10FFFF; FC_NFKC_Closure; <code point>
+
+# Full_Composition_Exclusion (Comp_Ex)
+
+Comp_Ex; N ; No ; F ; False
+Comp_Ex; Y ; Yes ; T ; True
+
# General_Category (gc)
gc ; C ; Other # Cc | Cf | Cn | Co | Cs
@@ -341,6 +450,11 @@ gc ; Zl ; Line_Separator
gc ; Zp ; Paragraph_Separator
gc ; Zs ; Space_Separator
+# Grapheme_Base (Gr_Base)
+
+Gr_Base; N ; No ; F ; False
+Gr_Base; Y ; Yes ; T ; True
+
# Grapheme_Cluster_Break (GCB)
GCB; CN ; Control
@@ -350,10 +464,22 @@ GCB; L ; L
GCB; LF ; LF
GCB; LV ; LV
GCB; LVT ; LVT
+GCB; PP ; Prepend
+GCB; SM ; SpacingMark
GCB; T ; T
GCB; V ; V
GCB; XX ; Other
+# Grapheme_Extend (Gr_Ext)
+
+Gr_Ext; N ; No ; F ; False
+Gr_Ext; Y ; Yes ; T ; True
+
+# Grapheme_Link (Gr_Link)
+
+Gr_Link; N ; No ; F ; False
+Gr_Link; Y ; Yes ; T ; True
+
# Hangul_Syllable_Type (hst)
hst; L ; Leading_Jamo
@@ -363,6 +489,106 @@ hst; NA ; Not_Applicable
hst; T ; Trailing_Jamo
hst; V ; Vowel_Jamo
+# Hex_Digit (Hex)
+
+Hex; N ; No ; F ; False
+Hex; Y ; Yes ; T ; True
+
+# Hyphen (Hyphen)
+
+Hyphen; N ; No ; F ; False
+Hyphen; Y ; Yes ; T ; True
+
+# IDS_Binary_Operator (IDSB)
+
+IDSB; N ; No ; F ; False
+IDSB; Y ; Yes ; T ; True
+
+# IDS_Trinary_Operator (IDST)
+
+IDST; N ; No ; F ; False
+IDST; Y ; Yes ; T ; True
+
+# ID_Continue (IDC)
+
+IDC; N ; No ; F ; False
+IDC; Y ; Yes ; T ; True
+
+# ID_Start (IDS)
+
+IDS; N ; No ; F ; False
+IDS; Y ; Yes ; T ; True
+
+# ISO_Comment (isc)
+
+# @missing: 0000..10FFFF; ISO_Comment; <none>
+
+# Ideographic (Ideo)
+
+Ideo; N ; No ; F ; False
+Ideo; Y ; Yes ; T ; True
+
+# Jamo_Short_Name (JSN)
+
+# @missing: 0000..10FFFF; Jamo_Short_Name; <none>
+JSN; A ; A
+JSN; AE ; AE
+JSN; B ; B
+JSN; BB ; BB
+JSN; BS ; BS
+JSN; C ; C
+JSN; D ; D
+JSN; DD ; DD
+JSN; E ; E
+JSN; EO ; EO
+JSN; EU ; EU
+JSN; G ; G
+JSN; GG ; GG
+JSN; GS ; GS
+JSN; H ; H
+JSN; I ; I
+JSN; J ; J
+JSN; JJ ; JJ
+JSN; K ; K
+JSN; L ; L
+JSN; LB ; LB
+JSN; LG ; LG
+JSN; LH ; LH
+JSN; LM ; LM
+JSN; LP ; LP
+JSN; LS ; LS
+JSN; LT ; LT
+JSN; M ; M
+JSN; N ; N
+JSN; NG ; NG
+JSN; NH ; NH
+JSN; NJ ; NJ
+JSN; O ; O
+JSN; OE ; OE
+JSN; P ; P
+JSN; R ; R
+JSN; S ; S
+JSN; SS ; SS
+JSN; T ; T
+JSN; U ; U
+JSN; WA ; WA
+JSN; WAE ; WAE
+JSN; WE ; WE
+JSN; WEO ; WEO
+JSN; WI ; WI
+JSN; YA ; YA
+JSN; YAE ; YAE
+JSN; YE ; YE
+JSN; YEO ; YEO
+JSN; YI ; YI
+JSN; YO ; YO
+JSN; YU ; YU
+
+# Join_Control (Join_C)
+
+Join_C; N ; No ; F ; False
+Join_C; Y ; Yes ; T ; True
+
# Joining_Group (jg)
jg ; n/a ; Ain
@@ -370,6 +596,7 @@ jg ; n/a ; Alaph
jg ; n/a ; Alef
jg ; n/a ; Beh
jg ; n/a ; Beth
+jg ; n/a ; Burushaski_Yeh_Barree
jg ; n/a ; Dal
jg ; n/a ; Dalath_Rish
jg ; n/a ; E
@@ -468,6 +695,25 @@ lb ; WJ ; Word_Joiner
lb ; XX ; Unknown
lb ; ZW ; ZWSpace
+# Logical_Order_Exception (LOE)
+
+LOE; N ; No ; F ; False
+LOE; Y ; Yes ; T ; True
+
+# Lowercase (Lower)
+
+Lower; N ; No ; F ; False
+Lower; Y ; Yes ; T ; True
+
+# Lowercase_Mapping (lc)
+
+# @missing: 0000..10FFFF; Lowercase_Mapping; <code point>
+
+# Math (Math)
+
+Math; N ; No ; F ; False
+Math; Y ; Yes ; T ; True
+
# NFC_Quick_Check (NFC_QC)
NFC_QC; M ; Maybe
@@ -490,6 +736,15 @@ NFKC_QC; Y ; Yes
NFKD_QC; N ; No
NFKD_QC; Y ; Yes
+# Name (na)
+
+# @missing: 0000..10FFFF; Name; <none>
+
+# Noncharacter_Code_Point (NChar)
+
+NChar; N ; No ; F ; False
+NChar; Y ; Yes ; T ; True
+
# Numeric_Type (nt)
nt ; De ; Decimal
@@ -497,6 +752,75 @@ nt ; Di ; Digit
nt ; None ; None
nt ; Nu ; Numeric
+# Numeric_Value (nv)
+
+# @missing: 0000..10FFFF; Numeric_Value; NaN
+
+# Other_Alphabetic (OAlpha)
+
+OAlpha; N ; No ; F ; False
+OAlpha; Y ; Yes ; T ; True
+
+# Other_Default_Ignorable_Code_Point (ODI)
+
+ODI; N ; No ; F ; False
+ODI; Y ; Yes ; T ; True
+
+# Other_Grapheme_Extend (OGr_Ext)
+
+OGr_Ext; N ; No ; F ; False
+OGr_Ext; Y ; Yes ; T ; True
+
+# Other_ID_Continue (OIDC)
+
+OIDC; N ; No ; F ; False
+OIDC; Y ; Yes ; T ; True
+
+# Other_ID_Start (OIDS)
+
+OIDS; N ; No ; F ; False
+OIDS; Y ; Yes ; T ; True
+
+# Other_Lowercase (OLower)
+
+OLower; N ; No ; F ; False
+OLower; Y ; Yes ; T ; True
+
+# Other_Math (OMath)
+
+OMath; N ; No ; F ; False
+OMath; Y ; Yes ; T ; True
+
+# Other_Uppercase (OUpper)
+
+OUpper; N ; No ; F ; False
+OUpper; Y ; Yes ; T ; True
+
+# Pattern_Syntax (Pat_Syn)
+
+Pat_Syn; N ; No ; F ; False
+Pat_Syn; Y ; Yes ; T ; True
+
+# Pattern_White_Space (Pat_WS)
+
+Pat_WS; N ; No ; F ; False
+Pat_WS; Y ; Yes ; T ; True
+
+# Quotation_Mark (QMark)
+
+QMark; N ; No ; F ; False
+QMark; Y ; Yes ; T ; True
+
+# Radical (Radical)
+
+Radical; N ; No ; F ; False
+Radical; Y ; Yes ; T ; True
+
+# STerm (STerm)
+
+STerm; N ; No ; F ; False
+STerm; Y ; Yes ; T ; True
+
# Script (sc)
sc ; Arab ; Arabic
@@ -508,6 +832,8 @@ sc ; Brai ; Braille
sc ; Bugi ; Buginese
sc ; Buhd ; Buhid
sc ; Cans ; Canadian_Aboriginal
+sc ; Cari ; Carian
+sc ; Cham ; Cham
sc ; Cher ; Cherokee
sc ; Copt ; Coptic ; Qaac
sc ; Cprt ; Cypriot
@@ -528,27 +854,35 @@ sc ; Hebr ; Hebrew
sc ; Hira ; Hiragana
sc ; Hrkt ; Katakana_Or_Hiragana
sc ; Ital ; Old_Italic
+sc ; Kali ; Kayah_Li
sc ; Kana ; Katakana
sc ; Khar ; Kharoshthi
sc ; Khmr ; Khmer
sc ; Knda ; Kannada
sc ; Laoo ; Lao
sc ; Latn ; Latin
+sc ; Lepc ; Lepcha
sc ; Limb ; Limbu
sc ; Linb ; Linear_B
+sc ; Lyci ; Lycian
+sc ; Lydi ; Lydian
sc ; Mlym ; Malayalam
sc ; Mong ; Mongolian
sc ; Mymr ; Myanmar
sc ; Nkoo ; Nko
sc ; Ogam ; Ogham
+sc ; Olck ; Ol_Chiki
sc ; Orya ; Oriya
sc ; Osma ; Osmanya
sc ; Phag ; Phags_Pa
sc ; Phnx ; Phoenician
sc ; Qaai ; Inherited
+sc ; Rjng ; Rejang
sc ; Runr ; Runic
+sc ; Saur ; Saurashtra
sc ; Shaw ; Shavian
sc ; Sinh ; Sinhala
+sc ; Sund ; Sundanese
sc ; Sylo ; Syloti_Nagri
sc ; Syrc ; Syriac
sc ; Tagb ; Tagbanwa
@@ -562,6 +896,7 @@ sc ; Thaa ; Thaana
sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Ugar ; Ugaritic
+sc ; Vaii ; Vai
sc ; Xpeo ; Old_Persian
sc ; Xsux ; Cuneiform
sc ; Yiii ; Yi
@@ -572,25 +907,106 @@ sc ; Zzzz ; Unknown
SB ; AT ; ATerm
SB ; CL ; Close
+SB ; CR ; CR
+SB ; EX ; Extend
SB ; FO ; Format
SB ; LE ; OLetter
+SB ; LF ; LF
SB ; LO ; Lower
SB ; NU ; Numeric
+SB ; SC ; SContinue
SB ; SE ; Sep
SB ; SP ; Sp
SB ; ST ; STerm
SB ; UP ; Upper
SB ; XX ; Other
+# Simple_Case_Folding (scf)
+
+# @missing: 0000..10FFFF; Simple_Case_Folding; <code point>
+
+# Simple_Lowercase_Mapping (slc)
+
+# @missing: 0000..10FFFF; Simple_Lowercase_Mapping; <code point>
+
+# Simple_Titlecase_Mapping (stc)
+
+# @missing: 0000..10FFFF; Simple_Titlecase_Mapping; <code point>
+
+# Simple_Uppercase_Mapping (suc)
+
+# @missing: 0000..10FFFF; Simple_Uppercase_Mapping; <code point>
+
+# Soft_Dotted (SD)
+
+SD ; N ; No ; F ; False
+SD ; Y ; Yes ; T ; True
+
+# Terminal_Punctuation (Term)
+
+Term; N ; No ; F ; False
+Term; Y ; Yes ; T ; True
+
+# Titlecase_Mapping (tc)
+
+# @missing: 0000..10FFFF; Titlecase_Mapping; <code point>
+
+# Unicode_1_Name (na1)
+
+# @missing: 0000..10FFFF; Unicode_1_Name; <none>
+
+# Unicode_Radical_Stroke (URS)
+
+# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none>
+
+# Unified_Ideograph (UIdeo)
+
+UIdeo; N ; No ; F ; False
+UIdeo; Y ; Yes ; T ; True
+
+# Uppercase (Upper)
+
+Upper; N ; No ; F ; False
+Upper; Y ; Yes ; T ; True
+
+# Uppercase_Mapping (uc)
+
+# @missing: 0000..10FFFF; Uppercase_Mapping; <code point>
+
+# Variation_Selector (VS)
+
+VS ; N ; No ; F ; False
+VS ; Y ; Yes ; T ; True
+
+# White_Space (WSpace)
+
+WSpace; N ; No ; F ; False
+WSpace; Y ; Yes ; T ; True
+
# Word_Break (WB)
+WB ; CR ; CR
WB ; EX ; ExtendNumLet
+WB ; Extend ; Extend
WB ; FO ; Format
WB ; KA ; Katakana
WB ; LE ; ALetter
+WB ; LF ; LF
+WB ; MB ; MidNumLet
WB ; ML ; MidLetter
WB ; MN ; MidNum
+WB ; NL ; Newline
WB ; NU ; Numeric
WB ; XX ; Other
+# XID_Continue (XIDC)
+
+XIDC; N ; No ; F ; False
+XIDC; Y ; Yes ; T ; True
+
+# XID_Start (XIDS)
+
+XIDS; N ; No ; F ; False
+XIDS; Y ; Yes ; T ; True
+
# EOF
diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl
index 731c4399d6..509d31a986 100644
--- a/lib/unicore/README.perl
+++ b/lib/unicore/README.perl
@@ -1,8 +1,9 @@
The *.txt files were copied from
- http://www.unicode.org/Public/5.0.0/ucd
-as of Unicode 5.0.0 (July 2006).
+ http://www.unicode.org/Public/5.1.0/ucd
+
+as of Unicode 5.1.0 (March 2008).
The two big files, NormalizationTest.txt (2 MB) and Unihan.txt (28 MB,
5.8 MB zip) were not included due to space considerations. Also NOT
@@ -14,7 +15,7 @@ included were any *.html files and the Derived*.txt files
or any files from subdirectories.
-To be 8.3-friendly, the lib/unicore/PropertyValueAliases.txt was
+To be 8.3 filesystem friendly, the lib/unicore/PropertyValueAliases.txt was
renamed to be lib/unicore/PropValueAliases.txt and the
lib/unicore/NamedSequencesProv.txt was renamed to be
lib/unicore/NamedSqProv.txt, since otherwise they would have
@@ -30,6 +31,9 @@ the build process.
FOR PUMPKINS
+The files are interrelated. If you take the latest UnicodeData.txt, for example,
+but leave older versions of the other files in place, there can be subtle problems.
+
The *.pl files are generated from the *.txt files by the mktables script,
more recently done during the Perl build process, but if you want to try
the old manual way:
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 41c206cc9a..d15ed91310 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -67,7 +67,7 @@ while (@ARGV)
" -w : Write files regardless\n",
" -maketest : Make test script\n",
" -makelist : Rewrite the file list based on current setup\n",
- " -L filelist : Use this file list, (defaults to $0)\n",
+ " -L filelist : Use this file list, (defaults to $0.lst)\n",
" -C dir : Change to this directory before proceeding\n",
" -check A B : Executes only if A and B are the same\n";
}
@@ -161,6 +161,13 @@ my $HEADER=<<"EOF";
EOF
+my $INTERNAL_ONLY=<<"EOF";
+# This file is for internal use by the Perl program only. The format and even
+# name or existence of this file are subject to change without notice. Don't
+# use it directly.
+
+EOF
+
sub force_unlink {
my $filename = shift;
return unless -e $filename;
@@ -505,7 +512,7 @@ sub Table::Append
##
## Given a code point range starting value and ending value (and name),
-## Add the range to teh Table.
+## Add the range to the Table.
##
## NOTE: Code points must be added in strictly ascending numeric order.
##
@@ -612,6 +619,10 @@ sub Table::Write
my $comment = shift;
my @OUT = $HEADER;
+
+ # files in subdirectories are internal-use-only
+ push @OUT, $INTERNAL_ONLY if ref $filename;
+
if (defined $comment) {
$comment =~ s/\s+\Z//;
$comment =~ s/^/# /gm;
@@ -817,8 +828,8 @@ sub UnicodeData_Txt()
my %DC;
my %Bidi;
my %Number;
- $DC{can} = Table->New();
- $DC{com} = Table->New();
+ $DC{Can} = Table->New();
+ $DC{Com} = Table->New();
## Initialize Broken Perl-generated categories
## (Categories from UnicodeData.txt are auto-initialized in gencat)
@@ -902,8 +913,11 @@ sub UnicodeData_Txt()
my $isspace =
($cat =~ /Zs|Zl|Zp/ &&
- $code != 0x200B) # 200B is ZWSP which is for line break control
- # and therefore it is not part of "space" even while it is "Zs".
+ $code != 0x200B) # 200B is ZWSP which is for line break control
+ # and therefore it is not part of "space" even
+ # though it is "Zs" in some versions of Unicode.
+ # In 5.1 it is Cf, so this check is no longer
+ # necessary.
|| $code == 0x0009 # 0009: HORIZONTAL TAB
|| $code == 0x000A # 000A: LINE FEED
|| $code == 0x000B # 000B: VERTICAL TAB
@@ -984,7 +998,7 @@ sub UnicodeData_Txt()
my ($hexcode, ## code point in hex (e.g. "0041")
$name, ## character name (e.g. "LATIN CAPITAL LETTER A")
$cat, ## category (e.g. "Lu")
- $comb, ## Canonical combining class (e.t. "230")
+ $comb, ## Canonical combining class (e.g. "230")
$bidi, ## directional category (e.g. "L")
$deco, ## decomposition mapping
$decimal, ## decimal digit value
@@ -1061,14 +1075,16 @@ sub UnicodeData_Txt()
if ($deco =~/^<(\w+)>/)
{
my $dshort = $PVA_reverse{dt}{ucfirst lc $1};
- $DC{com}->Append($code);
-
- $DC{$dshort} ||= Table->New();
- $DC{$dshort}->Append($code);
+ $DC{Com}->Append($code);
+ $dshort = $PVA_reverse{dt}{lc $1} unless $dshort ne "";
+ die "No reverse for $1'" unless $dshort ne "";
+ #$dshort = lc $dshort; # use lower case only
+ $DC{$dshort} ||= Table->New();
+ $DC{$dshort}->Append($code);
}
else
{
- $DC{can}->Append($code);
+ $DC{Can}->Append($code);
}
}
}
@@ -2094,8 +2110,8 @@ sub SpecialCasing_txt()
my @OUT =
(
- $HEADER, "\n",
- "# The key UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
+ $HEADER, $INTERNAL_ONLY, "\n",
+ "# The key: UTF-8 _bytes_, the value: UTF-8 (speed hack)\n",
"%utf8::ToSpec$case =\n(\n",
);
@@ -2159,8 +2175,8 @@ sub CaseFolding_txt()
my @OUT =
(
- $HEADER, "\n",
- "# The ke UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
+ $HEADER, $INTERNAL_ONLY, "\n",
+ "# The key: UTF-8 _bytes_, the value: UTF-8 (speed hack)\n",
"%utf8::ToSpecFold =\n(\n",
);
for my $code (sort { $a <=> $b } keys %Fold) {
diff --git a/lib/unicore/mktables.lst b/lib/unicore/mktables.lst
index 90182b9ec0..f4c55bb64e 100644
--- a/lib/unicore/mktables.lst
+++ b/lib/unicore/mktables.lst
@@ -1,7 +1,7 @@
#
# mktables.lst -- File list for mktables.
#
-# Autogenerated on Sun Jan 18 18:35:22 2009
+# Autogenerated on Mon Jan 26 17:57:26 2009
#
# - First section is input files
# (mktables itself is automatically included)
@@ -92,23 +92,23 @@ lib/ccc/NR.pl
lib/ccc/OV.pl
lib/ccc/R.pl
lib/ccc/VR.pl
-lib/dt/can.pl
-lib/dt/com.pl
-lib/dt/enc.pl
-lib/dt/fin.pl
-lib/dt/font.pl
-lib/dt/fra.pl
-lib/dt/init.pl
-lib/dt/iso.pl
-lib/dt/med.pl
-lib/dt/nar.pl
-lib/dt/nb.pl
-lib/dt/sml.pl
-lib/dt/sqr.pl
-lib/dt/sub.pl
-lib/dt/sup.pl
-lib/dt/vert.pl
-lib/dt/wide.pl
+lib/dt/Can.pl
+lib/dt/Com.pl
+lib/dt/Enc.pl
+lib/dt/Fin.pl
+lib/dt/Font.pl
+lib/dt/Fra.pl
+lib/dt/Init.pl
+lib/dt/Iso.pl
+lib/dt/Med.pl
+lib/dt/Nar.pl
+lib/dt/Nb.pl
+lib/dt/Sml.pl
+lib/dt/Sqr.pl
+lib/dt/Sub.pl
+lib/dt/Sup.pl
+lib/dt/Vert.pl
+lib/dt/Wide.pl
lib/ea/A.pl
lib/ea/F.pl
lib/ea/H.pl
@@ -136,7 +136,7 @@ lib/gc_sc/Bugi.pl
lib/gc_sc/Buhd.pl
lib/gc_sc/C.pl
lib/gc_sc/Canadian.pl
-lib/gc_sc/Carian.pl
+lib/gc_sc/Cari.pl
lib/gc_sc/Cc.pl
lib/gc_sc/Cf.pl
lib/gc_sc/Cham.pl
@@ -369,7 +369,7 @@ lib/gc_sc/LC.pl
lib/gc_sc/LOE.pl
lib/gc_sc/Laoo.pl
lib/gc_sc/Latn.pl
-lib/gc_sc/Lepcha.pl
+lib/gc_sc/Lepc.pl
lib/gc_sc/Limb.pl
lib/gc_sc/LinearB.pl
lib/gc_sc/Ll.pl
@@ -380,8 +380,8 @@ lib/gc_sc/Lower.pl
lib/gc_sc/Lowercas.pl
lib/gc_sc/Lt.pl
lib/gc_sc/Lu.pl
-lib/gc_sc/Lycian.pl
-lib/gc_sc/Lydian.pl
+lib/gc_sc/Lyci.pl
+lib/gc_sc/Lydi.pl
lib/gc_sc/M.pl
lib/gc_sc/Math.pl
lib/gc_sc/Mc.pl
@@ -454,12 +454,12 @@ lib/gc_sc/Qaai.pl
lib/gc_sc/Quotatio.pl
lib/gc_sc/Radical.pl
lib/gc_sc/Radical2.pl
-lib/gc_sc/Rejang.pl
+lib/gc_sc/Rjng.pl
lib/gc_sc/Runr.pl
lib/gc_sc/S.pl
lib/gc_sc/SD.pl
lib/gc_sc/STerm.pl
-lib/gc_sc/Saurasht.pl
+lib/gc_sc/Saur.pl
lib/gc_sc/Sc.pl
lib/gc_sc/Shaw.pl
lib/gc_sc/Sinh.pl
@@ -470,7 +470,7 @@ lib/gc_sc/SoftDott.pl
lib/gc_sc/Space.pl
lib/gc_sc/SpacePer.pl
lib/gc_sc/Sterm2.pl
-lib/gc_sc/Sundanes.pl
+lib/gc_sc/Sund.pl
lib/gc_sc/SylotiNa.pl
lib/gc_sc/Syrc.pl
lib/gc_sc/Tagb.pl
@@ -491,7 +491,7 @@ lib/gc_sc/UnifiedI.pl
lib/gc_sc/Upper.pl
lib/gc_sc/Uppercas.pl
lib/gc_sc/VS.pl
-lib/gc_sc/Vai.pl
+lib/gc_sc/Vaii.pl
lib/gc_sc/Variatio.pl
lib/gc_sc/VertSpac.pl
lib/gc_sc/WSpace.pl