Update to Unicode 3.1.1.

p4raw-id: //depot/perl@11825
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 11:16:24 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 11:16:24 +0000
commit: afc46004557cada88060a20d235b3f5e6303a4ac (patch)
tree: 1bd79b3f394b42ffde28f6658fd8071639d349fc
parent: 78bf21c2b5382395f2e75b313393c17f529af2e0 (diff)
download: perl-afc46004557cada88060a20d235b3f5e6303a4ac.tar.gz
12 files changed, 176 insertions, 89 deletions
diff --git a/lib/unicore/ArabLink.pl b/lib/unicore/ArabLink.pl
index 4db2e62dc2..61fa496d32 100644
--- a/lib/unicore/ArabLink.pl
+++ b/lib/unicore/ArabLink.pl
@@ -2,6 +2,7 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+0621		U
 0622	0625	R
 0626		D
 0627		R
@@ -29,7 +30,7 @@ return <<'END';
 06cf		R
 06d0	06d1	D
 06d2	06d3	R
-06d5		U
+06d5		R
 06fa	06fc	D
 0710		R
 0712	0714	D
@@ -42,4 +43,5 @@ return <<'END';
 072a		R
 072b		D
 072c		R
+200d		C
 END
diff --git a/lib/unicore/ArabLnkGrp.pl b/lib/unicore/ArabLnkGrp.pl
index ff4220d4b3..c293d9fa5a 100644
--- a/lib/unicore/ArabLnkGrp.pl
+++ b/lib/unicore/ArabLnkGrp.pl
@@ -2,6 +2,7 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+0621		<no shaping>
 0622	0623	ALEF
 0624		WAW
 0625		ALEF
@@ -61,7 +62,7 @@ return <<'END';
 06cf		WAW
 06d0	06d1	YEH
 06d2	06d3	YEH BARREE
-06d5		<no shaping>
+06d5		TEH MARBUTA
 06fa		SEEN
 06fb		SAD
 06fc		AIN
@@ -90,4 +91,5 @@ return <<'END';
 072a		DALATH RISH
 072b		SHIN
 072c		TAW
+200d		<no shaping>
 END
diff --git a/lib/unicore/ArabShap.txt b/lib/unicore/ArabShap.txt
index 9b60290e62..fc035d5d68 100644
--- a/lib/unicore/ArabShap.txt
+++ b/lib/unicore/ArabShap.txt
@@ -1,4 +1,4 @@
-# ArabicShaping-3.txt
+# ArabicShaping-4.txt
 #
 # This file is a normative contributory data file in the
 # Unicode Character Database.
@@ -21,12 +21,32 @@
 #   D dual-joining, U non-joining
 # The fourth field defines the joining group.
 #
+#
+# Note: Characters of joining type T and most characters of 
+# joining type U are not explicitly listed in this file.
+#
+# Characters of joining type T can be derived by the following formula:
+#   T = Mn + Cf - ZWNJ - ZWJ
+# where Mn and Cf are the general category values. In other words,
+# any non-spacing mark or any format control character, except
+# U+200C ZERO WIDTH NON-JOINER (joining type U) and U+200D ZERO WIDTH
+# JOINER (joining type C).
+#
+# For an explicit listing of characters of joining type T, see
+# the derived property file DerivedJoiningType.txt.
+#
+# There are currently no characters of type L defined in Unicode.
+#
+# Joining type U includes all characters which are neither joining
+# type T, nor explicitly marked in this file as types R, L, D, or C.
+#
 # #############################################################
  
 # Unicode; Schematic Name; Joining Type; Joining Group
 
 # Arabic characters
 
+0621; HAMZA; U; <no shaping>
 0622; MADDA ON ALEF; R; ALEF
 0623; HAMZA ON ALEF; R; ALEF
 0624; HAMZA ON WAW; R; WAW
@@ -162,7 +182,7 @@
 06D1; YEH WITH 3 DOTS BELOW; D; YEH
 06D2; YEH BARREE; R; YEH BARREE
 06D3; HAMZA ON YEH BARREE; R; YEH BARREE
-06D5; AE; U; <no shaping>
+06D5; AE; R; TEH MARBUTA
 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
 06FB; DAD WITH DOT BELOW; D; SAD
 06FC; GHAIN WITH DOT BELOW; D; AIN
@@ -197,3 +217,7 @@
 072A; RISH; R; DALATH RISH
 072B; SHIN; D; SHIN
 072C; TAW; R; TAW
+
+# Other
+
+200D; ZERO WIDTH JOINER; C; <no shaping>
diff --git a/lib/unicore/CaseFold.txt b/lib/unicore/CaseFold.txt
index c8361e8ba1..b05f8d0e6e 100644
--- a/lib/unicore/CaseFold.txt
+++ b/lib/unicore/CaseFold.txt
@@ -1,4 +1,4 @@
-# CaseFolding-3.txt
+# CaseFolding-4.txt
 #
 # Case Folding Properties
 #
@@ -8,8 +8,8 @@
 # case differences (according to UnicodeData.txt and SpecialCasing.txt)
 # are eliminated.
 #
-# The data supports both implemenations that require simple case foldings
-# (where string lengths don't change), and implemenations that allow full case folding
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
 # (where string lengths may grow). Note that where they can be supported, the
 # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
 #
diff --git a/lib/unicore/EAWidth.txt b/lib/unicore/EAWidth.txt
index e11f544efb..cfec5f6460 100644
--- a/lib/unicore/EAWidth.txt
+++ b/lib/unicore/EAWidth.txt
@@ -1,4 +1,4 @@
-# EastAsianWidth-4.txt
+# EastAsianWidth-5.txt
 #
 # East Asian Width Properties
 #
@@ -190,7 +190,7 @@
 00AB;N # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 00AC;Na # NOT SIGN
 00AD;A # SOFT HYPHEN
-00AE;N # REGISTERED SIGN
+00AE;A # REGISTERED SIGN
 00AF;Na # MACRON
 00B0;A # DEGREE SIGN
 00B1;A # PLUS-MINUS SIGN
@@ -347,7 +347,7 @@
 0148;A # LATIN SMALL LETTER N WITH CARON
 0149;A # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
 014A;A # LATIN CAPITAL LETTER ENG
-014B;N # LATIN SMALL LETTER ENG
+014B;A # LATIN SMALL LETTER ENG
 014C;N # LATIN CAPITAL LETTER O WITH MACRON
 014D;A # LATIN SMALL LETTER O WITH MACRON
 014E;N # LATIN CAPITAL LETTER O WITH BREVE
@@ -692,7 +692,7 @@
 02C1;N # MODIFIER LETTER REVERSED GLOTTAL STOP
 02C2;N # MODIFIER LETTER LEFT ARROWHEAD
 02C3;N # MODIFIER LETTER RIGHT ARROWHEAD
-02C4;N # MODIFIER LETTER UP ARROWHEAD
+02C4;A # MODIFIER LETTER UP ARROWHEAD
 02C5;N # MODIFIER LETTER DOWN ARROWHEAD
 02C6;N # MODIFIER LETTER CIRCUMFLEX ACCENT
 02C7;A # CARON
@@ -719,7 +719,7 @@
 02DC;N # SMALL TILDE
 02DD;A # DOUBLE ACUTE ACCENT
 02DE;N # MODIFIER LETTER RHOTIC HOOK
-02DF;N # MODIFIER LETTER CROSS ACCENT
+02DF;A # MODIFIER LETTER CROSS ACCENT
 02E0;N # MODIFIER LETTER SMALL GAMMA
 02E1;N # MODIFIER LETTER SMALL L
 02E2;N # MODIFIER LETTER SMALL S
@@ -5147,9 +5147,9 @@
 201F;N # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
 2020;A # DAGGER
 2021;A # DOUBLE DAGGER
-2022;N # BULLET
+2022;A # BULLET
 2023;N # TRIANGULAR BULLET
-2024;N # ONE DOT LEADER
+2024;A # ONE DOT LEADER
 2025;A # TWO DOT LEADER
 2026;A # HORIZONTAL ELLIPSIS
 2027;A # HYPHENATION POINT
@@ -5175,7 +5175,7 @@
 203B;A # REFERENCE MARK
 203C;N # DOUBLE EXCLAMATION MARK
 203D;N # INTERROBANG
-203E;N # OVERLINE
+203E;A # OVERLINE
 203F;N # UNDERTIE
 2040;N # CHARACTER TIE
 2041;N # CARET INSERTION POINT
@@ -5282,7 +5282,7 @@
 2113;A # SCRIPT SMALL L
 2114;N # L B BAR SYMBOL
 2115;N # DOUBLE-STRUCK CAPITAL N
-2116;N # NUMERO SIGN
+2116;A # NUMERO SIGN
 2117;N # SOUND RECORDING COPYRIGHT
 2118;N # SCRIPT CAPITAL P
 2119;N # DOUBLE-STRUCK CAPITAL P
@@ -5319,7 +5319,7 @@
 2138;N # DALET SYMBOL
 2139;N # INFORMATION SOURCE
 213A;N # ROTATED CAPITAL Q
-2153;N # VULGAR FRACTION ONE THIRD
+2153;A # VULGAR FRACTION ONE THIRD
 2154;A # VULGAR FRACTION TWO THIRDS
 2155;A # VULGAR FRACTION ONE FIFTH
 2156;N # VULGAR FRACTION TWO FIFTHS
@@ -5328,8 +5328,8 @@
 2159;N # VULGAR FRACTION ONE SIXTH
 215A;N # VULGAR FRACTION FIVE SIXTHS
 215B;A # VULGAR FRACTION ONE EIGHTH
-215C;N # VULGAR FRACTION THREE EIGHTHS
-215D;N # VULGAR FRACTION FIVE EIGHTHS
+215C;A # VULGAR FRACTION THREE EIGHTHS
+215D;A # VULGAR FRACTION FIVE EIGHTHS
 215E;A # VULGAR FRACTION SEVEN EIGHTHS
 215F;N # FRACTION NUMERATOR ONE
 2160;A # ROMAN NUMERAL ONE
@@ -5408,8 +5408,8 @@
 21B5;N # DOWNWARDS ARROW WITH CORNER LEFTWARDS
 21B6;N # ANTICLOCKWISE TOP SEMICIRCLE ARROW
 21B7;N # CLOCKWISE TOP SEMICIRCLE ARROW
-21B8;N # NORTH WEST ARROW TO LONG BAR
-21B9;N # LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
+21B8;A # NORTH WEST ARROW TO LONG BAR
+21B9;A # LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
 21BA;N # ANTICLOCKWISE OPEN CIRCLE ARROW
 21BB;N # CLOCKWISE OPEN CIRCLE ARROW
 21BC;N # LEFTWARDS HARPOON WITH BARB UPWARDS
@@ -5455,7 +5455,7 @@
 21E4;N # LEFTWARDS ARROW TO BAR
 21E5;N # RIGHTWARDS ARROW TO BAR
 21E6;N # LEFTWARDS WHITE ARROW
-21E7;N # UPWARDS WHITE ARROW
+21E7;A # UPWARDS WHITE ARROW
 21E8;N # RIGHTWARDS WHITE ARROW
 21E9;N # DOWNWARDS WHITE ARROW
 21EA;N # UPWARDS WHITE ARROW FROM BAR
@@ -6456,7 +6456,7 @@
 273A;N # SIXTEEN POINTED ASTERISK
 273B;N # TEARDROP-SPOKED ASTERISK
 273C;N # OPEN CENTRE TEARDROP-SPOKED ASTERISK
-273D;N # HEAVY TEARDROP-SPOKED ASTERISK
+273D;A # HEAVY TEARDROP-SPOKED ASTERISK
 273E;N # SIX PETALLED BLACK AND WHITE FLORETTE
 273F;N # BLACK FLORETTE
 2740;N # WHITE FLORETTE
diff --git a/lib/unicore/PropList.html b/lib/unicore/PropList.html
index 665fc67d54..508a533f3d 100644
--- a/lib/unicore/PropList.html
+++ b/lib/unicore/PropList.html
@@ -38,7 +38,7 @@ content="unicode, normalization, composition, decomposition">
   <tbody>
     <tr>
       <td valign="top" width="144">Revision</td>
-      <td valign="top">3.1.0</td>
+      <td valign="top">3.1.1</td>
     </tr>
     <tr>
       <td valign="top" width="144">Authors</td>
@@ -46,12 +46,12 @@ content="unicode, normalization, composition, decomposition">
     </tr>
     <tr>
       <td valign="top" width="144">Date</td>
-      <td valign="top">2001-02-28</td>
+      <td valign="top">2001-07-12</td>
     </tr>
     <tr>
       <td valign="top" width="144">This Version</td>
       <td valign="top"><a
-        href="http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.html">http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.html</a></td>
+        href="http://www.unicode.org/Public/3.1-Update1/PropList-3.1.1.html">http://www.unicode.org/Public/3.1-Update1/PropList-3.1.1.html</a></td>
     </tr>
     <tr>
       <td valign="top" width="144">Previous Version</td>
@@ -128,6 +128,12 @@ the following table.</p>
         functions for control of cursive joining and ligation.</td>
     </tr>
     <tr>
+      <th valign="top">ASCII_Hex_Digit</th>
+      <th valign="top">N</th>
+      <td valign="top">ASCII characters commonly used for the representation of 
+        hexadecimal numbers.</td>
+    </tr>
+    <tr>
       <th valign="top">Dash</th>
       <th valign="top">I</th>
       <td valign="top">Those punctuation characters explicitly called out as 
diff --git a/lib/unicore/PropList.txt b/lib/unicore/PropList.txt
index 7c833ae77c..ebb4e7880b 100644
--- a/lib/unicore/PropList.txt
+++ b/lib/unicore/PropList.txt
@@ -1,8 +1,8 @@
-# PropList-3.1.0.txt
+# PropList-3.1.1.txt
 #
 # Unicode Character Database: Extended Properties
 # For documentation, see PropList.html
-# Date: 2001-03-02 00:06:33.9 GMT [MD]
+# Date: 2001-07-12 14:15:00.0 PDT [KW]
 # Note: Unassigned and Noncharacter codepoints are omitted,
 #       except when listing Noncharacter or Cn.
 # ================================================
@@ -243,9 +243,16 @@ FF5D          ; Other_Math # Pe       FULLWIDTH RIGHT CURLY BRACKET
 FF10..FF19    ; Hex_Digit # Nd  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
 FF21..FF26    ; Hex_Digit # L&   [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F
 FF41..FF46    ; Hex_Digit # L&   [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F
-1D7CE..1D7FF  ; Hex_Digit # Nd  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
 
-# Total code points: 94
+# Total code points: 44
+
+# ================================================
+
+0030..0039    ; ASCII_Hex_Digit # Nd  [10] DIGIT ZERO..DIGIT NINE
+0041..0046    ; ASCII_Hex_Digit # L&   [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
+0061..0066    ; ASCII_Hex_Digit # L&   [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F
+
+# Total code points: 22
 
 # ================================================
 
diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl
index f4c568472a..f6ac702238 100644
--- a/lib/unicore/README.perl
+++ b/lib/unicore/README.perl
@@ -6,8 +6,24 @@ and most of them were renamed to better fit 8.3 filename limitations,
 by which the Perl distribution tries to live.  The renamings are listed
 in the file 'rename'.
 
-The two big files, NormalizationTest.txt (1.7MB) and Unihan.txt (15.8MB)
-were not copied due to space considerations.
+The two big files, NormalizationTest.txt (2.0MB) and Unihan.txt (15.8MB)
+were not copied due to space considerations.  Also not included are the
+derived files:
+
+	DerivedBidiClass.txt
+	DerivedBinaryProperties.txt
+	DerivedCombiningClass.txt
+	DerivedCoreProperties.txt
+	DerivedDecompositionType.txt
+	DerivedEastAsianWidth.txt
+	DerivedGeneralCategory.txt
+	DerivedJoiningGroup.txt
+	DerivedJoiningType.txt
+	DerivedLineBreak.txt
+	DerivedNormalizationProperties.txt
+	DerivedNumericType.txt
+	DerivedNumericValues.txt
+	DerivedProperties.html
 
 The *.pl files are generated from these files by the 'mktables.PL' script.
 
diff --git a/lib/unicore/ReadMe.txt b/lib/unicore/ReadMe.txt
index 4e100a28f8..2c190c6948 100644
--- a/lib/unicore/ReadMe.txt
+++ b/lib/unicore/ReadMe.txt
@@ -1,4 +1,4 @@
-March 30, 2001
+August 10, 2001
 
 This directory contains the Unicode Character Database
 data files.
@@ -6,10 +6,12 @@ data files.
 Currently, the Unicode Character Database files are at
 the version level:
 
-   Unicode Standard, Version 3.1.
+   Unicode Standard, Version 3.1.1
 
 For information about the standard itself, see
 UAX #27, Unicode 3.1. <http://www.unicode.org/unicode/reports/tr27/>
+and the Unicode 3.1.1 Update Notice.
+<http://www.unicode.org/versions/Unicode3.1.1.html>
 
 Detailed documentation of the files constituting the
 Unicode Character Database (contributory data files for
@@ -20,32 +22,18 @@ for specific details about particular files or sets of
 files.
 
 Unihan.txt is a very large file. For convenience, the current
-Unicode 3.1 version of Unihan.txt is also available in
-three compressed formats in the Unicode 3.1 update directory.
-See: <http://www.unicode.org/Public/3.1-Update/> or
-<ftp://ftp.unicode.org/Public/3.1-Update/>
+Unicode 3.1.1 version of Unihan.txt is also available in
+two compressed formats in the Unicode 3.1.1 update directory.
+See: <http://www.unicode.org/Public/3.1-Update1/> or
+<ftp://ftp.unicode.org/Public/3.1-Update1/>
 
-Unihan-3.1.zip for Windows. (Use winzip)
-Unihan-3.1.Z   for Unix.    (Use uncompress)
-Unihan-3.1.gz  for Unix.    (Use gzip)
-
-The Unicode 3.1 update directory also contains a winzip
-version of all the other contributory data files for
-that update directory. That file is named:
-
-UCDwithoutUnihan.zip        (Use winzip)
-
-UCDwithoutUnihan.zip is a convenient way to download
-most of the Unicode Character Database files, but to
-be complete, be sure to also get ArabicShaping.txt,
-BidiMirroring.txt, and Jamo.txt, which were unchanged
-between Unicode 3.0.1 and Unicode 3.1, and which were
-therefore not included in the 3.1-Update directory.
+Unihan-3.1.1.zip for Windows. (Use winzip)
+Unihan-3.1.1.txt.gz  for Unix.    (Use gzip or gunzip)
 
 Note that the files are zipped in
 exactly the same format they have on the server (with Unix
 line endings). From a browser, right-clicking on 
-UCDwithoutUnihan.zip will allow automatic download and unzipping on a
+Unihan-3.1.1.zip will allow automatic download and unzip on a
 Windows system with winzip installed.
 
 
diff --git a/lib/unicore/SpecCase.txt b/lib/unicore/SpecCase.txt
index aead3fff45..833c3352f7 100644
--- a/lib/unicore/SpecCase.txt
+++ b/lib/unicore/SpecCase.txt
@@ -1,4 +1,4 @@
-# SpecialCasing-4.txt
+# SpecialCasing-5.txt
 #
 # Special Casing Properties
 #
@@ -20,20 +20,33 @@
 # one character, they are separated by spaces.
 #
 # The <condition_list> is optional. Where present, it consists of one or more locales or contexts,
-# separated by spaces.
-#  A condition list overrides the normal behavior if all of the listed conditions are true.
-#  Case distinctions in the condition list are not significant.
-#  Conditions preceded by "NON_" represent the negation of the condition
+# separated by spaces. In these conditions:
+# - A condition list overrides the normal behavior if all of the listed conditions are true.
+# - Case distinctions in the condition list are not significant.
+# - Conditions preceded by "NOT_" represent the negation of the condition.
+# - A cased letter is any character with general category = Lu or Ll or Lt
+# - An ignorable sequence is a sequence of *zero* or more characters from
+#    the set {HYPHEN, SOFT HYPHEN, general category = Mn}.
 #
 # A locale is defined as:
 # <locale> := <ISO_639_code> ( "_" <ISO_3166_code> ( "_" <variant> )? )?
 # <ISO_3166_code> := 2-letter ISO country code,
 # <ISO_639_code> :=  2-letter ISO language code
 #
-# A context is one of the following choices:
-#   FINAL:  The letter is not followed by a letter of general category L* (e.g. Ll, Lt, Lu, Lm, or Lo).
-#   MODERN: The mapping is only used for modern text.
-#   AFTER_i: The last base character was "i" 0069
+# A context is a locale or one of the following choices:
+#   CFINAL:      The character is not followed by a sequence consisting of
+#                an ignorable sequence and then a cased letter.
+#   CINITIAL:    The character is not preceded by a sequence consisting of
+#                a cased letter and an ignorable sequence.
+#   FINAL_SIGMA: CFINAL and NOT_CINITIAL
+#   TYPE_i:      The character is "i" (0069), "j" (006A),
+#                or has a canonical decomposition that begins with an "i" or "j"
+#                but has no combining characters above (i.e., i-ogonek (012F),
+#                i-tilde-below (1E2D), or i-dot-below (1ECB)).
+#   AFTER_i:     The last preceding base character was TYPE_i, and
+#                no combining character class 230 (above) has intervened.
+#   MORE_ABOVE:  The character is followed by one or more characters of
+#                combining class 230 (ABOVE) in the combining character sequence
 #
 # Other than as used to separate elements, spaces are to be ignored.
 #
@@ -185,7 +198,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 
 # Special case for final form of sigma
 
-03A3; 03C2; 03A3; 03A3; FINAL; # GREEK CAPITAL LETTER SIGMA
+03A3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK CAPITAL LETTER SIGMA
 
 # Note: the following cases for non-final are already in the UnicodeData file.
 
@@ -195,18 +208,45 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 
 # Note: the following cases are not included, since they would case-fold in lowercasing
 
-# 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA
-# 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA
+# 03C3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C3; 03A3; 03A3; NOT_FINAL_SIGMA; # GREEK SMALL LETTER FINAL SIGMA
 
 # ================================================================================
 # Locale-sensitive mappings
 # ================================================================================
 
-# Lithuanian 
+# Lithuanian
 
-0307; 0307; ; ; lt AFTER_i; # Remove DOT ABOVE after "i" with upper or titlecase
+# Lithuanian retains the dot in a lowercase i when followed by accents.
 
-# Turkish, Azeri
+# Remove DOT ABOVE after "i" with upper or titlecase
+
+0307; 0307; ; ; lt AFTER_i # COMBINING DOT ABOVE
+
+# Introduce an explicit dot above when lowercasing capital I's and J's
+# whenever there are more accents above
+# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+
+0049; 0069 0307; 0049; 0049; lt MORE_ABOVE # LATIN CAPITAL LETTER I
+004A; 006A 0307; 004A; 004A; lt MORE_ABOVE # LATIN CAPITAL LETTER J
+012E; 012F 0307; 012E; 012E; lt MORE_ABOVE # LATIN CAPITAL LETTER I WITH OGONEK
+00CC; 0069 0307 0300; 00CC; 00CC; lt # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; 0069 0307 0301; 00CD; 00CD; lt # LATIN CAPITAL LETTER I WITH ACUTE
+0128; 0069 0307 0303; 0128; 0128; lt # LATIN CAPITAL LETTER I WITH TILDE
+
+# ================================================================================
+
+# Turkish and Azeri
+
+# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+# The following rules handle those cases.
+
+# Remove spurious dot above small i's when lowercasing, if there are no more accents above:
+
+0307; ; 0307; 0307; tr AFTER_i NOT_MORE_ABOVE # COMBINING DOT ABOVE
+0307; ; 0307; 0307; az AFTER_i NOT_MORE_ABOVE # COMBINING DOT ABOVE
+
+# Fix case pairs
 
 0049; 0131; 0049; 0049; tr; # LATIN CAPITAL LETTER I
 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
diff --git a/lib/unicore/rename b/lib/unicore/rename
index 71c18575aa..d9e6051e18 100644
--- a/lib/unicore/rename
+++ b/lib/unicore/rename
@@ -1,18 +1,20 @@
-ArabicShaping.txt		ArabShap.txt
-BidiMirroring.txt		BidiMirr.txt
-#Blocks.txt			Blocks.txt
-CaseFolding.txt			CaseFold.txt
-CompositionExclusions.txt	CompExcl.txt
-EastAsianWidth.txt		EAWidth.txt
-#Index.txt			Index.txt
-#Jamo.txt			Jamo.txt
-LineBreak.txt			LineBrk.txt
-#NamesList.html			NamesList.html
-#NamesList.txt			NamesList.txt
-#PropList.txt			PropList.txt
-#PropList.html			PropList.html
-#ReadMe.txt			ReadMe.txt
-SpecialCasing.txt		SpecCase.txt
-UnicodeCharacterDatabase.html	UCD.html
-UnicodeData.html		Unicode.html
-UnicodeData.txt			Unicode.txt
+#!/bin/sh
+
+mv ArabicShaping.txt			ArabShap.txt
+mv BidiMirroring.txt			BidiMirr.txt
+#Blocks.txt				Blocks.txt
+mv CaseFolding.txt			CaseFold.txt
+mv CompositionExclusions.txt		CompExcl.txt
+mv EastAsianWidth.txt			EAWidth.txt
+#Index.txt				Index.txt
+#Jamo.txt				Jamo.txt
+mv LineBreak.txt			LineBrk.txt
+#NamesList.html				NamesList.html
+#NamesList.txt				NamesList.txt
+#PropList.txt				PropList.txt
+#PropList.html				PropList.html
+#ReadMe.txt				ReadMe.txt
+mv SpecialCasing.txt			SpecCase.txt
+mv UnicodeCharacterDatabase.html	UCD.html
+mv UnicodeData.html			Unicode.html
+mv UnicodeData.txt			Unicode.txt
diff --git a/lib/unicore/version b/lib/unicore/version
index 8c50098d8a..94ff29cc4d 100644
--- a/lib/unicore/version
+++ b/lib/unicore/version
@@ -1 +1 @@
-3.1
+3.1.1
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-09-02 11:16:24 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-09-02 11:16:24 +0000
commit	afc46004557cada88060a20d235b3f5e6303a4ac (patch)
tree	1bd79b3f394b42ffde28f6658fd8071639d349fc
parent	78bf21c2b5382395f2e75b313393c17f529af2e0 (diff)
download	perl-afc46004557cada88060a20d235b3f5e6303a4ac.tar.gz