summary refs log tree commit diff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 11:16:24 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 11:16:24 +0000
commit afc46004557cada88060a20d235b3f5e6303a4ac (patch)
tree 1bd79b3f394b42ffde28f6658fd8071639d349fc
parent 78bf21c2b5382395f2e75b313393c17f529af2e0 (diff)
download perl-afc46004557cada88060a20d235b3f5e6303a4ac.tar.gz
Update to Unicode 3.1.1.
p4raw-id: //depot/perl@11825
-rw-r--r-- lib/unicore/ArabLink.pl 4
-rw-r--r-- lib/unicore/ArabLnkGrp.pl 4
-rw-r--r-- lib/unicore/ArabShap.txt 28
-rw-r--r-- lib/unicore/CaseFold.txt 6
-rw-r--r-- lib/unicore/EAWidth.txt 32
-rw-r--r-- lib/unicore/PropList.html 12
-rw-r--r-- lib/unicore/PropList.txt 15
-rw-r--r-- lib/unicore/README.perl 20
-rw-r--r-- lib/unicore/ReadMe.txt 34
-rw-r--r-- lib/unicore/SpecCase.txt 70
-rw-r--r-- lib/unicore/rename 38
-rw-r--r-- lib/unicore/version 2
12 files changed, 176 insertions, 89 deletions
diff --git a/lib/unicore/ArabLink.pl b/lib/unicore/ArabLink.pl
index 4db2e62dc2..61fa496d32 100644
--- a/lib/unicore/ArabLink.pl
+++ b/lib/unicore/ArabLink.pl
@@ -2,6 +2,7 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+0621 U
0622 0625 R
0626 D
0627 R
@@ -29,7 +30,7 @@ return <<'END';
06cf R
06d0 06d1 D
06d2 06d3 R
-06d5 U
+06d5 R
06fa 06fc D
0710 R
0712 0714 D
@@ -42,4 +43,5 @@ return <<'END';
072a R
072b D
072c R
+200d C
END
diff --git a/lib/unicore/ArabLnkGrp.pl b/lib/unicore/ArabLnkGrp.pl
index ff4220d4b3..c293d9fa5a 100644
--- a/lib/unicore/ArabLnkGrp.pl
+++ b/lib/unicore/ArabLnkGrp.pl
@@ -2,6 +2,7 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+0621 <no shaping>
0622 0623 ALEF
0624 WAW
0625 ALEF
@@ -61,7 +62,7 @@ return <<'END';
06cf WAW
06d0 06d1 YEH
06d2 06d3 YEH BARREE
-06d5 <no shaping>
+06d5 TEH MARBUTA
06fa SEEN
06fb SAD
06fc AIN
@@ -90,4 +91,5 @@ return <<'END';
072a DALATH RISH
072b SHIN
072c TAW
+200d <no shaping>
END
diff --git a/lib/unicore/ArabShap.txt b/lib/unicore/ArabShap.txt
index 9b60290e62..fc035d5d68 100644
--- a/lib/unicore/ArabShap.txt
+++ b/lib/unicore/ArabShap.txt
@@ -1,4 +1,4 @@
-# ArabicShaping-3.txt
+# ArabicShaping-4.txt
#
# This file is a normative contributory data file in the
# Unicode Character Database.
@@ -21,12 +21,32 @@
# D dual-joining, U non-joining
# The fourth field defines the joining group.
#
+#
+# Note: Characters of joining type T and most characters of
+# joining type U are not explicitly listed in this file.
+#
+# Characters of joining type T can be derived by the following formula:
+# T = Mn + Cf - ZWNJ - ZWJ
+# where Mn and Cf are the general category values. In other words,
+# any non-spacing mark or any format control character, except
+# U+200C ZERO WIDTH NON-JOINER (joining type U) and U+200D ZERO WIDTH
+# JOINER (joining type C).
+#
+# For an explicit listing of characters of joining type T, see
+# the derived property file DerivedJoiningType.txt.
+#
+# There are currently no characters of type L defined in Unicode.
+#
+# Joining type U includes all characters which are neither joining
+# type T, nor explicitly marked in this file as types R, L, D, or C.
+#
# #############################################################
# Unicode; Schematic Name; Joining Type; Joining Group
# Arabic characters
+0621; HAMZA; U; <no shaping>
0622; MADDA ON ALEF; R; ALEF
0623; HAMZA ON ALEF; R; ALEF
0624; HAMZA ON WAW; R; WAW
@@ -162,7 +182,7 @@
06D1; YEH WITH 3 DOTS BELOW; D; YEH
06D2; YEH BARREE; R; YEH BARREE
06D3; HAMZA ON YEH BARREE; R; YEH BARREE
-06D5; AE; U; <no shaping>
+06D5; AE; R; TEH MARBUTA
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
06FB; DAD WITH DOT BELOW; D; SAD
06FC; GHAIN WITH DOT BELOW; D; AIN
@@ -197,3 +217,7 @@
072A; RISH; R; DALATH RISH
072B; SHIN; D; SHIN
072C; TAW; R; TAW
+
+# Other
+
+200D; ZERO WIDTH JOINER; C; <no shaping>
diff --git a/lib/unicore/CaseFold.txt b/lib/unicore/CaseFold.txt
index c8361e8ba1..b05f8d0e6e 100644
--- a/lib/unicore/CaseFold.txt
+++ b/lib/unicore/CaseFold.txt
@@ -1,4 +1,4 @@
-# CaseFolding-3.txt
+# CaseFolding-4.txt
#
# Case Folding Properties
#
@@ -8,8 +8,8 @@
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
# are eliminated.
#
-# The data supports both implemenations that require simple case foldings
-# (where string lengths don't change), and implemenations that allow full case folding
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
diff --git a/lib/unicore/EAWidth.txt b/lib/unicore/EAWidth.txt
index e11f544efb..cfec5f6460 100644
--- a/lib/unicore/EAWidth.txt
+++ b/lib/unicore/EAWidth.txt
@@ -1,4 +1,4 @@
-# EastAsianWidth-4.txt
+# EastAsianWidth-5.txt
#
# East Asian Width Properties
#
@@ -190,7 +190,7 @@
00AB;N # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC;Na # NOT SIGN
00AD;A # SOFT HYPHEN
-00AE;N # REGISTERED SIGN
+00AE;A # REGISTERED SIGN
00AF;Na # MACRON
00B0;A # DEGREE SIGN
00B1;A # PLUS-MINUS SIGN
@@ -347,7 +347,7 @@
0148;A # LATIN SMALL LETTER N WITH CARON
0149;A # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
014A;A # LATIN CAPITAL LETTER ENG
-014B;N # LATIN SMALL LETTER ENG
+014B;A # LATIN SMALL LETTER ENG
014C;N # LATIN CAPITAL LETTER O WITH MACRON
014D;A # LATIN SMALL LETTER O WITH MACRON
014E;N # LATIN CAPITAL LETTER O WITH BREVE
@@ -692,7 +692,7 @@
02C1;N # MODIFIER LETTER REVERSED GLOTTAL STOP
02C2;N # MODIFIER LETTER LEFT ARROWHEAD
02C3;N # MODIFIER LETTER RIGHT ARROWHEAD
-02C4;N # MODIFIER LETTER UP ARROWHEAD
+02C4;A # MODIFIER LETTER UP ARROWHEAD
02C5;N # MODIFIER LETTER DOWN ARROWHEAD
02C6;N # MODIFIER LETTER CIRCUMFLEX ACCENT
02C7;A # CARON
@@ -719,7 +719,7 @@
02DC;N # SMALL TILDE
02DD;A # DOUBLE ACUTE ACCENT
02DE;N # MODIFIER LETTER RHOTIC HOOK
-02DF;N # MODIFIER LETTER CROSS ACCENT
+02DF;A # MODIFIER LETTER CROSS ACCENT
02E0;N # MODIFIER LETTER SMALL GAMMA
02E1;N # MODIFIER LETTER SMALL L
02E2;N # MODIFIER LETTER SMALL S
@@ -5147,9 +5147,9 @@
201F;N # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
2020;A # DAGGER
2021;A # DOUBLE DAGGER
-2022;N # BULLET
+2022;A # BULLET
2023;N # TRIANGULAR BULLET
-2024;N # ONE DOT LEADER
+2024;A # ONE DOT LEADER
2025;A # TWO DOT LEADER
2026;A # HORIZONTAL ELLIPSIS
2027;A # HYPHENATION POINT
@@ -5175,7 +5175,7 @@
203B;A # REFERENCE MARK
203C;N # DOUBLE EXCLAMATION MARK
203D;N # INTERROBANG
-203E;N # OVERLINE
+203E;A # OVERLINE
203F;N # UNDERTIE
2040;N # CHARACTER TIE
2041;N # CARET INSERTION POINT
@@ -5282,7 +5282,7 @@
2113;A # SCRIPT SMALL L
2114;N # L B BAR SYMBOL
2115;N # DOUBLE-STRUCK CAPITAL N
-2116;N # NUMERO SIGN
+2116;A # NUMERO SIGN
2117;N # SOUND RECORDING COPYRIGHT
2118;N # SCRIPT CAPITAL P
2119;N # DOUBLE-STRUCK CAPITAL P
@@ -5319,7 +5319,7 @@
2138;N # DALET SYMBOL
2139;N # INFORMATION SOURCE
213A;N # ROTATED CAPITAL Q
-2153;N # VULGAR FRACTION ONE THIRD
+2153;A # VULGAR FRACTION ONE THIRD
2154;A # VULGAR FRACTION TWO THIRDS
2155;A # VULGAR FRACTION ONE FIFTH
2156;N # VULGAR FRACTION TWO FIFTHS
@@ -5328,8 +5328,8 @@
2159;N # VULGAR FRACTION ONE SIXTH
215A;N # VULGAR FRACTION FIVE SIXTHS
215B;A # VULGAR FRACTION ONE EIGHTH
-215C;N # VULGAR FRACTION THREE EIGHTHS
-215D;N # VULGAR FRACTION FIVE EIGHTHS
+215C;A # VULGAR FRACTION THREE EIGHTHS
+215D;A # VULGAR FRACTION FIVE EIGHTHS
215E;A # VULGAR FRACTION SEVEN EIGHTHS
215F;N # FRACTION NUMERATOR ONE
2160;A # ROMAN NUMERAL ONE
@@ -5408,8 +5408,8 @@
21B5;N # DOWNWARDS ARROW WITH CORNER LEFTWARDS
21B6;N # ANTICLOCKWISE TOP SEMICIRCLE ARROW
21B7;N # CLOCKWISE TOP SEMICIRCLE ARROW
-21B8;N # NORTH WEST ARROW TO LONG BAR
-21B9;N # LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
+21B8;A # NORTH WEST ARROW TO LONG BAR
+21B9;A # LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
21BA;N # ANTICLOCKWISE OPEN CIRCLE ARROW
21BB;N # CLOCKWISE OPEN CIRCLE ARROW
21BC;N # LEFTWARDS HARPOON WITH BARB UPWARDS
@@ -5455,7 +5455,7 @@
21E4;N # LEFTWARDS ARROW TO BAR
21E5;N # RIGHTWARDS ARROW TO BAR
21E6;N # LEFTWARDS WHITE ARROW
-21E7;N # UPWARDS WHITE ARROW
+21E7;A # UPWARDS WHITE ARROW
21E8;N # RIGHTWARDS WHITE ARROW
21E9;N # DOWNWARDS WHITE ARROW
21EA;N # UPWARDS WHITE ARROW FROM BAR
@@ -6456,7 +6456,7 @@
273A;N # SIXTEEN POINTED ASTERISK
273B;N # TEARDROP-SPOKED ASTERISK
273C;N # OPEN CENTRE TEARDROP-SPOKED ASTERISK
-273D;N # HEAVY TEARDROP-SPOKED ASTERISK
+273D;A # HEAVY TEARDROP-SPOKED ASTERISK
273E;N # SIX PETALLED BLACK AND WHITE FLORETTE
273F;N # BLACK FLORETTE
2740;N # WHITE FLORETTE
diff --git a/lib/unicore/PropList.html b/lib/unicore/PropList.html
index 665fc67d54..508a533f3d 100644
--- a/lib/unicore/PropList.html
+++ b/lib/unicore/PropList.html
@@ -38,7 +38,7 @@ content="unicode, normalization, composition, decomposition">
<tbody>
<tr>
<td valign="top" width="144">Revision</td>
- <td valign="top">3.1.0</td>
+ <td valign="top">3.1.1</td>
</tr>
<tr>
<td valign="top" width="144">Authors</td>
@@ -46,12 +46,12 @@ content="unicode, normalization, composition, decomposition">
</tr>
<tr>
<td valign="top" width="144">Date</td>
- <td valign="top">2001-02-28</td>
+ <td valign="top">2001-07-12</td>
</tr>
<tr>
<td valign="top" width="144">This Version</td>
<td valign="top"><a
- href="http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.html">http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.html</a></td>
+ href="http://www.unicode.org/Public/3.1-Update1/PropList-3.1.1.html">http://www.unicode.org/Public/3.1-Update1/PropList-3.1.1.html</a></td>
</tr>
<tr>
<td valign="top" width="144">Previous Version</td>
@@ -128,6 +128,12 @@ the following table.</p>
functions for control of cursive joining and ligation.</td>
</tr>
<tr>
+ <th valign="top">ASCII_Hex_Digit</th>
+ <th valign="top">N</th>
+ <td valign="top">ASCII characters commonly used for the representation of
+ hexadecimal numbers.</td>
+ </tr>
+ <tr>
<th valign="top">Dash</th>
<th valign="top">I</th>
<td valign="top">Those punctuation characters explicitly called out as
diff --git a/lib/unicore/PropList.txt b/lib/unicore/PropList.txt
index 7c833ae77c..ebb4e7880b 100644
--- a/lib/unicore/PropList.txt
+++ b/lib/unicore/PropList.txt
@@ -1,8 +1,8 @@
-# PropList-3.1.0.txt
+# PropList-3.1.1.txt
#
# Unicode Character Database: Extended Properties
# For documentation, see PropList.html
-# Date: 2001-03-02 00:06:33.9 GMT [MD]
+# Date: 2001-07-12 14:15:00.0 PDT [KW]
# Note: Unassigned and Noncharacter codepoints are omitted,
# except when listing Noncharacter or Cn.
# ================================================
@@ -243,9 +243,16 @@ FF5D ; Other_Math # Pe FULLWIDTH RIGHT CURLY BRACKET
FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F
FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F
-1D7CE..1D7FF ; Hex_Digit # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
-# Total code points: 94
+# Total code points: 44
+
+# ================================================
+
+0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE
+0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
+0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F
+
+# Total code points: 22
# ================================================
diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl
index f4c568472a..f6ac702238 100644
--- a/lib/unicore/README.perl
+++ b/lib/unicore/README.perl
@@ -6,8 +6,24 @@ and most of them were renamed to better fit 8.3 filename limitations,
by which the Perl distribution tries to live. The renamings are listed
in the file 'rename'.
-The two big files, NormalizationTest.txt (1.7MB) and Unihan.txt (15.8MB)
-were not copied due to space considerations.
+The two big files, NormalizationTest.txt (2.0MB) and Unihan.txt (15.8MB)
+were not copied due to space considerations. Also not included are the
+derived files:
+
+ DerivedBidiClass.txt
+ DerivedBinaryProperties.txt
+ DerivedCombiningClass.txt
+ DerivedCoreProperties.txt
+ DerivedDecompositionType.txt
+ DerivedEastAsianWidth.txt
+ DerivedGeneralCategory.txt
+ DerivedJoiningGroup.txt
+ DerivedJoiningType.txt
+ DerivedLineBreak.txt
+ DerivedNormalizationProperties.txt
+ DerivedNumericType.txt
+ DerivedNumericValues.txt
+ DerivedProperties.html
The *.pl files are generated from these files by the 'mktables.PL' script.
diff --git a/lib/unicore/ReadMe.txt b/lib/unicore/ReadMe.txt
index 4e100a28f8..2c190c6948 100644
--- a/lib/unicore/ReadMe.txt
+++ b/lib/unicore/ReadMe.txt
@@ -1,4 +1,4 @@
-March 30, 2001
+August 10, 2001
This directory contains the Unicode Character Database
data files.
@@ -6,10 +6,12 @@ data files.
Currently, the Unicode Character Database files are at
the version level:
- Unicode Standard, Version 3.1.
+ Unicode Standard, Version 3.1.1
For information about the standard itself, see
UAX #27, Unicode 3.1. <http://www.unicode.org/unicode/reports/tr27/>
+and the Unicode 3.1.1 Update Notice.
+<http://www.unicode.org/versions/Unicode3.1.1.html>
Detailed documentation of the files constituting the
Unicode Character Database (contributory data files for
@@ -20,32 +22,18 @@ for specific details about particular files or sets of
files.
Unihan.txt is a very large file. For convenience, the current
-Unicode 3.1 version of Unihan.txt is also available in
-three compressed formats in the Unicode 3.1 update directory.
-See: <http://www.unicode.org/Public/3.1-Update/> or
-<ftp://ftp.unicode.org/Public/3.1-Update/>
+Unicode 3.1.1 version of Unihan.txt is also available in
+two compressed formats in the Unicode 3.1.1 update directory.
+See: <http://www.unicode.org/Public/3.1-Update1/> or
+<ftp://ftp.unicode.org/Public/3.1-Update1/>
-Unihan-3.1.zip for Windows. (Use winzip)
-Unihan-3.1.Z for Unix. (Use uncompress)
-Unihan-3.1.gz for Unix. (Use gzip)
-
-The Unicode 3.1 update directory also contains a winzip
-version of all the other contributory data files for
-that update directory. That file is named:
-
-UCDwithoutUnihan.zip (Use winzip)
-
-UCDwithoutUnihan.zip is a convenient way to download
-most of the Unicode Character Database files, but to
-be complete, be sure to also get ArabicShaping.txt,
-BidiMirroring.txt, and Jamo.txt, which were unchanged
-between Unicode 3.0.1 and Unicode 3.1, and which were
-therefore not included in the 3.1-Update directory.
+Unihan-3.1.1.zip for Windows. (Use winzip)
+Unihan-3.1.1.txt.gz for Unix. (Use gzip or gunzip)
Note that the files are zipped in
exactly the same format they have on the server (with Unix
line endings). From a browser, right-clicking on
-UCDwithoutUnihan.zip will allow automatic download and unzipping on a
+Unihan-3.1.1.zip will allow automatic download and unzip on a
Windows system with winzip installed.
diff --git a/lib/unicore/SpecCase.txt b/lib/unicore/SpecCase.txt
index aead3fff45..833c3352f7 100644
--- a/lib/unicore/SpecCase.txt
+++ b/lib/unicore/SpecCase.txt
@@ -1,4 +1,4 @@
-# SpecialCasing-4.txt
+# SpecialCasing-5.txt
#
# Special Casing Properties
#
@@ -20,20 +20,33 @@
# one character, they are separated by spaces.
#
# The <condition_list> is optional. Where present, it consists of one or more locales or contexts,
-# separated by spaces.
-# A condition list overrides the normal behavior if all of the listed conditions are true.
-# Case distinctions in the condition list are not significant.
-# Conditions preceded by "NON_" represent the negation of the condition
+# separated by spaces. In these conditions:
+# - A condition list overrides the normal behavior if all of the listed conditions are true.
+# - Case distinctions in the condition list are not significant.
+# - Conditions preceded by "NOT_" represent the negation of the condition.
+# - A cased letter is any character with general category = Ll or Lo or Lt
+# - An ignorable sequence is a sequence of *zero* or more characters from
+# the set {HYPHEN, SOFT HYPHEN, general category = Mn}.
#
# A locale is defined as:
# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> ( "_" <variant> )? )?
# <ISO_3166_code> := 2-letter ISO country code,
# <ISO_639_code> := 2-letter ISO language code
#
-# A context is one of the following choices:
-# FINAL: The letter is not followed by a letter of general category L* (e.g. Ll, Lt, Lu, Lm, or Lo).
-# MODERN: The mapping is only used for modern text.
-# AFTER_i: The last base character was "i" 0069
+# A context is a locale or one of the following choices:
+# CFINAL: The character is not followed by a sequence consisting of
+# an ignorable sequence and then a cased letter.
+# CINITIAL: The character is not preceded by a sequence consisting of
+# a cased letter and an ignorable sequence.
+# FINAL_SIGMA: CFINAL and NOT_CINITIAL
+# TYPE_i: The character is "i" (0069), "j" (006A),
+# or has a canonical decomposition that begins with an "i" or "j"
+# but has no combining characters above (i.e., i-ogonek (012F),
+# i-tilde-below (1E2D), or i-dot-below (1ECB)).
+# AFTER_i: The last preceding base character was TYPE_i, and
+# no character of combining class 230 (above) has intervened.
+# MORE_ABOVE: The character is followed by one or more characters of
+# combining class 230 (ABOVE) in the combining character sequence
#
# Other than as used to separate elements, spaces are to be ignored.
#
@@ -185,7 +198,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# Special case for final form of sigma
-03A3; 03C2; 03A3; 03A3; FINAL; # GREEK CAPITAL LETTER SIGMA
+03A3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK CAPITAL LETTER SIGMA
# Note: the following cases for non-final are already in the UnicodeData file.
@@ -195,18 +208,45 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# Note: the following cases are not included, since they would case-fold in lowercasing
-# 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA
-# 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA
+# 03C3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C3; 03A3; 03A3; NOT_FINAL_SIGMA; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
# Locale-sensitive mappings
# ================================================================================
-# Lithuanian
+# Lithuanian
-0307; 0307; ; ; lt AFTER_i; # Remove DOT ABOVE after "i" with upper or titlecase
+# Lithuanian retains the dot in a lowercase i when followed by accents.
-# Turkish, Azeri
+# Remove DOT ABOVE after "i" with upper or titlecase
+
+0307; 0307; ; ; lt AFTER_i # COMBINING DOT ABOVE
+
+# Introduce an explicit dot above when lowercasing capital I's and J's
+# whenever there are more accents above
+# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+
+0049; 0069 0307; 0049; 0049; lt MORE_ABOVE # LATIN CAPITAL LETTER I
+004A; 006A 0307; 004A; 004A; lt MORE_ABOVE # LATIN CAPITAL LETTER J
+012E; 012F 0307; 012E; 012E; lt MORE_ABOVE # LATIN CAPITAL LETTER I WITH OGONEK
+00CC; 0069 0307 0300; 00CC; 00CC; lt # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; 0069 0307 0301; 00CD; 00CD; lt # LATIN CAPITAL LETTER I WITH ACUTE
+0128; 0069 0307 0303; 0128; 0128; lt # LATIN CAPITAL LETTER I WITH TILDE
+
+# ================================================================================
+
+# Turkish and Azeri
+
+# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+# The following rules handle those cases.
+
+# Remove spurious dot above small i's when lowercasing, if there are no more accents above:
+
+0307; ; 0307; 0307; tr AFTER_i NOT_MORE_ABOVE # COMBINING DOT ABOVE
+0307; ; 0307; 0307; az AFTER_i NOT_MORE_ABOVE # COMBINING DOT ABOVE
+
+# Fix case pairs
0049; 0131; 0049; 0049; tr; # LATIN CAPITAL LETTER I
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
diff --git a/lib/unicore/rename b/lib/unicore/rename
index 71c18575aa..d9e6051e18 100644
--- a/lib/unicore/rename
+++ b/lib/unicore/rename
@@ -1,18 +1,20 @@
-ArabicShaping.txt ArabShap.txt
-BidiMirroring.txt BidiMirr.txt
-#Blocks.txt Blocks.txt
-CaseFolding.txt CaseFold.txt
-CompositionExclusions.txt CompExcl.txt
-EastAsianWidth.txt EAWidth.txt
-#Index.txt Index.txt
-#Jamo.txt Jamo.txt
-LineBreak.txt LineBrk.txt
-#NamesList.html NamesList.html
-#NamesList.txt NamesList.txt
-#PropList.txt PropList.txt
-#PropList.html PropList.html
-#ReadMe.txt ReadMe.txt
-SpecialCasing.txt SpecCase.txt
-UnicodeCharacterDatabase.html UCD.html
-UnicodeData.html Unicode.html
-UnicodeData.txt Unicode.txt
+#!/bin/sh
+
+mv ArabicShaping.txt ArabShap.txt
+mv BidiMirroring.txt BidiMirr.txt
+#Blocks.txt Blocks.txt
+mv CaseFolding.txt CaseFold.txt
+mv CompositionExclusions.txt CompExcl.txt
+mv EastAsianWidth.txt EAWidth.txt
+#Index.txt Index.txt
+#Jamo.txt Jamo.txt
+mv LineBreak.txt LineBrk.txt
+#NamesList.html NamesList.html
+#NamesList.txt NamesList.txt
+#PropList.txt PropList.txt
+#PropList.html PropList.html
+#ReadMe.txt ReadMe.txt
+mv SpecialCasing.txt SpecCase.txt
+mv UnicodeCharacterDatabase.html UCD.html
+mv UnicodeData.html Unicode.html
+mv UnicodeData.txt Unicode.txt
diff --git a/lib/unicore/version b/lib/unicore/version
index 8c50098d8a..94ff29cc4d 100644
--- a/lib/unicore/version
+++ b/lib/unicore/version
@@ -1 +1 @@
-3.1
+3.1.1