summaryrefslogtreecommitdiff
path: root/maint
diff options
context:
space:
mode:
authorchpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-09-30 18:20:10 +0000
committerchpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-09-30 18:20:10 +0000
commitf16304072b2fb5a95aae43a59d9f3537ae0b0052 (patch)
treec356e6176fcbbcbe5b91ffe6bc386356155a9c2a /maint
parent3d364d6ac845a11ef1f04c07ddd48911a5117bc0 (diff)
downloadpcre-f16304072b2fb5a95aae43a59d9f3537ae0b0052.tar.gz
unicode: Update to Unicode 6.2
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1050 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'maint')
-rwxr-xr-xmaint/GenerateUtt.py4
-rwxr-xr-xmaint/MultiStage2.py5
-rw-r--r--maint/Unicode.tables/CaseFolding.txt8
-rw-r--r--maint/Unicode.tables/DerivedGeneralCategory.txt14
-rw-r--r--maint/Unicode.tables/GraphemeBreakProperty.txt12
-rw-r--r--maint/Unicode.tables/Scripts.txt17
-rw-r--r--maint/Unicode.tables/UnicodeData.txt9
7 files changed, 40 insertions, 29 deletions
diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py
index e8190ae..92adf47 100755
--- a/maint/GenerateUtt.py
+++ b/maint/GenerateUtt.py
@@ -16,6 +16,8 @@
# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.
# Modified by PH 04-May-2010 to add new "X.." special categories.
# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
+# Modified by ChPe 30-September-2012 to add this note; no other changes were
+# necessary for Unicode 6.2.0 support.
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@@ -101,6 +103,6 @@ for utt in utt_table:
value = 'ucp_' + utt[0]
if utt == utt_table[-1]:
last = ''
- print ' { %3d, %s, %s }%s ' % (offset, utt[1], value, last)
+ print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last)
offset += len(utt[0]) + 1
print '};'
diff --git a/maint/MultiStage2.py b/maint/MultiStage2.py
index 1c5fce7..ed68f23 100755
--- a/maint/MultiStage2.py
+++ b/maint/MultiStage2.py
@@ -107,6 +107,7 @@
# not much bigger than before.
# 18-September-2012: Added code for multiple caseless sets. This uses the
# final hole in the structure.
+# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
##############################################################################
@@ -304,8 +305,8 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
-break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
- 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Other' ]
+break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
+ 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ]
test_record_size()
diff --git a/maint/Unicode.tables/CaseFolding.txt b/maint/Unicode.tables/CaseFolding.txt
index 0d9a409..df1813d 100644
--- a/maint/Unicode.tables/CaseFolding.txt
+++ b/maint/Unicode.tables/CaseFolding.txt
@@ -1,8 +1,8 @@
-# CaseFolding-6.1.0.txt
-# Date: 2011-07-25, 21:21:56 GMT [MD]
+# CaseFolding-6.2.0.txt
+# Date: 2012-08-14, 17:54:49 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -1222,3 +1222,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
+#
+# EOF
diff --git a/maint/Unicode.tables/DerivedGeneralCategory.txt b/maint/Unicode.tables/DerivedGeneralCategory.txt
index 12a346f..546a677 100644
--- a/maint/Unicode.tables/DerivedGeneralCategory.txt
+++ b/maint/Unicode.tables/DerivedGeneralCategory.txt
@@ -1,8 +1,8 @@
-# DerivedGeneralCategory-6.1.0.txt
-# Date: 2011-11-27, 05:10:22 GMT [MD]
+# DerivedGeneralCategory-6.2.0.txt
+# Date: 2012-05-20, 00:42:34 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -275,7 +275,7 @@
2072..2073 ; Cn # [2] <reserved-2072>..<reserved-2073>
208F ; Cn # <reserved-208F>
209D..209F ; Cn # [3] <reserved-209D>..<reserved-209F>
-20BA..20CF ; Cn # [22] <reserved-20BA>..<reserved-20CF>
+20BB..20CF ; Cn # [21] <reserved-20BB>..<reserved-20CF>
20F1..20FF ; Cn # [15] <reserved-20F1>..<reserved-20FF>
218A..218F ; Cn # [6] <reserved-218A>..<reserved-218F>
23F4..23FF ; Cn # [12] <reserved-23F4>..<reserved-23FF>
@@ -554,7 +554,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
-# Total code points: 864415
+# Total code points: 864414
# ================================================
@@ -3230,7 +3230,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
0BF9 ; Sc # TAMIL RUPEE SIGN
0E3F ; Sc # THAI CURRENCY SYMBOL BAHT
17DB ; Sc # KHMER CURRENCY SYMBOL RIEL
-20A0..20B9 ; Sc # [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
+20A0..20BA ; Sc # [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN
A838 ; Sc # NORTH INDIC RUPEE MARK
FDFC ; Sc # RIAL SIGN
FE69 ; Sc # SMALL DOLLAR SIGN
@@ -3238,7 +3238,7 @@ FF04 ; Sc # FULLWIDTH DOLLAR SIGN
FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
-# Total code points: 48
+# Total code points: 49
# ================================================
diff --git a/maint/Unicode.tables/GraphemeBreakProperty.txt b/maint/Unicode.tables/GraphemeBreakProperty.txt
index d3f480d..948faa9 100644
--- a/maint/Unicode.tables/GraphemeBreakProperty.txt
+++ b/maint/Unicode.tables/GraphemeBreakProperty.txt
@@ -1,8 +1,8 @@
-# GraphemeBreakProperty-6.1.0.txt
-# Date: 2011-12-05, 16:44:15 GMT [MD]
+# GraphemeBreakProperty-6.2.0.txt
+# Date: 2012-08-13, 19:12:02 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -308,6 +308,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# ================================================
+1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
+
+# Total code points: 26
+
+# ================================================
+
0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA
093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE
093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
diff --git a/maint/Unicode.tables/Scripts.txt b/maint/Unicode.tables/Scripts.txt
index 2516f88..1a8e722 100644
--- a/maint/Unicode.tables/Scripts.txt
+++ b/maint/Unicode.tables/Scripts.txt
@@ -1,8 +1,8 @@
-# Scripts-6.1.0.txt
-# Date: 2011-11-27, 05:10:50 GMT [MD]
+# Scripts-6.2.0.txt
+# Date: 2012-06-04, 17:21:29 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -146,7 +146,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
-20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
+20A0..20BA ; Common # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
@@ -576,7 +576,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
-# Total code points: 6412
+# Total code points: 6413
# ================================================
@@ -760,7 +760,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
-0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
+0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
@@ -827,7 +827,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
-# Total code points: 1234
+# Total code points: 1235
# ================================================
@@ -1477,7 +1477,6 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
-065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -1504,7 +1503,7 @@ FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CON
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 524
+# Total code points: 523
# ================================================
diff --git a/maint/Unicode.tables/UnicodeData.txt b/maint/Unicode.tables/UnicodeData.txt
index 9f20405..086379e 100644
--- a/maint/Unicode.tables/UnicodeData.txt
+++ b/maint/Unicode.tables/UnicodeData.txt
@@ -7190,6 +7190,7 @@
20B7;SPESMILO SIGN;Sc;0;ET;;;;;N;;;;;
20B8;TENGE SIGN;Sc;0;ET;;;;;N;;;;;
20B9;INDIAN RUPEE SIGN;Sc;0;ET;;;;;N;;;;;
+20BA;TURKISH LIRA SIGN;Sc;0;ET;;;;;N;;;;;
20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;;
20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;;
20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;;
@@ -18703,8 +18704,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1242F;CUNEIFORM NUMERIC SIGN THREE SHARU VARIANT FORM;Nl;0;L;;;;3;N;;;;;
12430;CUNEIFORM NUMERIC SIGN FOUR SHARU;Nl;0;L;;;;4;N;;;;;
12431;CUNEIFORM NUMERIC SIGN FIVE SHARU;Nl;0;L;;;;5;N;;;;;
-12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;;N;;;;;
-12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;;N;;;;;
+12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;216000;N;;;;;
+12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;432000;N;;;;;
12434;CUNEIFORM NUMERIC SIGN ONE BURU;Nl;0;L;;;;1;N;;;;;
12435;CUNEIFORM NUMERIC SIGN TWO BURU;Nl;0;L;;;;2;N;;;;;
12436;CUNEIFORM NUMERIC SIGN THREE BURU;Nl;0;L;;;;3;N;;;;;
@@ -18739,8 +18740,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;;
12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;;
12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;;
-12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;;N;;;;;
-12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;;N;;;;;
+12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;;
+12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;;
12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;;
12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;;
1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;;