diff options
author | chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-09-30 18:20:10 +0000 |
---|---|---|
committer | chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-09-30 18:20:10 +0000 |
commit | f16304072b2fb5a95aae43a59d9f3537ae0b0052 (patch) | |
tree | c356e6176fcbbcbe5b91ffe6bc386356155a9c2a /maint | |
parent | 3d364d6ac845a11ef1f04c07ddd48911a5117bc0 (diff) | |
download | pcre-f16304072b2fb5a95aae43a59d9f3537ae0b0052.tar.gz |
unicode: Update to Unicode 6.2
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1050 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'maint')
-rwxr-xr-x | maint/GenerateUtt.py | 4 | ||||
-rwxr-xr-x | maint/MultiStage2.py | 5 | ||||
-rw-r--r-- | maint/Unicode.tables/CaseFolding.txt | 8 | ||||
-rw-r--r-- | maint/Unicode.tables/DerivedGeneralCategory.txt | 14 | ||||
-rw-r--r-- | maint/Unicode.tables/GraphemeBreakProperty.txt | 12 | ||||
-rw-r--r-- | maint/Unicode.tables/Scripts.txt | 17 | ||||
-rw-r--r-- | maint/Unicode.tables/UnicodeData.txt | 9 |
7 files changed, 40 insertions, 29 deletions
diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py index e8190ae..92adf47 100755 --- a/maint/GenerateUtt.py +++ b/maint/GenerateUtt.py @@ -16,6 +16,8 @@ # Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0. # Modified by PH 04-May-2010 to add new "X.." special categories. # Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0 +# Modified by ChPe 30-September-2012 to add this note; no other changes were +# necessary for Unicode 6.2.0 support. script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ @@ -101,6 +103,6 @@ for utt in utt_table: value = 'ucp_' + utt[0] if utt == utt_table[-1]: last = '' - print ' { %3d, %s, %s }%s ' % (offset, utt[1], value, last) + print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last) offset += len(utt[0]) + 1 print '};' diff --git a/maint/MultiStage2.py b/maint/MultiStage2.py index 1c5fce7..ed68f23 100755 --- a/maint/MultiStage2.py +++ b/maint/MultiStage2.py @@ -107,6 +107,7 @@ # not much bigger than before. # 18-September-2012: Added code for multiple caseless sets. This uses the # final hole in the structure. +# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0 ############################################################################## @@ -304,8 +305,8 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ] -break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend', - 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Other' ] +break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend', + 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ] test_record_size() diff --git a/maint/Unicode.tables/CaseFolding.txt b/maint/Unicode.tables/CaseFolding.txt index 0d9a409..df1813d 100644 --- a/maint/Unicode.tables/CaseFolding.txt +++ b/maint/Unicode.tables/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-6.1.0.txt -# Date: 2011-07-25, 21:21:56 GMT [MD] +# CaseFolding-6.2.0.txt +# Date: 2012-08-14, 17:54:49 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -1222,3 +1222,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # DESERET CAPITAL LETTER EW +# +# EOF diff --git a/maint/Unicode.tables/DerivedGeneralCategory.txt b/maint/Unicode.tables/DerivedGeneralCategory.txt index 12a346f..546a677 100644 --- a/maint/Unicode.tables/DerivedGeneralCategory.txt +++ b/maint/Unicode.tables/DerivedGeneralCategory.txt @@ -1,8 +1,8 @@ -# DerivedGeneralCategory-6.1.0.txt -# Date: 2011-11-27, 05:10:22 GMT [MD] +# DerivedGeneralCategory-6.2.0.txt +# Date: 2012-05-20, 00:42:34 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -275,7 +275,7 @@ 2072..2073 ; Cn # [2] <reserved-2072>..<reserved-2073> 208F ; Cn # <reserved-208F> 209D..209F ; Cn # [3] <reserved-209D>..<reserved-209F> -20BA..20CF ; Cn # [22] <reserved-20BA>..<reserved-20CF> +20BB..20CF ; Cn # [21] <reserved-20BB>..<reserved-20CF> 20F1..20FF ; Cn # [15] <reserved-20F1>..<reserved-20FF> 218A..218F ; Cn # [6] <reserved-218A>..<reserved-218F> 23F4..23FF ; Cn # [12] <reserved-23F4>..<reserved-23FF> @@ -554,7 +554,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF> FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> -# Total code points: 864415 +# Total code points: 864414 # ================================================ @@ -3230,7 +3230,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 0BF9 ; Sc # TAMIL RUPEE SIGN 0E3F ; Sc # THAI CURRENCY SYMBOL BAHT 17DB ; Sc # KHMER CURRENCY SYMBOL RIEL -20A0..20B9 ; Sc # [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; Sc # [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN A838 ; Sc # NORTH INDIC RUPEE MARK FDFC ; Sc # RIAL SIGN FE69 ; Sc # SMALL DOLLAR SIGN @@ -3238,7 +3238,7 @@ FF04 ; Sc # FULLWIDTH DOLLAR SIGN FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 48 +# Total code points: 49 # ================================================ diff --git a/maint/Unicode.tables/GraphemeBreakProperty.txt b/maint/Unicode.tables/GraphemeBreakProperty.txt index d3f480d..948faa9 100644 --- a/maint/Unicode.tables/GraphemeBreakProperty.txt +++ b/maint/Unicode.tables/GraphemeBreakProperty.txt @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-6.1.0.txt -# Date: 2011-12-05, 16:44:15 GMT [MD] +# GraphemeBreakProperty-6.2.0.txt +# Date: 2012-08-13, 19:12:02 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -308,6 +308,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + 0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA 093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE 093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II diff --git a/maint/Unicode.tables/Scripts.txt b/maint/Unicode.tables/Scripts.txt index 2516f88..1a8e722 100644 --- a/maint/Unicode.tables/Scripts.txt +++ b/maint/Unicode.tables/Scripts.txt @@ -1,8 +1,8 @@ -# Scripts-6.1.0.txt -# Date: 2011-11-27, 05:10:50 GMT [MD] +# Scripts-6.2.0.txt +# Date: 2012-06-04, 17:21:29 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2011 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -146,7 +146,7 @@ 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS -20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN +20A0..20BA ; Common # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C 2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA @@ -576,7 +576,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 6412 +# Total code points: 6413 # ================================================ @@ -760,7 +760,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK 0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH -0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS +0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW 066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR 066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF 0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE @@ -827,7 +827,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1234 +# Total code points: 1235 # ================================================ @@ -1477,7 +1477,6 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X 0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA 064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW -065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW 0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF 0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -1504,7 +1503,7 @@ FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 524 +# Total code points: 523 # ================================================ diff --git a/maint/Unicode.tables/UnicodeData.txt b/maint/Unicode.tables/UnicodeData.txt index 9f20405..086379e 100644 --- a/maint/Unicode.tables/UnicodeData.txt +++ b/maint/Unicode.tables/UnicodeData.txt @@ -7190,6 +7190,7 @@ 20B7;SPESMILO SIGN;Sc;0;ET;;;;;N;;;;; 20B8;TENGE SIGN;Sc;0;ET;;;;;N;;;;; 20B9;INDIAN RUPEE SIGN;Sc;0;ET;;;;;N;;;;; +20BA;TURKISH LIRA SIGN;Sc;0;ET;;;;;N;;;;; 20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;; 20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;; 20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;; @@ -18703,8 +18704,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1242F;CUNEIFORM NUMERIC SIGN THREE SHARU VARIANT FORM;Nl;0;L;;;;3;N;;;;; 12430;CUNEIFORM NUMERIC SIGN FOUR SHARU;Nl;0;L;;;;4;N;;;;; 12431;CUNEIFORM NUMERIC SIGN FIVE SHARU;Nl;0;L;;;;5;N;;;;; -12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;;N;;;;; -12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;;N;;;;; +12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;216000;N;;;;; +12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;432000;N;;;;; 12434;CUNEIFORM NUMERIC SIGN ONE BURU;Nl;0;L;;;;1;N;;;;; 12435;CUNEIFORM NUMERIC SIGN TWO BURU;Nl;0;L;;;;2;N;;;;; 12436;CUNEIFORM NUMERIC SIGN THREE BURU;Nl;0;L;;;;3;N;;;;; @@ -18739,8 +18740,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;; 12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;; 12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;; -12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;;N;;;;; -12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;;N;;;;; +12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;; +12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;; 12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;; 12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;; 1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;; |