diff options
author | Alexander Barkov <bar@mariadb.com> | 2023-02-17 16:20:01 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2023-02-17 17:33:27 +0400 |
commit | 7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0 (patch) | |
tree | 66e727d8e65166497e68447ba659650d0cd53768 /mysql-test | |
parent | 345356b868d840554a8572876efc027d3ccd9842 (diff) | |
download | mariadb-git-7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0.tar.gz |
MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8
String length growth during upper/lower conversion
in Unicode collations depends only on the underlying MY_UNICASE_INFO
used in the collation.
Maintaining the separate members CHARSET_INFO::caseup_multiply and
CHARSET_INFO::casedn_multiply duplicated this information
and caused bugs like this one (when MY_UNICASE_INFO and case??_multiply
went out of sync because of incomplete CHARSET_INFO initialization).
Fix:
Changing CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply
from members to virtual functions.
The virtual functions in Unicode collations calculate case conversion
growth factors from the MY_UNICASE_INFO. This guarantees that the growth
factors are always in sync with the MY_UNICASE_INFO.
Diffstat (limited to 'mysql-test')
-rw-r--r-- | mysql-test/include/ctype_casefolding.inc | 8 | ||||
-rw-r--r-- | mysql-test/main/ctype_ldml.result | 56 | ||||
-rw-r--r-- | mysql-test/main/ctype_ldml.test | 7 | ||||
-rw-r--r-- | mysql-test/main/ctype_utf8_uca.result | 145 | ||||
-rw-r--r-- | mysql-test/main/ctype_utf8_uca.test | 19 | ||||
-rw-r--r-- | mysql-test/main/ctype_utf8mb4_uca.result | 139 | ||||
-rw-r--r-- | mysql-test/main/ctype_utf8mb4_uca.test | 10 |
7 files changed, 384 insertions, 0 deletions
diff --git a/mysql-test/include/ctype_casefolding.inc b/mysql-test/include/ctype_casefolding.inc index 4ee402c95ad..74b2ab7650a 100644 --- a/mysql-test/include/ctype_casefolding.inc +++ b/mysql-test/include/ctype_casefolding.inc @@ -13,6 +13,14 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); + +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; + UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; DROP TABLE case_folding; diff --git a/mysql-test/main/ctype_ldml.result b/mysql-test/main/ctype_ldml.result index efafa9dc041..a23e835d1fa 100644 --- a/mysql-test/main/ctype_ldml.result +++ b/mysql-test/main/ctype_ldml.result @@ -3068,6 +3068,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -3081,6 +3087,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -3230,5 +3240,51 @@ is_520 0 is_1400 1 DROP TABLE t1; # +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) 
CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# # End of 10.10 tests # diff --git a/mysql-test/main/ctype_ldml.test b/mysql-test/main/ctype_ldml.test index 5ee56c3b950..6336a1d0f5f 100644 --- a/mysql-test/main/ctype_ldml.test +++ b/mysql-test/main/ctype_ldml.test @@ -698,5 +698,12 @@ ORDER BY a, HEX(a); DROP TABLE t1; --echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; +--source include/ctype_casefolding.inc + +--echo # --echo # End of 10.10 tests --echo # diff --git a/mysql-test/main/ctype_utf8_uca.result b/mysql-test/main/ctype_utf8_uca.result index e31acf8edc4..ecd2daa5272 100644 --- a/mysql-test/main/ctype_utf8_uca.result +++ b/mysql-test/main/ctype_utf8_uca.result @@ -612,6 +612,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE 
case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -625,6 +631,53 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB C9AB ɫ 271 C9B1 C9B1 ɱ 27D C9BD C9BD ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_turkish_ci /*Unicode-4.0 folding, with Turkish mapping for I */; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb3 COLLATE utf8mb3_turkish_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +49 C4B1 49 I +69 69 C4B0 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -645,6 +698,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS 
I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -658,6 +717,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_nopad_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -678,6 +741,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -691,6 +760,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_myanmar_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -711,6 +784,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -724,6 +803,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2; CREATE 
OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -744,6 +827,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -757,6 +846,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -1533,3 +1626,55 @@ SET NAMES utf8mb3; # # End of 10.10 tests # +# +# Start of 10.10 tests +# +# +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb3 COLLATE utf8mb3_uca1400_ai_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb3 COLLATE utf8mb3_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 
E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# +# End of 10.10 tests +# diff --git a/mysql-test/main/ctype_utf8_uca.test b/mysql-test/main/ctype_utf8_uca.test index 946649417ae..5abf9192511 100644 --- a/mysql-test/main/ctype_utf8_uca.test +++ b/mysql-test/main/ctype_utf8_uca.test @@ -34,6 +34,9 @@ SET NAMES utf8 COLLATE utf8_unicode_nopad_ci; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_ci /*Unicode-4.0 folding*/; --source include/ctype_casefolding.inc +SET NAMES utf8mb3 COLLATE utf8mb3_turkish_ci /*Unicode-4.0 folding, with Turkish mapping for I */; +--source include/ctype_casefolding.inc + SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; --source include/ctype_casefolding.inc @@ -79,3 +82,19 @@ SET NAMES utf8mb3; --echo # --echo # End of 10.10 tests --echo # + + +--echo # +--echo # Start of 10.10 tests +--echo # + +--echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb3 COLLATE utf8mb3_uca1400_ai_ci; +--source include/ctype_casefolding.inc + +--echo # +--echo # End of 10.10 tests +--echo # diff --git a/mysql-test/main/ctype_utf8mb4_uca.result b/mysql-test/main/ctype_utf8mb4_uca.result index 195c47e0360..42de45c98a3 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.result +++ b/mysql-test/main/ctype_utf8mb4_uca.result @@ -6630,6 +6630,55 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 
C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE utf8mb4_turkish_ci /*Unicode-4.0 folding with Turkish mapping for I */; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_turkish_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6643,6 +6692,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB C9AB ɫ 271 C9B1 C9B1 ɱ 27D C9BD C9BD ɽ +49 C4B1 49 I +69 69 C4B0 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6663,6 +6716,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6676,6 +6735,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE 
ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_nopad_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6696,6 +6759,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6709,6 +6778,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_myanmar_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6729,6 +6802,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6742,6 +6821,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_thai_520_w2; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6762,6 +6845,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, 
+(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6775,6 +6864,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -11379,5 +11472,51 @@ DROP TABLE t1; DROP PROCEDURE exec_verbose; DROP PROCEDURE test_styles; # +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_ai_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# # End of 10.10 tests # diff --git a/mysql-test/main/ctype_utf8mb4_uca.test 
b/mysql-test/main/ctype_utf8mb4_uca.test index b4cfc910336..3f428447cc9 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.test +++ b/mysql-test/main/ctype_utf8mb4_uca.test @@ -121,6 +121,9 @@ SET NAMES utf8mb4; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci /*Unicode-4.0 folding*/; --source include/ctype_casefolding.inc +SET NAMES utf8mb4 COLLATE utf8mb4_turkish_ci /*Unicode-4.0 folding with Turkish mapping for I */; +--source include/ctype_casefolding.inc + SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; --source include/ctype_casefolding.inc @@ -427,5 +430,12 @@ DROP PROCEDURE exec_verbose; DROP PROCEDURE test_styles; --echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_ai_ci; +--source include/ctype_casefolding.inc + +--echo # --echo # End of 10.10 tests --echo # |