--source include/have_innodb.inc --source include/have_ucs2.inc --source include/have_utf8mb4.inc --source include/have_utf16.inc --source include/have_utf32.inc --disable_warnings drop table if exists t1; --enable_warnings # Index.xml has some intentional errors in collation definitions. # They are reported to the server error log. Let's suppress them. --disable_query_log call mtr.add_suppression("Syntax error at '\\[strength tertiary\\]'"); call mtr.add_suppression("Can't reset before a primary ignorable character U\\+A48C"); call mtr.add_suppression("Charset id.*trying to replace"); --enable_query_log --echo In the following tests we change the order of letter "b" --echo making it equal to letter "a", and check that it works --echo with all Unicode character sets set names utf8; --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR show variables like 'character_sets_dir%'; show collation like 'utf8mb3_phone_ci'; CREATE TABLE t1 ( name VARCHAR(64), phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci ); INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02'); INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04'); INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01'); INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03'); INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005'); SELECT * FROM t1 ORDER BY phone; SELECT * FROM t1 WHERE phone='+7(912)800-80-01'; SELECT * FROM t1 WHERE phone='79128008001'; SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1'; DROP TABLE t1; show collation like 'utf8mb3_test_ci'; create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'ucs2_test_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf8mb4_test_ci'; create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf16_test_ci'; create table t1 (c1 char(1) character set utf16 collate utf16_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf32_test_ci'; create table t1 (c1 char(1) character set utf32 collate utf32_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; ### ### TODO: activate this when utf8_unicode_520_ci is merged ### ### make sure utf8_test_ci is Unicode-5.0.0 ##SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a)); ##SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a)); ##SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a)); ##SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a)); ### check that it works with supplementary characters ##SELECT hex(weight_string(convert(_utf32 0x61 using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_utf32 0x62 using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_utf32 0x10062 using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_utf32 0x100400 using utf8mb4) collate utf8mb4_test_ci)); ### check contractions with non-ascii characters ##SELECT hex(weight_string(_utf8mb4 0x64 collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_ucs2 0x0064017e using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_ucs2 0x0044017e using utf8mb4) collate utf8mb4_test_ci)); ##SELECT hex(weight_string(convert(_ucs2 0x0044017d using utf8mb4) collate utf8mb4_test_ci)); --echo # --echo # Bug#45645 Mysql server close all connection and restart using lower function --echo # CREATE TABLE t1 (a VARCHAR(10)) CHARACTER SET utf8 COLLATE utf8_test_ci; INSERT INTO t1 (a) VALUES ('hello!'); SELECT * FROM t1 WHERE LOWER(a)=LOWER('N'); DROP TABLE t1; --echo # --echo # Bug#51976 LDML collations issue (cyrillic example) --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci); INSERT INTO t1 (a) VALUES ('Hello'); SELECT a, UPPER(a), LOWER(a) FROM t1; DROP TABLE t1; --echo # --echo # Bug#43827 Server closes connections and restarts --echo # # Crash happened with a user-defined utf8 collation, # on attempt to insert a string longer than the column can store. CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci); INSERT IGNORE INTO t1 SELECT REPEAT('a',11); DROP TABLE t1; # # Vietnamese experimental collation # --echo Vietnamese experimental collation show collation like 'ucs2_vn_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2), (0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0); insert into t1 values (0x0103),(0x0102),(0x1EB1),(0x1EB0),(0x1EB3),(0x1EB2), (0x1EB5),(0x1EB4),(0x1EAF),(0x1EAE),(0x1EB7),(0x1EB6); insert into t1 values (0x00E2),(0x00C2),(0x1EA7),(0x1EA6),(0x1EA9),(0x1EA8), (0x1EAB),(0x1EAA),(0x1EA5),(0x1EA4),(0x1EAD),(0x1EAC); insert into t1 values ('b'),('B'),('c'),('C'); insert into t1 values ('d'),('D'),(0x0111),(0x0110); insert into t1 values (0x0065),(0x0045),(0x00E8),(0x00C8),(0x1EBB),(0x1EBA), (0x1EBD),(0x1EBC),(0x00E9),(0x00C9),(0x1EB9),(0x1EB8); insert into t1 values (0x00EA),(0x00CA),(0x1EC1),(0x1EC0),(0x1EC3),(0x1EC2), (0x1EC5),(0x1EC4),(0x1EBF),(0x1EBE),(0x1EC7),(0x1EC6); insert into t1 values ('g'),('G'),('h'),('H'); insert into t1 values (0x0069),(0x0049),(0x00EC),(0x00CC),(0x1EC9),(0x1EC8), (0x0129),(0x0128),(0x00ED),(0x00CD),(0x1ECB),(0x1ECA); insert into t1 values ('k'),('K'),('l'),('L'),('m'),('M'); insert into t1 values (0x006F),(0x004F),(0x00F2),(0x00D2),(0x1ECF),(0x1ECE), (0x00F5),(0x00D5),(0x00F3),(0x00D3),(0x1ECD),(0x1ECC); insert into t1 values (0x00F4),(0x00D4),(0x1ED3),(0x1ED2),(0x1ED5),(0x1ED4), (0x1ED7),(0x1ED6),(0x1ED1),(0x1ED0),(0x1ED9),(0x1ED8); insert into t1 values (0x01A1),(0x01A0),(0x1EDD),(0x1EDC),(0x1EDF),(0x1EDE), (0x1EE1),(0x1EE0),(0x1EDB),(0x1EDA),(0x1EE3),(0x1EE2); insert into t1 values ('p'),('P'),('q'),('Q'),('r'),('R'),('s'),('S'),('t'),('T'); insert into t1 values (0x0075),(0x0055),(0x00F9),(0x00D9),(0x1EE7),(0x1EE6), (0x0169),(0x0168),(0x00FA),(0x00DA),(0x1EE5),(0x1EE4); insert into t1 values (0x01B0),(0x01AF),(0x1EEB),(0x1EEA),(0x1EED),(0x1EEC), (0x1EEF),(0x1EEE),(0x1EE9),(0x1EE8),(0x1EF1),(0x1EF0); insert into t1 values ('v'),('V'),('x'),('X'); insert into t1 values (0x0079),(0x0059),(0x1EF3),(0x1EF2),(0x1EF7),(0x1EF6), (0x1EF9),(0x1EF8),(0x00FD),(0x00DD),(0x1EF5),(0x1EF4); select hex(c1) as h, c1 from t1 order by c1, h; select group_concat(hex(c1) order by hex(c1)) from t1 group by c1; select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1; drop table t1; --echo Bug#46448 trailing spaces are not ignored when user collation maps space != 0x20 set names latin1; show collation like 'latin1_test'; select "foo" = "foo " collate latin1_test; -- echo The following tests check that two-byte collation IDs work # The file ../std-data/Index.xml has a number of collations with high IDs. # Test that the "ID" column in I_S and SHOW queries can handle two bytes select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id; show collation like '%test%'; # Test that two-byte collation ID is correctly transfered to the client side. show collation like 'ucs2_vn_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); show create table t1; insert into t1 values (0x0061); --enable_metadata set @@character_set_results=NULL; select * from t1; --disable_metadata drop table t1; # # Check maximum collation ID (2047 as of MySQL-6.0.9) # CREATE TABLE t1 (s1 char(10) character set utf8 collate utf8_maxuserid_ci); INSERT INTO t1 VALUES ('a'),('b'); SELECT * FROM t1 WHERE s1='a' ORDER BY BINARY s1; DROP TABLE t1; # # Bug#47756 Setting 2byte collation ID with 'set names' crashes the server # SET NAMES utf8 COLLATE utf8_phone_ci; show collation like 'utf8mb3_phone_ci'; SET NAMES utf8; # make sure utf8mb4_test_400_ci is Unicode-4.0.0 based SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_400_ci)); SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_400_ci)); SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a)); SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a)); --echo # --echo # WL#5624 Collation customization improvements --echo # SET NAMES utf8 COLLATE utf8_5624_1; CREATE TABLE t1 AS SELECT REPEAT(' ', 16) AS a LIMIT 0; # Part 1,2,3: long contractions and expansions # Part 7: Quarternary difference INSERT INTO t1 VALUES ('012345'),('001234'),('000123'),('000012'),('000001'); INSERT INTO t1 VALUES ('12345'),('01234'),('00123'),('00012'),('00001'); INSERT INTO t1 VALUES ('1234'),('0123'),('0012'),('0001'); INSERT INTO t1 VALUES ('123'),('012'),('001'); INSERT INTO t1 VALUES ('12'),('01'); INSERT INTO t1 VALUES ('1'),('9'); INSERT INTO t1 VALUES ('ГАИ'),('ГИБДД'); # Part 4: reset before # Part 6: characters rather than escape sequences INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'); INSERT INTO t1 VALUES ('cz'),('Ċ'),('ċ'); INSERT INTO t1 VALUES ('f'),('fz'),('g'),('Ġ'),('ġ'); INSERT INTO t1 VALUES ('h'),('hz'),('GĦ'),('Għ'),('gĦ'),('għ'); INSERT INTO t1 VALUES ('i'),('iz'),('Ħ'),('ħ'); INSERT INTO t1 VALUES ('y'),('yz'),('z'),('Ż'),('ż'); INSERT INTO t1 VALUES ('ā'),('Ā'),('á'),('Á'),('à'),('À'); INSERT INTO t1 VALUES ('ē'),('é'),('ě'),('ê'),('Ē'),('É'),('Ě'),('Ê'); # Part 8: Abbreviated shift syntax INSERT INTO t1 VALUES ('a'),('~'),('!'),('@'),('#'),('$'),('%'),('^'); INSERT INTO t1 VALUES ('('),(')'),('-'),('+'),('|'),('='),(':'),(';'); INSERT INTO t1 VALUES ('"'),('\''),('?'); # Part 9: Normal expansion syntax INSERT INTO t1 VALUES ('ch'),('k'),('cs'),('ccs'),('cscs'); # Part 10: Previous context INSERT INTO t1 VALUES ('aa-'),('ab-'),('ac-'),('ad-'),('ae-'),('af-'),('az-'); # Part 12: Logical reset positions INSERT INTO t1 VALUES ('lp-fni'),('lp-lni'); INSERT INTO t1 VALUES ('lp-fpi'),('lp-lpi'); INSERT INTO t1 VALUES ('lp-fsi'),('lp-lsi'); INSERT INTO t1 VALUES ('lp-fti'),('lp-lti'); INSERT INTO t1 VALUES ('lp-ft'),('lp-lt'); INSERT INTO t1 VALUES ('lp-fv'),('lp-lv'); # Logical positions with reset before INSERT INTO t1 VALUES ('lb-fni'),('lb-lni'); INSERT INTO t1 VALUES ('lb-fv'),('lb-lv'); # Part 5: Long tailoring INSERT INTO t1 VALUES (_ucs2 0x3106),(_ucs2 0x3110), (_ucs2 0x3111), (_ucs2 0x3112); INSERT INTO t1 VALUES (_ucs2 0x32A3), (_ucs2 0x3231); INSERT INTO t1 VALUES (_ucs2 0x84D9), (_ucs2 0x98F5), (_ucs2 0x7CF3), (_ucs2 0x5497); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY a; --echo # --echo # WL#5624, the same test with UCS2 --echo # ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_5624_1; SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --echo # --echo # WL#5624, unsupported features --echo # # Part 13: More verbosity --error ER_UNKNOWN_COLLATION SET NAMES utf8 COLLATE utf8_5624_2; SHOW WARNINGS; --echo # --echo # WL#5624, reset before primary ignorable --echo # --error ER_UNKNOWN_COLLATION SET NAMES utf8 COLLATE utf8_5624_3; SHOW WARNINGS; --echo # --echo # WL#5624, \u without hex digits is equal to {'\\', 'u'} --echo # SET NAMES utf8 COLLATE utf8_5624_4; CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES ('\\'),('u'),('x'),('X'); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --echo # --echo # WL#5624, testing Bengali collations --echo # SET NAMES utf8, collation_connection=utf8_bengali_standard_ci; CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES (_ucs2 0x09FA), (_ucs2 0x09F8), (_ucs2 0x09F9), (_ucs2 0x09F2); INSERT INTO t1 VALUES (_ucs2 0x09DC), (_ucs2 0x09A109BC); INSERT INTO t1 VALUES (_ucs2 0x09A2), (_ucs2 0x09DD), (_ucs2 0x09A209BC); INSERT INTO t1 VALUES (_ucs2 0x09A3); SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a) FROM t1 ORDER BY a, BINARY a; DROP TABLE t1; SET NAMES utf8, collation_connection=utf8_bengali_traditional_ci; CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES (_ucs2 0x0985),(_ucs2 0x0986),(_ucs2 0x0987),(_ucs2 0x0988), (_ucs2 0x0989),(_ucs2 0x098A),(_ucs2 0x098B),(_ucs2 0x09E0), (_ucs2 0x098C),(_ucs2 0x09E1),(_ucs2 0x098F),(_ucs2 0x0990), (_ucs2 0x0993); INSERT INTO t1 VALUES (_ucs2 0x0994),(_ucs2 0x0982),(_ucs2 0x0983),(_ucs2 0x0981), (_ucs2 0x099509CD), (_ucs2 0x099609CD), (_ucs2 0x099709CD), (_ucs2 0x099809CD), (_ucs2 0x099909CD), (_ucs2 0x099A09CD), (_ucs2 0x099B09CD), (_ucs2 0x099C09CD), (_ucs2 0x099D09CD), (_ucs2 0x099E09CD), (_ucs2 0x099F09CD), (_ucs2 0x09A009CD), (_ucs2 0x09A109CD), (_ucs2 0x09A209CD), (_ucs2 0x09A309CD), (_ucs2 0x09CE), (_ucs2 0x09A409CD200D), (_ucs2 0x09A409CD), (_ucs2 0x09A509CD),(_ucs2 0x09A609CD), (_ucs2 0x09A709CD), (_ucs2 0x09A809CD), (_ucs2 0x09AA09CD), (_ucs2 0x09AB09CD), (_ucs2 0x09AC09CD), (_ucs2 0x09AD09CD), (_ucs2 0x09AE09CD), (_ucs2 0x09AF09CD), (_ucs2 0x09B009CD), (_ucs2 0x09F009CD), (_ucs2 0x09B209CD), (_ucs2 0x09F109CD), (_ucs2 0x09B609CD), (_ucs2 0x09B709CD), (_ucs2 0x09B809CD), (_ucs2 0x09B909CD); INSERT INTO t1 VALUES (_ucs2 0x099509CD0985),(_ucs2 0x0995), (_ucs2 0x099509CD0986),(_ucs2 0x099509BE), (_ucs2 0x099509CD0987),(_ucs2 0x099509BF), (_ucs2 0x099509CD0988),(_ucs2 0x099509C0), (_ucs2 0x099509CD0989),(_ucs2 0x099509C1), (_ucs2 0x099509CD098A),(_ucs2 0x099509C2), (_ucs2 0x099509CD098B),(_ucs2 0x099509C3), (_ucs2 0x099509CD09E0),(_ucs2 0x099509C4), (_ucs2 0x099509CD098C),(_ucs2 0x099509E2), (_ucs2 0x099509CD09E1),(_ucs2 0x099509E3), (_ucs2 0x099509CD098F),(_ucs2 0x099509C7), (_ucs2 0x099509CD0990),(_ucs2 0x099509C8), (_ucs2 0x099509CD0993),(_ucs2 0x099509CB), (_ucs2 0x099509CD0994),(_ucs2 0x099509CC); SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a) FROM t1 ORDER BY a, BINARY(a); SELECT HEX(WEIGHT_STRING(a)) as wa, GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a) FROM t1 GROUP BY a ORDER BY a; DROP TABLE t1; --echo # --echo # WL#5624, shift after, using expansion --echo # SET NAMES utf8 COLLATE utf8_5624_5; CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES ('0'),('1'),('0z'),(_ucs2 0x0030FF9D); INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i'); INSERT INTO t1 VALUES ('j'),('k'),('l'),('m'),('n'),('o'),('p'),('q'),('r'); INSERT INTO t1 VALUES ('s'),('t'),('u'),('v'),('w'),('x'),('y'),('z'); INSERT INTO t1 VALUES ('aa'),('aaa'); INSERT INTO t1 VALUES ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I'); INSERT INTO t1 VALUES ('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R'); INSERT INTO t1 VALUES ('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z'); INSERT INTO t1 VALUES ('AA'),('AAA'); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --echo # --echo # End of WL#5624 --echo #