--source include/have_ucs2.inc --source include/have_utf8mb4.inc --source include/have_utf16.inc --source include/have_utf32.inc #remove this include after fix MDEV-27871 --source include/no_view_protocol.inc --disable_query_log call mtr.add_suppression("Charset id.*trying to replace"); --enable_query_log --disable_warnings drop table if exists t1; --enable_warnings --echo In the following tests we change the order of letter "b" --echo making it equal to letter "a", and check that it works --echo with all Unicode character sets set names utf8; --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR show variables like 'character_sets_dir%'; show collation like 'utf8mb3_phone_ci'; CREATE TABLE t1 ( name VARCHAR(64), phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci ); INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02'); INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04'); INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01'); INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03'); INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005'); SELECT * FROM t1 ORDER BY phone; SELECT * FROM t1 WHERE phone='+7(912)800-80-01'; SELECT * FROM t1 WHERE phone='79128008001'; SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1'; SELECT * FROM t1 WHERE phone='tel.79128008001'; DROP TABLE t1; show collation like 'utf8mb3_test_ci'; create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'ucs2_test_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf8mb4_test_ci'; create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf16_test_ci'; create table t1 (c1 char(1) character set utf16 collate utf16_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; show collation like 'utf32_test_ci'; create table t1 (c1 char(1) character set utf32 collate utf32_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; # make sure utf8_test_ci is Unicode-5.0.0 SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a)); SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a)); # check that it works with supplementary characters SELECT hex(weight_string(convert(_utf32 0x61 using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_utf32 0x62 using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_utf32 0x10062 using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_utf32 0x100400 using utf8mb4) collate utf8mb4_test_ci)); # check contractions with non-ascii characters SELECT hex(weight_string(_utf8mb4 0x64 collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_ucs2 0x0064017e using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_ucs2 0x0044017e using utf8mb4) collate utf8mb4_test_ci)); SELECT hex(weight_string(convert(_ucs2 0x0044017d using utf8mb4) collate utf8mb4_test_ci)); # # Bug#41084 full-text index added to custom UCA collation not working # CREATE TABLE t1 ( col1 varchar(100) character set utf8 collate utf8_test_ci ); INSERT INTO t1 (col1) VALUES ('abcd'),('efgh'),('ijkl'); ALTER TABLE t1 ADD FULLTEXT INDEX (col1); SELECT * FROM t1 where match (col1) against ('abcd'); SELECT * FROM t1 where match (col1) against ('abcd' IN BOOLEAN MODE); ALTER TABLE t1 ADD (col2 varchar(100) character set latin1); UPDATE t1 SET col2=col1; SELECT * FROM t1 WHERE col1=col2 ORDER BY col1; DROP TABLE t1; --echo # --echo # Bug#45645 Mysql server close all connection and restart using lower function --echo # CREATE TABLE t1 (a VARCHAR(10)) CHARACTER SET utf8 COLLATE utf8_test_ci; INSERT INTO t1 (a) VALUES ('hello!'); SELECT * FROM t1 WHERE LOWER(a)=LOWER('N'); DROP TABLE t1; --echo # --echo # Bug#51976 LDML collations issue (cyrillic example) --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci); INSERT INTO t1 (a) VALUES ('Hello'); SELECT a, UPPER(a), LOWER(a) FROM t1; DROP TABLE t1; --echo # --echo # Bug#43827 Server closes connections and restarts --echo # # Crash happened with a user-defined utf8 collation, # on attempt to insert a string longer than the column can store. CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci); INSERT IGNORE INTO t1 SELECT REPEAT('a',11); DROP TABLE t1; # # Vietnamese experimental collation # --echo Vietnamese experimental collation show collation like 'ucs2_vn_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2), (0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0); insert into t1 values (0x0103),(0x0102),(0x1EB1),(0x1EB0),(0x1EB3),(0x1EB2), (0x1EB5),(0x1EB4),(0x1EAF),(0x1EAE),(0x1EB7),(0x1EB6); insert into t1 values (0x00E2),(0x00C2),(0x1EA7),(0x1EA6),(0x1EA9),(0x1EA8), (0x1EAB),(0x1EAA),(0x1EA5),(0x1EA4),(0x1EAD),(0x1EAC); insert into t1 values ('b'),('B'),('c'),('C'); insert into t1 values ('d'),('D'),(0x0111),(0x0110); insert into t1 values (0x0065),(0x0045),(0x00E8),(0x00C8),(0x1EBB),(0x1EBA), (0x1EBD),(0x1EBC),(0x00E9),(0x00C9),(0x1EB9),(0x1EB8); insert into t1 values (0x00EA),(0x00CA),(0x1EC1),(0x1EC0),(0x1EC3),(0x1EC2), (0x1EC5),(0x1EC4),(0x1EBF),(0x1EBE),(0x1EC7),(0x1EC6); insert into t1 values ('g'),('G'),('h'),('H'); insert into t1 values (0x0069),(0x0049),(0x00EC),(0x00CC),(0x1EC9),(0x1EC8), (0x0129),(0x0128),(0x00ED),(0x00CD),(0x1ECB),(0x1ECA); insert into t1 values ('k'),('K'),('l'),('L'),('m'),('M'); insert into t1 values (0x006F),(0x004F),(0x00F2),(0x00D2),(0x1ECF),(0x1ECE), (0x00F5),(0x00D5),(0x00F3),(0x00D3),(0x1ECD),(0x1ECC); insert into t1 values (0x00F4),(0x00D4),(0x1ED3),(0x1ED2),(0x1ED5),(0x1ED4), (0x1ED7),(0x1ED6),(0x1ED1),(0x1ED0),(0x1ED9),(0x1ED8); insert into t1 values (0x01A1),(0x01A0),(0x1EDD),(0x1EDC),(0x1EDF),(0x1EDE), (0x1EE1),(0x1EE0),(0x1EDB),(0x1EDA),(0x1EE3),(0x1EE2); insert into t1 values ('p'),('P'),('q'),('Q'),('r'),('R'),('s'),('S'),('t'),('T'); insert into t1 values (0x0075),(0x0055),(0x00F9),(0x00D9),(0x1EE7),(0x1EE6), (0x0169),(0x0168),(0x00FA),(0x00DA),(0x1EE5),(0x1EE4); insert into t1 values (0x01B0),(0x01AF),(0x1EEB),(0x1EEA),(0x1EED),(0x1EEC), (0x1EEF),(0x1EEE),(0x1EE9),(0x1EE8),(0x1EF1),(0x1EF0); insert into t1 values ('v'),('V'),('x'),('X'); insert into t1 values (0x0079),(0x0059),(0x1EF3),(0x1EF2),(0x1EF7),(0x1EF6), (0x1EF9),(0x1EF8),(0x00FD),(0x00DD),(0x1EF5),(0x1EF4); select hex(c1) as h, c1 from t1 order by c1, h; select group_concat(hex(c1) order by hex(c1)) from t1 group by c1; select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1; drop table t1; --echo Bug#46448 trailing spaces are not ignored when user collation maps space != 0x20 set names latin1; show collation like 'latin1_test'; select "foo" = "foo " collate latin1_test; -- echo The following tests check that two-byte collation IDs work # The file ../std-data/Index.xml has a number of collations with high IDs. # Test that the "ID" column in I_S and SHOW queries can handle two bytes select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id; show collation like '%test%'; # Test that two-byte collation ID is correctly transfered to the client side. show collation like 'ucs2_vn_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); insert into t1 values (0x0061); --enable_metadata set @@character_set_results=NULL; select * from t1; --disable_metadata drop table t1; # # Check maximum collation ID (2047 as of MySQL-6.0.9) # CREATE TABLE t1 (s1 char(10) character set utf8 collate utf8_maxuserid_ci); INSERT INTO t1 VALUES ('a'),('b'); SELECT * FROM t1 WHERE s1='a' ORDER BY BINARY s1; DROP TABLE t1; # # Bug#47756 Setting 2byte collation ID with 'set names' crashes the server # SET NAMES utf8 COLLATE utf8_phone_ci; show collation like 'utf8mb3_phone_ci'; SET NAMES utf8; # make sure utf8mb4_test_400_ci is Unicode-4.0.0 based SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_400_ci)); SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_400_ci)); SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a)); SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a)); SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a)); --echo # --echo # WL#5624 Collation customization improvements --echo # SET NAMES utf8 COLLATE utf8_5624_1; CREATE TABLE t1 AS SELECT REPEAT(' ', 16) AS a LIMIT 0; # Part 1,2,3: long contractions and expansions # Part 7: Quarternary difference INSERT INTO t1 VALUES ('012345'),('001234'),('000123'),('000012'),('000001'); INSERT INTO t1 VALUES ('12345'),('01234'),('00123'),('00012'),('00001'); INSERT INTO t1 VALUES ('1234'),('0123'),('0012'),('0001'); INSERT INTO t1 VALUES ('123'),('012'),('001'); INSERT INTO t1 VALUES ('12'),('01'); INSERT INTO t1 VALUES ('1'),('9'); INSERT INTO t1 VALUES ('ГАИ'),('ГИБДД'); # Part 4: reset before # Part 6: characters rather than escape sequences INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'); INSERT INTO t1 VALUES ('cz'),('Ċ'),('ċ'); INSERT INTO t1 VALUES ('f'),('fz'),('g'),('Ġ'),('ġ'); INSERT INTO t1 VALUES ('h'),('hz'),('GĦ'),('Għ'),('gĦ'),('għ'); INSERT INTO t1 VALUES ('i'),('iz'),('Ħ'),('ħ'); INSERT INTO t1 VALUES ('y'),('yz'),('z'),('Ż'),('ż'); INSERT INTO t1 VALUES ('ā'),('Ā'),('á'),('Á'),('à'),('À'); INSERT INTO t1 VALUES ('ē'),('é'),('ě'),('ê'),('Ē'),('É'),('Ě'),('Ê'); # Part 8: Abbreviated shift syntax INSERT INTO t1 VALUES ('a'),('~'),('!'),('@'),('#'),('$'),('%'),('^'); INSERT INTO t1 VALUES ('('),(')'),('-'),('+'),('|'),('='),(':'),(';'); INSERT INTO t1 VALUES ('"'),('\''),('?'); # Part 9: Normal expansion syntax INSERT INTO t1 VALUES ('ch'),('k'),('cs'),('ccs'),('cscs'); # Part 10: Previous context INSERT INTO t1 VALUES ('aa-'),('ab-'),('ac-'),('ad-'),('ae-'),('af-'),('az-'); # Part 12: Logical reset positions INSERT INTO t1 VALUES ('lp-fni'),('lp-lni'); INSERT INTO t1 VALUES ('lp-fpi'),('lp-lpi'); INSERT INTO t1 VALUES ('lp-fsi'),('lp-lsi'); INSERT INTO t1 VALUES ('lp-fti'),('lp-lti'); INSERT INTO t1 VALUES ('lp-ft'),('lp-lt'); INSERT INTO t1 VALUES ('lp-fv'),('lp-lv'); # Logical positions with reset before INSERT INTO t1 VALUES ('lb-fni'),('lb-lni'); INSERT INTO t1 VALUES ('lb-fv'),('lb-lv'); # Part 5: Long tailoring INSERT INTO t1 VALUES (_ucs2 0x3106),(_ucs2 0x3110), (_ucs2 0x3111), (_ucs2 0x3112); INSERT INTO t1 VALUES (_ucs2 0x32A3), (_ucs2 0x3231); INSERT INTO t1 VALUES (_ucs2 0x84D9), (_ucs2 0x98F5), (_ucs2 0x7CF3), (_ucs2 0x5497); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY a; --echo # --echo # WL#5624, the same test with UCS2 --echo # ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_5624_1; SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --echo # --echo # WL#5624, unsupported features --echo # # Part 13: More verbosity --error ER_UNKNOWN_COLLATION SET NAMES utf8 COLLATE utf8_5624_2; SHOW WARNINGS; --error ER_UNKNOWN_COLLATION SELECT _utf8'test' COLLATE utf8_5624_2; SHOW WARNINGS; --echo # --echo # WL#5624, reset before primary ignorable --echo # --error ER_UNKNOWN_COLLATION SET NAMES utf8 COLLATE utf8_5624_3; SHOW WARNINGS; --echo # --echo # WL#5624, \u without hex digits is equal to {'\\', 'u'} --echo # SET NAMES utf8 COLLATE utf8_5624_4; CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES ('\\'),('u'),('x'),('X'); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --echo # --echo # WL#5624, testing Bengali collations --echo # SET NAMES utf8, collation_connection=utf8_bengali_standard_ci; CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES (_ucs2 0x09FA), (_ucs2 0x09F8), (_ucs2 0x09F9), (_ucs2 0x09F2); INSERT INTO t1 VALUES (_ucs2 0x09DC), (_ucs2 0x09A109BC); INSERT INTO t1 VALUES (_ucs2 0x09A2), (_ucs2 0x09DD), (_ucs2 0x09A209BC); INSERT INTO t1 VALUES (_ucs2 0x09A3); SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a) FROM t1 ORDER BY a, BINARY a; DROP TABLE t1; SET NAMES utf8, collation_connection=utf8_bengali_traditional_ci; CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES (_ucs2 0x0985),(_ucs2 0x0986),(_ucs2 0x0987),(_ucs2 0x0988), (_ucs2 0x0989),(_ucs2 0x098A),(_ucs2 0x098B),(_ucs2 0x09E0), (_ucs2 0x098C),(_ucs2 0x09E1),(_ucs2 0x098F),(_ucs2 0x0990), (_ucs2 0x0993); INSERT INTO t1 VALUES (_ucs2 0x0994),(_ucs2 0x0982),(_ucs2 0x0983),(_ucs2 0x0981), (_ucs2 0x099509CD), (_ucs2 0x099609CD), (_ucs2 0x099709CD), (_ucs2 0x099809CD), (_ucs2 0x099909CD), (_ucs2 0x099A09CD), (_ucs2 0x099B09CD), (_ucs2 0x099C09CD), (_ucs2 0x099D09CD), (_ucs2 0x099E09CD), (_ucs2 0x099F09CD), (_ucs2 0x09A009CD), (_ucs2 0x09A109CD), (_ucs2 0x09A209CD), (_ucs2 0x09A309CD), (_ucs2 0x09CE), (_ucs2 0x09A409CD200D), (_ucs2 0x09A409CD), (_ucs2 0x09A509CD),(_ucs2 0x09A609CD), (_ucs2 0x09A709CD), (_ucs2 0x09A809CD), (_ucs2 0x09AA09CD), (_ucs2 0x09AB09CD), (_ucs2 0x09AC09CD), (_ucs2 0x09AD09CD), (_ucs2 0x09AE09CD), (_ucs2 0x09AF09CD), (_ucs2 0x09B009CD), (_ucs2 0x09F009CD), (_ucs2 0x09B209CD), (_ucs2 0x09F109CD), (_ucs2 0x09B609CD), (_ucs2 0x09B709CD), (_ucs2 0x09B809CD), (_ucs2 0x09B909CD); INSERT INTO t1 VALUES (_ucs2 0x099509CD0985),(_ucs2 0x0995), (_ucs2 0x099509CD0986),(_ucs2 0x099509BE), (_ucs2 0x099509CD0987),(_ucs2 0x099509BF), (_ucs2 0x099509CD0988),(_ucs2 0x099509C0), (_ucs2 0x099509CD0989),(_ucs2 0x099509C1), (_ucs2 0x099509CD098A),(_ucs2 0x099509C2), (_ucs2 0x099509CD098B),(_ucs2 0x099509C3), (_ucs2 0x099509CD09E0),(_ucs2 0x099509C4), (_ucs2 0x099509CD098C),(_ucs2 0x099509E2), (_ucs2 0x099509CD09E1),(_ucs2 0x099509E3), (_ucs2 0x099509CD098F),(_ucs2 0x099509C7), (_ucs2 0x099509CD0990),(_ucs2 0x099509C8), (_ucs2 0x099509CD0993),(_ucs2 0x099509CB), (_ucs2 0x099509CD0994),(_ucs2 0x099509CC); SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a) FROM t1 ORDER BY a, BINARY(a); SELECT HEX(WEIGHT_STRING(a)) as wa, GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a) FROM t1 GROUP BY a ORDER BY a; DROP TABLE t1; --echo # --echo # WL#5624, shift after, using expansion --echo # SET NAMES utf8 COLLATE utf8_5624_5; CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0; INSERT INTO t1 VALUES ('0'),('1'),('0z'),(_ucs2 0x0030FF9D); INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i'); INSERT INTO t1 VALUES ('j'),('k'),('l'),('m'),('n'),('o'),('p'),('q'),('r'); INSERT INTO t1 VALUES ('s'),('t'),('u'),('v'),('w'),('x'),('y'),('z'); INSERT INTO t1 VALUES ('aa'),('aaa'); INSERT INTO t1 VALUES ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I'); INSERT INTO t1 VALUES ('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R'); INSERT INTO t1 VALUES ('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z'); INSERT INTO t1 VALUES ('AA'),('AAA'); INSERT INTO t1 VALUES ('001'),('002'); SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a); DROP TABLE t1; --error ER_UNKNOWN_COLLATION SET NAMES utf8 COLLATE utf8_5624_5_bad; SHOW WARNINGS; --echo # --echo # End of WL#5624 --echo # --echo # --echo # Bug#14197426 PARSE ERRORS IN LOADABLE UCA / LDML COLLATIONS ARE SILENTLY IGNORED --echo # --let $out_file= $MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err --let OUTF= $out_file # Error messages are not seen in error log in embedded version --let EMBEDDED=`SELECT IF(version() LIKE '%embedded%',2,0)` --echo # Search for occurrences of [ERROR] Syntax error at '[strength tertiary]' perl; use strict; my $outf= $ENV{'OUTF'} or die "OUTF not set"; open(FILE, "$outf") or die("Unable to open $outf: $!\n"); my $count_error= grep(/\[ERROR\] Syntax error at '\[strength tertiary\]'/gi,); my $count_error= $count_error + $ENV{"EMBEDDED"}; print "Occurances : $count_error\n"; close(FILE); EOF --echo # --echo # MDEV-8686 A user defined collation utf8_confusables doesn't work --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_implicit_weights_ci); INSERT INTO t1 VALUES ('a'),('b'),('c'); INSERT INTO t1 VALUES (_ucs2 0x1500),(_ucs2 0x1501); INSERT INTO t1 VALUES (_ucs2 0x3400),(_ucs2 0x3560),(_ucs2 0x3561),(_ucs2 0x3600); INSERT INTO t1 VALUES (_ucs2 0x3700),(_ucs2 0x3701); SELECT HEX(CONVERT(a USING ucs2)) AS ch, HEX(WEIGHT_STRING(a)) AS w, HEX(WEIGHT_STRING(a COLLATE utf8_unicode_ci)) AS ducet FROM t1 ORDER BY a,ch; DROP TABLE t1; --echo # --echo # Testing that the MY_CS_PUREASCII flag is set properly --echo # Comparison between ascii2 and latin1 should not give "illegal collation error" --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1); INSERT INTO t1 VALUES ('a','a'),('b','b'); SELECT * FROM t1 WHERE a=b; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2; SELECT * FROM t1 WHERE a=b; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin; SELECT * FROM t1 WHERE a=b; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited_ci; SELECT * FROM t1 WHERE a=b; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited2_ci; SELECT * FROM t1 WHERE a=b; DROP TABLE t1; --echo # --echo # Testing that in case of two binary collations --echo # "BINARY" in a column definition uses the collation with the least id --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 BINARY); INSERT INTO t1 VALUES ('test'); SELECT COLLATION(a) FROM t1; DROP TABLE t1; --echo # --echo # Testing mixing of two binary collations of the same character set --echo # CREATE TABLE t1 ( a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin, b VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2 ); INSERT INTO t1 VALUES ('a','a'); --error ER_CANT_AGGREGATE_2COLLATIONS SELECT * FROM t1 WHERE a=b; DROP TABLE t1; --echo # --echo # Testing bad collation inheritance --echo # --error ER_UNKNOWN_COLLATION CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_badly_inherited_ci); --echo # --echo # Testing that the MY_CS_CSSORT flag is set properly --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test); INSERT INTO t1 VALUES ('a'),('A'); # should be case insensitive SELECT * FROM t1 WHERE a RLIKE 'a'; DROP TABLE t1; CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs); INSERT INTO t1 VALUES ('a'),('A'); # should be case sensitive SELECT * FROM t1 WHERE a RLIKE 'a'; DROP TABLE t1; --echo # --echo # MDEV-9711 NO PAD collations --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_nopad_ci); INSERT INTO t1 VALUES ('a'),('a '),('A'),('A '); SELECT a, a='a', a='a ', HEX(a), LOWER(a), UPPER(a), CONVERT(a USING utf8) FROM t1; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_nopad_bin; SELECT a, a='a', a='a ', HEX(a), LOWER(a), UPPER(a), CONVERT(a USING utf8) FROM t1; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_swedish_nopad2_ci; SELECT a, a='a', a='a ', HEX(a), LOWER(a), UPPER(a), CONVERT(a USING utf8) FROM t1; DROP TABLE t1; --echo # --echo # MDEV-10877 xxx_unicode_nopad_ci collations --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_test_520_nopad_ci); INSERT INTO t1 VALUES ('a'),('b'),('a '),('b '); SELECT HEX(a), WEIGHT_STRING(a)=WEIGHT_STRING(a COLLATE utf8mb4_unicode_ci) AS is_400, WEIGHT_STRING(a)=WEIGHT_STRING(a COLLATE utf8mb4_unicode_520_ci) AS is_520 FROM t1 ORDER BY a; SELECT COUNT(DISTINCT a) FROM t1; SELECT HEX(a), REPLACE(a,' ','') FROM t1 WHERE a='a'; SELECT HEX(a), REPLACE(a,' ','') FROM t1 ORDER BY a; SELECT HEX(a), REPLACE(a,' ','') FROM t1 ORDER BY a DESC; DROP TABLE t1; SET NAMES utf8 COLLATE utf8_czech_test_w2; CREATE TABLE t1 AS SELECT SPACE(10) AS c1 LIMIT 0; --source include/ctype_unicode_latin.inc INSERT INTO t1 VALUES ('a '); SELECT c1, HEX(WEIGHT_STRING(c1 LEVEL 1)), HEX(WEIGHT_STRING(c1 LEVEL 2)) FROM t1 ORDER BY c1, BINARY c1; SELECT c1, HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 1)), HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 2)) FROM t1 WHERE c1 BETWEEN 'a' AND 'aZ' ORDER BY c1, BINARY c1; DROP TABLE t1; SELECT 'a' = 'a '; SELECT 'a' < 'á'; SELECT 'áa' < 'ab'; SELECT 'á' < 'ä'; SELECT 'äa' < 'áb'; SELECT 'c' < 'č'; SELECT 'cb' < 'ča'; SELECT 'd' < 'ď'; SELECT 'ďa' < 'db'; SELECT 'e' < 'é'; SELECT 'éa' < 'eb'; SELECT 'é' < 'ě'; SELECT 'ěa' < 'éb'; SELECT 'i' < 'í'; SELECT 'ía' < 'ib'; SELECT 'n' < 'ň'; SELECT 'ňa' < 'nb'; SELECT 'o' < 'ó'; SELECT 'óa' < 'ob'; SELECT 'ó' < 'ö'; SELECT 'öa' < 'ób'; SELECT 'r' < 'ř'; SELECT 'rb' < 'řa'; SELECT 's' < 'š'; SELECT 'sb' < 'ša'; SELECT 't' < 'ť'; SELECT 'ťa' < 'tb'; SELECT 'u' < 'ú'; SELECT 'úa' < 'ub'; SELECT 'ú' < 'ů'; SELECT 'ůa' < 'úb'; SELECT 'ů' < 'ü'; SELECT 'üa' < 'ůb'; SELECT 'y' < 'ý'; SELECT 'ýa' < 'yb'; SELECT 'z' < 'ž'; SELECT 'zb' < 'ža'; SELECT 'hž' < 'ch'; SELECT 'chž'< 'i'; SET NAMES utf8 COLLATE utf8_czech_test_nopad_w2; CREATE TABLE t1 AS SELECT SPACE(10) AS c1 LIMIT 0; --source include/ctype_unicode_latin.inc INSERT INTO t1 VALUES ('a '); SELECT c1, HEX(WEIGHT_STRING(c1 LEVEL 1)), HEX(WEIGHT_STRING(c1 LEVEL 2)) FROM t1 ORDER BY c1, BINARY c1; SELECT c1, HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 1)), HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 2)) FROM t1 WHERE c1 BETWEEN 'a' AND 'aZ' ORDER BY c1, BINARY c1; DROP TABLE t1; SELECT 'a' = 'a '; SELECT 'a' < 'á'; SELECT 'áa' < 'ab'; SELECT 'á' < 'ä'; SELECT 'äa' < 'áb'; SELECT 'c' < 'č'; SELECT 'cb' < 'ča'; SELECT 'd' < 'ď'; SELECT 'ďa' < 'db'; SELECT 'e' < 'é'; SELECT 'éa' < 'eb'; SELECT 'é' < 'ě'; SELECT 'ěa' < 'éb'; SELECT 'i' < 'í'; SELECT 'ía' < 'ib'; SELECT 'n' < 'ň'; SELECT 'ňa' < 'nb'; SELECT 'o' < 'ó'; SELECT 'óa' < 'ob'; SELECT 'ó' < 'ö'; SELECT 'öa' < 'ób'; SELECT 'r' < 'ř'; SELECT 'rb' < 'řa'; SELECT 's' < 'š'; SELECT 'sb' < 'ša'; SELECT 't' < 'ť'; SELECT 'ťa' < 'tb'; SELECT 'u' < 'ú'; SELECT 'úa' < 'ub'; SELECT 'ú' < 'ů'; SELECT 'ůa' < 'úb'; SELECT 'ů' < 'ü'; SELECT 'üa' < 'ůb'; SELECT 'y' < 'ý'; SELECT 'ýa' < 'yb'; SELECT 'z' < 'ž'; SELECT 'zb' < 'ža'; SELECT 'hž' < 'ch'; SELECT 'chž'< 'i'; --error ER_UNKNOWN_COLLATION SELECT 'a' COLLATE utf8_czech_test_bad_w2; --echo # --echo # End of 10.2 tests --echo # --echo # --echo # Start of 10.3 tests --echo # --echo # --echo # MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 --echo # SET NAMES utf8mb4 COLLATE utf8mb4_test_520_nopad_ci; --source include/ctype_casefolding.inc --echo # --echo # End of 10.3 tests --echo # --echo # --echo # MDEV-7947 my_charset_same: strcmp() takes 0.37% in OLTP RO --echo # SHOW COLLATION LIKE 'latin1_test_replace'; --error ER_UNKNOWN_COLLATION SELECT 'foo' = 'foo ' COLLATE latin1_test_replace; --echo # --echo # End of 10.5 tests --echo # --echo # --echo # MDEV-27042 UCA: Resetting contractions to ignorable does not work well --echo # CREATE TABLE t1 ( phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci ); INSERT INTO t1 VALUES ('123'); INSERT INTO t1 VALUES ('tel.123'); INSERT INTO t1 VALUES ('tél.123'); INSERT INTO t1 VALUES ('tèl.123'); INSERT INTO t1 VALUES ('ťel.123'); INSERT INTO t1 VALUES ('ťèl.123'); INSERT INTO t1 VALUES ('tex.123'); SELECT * FROM t1 WHERE phone='123' ORDER BY BINARY phone; SELECT * FROM t1 WHERE phone<>'123' ORDER BY BINARY phone; SELECT phone, HEX(WEIGHT_STRING(phone)) FROM t1 ORDER BY phone, BINARY phone; DROP TABLE t1; --echo # --echo # End of 10.8 tests --echo # --echo # --echo # MDEV-27009 Add UCA-14.0.0 collations --echo # CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci, comment VARCHAR(128)); INSERT INTO t1 VALUES ('a', '0061 LATIN SMALL LETTER A'); INSERT INTO t1 VALUES ('c', '0063 LATIN SMALL LETTER C'); INSERT INTO t1 VALUES ('à', '00E0 LATIN SMALL LETTER A WITH GRAVE'); INSERT INTO t1 VALUES ('ć', '0107 LATIN SMALL LETTER C WITH ACUTE'); INSERT INTO t1 VALUES (_ucs2 0x0439, '0439 CYRILLIC SMALL LETTER SHORT I - precomposed'); INSERT INTO t1 VALUES (_ucs2 0x04380306, '0438 0306 CYRILLIC SMALL LETTER SHORT I - contraction'); INSERT INTO t1 VALUES (_ucs2 0x0CCB, '0CCB KANNADA VOWEL SIGN OO - precomposed'); INSERT INTO t1 VALUES (_ucs2 0x0CC60CC20CD5, '0CC6 0CC2 0CD5 KANNADA VOWEL SIGN OO - contraction'); --vertical_results SELECT '---' AS `---`, comment, HEX(a), HEX(WEIGHT_STRING(a)), HEX(WEIGHT_STRING(a LEVEL 1)), HEX(WEIGHT_STRING(a LEVEL 2)), WEIGHT_STRING(a LEVEL 1)=WEIGHT_STRING(a COLLATE utf8mb4_unicode_ci) AS is_400, WEIGHT_STRING(a LEVEL 1)=WEIGHT_STRING(a COLLATE utf8mb4_unicode_520_ci) AS is_520, WEIGHT_STRING(a LEVEL 1)=WEIGHT_STRING(a COLLATE utf8mb4_uca1400_ai_ci) AS is_1400 FROM t1 ORDER BY a, HEX(a); --horizontal_results DROP TABLE t1; --echo # --echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 --echo # SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; --source include/ctype_casefolding.inc --echo # --echo # MDEV-30577 Case folding for uca1400 collations is not up to date --echo # SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; --source include/ctype_casefolding_supplementary.inc --echo # --echo # End of 10.10 tests --echo #