# # Tests with the utf8 character set # --disable_warnings drop table if exists t1,t2; --enable_warnings set names utf8; select left(_utf8 0xD0B0D0B1D0B2,1); select right(_utf8 0xD0B0D0B2D0B2,1); select locate('he','hello'); select locate('he','hello',2); select locate('lo','hello',2); select locate('HE','hello'); select locate('HE','hello',2); select locate('LO','hello',2); select locate('HE','hello' collate utf8_bin); select locate('HE','hello' collate utf8_bin,2); select locate('LO','hello' collate utf8_bin,2); select locate(_utf8 0xD0B1, _utf8 0xD0B0D0B1D0B2); select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2); select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2); select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2 collate utf8_bin); select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2 collate utf8_bin); select length(_utf8 0xD0B1), bit_length(_utf8 0xD0B1), char_length(_utf8 0xD0B1); select 'a' like 'a'; select 'A' like 'a'; select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); # Bug #6040: can't retrieve records with umlaut # characters in case insensitive manner. # Case insensitive search LIKE comparison # was broken for multibyte characters: select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8); select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); # # Check the following: # "a" == "a " # "a\0" < "a" # "a\0" < "a " SELECT 'a' = 'a '; SELECT 'a\0' < 'a'; SELECT 'a\0' < 'a '; SELECT 'a\t' < 'a'; SELECT 'a\t' < 'a '; # # The same for binary collation # SELECT 'a' = 'a ' collate utf8_bin; SELECT 'a\0' < 'a' collate utf8_bin; SELECT 'a\0' < 'a ' collate utf8_bin; SELECT 'a\t' < 'a' collate utf8_bin; SELECT 'a\t' < 'a ' collate utf8_bin; CREATE TABLE t1 (a char(10) character set utf8 not null); INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; DROP TABLE t1; # # Fix this, it should return 1: # #select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD091,_utf8 '%'); # # # Bug 2367: INSERT() behaviour is different for different charsets. # select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es'); select insert("aa",100,1,"b"),insert("aa",1,3,"b"); # # LELF() didn't work well with utf8 in some cases too. # select char_length(left(@a:='тест',5)), length(@a), @a; # # CREATE ... SELECT # create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d"); show create table t1; select * from t1; drop table t1; # # Bug #2366 Wrong utf8 behaviour when data is truncated # set names koi8r; create table t1 (s1 char(1) character set utf8); insert into t1 values (_koi8r''); select s1,hex(s1),char_length(s1),octet_length(s1) from t1; drop table t1; create table t1 (s1 tinytext character set utf8); insert into t1 select repeat('a',300); insert into t1 select repeat('',300); insert into t1 select repeat('a',300); insert into t1 select repeat('a',300); insert into t1 select repeat('',300); select hex(s1) from t1; select length(s1),char_length(s1) from t1; drop table t1; create table t1 (s1 text character set utf8); insert into t1 select repeat('a',66000); insert into t1 select repeat('',66000); insert into t1 select repeat('a',66000); insert into t1 select repeat('a',66000); insert into t1 select repeat('',66000); select length(s1),char_length(s1) from t1; drop table t1; # # Bug #2368 Multibyte charsets do not check that incoming data is well-formed # create table t1 (s1 char(10) character set utf8); insert into t1 values (0x41FF); select hex(s1) from t1; drop table t1; create table t1 (s1 varchar(10) character set utf8); insert into t1 values (0x41FF); select hex(s1) from t1; drop table t1; create table t1 (s1 text character set utf8); insert into t1 values (0x41FF); select hex(s1) from t1; drop table t1; # # Bug 2699 # UTF8 breaks primary keys for cols > 333 characters # --error 1071 create table t1 (a text character set utf8, primary key(a(360))); # # Bug 2959 # UTF8 charset breaks joins with mixed column/string constant # CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8; INSERT INTO t1 VALUES ( 'test' ); SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a; SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = 'test' and b.a = 'test'; SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a and a.a = 'test'; DROP TABLE t1; create table t1 (a char(255) character set utf8); insert into t1 values('b'),('b'); select * from t1 where a = 'b'; select * from t1 where a = 'b' and a = 'b'; select * from t1 where a = 'b' and a != 'b'; drop table t1; # # Bug #3928 regexp [[:>:]] and UTF-8 # set names utf8; # This should return TRUE select 'вася' rlike '[[:<:]]вася[[:>:]]'; select 'вася ' rlike '[[:<:]]вася[[:>:]]'; select ' вася' rlike '[[:<:]]вася[[:>:]]'; select ' вася ' rlike '[[:<:]]вася[[:>:]]'; # This should return FALSE select 'васяz' rlike '[[:<:]]вася[[:>:]]'; select 'zвася' rlike '[[:<:]]вася[[:>:]]'; select 'zвасяz' rlike '[[:<:]]вася[[:>:]]'; # # Bug #4555 # ALTER TABLE crashes mysqld with enum column collated utf8_unicode_ci # CREATE TABLE t1 (a enum ('Y', 'N') DEFAULT 'N' COLLATE utf8_unicode_ci); ALTER TABLE t1 ADD COLUMN b CHAR(20); DROP TABLE t1; # Customer Support Center issue # 3299 # ENUM and SET multibyte fields computed their length wronly # when converted into a char field set names utf8; create table t1 (a enum('aaaa','проба') character set utf8); show create table t1; insert into t1 values ('проба'); select * from t1; create table t2 select ifnull(a,a) from t1; show create table t2; select * from t2; drop table t1; drop table t2; # # Bug 4521: unique key prefix interacts poorly with utf8 # MYISAM: keys with prefix compression, case insensitive collation. # create table t1 (c varchar(30) character set utf8, unique(c(10))); insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); insert into t1 values ('aaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaaa'); insert into t1 values (repeat('b',20)); select c c1 from t1 where c='1'; select c c2 from t1 where c='2'; select c c3 from t1 where c='3'; select c cx from t1 where c='x'; select c cy from t1 where c='y'; select c cz from t1 where c='z'; select c ca10 from t1 where c='aaaaaaaaaa'; select c cb20 from t1 where c=repeat('b',20); drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # InnoDB: keys with prefix compression, case insensitive collation. # --disable_warnings create table t1 (c varchar(30) character set utf8, unique(c(10))) engine=innodb; --enable_warnings insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); insert into t1 values ('aaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaaa'); insert into t1 values (repeat('b',20)); select c c1 from t1 where c='1'; select c c2 from t1 where c='2'; select c c3 from t1 where c='3'; select c cx from t1 where c='x'; select c cy from t1 where c='y'; select c cz from t1 where c='z'; select c ca10 from t1 where c='aaaaaaaaaa'; select c cb20 from t1 where c=repeat('b',20); drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # MYISAM: fixed length keys, case insensitive collation # create table t1 (c char(3) character set utf8, unique (c(2))); insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('a'); insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('b'); insert into t1 values ('bb'); --error 1062 insert into t1 values ('bbb'); insert into t1 values ('а'); insert into t1 values ('аа'); --error 1062 insert into t1 values ('ааа'); insert into t1 values ('б'); insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); insert into t1 values ('ꪪ'); insert into t1 values ('ꪪꪪ'); --error 1062 insert into t1 values ('ꪪꪪꪪ'); drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # InnoDB: fixed length keys, case insensitive collation # --disable_warnings create table t1 (c char(3) character set utf8, unique (c(2))) engine=innodb; --enable_warnings insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('a'); insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('b'); insert into t1 values ('bb'); --error 1062 insert into t1 values ('bbb'); insert into t1 values ('а'); insert into t1 values ('аа'); --error 1062 insert into t1 values ('ааа'); insert into t1 values ('б'); insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); insert into t1 values ('ꪪ'); insert into t1 values ('ꪪꪪ'); --error 1062 insert into t1 values ('ꪪꪪꪪ'); drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check HEAP+HASH, case insensitive collation # create table t1 ( c char(10) character set utf8, unique key a using hash (c(1)) ) engine=heap; show create table t1; insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check HEAP+BTREE, case insensitive collation # create table t1 ( c char(10) character set utf8, unique key a using btree (c(1)) ) engine=heap; show create table t1; insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check BDB, case insensitive collation # --disable_warnings create table t1 ( c char(10) character set utf8, unique key a (c(1)) ) engine=bdb; --enable_warnings insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # MYISAM: keys with prefix compression, binary collation. # create table t1 (c varchar(30) character set utf8 collate utf8_bin, unique(c(10))); insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); insert into t1 values ('aaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaa'); --error 1062 insert into t1 values ('aaaaaaaaaaaa'); insert into t1 values (repeat('b',20)); select c c1 from t1 where c='1'; select c c2 from t1 where c='2'; select c c3 from t1 where c='3'; select c cx from t1 where c='x'; select c cy from t1 where c='y'; select c cz from t1 where c='z'; select c ca10 from t1 where c='aaaaaaaaaa'; select c cb20 from t1 where c=repeat('b',20); drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # MYISAM: fixed length keys, binary collation # create table t1 (c char(3) character set utf8 collate utf8_bin, unique (c(2))); insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('a'); insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('b'); insert into t1 values ('bb'); --error 1062 insert into t1 values ('bbb'); insert into t1 values ('а'); insert into t1 values ('аа'); --error 1062 insert into t1 values ('ааа'); insert into t1 values ('б'); insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); insert into t1 values ('ꪪ'); insert into t1 values ('ꪪꪪ'); --error 1062 insert into t1 values ('ꪪꪪꪪ'); drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check HEAP+HASH, binary collation # create table t1 ( c char(10) character set utf8 collate utf8_bin, unique key a using hash (c(1)) ) engine=heap; show create table t1; insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check HEAP+BTREE, binary collation # create table t1 ( c char(10) character set utf8 collate utf8_bin, unique key a using btree (c(1)) ) engine=heap; show create table t1; insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check BDB, binary collation # --disable_warnings create table t1 ( c char(10) character set utf8 collate utf8_bin, unique key a (c(1)) ) engine=bdb; --enable_warnings insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); --error 1062 insert into t1 values ('aa'); --error 1062 insert into t1 values ('aaa'); insert into t1 values ('б'); --error 1062 insert into t1 values ('бб'); --error 1062 insert into t1 values ('ббб'); select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; # Bug#4594: column index make = failed for gbk, but like works # Check MYISAM # create table t1 ( str varchar(255) character set utf8 not null, key str (str(2)) ) engine=myisam; INSERT INTO t1 VALUES ('str'); INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; # Bug#4594: column index make = failed for gbk, but like works # Check InnoDB # --disable_warnings create table t1 ( str varchar(255) character set utf8 not null, key str (str(2)) ) engine=innodb; --enable_warnings INSERT INTO t1 VALUES ('str'); INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; # the same for HEAP+BTREE # create table t1 ( str varchar(255) character set utf8 not null, key str using btree (str(2)) ) engine=heap; INSERT INTO t1 VALUES ('str'); INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; # the same for HEAP+HASH # create table t1 ( str varchar(255) character set utf8 not null, key str using hash (str(2)) ) engine=heap; INSERT INTO t1 VALUES ('str'); INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; # the same for BDB # --disable_warnings create table t1 ( str varchar(255) character set utf8 not null, key str (str(2)) ) engine=bdb; --enable_warnings INSERT INTO t1 VALUES ('str'); INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; # # Bug #5397: Crash with varchar binary and LIKE # CREATE TABLE t1 (a varchar(32) BINARY) CHARACTER SET utf8; INSERT INTO t1 VALUES ('test'); SELECT a FROM t1 WHERE a LIKE '%te'; DROP TABLE t1; # # Bug #5723: length() returns varying results # --disable_warnings SET NAMES utf8; --disable_warnings CREATE TABLE t1 ( subject varchar(255) character set utf8 collate utf8_unicode_ci, p varchar(15) character set utf8 ) ENGINE=InnoDB DEFAULT CHARSET=latin1; --enable_warnings INSERT INTO t1 VALUES ('谷川俊二と申しますが、インターネット予約の会員登録をしましたところ、メールアドレスを間違えてしまい会員IDが受け取ることが出来ませんでした。間違えアドレスはtani-shun@n.vodafone.ne.jpを書き込みました。どうすればよいですか? その他、住所等は間違えありません。連絡ください。よろしくお願いします。m(__)m','040312-000057'); INSERT INTO t1 VALUES ('aaa','bbb'); SELECT length(subject) FROM t1; SELECT length(subject) FROM t1 ORDER BY 1; DROP TABLE t1; # # Bug #5832 SELECT doesn't return records in some cases # CREATE TABLE t1 ( id int unsigned NOT NULL auto_increment, list_id smallint unsigned NOT NULL, term TEXT NOT NULL, PRIMARY KEY(id), INDEX(list_id, term(4)) ) ENGINE=MYISAM CHARSET=utf8; INSERT INTO t1 SET list_id = 1, term = "letterc"; INSERT INTO t1 SET list_id = 1, term = "letterb"; INSERT INTO t1 SET list_id = 1, term = "lettera"; INSERT INTO t1 SET list_id = 1, term = "letterd"; SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterc"); SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterb"); SELECT id FROM t1 WHERE (list_id = 1) AND (term = "lettera"); SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterd"); DROP TABLE t1; # # Bug #6043 erratic searching for diacriticals in indexed MyISAM UTF-8 table # SET NAMES latin1; CREATE TABLE t1 ( id int unsigned NOT NULL auto_increment, list_id smallint unsigned NOT NULL, term text NOT NULL, PRIMARY KEY(id), INDEX(list_id, term(19)) ) ENGINE=MyISAM CHARSET=utf8; INSERT INTO t1 set list_id = 1, term = "testtest"; INSERT INTO t1 set list_id = 1, term = "testetest"; INSERT INTO t1 set list_id = 1, term = "testtest"; SELECT id, term FROM t1 where (list_id = 1) AND (term = "testtest"); SELECT id, term FROM t1 where (list_id = 1) AND (term = "testetest"); SELECT id, term FROM t1 where (list_id = 1) AND (term = "testtest"); DROP TABLE t1; # # Bug #6019 SELECT tries to use too short prefix index on utf8 data # set names utf8; --disable_warnings create table t1 ( a int primary key, b varchar(6), index b3(b(3)) ) engine=innodb character set=utf8; --enable_warnings insert into t1 values(1,'foo'),(2,'foobar'); select * from t1 where b like 'foob%'; --disable_warnings alter table t1 engine=bdb; --enable_warnings select * from t1 where b like 'foob%'; drop table t1; # # Test for calculate_interval_lengths() function # create table t1 ( a enum('петя','вася','анюта') character set utf8 not null default 'анюта', b set('петя','вася','анюта') character set utf8 not null default 'анюта' ); create table t2 select concat(a,_utf8'') as a, concat(b,_utf8'')as b from t1; show create table t2; drop table t2; drop table t1; # # Bug #6787 LIKE not working properly with _ and utf8 data # select 'c' like '\_' as want0; # # Bug #7730 Server crash using soundex on an utf8 table # create table t1 (id integer, a varchar(100) character set utf8 collate utf8_unicode_ci); insert into t1 values (1, 'Test'); select * from t1 where soundex(a) = soundex('Test'); select * from t1 where soundex(a) = soundex('TEST'); select * from t1 where soundex(a) = soundex('test'); drop table t1; SET collation_connection='utf8_general_ci'; -- source include/ctype_filesort.inc SET collation_connection='utf8_bin'; -- source include/ctype_filesort.inc # # Bug #7874 CONCAT() gives wrong results mixing # latin1 field and utf8 string literals # CREATE TABLE t1 ( user varchar(255) NOT NULL default '' ) ENGINE=MyISAM DEFAULT CHARSET=latin1; INSERT INTO t1 VALUES ('one'),('two'); SELECT CHARSET('a'); SELECT user, CONCAT('<', user, '>') AS c FROM t1; DROP TABLE t1; # # Bug#8785 # the same problem with the above, but with nested CONCATs # create table t1 (f1 varchar(1) not null) default charset utf8; insert into t1 values (''), (''); select concat(concat(_latin1'->',f1),_latin1'<-') from t1; drop table t1; # # Bug#8385: utf8_general_ci treats Cyrillic letters I and SHORT I as the same # select convert(_koi8r'' using utf8) < convert(_koi8r'' using utf8);