diff options
Diffstat (limited to 'mysql-test/main/ctype_utf32.test')
-rw-r--r-- | mysql-test/main/ctype_utf32.test | 1032 |
1 files changed, 1032 insertions, 0 deletions
diff --git a/mysql-test/main/ctype_utf32.test b/mysql-test/main/ctype_utf32.test new file mode 100644 index 00000000000..6f036898d15 --- /dev/null +++ b/mysql-test/main/ctype_utf32.test @@ -0,0 +1,1032 @@ +-- source include/have_utf32.inc +-- source include/have_utf8mb4.inc + +SET TIME_ZONE = '+03:00'; + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +SET NAMES latin1; +SET character_set_connection=utf32; +select hex('a'), hex('a '); +-- source include/endspace.inc + +# +# Check that incomplete utf32 characters in HEX notation +# are left-padded with zeros +# +select hex(_utf32 0x44); +select hex(_utf32 0x3344); +select hex(_utf32 0x103344); + +select hex(_utf32 X'44'); +select hex(_utf32 X'3344'); +select hex(_utf32 X'103344'); + + +# +# Check that 0x20 is only trimmed when it is +# a part of real SPACE character, not just a part +# of a multibyte sequence. +# Note, CYRILLIC LETTER ER is used as an example, which +# is stored as 0x0420 in UCS2, thus contains 0x20 in the +# low byte. The second character is THREE-PER-M, U+2004, +# which contains 0x20 in the high byte. +# + +CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32; +INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DELETE FROM t1; + +# +# Check that real spaces are correctly trimmed. +# + +INSERT INTO t1 VALUES + (X'000004200000002000000020',X'000004200000002000000020'), + (X'000020040000002000000020',X'000020040000002000000020'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DROP TABLE t1; + +# +# Check LPAD/RPAD +# +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421')); +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); + +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421')); +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); + +CREATE TABLE t1 SELECT +LPAD(_utf32 X'0420',10,_utf32 X'0421') l, +RPAD(_utf32 X'0420',10,_utf32 X'0421') r; +SHOW CREATE TABLE t1; +select hex(l), hex(r) from t1; +DROP TABLE t1; + +create table t1 (f1 char(30)); +insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000"); +select lpad(f1, 12, "-o-/") from t1; +drop table t1; + +###################################################### +# +# Test of like +# + +SET NAMES latin1; +SET character_set_connection=utf32; +--source include/ctype_like.inc + +SET NAMES utf8; +SET character_set_connection=utf32; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32); +INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА'); +INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж'); +INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж'); +INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ'); +SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a; +DROP TABLE t1; + +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +ENGINE=MyISAM CHARACTER SET utf32; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +SELECT * FROM t1 WHERE word LIKE "ca_"; +SELECT * FROM t1 WHERE word LIKE "cat"; +SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%" +SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_" +DROP TABLE t1; + + +# +# Check that INSERT() works fine. +# This invokes charpos() function. +select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066); +select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066); + +####################################################### + +# +# Bug 1264 +# +# Description: +# +# When using a ucs2 table in MySQL, +# either with ucs2_general_ci or ucs2_bin collation, +# words are returned in an incorrect order when using ORDER BY +# on an _indexed_ CHAR or VARCHAR column. They are sorted with +# the longest word *first* instead of last. I.E. The word "aardvark" +# is in the results before the word "a". +# +# If there is no index for the column, the problem does not occur. +# +# Interestingly, if there is no second column, the words are returned +# in the correct order. +# +# According to EXPLAIN, it looks like when the output includes columns that +# are not part of the index sorted on, it does a filesort, which fails. +# Using a straight index yields correct results. + +SET NAMES latin1; + +# +# Two fields, index +# + +CREATE TABLE t1 ( + word VARCHAR(64), + bar INT(11) default 0, + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci ; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER by word; +DROP TABLE t1; + + +# +# One field, index +# + +CREATE TABLE t1 ( + word VARCHAR(64) , + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY WORD; +SELECT * FROM t1 ORDER BY word; +DROP TABLE t1; + + +# +# Two fields, no index +# + +CREATE TABLE t1 ( + word TEXT, + bar INT(11) AUTO_INCREMENT, + PRIMARY KEY (bar)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a" ); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER BY word; +DROP TABLE t1; + +# +# END OF Bug 1264 test +# +######################################################## + + +# +# Check alignment for from-binary-conversion with CAST and CONVERT +# +SELECT hex(cast(0xAA as char character set utf32)); +SELECT hex(convert(0xAA using utf32)); + +# +# Check alignment for string types +# +CREATE TABLE t1 (a char(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a varchar(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a text character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a mediumtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a longtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +## +## Bug #5024 Server crashes with queries on fields +## with certain charset/collation settings +## +# +#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`); +#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c'); +#select s1 from t1 where s1 > 'a' order by s1; +#drop table t1; + +# +# Bug #5081 : UCS2 fields are filled with '0x2020' +# after extending field length +# +create table t1(a char(1)) default charset utf32; +insert into t1 values ('a'),('b'),('c'); +alter table t1 modify a char(5); +select a, hex(a) from t1; +drop table t1; + +# +# Check prepare statement from an UTF32 string +# +set @ivar= 1234; +set @str1 = 'select ?'; +set @str2 = convert(@str1 using utf32); +prepare stmt1 from @str2; +execute stmt1 using @ivar; + +# +# Check that utf32 works with ENUM and SET type +# +set names utf8; +create table t1 (a enum('x','y','z') character set utf32); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +select a, hex(a) from t1 order by a; +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32; +show create table t1; +insert into t1 values ('D'); +insert into t1 values ('E '); +insert into t1 values ('ä'); +insert into t1 values ('ö'); +insert into t1 values ('ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +insert into t1 values ('x,y'); +insert into t1 values ('x,y,z,ä,ö,ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +# +# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added +# +create table t1(a enum('a','b','c')) default character set utf32; +insert into t1 values('a'),('b'),('c'); +alter table t1 add b char(1); +show warnings; +select * from t1 order by a; +drop table t1; + +SET NAMES latin1; +SET collation_connection='utf32_general_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc +SET NAMES latin1; +SET collation_connection='utf32_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_filesort2.inc +-- source include/ctype_like_escape.inc + +# +# Bug#10344 Some string functions fail for UCS2 +# +select hex(substr(_utf32 0x000000e4000000e500000068,1)); +select hex(substr(_utf32 0x000000e4000000e500000068,2)); +select hex(substr(_utf32 0x000000e4000000e500000068,3)); +select hex(substr(_utf32 0x000000e4000000e500000068,-1)); +select hex(substr(_utf32 0x000000e4000000e500000068,-2)); +select hex(substr(_utf32 0x000000e4000000e500000068,-3)); + +#SET NAMES latin1; +# +# Bug#8235 +# +# This bug also helped to find another problem that +# INSERT of a UCS2 string containing a negative number +# into a unsigned int column didn't produce warnings. +# This test covers both problems. +# +#SET collation_connection='ucs2_swedish_ci'; +#CREATE TABLE t1 (Field1 int(10) default '0'); +## no warnings, negative numbers are allowed +#INSERT INTO t1 VALUES ('-1'); +#SELECT * FROM t1; +#DROP TABLE t1; +#CREATE TABLE t1 (Field1 int(10) unsigned default '0'); +## this should generate a "Data truncated" warning +#INSERT INTO t1 VALUES ('-1'); +#DROP TABLE t1; +#SET NAMES latin1; + +# +## +## Bug#18691 Converting number to UNICODE string returns invalid result +## +#SELECT CONVERT(103, CHAR(50) UNICODE); +#SELECT CONVERT(103.0, CHAR(50) UNICODE); +#SELECT CONVERT(-103, CHAR(50) UNICODE); +#SELECT CONVERT(-103.0, CHAR(50) UNICODE); + +# +# Bug#9557 MyISAM utf8 table crash +# +CREATE TABLE t1 ( + a varchar(250) NOT NULL default '', + KEY a (a) +) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci; +insert into t1 values (0x803d); +insert into t1 values (0x005b); +select hex(a) from t1; +drop table t1; + +## +## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation +## +#--disable_warnings +#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB; +#--enable_warnings +#insert into t1 values('a'); +#create index t1f1 on t1(f1); +#select f1 from t1 where f1 like 'a%'; +#drop table t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index +# +create table t1 ( + a char(10) character set utf32 not null, + index a (a) +) engine=myisam; +insert into t1 values (repeat(0x0000201f, 10)); +insert into t1 values (repeat(0x00002020, 10)); +insert into t1 values (repeat(0x00002021, 10)); +# make sure "index read" is used +explain select hex(a) from t1 order by a; +select hex(a) from t1 order by a; +alter table t1 drop index a; +select hex(a) from t1 order by a; +drop table t1; + +# +# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation +# over a 'ucs2' field uses a temporary table +# +#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci); +#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ'); +#SELECT id, MIN(s) FROM t1 GROUP BY id; +#DROP TABLE t1; + +## +## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb +## +# +#--disable_warnings +#drop table if exists bug20536; +#--enable_warnings +# +#set names latin1; +#create table bug20536 (id bigint not null auto_increment primary key, name +#varchar(255) character set ucs2 not null); +#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'"); +#select md5(name) from bug20536; +#select sha1(name) from bug20536; +#select make_set(3, name, upper(name)) from bug20536; +#select export_set(5, name, upper(name)) from bug20536; +#select export_set(5, name, upper(name), ",", 5) from bug20536; + +# +# Bug #20108: corrupted default enum value for a ucs2 field +# + +CREATE TABLE t1 ( + status enum('active','passive') character set utf32 collate utf32_general_ci + NOT NULL default 'passive' +); +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD a int NOT NULL AFTER status; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +#CREATE TABLE t2 ( +# status enum('active','passive') collate ucs2_turkish_ci +# NOT NULL default 'passive' +#); +#SHOW CREATE TABLE t2; +#ALTER TABLE t2 ADD a int NOT NULL AFTER status; +#DROP TABLE t2; + + +## Some broken functions: add these tests just to document current behavior. +# +## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would +## not be backwards compatible in all cases, so it's best to leave it alone +#select password(name) from bug20536; +#select old_password(name) from bug20536; +# +## Disable test case as encrypt relies on 'crypt' function. +## "decrypt" is noramlly tested in func_crypt.test which have a +## "have_crypt.inc" test +#--disable_parsing +## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it +## doesn't return good results for strings with embedded 0 bytes. It won't be +## fixed unless we choose to re-implement the crypt() function ourselves to take +## an extra size_t string_length argument. +#select encrypt(name, 'SALT') from bug20536; +#--enable_parsing +# +## QUOTE doesn't work with UCS2 data. It would require a total rewrite +## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is +## supported fully as a client character set. +#select quote(name) from bug20536; +# +#drop table bug20536; +# +--echo End of 4.1 tests + + +# +# Conversion from an UTF32 string to a decimal column +# +CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3)); +INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); +update t1 set b=a; +SELECT *, hex(a) FROM t1; +DROP TABLE t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22638 SOUNDEX broken for international characters +# +set names latin1; +set character_set_connection=utf32; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +select 'mood' sounds like 'mud'; +# Cyrillic A, BE, VE +select hex(soundex(_utf32 0x000004100000041100000412)); +# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter +select hex(soundex(_utf32 0x000000BF000000C0)); +set names latin1; + +# +# Bug #14290: character_maximum_length for text fields +# +create table t1(a blob, b text charset utf32); +select data_type, character_octet_length, character_maximum_length + from information_schema.columns where table_name='t1'; +drop table t1; + + +set names latin1; +set collation_connection=utf32_general_ci; +# +# Testing cs->coll->instr() +# +select position('bb' in 'abba'); + +# +# Testing cs->coll->hash_sort() +# +create table t1 (a varchar(10) character set utf32) engine=heap; +insert into t1 values ('a'),('A'),('b'),('B'); +select * from t1 where a='a' order by binary a; +select hex(min(binary a)),count(*) from t1 group by a; +drop table t1; + +# +# Testing cs->cset->numchars() +# +select char_length('abcd'), octet_length('abcd'); + +# +# Testing cs->cset->charpos() +# +select left('abcd',2); + +# +# Testing cs->cset->well_formed_length() +# +create table t1 (a varchar(10) character set utf32); +insert into t1 values (_utf32 0x0010FFFF); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x00110000); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x00110101); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x01000101); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x11000101); +select hex(a) from t1; +drop table t1; + +# +# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns +# +create table t1 (utf32 varchar(2) character set utf32); +--echo Wrong character with pad +insert ignore into t1 values (0x110000); +--echo Wrong chsaracter without pad +insert ignore into t1 values (0x00110000); +--echo Wrong character with pad followed by another wrong character +insert ignore into t1 values (0x11000000110000); +--echo Good character with pad followed by bad character +insert ignore into t1 values (0x10000000110000); +--echo Good character without pad followed by bad character +insert ignore into t1 values (0x0010000000110000); +--echo Wrong character with the second byte higher than 0x10 +insert ignore into t1 values (0x00800037); +--echo Wrong character with pad with the second byte higher than 0x10 +insert ignore into t1 values (0x00800037); +drop table t1; + +# +# Bug#32394 Character sets: crash if comparison with 0xfffd +# +select _utf32'a' collate utf32_general_ci = 0xfffd; +select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61)); +create table t1 (s1 varchar(5) character set utf32); +insert into t1 values (0xfffd); +select case when s1 = 0xfffd then 1 else 0 end from t1; +select hex(s1) from t1 where s1 = 0xfffd; +drop table t1; + +# +# Testing cs->cset->lengthsp() +# +create table t1 (a char(10)) character set utf32; +insert into t1 values ('a '); +select hex(a) from t1; +drop table t1; + +# +# Testing cs->cset->caseup() and cs->cset->casedn() +# +select upper('abcd'), lower('ABCD'); + +# +# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32 +# Testing cs->cset->snprintf() +# +#create table t1 (a date); +#insert into t1 values ('2007-09-16'); +#select * from t1; +#drop table t1; + +# +# Testing cs->cset->l10tostr +# !!! Not used in the code + +# +# Testing cs->cset->ll10tostr +# +create table t1 (a varchar(10) character set utf32); +insert into t1 values (123456); +select a, hex(a) from t1; +drop table t1; + +# +# Testing cs->cset->fill +# SOUNDEX fills strings with DIGIT ZERO up to four characters +select hex(soundex('a')); + +# +# Testing cs->cset->strntol +# !!! Not used in the code + +# +# Testing cs->cset->strntoul +# +create table t1 (a enum ('a','b','c')) character set utf32; +insert into t1 values ('1'); +select * from t1; +drop table t1; + +# +# Testing cs->cset->strntoll and cs->cset->strntoull +# +set names latin1; +select hex(conv(convert('123' using utf32), -10, 16)); +select hex(conv(convert('123' using utf32), 10, 16)); + +# +# Testing cs->cset->strntod +# +set names latin1; +set character_set_connection=utf32; +select 1.1 + '1.2'; +select 1.1 + '1.2xxx'; + +# Testing strntoll10_utf32 +# Testing cs->cset->strtoll10 +select left('aaa','1'); +--source include/ctype_strtoll10.inc + +# +# Testing cs->cset->strntoull10rnd +# +create table t1 (a int); +insert into t1 values ('-1234.1e2'); +insert ignore into t1 values ('-1234.1e2xxxx'); +insert into t1 values ('-1234.1e2 '); +select * from t1; +drop table t1; + +# +# Testing cs->cset->scan +# +create table t1 (a int); +insert into t1 values ('1 '); +insert ignore into t1 values ('1 x'); +select * from t1; +drop table t1; + +# +# Testing auto-conversion to TEXT +# +SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR +create table t1 (a varchar(17000) character set utf32); +show create table t1; +drop table t1; + +# +# Testing that maximim possible key length is 1332 bytes +# +create table t1 (a varchar(250) character set utf32 primary key); +show create table t1; +drop table t1; +--error ER_TOO_LONG_KEY +create table t1 (a varchar(334) character set utf32 primary key); + +# +# Testing mi_check with long key values +# +create table t1 (a varchar(333) character set utf32, key(a)); +insert into t1 values (repeat('a',333)), (repeat('b',333)); +flush tables; +check table t1; +drop table t1; + +# +# Test how character set works with date/time +# +SET collation_connection=utf32_general_ci; +--source include/ctype_datetime.inc +SET NAMES latin1; + +# +# Test basic regex functionality +# +set collation_connection=utf32_general_ci; +--source include/ctype_regex.inc +set names latin1; + + +# TODO: add tests for all engines + +# +# Bug #36418 Character sets: crash if char(256 using utf32) +# +select hex(char(0x01 using utf32)); +select hex(char(0x0102 using utf32)); +select hex(char(0x010203 using utf32)); +select hex(char(0x01020304 using utf32)); +create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32); +create index i on t1 (s1); +insert into t1 values (char(256 using utf32), char(256 using utf32)); +select hex(s1), hex(s2) from t1; +drop table t1; + + +# +# Bug#33073 Character sets: ordering fails with utf32 +# +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE'); +SELECT * FROM t1 ORDER BY s1; +SET max_sort_length=4; +SELECT * FROM t1 ORDER BY s1; +DROP TABLE t1; +SET max_sort_length=DEFAULT; +SET NAMES latin1; + +--echo # +--echo # Bug#52520 Difference in tinytext utf column metadata +--echo # +CREATE TABLE t1 ( + s1 TINYTEXT CHARACTER SET utf32, + s2 TEXT CHARACTER SET utf32, + s3 MEDIUMTEXT CHARACTER SET utf32, + s4 LONGTEXT CHARACTER SET utf32 +); +--enable_metadata +SET NAMES utf8mb4, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +SET NAMES utf8mb4; +SELECT *, HEX(s1) FROM t1; +--disable_metadata +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +DROP TABLE t1, t2; + + +--echo # +--echo # Bug#45263 utf32_general_ci, bad effects around CREATE TABLE AS SELECT +--echo # +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT HEX(0x00) AS my_col; +SELECT * FROM t1; +DROP TABLE t1; + +--echo # +--echo # Bug#32859 Character sets: no warning with non-fitting chariot wheel +--echo # +CREATE TABLE t1 (utf32 CHAR(5) CHARACTER SET utf32, latin1 CHAR(5) CHARACTER SET latin1); +INSERT INTO t1 (utf32) VALUES (0xc581); +UPDATE IGNORE t1 SET latin1 = utf32; +DELETE FROM t1; +INSERT INTO t1 (utf32) VALUES (0x100cc); +UPDATE IGNORE t1 SET latin1 = utf32; +DROP TABLE t1; + +--echo # +--echo # Bug#55912 FORMAT with locale set fails for numbers < 1000 +--echo # +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT format(123,2,'no_NO'); +SHOW CREATE TABLE t1; +SELECT * FROM t1; +DROP TABLE t1; + +--echo # +--echo # Bug#42511 mysqld: ctype-ucs2.c:2044: my_strnncollsp_utf32: Assertion (tlen % 4) == 0' faied +--echo # +CREATE TABLE t1 ( + b char(250) CHARACTER SET utf32, + key (b) +) ENGINE=MYISAM; +INSERT INTO t1 VALUES ('d'),('f'); +SELECT * FROM t1 WHERE b BETWEEN 'a' AND 'z'; +DROP TABLE t1; + +--echo # +--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure +--echo # +SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END; +SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END; +CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32); +INSERT INTO t1 VALUES ('a'); +SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1; +DROP TABLE t1; + +--echo # +--echo # Bug#11750518 41090: ORDER BY TRUNCATES GROUP_CONCAT RESULT +--echo # + +SET NAMES utf8, @@character_set_connection=utf32; +SET STATEMENT group_concat_max_len=1024 FOR +SELECT id, CHAR_LENGTH(GROUP_CONCAT(body)) AS l +FROM (SELECT 'a' AS id, REPEAT('foo bar', 100) AS body +UNION ALL +SELECT 'a' AS id, REPEAT('bla bla', 100) AS body) t1 +GROUP BY id +ORDER BY l DESC; + +--echo # +--echo # incorrect charset for val_str_ascii +--echo # + +SELECT '2010-10-10 10:10:10' + INTERVAL GeometryType(GeomFromText('POINT(1 1)')) hour_second; + +--echo # +--echo # MDEV-5745 analyze MySQL fix for bug#12368495 +--echo # +SELECT CHAR_LENGTH(TRIM(LEADING 0x0000000000 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(LEADING 0x0001 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(LEADING 0x00 FROM _utf32 0x00000061)); + +SELECT CHAR_LENGTH(TRIM(TRAILING 0x0000000000 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(TRAILING 0x0001 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(TRAILING 0x61 FROM _utf32 0x00000061)); + +SELECT CHAR_LENGTH(TRIM(BOTH 0x0000000000 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)); +SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); + +# +# potential signedness issue +# +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; + +--echo # +--echo # MDEV-11685: sql_mode can't be set with non-ascii connection charset +--echo # +SET character_set_connection=utf32; +SET sql_mode='NO_ENGINE_SUBSTITUTION'; +SELECT @@sql_mode; +SET sql_mode=DEFAULT; +SET NAMES utf8; + +--echo # +--echo # MDEV-13530 VARBINARY doesn't convert to to BLOB for sizes 65533, 65534 and 65535 +--echo # + +set sql_mode=''; +CREATE TABLE t1 (c1 VARCHAR(16383) CHARACTER SET utf32); +DESCRIBE t1; +DROP TABLE t1; + +CREATE TABLE t1 (c1 VARCHAR(16384) CHARACTER SET utf32); +DESCRIBE t1; +DROP TABLE t1; +set sql_mode=default; + + +--echo # +--echo # End of 5.5 tests +--echo # + + +--echo # +--echo # Start of 5.6 tests +--echo # + +--echo # +--echo # WL#3664 WEIGHT_STRING +--echo # + +set collation_connection=utf32_general_ci; +--source include/weight_string.inc +--source include/weight_string_euro.inc +select hex(weight_string(_utf32 0x10000)); +select hex(weight_string(_utf32 0x10001)); +--source include/weight_string_l1.inc + +set collation_connection=utf32_bin; +--source include/weight_string.inc +--source include/weight_string_euro.inc +--source include/weight_string_l1.inc + +--echo # +--echo # End of 5.6 tests +--echo # + + +--echo # +--echo # Start of 10.0 tests +--echo # + +--echo # +--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context +--echo # +SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI; + +--echo # +--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string) +--echo # + +SET NAMES utf8mb4; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32); +INSERT INTO t1 VALUES ('a'); +--error ER_INVALID_CHARACTER_STRING +SELECT CONCAT(a,0x20FFFF) FROM t1; +SELECT CONCAT(a,0x010000) FROM t1; +SELECT CONCAT(a,0x00FF) FROM t1; +DROP TABLE t1; +--error ER_INVALID_CHARACTER_STRING +SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF); +PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)"; +SET @arg00=_binary 0x20FFFF; +--error ER_INVALID_CHARACTER_STRING +EXECUTE stmt USING @arg00; +SET @arg00=_binary 0x010000; +EXECUTE stmt USING @arg00; +SET @arg00=_binary 0x00FF; +EXECUTE stmt USING @arg00; +DEALLOCATE PREPARE stmt; + +--echo # +--echo # End of 10.0 tests +--echo # + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8419 utf32: compare broken bytes as "greater than any non-broken character" +--echo # +# Make sure that all non-BMP characters are compared as equal +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32, KEY(a)); +INSERT INTO t1 VALUES (0x10000),(0x10001),(0x10002); +SELECT COUNT(DISTINCT a) FROM t1; +DROP TABLE t1; +SELECT _utf32 0x10001=_utf32 0x10002; + +--echo # +--echo # MDEV-9178 Wrong result for CAST(CONVERT('1IJ3' USING ucs2) AS SIGNED) +--echo # +SET NAMES utf8; +SELECT CAST(CONVERT('1IJ3' USING utf32) AS SIGNED); + +--echo # +--echo # End of 10.1 tests +--echo # + +--echo # +--echo # Start of 10.2 tests +--echo # + +--echo # +--echo # MDEV-9711 NO PAD Collatons +--echo # +SET character_set_connection=utf32; +let $coll='utf32_general_nopad_ci'; +let $coll_pad='utf32_general_ci'; +--source include/ctype_pad_all_engines.inc + +let $coll='utf32_nopad_bin'; +let $coll_pad='utf32_bin'; +--source include/ctype_pad_all_engines.inc + +--echo # +--echo # End of 10.2 tests +--echo # |