1 files changed, 1032 insertions, 0 deletions
diff --git a/mysql-test/main/ctype_utf32.test b/mysql-test/main/ctype_utf32.test
new file mode 100644
index 00000000000..6f036898d15
--- /dev/null
+++ b/mysql-test/main/ctype_utf32.test
@@ -0,0 +1,1032 @@
+-- source include/have_utf32.inc
+-- source include/have_utf8mb4.inc
+
+SET TIME_ZONE = '+03:00';
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+SET NAMES latin1;
+SET character_set_connection=utf32;
+select hex('a'), hex('a ');
+-- source include/endspace.inc
+
+#
+# Check that incomplete utf32 characters in HEX notation
+# are left-padded with zeros
+#
+select hex(_utf32 0x44);
+select hex(_utf32 0x3344);
+select hex(_utf32 0x103344);
+
+select hex(_utf32 X'44');
+select hex(_utf32 X'3344');
+select hex(_utf32 X'103344');
+
+
+#
+# Check that 0x20 is only trimmed when it is 
+# a part of real SPACE character, not just a part
+# of a multibyte sequence.
+# Note, CYRILLIC LETTER ER is used as an example, which
+# is stored as 0x0420 in UCS2, thus contains 0x20 in the
+# low byte. The second character is THREE-PER-M, U+2004,
+# which contains 0x20 in the high byte.
+#
+
+CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
+INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DELETE FROM t1;
+
+#
+# Check that real spaces are correctly trimmed.
+#
+
+INSERT INTO t1 VALUES
+  (X'000004200000002000000020',X'000004200000002000000020'),
+  (X'000020040000002000000020',X'000020040000002000000020');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DROP TABLE t1;
+
+#
+# Check LPAD/RPAD
+#
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
+SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
+
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
+SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
+
+CREATE TABLE t1 SELECT 
+LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
+RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
+SHOW CREATE TABLE t1;
+select hex(l), hex(r) from t1;
+DROP TABLE t1;
+
+create table t1 (f1 char(30));
+insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
+select lpad(f1, 12, "-o-/") from t1;
+drop table t1;
+
+######################################################
+#
+# Test of like
+#
+
+SET NAMES latin1;
+SET character_set_connection=utf32;
+--source include/ctype_like.inc
+
+SET NAMES utf8;
+SET character_set_connection=utf32;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
+INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
+INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
+INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
+SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
+DROP TABLE t1;
+
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+ENGINE=MyISAM CHARACTER SET utf32;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+SELECT * FROM t1 WHERE word LIKE "ca_";
+SELECT * FROM t1 WHERE word LIKE "cat";
+SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
+SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
+DROP TABLE t1;
+
+
+#
+# Check that INSERT() works fine. 
+# This invokes charpos() function.
+select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
+select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
+
+#######################################################
+
+#
+# Bug 1264
+#
+# Description: 
+#
+# When using a ucs2 table in MySQL, 
+# either with ucs2_general_ci or ucs2_bin collation,
+# words are returned in an incorrect order when using ORDER BY
+# on an _indexed_ CHAR or VARCHAR column. They are sorted with
+# the longest word *first* instead of last. I.E. The word "aardvark"
+# is in the results before the word "a".
+#
+# If there is no index for the column, the problem does not occur.
+#
+# Interestingly, if there is no second column, the words are returned
+# in the correct order. 
+#
+# According to EXPLAIN, it looks like when the output includes columns that
+# are not part of the index sorted on, it does a filesort, which fails. 
+# Using a straight index yields correct results.
+
+SET NAMES latin1;
+
+#
+# Two fields, index
+#
+
+CREATE TABLE t1 (
+   word VARCHAR(64),
+   bar INT(11) default 0,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci ;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER by word;
+DROP TABLE t1;
+
+
+#
+# One field, index
+# 
+
+CREATE TABLE t1 (
+   word VARCHAR(64) ,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY WORD;
+SELECT * FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+
+#
+# Two fields, no index
+#
+
+CREATE TABLE t1 (
+   word TEXT,
+   bar INT(11) AUTO_INCREMENT,
+   PRIMARY KEY (bar))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci ;
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a" );
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+#
+# END OF Bug 1264 test
+#
+########################################################
+
+
+#
+# Check alignment for from-binary-conversion with CAST and CONVERT
+#
+SELECT hex(cast(0xAA as char character set utf32));
+SELECT hex(convert(0xAA using utf32));
+
+#
+# Check alignment for string types
+#
+CREATE TABLE t1 (a char(10) character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a varchar(10) character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a text character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a mediumtext character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a longtext character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+##
+## Bug #5024 Server crashes with queries on fields
+##  with certain charset/collation settings
+##
+#
+#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
+#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
+#select s1 from t1 where s1 > 'a' order by s1;
+#drop table t1;
+
+#
+# Bug #5081 : UCS2 fields are filled with '0x2020'
+# after extending field length
+#
+create table t1(a char(1)) default charset utf32;
+insert into t1 values ('a'),('b'),('c');
+alter table t1 modify a char(5);
+select a, hex(a) from t1;
+drop table t1;
+
+#
+# Check prepare statement from an UTF32 string
+#
+set @ivar= 1234;
+set @str1 = 'select ?';
+set @str2 = convert(@str1 using utf32);
+prepare stmt1 from @str2;
+execute stmt1 using @ivar;
+
+#
+# Check that utf32 works with ENUM and SET type
+#
+set names utf8;
+create table t1 (a enum('x','y','z') character set utf32);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+select a, hex(a) from t1 order by a;
+alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
+show create table t1;
+insert into t1 values ('D');
+insert into t1 values ('E ');
+insert into t1 values ('ä');
+insert into t1 values ('ö');
+insert into t1 values ('ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+insert into t1 values ('x,y');
+insert into t1 values ('x,y,z,ä,ö,ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+#
+# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
+#
+create table t1(a enum('a','b','c')) default character set utf32;
+insert into t1 values('a'),('b'),('c');
+alter table t1 add b char(1);
+show warnings;
+select * from t1 order by a;
+drop table t1;
+
+SET NAMES latin1;
+SET collation_connection='utf32_general_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+SET NAMES latin1;
+SET collation_connection='utf32_bin';
+-- source include/ctype_filesort.inc
+-- source include/ctype_filesort2.inc
+-- source include/ctype_like_escape.inc
+
+#
+# Bug#10344 Some string functions fail for UCS2
+#
+select hex(substr(_utf32 0x000000e4000000e500000068,1));
+select hex(substr(_utf32 0x000000e4000000e500000068,2));
+select hex(substr(_utf32 0x000000e4000000e500000068,3));
+select hex(substr(_utf32 0x000000e4000000e500000068,-1));
+select hex(substr(_utf32 0x000000e4000000e500000068,-2));
+select hex(substr(_utf32 0x000000e4000000e500000068,-3));
+
+#SET NAMES latin1;
+#
+# Bug#8235
+#
+# This bug also helped to find another problem that
+# INSERT of a UCS2 string containing a negative number
+# into a unsigned int column didn't produce warnings.
+# This test covers both problems.
+#
+#SET collation_connection='ucs2_swedish_ci';
+#CREATE TABLE t1 (Field1 int(10) default '0');
+## no warnings, negative numbers are allowed
+#INSERT INTO t1 VALUES ('-1');
+#SELECT * FROM t1;
+#DROP TABLE t1;
+#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
+## this should generate a "Data truncated" warning
+#INSERT INTO t1 VALUES ('-1');
+#DROP TABLE t1;
+#SET NAMES latin1;
+
+#
+##
+## Bug#18691 Converting number to UNICODE string returns invalid result
+##
+#SELECT CONVERT(103, CHAR(50) UNICODE);
+#SELECT CONVERT(103.0, CHAR(50) UNICODE);
+#SELECT CONVERT(-103, CHAR(50) UNICODE);
+#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
+
+#
+# Bug#9557 MyISAM utf8 table crash
+#
+CREATE TABLE t1 (
+  a varchar(250) NOT NULL default '',
+  KEY a (a)
+) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
+insert into t1 values (0x803d);
+insert into t1 values (0x005b);
+select hex(a) from t1;
+drop table t1;
+
+##
+## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
+##
+#--disable_warnings
+#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
+#--enable_warnings
+#insert into t1 values('a');
+#create index t1f1 on t1(f1);
+#select f1 from t1 where f1 like 'a%';
+#drop table t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf32);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
+#
+create table t1 (
+  a char(10) character set utf32 not null, 
+  index a (a)
+) engine=myisam;
+insert into t1 values (repeat(0x0000201f, 10));
+insert into t1 values (repeat(0x00002020, 10));
+insert into t1 values (repeat(0x00002021, 10));
+# make sure "index read" is used
+explain select hex(a) from t1 order by a;
+select hex(a) from t1 order by a;
+alter table t1 drop index a;
+select hex(a) from t1 order by a;
+drop table t1;
+
+#
+# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
+#             over a 'ucs2' field uses a temporary table 
+#
+#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
+#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
+#SELECT id, MIN(s) FROM t1 GROUP BY id;
+#DROP TABLE t1;
+
+##
+## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
+##
+#
+#--disable_warnings
+#drop table if exists bug20536;
+#--enable_warnings
+#
+#set names latin1;
+#create table bug20536 (id bigint not null auto_increment primary key, name
+#varchar(255) character set ucs2 not null);
+#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
+#select md5(name) from bug20536;
+#select sha1(name) from bug20536;
+#select make_set(3, name, upper(name)) from bug20536;
+#select export_set(5, name, upper(name)) from bug20536;
+#select export_set(5, name, upper(name), ",", 5) from bug20536;
+
+#
+# Bug #20108: corrupted default enum value for a ucs2 field              
+#
+
+CREATE TABLE t1 (
+  status enum('active','passive') character set utf32 collate utf32_general_ci 
+    NOT NULL default 'passive'
+);
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 ADD a int NOT NULL AFTER status; 
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+#CREATE TABLE t2 (
+#  status enum('active','passive') collate ucs2_turkish_ci 
+#    NOT NULL default 'passive'
+#);
+#SHOW CREATE TABLE t2;
+#ALTER TABLE t2 ADD a int NOT NULL AFTER status; 
+#DROP TABLE t2;
+
+
+## Some broken functions:  add these tests just to document current behavior.
+#
+## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
+## not be backwards compatible in all cases, so it's best to leave it alone
+#select password(name) from bug20536;
+#select old_password(name) from bug20536;
+#
+## Disable test case as encrypt relies on 'crypt' function.
+## "decrypt" is noramlly tested in func_crypt.test which have a
+## "have_crypt.inc" test
+#--disable_parsing
+## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
+## doesn't return good results for strings with embedded 0 bytes.  It won't be
+## fixed unless we choose to re-implement the crypt() function ourselves to take
+## an extra size_t string_length argument.
+#select encrypt(name, 'SALT') from bug20536;
+#--enable_parsing
+#
+## QUOTE doesn't work with UCS2 data.  It would require a total rewrite
+## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
+## supported fully as a client character set.
+#select quote(name) from bug20536;
+#
+#drop table bug20536;
+#
+--echo End of 4.1 tests
+
+
+#
+# Conversion from an UTF32 string to a decimal column
+#
+CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
+INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
+update t1 set b=a;
+SELECT *, hex(a) FROM t1;
+DROP TABLE t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf32);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22638 SOUNDEX broken for international characters
+#
+set names latin1;
+set character_set_connection=utf32;
+select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
+select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
+select 'mood' sounds like 'mud';
+# Cyrillic A, BE, VE
+select hex(soundex(_utf32 0x000004100000041100000412));
+# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
+select hex(soundex(_utf32 0x000000BF000000C0));
+set names latin1;
+
+#
+# Bug #14290: character_maximum_length for text fields
+#
+create table t1(a blob, b text charset utf32);
+select data_type, character_octet_length, character_maximum_length
+  from information_schema.columns where table_name='t1';
+drop table t1;
+
+
+set names latin1;
+set collation_connection=utf32_general_ci;
+#
+# Testing cs->coll->instr()
+#
+select position('bb' in 'abba');
+
+#
+# Testing cs->coll->hash_sort()
+#
+create table t1 (a varchar(10) character set utf32) engine=heap;
+insert into t1 values ('a'),('A'),('b'),('B');
+select * from t1 where a='a' order by binary a;
+select hex(min(binary a)),count(*) from t1 group by a;
+drop table t1;
+
+#
+# Testing cs->cset->numchars()
+#
+select char_length('abcd'), octet_length('abcd');
+
+#
+# Testing cs->cset->charpos()
+#
+select left('abcd',2);
+
+#
+# Testing cs->cset->well_formed_length()
+#
+create table t1 (a varchar(10) character set utf32);
+insert into t1 values (_utf32 0x0010FFFF);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x00110000);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x00110101);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x01000101);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x11000101);
+select hex(a) from t1;
+drop table t1;
+
+#
+# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns
+#
+create table t1 (utf32 varchar(2) character set utf32);
+--echo Wrong character with pad
+insert ignore into t1 values (0x110000);
+--echo Wrong chsaracter without pad
+insert ignore into t1 values (0x00110000);
+--echo Wrong character with pad followed by another wrong character
+insert ignore into t1 values (0x11000000110000);
+--echo Good character with pad followed by bad character
+insert ignore into t1 values (0x10000000110000);
+--echo Good character without pad followed by bad character
+insert ignore into t1 values (0x0010000000110000);
+--echo Wrong character with the second byte higher than 0x10
+insert ignore into t1 values (0x00800037);
+--echo Wrong character with pad with the second byte higher than 0x10
+insert ignore into t1 values (0x00800037);
+drop table t1;
+
+#
+# Bug#32394 Character sets: crash if comparison with 0xfffd
+#
+select _utf32'a' collate utf32_general_ci = 0xfffd;
+select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61));
+create table t1 (s1 varchar(5) character set utf32);
+insert into t1 values (0xfffd);
+select case when s1 = 0xfffd then 1 else 0 end from t1;
+select hex(s1) from t1 where s1 = 0xfffd;
+drop table t1;
+
+#
+# Testing cs->cset->lengthsp()
+#
+create table t1 (a char(10)) character set utf32;
+insert into t1 values ('a   ');
+select hex(a) from t1;
+drop table t1;
+
+#
+# Testing cs->cset->caseup() and cs->cset->casedn()
+#
+select upper('abcd'), lower('ABCD');
+
+#
+# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
+# Testing cs->cset->snprintf()
+#
+#create table t1 (a date);
+#insert into t1 values ('2007-09-16');
+#select * from t1;
+#drop table t1;
+
+#
+# Testing cs->cset->l10tostr
+# !!! Not used in the code
+
+#
+# Testing cs->cset->ll10tostr
+#
+create table t1 (a varchar(10) character set utf32);
+insert into t1 values (123456);
+select a, hex(a) from t1;
+drop table t1;
+
+#
+# Testing cs->cset->fill
+# SOUNDEX fills strings with DIGIT ZERO up to four characters
+select hex(soundex('a'));
+
+#
+# Testing cs->cset->strntol
+# !!! Not used in the code
+
+#
+# Testing cs->cset->strntoul
+#
+create table t1 (a enum ('a','b','c')) character set utf32;
+insert into t1 values ('1');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->strntoll and cs->cset->strntoull
+#
+set names latin1;
+select hex(conv(convert('123' using utf32), -10, 16));
+select hex(conv(convert('123' using utf32), 10, 16));
+
+#
+# Testing cs->cset->strntod
+#
+set names latin1;
+set character_set_connection=utf32;
+select 1.1 + '1.2';
+select 1.1 + '1.2xxx';
+
+# Testing strntoll10_utf32
+# Testing cs->cset->strtoll10
+select left('aaa','1');
+--source include/ctype_strtoll10.inc
+
+#
+# Testing cs->cset->strntoull10rnd
+#
+create table t1 (a int);
+insert into t1 values ('-1234.1e2');
+insert ignore into t1 values ('-1234.1e2xxxx');
+insert into t1 values ('-1234.1e2    ');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->scan
+#
+create table t1 (a int);
+insert into t1 values ('1 ');
+insert ignore into t1 values ('1 x');
+select * from t1;
+drop table t1;
+
+#
+# Testing auto-conversion to TEXT
+#
+SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
+create table t1 (a varchar(17000) character set utf32);
+show create table t1;
+drop table t1;
+
+#
+# Testing that maximim possible key length is 1332 bytes
+#
+create table t1 (a varchar(250) character set utf32 primary key);
+show create table t1;
+drop table t1;
+--error ER_TOO_LONG_KEY
+create table t1 (a varchar(334) character set utf32 primary key);
+
+#
+# Testing mi_check with long key values
+#
+create table t1 (a varchar(333) character set utf32, key(a));
+insert into t1 values (repeat('a',333)), (repeat('b',333));
+flush tables;
+check table t1;
+drop table t1;
+
+#
+# Test how character set works with date/time
+#
+SET collation_connection=utf32_general_ci;
+--source include/ctype_datetime.inc
+SET NAMES latin1;
+
+#
+# Test basic regex functionality
+#
+set collation_connection=utf32_general_ci;
+--source include/ctype_regex.inc
+set names latin1;
+
+
+# TODO: add tests for all engines
+
+#
+# Bug #36418 Character sets: crash if char(256 using utf32)
+#
+select hex(char(0x01 using utf32));
+select hex(char(0x0102 using utf32));
+select hex(char(0x010203 using utf32));
+select hex(char(0x01020304 using utf32));
+create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32);
+create index i on t1 (s1);
+insert into t1 values (char(256 using utf32), char(256 using utf32));
+select hex(s1), hex(s2) from t1;
+drop table t1;
+
+
+#
+# Bug#33073 Character sets: ordering fails with utf32
+#
+SET collation_connection=utf32_general_ci;
+CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
+SELECT * FROM t1 ORDER BY s1;
+SET max_sort_length=4;
+SELECT * FROM t1 ORDER BY s1;
+DROP TABLE t1;
+SET max_sort_length=DEFAULT;
+SET NAMES latin1;
+
+--echo #
+--echo # Bug#52520 Difference in tinytext utf column metadata
+--echo #
+CREATE TABLE t1 (
+  s1 TINYTEXT CHARACTER SET utf32,
+  s2 TEXT CHARACTER SET utf32,
+  s3 MEDIUMTEXT CHARACTER SET utf32,
+  s4 LONGTEXT CHARACTER SET utf32
+);
+--enable_metadata
+SET NAMES utf8mb4, @@character_set_results=NULL;
+SELECT *, HEX(s1) FROM t1;
+SET NAMES latin1;
+SELECT *, HEX(s1) FROM t1;
+SET NAMES utf8mb4;
+SELECT *, HEX(s1) FROM t1;
+--disable_metadata
+CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
+SHOW CREATE TABLE t2;
+DROP TABLE t1, t2;
+
+
+--echo #
+--echo # Bug#45263 utf32_general_ci, bad effects around CREATE TABLE AS SELECT
+--echo #
+SET collation_connection=utf32_general_ci;
+CREATE TABLE t1 AS SELECT HEX(0x00) AS my_col;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#32859 Character sets: no warning with non-fitting chariot wheel
+--echo #
+CREATE TABLE t1 (utf32 CHAR(5) CHARACTER SET utf32, latin1 CHAR(5) CHARACTER SET latin1);
+INSERT INTO t1 (utf32) VALUES (0xc581);
+UPDATE IGNORE t1 SET latin1 = utf32;
+DELETE FROM t1;
+INSERT INTO t1 (utf32) VALUES (0x100cc);
+UPDATE IGNORE t1 SET latin1 = utf32;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#55912 FORMAT with locale set fails for numbers < 1000
+--echo #
+SET collation_connection=utf32_general_ci;
+CREATE TABLE t1 AS SELECT format(123,2,'no_NO');
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#42511 mysqld: ctype-ucs2.c:2044: my_strnncollsp_utf32: Assertion (tlen % 4) == 0' faied
+--echo #
+CREATE TABLE t1 (
+ b char(250) CHARACTER SET utf32,
+ key (b)
+) ENGINE=MYISAM;
+INSERT INTO t1 VALUES ('d'),('f');
+SELECT * FROM t1 WHERE b BETWEEN 'a' AND 'z';
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
+--echo #
+SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
+SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
+CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#11750518 41090: ORDER BY TRUNCATES GROUP_CONCAT RESULT
+--echo #
+
+SET NAMES utf8, @@character_set_connection=utf32;
+SET STATEMENT group_concat_max_len=1024 FOR
+SELECT id, CHAR_LENGTH(GROUP_CONCAT(body)) AS l
+FROM (SELECT 'a' AS id, REPEAT('foo bar', 100) AS body
+UNION ALL
+SELECT 'a' AS id, REPEAT('bla bla', 100) AS body) t1
+GROUP BY id
+ORDER BY l DESC;
+
+--echo #
+--echo # incorrect charset for val_str_ascii
+--echo #
+
+SELECT '2010-10-10 10:10:10' + INTERVAL GeometryType(GeomFromText('POINT(1 1)')) hour_second;
+
+--echo #
+--echo # MDEV-5745 analyze MySQL fix for bug#12368495
+--echo #
+SELECT CHAR_LENGTH(TRIM(LEADING 0x0000000000 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(LEADING 0x0001 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(LEADING 0x00 FROM _utf32 0x00000061));
+
+SELECT CHAR_LENGTH(TRIM(TRAILING 0x0000000000 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(TRAILING 0x0001 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(TRAILING 0x61 FROM _utf32 0x00000061));
+
+SELECT CHAR_LENGTH(TRIM(BOTH 0x0000000000 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
+SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
+
+#
+# potential signedness issue
+#
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+
+--echo #
+--echo # MDEV-11685: sql_mode can't be set with non-ascii connection charset
+--echo #
+SET character_set_connection=utf32;
+SET sql_mode='NO_ENGINE_SUBSTITUTION';
+SELECT @@sql_mode;
+SET sql_mode=DEFAULT;
+SET NAMES utf8;
+
+--echo #
+--echo # MDEV-13530 VARBINARY doesn't convert to to BLOB for sizes 65533, 65534 and 65535
+--echo #
+
+set sql_mode='';
+CREATE TABLE t1 (c1 VARCHAR(16383) CHARACTER SET utf32);
+DESCRIBE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 VARCHAR(16384) CHARACTER SET utf32);
+DESCRIBE t1;
+DROP TABLE t1;
+set sql_mode=default;
+
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
+
+
+--echo #
+--echo # Start of 5.6 tests
+--echo #
+
+--echo #
+--echo # WL#3664 WEIGHT_STRING
+--echo #
+
+set collation_connection=utf32_general_ci;
+--source include/weight_string.inc
+--source include/weight_string_euro.inc
+select hex(weight_string(_utf32 0x10000));
+select hex(weight_string(_utf32 0x10001));
+--source include/weight_string_l1.inc
+
+set collation_connection=utf32_bin;
+--source include/weight_string.inc
+--source include/weight_string_euro.inc
+--source include/weight_string_l1.inc
+
+--echo #
+--echo # End of 5.6 tests
+--echo #
+
+
+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+--echo #
+SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI;
+
+--echo #
+--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+--echo #
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(a,0x20FFFF) FROM t1;
+SELECT CONCAT(a,0x010000) FROM t1;
+SELECT CONCAT(a,0x00FF) FROM t1;
+DROP TABLE t1;
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF);
+PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)";
+SET @arg00=_binary 0x20FFFF;
+--error ER_INVALID_CHARACTER_STRING
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0x010000;
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0x00FF;
+EXECUTE stmt USING @arg00;
+DEALLOCATE PREPARE stmt;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-8419 utf32: compare broken bytes as "greater than any non-broken character"
+--echo #
+# Make sure that all non-BMP characters are compared as equal
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32, KEY(a));
+INSERT INTO t1 VALUES (0x10000),(0x10001),(0x10002);
+SELECT COUNT(DISTINCT a) FROM t1;
+DROP TABLE t1;
+SELECT _utf32 0x10001=_utf32 0x10002;
+
+--echo #
+--echo # MDEV-9178 Wrong result for CAST(CONVERT('1Ĳ3' USING ucs2) AS SIGNED)
+--echo #
+SET NAMES utf8;
+SELECT CAST(CONVERT('1Ĳ3' USING utf32) AS SIGNED);
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
+
+--echo #
+--echo # Start of 10.2 tests
+--echo #
+
+--echo #
+--echo # MDEV-9711 NO PAD Collatons
+--echo #
+SET character_set_connection=utf32;
+let $coll='utf32_general_nopad_ci';
+let $coll_pad='utf32_general_ci';
+--source include/ctype_pad_all_engines.inc
+
+let $coll='utf32_nopad_bin';
+let $coll_pad='utf32_bin';
+--source include/ctype_pad_all_engines.inc
+
+--echo #
+--echo # End of 10.2 tests
+--echo #