diff options
author | Alexander Barkov <bar@mysql.com> | 2010-02-24 13:15:34 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2010-02-24 13:15:34 +0400 |
commit | 8994fad85db18b4ab31fc67e2f8e15f1203d0b1a (patch) | |
tree | 469ee723904a6610a4436b55056ed5e2c19d9972 | |
parent | d2af6c43c0f7b62a6051d0c5f74c66f98dbefe7a (diff) | |
download | mariadb-git-8994fad85db18b4ab31fc67e2f8e15f1203d0b1a.tar.gz |
Backporting WL#1213
config/ac-macros/character_sets.m4:
- Adding configure definitions for utf8mb4, utf16, utf32
include/config-win.h:
- Enabling utf8mb4, utf16, utf32 in Windows build
include/m_ctype.h:
- Adding new flags
- Adding new shared functions prototypes
mysql-test/include/ctype_datetime.inc:
- Adding test to check that datetime functions
work with "real" multibyte character sets.
mysql-test/include/ctype_like.inc:
- Adding LIKE tests
mysql-test/include/have_utf16.inc:
New file
mysql-test/include/have_utf32.inc:
New file
mysql-test/include/have_utf8mb4.inc:
New file
mysql-test/r/ctype_ldml.result:
- Adding tests for utf8mb4, utf16, utf32
mysql-test/r/ctype_many.result:
- Adding tests to check superset/subset relations
between all Unicode character sets.
mysql-test/r/ctype_utf16.result:
New file
mysql-test/r/ctype_utf16_uca.result:
New file
mysql-test/r/ctype_utf32.result:
New file
mysql-test/r/ctype_utf32_uca.result:
New file
mysql-test/r/ctype_utf8.result:
- Adding tests for utf8mb3 alias
mysql-test/r/ctype_utf8mb4.result:
- Adding tests for utf8mb4
mysql-test/r/have_utf16.require:
New file
mysql-test/r/have_utf32.require:
New file
mysql-test/r/have_utf8mb4.require:
New file
mysql-test/std_data/Index.xml:
- Adding tests for loadable utf8mb4, utf16, utf32 collations
mysql-test/suite/sys_vars/r/character_set_client_basic.result:
- Adding tests for utf16, utf32.
- Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_connection_basic.result:
- Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_database_basic.result:
- Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result:
- Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_results_basic.result:
- Fixing new number of character sets
mysql-test/suite/sys_vars/t/character_set_client_basic.test:
- Adding tests for new character sets
mysql-test/suite/sys_vars/t/character_set_connection_basic.test:
- Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_database_basic.test:
- Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test:
- Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_results_basic.test:
- Adding dependency on utf8mb4, utf16, utf32
mysql-test/t/ctype_ldml.test:
- Adding tests for dynamic utf8mb4, utf16, utf32 collations
mysql-test/t/ctype_many.test:
- Adding tests to check superset/subset relations
between all Unicode character sets
mysql-test/t/ctype_utf16.test:
New file
mysql-test/t/ctype_utf16_uca.test:
New file
mysql-test/t/ctype_utf32.test:
New file
mysql-test/t/ctype_utf32_uca.test:
New file
mysql-test/t/ctype_utf8.test:
- Adding tests for utf8mb3 alias
mysql-test/t/ctype_utf8mb4.test:
New file
mysys/charset-def.c:
- Adding initialization of utf8mb4, utf16, utf32 built-in collations
mysys/charset.c:
- Adding initialization of utf8mb4, utf16, utf32 dynamic collations
sql/field.cc:
- Fixing "truncated" error with datetime functions:
Force conversion in case of non-ascii character sets.
sql/item.cc:
- Adding superset/subset relation check for utf8mb4/utf8
sql/item_strfunc.cc:
- Fixing a problem with CHAR(x USING utf32)
sql/sql_string.cc:
- Fixing problems with zero padding for UTF32
sql/sql_table.cc:
- Fixing buffer size, to make utf32 comma fit.
strings/ctype-mb.c:
- Making handlers for multi-byte binary collations public
strings/ctype-uca.c:
- Adding definitions for utf8mb4, utf16, utf32 UCA collations
strings/ctype-ucs2.c:
- Adding functions which are shared between ucs2, utf16, utf32
- Adding utf16 implementation
- Adding utf32 implementation
strings/ctype-utf8.c:
- Adding functions shared between utf8 and utf8mb4
- Adding implementation of utf8mb4
49 files changed, 19031 insertions, 798 deletions
diff --git a/config/ac-macros/character_sets.m4 b/config/ac-macros/character_sets.m4 index 24bdd92b083..81967d383ec 100644 --- a/config/ac-macros/character_sets.m4 +++ b/config/ac-macros/character_sets.m4 @@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257) define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8) define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u) define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman) -define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8) +define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32) DEFAULT_CHARSET=latin1 CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5" -CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8" +CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32" AC_DIVERT_POP @@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets, AC_MSG_CHECKING("character sets") -CHARSETS="$default_charset latin1 utf8" +CHARSETS="$default_charset latin1 utf8mb4 utf8" if test "$extra_charsets" = no; then CHARSETS="$CHARSETS" @@ -195,8 +195,23 @@ do AC_DEFINE([USE_MB], [1], [Use multi-byte character routines]) AC_DEFINE(USE_MB_IDENT, 1) ;; + utf8mb4) + AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4]) + AC_DEFINE([USE_MB], 1, [Use multi-byte character routines]) + AC_DEFINE(USE_MB_IDENT, 1) + ;; utf8) - AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8]) + AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8]) + AC_DEFINE([USE_MB], 1, [Use multi-byte character routines]) + AC_DEFINE(USE_MB_IDENT, 1) + ;; + utf16) + AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16]) + AC_DEFINE([USE_MB], 1, [Use multi-byte character routines]) + AC_DEFINE(USE_MB_IDENT, 1) + ;; + utf32) + 
AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32]) AC_DEFINE([USE_MB], 1, [Use multi-byte character routines]) AC_DEFINE(USE_MB_IDENT, 1) ;; @@ -381,6 +396,48 @@ case $default_charset in fi default_charset_collations="$UTFC" ;; + utf8mb4) + default_charset_default_collation="utf8mb4_general_ci" + define(UTFC1, utf8mb4_general_ci utf8mb4_bin) + define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci) + define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci) + define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci) + define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci) + define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci) + define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci) + define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci) + define(UTFC9, utf8mb4_unicode_ci) + UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9" + default_charset_collations="$UTFC" + ;; + utf16) + default_charset_default_collation="utf16_general_ci" + define(UTFC1, utf16_general_ci utf16_bin) + define(UTFC2, utf16_czech_ci utf16_danish_ci) + define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci) + define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci) + define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci) + define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci) + define(UTFC7, utf16_spanish2_ci utf16_spanish_ci) + define(UTFC8, utf16_swedish_ci utf16_turkish_ci) + define(UTFC9, utf16_unicode_ci) + UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9" + default_charset_collations="$UTFC" + ;; + utf32) + default_charset_default_collation="utf32_general_ci" + define(UTFC1, utf32_general_ci utf32_bin) + define(UTFC2, utf32_czech_ci utf32_danish_ci) + define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci) + define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci) + define(UTFC5, utf32_persian_ci utf32_polish_ci 
utf32_romanian_ci) + define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci) + define(UTFC7, utf32_spanish2_ci utf32_spanish_ci) + define(UTFC8, utf32_swedish_ci utf32_turkish_ci) + define(UTFC9, utf32_unicode_ci) + UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9" + default_charset_collations="$UTFC" + ;; *) AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE). See the Installation chapter in the Reference Manual.]) diff --git a/include/config-win.h b/include/config-win.h index 57d4ed26307..269ec0e925a 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d) #define HAVE_CHARSET_ucs2 1 #define HAVE_CHARSET_ujis 1 #define HAVE_CHARSET_utf8 1 +#define HAVE_CHARSET_utf8mb4 1 +#define HAVE_CHARSET_utf16 1 +#define HAVE_CHARSET_utf32 1 #define HAVE_UCA_COLLATIONS 1 #define HAVE_BOOL 1 diff --git a/include/m_ctype.h b/include/m_ctype.h index 7cf5ce113f8..d97c0c87b6e 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_BINSORT 16 /* if binary sort order */ #define MY_CS_PRIMARY 32 /* if primary collation */ #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ -#define MY_CS_UNICODE 128 /* is a charset is full unicode */ +#define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */ #define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ #define MY_CS_HIDDEN 2048 /* don't display in SHOW */ #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ +#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */ #define MY_CHARSET_UNDEFINED 0 /* Character repertoire flags */ @@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_REPERTOIRE_EXTENDED 2 /* Extended 
characters: U+0080..U+FFFF */ #define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */ - typedef struct my_uni_idx_st { uint16 from; @@ -304,10 +304,14 @@ typedef struct charset_info_st extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename; + extern CHARSET_INFO my_charset_big5_chinese_ci; extern CHARSET_INFO my_charset_big5_bin; extern CHARSET_INFO my_charset_cp932_japanese_ci; extern CHARSET_INFO my_charset_cp932_bin; +extern CHARSET_INFO my_charset_cp1250_czech_ci; extern CHARSET_INFO my_charset_eucjpms_japanese_ci; extern CHARSET_INFO my_charset_eucjpms_bin; extern CHARSET_INFO my_charset_euckr_korean_ci; @@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci; extern CHARSET_INFO my_charset_gb2312_bin; extern CHARSET_INFO my_charset_gbk_chinese_ci; extern CHARSET_INFO my_charset_gbk_bin; -extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1; extern CHARSET_INFO my_charset_latin1_german2_ci; extern CHARSET_INFO my_charset_latin1_bin; extern CHARSET_INFO my_charset_latin2_czech_ci; @@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin; extern CHARSET_INFO my_charset_ucs2_unicode_ci; extern CHARSET_INFO my_charset_ujis_japanese_ci; extern CHARSET_INFO my_charset_ujis_bin; +extern CHARSET_INFO my_charset_utf16_bin; +extern CHARSET_INFO my_charset_utf16_general_ci; +extern CHARSET_INFO my_charset_utf16_unicode_ci; +extern CHARSET_INFO my_charset_utf32_bin; +extern CHARSET_INFO my_charset_utf32_general_ci; +extern CHARSET_INFO my_charset_utf32_unicode_ci; + extern CHARSET_INFO my_charset_utf8_general_ci; extern CHARSET_INFO my_charset_utf8_unicode_ci; extern CHARSET_INFO my_charset_utf8_bin; -extern CHARSET_INFO my_charset_cp1250_czech_ci; -extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename; +extern CHARSET_INFO my_charset_utf8mb4_bin; +extern CHARSET_INFO my_charset_utf8mb4_general_ci; +extern 
CHARSET_INFO my_charset_utf8mb4_unicode_ci; +#define MY_UTF8MB3 "utf8" +#define MY_UTF8MB4 "utf8mb4" + /* declarations for simple charsets */ extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t, @@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_str, char *max_str, size_t *min_length, size_t *max_length); +my_bool my_like_range_utf16(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str, char *max_str, + size_t *min_length, size_t *max_length); + +my_bool my_like_range_utf32(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str, char *max_str, + size_t *min_length, size_t *max_length); int my_wildcmp_8bit(CHARSET_INFO *, const char *str,const char *str_end, @@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *, const char *s, size_t s_length, my_match_t *match, uint nmatch); +int my_strnncoll_mb_bin(CHARSET_INFO * cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix); + +int my_strnncollsp_mb_bin(CHARSET_INFO *cs, + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + my_bool diff_if_only_endspace_difference); + +int my_wildcmp_mb_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many); + +int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const char *s, const char *t); + +void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2); + +size_t my_strnxfrm_unicode(CHARSET_INFO *, + uchar *dst, size_t dstlen, + const uchar *src, size_t srclen); + int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, diff --git a/mysql-test/include/ctype_datetime.inc 
b/mysql-test/include/ctype_datetime.inc new file mode 100644 index 00000000000..dc70f1f38a9 --- /dev/null +++ b/mysql-test/include/ctype_datetime.inc @@ -0,0 +1,11 @@ +# +# Bug#32390 Character sets: casting utf32 to/from date doesn't work +# +CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0; +SET timestamp=1216359724; +INSERT INTO t1 VALUES (current_date); +INSERT INTO t1 VALUES (current_time); +INSERT INTO t1 VALUES (current_timestamp); +SELECT s1, hex(s1) FROM t1; +DROP TABLE t1; +SET timestamp=0; diff --git a/mysql-test/include/ctype_like.inc b/mysql-test/include/ctype_like.inc new file mode 100644 index 00000000000..38de0bf2671 --- /dev/null +++ b/mysql-test/include/ctype_like.inc @@ -0,0 +1,50 @@ +select @@collation_connection; + +# +# Create a table with a nullable varchar(10) column +# using currect character_set_connection. +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +explain select * from t1 where a like concat('abc','%'); +select * from t1 where a like "abc%"; +select * from t1 where a like concat("abc","%"); +select * from t1 where a like "ABC%"; +select * from t1 where a like "test%"; +select * from t1 where a like "te_t"; +select * from t1 where a like "%a%"; +select * from t1 where a like "%abcd%"; +select * from t1 where a like "%abc\d%"; +drop table t1; + +# +# Bug #2619 ucs2 LIKE comparison fails in some cases +# + +select 'AA' like 'AA'; +select 'AA' like 'A%A'; +select 'AA' like 'A%%A'; +select 'AA' like 'AA%'; +select 'AA' like '%AA%'; +select 'AA' like '%A'; +select 'AA' like '%AA'; +select 'AA' like 'A%A%'; +select 'AA' like '_%_%'; +select 'AA' like '%A%A'; +select 'AAA'like 'A%A%A'; + +select 'AZ' like 'AZ'; +select 'AZ' like 'A%Z'; +select 'AZ' like 'A%%Z'; +select 'AZ' like 'AZ%'; +select 'AZ' like '%AZ%'; +select 'AZ' like '%Z'; +select 'AZ' like 
'%AZ'; +select 'AZ' like 'A%Z%'; +select 'AZ' like '_%_%'; +select 'AZ' like '%A%Z'; +select 'AZ' like 'A_'; +select 'AZ' like '_Z'; +select 'AMZ'like 'A%M%Z'; diff --git a/mysql-test/include/have_utf16.inc b/mysql-test/include/have_utf16.inc new file mode 100644 index 00000000000..ab22c255c88 --- /dev/null +++ b/mysql-test/include/have_utf16.inc @@ -0,0 +1,4 @@ +-- require r/have_utf16.require +disable_query_log; +show collation like 'utf16_general_ci'; +enable_query_log; diff --git a/mysql-test/include/have_utf32.inc b/mysql-test/include/have_utf32.inc new file mode 100644 index 00000000000..f5b5353c9fd --- /dev/null +++ b/mysql-test/include/have_utf32.inc @@ -0,0 +1,4 @@ +-- require r/have_utf32.require +disable_query_log; +show collation like 'utf32_general_ci'; +enable_query_log; diff --git a/mysql-test/include/have_utf8mb4.inc b/mysql-test/include/have_utf8mb4.inc new file mode 100644 index 00000000000..6eb91b1c23c --- /dev/null +++ b/mysql-test/include/have_utf8mb4.inc @@ -0,0 +1,7 @@ +--require r/have_utf8mb4.require + +--disable_query_log + +SHOW COLLATION LIKE 'utf8mb4_general_ci'; + +--enable_query_log diff --git a/mysql-test/r/ctype_ldml.result b/mysql-test/r/ctype_ldml.result index 3373e31539f..433078a2a07 100644 --- a/mysql-test/r/ctype_ldml.result +++ b/mysql-test/r/ctype_ldml.result @@ -53,6 +53,33 @@ select * from t1 where c1='b'; c1 a drop table t1; +show collation like 'utf8mb4_test_ci'; +Collation Charset Id Default Compiled Sortlen +utf8mb4_test_ci utf8mb4 326 8 +create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +c1 +a +drop table t1; +show collation like 'utf16_test_ci'; +Collation Charset Id Default Compiled Sortlen +utf16_test_ci utf16 327 8 +create table t1 (c1 char(1) character set utf16 collate utf16_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +c1 +a +drop table t1; +show collation like 'utf32_test_ci'; +Collation Charset Id 
Default Compiled Sortlen +utf32_test_ci utf32 391 8 +create table t1 (c1 char(1) character set utf32 collate utf32_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +c1 +a +drop table t1; CREATE TABLE t1 ( col1 varchar(100) character set utf8 collate utf8_test_ci ); @@ -373,16 +400,22 @@ select "foo" = "foo " collate latin1_test; The following tests check that two-byte collation IDs work select * from information_schema.collations where id>256 order by id; COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN +utf8mb4_test_ci utf8mb4 326 8 +utf16_test_ci utf16 327 8 utf8_phone_ci utf8 352 8 utf8_test_ci utf8 353 8 ucs2_test_ci ucs2 358 8 ucs2_vn_ci ucs2 359 8 +utf32_test_ci utf32 391 8 utf8_maxuserid_ci utf8 2047 8 show collation like '%test%'; Collation Charset Id Default Compiled Sortlen latin1_test latin1 99 Yes 1 utf8_test_ci utf8 353 8 ucs2_test_ci ucs2 358 8 +utf8mb4_test_ci utf8mb4 326 8 +utf16_test_ci utf16 327 8 +utf32_test_ci utf32 391 8 show collation like 'ucs2_vn_ci'; Collation Charset Id Default Compiled Sortlen ucs2_vn_ci ucs2 359 8 diff --git a/mysql-test/r/ctype_many.result b/mysql-test/r/ctype_many.result index 89e05bf4484..4730b4df668 100644 --- a/mysql-test/r/ctype_many.result +++ b/mysql-test/r/ctype_many.result @@ -1683,3 +1683,59 @@ ARMENIAN CAPIT DA 2 ARMENIAN CAPIT ECH 2 ARMENIAN CAPIT ZA 2 DROP TABLE t1; +# +# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32 +# Testing that only utf8mb4 is superset for utf8 +# No other Unicode character set pairs have superset/subset relations +# +CREATE TABLE t1 ( +utf8 CHAR CHARACTER SET utf8, +utf8mb4 CHAR CHARACTER SET utf8mb4, +ucs2 CHAR CHARACTER SET ucs2, +utf16 CHAR CHARACTER SET utf16, +utf32 CHAR CHARACTER SET utf32 +); +INSERT INTO t1 VALUES ('','','','',''); +SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1; +CHARSET(CONCAT(utf8, utf8mb4)) +utf8mb4 +SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and 
(ucs2_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1; +CHARSET(CONCAT(utf8mb4, utf8)) +utf8mb4 +SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1; +ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1; +ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1; +ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1; +ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1; +ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1; +ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1; +ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 
'concat' +SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1; +ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1; +ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1; +ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1; +ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1; +ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat' +SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1; +ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat' +DROP TABLE t1; diff --git a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result new file mode 100644 index 00000000000..3c2fe316d71 --- /dev/null +++ b/mysql-test/r/ctype_utf16.result @@ -0,0 +1,1038 @@ +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +SET NAMES latin1; +SET character_set_connection=utf16; +select hex('a'), hex('a '); +hex('a') hex('a ') +0061 00610020 +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a' +0 1 0 +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0' +0 0 1 +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a ' +0 1 0 +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0' +0 0 1 +select 'a a' > 'a', 'a \0' < 'a'; +'a a' > 'a' 'a \0' < 'a' +1 1 +select binary 'a a' > 'a', binary 'a \0' > 'a', 
binary 'a\0' > 'a'; +binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' +1 1 1 +select hex(_utf16 0x44); +hex(_utf16 0x44) +00000044 +select hex(_utf16 0x3344); +hex(_utf16 0x3344) +3344 +select hex(_utf16 0x113344); +hex(_utf16 0x113344) +000000113344 +CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16; +INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004'); +SELECT hex(word) FROM t1 ORDER BY word; +hex(word) +0420 +2004 +SELECT hex(word2) FROM t1 ORDER BY word2; +hex(word2) +0420 +2004 +DELETE FROM t1; +INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020'); +SELECT hex(word) FROM t1 ORDER BY word; +hex(word) +042000200020 +200400200020 +SELECT hex(word2) FROM t1 ORDER BY word2; +hex(word2) +0420 +2004 +DROP TABLE t1; +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421')); +hex(LPAD(_utf16 X'0420',10,_utf16 X'0421')) +0421042104210421042104210421042104210420 +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422')); +hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422')) +0421042204210422042104220421042204210420 +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423')); +hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423')) +0421042204230421042204230421042204230420 +SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')); +hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')) +0420042104220423042404250426042704280429 +SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')); +hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')) +042104210421042104210421042104210421D800DC00 +SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')); +hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')) +D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC000421 +SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421')); +hex(RPAD(_utf16 X'0420',10,_utf16 X'0421')) +0420042104210421042104210421042104210421 +SELECT 
hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422')); +hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422')) +0420042104220421042204210422042104220421 +SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423')); +hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423')) +0420042104220423042104220423042104220423 +SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')); +hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')) +0420042104220423042404250426042704280429 +SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')); +hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')) +D800DC00042104210421042104210421042104210421 +SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')); +hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')) +0421D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00 +CREATE TABLE t1 SELECT +LPAD(_utf16 X'0420',10,_utf16 X'0421') l, +RPAD(_utf16 X'0420',10,_utf16 X'0421') r; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `l` varchar(10) CHARACTER SET utf16 NOT NULL DEFAULT '', + `r` varchar(10) CHARACTER SET utf16 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select hex(l), hex(r) from t1; +hex(l) hex(r) +0421042104210421042104210421042104210420 0420042104210421042104210421042104210421 +DROP TABLE t1; +create table t1 (f1 char(30)); +insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000"); +select lpad(f1, 12, "-o-/") from t1; +lpad(f1, 12, "-o-/") +-o-/-o103000 +-o-/22720000 +-o-/-3401200 +-o-/-o-78000 +drop table t1; +SET NAMES latin1; +SET character_set_connection=utf16; +select @@collation_connection; +@@collation_connection +utf16_general_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16 DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET NAMES utf8; +SET 
character_set_connection=utf16; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16); +INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА'); +INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж'); +INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж'); +INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ'); +SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a; +a +ФЫВА +ФЫВАПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывА +фывАпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a; +a +ФЫВА +ФЫВАПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывА +фывАпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a; +a +ФЫВА +ФЫВАПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывА +фывАпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a; +a +фЫва +фЫвапролдж +DROP TABLE t1; +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +ENGINE=MyISAM CHARACTER SET utf16; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +word +cat +SELECT * FROM t1 WHERE word LIKE "ca_"; +word +cat +SELECT * FROM t1 WHERE word LIKE "cat"; +word +cat +SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025'; +word +cat +SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; +word +cat +DROP TABLE t1; +select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066); +insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066) +abc +select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066); +insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066) +defc +SET NAMES latin1; +CREATE TABLE t1 ( +word 
VARCHAR(64), +bar INT(11) default 0, +PRIMARY KEY (word)) +ENGINE=MyISAM +CHARSET utf16 +COLLATE utf16_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT * FROM t1 ORDER BY word; +word bar +a 0 +aar 0 +aardvar 0 +aardvara 0 +aardvark 0 +aardvarz 0 +EXPLAIN SELECT word FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index +SELECT word FROM t1 ORDER by word; +word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +CREATE TABLE t1 ( +word VARCHAR(64) , +PRIMARY KEY (word)) +ENGINE=MyISAM +CHARSET utf16 +COLLATE utf16_general_ci; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY WORD; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index +SELECT * FROM t1 ORDER BY word; +word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +CREATE TABLE t1 ( +word TEXT, +bar INT(11) AUTO_INCREMENT, +PRIMARY KEY (bar)) +ENGINE=MyISAM +CHARSET utf16 +COLLATE utf16_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a" ); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +id select_type table type 
possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT * FROM t1 ORDER BY word; +word bar +a 2 +aar 1 +aardvar 3 +aardvara 5 +aardvark 4 +aardvarz 6 +EXPLAIN SELECT word FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT word FROM t1 ORDER BY word; +word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +SELECT hex(cast(0xAA as char character set utf16)); +hex(cast(0xAA as char character set utf16)) +000000AA +SELECT hex(convert(0xAA using utf16)); +hex(convert(0xAA using utf16)) +000000AA +CREATE TABLE t1 (a char(10) character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +0001 +0011 +0111 +1111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a varchar(10) character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +0001 +0011 +0111 +1111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a text character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +0001 +0011 +0111 +1111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a mediumtext character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +0001 +0011 +0111 +1111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a longtext character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +0001 +0011 +0111 +1111 +00011111 +DROP TABLE t1; +create table t1(a char(1)) default charset utf16; +insert into t1 values ('a'),('b'),('c'); +alter table t1 modify a char(5); +select a, hex(a) from t1; +a hex(a) +a 0061 +b 0062 +c 0063 +drop table t1; +set @ivar= 1234; +set @str1 = 'select ?'; +set @str2 = convert(@str1 using utf16); +prepare stmt1 from @str2; +execute stmt1 using @ivar; +? 
+1234 +set names utf8; +create table t1 (a enum('x','y','z') character set utf16); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('x','y','z') CHARACTER SET utf16 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +select a, hex(a) from t1 order by a; +a hex(a) +x 0078 +y 0079 +z 007A +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('x','y','z','d','e','ä','ö','ü') CHARACTER SET utf16 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('D'); +insert into t1 values ('E '); +insert into t1 values ('ä'); +insert into t1 values ('ö'); +insert into t1 values ('ü'); +select a, hex(a) from t1 order by a; +a hex(a) +x 0078 +y 0079 +z 007A +d 0064 +e 0065 +ä 00E4 +ö 00F6 +ü 00FC +drop table t1; +create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` set('x','y','z','ä','ö','ü') CHARACTER SET utf16 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +insert into t1 values ('x,y'); +insert into t1 values ('x,y,z,ä,ö,ü'); +select a, hex(a) from t1 order by a; +a hex(a) +x 0078 +y 0079 +x,y 0078002C0079 +z 007A +x,y,z,ä,ö,ü 0078002C0079002C007A002C00E4002C00F6002C00FC +drop table t1; +create table t1(a enum('a','b','c')) default character set utf16; +insert into t1 values('a'),('b'),('c'); +alter table t1 add b char(1); +show warnings; +Level Code Message +select * from t1 order by a; +a b +a NULL +b NULL +c NULL +drop table t1; +SET NAMES latin1; +SET collation_connection='utf16_general_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by 
a; +collation(a) hex(a) +utf16_general_ci 00610009 +utf16_general_ci 0061 +utf16_general_ci 00610020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf16_general_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET NAMES latin1; +SET collation_connection='utf16_bin'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf16_bin 00610009 +utf16_bin 0061 +utf16_bin 00610020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf16_bin +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +select hex(substr(_utf16 0x00e400e50068,1)); +hex(substr(_utf16 0x00e400e50068,1)) +00E400E50068 +select hex(substr(_utf16 0x00e400e50068,2)); +hex(substr(_utf16 0x00e400e50068,2)) +00E50068 +select hex(substr(_utf16 0x00e400e50068,3)); +hex(substr(_utf16 0x00e400e50068,3)) +0068 +select hex(substr(_utf16 0x00e400e50068,-1)); +hex(substr(_utf16 0x00e400e50068,-1)) +0068 +select hex(substr(_utf16 0x00e400e50068,-2)); +hex(substr(_utf16 0x00e400e50068,-2)) +00E50068 +select hex(substr(_utf16 0x00e400e50068,-3)); 
+hex(substr(_utf16 0x00e400e50068,-3)) +00E400E50068 +select hex(substr(_utf16 0x00e400e5D800DC00,1)); +hex(substr(_utf16 0x00e400e5D800DC00,1)) +00E400E5D800DC00 +select hex(substr(_utf16 0x00e400e5D800DC00,2)); +hex(substr(_utf16 0x00e400e5D800DC00,2)) +00E5D800DC00 +select hex(substr(_utf16 0x00e400e5D800DC00,3)); +hex(substr(_utf16 0x00e400e5D800DC00,3)) +D800DC00 +select hex(substr(_utf16 0x00e400e5D800DC00,-1)); +hex(substr(_utf16 0x00e400e5D800DC00,-1)) +D800DC00 +select hex(substr(_utf16 0x00e400e5D800DC00,-2)); +hex(substr(_utf16 0x00e400e5D800DC00,-2)) +00E5D800DC00 +select hex(substr(_utf16 0x00e400e5D800DC00,-3)); +hex(substr(_utf16 0x00e400e5D800DC00,-3)) +00E400E5D800DC00 +SET NAMES latin1; +create table t1 (utext varchar(20) character set utf16); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +utext +lily +river +execute stmt using @param1; +utext +lily +river +select utext from t1 where utext like '%%'; +utext +lily +river +drop table t1; +deallocate prepare stmt; +create table t1 ( +a char(10) character set utf16 not null, +index a (a) +) engine=myisam; +insert into t1 values (repeat(0x201f, 10)); +insert into t1 values (repeat(0x2020, 10)); +insert into t1 values (repeat(0x2021, 10)); +explain select hex(a) from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 40 NULL 3 Using index +select hex(a) from t1 order by a; +hex(a) +201F201F201F201F201F201F201F201F201F201F +2020202020202020202020202020202020202020 +2021202120212021202120212021202120212021 +alter table t1 drop index a; +select hex(a) from t1 order by a; +hex(a) +201F201F201F201F201F201F201F201F201F201F +2020202020202020202020202020202020202020 +2021202120212021202120212021202120212021 +drop table t1; +CREATE TABLE t1 ( +status enum('active','passive') character set utf16 collate utf16_general_ci +NOT 
NULL default 'passive' +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `status` enum('active','passive') CHARACTER SET utf16 NOT NULL DEFAULT 'passive' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +ALTER TABLE t1 ADD a int NOT NULL AFTER status; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `status` enum('active','passive') CHARACTER SET utf16 NOT NULL DEFAULT 'passive', + `a` int(11) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +End of 4.1 tests +CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3)); +INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); +update t1 set b=a; +SELECT *, hex(a) FROM t1; +a b hex(a) +1.1 1.100 0031002E0031 +2.1 2.100 0032002E0031 +DROP TABLE t1; +create table t1 (utext varchar(20) character set utf16); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +utext +lily +river +execute stmt using @param1; +utext +lily +river +select utext from t1 where utext like '%%'; +utext +lily +river +drop table t1; +deallocate prepare stmt; +set names latin1; +set character_set_connection=utf16; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +soundex('') soundex('he') soundex('hello all folks') soundex('#3556 in bugdb') + H000 H4142 I51231 +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +hex(soundex('')) hex(soundex('he')) hex(soundex('hello all folks')) hex(soundex('#3556 in bugdb')) + 0048003000300030 00480034003100340032 004900350031003200330031 +select 'mood' sounds like 'mud'; +'mood' sounds like 'mud' +1 +select hex(soundex(_utf16 0x041004110412)); +hex(soundex(_utf16 0x041004110412)) +0410003000300030 +select hex(soundex(_utf16 0x00BF00C0)); +hex(soundex(_utf16 0x00BF00C0)) +00C0003000300030 +set names latin1; +create table t1(a blob, b text charset 
utf16); +select data_type, character_octet_length, character_maximum_length +from information_schema.columns where table_name='t1'; +data_type character_octet_length character_maximum_length +blob 65535 65535 +text 65535 32767 +drop table t1; +set names latin1; +set collation_connection=utf16_general_ci; +select position('bb' in 'abba'); +position('bb' in 'abba') +2 +create table t1 (a varchar(10) character set utf16) engine=heap; +insert into t1 values ('a'),('A'),('b'),('B'); +select * from t1 where a='a' order by binary a; +a +A +a +select hex(min(binary a)),count(*) from t1 group by a; +hex(min(binary a)) count(*) +0041 2 +0042 2 +drop table t1; +select char_length('abcd'), octet_length('abcd'); +char_length('abcd') octet_length('abcd') +4 8 +select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00); +char_length(_utf16 0xD800DC00) octet_length(_utf16 0xD800DC00) +1 4 +select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF); +char_length(_utf16 0xD87FDFFF) octet_length(_utf16 0xD87FDFFF) +1 4 +select left('abcd',2); +left('abcd',2) +ab +select hex(left(_utf16 0xD800DC00D87FDFFF, 1)); +hex(left(_utf16 0xD800DC00D87FDFFF, 1)) +D800DC00 +select hex(right(_utf16 0xD800DC00D87FDFFF, 1)); +hex(right(_utf16 0xD800DC00D87FDFFF, 1)) +D87FDFFF +create table t1 (a varchar(10) character set utf16); +insert into t1 values (_utf16 0xD800); +ERROR HY000: Invalid utf16 character string: 'D800' +insert into t1 values (_utf16 0xDC00); +ERROR HY000: Invalid utf16 character string: 'DC00' +insert into t1 values (_utf16 0xD800D800); +ERROR HY000: Invalid utf16 character string: 'D800D8' +insert into t1 values (_utf16 0xD800E800); +ERROR HY000: Invalid utf16 character string: 'D800E8' +insert into t1 values (_utf16 0xD8000800); +ERROR HY000: Invalid utf16 character string: 'D80008' +insert into t1 values (_utf16 0xD800DC00); +insert into t1 values (_utf16 0xD800DCFF); +insert into t1 values (_utf16 0xDBFFDC00); +insert into t1 values (_utf16 0xDBFFDCFF); 
+select hex(a) from t1; +hex(a) +D800DC00 +D800DCFF +DBFFDC00 +DBFFDCFF +drop table t1; +create table t1 (s1 varchar(50) character set ucs2); +insert into t1 values (0xdf84); +alter table t1 modify column s1 varchar(50) character set utf16; +Warnings: +Warning 1366 Incorrect string value: '\xDF\x84' for column 's1' at row 1 +select hex(s1) from t1; +hex(s1) +003F +drop table t1; +create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16); +insert into t1 (s1) values (0xdf84); +update t1 set s2 = s1; +Warnings: +Warning 1366 Incorrect string value: '\xDF\x84' for column 's2' at row 1 +select hex(s2) from t1; +hex(s2) +003F +drop table t1; +create table t1 (a char(10)) character set utf16; +insert into t1 values ('a '); +select hex(a) from t1; +hex(a) +0061 +drop table t1; +select upper('abcd'), lower('ABCD'); +upper('abcd') lower('ABCD') +ABCD abcd +create table t1 (a varchar(10) character set utf16); +insert into t1 values (123456); +select a, hex(a) from t1; +a hex(a) +123456 003100320033003400350036 +drop table t1; +select hex(soundex('a')); +hex(soundex('a')) +0041003000300030 +create table t1 (a enum ('a','b','c')) character set utf16; +insert into t1 values ('1'); +select * from t1; +a +a +drop table t1; +set names latin1; +select hex(conv(convert('123' using utf16), -10, 16)); +hex(conv(convert('123' using utf16), -10, 16)) +3742 +select hex(conv(convert('123' using utf16), 10, 16)); +hex(conv(convert('123' using utf16), 10, 16)) +3742 +set names latin1; +set character_set_connection=utf16; +select 1.1 + '1.2'; +1.1 + '1.2' +2.3 +select 1.1 + '1.2xxx'; +1.1 + '1.2xxx' +2.3 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: '' +select left('aaa','1'); +left('aaa','1') +a +create table t1 (a int); +insert into t1 values ('-1234.1e2'); +insert into t1 values ('-1234.1e2xxxx'); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +insert into t1 values ('-1234.1e2 '); +select * from t1; +a +-123410 +-123410 +-123410 
+drop table t1; +create table t1 (a int); +insert into t1 values ('1 '); +insert into t1 values ('1 x'); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select * from t1; +a +1 +1 +drop table t1; +create table t1 (a varchar(17000) character set utf16); +Warnings: +Note 1246 Converting column 'a' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` mediumtext CHARACTER SET utf16 +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(250) character set utf16 primary key); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(250) CHARACTER SET utf16 NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(334) character set utf16 primary key); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +create table t1 (a char(1) character set utf16); +insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF); +insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF); +select hex(a), hex(@a:=convert(a using utf8mb4)), hex(convert(@a using utf16)) from t1; +hex(a) hex(@a:=convert(a using utf8mb4)) hex(convert(@a using utf16)) +D800DC00 F0908080 D800DC00 +D800DCFF F09083BF D800DCFF +DB7FDC00 F3AFB080 DB7FDC00 +DB7FDCFF F3AFB3BF DB7FDCFF +00C0 C380 00C0 +00FF C3BF 00FF +E000 EE8080 E000 +FFFF EFBFBF FFFF +drop table t1; +set collation_connection=utf16_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) CHARACTER SET utf16 DEFAULT NULL, + `s2` varchar(64) CHARACTER SET utf16 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); 
+insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names latin1; +SET collation_connection=utf16_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0; +SET timestamp=1216359724; +INSERT INTO t1 VALUES (current_date); +INSERT INTO t1 VALUES (current_time); +INSERT INTO t1 VALUES (current_timestamp); +SELECT s1, hex(s1) FROM t1; +s1 hex(s1) +2008-07-18 0032003000300038002D00300037002D00310038 +08:42:04 00300038003A00340032003A00300034 +2008-07-18 08:42:04 0032003000300038002D00300037002D00310038002000300038003A00340032003A00300034 +DROP TABLE t1; +SET timestamp=0; +SET NAMES latin1; +SET collation_connection=utf16_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(2) CHARACTER SET utf16 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE'); +SELECT * FROM t1 ORDER BY s1; +s1 +ab +ab +AE +AE +SET max_sort_length=4; +SELECT * FROM t1 ORDER BY s1; +s1 +ab +ab +AE +AE +DROP TABLE t1; +SET max_sort_length=DEFAULT; +SET NAMES latin1; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf16_uca.result b/mysql-test/r/ctype_utf16_uca.result new file mode 100644 index 00000000000..d83ef2af09e --- /dev/null +++ b/mysql-test/r/ctype_utf16_uca.result @@ -0,0 +1,2373 @@ +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +set names utf8; +set collation_connection=utf16_unicode_ci; +select hex('a'), hex('a '); +hex('a') hex('a ') +0061 00610020 
+select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a' +1 0 0 +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0' +1 0 0 +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a ' +1 0 0 +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0' +1 0 0 +select 'a a' > 'a', 'a \0' < 'a'; +'a a' > 'a' 'a \0' < 'a' +1 0 +select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; +binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' +1 1 1 +select 'c' like '\_' as want0; +want0 +0 +CREATE TABLE t ( +c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +c +a +ab +aba +DROP TABLE t; +create table t1 (c1 char(10) character set utf16 collate utf16_bin); +insert into t1 values ('A'),('a'); +insert into t1 values ('B'),('b'); +insert into t1 values ('C'),('c'); +insert into t1 values ('D'),('d'); +insert into t1 values ('E'),('e'); +insert into t1 values ('F'),('f'); +insert into t1 values ('G'),('g'); +insert into t1 values ('H'),('h'); +insert into t1 values ('I'),('i'); +insert into t1 values ('J'),('j'); +insert into t1 values ('K'),('k'); +insert into t1 values ('L'),('l'); +insert into t1 values ('M'),('m'); +insert into t1 values ('N'),('n'); +insert into t1 values ('O'),('o'); +insert into t1 values ('P'),('p'); +insert into t1 values ('Q'),('q'); +insert into t1 values ('R'),('r'); +insert into t1 values ('S'),('s'); +insert into t1 values ('T'),('t'); +insert into t1 values ('U'),('u'); +insert into t1 values ('V'),('v'); +insert into t1 values ('W'),('w'); +insert into t1 values ('X'),('x'); +insert into t1 values ('Y'),('y'); +insert into t1 values ('Z'),('z'); +insert into t1 values (0x00e0),(0x00c0); 
+insert into t1 values (0x00e1),(0x00c1); +insert into t1 values (0x00e2),(0x00c2); +insert into t1 values (0x00e3),(0x00c3); +insert into t1 values (0x00e4),(0x00c4); +insert into t1 values (0x00e5),(0x00c5); +insert into t1 values (0x00e6),(0x00c6); +insert into t1 values (0x00e7),(0x00c7); +insert into t1 values (0x00e8),(0x00c8); +insert into t1 values (0x00e9),(0x00c9); +insert into t1 values (0x00ea),(0x00ca); +insert into t1 values (0x00eb),(0x00cb); +insert into t1 values (0x00ec),(0x00cc); +insert into t1 values (0x00ed),(0x00cd); +insert into t1 values (0x00ee),(0x00ce); +insert into t1 values (0x00ef),(0x00cf); +insert into t1 values (0x00f0),(0x00d0); +insert into t1 values (0x00f1),(0x00d1); +insert into t1 values (0x00f2),(0x00d2); +insert into t1 values (0x00f3),(0x00d3); +insert into t1 values (0x00f4),(0x00d4); +insert into t1 values (0x00f5),(0x00d5); +insert into t1 values (0x00f6),(0x00d6); +insert into t1 values (0x00f7),(0x00d7); +insert into t1 values (0x00f8),(0x00d8); +insert into t1 values (0x00f9),(0x00d9); +insert into t1 values (0x00fa),(0x00da); +insert into t1 values (0x00fb),(0x00db); +insert into t1 values (0x00fc),(0x00dc); +insert into t1 values (0x00fd),(0x00dd); +insert into t1 values (0x00fe),(0x00de); +insert into t1 values (0x00ff),(0x00df); +insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103); +insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107); +insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b); +insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f); +insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113); +insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117); +insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b); +insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f); +insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123); +insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127); +insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b); +insert into t1 values 
(0x012c),(0x012d),(0x012e),(0x012f); +insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133); +insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137); +insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b); +insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f); +insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143); +insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147); +insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b); +insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f); +insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153); +insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157); +insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b); +insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f); +insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163); +insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167); +insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b); +insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f); +insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173); +insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177); +insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b); +insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f); +insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183); +insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187); +insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b); +insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f); +insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193); +insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197); +insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b); +insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f); +insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3); +insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7); +insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab); +insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af); +insert into t1 values 
(0x01b0),(0x01b1),(0x01b2),(0x01b3); +insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7); +insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb); +insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf); +insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3); +insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7); +insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb); +insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf); +insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3); +insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7); +insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db); +insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df); +insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3); +insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7); +insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb); +insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef); +insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3); +insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7); +insert into t1 values (0x01f8),(0x01f9),(0x01fa),(0x01fb); +insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff); +insert into t1 values ('AA'),('Aa'),('aa'),('aA'); +insert into t1 values ('CH'),('Ch'),('ch'),('cH'); +insert into t1 values ('DZ'),('Dz'),('dz'),('dZ'); +insert into t1 values ('IJ'),('Ij'),('ij'),('iJ'); +insert into t1 values ('LJ'),('Lj'),('lj'),('lJ'); +insert into t1 values ('LL'),('Ll'),('ll'),('lL'); +insert into t1 values ('NJ'),('Nj'),('nj'),('nJ'); +insert into t1 values ('OE'),('Oe'),('oe'),('oE'); +insert into t1 values ('SS'),('Ss'),('ss'),('sS'); +insert into t1 values ('RR'),('Rr'),('rr'),('rR'); +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ 
+ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÂÃà âãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ãá +ǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +Ãð +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +EeÈÊËèêëĒēĔĕĖėĘęĚě +Éé +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÌÎÃìîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Ãà +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÔÕòôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Óó +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÛÜùûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Úú +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÿŶŷŸ +Ãý +Ƴƴ +ZzŹźŻżŽž +Æ +Þþ +ÄÆäæ +ÖØöø +Ã…Ã¥ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +CHChcHch +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ç¦Ç§Ç´Çµ +Ģģ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Yy +ı +Æ— +Æ– +JjĴĵǰ +KkǨǩ +Ķķ +Ƙƙ 
+LlĹ弾 +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +Ļļ +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ņņ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŘř +RRRrrRrr +Å–Å— +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +ÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÃÄÅà áãäåĀÄÄ„Ä…ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ăă +Ââ +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃìÃïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Îî +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅÅ Å¡Å¿ +SSSssSssß +Şş +Æ© +ƪ +TtŤť +ƾ +Ţţ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +CHChcHch +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ 
+RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂăÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ä„Ä… +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ćć +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĚě +Ęę +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ńń +Æ +Æž +ÅŠÅ‹ +OoÒÔÕÖòôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Óó +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsÅœÅŞşŠšſ +SSSssSssß +Śś +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŽž +Æ +Źź +Żż +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÅà áâãåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdz +DŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔòóôŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Zz +Žž +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Õõ +Ää +Öö +Üü +Xx +YyÃýÿŶŷŸ +Ƴƴ 
+ŹźŻż +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ññ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃà áâãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃœÃüýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ã…Ã¥ +ÄÆäæ +ÖØöø +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci; +group_concat(c1 
order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÄ†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Çç +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ğğ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +Iı +IJIj +ƕǶ +Ħħ +iÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +iJijIJij +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Öö +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅÅ Å¡Å¿ +SSSssSssß +Şş +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Üü +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +CHChch +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗ +RRRrrRrr +Řř +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃà áâãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +aA +ǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð 
+EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÆ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃœÃüýÿŰűŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +ÄÆäæ +ÖØöøÅÅ‘ +AAAaaaÃ…Ã¥ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CCHChcchÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IYiyÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +ÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÅà áâãåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ää +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +CHChch +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ 
+LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÕÖòóõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ôô +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +cH +CHChch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +lL +LLLlll +ÅÅ‚ +Æš +Æ› +Mm +NnŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ññ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IJijÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJij +IJij +ı +Æ— +Æ– +Ĵĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJlj +LJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnj +ÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr 
+Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +ÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +UVuv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄŠÄ‹ÄŒÄ +CHChcHch +Ĉĉ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +ÄœÄ +Ǥǥ +Æ“ +Æ” +Ƣƣ +Hh +Ĥĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjÇ° +Ĵĵ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŞşŠšſ +SSSssSssß +ŜŠ+Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +ŬŠ+Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÆ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ÖöÅÅ‘ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŲųƯưǓǔǕǖǗǘǙǚǛǜ +ÜüŰű +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ 
+Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +drop table t1; +SET NAMES utf8; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (0x039C03C903B4); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_general_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (0x039C03C903B4); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (0x039C03C903B4); +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +SET NAMES utf8; +SET @test_character_set='utf16'; +SET @test_collation='utf16_swedish_ci'; +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET @safe_character_set_client= @@character_set_client; +SET @safe_character_set_results= @@character_set_results; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; +CREATE TABLE t1 (c CHAR(10), KEY(c)); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c char(10) utf16_swedish_ci YES MUL NULL +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +want3results +aaa +aaaa +aaaaa 
+DROP TABLE t1; +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c1 varchar(15) utf16_swedish_ci YES MUL NULL +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +want3results +location +loberge +lotre +SELECT c1 as want3results from t1 where c1 like 'lo%'; +want3results +location +loberge +lotre +SELECT c1 as want1result from t1 where c1 like 'loc%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'loca%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locat%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locati%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'location%'; +want1result +location +DROP TABLE t1; +create table t1 (a set('a') not null); +insert into t1 values (),(); +Warnings: +Warning 1364 Field 'a' doesn't have a default value +select cast(a as char(1)) from t1; +cast(a as char(1)) + + +select a sounds like a from t1; +a sounds like a +1 +1 +select 1 from t1 order by cast(a as char(1)); +1 +1 +1 +drop table t1; +set names utf8; +create table t1 ( +name varchar(10), +level smallint unsigned); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `name` varchar(10) COLLATE utf16_swedish_ci DEFAULT NULL, + `level` smallint(5) unsigned DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf16 COLLATE=utf16_swedish_ci +insert into t1 values ('string',1); +select concat(name,space(level)), concat(name, repeat(' ',level)) from t1; +concat(name,space(level)) concat(name, repeat(' ',level)) +string string +drop table t1; +DROP DATABASE d1; +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; +SET character_set_client= @safe_character_set_client; 
+SET character_set_results= @safe_character_set_results; +SET collation_connection='utf16_unicode_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf16_unicode_ci 00610009 +utf16_unicode_ci 0061 +utf16_unicode_ci 00610020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf16_unicode_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +End of 4.1 tests +CREATE TABLE t1 (id int, a varchar(30) character set utf16); +INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131); +INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049); +INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049); +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +a la l ll u lu +ıi 4 ıi 4 II 4 +ıı 4 ıı 4 II 4 +ii 4 ii 4 II 4 +Ä°I 4 ii 4 Ä°I 4 +Ä°Ä° 4 ii 4 Ä°Ä° 4 +II 4 ii 4 II 4 +ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci; +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +a la l ll u lu +ıi 4 ıi 4 IÄ° 4 +ıı 4 ıı 4 II 4 +ii 4 ii 4 Ä°Ä° 4 +Ä°I 4 iı 4 Ä°I 4 +Ä°Ä° 4 ii 4 Ä°Ä° 4 +II 4 ıı 4 II 4 +DROP TABLE t1; +CREATE TABLE t1 ( +c1 text character set utf16 collate utf16_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +COUNT(*) c1 +1 +1 a +DROP TABLE IF EXISTS t1; +set collation_connection=utf16_unicode_ci; +drop table if exists 
t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) CHARACTER SET utf16 COLLATE utf16_unicode_ci DEFAULT NULL, + `s2` varchar(64) CHARACTER SET utf16 COLLATE utf16_unicode_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result new file mode 100644 index 00000000000..f0f10be9743 --- /dev/null +++ b/mysql-test/r/ctype_utf32.result @@ -0,0 +1,1052 @@ +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +SET NAMES latin1; +SET character_set_connection=utf32; +select hex('a'), hex('a '); +hex('a') hex('a ') +00000061 0000006100000020 +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a' +0 1 0 +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0' +0 0 1 +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a ' +0 1 0 +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0' +0 0 1 +select 'a a' > 'a', 'a \0' < 'a'; +'a a' > 'a' 'a \0' < 'a' +1 1 
+select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; +binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' +1 1 1 +select hex(_utf32 0x44); +hex(_utf32 0x44) +00000044 +select hex(_utf32 0x3344); +hex(_utf32 0x3344) +00003344 +select hex(_utf32 0x103344); +hex(_utf32 0x103344) +00103344 +select hex(_utf32 X'44'); +hex(_utf32 X'44') +00000044 +select hex(_utf32 X'3344'); +hex(_utf32 X'3344') +00003344 +select hex(_utf32 X'103344'); +hex(_utf32 X'103344') +00103344 +CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32; +INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004'); +SELECT hex(word) FROM t1 ORDER BY word; +hex(word) +00000420 +00002004 +SELECT hex(word2) FROM t1 ORDER BY word2; +hex(word2) +00000420 +00002004 +DELETE FROM t1; +INSERT INTO t1 VALUES +(X'000004200000002000000020',X'000004200000002000000020'), +(X'000020040000002000000020',X'000020040000002000000020'); +SELECT hex(word) FROM t1 ORDER BY word; +hex(word) +000004200000002000000020 +000020040000002000000020 +SELECT hex(word2) FROM t1 ORDER BY word2; +hex(word2) +00000420 +00002004 +DROP TABLE t1; +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421')); +hex(LPAD(_utf32 X'0420',10,_utf32 X'0421')) +00000421000004210000042100000421000004210000042100000421000004210000042100000420 +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')) +00000421000004220000042100000422000004210000042200000421000004220000042100000420 +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')) +00000421000004220000042300000421000004220000042300000421000004220000042300000420 +SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); +hex(LPAD(_utf32 
X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')) +00000420000004210000042200000423000004240000042500000426000004270000042800000429 +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421')); +hex(RPAD(_utf32 X'0420',10,_utf32 X'0421')) +00000420000004210000042100000421000004210000042100000421000004210000042100000421 +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')) +00000420000004210000042200000421000004220000042100000422000004210000042200000421 +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')) +00000420000004210000042200000423000004210000042200000423000004210000042200000423 +SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); +hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')) +00000420000004210000042200000423000004240000042500000426000004270000042800000429 +CREATE TABLE t1 SELECT +LPAD(_utf32 X'0420',10,_utf32 X'0421') l, +RPAD(_utf32 X'0420',10,_utf32 X'0421') r; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `l` varchar(10) CHARACTER SET utf32 NOT NULL DEFAULT '', + `r` varchar(10) CHARACTER SET utf32 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select hex(l), hex(r) from t1; +hex(l) hex(r) +00000421000004210000042100000421000004210000042100000421000004210000042100000420 00000420000004210000042100000421000004210000042100000421000004210000042100000421 +DROP TABLE t1; +create table t1 (f1 char(30)); +insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000"); +select lpad(f1, 12, "-o-/") from t1; +lpad(f1, 12, "-o-/") +-o-/-o103000 +-o-/22720000 +-o-/-3401200 
+-o-/-o-78000 +drop table t1; +SET NAMES latin1; +SET character_set_connection=utf32; +select @@collation_connection; +@@collation_connection +utf32_general_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf32 DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like 
'%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET NAMES utf8; +SET character_set_connection=utf32; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32); +INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывÐ'),('ФЫВÐ'); +INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж'); +INSERT INTO t1 VALUES ('фывÐпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж'); +INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВÐПРОЛДЖ'); +SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a; +a +ФЫВР+ФЫВÐПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывР+фывÐпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a; +a +ФЫВР+ФЫВÐПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывР+фывÐпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a; +a +ФЫВР+ФЫВÐПРОЛДЖ +Фыва +Фывапролдж +фЫва +фЫвапролдж +фыВа +фыВапролдж +фывР+фывÐпролдж +фыва +фываПролдж +фывапРолдж +фывапрОлдж +фывапроЛдж +фывапролДж +фывапролдЖ +фывапролдж +SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a; +a +фЫва +фЫвапролдж +DROP TABLE t1; +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +ENGINE=MyISAM CHARACTER SET utf32; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +word +cat +SELECT * FROM t1 WHERE word LIKE "ca_"; +word +cat +SELECT * FROM t1 WHERE word LIKE "cat"; +word +cat +SELECT * FROM t1 WHERE word LIKE _utf32 
x'0000006300000025'; +word +cat +SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; +word +cat +DROP TABLE t1; +select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066); +insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066) +abc +select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066); +insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066) +defc +SET NAMES latin1; +CREATE TABLE t1 ( +word VARCHAR(64), +bar INT(11) default 0, +PRIMARY KEY (word)) +ENGINE=MyISAM +CHARSET utf32 +COLLATE utf32_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT * FROM t1 ORDER BY word; +word bar +a 0 +aar 0 +aardvar 0 +aardvara 0 +aardvark 0 +aardvarz 0 +EXPLAIN SELECT word FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index +SELECT word FROM t1 ORDER by word; +word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +CREATE TABLE t1 ( +word VARCHAR(64) , +PRIMARY KEY (word)) +ENGINE=MyISAM +CHARSET utf32 +COLLATE utf32_general_ci; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY WORD; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index +SELECT * FROM t1 ORDER BY word; 
+word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +CREATE TABLE t1 ( +word TEXT, +bar INT(11) AUTO_INCREMENT, +PRIMARY KEY (bar)) +ENGINE=MyISAM +CHARSET utf32 +COLLATE utf32_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a" ); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT * FROM t1 ORDER BY word; +word bar +a 2 +aar 1 +aardvar 3 +aardvara 5 +aardvark 4 +aardvarz 6 +EXPLAIN SELECT word FROM t1 ORDER BY word; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +SELECT word FROM t1 ORDER BY word; +word +a +aar +aardvar +aardvara +aardvark +aardvarz +DROP TABLE t1; +SELECT hex(cast(0xAA as char character set utf32)); +hex(cast(0xAA as char character set utf32)) +000000AA +SELECT hex(convert(0xAA using utf32)); +hex(convert(0xAA using utf32)) +000000AA +CREATE TABLE t1 (a char(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +00000001 +00000011 +00000111 +00001111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a varchar(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +00000001 +00000011 +00000111 +00001111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a text character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +00000001 +00000011 +00000111 +00001111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a mediumtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +00000001 +00000011 
+00000111 +00001111 +00011111 +DROP TABLE t1; +CREATE TABLE t1 (a longtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +HEX(a) +00000001 +00000011 +00000111 +00001111 +00011111 +DROP TABLE t1; +create table t1(a char(1)) default charset utf32; +insert into t1 values ('a'),('b'),('c'); +alter table t1 modify a char(5); +select a, hex(a) from t1; +a hex(a) +a 00000061 +b 00000062 +c 00000063 +drop table t1; +set @ivar= 1234; +set @str1 = 'select ?'; +set @str2 = convert(@str1 using utf32); +prepare stmt1 from @str2; +execute stmt1 using @ivar; +? +1234 +set names utf8; +create table t1 (a enum('x','y','z') character set utf32); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('x','y','z') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +select a, hex(a) from t1 order by a; +a hex(a) +x 00000078 +y 00000079 +z 0000007A +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('x','y','z','d','e','ä','ö','ü') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('D'); +insert into t1 values ('E '); +insert into t1 values ('ä'); +insert into t1 values ('ö'); +insert into t1 values ('ü'); +select a, hex(a) from t1 order by a; +a hex(a) +x 00000078 +y 00000079 +z 0000007A +d 00000064 +e 00000065 +ä 000000E4 +ö 000000F6 +ü 000000FC +drop table t1; +create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` set('x','y','z','ä','ö','ü') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +insert into t1 values ('x,y'); +insert 
into t1 values ('x,y,z,ä,ö,ü'); +select a, hex(a) from t1 order by a; +a hex(a) +x 00000078 +y 00000079 +x,y 000000780000002C00000079 +z 0000007A +x,y,z,ä,ö,ü 000000780000002C000000790000002C0000007A0000002C000000E40000002C000000F60000002C000000FC +drop table t1; +create table t1(a enum('a','b','c')) default character set utf32; +insert into t1 values('a'),('b'),('c'); +alter table t1 add b char(1); +show warnings; +Level Code Message +select * from t1 order by a; +a b +a NULL +b NULL +c NULL +drop table t1; +SET NAMES latin1; +SET collation_connection='utf32_general_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf32_general_ci 0000006100000009 +utf32_general_ci 00000061 +utf32_general_ci 0000006100000020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf32_general_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET NAMES latin1; +SET collation_connection='utf32_bin'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf32_bin 0000006100000009 +utf32_bin 00000061 +utf32_bin 0000006100000020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf32_bin +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); 
+insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +select hex(substr(_utf32 0x000000e4000000e500000068,1)); +hex(substr(_utf32 0x000000e4000000e500000068,1)) +000000E4000000E500000068 +select hex(substr(_utf32 0x000000e4000000e500000068,2)); +hex(substr(_utf32 0x000000e4000000e500000068,2)) +000000E500000068 +select hex(substr(_utf32 0x000000e4000000e500000068,3)); +hex(substr(_utf32 0x000000e4000000e500000068,3)) +00000068 +select hex(substr(_utf32 0x000000e4000000e500000068,-1)); +hex(substr(_utf32 0x000000e4000000e500000068,-1)) +00000068 +select hex(substr(_utf32 0x000000e4000000e500000068,-2)); +hex(substr(_utf32 0x000000e4000000e500000068,-2)) +000000E500000068 +select hex(substr(_utf32 0x000000e4000000e500000068,-3)); +hex(substr(_utf32 0x000000e4000000e500000068,-3)) +000000E4000000E500000068 +CREATE TABLE t1 ( +a varchar(250) NOT NULL default '', +KEY a (a) +) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci; +insert into t1 values (0x803d); +insert into t1 values (0x005b); +select hex(a) from t1; +hex(a) +0000005B +0000803D +drop table t1; +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +utext +lily +river +execute stmt using @param1; +utext +lily +river +select utext from t1 where utext like '%%'; +utext +lily +river +drop table t1; +deallocate prepare stmt; +create table t1 ( +a char(10) character set utf32 not null, +index a (a) +) engine=myisam; +insert into t1 values (repeat(0x0000201f, 10)); +insert into t1 values (repeat(0x00002020, 10)); +insert into t1 values (repeat(0x00002021, 10)); +explain select hex(a) from t1 order by a; +id select_type table 
type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 40 NULL 3 Using index +select hex(a) from t1 order by a; +hex(a) +0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F +00002020000020200000202000002020000020200000202000002020000020200000202000002020 +00002021000020210000202100002021000020210000202100002021000020210000202100002021 +alter table t1 drop index a; +select hex(a) from t1 order by a; +hex(a) +0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F0000201F +00002020000020200000202000002020000020200000202000002020000020200000202000002020 +00002021000020210000202100002021000020210000202100002021000020210000202100002021 +drop table t1; +CREATE TABLE t1 ( +status enum('active','passive') character set utf32 collate utf32_general_ci +NOT NULL default 'passive' +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `status` enum('active','passive') CHARACTER SET utf32 NOT NULL DEFAULT 'passive' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +ALTER TABLE t1 ADD a int NOT NULL AFTER status; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `status` enum('active','passive') CHARACTER SET utf32 NOT NULL DEFAULT 'passive', + `a` int(11) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +End of 4.1 tests +CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3)); +INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); +update t1 set b=a; +SELECT *, hex(a) FROM t1; +a b hex(a) +1.1 1.100 000000310000002E00000031 +2.1 2.100 000000320000002E00000031 +DROP TABLE t1; +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +utext +lily +river +execute stmt using @param1; +utext +lily +river +select utext from t1 where utext like '%%'; +utext +lily +river +drop table t1; +deallocate 
prepare stmt; +set names latin1; +set character_set_connection=utf32; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +soundex('') soundex('he') soundex('hello all folks') soundex('#3556 in bugdb') + H000 H4142 I51231 +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +hex(soundex('')) hex(soundex('he')) hex(soundex('hello all folks')) hex(soundex('#3556 in bugdb')) + 00000048000000300000003000000030 0000004800000034000000310000003400000032 000000490000003500000031000000320000003300000031 +select 'mood' sounds like 'mud'; +'mood' sounds like 'mud' +1 +select hex(soundex(_utf32 0x000004100000041100000412)); +hex(soundex(_utf32 0x000004100000041100000412)) +00000410000000300000003000000030 +select hex(soundex(_utf32 0x000000BF000000C0)); +hex(soundex(_utf32 0x000000BF000000C0)) +000000C0000000300000003000000030 +set names latin1; +create table t1(a blob, b text charset utf32); +select data_type, character_octet_length, character_maximum_length +from information_schema.columns where table_name='t1'; +data_type character_octet_length character_maximum_length +blob 65535 65535 +text 65535 16383 +drop table t1; +set names latin1; +set collation_connection=utf32_general_ci; +select position('bb' in 'abba'); +position('bb' in 'abba') +2 +create table t1 (a varchar(10) character set utf32) engine=heap; +insert into t1 values ('a'),('A'),('b'),('B'); +select * from t1 where a='a' order by binary a; +a +A +a +select hex(min(binary a)),count(*) from t1 group by a; +hex(min(binary a)) count(*) +00000041 2 +00000042 2 +drop table t1; +select char_length('abcd'), octet_length('abcd'); +char_length('abcd') octet_length('abcd') +4 16 +select left('abcd',2); +left('abcd',2) +ab +create table t1 (a varchar(10) character set utf32); +insert into t1 values (_utf32 0x0010FFFF); +insert into t1 values (_utf32 0x00110000); +ERROR HY000: Invalid utf32 character string: '001100' +insert into t1 
values (_utf32 0x00110101); +ERROR HY000: Invalid utf32 character string: '001101' +insert into t1 values (_utf32 0x01000101); +ERROR HY000: Invalid utf32 character string: '010001' +insert into t1 values (_utf32 0x11000101); +ERROR HY000: Invalid utf32 character string: '110001' +select hex(a) from t1; +hex(a) +0010FFFF +drop table t1; +create table t1 (utf32 varchar(2) character set utf32); +Wrong character with pad +insert into t1 values (0x110000); +Warnings: +Warning 1366 Incorrect string value: '\x11\x00\x00' for column 'utf32' at row 1 +Wrong chsaracter without pad +insert into t1 values (0x00110000); +Warnings: +Warning 1366 Incorrect string value: '\x00\x11\x00\x00' for column 'utf32' at row 1 +Wrong character with pad followed by another wrong character +insert into t1 values (0x11000000110000); +Warnings: +Warning 1366 Incorrect string value: '\x11\x00\x00\x00\x11\x00...' for column 'utf32' at row 1 +Good character with pad followed by bad character +insert into t1 values (0x10000000110000); +Warnings: +Warning 1366 Incorrect string value: '\x00\x11\x00\x00' for column 'utf32' at row 1 +Good character without pad followed by bad character +insert into t1 values (0x0010000000110000); +Warnings: +Warning 1366 Incorrect string value: '\x00\x11\x00\x00' for column 'utf32' at row 1 +Wrong character with the second byte higher than 0x10 +insert into t1 values (0x00800037); +Warnings: +Warning 1366 Incorrect string value: '\x00\x80\x007' for column 'utf32' at row 1 +Wrong character with pad with the second byte higher than 0x10 +insert into t1 values (0x00800037); +Warnings: +Warning 1366 Incorrect string value: '\x00\x80\x007' for column 'utf32' at row 1 +drop table t1; +select _utf32'a' collate utf32_general_ci = 0xfffd; +_utf32'a' collate utf32_general_ci = 0xfffd +0 +select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61)); +hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61)) +0000041000000061 +create table t1 (s1 varchar(5) character set 
utf32); +insert into t1 values (0xfffd); +select case when s1 = 0xfffd then 1 else 0 end from t1; +case when s1 = 0xfffd then 1 else 0 end +1 +select hex(s1) from t1 where s1 = 0xfffd; +hex(s1) +0000FFFD +drop table t1; +create table t1 (a char(10)) character set utf32; +insert into t1 values ('a '); +select hex(a) from t1; +hex(a) +00000061 +drop table t1; +select upper('abcd'), lower('ABCD'); +upper('abcd') lower('ABCD') +ABCD abcd +create table t1 (a varchar(10) character set utf32); +insert into t1 values (123456); +select a, hex(a) from t1; +a hex(a) +123456 000000310000003200000033000000340000003500000036 +drop table t1; +select hex(soundex('a')); +hex(soundex('a')) +00000041000000300000003000000030 +create table t1 (a enum ('a','b','c')) character set utf32; +insert into t1 values ('1'); +select * from t1; +a +a +drop table t1; +set names latin1; +select hex(conv(convert('123' using utf32), -10, 16)); +hex(conv(convert('123' using utf32), -10, 16)) +3742 +select hex(conv(convert('123' using utf32), 10, 16)); +hex(conv(convert('123' using utf32), 10, 16)) +3742 +set names latin1; +set character_set_connection=utf32; +select 1.1 + '1.2'; +1.1 + '1.2' +2.3 +select 1.1 + '1.2xxx'; +1.1 + '1.2xxx' +2.3 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: '' +select left('aaa','1'); +left('aaa','1') +a +create table t1 (a int); +insert into t1 values ('-1234.1e2'); +insert into t1 values ('-1234.1e2xxxx'); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +insert into t1 values ('-1234.1e2 '); +select * from t1; +a +-123410 +-123410 +-123410 +drop table t1; +create table t1 (a int); +insert into t1 values ('1 '); +insert into t1 values ('1 x'); +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select * from t1; +a +1 +1 +drop table t1; +create table t1 (a varchar(17000) character set utf32); +Warnings: +Note 1246 Converting column 'a' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` 
mediumtext CHARACTER SET utf32 +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(250) character set utf32 primary key); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(250) CHARACTER SET utf32 NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(334) character set utf32 primary key); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +create table t1 (a varchar(333) character set utf32, key(a)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +insert into t1 values (repeat('a',333)), (repeat('b',333)); +flush tables; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0; +SET timestamp=1216359724; +INSERT INTO t1 VALUES (current_date); +INSERT INTO t1 VALUES (current_time); +INSERT INTO t1 VALUES (current_timestamp); +SELECT s1, hex(s1) FROM t1; +s1 hex(s1) +2008-07-18 000000320000003000000030000000380000002D00000030000000370000002D0000003100000038 +08:42:04 00000030000000380000003A00000034000000320000003A0000003000000034 +2008-07-18 08:42:04 000000320000003000000030000000380000002D00000030000000370000002D00000031000000380000002000000030000000380000003A00000034000000320000003A0000003000000034 +DROP TABLE t1; +SET timestamp=0; +SET NAMES latin1; +set collation_connection=utf32_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) CHARACTER SET utf32 DEFAULT NULL, + `s2` varchar(64) CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 
values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names latin1; +select hex(char(0x01 using utf32)); +hex(char(0x01 using utf32)) +00000001 +select hex(char(0x0102 using utf32)); +hex(char(0x0102 using utf32)) +00000102 +select hex(char(0x010203 using utf32)); +hex(char(0x010203 using utf32)) +00010203 +select hex(char(0x01020304 using utf32)); +hex(char(0x01020304 using utf32)) + +Warnings: +Warning 1300 Invalid utf32 character string: '010203' +create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32); +create index i on t1 (s1); +insert into t1 values (char(256 using utf32), char(256 using utf32)); +select hex(s1), hex(s2) from t1; +hex(s1) hex(s2) +00000100 00000100 +drop table t1; +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(2) CHARACTER SET utf32 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE'); +SELECT * FROM t1 ORDER BY s1; +s1 +ab +ab +AE +AE +SET max_sort_length=4; +SELECT * FROM t1 ORDER BY s1; +s1 +ab +ab +AE +AE +DROP TABLE t1; +SET max_sort_length=DEFAULT; +SET NAMES latin1; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result new file mode 100644 index 00000000000..5006009fc9c --- /dev/null +++ b/mysql-test/r/ctype_utf32_uca.result @@ -0,0 +1,2373 
@@ +DROP TABLE IF EXISTS t1; +# +# Start of 5.5 tests +# +set names utf8; +set collation_connection=utf32_unicode_ci; +select hex('a'), hex('a '); +hex('a') hex('a ') +00000061 0000006100000020 +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a' +1 0 0 +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0' +1 0 0 +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a ' +1 0 0 +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0' +1 0 0 +select 'a a' > 'a', 'a \0' < 'a'; +'a a' > 'a' 'a \0' < 'a' +1 0 +select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; +binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' +1 1 1 +select 'c' like '\_' as want0; +want0 +0 +CREATE TABLE t ( +c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +c +a +ab +aba +DROP TABLE t; +create table t1 (c1 char(10) character set utf32 collate utf32_bin); +insert into t1 values ('A'),('a'); +insert into t1 values ('B'),('b'); +insert into t1 values ('C'),('c'); +insert into t1 values ('D'),('d'); +insert into t1 values ('E'),('e'); +insert into t1 values ('F'),('f'); +insert into t1 values ('G'),('g'); +insert into t1 values ('H'),('h'); +insert into t1 values ('I'),('i'); +insert into t1 values ('J'),('j'); +insert into t1 values ('K'),('k'); +insert into t1 values ('L'),('l'); +insert into t1 values ('M'),('m'); +insert into t1 values ('N'),('n'); +insert into t1 values ('O'),('o'); +insert into t1 values ('P'),('p'); +insert into t1 values ('Q'),('q'); +insert into t1 values ('R'),('r'); +insert into t1 values ('S'),('s'); +insert into t1 values ('T'),('t'); +insert into t1 values ('U'),('u'); +insert into t1 values 
('V'),('v'); +insert into t1 values ('W'),('w'); +insert into t1 values ('X'),('x'); +insert into t1 values ('Y'),('y'); +insert into t1 values ('Z'),('z'); +insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0); +insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1); +insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2); +insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3); +insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4); +insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5); +insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6); +insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7); +insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8); +insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9); +insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca); +insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb); +insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc); +insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd); +insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce); +insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf); +insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0); +insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1); +insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2); +insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3); +insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4); +insert into t1 values (_ucs2 0x00f5),(_ucs2 0x00d5); +insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6); +insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7); +insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8); +insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9); +insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da); +insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db); +insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc); +insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd); +insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de); +insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df); +insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103); +insert into t1 values (_ucs2 
0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107); +insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b); +insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f); +insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113); +insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117); +insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b); +insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f); +insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123); +insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127); +insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b); +insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f); +insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133); +insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137); +insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b); +insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f); +insert into t1 values (_ucs2 0x0140),(_ucs2 0x0141),(_ucs2 0x0142),(_ucs2 0x0143); +insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147); +insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b); +insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f); +insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153); +insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157); +insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b); +insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f); +insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163); +insert into 
t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167); +insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b); +insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f); +insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173); +insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177); +insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b); +insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f); +insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183); +insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187); +insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b); +insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f); +insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193); +insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197); +insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b); +insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 0x019e),(_ucs2 0x019f); +insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3); +insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7); +insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab); +insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af); +insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3); +insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7); +insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb); +insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf); +insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 
0x01c3); +insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7); +insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb); +insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf); +insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3); +insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7); +insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db); +insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df); +insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3); +insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7); +insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb); +insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef); +insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3); +insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7); +insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb); +insert into t1 values (_ucs2 0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff); +insert into t1 values ('AA'),('Aa'),('aa'),('aA'); +insert into t1 values ('CH'),('Ch'),('ch'),('cH'); +insert into t1 values ('DZ'),('Dz'),('dz'),('dZ'); +insert into t1 values ('IJ'),('Ij'),('ij'),('iJ'); +insert into t1 values ('LJ'),('Lj'),('lj'),('lJ'); +insert into t1 values ('LL'),('Ll'),('ll'),('lL'); +insert into t1 values ('NJ'),('Nj'),('nj'),('nJ'); +insert into t1 values ('OE'),('Oe'),('oe'),('oE'); +insert into t1 values ('SS'),('Ss'),('ss'),('sS'); +insert into t1 values ('RR'),('Rr'),('rr'),('rR'); +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa 
+ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÂÃà âãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ãá +ǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +Ãð +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +EeÈÊËèêëĒēĔĕĖėĘęĚě +Éé +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÌÎÃìîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Ãà +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÔÕòôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Óó +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÛÜùûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Úú +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÿŶŷŸ +Ãý +Ƴƴ +ZzŹźŻżŽž +Æ +Þþ +ÄÆäæ +ÖØöø +Ã…Ã¥ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +CHChcHch +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ç¦Ç§Ç´Çµ +Ģģ +Ǥǥ +Æ“ 
+Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Yy +ı +Æ— +Æ– +JjĴĵǰ +KkǨǩ +Ķķ +Ƙƙ +LlĹ弾 +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +Ļļ +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ņņ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŘř +RRRrrRrr +Å–Å— +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +ÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÃÄÅà áãäåĀÄÄ„Ä…ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ăă +Ââ +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃìÃïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +Îî +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅÅ Å¡Å¿ +SSSssSssß +Şş +Æ© +ƪ +TtŤť +ƾ +Ţţ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +CHChcHch +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ 
+NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂăÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ä„Ä… +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ćć +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĚě +Ęę +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ńń +Æ +Æž +ÅŠÅ‹ +OoÒÔÕÖòôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Óó +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsÅœÅŞşŠšſ +SSSssSssß +Śś +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŽž +Æ +Źź +Żż +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÅà áâãåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdz +DŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔòóôŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Zz +Žž +Æ© +ƪ +TtŢţŤť +ƾ 
+Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Õõ +Ää +Öö +Üü +Xx +YyÃýÿŶŷŸ +Ƴƴ +ŹźŻż +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ññ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃà áâãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃœÃüýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ã…Ã¥ +ÄÆäæ +ÖØöø +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select 
group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÄ†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Çç +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ğğ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +Iı +IJIj +ƕǶ +Ħħ +iÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +iJijIJij +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Öö +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅÅ Å¡Å¿ +SSSssSssß +Şş +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Üü +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +CHChch +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗ +RRRrrRrr +Řř +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃà áâãĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +aA +ǢǣǼǽ 
+Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÆ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ǿǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛùúûŨũŪūŬÅŮůŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃœÃüýÿŰűŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +ÄÆäæ +ÖØöøÅÅ‘ +AAAaaaÃ…Ã¥ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CCHChcchÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IYiyÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +ÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovak_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÅà áâãåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +Ää +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÇçĆćĈĉĊċ +cH +ÄŒÄ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ 
+HhĤĥ +CHChch +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÕÖòóõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +Ôô +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşſ +SSSssSssß +Å Å¡ +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻż +Æ +Žž +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +cH +CHChch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +lL +LLLlll +ÅÅ‚ +Æš +Æ› +Mm +NnŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Ññ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IJijÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJij +IJij +ı +Æ— +Æ– +Ĵĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJlj +LJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnj +ÇŠÇ‹ÇŒ +Æ +Æž 
+ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +ÙÚÛÜùúûüŨũŪūŬÅŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +Æœ +Ʊ +UVuv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄŠÄ‹ÄŒÄ +CHChcHch +Ĉĉ +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +ÄœÄ +Ǥǥ +Æ“ +Æ” +Ƣƣ +Hh +Ĥĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjÇ° +Ĵĵ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕÖòóôõöŌÅÅŽÅÅÅ‘Æ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŞşŠšſ +SSSssSssß +ŜŠ+Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® +UuÙÚÛÜùúûüŨũŪūŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜ +ŬŠ+Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci; +group_concat(c1 order by binary c1 separator '') +÷ +× +AaÀÃÂÃÄÅà áâãäåĀÄĂ㥹ÇÇŽÇžÇŸÇ Ç¡ÇºÇ» +AAAaaAaa +ÆæǢǣǼǽ +Bb +Æ€ +Æ +Ƃƃ +CcÃ‡Ã§Ä†Ä‡ÄˆÄ‰ÄŠÄ‹ÄŒÄ +CHChcHch +Ƈƈ +DdÄŽÄ +DZDzdZdzDŽDždžDZDzdz +ÄÄ‘ +Ɖ +ÆŠ +Æ‹ÆŒ +Ãð +EeÈÉÊËèéêëĒēĔĕĖėĘęĚě +ÆŽÇ +Æ +Æ +Ff +Æ‘Æ’ +GgÄœÄÄžÄŸÄ Ä¡Ä¢Ä£Ç¦Ç§Ç´Çµ +Ǥǥ +Æ“ +Æ” +Ƣƣ +HhĤĥ +ƕǶ +Ħħ +IiÃŒÃÃŽÃìÃîïĨĩĪīĬÄĮįİÇÇ +IJIjiJijIJij +ı +Æ— +Æ– +JjĴĵǰ +KkĶķǨǩ +Ƙƙ +LlĹĺĻļĽľ +Ä¿Å€ +LJLjlJljLJLjlj +LLLllLll +ÅÅ‚ +Æš +Æ› +Mm +NnÑñŃńŅņŇňǸǹ +NJNjnJnjÇŠÇ‹ÇŒ +Æ +Æž +ÅŠÅ‹ +OoÒÓÔÕòóôõŌÅÅŽÅÆ Æ¡Ç‘Ç’ÇªÇ«Ç¬Ç +OEOeoEoeÅ’Å“ +ÖöÅÅ‘ +ØøǾǿ +Ɔ +ÆŸ +Pp +Ƥƥ +Qq +ĸ +RrŔŕŖŗŘř +RRRrrRrr +Ʀ +SsŚśŜÅŞşŠšſ +SSSssSssß +Æ© +ƪ +TtŢţŤť +ƾ +Ŧŧ +Æ« +Æ¬Æ +Æ® 
+UuÙÚÛùúûŨũŪūŬÅŮůŲųƯưǓǔǕǖǗǘǙǚǛǜ +ÜüŰű +Æœ +Ʊ +Vv +Ʋ +WwŴŵ +Xx +YyÃýÿŶŷŸ +Ƴƴ +ZzŹźŻżŽž +Æ +Ƶƶ +ƷǮǯ +Ƹƹ +ƺ +Þþ +Æ¿Ç· +Æ» +Ƨƨ +Ƽƽ +Æ„Æ… +ʼn +Ç€ +Ç +Ç‚ +ǃ +drop table t1; +SET NAMES utf8; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 +COLLATE utf32_general_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4); +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 +COLLATE utf32_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) +COLLATE utf32_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +SET NAMES utf8; +SET @test_character_set='utf32'; +SET @test_collation='utf32_swedish_ci'; +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET @safe_character_set_client= @@character_set_client; +SET @safe_character_set_results= @@character_set_results; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; +CREATE TABLE t1 (c CHAR(10), KEY(c)); 
+SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c char(10) utf32_swedish_ci YES MUL NULL +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +want3results +aaa +aaaa +aaaaa +DROP TABLE t1; +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c1 varchar(15) utf32_swedish_ci YES MUL NULL +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +want3results +location +loberge +lotre +SELECT c1 as want3results from t1 where c1 like 'lo%'; +want3results +location +loberge +lotre +SELECT c1 as want1result from t1 where c1 like 'loc%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'loca%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locat%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locati%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'location%'; +want1result +location +DROP TABLE t1; +create table t1 (a set('a') not null); +insert into t1 values (),(); +Warnings: +Warning 1364 Field 'a' doesn't have a default value +select cast(a as char(1)) from t1; +cast(a as char(1)) + + +select a sounds like a from t1; +a sounds like a +1 +1 +select 1 from t1 order by cast(a as char(1)); +1 +1 +1 +drop table t1; +set names utf8; +create table t1 ( +name varchar(10), +level smallint unsigned); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `name` varchar(10) COLLATE utf32_swedish_ci DEFAULT NULL, + `level` smallint(5) unsigned DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE=utf32_swedish_ci +insert into t1 values ('string',1); +select concat(name,space(level)), concat(name, repeat(' ',level)) from t1; 
+concat(name,space(level)) concat(name, repeat(' ',level)) +string string +drop table t1; +DROP DATABASE d1; +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; +SET character_set_client= @safe_character_set_client; +SET character_set_results= @safe_character_set_results; +SET collation_connection='utf32_unicode_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf32_unicode_ci 0000006100000009 +utf32_unicode_ci 00000061 +utf32_unicode_ci 0000006100000020 +drop table t1; +select @@collation_connection; +@@collation_connection +utf32_unicode_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +End of 4.1 tests +CREATE TABLE t1 (id int, a varchar(30) character set utf32); +INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131); +INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049); +INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049); +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +a la l ll u lu +ıi 8 ıi 8 II 8 +ıı 8 ıı 8 II 8 +ii 8 ii 8 II 8 +Ä°I 8 ii 8 Ä°I 8 +Ä°Ä° 8 ii 8 Ä°Ä° 8 +II 8 ii 8 II 8 +ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci; +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +a la l ll u lu +ıi 8 ıi 8 IÄ° 8 +ıı 8 ıı 8 II 8 +ii 8 ii 8 Ä°Ä° 8 +Ä°I 8 iı 8 Ä°I 8 +Ä°Ä° 8 
ii 8 Ä°Ä° 8 +II 8 ıı 8 II 8 +DROP TABLE t1; +CREATE TABLE t1 ( +c1 text character set utf32 collate utf32_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +COUNT(*) c1 +1 +1 a +DROP TABLE IF EXISTS t1; +set collation_connection=utf32_unicode_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) CHARACTER SET utf32 COLLATE utf32_unicode_ci DEFAULT NULL, + `s2` varchar(64) CHARACTER SET utf32 COLLATE utf32_unicode_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 55c32c6a1d4..03040d1676c 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -1899,6 +1899,20 @@ CONVERT(a, CHAR) CONVERT(b, CHAR) DROP TABLE t1; End of 5.0 tests Start of 5.4 tests +SET NAMES utf8mb3; +SHOW VARIABLES LIKE 'character_set_results%'; +Variable_name Value +character_set_results utf8 +CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` char(1) CHARACTER SET utf8 
COLLATE utf8_bin DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +SELECT _utf8mb3'test'; +test +test CREATE TABLE t1 ( clipid INT NOT NULL, Tape TINYTEXT, diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result new file mode 100644 index 00000000000..5eae2c3bc1a --- /dev/null +++ b/mysql-test/r/ctype_utf8mb4.result @@ -0,0 +1,2250 @@ +drop table if exists t1,t2; +# +# Start of 5.5 tests +# +set names utf8mb4; +select left(_utf8mb4 0xD0B0D0B1D0B2,1); +left(_utf8mb4 0xD0B0D0B1D0B2,1) +а +select right(_utf8mb4 0xD0B0D0B2D0B2,1); +right(_utf8mb4 0xD0B0D0B2D0B2,1) +в +select locate('he','hello'); +locate('he','hello') +1 +select locate('he','hello',2); +locate('he','hello',2) +0 +select locate('lo','hello',2); +locate('lo','hello',2) +4 +select locate('HE','hello'); +locate('HE','hello') +1 +select locate('HE','hello',2); +locate('HE','hello',2) +0 +select locate('LO','hello',2); +locate('LO','hello',2) +4 +select locate('HE','hello' collate utf8mb4_bin); +locate('HE','hello' collate utf8mb4_bin) +0 +select locate('HE','hello' collate utf8mb4_bin,2); +locate('HE','hello' collate utf8mb4_bin,2) +0 +select locate('LO','hello' collate utf8mb4_bin,2); +locate('LO','hello' collate utf8mb4_bin,2) +0 +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D0B1D0B2); +locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D0B1D0B2) +2 +select locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2); +locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2) +2 +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2); +locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2) +2 +select locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2 collate utf8mb4_bin); +locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2 collate utf8mb4_bin) +0 +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2 collate utf8mb4_bin); +locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2 collate utf8mb4_bin) +0 +select length(_utf8mb4 0xD0B1), bit_length(_utf8mb4 0xD0B1), char_length(_utf8mb4 0xD0B1); +length(_utf8mb4 
0xD0B1) bit_length(_utf8mb4 0xD0B1) char_length(_utf8mb4 0xD0B1) +2 16 1 +select 'a' like 'a'; +'a' like 'a' +1 +select 'A' like 'a'; +'A' like 'a' +1 +select 'A' like 'a' collate utf8mb4_bin; +'A' like 'a' collate utf8mb4_bin +0 +select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD0B1,_utf8mb4 '%'); +_utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD0B1,_utf8mb4 '%') +1 +select convert(_latin1'Günter André' using utf8mb4) like CONVERT(_latin1'GÜNTER%' USING utf8mb4); +convert(_latin1'G?nter Andr?' using utf8mb4) like CONVERT(_latin1'G?NTER%' USING utf8mb4) +1 +select CONVERT(_koi8r'×ÁÓÑ' USING utf8mb4) LIKE CONVERT(_koi8r'÷áóñ' USING utf8mb4); +CONVERT(_koi8r'????' USING utf8mb4) LIKE CONVERT(_koi8r'????' USING utf8mb4) +1 +select CONVERT(_koi8r'÷áóñ' USING utf8mb4) LIKE CONVERT(_koi8r'×ÁÓÑ' USING utf8mb4); +CONVERT(_koi8r'????' USING utf8mb4) LIKE CONVERT(_koi8r'????' USING utf8mb4) +1 +SELECT 'a' = 'a '; +'a' = 'a ' +1 +SELECT 'a\0' < 'a'; +'a\0' < 'a' +1 +SELECT 'a\0' < 'a '; +'a\0' < 'a ' +1 +SELECT 'a\t' < 'a'; +'a\t' < 'a' +1 +SELECT 'a\t' < 'a '; +'a\t' < 'a ' +1 +SELECT 'a' = 'a ' collate utf8mb4_bin; +'a' = 'a ' collate utf8mb4_bin +1 +SELECT 'a\0' < 'a' collate utf8mb4_bin; +'a\0' < 'a' collate utf8mb4_bin +1 +SELECT 'a\0' < 'a ' collate utf8mb4_bin; +'a\0' < 'a ' collate utf8mb4_bin +1 +SELECT 'a\t' < 'a' collate utf8mb4_bin; +'a\t' < 'a' collate utf8mb4_bin +1 +SELECT 'a\t' < 'a ' collate utf8mb4_bin; +'a\t' < 'a ' collate utf8mb4_bin +1 +CREATE TABLE t1 (a char(10) character set utf8mb4 not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +hex(a) STRCMP(a,'a') STRCMP(a,'a ') +61 0 0 +6100 -1 -1 +6109 -1 -1 +61 0 0 +DROP TABLE t1; +select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es'); +insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es') +this is a test +select insert("aa",100,1,"b"),insert("aa",1,3,"b"); 
+insert("aa",100,1,"b") insert("aa",1,3,"b") +aa b +select char_length(left(@a:='теÑÑ‚',5)), length(@a), @a; +char_length(left(@a:='теÑÑ‚',5)) length(@a) @a +4 8 теÑÑ‚ +create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d"); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` varchar(10) CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t1; +date_format("2004-01-19 10:10:10", "%Y-%m-%d") +2004-01-19 +drop table t1; +set names utf8mb4; +set LC_TIME_NAMES='fr_FR'; +create table t1 (s1 char(20) character set latin1); +insert into t1 values (date_format('2004-02-02','%M')); +select hex(s1) from t1; +hex(s1) +66E97672696572 +drop table t1; +create table t1 (s1 char(20) character set koi8r); +set LC_TIME_NAMES='ru_RU'; +insert into t1 values (date_format('2004-02-02','%M')); +insert into t1 values (date_format('2004-02-02','%b')); +insert into t1 values (date_format('2004-02-02','%W')); +insert into t1 values (date_format('2004-02-02','%a')); +select hex(s1), s1 from t1; +hex(s1) s1 +E6C5D7D2C1CCD1 Ð¤ÐµÐ²Ñ€Ð°Ð»Ñ +E6C5D7 Фев +F0CFCEC5C4C5CCD8CEC9CB Понедельник +F0CEC4 Пнд +drop table t1; +set LC_TIME_NAMES='en_US'; +set names koi8r; +create table t1 (s1 char(1) character set utf8mb4); +insert into t1 values (_koi8r'ÁÂ'); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; +s1 hex(s1) char_length(s1) octet_length(s1) +Á D0B0 1 2 +drop table t1; +create table t1 (s1 tinytext character set utf8mb4); +insert into t1 select repeat('a',300); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('Ñ',300); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('aÑ',300); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('Ña',300); +Warnings: +Warning 1265 Data 
truncated for column 's1' at row 1 +insert into t1 select repeat('ÑÑ',300); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +select hex(s1) from t1; +hex(s1) +616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +D18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18F +61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F 
+D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61D18F61 +D18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18FD18F +select length(s1),char_length(s1) from t1; +length(s1) char_length(s1) +255 255 +254 127 +255 170 +255 170 +254 127 +drop table t1; +create table t1 (s1 text character set utf8mb4); +insert into t1 select repeat('a',66000); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('Ñ',66000); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('aÑ',66000); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('Ña',66000); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +insert into t1 select repeat('ÑÑ',66000); +Warnings: +Warning 1265 Data truncated for column 's1' at row 1 +select length(s1),char_length(s1) from t1; +length(s1) char_length(s1) +65535 65535 +65534 32767 +65535 43690 +65535 43690 +65534 32767 +drop table t1; +create table t1 (s1 char(10) character set utf8mb4); +insert into t1 values (0x41FF); +Warnings: 
+Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1 +select hex(s1) from t1; +hex(s1) +41 +drop table t1; +create table t1 (s1 varchar(10) character set utf8mb4); +insert into t1 values (0x41FF); +Warnings: +Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1 +select hex(s1) from t1; +hex(s1) +41 +drop table t1; +create table t1 (s1 text character set utf8mb4); +insert into t1 values (0x41FF); +Warnings: +Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1 +select hex(s1) from t1; +hex(s1) +41 +drop table t1; +create table t1 (a text character set utf8mb4, primary key(a(371))); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES ( 'test' ); +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a; +a a +test test +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = 'test' and b.a = 'test'; +a a +test test +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a and a.a = 'test'; +a a +test test +DROP TABLE t1; +create table t1 (a char(255) character set utf8mb4); +insert into t1 values('b'),('b'); +select * from t1 where a = 'b'; +a +b +b +select * from t1 where a = 'b' and a = 'b'; +a +b +b +select * from t1 where a = 'b' and a != 'b'; +a +drop table t1; +set collation_connection=utf8mb4_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) CHARACTER SET utf8mb4 DEFAULT NULL, + `s2` varchar(64) CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); 
+insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names utf8mb4; +set names utf8mb4; +select 'ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +'ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]' +1 +select 'ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +'ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]' +1 +select ' ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +' ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]' +1 +select ' ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +' ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]' +1 +select 'ваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]'; +'ваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]' +0 +select 'zваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +'zваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]' +0 +select 'zваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]'; +'zваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]' +0 +CREATE TABLE t1 (a enum ('Y', 'N') DEFAULT 'N' COLLATE utf8mb4_unicode_ci); +ALTER TABLE t1 ADD COLUMN b CHAR(20); +DROP TABLE t1; +set names utf8mb4; +create table t1 (a enum('aaaa','проба') character set utf8mb4); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('aaaa','проба') CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +insert into t1 values ('проба'); +select * from t1; +a +проба +create table t2 select ifnull(a,a) from t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `ifnull(a,a)` varchar(5) CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t2; +ifnull(a,a) +проба +drop table t1; +drop table t2; +create table t1 (c varchar(30) character set utf8mb4, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' 
for key 'c' +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' for key 'c' +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 +2 +select c c3 from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; +create table t1 (c varchar(30) character set utf8mb4, unique(c(10))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' for key 'c' +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' for key 'c' +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 +2 +select c c3 from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; +create table t1 (c char(3) character set utf8mb4, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aa' for key 'c' +insert into t1 values ('b'); +insert into t1 values ('bb'); +insert into t1 values ('bbb'); +ERROR 23000: Duplicate entry 'bb' for key 'c' +insert into t1 values ('а'); +insert into t1 values ('аа'); +insert into t1 values ('ааа'); +ERROR 23000: Duplicate entry 'аа' for key 'c' +insert into t1 values ('б'); +insert into t1 values ('бб'); +insert into t1 
values ('ббб'); +ERROR 23000: Duplicate entry 'бб' for key 'c' +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +insert into t1 values ('ꪪꪪꪪ'); +ERROR 23000: Duplicate entry 'ꪪꪪ' for key 'c' +drop table t1; +create table t1 (c char(3) character set utf8mb4, unique (c(2))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aa' for key 'c' +insert into t1 values ('b'); +insert into t1 values ('bb'); +insert into t1 values ('bbb'); +ERROR 23000: Duplicate entry 'bb' for key 'c' +insert into t1 values ('а'); +insert into t1 values ('аа'); +insert into t1 values ('ааа'); +ERROR 23000: Duplicate entry 'аа' for key 'c' +insert into t1 values ('б'); +insert into t1 values ('бб'); +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'бб' for key 'c' +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +insert into t1 values ('ꪪꪪꪪ'); +ERROR 23000: Duplicate entry 'ꪪꪪ' for key 'c' +drop table t1; +create table t1 ( +c char(10) character set utf8mb4, +unique key a using hash (c(1)) +) engine=heap; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c` char(10) CHARACTER SET utf8mb4 DEFAULT NULL, + UNIQUE KEY `a` (`c`(1)) USING HASH +) ENGINE=MEMORY DEFAULT CHARSET=latin1 +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 ( +c char(10) character set 
utf8mb4, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c` char(10) CHARACTER SET utf8mb4 DEFAULT NULL, + UNIQUE KEY `a` (`c`(1)) USING BTREE +) ENGINE=MEMORY DEFAULT CHARSET=latin1 +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 ( +c char(10) character set utf8mb4, +unique key a (c(1)) +) engine=innodb; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 (c varchar(30) character set utf8mb4 collate utf8mb4_bin, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' for key 'c' +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaa' for key 'c' +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 
+2 +select c c3 from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; +create table t1 (c char(3) character set utf8mb4 collate utf8mb4_bin, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aa' for key 'c' +insert into t1 values ('b'); +insert into t1 values ('bb'); +insert into t1 values ('bbb'); +ERROR 23000: Duplicate entry 'bb' for key 'c' +insert into t1 values ('а'); +insert into t1 values ('аа'); +insert into t1 values ('ааа'); +ERROR 23000: Duplicate entry 'аа' for key 'c' +insert into t1 values ('б'); +insert into t1 values ('бб'); +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'бб' for key 'c' +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +insert into t1 values ('ꪪꪪꪪ'); +ERROR 23000: Duplicate entry 'ꪪꪪ' for key 'c' +drop table t1; +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a using hash (c(1)) +) engine=heap; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c` char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + UNIQUE KEY `a` (`c`(1)) USING HASH +) ENGINE=MEMORY DEFAULT CHARSET=latin1 +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б 
+select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c` char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + UNIQUE KEY `a` (`c`(1)) USING BTREE +) ENGINE=MEMORY DEFAULT CHARSET=latin1 +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a (c(1)) +) engine=innodb; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'a' for key 'a' +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'б' for key 'a' +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a +select c as c_a from t1 where c='б'; +c_a +б +drop table t1; +create table t1 ( +str varchar(255) character set utf8mb4 not null, +key str (str(2)) +) engine=myisam; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +create table t1 ( +str varchar(255) character set utf8mb4 
not null, +key str (str(2)) +) engine=innodb; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +create table t1 ( +str varchar(255) character set utf8mb4 not null, +key str using btree (str(2)) +) engine=heap; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +create table t1 ( +str varchar(255) character set utf8mb4 not null, +key str using hash (str(2)) +) engine=heap; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +create table t1 ( +str varchar(255) character set utf8mb4 not null, +key str (str(2)) +) engine=innodb; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +CREATE TABLE t1 (a varchar(32) BINARY) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES ('test'); +SELECT a FROM t1 WHERE a LIKE '%te'; +a +DROP TABLE t1; +SET NAMES utf8mb4; +CREATE TABLE t1 ( +subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci, +p varchar(15) character set utf8mb4 +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES ('è°·å·ä¿ŠäºŒã¨ç”³ã—ã¾ã™ãŒã€ã‚¤ãƒ³ã‚¿ãƒ¼ãƒãƒƒãƒˆäºˆç´„ã®ä¼šå“¡ç™»éŒ²ã‚’ã—ã¾ã—ãŸã¨ã“ã‚ã€ãƒ¡ãƒ¼ãƒ«ã‚¢ãƒ‰ãƒ¬ã‚¹ã‚’é–“é•ãˆã¦ã—ã¾ã„会員IDãŒå—ã‘å–ã‚‹ã“ã¨ãŒå‡ºæ¥ã¾ã›ã‚“ã§ã—ãŸã€‚é–“é•ãˆã‚¢ãƒ‰ãƒ¬ã‚¹ã¯tani-shun@n.vodafone.ne.jpを書ãè¾¼ã¿ã¾ã—ãŸã€‚ã©ã†ã™ã‚Œã°ã‚ˆã„ã§ã™ã‹ï¼Ÿ ãã®ä»–ã€ä½æ‰€ç‰ã¯é–“é•ãˆã‚ã‚Šã¾ã›ã‚“。連絡ãã ã•ã„。よã‚ã—ããŠé¡˜ã„ã—ã¾ã™ã€‚m(__)m','040312-000057'); +INSERT INTO t1 VALUES ('aaa','bbb'); +SELECT length(subject) FROM t1; +length(subject) +432 +3 +SELECT length(subject) FROM t1 ORDER BY 1; +length(subject) +3 +432 +DROP TABLE t1; +CREATE TABLE t1 ( +id int unsigned NOT NULL auto_increment, +list_id smallint unsigned NOT NULL, +term TEXT NOT NULL, +PRIMARY KEY(id), +INDEX(list_id, term(4)) +) ENGINE=MYISAM CHARSET=utf8mb4; +INSERT INTO t1 SET 
list_id = 1, term = "letterc"; +INSERT INTO t1 SET list_id = 1, term = "letterb"; +INSERT INTO t1 SET list_id = 1, term = "lettera"; +INSERT INTO t1 SET list_id = 1, term = "letterd"; +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterc"); +id +1 +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterb"); +id +2 +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "lettera"); +id +3 +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterd"); +id +4 +DROP TABLE t1; +SET NAMES latin1; +CREATE TABLE t1 ( +id int unsigned NOT NULL auto_increment, +list_id smallint unsigned NOT NULL, +term text NOT NULL, +PRIMARY KEY(id), +INDEX(list_id, term(19)) +) ENGINE=MyISAM CHARSET=utf8mb4; +INSERT INTO t1 set list_id = 1, term = "testétest"; +INSERT INTO t1 set list_id = 1, term = "testetest"; +INSERT INTO t1 set list_id = 1, term = "testètest"; +SELECT id, term FROM t1 where (list_id = 1) AND (term = "testétest"); +id term +1 testétest +2 testetest +3 testètest +SELECT id, term FROM t1 where (list_id = 1) AND (term = "testetest"); +id term +1 testétest +2 testetest +3 testètest +SELECT id, term FROM t1 where (list_id = 1) AND (term = "testètest"); +id term +1 testétest +2 testetest +3 testètest +DROP TABLE t1; +set names utf8mb4; +create table t1 ( +a int primary key, +b varchar(6), +index b3(b(3)) +) engine=innodb character set=utf8mb4; +insert into t1 values(1,'foo'),(2,'foobar'); +select * from t1 where b like 'foob%'; +a b +2 foobar +alter table t1 engine=innodb; +select * from t1 where b like 'foob%'; +a b +2 foobar +drop table t1; +create table t1 ( +a enum('петÑ','ваÑÑ','анюта') character set utf8mb4 not null default 'анюта', +b set('петÑ','ваÑÑ','анюта') character set utf8mb4 not null default 'анюта' +); +create table t2 select concat(a,_utf8mb4'') as a, concat(b,_utf8mb4'')as b from t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `a` varchar(5) CHARACTER SET utf8mb4 NOT NULL DEFAULT '', + `b` varchar(15) CHARACTER SET utf8mb4 NOT 
NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t2; +drop table t1; +select 'c' like '\_' as want0; +want0 +0 +SELECT SUBSTR('ваÑÑ',-2); +SUBSTR('ваÑÑ',-2) +ÑÑ +create table t1 (id integer, a varchar(100) character set utf8mb4 collate utf8mb4_unicode_ci); +insert into t1 values (1, 'Test'); +select * from t1 where soundex(a) = soundex('Test'); +id a +1 Test +select * from t1 where soundex(a) = soundex('TEST'); +id a +1 Test +select * from t1 where soundex(a) = soundex('test'); +id a +1 Test +drop table t1; +select soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB); +soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB) +阅000 +select hex(soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)); +hex(soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)) +E99885303030 +select soundex(_utf8mb4 0xD091D092D093); +soundex(_utf8mb4 0xD091D092D093) +Б000 +select hex(soundex(_utf8mb4 0xD091D092D093)); +hex(soundex(_utf8mb4 0xD091D092D093)) +D091303030 +SET collation_connection='utf8mb4_general_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf8mb4_general_ci 6109 +utf8mb4_general_ci 61 +utf8mb4_general_ci 6120 +drop table t1; +select @@collation_connection; +@@collation_connection +utf8mb4_general_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select 
collation(s1) from t1; +collation(s1) +utf8mb4_general_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 61 +ä C3A4 +ae 6165 +o 6F +ö C3B6 +oe 6F65 +s 73 +ß C39F +ss 7373 +u 75 +ü C3BC +ue 7565 +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a,ä +ae +o,ö +oe +s,ß +ss +u,ü +ue +drop table t1; +SET collation_connection='utf8mb4_bin'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +utf8mb4_bin 6109 +utf8mb4_bin 61 +utf8mb4_bin 6120 +drop table t1; +select @@collation_connection; +@@collation_connection +utf8mb4_bin +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +CREATE TABLE t1 ( +user varchar(255) NOT NULL default '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES ('one'),('two'); +SELECT CHARSET('a'); +CHARSET('a') +utf8mb4 +SELECT user, CONCAT('<', user, '>') AS c FROM t1; +user c +one <one> +two <two> +DROP TABLE t1; +create table t1 (f1 varchar(1) not null) default charset utf8mb4; +insert into t1 values (''), (''); +select concat(concat(_latin1'->',f1),_latin1'<-') from t1; +concat(concat(_latin1'->',f1),_latin1'<-') +-><- +-><- +drop table t1; +select convert(_koi8r'É' using utf8mb4) < convert(_koi8r'Ê' using utf8mb4); 
+convert(_koi8r'?' using utf8mb4) < convert(_koi8r'?' using utf8mb4) +1 +set names latin1; +create table t1 (a varchar(10)) character set utf8mb4; +insert into t1 values ('test'); +select ifnull(a,'') from t1; +ifnull(a,'') +test +drop table t1; +select repeat(_utf8mb4'+',3) as h union select NULL; +h ++++ +NULL +select ifnull(NULL, _utf8mb4'string'); +ifnull(NULL, _utf8mb4'string') +string +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_lithuanian_ci); +insert into t1 values ('I'),('K'),('Y'); +select * from t1 where s1 < 'K' and s1 = 'Y'; +s1 +I +Y +select * from t1 where 'K' > s1 and s1 = 'Y'; +s1 +I +Y +drop table t1; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_czech_ci); +insert into t1 values ('c'),('d'),('h'),('ch'),('CH'),('cH'),('Ch'),('i'); +select * from t1 where s1 > 'd' and s1 = 'CH'; +s1 +ch +CH +Ch +select * from t1 where 'd' < s1 and s1 = 'CH'; +s1 +ch +CH +Ch +select * from t1 where s1 = 'cH' and s1 <> 'ch'; +s1 +cH +select * from t1 where 'cH' = s1 and s1 <> 'ch'; +s1 +cH +drop table t1; +create table t1 (a varchar(255)) default character set utf8mb4; +insert into t1 values (1.0); +drop table t1; +create table t1 ( +id int not null, +city varchar(20) not null, +key (city(7),id) +) character set=utf8mb4; +insert into t1 values (1,'Durban North'); +insert into t1 values (2,'Durban'); +select * from t1 where city = 'Durban'; +id city +2 Durban +select * from t1 where city = 'Durban '; +id city +2 Durban +drop table t1; +create table t1 (x set('A', 'B') default 0) character set utf8mb4; +ERROR 42000: Invalid default value for 'x' +create table t1 (x enum('A', 'B') default 0) character set utf8mb4; +ERROR 42000: Invalid default value for 'x' +SET NAMES UTF8; +CREATE TABLE t1 ( +`id` int(20) NOT NULL auto_increment, +`country` varchar(100) NOT NULL default '', +`shortcode` varchar(100) NOT NULL default '', +`operator` varchar(100) NOT NULL default '', +`momid` varchar(30) NOT NULL default '', 
+`keyword` varchar(160) NOT NULL default '', +`content` varchar(160) NOT NULL default '', +`second_token` varchar(160) default NULL, +`gateway_id` int(11) NOT NULL default '0', +`created` datetime NOT NULL default '0000-00-00 00:00:00', +`msisdn` varchar(15) NOT NULL default '', +PRIMARY KEY (`id`), +UNIQUE KEY `MSCCSPK_20030521130957121` (`momid`), +KEY `IX_mobile_originated_message_keyword` (`keyword`), +KEY `IX_mobile_originated_message_created` (`created`), +KEY `IX_mobile_originated_message_support` (`msisdn`,`momid`,`keyword`,`gateway_id`,`created`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES +(1,'blah','464','aaa','fkc1c9ilc20x0hgae7lx6j09','ERR','ERR Имри.Ðфимим.Ðеимимримдмримрмрирор имримримримр имридм ирбднримрфмририримрфмфмим.Ðд.Д имдимримрад.Ðдимримримрмдиримримримр м.Дадимфшьмримд им.Ðдимимрн имадми','ИМРИ.ÐФИМИМ.ÐЕИМИМРИМДМРИМРМРИРОР',3,'2005-06-01 17:30:43','1234567890'), +(2,'blah','464','aaa','haxpl2ilc20x00bj4tt2m5ti','11','11 g','G',3,'2005-06-02 22:43:10','1234567890'); +CREATE TABLE t2 ( +`msisdn` varchar(15) NOT NULL default '', +`operator_id` int(11) NOT NULL default '0', +`created` datetime NOT NULL default '0000-00-00 00:00:00', +UNIQUE KEY `PK_user` (`msisdn`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t2 VALUES ('1234567890',2,'2005-05-24 13:53:25'); +SELECT content, t2.msisdn FROM t1, t2 WHERE t1.msisdn = '1234567890'; +content msisdn +ERR Имри.Ðфимим.Ðеимимримдмримрмрирор имримримримр имридм ирбднримрфмририримрфмфмим.Ðд.Д имдимримрад.Ðдимримримрмдиримримримр м.Дадимфшьмримд им.Ðдимимрн имадми 1234567890 +11 g 1234567890 +DROP TABLE t1,t2; +create table t1 (a char(20) character set utf8mb4); +insert into t1 values ('123456'),('андрей'); +alter table t1 modify a char(2) character set utf8mb4; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +Warning 1265 Data truncated for column 'a' at row 2 +select char_length(a), length(a), a from t1 order by a; +char_length(a) length(a) a +2 2 12 +2 4 ан 
+drop table t1; +set names utf8mb4; +select 'andre%' like 'andreñ%' escape 'ñ'; +'andre%' like 'andreñ%' escape 'ñ' +1 +set names utf8mb4; +select 'a\\' like 'a\\'; +'a\\' like 'a\\' +1 +select 'aa\\' like 'a%\\'; +'aa\\' like 'a%\\' +1 +create table t1 (a char(10), key(a)) character set utf8mb4; +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +CREATE TABLE t1 ( +a varchar(255) NOT NULL default '', +KEY a (a) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_general_ci; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +insert into t1 values (_utf8mb4 0xe880bd); +insert into t1 values (_utf8mb4 0x5b); +select hex(a) from t1; +hex(a) +E880BD +5B +drop table t1; +set names 'latin1'; +create table t1 (a varchar(255)) default charset=utf8mb4; +select * from t1 where find_in_set('-1', a); +a +drop table t1; +create table t1 (a int); +insert into t1 values (48),(49),(50); +set names utf8mb4; +select distinct char(a) from t1; +char(a) +0 +1 +2 +drop table t1; +CREATE TABLE t1 (t TINYTEXT CHARACTER SET utf8mb4); +INSERT INTO t1 VALUES(REPEAT('a', 100)); +CREATE TEMPORARY TABLE t2 SELECT COALESCE(t) AS bug FROM t1; +SELECT LENGTH(bug) FROM t2; +LENGTH(bug) +100 +DROP TABLE t2; +DROP TABLE t1; +CREATE TABLE t1 (item varchar(255)) default character set utf8mb4; +INSERT INTO t1 VALUES (N'\\'); +INSERT INTO t1 VALUES (_utf8mb4'\\'); +INSERT INTO t1 VALUES (N'Cote d\'Ivoire'); +INSERT INTO t1 VALUES (_utf8mb4'Cote d\'Ivoire'); +SELECT item FROM t1 ORDER BY item; +item +Cote d'Ivoire +Cote 
d'Ivoire +\ +\ +DROP TABLE t1; +SET NAMES utf8mb4; +DROP TABLE IF EXISTS t1; +Warnings: +Note 1051 Unknown table 't1' +CREATE TABLE t1(a VARCHAR(255), KEY(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'); +INSERT INTO t1 VALUES('uu'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('uU'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('uu'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('uuABC'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('UuABC'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('uuABC'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +alter table t1 add b int; +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',1); +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',2); +delete from t1 where b=1; +INSERT INTO t1 
VALUES('UUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',1); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',3); +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',4); +delete from t1 where b=3; +INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',3); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; +before_delete_general_ci +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; +after_delete_general_ci +ペテルグル +drop table t1; +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_unicode_ci); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; 
+before_delete_unicode_ci +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; +after_delete_unicode_ci +ペテルグル +drop table t1; +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_bin); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; +before_delete_bin +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; +after_delete_bin +ペテルグル +drop table t1; +set names utf8mb4; +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_general_ci; +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as gci1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +gci1 +ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ +select a as gci2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +gci2 +ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã +drop table t1; +set names utf8mb4; +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_unicode_ci; +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as uci1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +uci1 +ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ +select a as uci2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +uci2 +ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã +drop table t1; +set names utf8mb4; +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_bin; +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as bin1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +bin1 +ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ +select a as bin2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +bin2 
+ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã +drop table t1; +SET NAMES utf8mb4; +CREATE TABLE t1 (id int PRIMARY KEY, +a varchar(16) collate utf8mb4_unicode_ci NOT NULL default '', +b int, +f varchar(128) default 'XXX', +INDEX (a(4)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +INSERT INTO t1(id, a, b) VALUES +(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30), +(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40), +(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50), +(10, 'eeeee', 40), (11, 'bbbbbb', 60); +SELECT id, a, b FROM t1; +id a b +1 cccc 50 +2 cccc 70 +3 cccc 30 +4 cccc 30 +5 cccc 20 +6 bbbbbb 40 +7 dddd 30 +8 aaaa 10 +9 aaaa 50 +10 eeeee 40 +11 bbbbbb 60 +SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb'; +id a b +8 aaaa 10 +9 aaaa 50 +6 bbbbbb 40 +11 bbbbbb 60 +SELECT id, a FROM t1 WHERE a='bbbbbb'; +id a +6 bbbbbb +11 bbbbbb +SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b; +id a +6 bbbbbb +11 bbbbbb +DROP TABLE t1; +SET NAMES utf8mb4; +CREATE TABLE t1 ( +a CHAR(13) DEFAULT '', +INDEX(a) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES +('Käli Käli 2-4'), ('Käli Käli 2-4'), +('Käli Käli 2+4'), ('Käli Käli 2+4'), +('Käli Käli 2-6'), ('Käli Käli 2-6'); +INSERT INTO t1 SELECT * FROM t1; +CREATE TABLE t2 ( +a CHAR(13) DEFAULT '', +INDEX(a) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci; +INSERT INTO t2 VALUES +('Kali Kali 2-4'), ('Kali Kali 2-4'), +('Kali Kali 2+4'), ('Kali Kali 2+4'), +('Kali Kali 2-6'), ('Kali Kali 2-6'); +INSERT INTO t2 SELECT * FROM t2; +SELECT a FROM t1 WHERE a LIKE 'Käli Käli 2+4'; +a +Käli Käli 2+4 +Käli Käli 2+4 +Käli Käli 2+4 +Käli Käli 2+4 +SELECT a FROM t2 WHERE a LIKE 'Kali Kali 2+4'; +a +Kali Kali 2+4 +Kali Kali 2+4 +Kali Kali 2+4 +Kali Kali 2+4 +EXPLAIN SELECT a FROM t1 WHERE a LIKE 'Käli Käli 2+4'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 53 NULL 4 Using where; Using index +EXPLAIN SELECT a FROM 
t1 WHERE a = 'Käli Käli 2+4'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 53 const 4 Using where; Using index +EXPLAIN SELECT a FROM t2 WHERE a LIKE 'Kali Kali 2+4'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 14 NULL 4 Using where; Using index +EXPLAIN SELECT a FROM t2 WHERE a = 'Kali Kali 2+4'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref a a 14 const 4 Using where; Using index +DROP TABLE t1,t2; +CREATE TABLE t1 ( +a char(255) DEFAULT '', +KEY(a(10)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +a +Käli Käli 2-4 +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +a +Käli Käli 2-4 +Käli Käli 2-4 +DROP TABLE t1; +CREATE TABLE t1 ( +a char(255) DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +a +Käli Käli 2-4 +Käli Käli 2-4 +ALTER TABLE t1 ADD KEY (a(10)); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +a +Käli Käli 2-4 +Käli Käli 2-4 +DROP TABLE t1; +SET NAMES latin2; +CREATE TABLE t1 ( +id int(11) NOT NULL default '0', +tid int(11) NOT NULL default '0', +val text NOT NULL, +INDEX idx(tid, val(10)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES +(40988,72,'VOLNÝ ADSL'),(41009,72,'VOLNÝ ADSL'), +(41032,72,'VOLNÝ ADSL'),(41038,72,'VOLNÝ ADSL'), +(41063,72,'VOLNÝ ADSL'),(41537,72,'VOLNÝ ADSL Office'), +(42141,72,'VOLNÝ ADSL'),(42565,72,'VOLNÝ ADSL Combi'), +(42749,72,'VOLNÝ ADSL'),(44205,72,'VOLNÝ ADSL'); +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNY ADSL'; +id tid val +40988 72 VOLNÝ ADSL +41009 72 VOLNÝ ADSL +41032 72 VOLNÝ ADSL +41038 72 VOLNÝ ADSL +41063 72 VOLNÝ ADSL +42141 72 VOLNÝ ADSL +42749 72 
VOLNÝ ADSL +44205 72 VOLNÝ ADSL +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNÝ ADSL'; +id tid val +40988 72 VOLNÝ ADSL +41009 72 VOLNÝ ADSL +41032 72 VOLNÝ ADSL +41038 72 VOLNÝ ADSL +41063 72 VOLNÝ ADSL +42141 72 VOLNÝ ADSL +42749 72 VOLNÝ ADSL +44205 72 VOLNÝ ADSL +SELECT * FROM t1 WHERE tid=72 and val LIKE '%VOLNÝ ADSL'; +id tid val +40988 72 VOLNÝ ADSL +41009 72 VOLNÝ ADSL +41032 72 VOLNÝ ADSL +41038 72 VOLNÝ ADSL +41063 72 VOLNÝ ADSL +42141 72 VOLNÝ ADSL +42749 72 VOLNÝ ADSL +44205 72 VOLNÝ ADSL +ALTER TABLE t1 DROP KEY idx; +ALTER TABLE t1 ADD KEY idx (tid,val(11)); +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNÝ ADSL'; +id tid val +40988 72 VOLNÝ ADSL +41009 72 VOLNÝ ADSL +41032 72 VOLNÝ ADSL +41038 72 VOLNÝ ADSL +41063 72 VOLNÝ ADSL +42141 72 VOLNÝ ADSL +42749 72 VOLNÝ ADSL +44205 72 VOLNÝ ADSL +DROP TABLE t1; +create table t1(a char(200) collate utf8mb4_unicode_ci NOT NULL default '') +default charset=utf8mb4 collate=utf8mb4_unicode_ci; +insert into t1 values (unhex('65')), (unhex('C3A9')), (unhex('65')); +explain select distinct a from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 Using temporary +select distinct a from t1; +a +e +explain select a from t1 group by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 Using temporary; Using filesort +select a from t1 group by a; +a +e +drop table t1; +create table t1(a char(10)) default charset utf8mb4; +insert into t1 values ('123'), ('456'); +explain +select substr(Z.a,-1), Z.a from t1 as Y join t1 as Z on Y.a=Z.a order by 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE Y ALL NULL NULL NULL NULL 2 Using temporary; Using filesort +1 SIMPLE Z ALL NULL NULL NULL NULL 2 Using where; Using join buffer +select substr(Z.a,-1), Z.a from t1 as Y join t1 as Z on Y.a=Z.a order by 1; +substr(Z.a,-1) a +3 123 +6 456 +drop table t1; +SET CHARACTER SET utf8mb4; 
+SHOW VARIABLES LIKE 'character\_set\_%'; +Variable_name Value +character_set_client utf8mb4 +character_set_connection latin1 +character_set_database latin1 +character_set_filesystem binary +character_set_results utf8mb4 +character_set_server latin1 +character_set_system utf8 +CREATE DATABASE crashtest DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; +USE crashtest; +CREATE TABLE crashtest (crash char(10)) DEFAULT CHARSET=utf8mb4; +INSERT INTO crashtest VALUES ('35'), ('36'), ('37'); +SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +crash +35 +36 +37 +INSERT INTO crashtest VALUES ('-1000'); +EXPLAIN SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE crashtest ALL NULL NULL NULL NULL 4 Using filesort +SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +crash +-1000 +35 +36 +37 +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FFFFFC' +DROP TABLE crashtest; +DROP DATABASE crashtest; +USE test; +SET CHARACTER SET default; +CREATE TABLE t1(id varchar(20) NOT NULL) DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES ('xxx'), ('aa'), ('yyy'), ('aa'); +SELECT id FROM t1; +id +xxx +aa +yyy +aa +SELECT DISTINCT id FROM t1; +id +xxx +aa +yyy +SELECT DISTINCT id FROM t1 ORDER BY id; +id +aa +xxx +yyy +DROP TABLE t1; +create table t1 ( +a varchar(26) not null +) default character set utf8mb4; +insert into t1 (a) values ('abcdefghijklmnopqrstuvwxyz'); +select * from t1; +a +abcdefghijklmnopqrstuvwxyz +alter table t1 change a a varchar(20) character set utf8mb4 not null; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select * from t1; +a +abcdefghijklmnopqrst +alter table t1 change a a char(15) character set utf8mb4 not null; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select * from t1; +a +abcdefghijklmno +alter table t1 change a a char(10) character set utf8mb4 not null; +Warnings: +Warning 1265 Data truncated for column 'a' 
at row 1 +select * from t1; +a +abcdefghij +alter table t1 change a a varchar(5) character set utf8mb4 not null; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select * from t1; +a +abcde +drop table t1; +create table t1 ( +a varchar(4000) not null +) default character set utf8mb4; +insert into t1 values (repeat('a',4000)); +alter table t1 change a a varchar(3000) character set utf8mb4 not null; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +select length(a) from t1; +length(a) +3000 +drop table t1; +set names utf8mb4; +select hex(char(1 using utf8mb4)); +hex(char(1 using utf8mb4)) +01 +select char(0xd1,0x8f using utf8mb4); +char(0xd1,0x8f using utf8mb4) +Ñ +select char(0xd18f using utf8mb4); +char(0xd18f using utf8mb4) +Ñ +select char(53647 using utf8mb4); +char(53647 using utf8mb4) +Ñ +select char(0xff,0x8f using utf8mb4); +char(0xff,0x8f using utf8mb4) + +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF8F' +select convert(char(0xff,0x8f) using utf8mb4); +convert(char(0xff,0x8f) using utf8mb4) + +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF8F' +set sql_mode=traditional; +select char(0xff,0x8f using utf8mb4); +char(0xff,0x8f using utf8mb4) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF8F' +select char(195 using utf8mb4); +char(195 using utf8mb4) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'C3' +select char(196 using utf8mb4); +char(196 using utf8mb4) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'C4' +select char(2557 using utf8mb4); +char(2557 using utf8mb4) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FD' +select convert(char(0xff,0x8f) using utf8mb4); +convert(char(0xff,0x8f) using utf8mb4) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF8F' +select hex(convert(char(2557 using latin1) using utf8mb4)); +hex(convert(char(2557 using latin1) using utf8mb4)) +09C3BD +select hex(char(195)); 
+hex(char(195)) +C3 +select hex(char(196)); +hex(char(196)) +C4 +select hex(char(2557)); +hex(char(2557)) +09FD +set names utf8mb4; +create table t1 (a char(1)) default character set utf8mb4; +create table t2 (a char(1)) default character set utf8mb4; +insert into t1 values('a'),('a'),(0xE38182),(0xE38182); +insert into t1 values('i'),('i'),(0xE38184),(0xE38184); +select * from t1 union distinct select * from t2; +a +a +ã‚ +i +ã„ +drop table t1,t2; +set names utf8mb4; +create table t1 (a char(10), b varchar(10)); +insert into t1 values ('bar','kostja'); +insert into t1 values ('kostja','bar'); +prepare my_stmt from "select * from t1 where a=?"; +set @a:='bar'; +execute my_stmt using @a; +a b +bar kostja +set @a:='kostja'; +execute my_stmt using @a; +a b +kostja bar +set @a:=null; +execute my_stmt using @a; +a b +drop table if exists t1; +drop table if exists t1; +drop view if exists v1, v2; +set names utf8mb4; +create table t1(col1 varchar(12) character set utf8mb4 collate utf8mb4_unicode_ci); +insert into t1 values('t1_val'); +create view v1 as select 'v1_val' as col1; +select coercibility(col1), collation(col1) from v1; +coercibility(col1) collation(col1) +4 utf8mb4_general_ci +create view v2 as select col1 from v1 union select col1 from t1; +select coercibility(col1), collation(col1)from v2; +coercibility(col1) collation(col1) +2 utf8mb4_unicode_ci +2 utf8mb4_unicode_ci +drop view v1, v2; +create view v1 as select 'v1_val' collate utf8mb4_swedish_ci as col1; +select coercibility(col1), collation(col1) from v1; +coercibility(col1) collation(col1) +0 utf8mb4_swedish_ci +create view v2 as select col1 from v1 union select col1 from t1; +select coercibility(col1), collation(col1) from v2; +coercibility(col1) collation(col1) +0 utf8mb4_swedish_ci +0 utf8mb4_swedish_ci +drop view v1, v2; +drop table t1; +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, N'x', N'y')) from t1; 
+concat(a, if(b>10, N'x', N'y')) +ay +select concat(a, if(b>10, N'æ', N'ß')) from t1; +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat' +drop table t1; +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, _utf8mb4'x', _utf8mb4'y')) from t1; +concat(a, if(b>10, _utf8mb4'x', _utf8mb4'y')) +ay +select concat(a, if(b>10, _utf8mb4'æ', _utf8mb4'ß')) from t1; +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation 'concat' +drop table t1; +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, _utf8mb4 0x78, _utf8mb4 0x79)) from t1; +concat(a, if(b>10, _utf8mb4 0x78, _utf8mb4 0x79)) +ay +select concat(a, if(b>10, _utf8mb4 0xC3A6, _utf8mb4 0xC3AF)) from t1; +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation 'concat' +drop table t1; +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1; +concat(a, if(b>10, 'x' 'x', 'y' 'y')) +ayy +select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1; +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation 'concat' +drop table t1; +SHOW ENGINES; +Engine Support Comment Transactions XA Savepoints +ndbcluster NO Clustered, fault-tolerant tables NULL NULL NULL +MRG_MYISAM YES Collection of identical MyISAM tables NO NO NO +MEMORY YES Hash based, stored in memory, useful for temporary tables NO NO NO +BLACKHOLE YES /dev/null storage engine (anything you write to it disappears) NO NO NO +CSV YES CSV storage engine NO NO NO +MyISAM DEFAULT Default engine as of MySQL 3.23 with great performance NO NO NO 
+ARCHIVE YES Archive storage engine NO NO NO +FEDERATED NO Federated MySQL storage engine NULL NULL NULL +PERFORMANCE_SCHEMA YES Performance Schema NO NO NO +InnoDB YES Supports transactions, row-level locking, and foreign keys YES YES YES +CREATE TABLE t1 ( +colA int(11) NOT NULL, +colB varchar(255) character set utf8mb4 NOT NULL, +PRIMARY KEY (colA) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +INSERT INTO t1 (colA, colB) VALUES (1, 'foo'), (2, 'foo bar'); +CREATE TABLE t2 ( +colA int(11) NOT NULL, +colB varchar(255) character set utf8mb4 NOT NULL, +KEY bad (colA,colB(3)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +INSERT INTO t2 (colA, colB) VALUES (1, 'foo'),(2, 'foo bar'); +SELECT * FROM t1 JOIN t2 ON t1.colA=t2.colA AND t1.colB=t2.colB +WHERE t1.colA < 3; +colA colB colA colB +1 foo 1 foo +2 foo bar 2 foo bar +DROP TABLE t1, t2; +SELECT 'н1234567890' UNION SELECT _binary '1'; +н1234567890 +н1234567890 +1 +SELECT 'н1234567890' UNION SELECT 1; +н1234567890 +н1234567890 +1 +SELECT '1' UNION SELECT 'н1234567890'; +1 +1 +н1234567890 +SELECT 1 UNION SELECT 'н1234567890'; +1 +1 +н1234567890 +CREATE TABLE t1 (c VARCHAR(11)) CHARACTER SET utf8mb4; +CREATE TABLE t2 (b CHAR(1) CHARACTER SET binary, i INT); +INSERT INTO t1 (c) VALUES ('н1234567890'); +INSERT INTO t2 (b, i) VALUES ('1', 1); +SELECT c FROM t1 UNION SELECT b FROM t2; +c +н1234567890 +1 +SELECT c FROM t1 UNION SELECT i FROM t2; +c +н1234567890 +1 +SELECT b FROM t2 UNION SELECT c FROM t1; +b +1 +н1234567890 +SELECT i FROM t2 UNION SELECT c FROM t1; +i +1 +н1234567890 +DROP TABLE t1, t2; +set sql_mode=traditional; +select hex(char(0xFF using utf8mb4)); +hex(char(0xFF using utf8mb4)) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF' +select hex(convert(0xFF using utf8mb4)); +hex(convert(0xFF using utf8mb4)) +NULL +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 0x616263FF); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 X'616263FF'); 
+ERROR HY000: Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 B'001111111111'); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +select (_utf8mb4 X'616263FF'); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +set sql_mode=default; +select hex(char(0xFF using utf8mb4)); +hex(char(0xFF using utf8mb4)) + +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF' +select hex(convert(0xFF using utf8mb4)); +hex(convert(0xFF using utf8mb4)) + +Warnings: +Warning 1300 Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 0x616263FF); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 X'616263FF'); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +select hex(_utf8mb4 B'001111111111'); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +select (_utf8mb4 X'616263FF'); +ERROR HY000: Invalid utf8mb4 character string: 'FF' +CREATE TABLE t1 (a INT NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES (70000, 1092), (70001, 1085), (70002, 1065); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +CONVERT(a, CHAR) CONVERT(b, CHAR) +70002 1065 +70001 1085 +70000 1092 +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1; +CONVERT(a, CHAR) CONVERT(b, CHAR) +70000 1092 +70001 1085 +70002 1065 +ALTER TABLE t1 ADD UNIQUE (b); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +CONVERT(a, CHAR) CONVERT(b, CHAR) +70002 1065 +70001 1085 +70000 1092 +DROP INDEX b ON t1; +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +CONVERT(a, CHAR) CONVERT(b, CHAR) +70002 1065 +70001 1085 +70000 1092 +ALTER TABLE t1 ADD INDEX (b); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b; +CONVERT(a, CHAR) CONVERT(b, CHAR) +70002 1065 +70001 1085 +70000 1092 +DROP TABLE t1; +# +# Bug#26474: Add Sinhala script (Sri Lanka) collation to MySQL +# +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 ( +predicted_order int NOT NULL, +utf8mb4_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES (19, 
x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682'); +SELECT predicted_order, hex(utf8mb4_encoding) FROM t1 ORDER BY utf8mb4_encoding COLLATE utf8mb4_sinhala_ci; +predicted_order hex(utf8mb4_encoding) +1 
E0B7B4 +2 E0B685 +3 E0B686 +4 E0B687 +5 E0B688 +6 E0B689 +7 E0B68A +8 E0B68B +9 E0B68C +10 E0B68D +11 E0B68E +12 E0B68F +13 E0B690 +14 E0B691 +15 E0B692 +16 E0B693 +17 E0B694 +18 E0B695 +19 E0B696 +20 E0B682 +21 E0B683 +22 E0B69A +23 E0B69AE0B78F +24 E0B69AE0B790 +25 E0B69AE0B791 +26 E0B69AE0B792 +27 E0B69AE0B793 +28 E0B69AE0B794 +29 E0B69AE0B796 +30 E0B69AE0B798 +31 E0B69AE0B7B2 +32 E0B69AE0B79F +33 E0B69AE0B7B3 +34 E0B69AE0B799 +35 E0B69AE0B79A +36 E0B69AE0B79B +37 E0B69AE0B79C +38 E0B69AE0B79D +39 E0B69AE0B79E +40 E0B69AE0B78A +41 E0B69B +42 E0B69C +43 E0B69D +44 E0B69E +45 E0B69F +46 E0B6A0 +47 E0B6A1 +48 E0B6A2 +49 E0B6A3 +50 E0B6A5 +51 E0B6A4 +52 E0B6A6 +53 E0B6A7 +54 E0B6A8 +55 E0B6A9 +56 E0B6AA +57 E0B6AB +58 E0B6AC +59 E0B6AD +60 E0B6AE +61 E0B6AF +62 E0B6B0 +63 E0B6B1 +64 E0B6B3 +65 E0B6B4 +66 E0B6B5 +67 E0B6B6 +68 E0B6B7 +69 E0B6B8 +70 E0B6B9 +71 E0B6BA +72 E0B6BB +73 E0B6BBE0B78AE2808D +74 E0B6BD +75 E0B780 +76 E0B781 +77 E0B782 +78 E0B783 +79 E0B784 +80 E0B785 +81 E0B786 +82 E0B78F +83 E0B790 +84 E0B791 +85 E0B792 +86 E0B793 +87 E0B794 +88 E0B796 +89 E0B798 +90 E0B7B2 +91 E0B79F +92 E0B7B3 +93 E0B799 +94 E0B79A +95 E0B79B +96 E0B79C +97 E0B79D +98 E0B79E +99 E0B78A +100 E0B78AE2808DE0B6BA +101 E0B78AE2808DE0B6BB +DROP TABLE t1; +# +# Bug#32914 Character sets: illegal characters in utf8mb4 and utf32 columns +# +create table t1 (utf8mb4 char(1) character set utf8mb4); +Testing [F0][90..BF][80..BF][80..BF] +insert into t1 values (0xF0908080); +insert into t1 values (0xF0BFBFBF); +insert into t1 values (0xF08F8080); +Warnings: +Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1 +select hex(utf8mb4) from t1; +hex(utf8mb4) +F0908080 +F0BFBFBF + +delete from t1; +Testing [F2..F3][80..BF][80..BF][80..BF] +insert into t1 values (0xF2808080); +insert into t1 values (0xF2BFBFBF); +select hex(utf8mb4) from t1; +hex(utf8mb4) +F2808080 +F2BFBFBF +delete from t1; +Testing [F4][80..8F][80..BF][80..BF] +insert into t1 values 
(0xF4808080); +insert into t1 values (0xF48F8080); +insert into t1 values (0xF4908080); +Warnings: +Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1 +select hex(utf8mb4) from t1; +hex(utf8mb4) +F4808080 +F48F8080 + +drop table t1; +# +# Check strnxfrm() with odd length +# +set max_sort_length=5; +select @@max_sort_length; +@@max_sort_length +5 +create table t1 (a varchar(128) character set utf8mb4 collate utf8mb4_general_ci); +insert into t1 values ('a'),('b'),('c'); +select * from t1 order by a; +a +a +b +c +alter table t1 modify a varchar(128) character set utf8mb4 collate utf8mb4_bin; +select * from t1 order by a; +a +a +b +c +drop table t1; +set max_sort_length=default; +# +# Bug#26180: Can't add columns to tables created with utf8mb4 text indexes +# +CREATE TABLE t1 ( +clipid INT NOT NULL, +Tape TINYTEXT, +PRIMARY KEY (clipid), +KEY tape(Tape(255)) +) CHARACTER SET=utf8mb4; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +ALTER TABLE t1 ADD mos TINYINT DEFAULT 0 AFTER clipid; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `clipid` int(11) NOT NULL, + `mos` tinyint(4) DEFAULT '0', + `Tape` tinytext, + PRIMARY KEY (`clipid`), + KEY `tape` (`Tape`(250)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 +DROP TABLE t1; +# +# Testing 4-byte values. 
+# +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 ( +u_decimal int NOT NULL, +utf8mb4_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES (119040, x'f09d8480'), +# G CLEF +(119070, x'f09d849e'), +# HALF NOTE +(119134, x'f09d859e'), +# MUSICAL SYMBOL CROIX +(119247, x'f09d878f'), +# MATHEMATICAL BOLD ITALIC CAPITAL DELTA +(120607, x'f09d9c9f'), +# SANS-SERIF BOLD ITALIC CAPITAL PI +(120735, x'f09d9e9f'), +# <Plane 16 Private Use, Last> (last 4 byte character) +(1114111, x'f48fbfbf'), +# VARIATION SELECTOR-256 +(917999, x'f3a087af'); +INSERT INTO t1 VALUES (119070, x'f09d849ef09d859ef09d859ef09d8480f09d859ff09d859ff09d859ff09d85a0f09d85a0f09d8480'); +INSERT INTO t1 VALUES (65131, x'efb9abf09d849ef09d859ef09d859ef09d8480f09d859fefb9abefb9abf09d85a0efb9ab'); +INSERT INTO t1 VALUES (119070, x'f09d849ef09d859ef09d859ef09d8480f09d859ff09d859ff09d859ff09d85a0f09d85a0f09d8480f09d85a0'); +Warnings: +Warning 1265 Data truncated for column 'utf8mb4_encoding' at row 1 +SELECT u_decimal, hex(utf8mb4_encoding) FROM t1 ORDER BY utf8mb4_encoding COLLATE utf8mb4_general_ci, BINARY utf8mb4_encoding; +u_decimal hex(utf8mb4_encoding) +65131 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB +119040 F09D8480 +119070 F09D849E +119134 F09D859E +119247 F09D878F +120607 F09D9C9F +120735 F09D9E9F +917999 F3A087AF +1114111 F48FBFBF +119070 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480 +119070 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480 +INSERT INTO t1 VALUES (1114111, x'f5808080'); +Warnings: +Warning 1366 Incorrect string value: '\xF5\x80\x80\x80' for column 'utf8mb4_encoding' at row 1 +DROP TABLE IF EXISTS t2; +CREATE TABLE t2 ( +u_decimal int NOT NULL, +utf8mb3_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb3; +INSERT INTO t2 VALUES (42856, x'ea9da8'); +INSERT INTO t2 VALUES (65131, x'efb9ab'); +INSERT INTO t2 VALUES (1114111, x'f48fbfbf'); +Warnings: +Warning 1366 
Incorrect string value: '\xF4\x8F\xBF\xBF' for column 'utf8mb3_encoding' at row 1 +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `u_decimal` int(11) NOT NULL, + `utf8mb4_encoding` varchar(10) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `u_decimal` int(11) NOT NULL, + `utf8mb3_encoding` varchar(10) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8 +DROP TABLE t1; +DROP TABLE t2; +# +# Testing that mixing utf8 and utf8mb4 collations returns utf8mb4 +# +SELECT CHARSET(CONCAT(_utf8mb4'a',_utf8'b')); +CHARSET(CONCAT(_utf8mb4'a',_utf8'b')) +utf8mb4 +CREATE TABLE t1 (utf8mb4 VARCHAR(10) CHARACTER SET utf8mb4 NOT NULL); +INSERT INTO t1 VALUES (x'ea9da8'),(x'f48fbfbf'); +SELECT CONCAT(utf8mb4, _utf8 x'ea9da8') FROM t1 LIMIT 0; +CONCAT(utf8mb4, _utf8 x'ea9da8') +CREATE TABLE t2 (utf8mb3 VARCHAR(10) CHARACTER SET utf8mb3 NOT NULL); +INSERT INTO t2 VALUES (x'ea9da8'); +SELECT HEX(CONCAT(utf8mb4, utf8mb3)) FROM t1,t2 ORDER BY 1; +HEX(CONCAT(utf8mb4, utf8mb3)) +EA9DA8EA9DA8 +F48FBFBFEA9DA8 +SELECT CHARSET(CONCAT(utf8mb4, utf8mb3)) FROM t1, t2 LIMIT 1; +CHARSET(CONCAT(utf8mb4, utf8mb3)) +utf8mb4 +CREATE TEMPORARY TABLE t3 AS SELECT *, concat(utf8mb4,utf8mb3) FROM t1, t2; +SHOW CREATE TABLE t3; +Table Create Table +t3 CREATE TEMPORARY TABLE `t3` ( + `utf8mb4` varchar(10) CHARACTER SET utf8mb4 NOT NULL, + `utf8mb3` varchar(10) CHARACTER SET utf8 NOT NULL, + `concat(utf8mb4,utf8mb3)` varchar(20) CHARACTER SET utf8mb4 NOT NULL DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TEMPORARY TABLE t3; +SELECT * FROM t1, t2 WHERE t1.utf8mb4 > t2.utf8mb3; +utf8mb4 utf8mb3 +ô¿¿ ê¨ +SELECT * FROM t1, t2 WHERE t1.utf8mb4 = t2.utf8mb3; +utf8mb4 utf8mb3 +ê¨ ê¨ +SELECT * FROM t1, t2 WHERE t1.utf8mb4 < t2.utf8mb3; +utf8mb4 utf8mb3 +DROP TABLE t1; +DROP TABLE t2; +# +# Check that mixing utf8mb4 with an invalid utf8 constant returns error +# +CREATE TABLE t1 (utf8mb4 VARCHAR(10) CHARACTER SET 
utf8mb4); +INSERT INTO t1 VALUES (x'f48fbfbf'); +SELECT CONCAT(utf8mb4, _utf8 '¿') FROM t1; +ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat' +SELECT CONCAT('a', _utf8 '¿') FROM t1; +ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,COERCIBLE) and (utf8_general_ci,COERCIBLE) for operation 'concat' +DROP TABLE t1; +# +# End of 5.5 tests +# +# +# End of tests +# diff --git a/mysql-test/r/have_utf16.require b/mysql-test/r/have_utf16.require new file mode 100644 index 00000000000..a1b5469cad7 --- /dev/null +++ b/mysql-test/r/have_utf16.require @@ -0,0 +1,2 @@ +Collation Charset Id Default Compiled Sortlen +utf16_general_ci utf16 54 Yes Yes 1 diff --git a/mysql-test/r/have_utf32.require b/mysql-test/r/have_utf32.require new file mode 100644 index 00000000000..710ddb8090c --- /dev/null +++ b/mysql-test/r/have_utf32.require @@ -0,0 +1,2 @@ +Collation Charset Id Default Compiled Sortlen +utf32_general_ci utf32 60 Yes Yes 1 diff --git a/mysql-test/r/have_utf8mb4.require b/mysql-test/r/have_utf8mb4.require new file mode 100644 index 00000000000..1669b332d0d --- /dev/null +++ b/mysql-test/r/have_utf8mb4.require @@ -0,0 +1,2 @@ +Collation Charset Id Default Compiled Sortlen +utf8mb4_general_ci utf8mb4 45 Yes Yes 1 diff --git a/mysql-test/std_data/Index.xml b/mysql-test/std_data/Index.xml index e4964a33419..27358fefd54 100644 --- a/mysql-test/std_data/Index.xml +++ b/mysql-test/std_data/Index.xml @@ -33,6 +33,36 @@ </collation> </charset> + + <charset name="utf8mb4"> + <collation name="utf8mb4_test_ci" id="326"> + <rules> + <reset>a</reset> + <s>b</s> + </rules> + </collation> + </charset> + + <charset name="utf16"> + <collation name="utf16_test_ci" id="327"> + <rules> + <reset>a</reset> + <s>b</s> + </rules> + </collation> + </charset> + + + <charset name="utf32"> + <collation name="utf32_test_ci" id="391"> + <rules> + <reset>a</reset> + <s>b</s> + </rules> + </collation> + </charset> + + 
<charset name="ucs2"> <collation name="ucs2_test_ci" id="358"> <rules> diff --git a/mysql-test/suite/sys_vars/r/character_set_client_basic.result b/mysql-test/suite/sys_vars/r/character_set_client_basic.result index 78993015a24..ed3612627e6 100644 --- a/mysql-test/suite/sys_vars/r/character_set_client_basic.result +++ b/mysql-test/suite/sys_vars/r/character_set_client_basic.result @@ -162,8 +162,16 @@ SET @@character_set_client = utf8; SELECT @@character_set_client; @@character_set_client utf8 +SET @@character_set_client = utf8mb4; +SELECT @@character_set_client; +@@character_set_client +utf8mb4 SET @@character_set_client = ucs2; ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2' +SET @@character_set_client = utf16; +ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16' +SET @@character_set_client = utf32; +ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32' SET @@character_set_client = cp866; SELECT @@character_set_client; @@character_set_client @@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100' SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS); SELECT @total_charset; @total_charset -36 +39 '#--------------------FN_DYNVARS_010_10-------------------------#' SET @@character_set_client = abc; ERROR 42000: Unknown character set: 'abc' diff --git a/mysql-test/suite/sys_vars/r/character_set_connection_basic.result b/mysql-test/suite/sys_vars/r/character_set_connection_basic.result index dd1052d27ef..feb9073e175 100644 --- a/mysql-test/suite/sys_vars/r/character_set_connection_basic.result +++ b/mysql-test/suite/sys_vars/r/character_set_connection_basic.result @@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100' SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS); SELECT @total_charset; @total_charset -36 +39 '#--------------------FN_DYNVARS_011_10-------------------------#' SET @@character_set_connection = abc; 
ERROR 42000: Unknown character set: 'abc' diff --git a/mysql-test/suite/sys_vars/r/character_set_database_basic.result b/mysql-test/suite/sys_vars/r/character_set_database_basic.result index 9aa449816f5..669af6a610c 100644 --- a/mysql-test/suite/sys_vars/r/character_set_database_basic.result +++ b/mysql-test/suite/sys_vars/r/character_set_database_basic.result @@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100' SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS); SELECT @total_charset; @total_charset -36 +39 '#--------------------FN_DYNVARS_012_10-------------------------#' SET @@character_set_database = "grek"; ERROR 42000: Unknown character set: 'grek' diff --git a/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result b/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result index e33df353c74..e8d41d99437 100644 --- a/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result +++ b/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result @@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100' SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS); SELECT @total_charset; @total_charset -36 +39 '#--------------------FN_DYNVARS_008_10-------------------------#' SET @@character_set_filesystem = abc; ERROR 42000: Unknown character set: 'abc' diff --git a/mysql-test/suite/sys_vars/r/character_set_results_basic.result b/mysql-test/suite/sys_vars/r/character_set_results_basic.result Binary files differindex 0b59f60cd45..6b50f128af8 100644 --- a/mysql-test/suite/sys_vars/r/character_set_results_basic.result +++ b/mysql-test/suite/sys_vars/r/character_set_results_basic.result diff --git a/mysql-test/suite/sys_vars/t/character_set_client_basic.test b/mysql-test/suite/sys_vars/t/character_set_client_basic.test index 26912e89509..7b3d36fb16f 100644 --- a/mysql-test/suite/sys_vars/t/character_set_client_basic.test +++ 
b/mysql-test/suite/sys_vars/t/character_set_client_basic.test @@ -27,6 +27,9 @@ --source include/have_sjis.inc --source include/have_utf8.inc --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source include/have_utf32.inc --source include/load_sysvars.inc ################################################### @@ -163,9 +166,15 @@ SET @@character_set_client = armscii8; SELECT @@character_set_client; SET @@character_set_client = utf8; SELECT @@character_set_client; +SET @@character_set_client = utf8mb4; +SELECT @@character_set_client; --error ER_WRONG_VALUE_FOR_VAR SET @@character_set_client = ucs2; +--error ER_WRONG_VALUE_FOR_VAR +SET @@character_set_client = utf16; +--error ER_WRONG_VALUE_FOR_VAR +SET @@character_set_client = utf32; SET @@character_set_client = cp866; SELECT @@character_set_client; diff --git a/mysql-test/suite/sys_vars/t/character_set_connection_basic.test b/mysql-test/suite/sys_vars/t/character_set_connection_basic.test index 1a51843e0e8..e402dbb0ff5 100644 --- a/mysql-test/suite/sys_vars/t/character_set_connection_basic.test +++ b/mysql-test/suite/sys_vars/t/character_set_connection_basic.test @@ -27,6 +27,9 @@ --source include/have_sjis.inc --source include/have_utf8.inc --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source include/have_utf32.inc --source include/load_sysvars.inc ################################################### diff --git a/mysql-test/suite/sys_vars/t/character_set_database_basic.test b/mysql-test/suite/sys_vars/t/character_set_database_basic.test index 07b4b31b25c..4e650dc6139 100644 --- a/mysql-test/suite/sys_vars/t/character_set_database_basic.test +++ b/mysql-test/suite/sys_vars/t/character_set_database_basic.test @@ -27,6 +27,9 @@ --source include/have_sjis.inc --source include/have_utf8.inc --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source 
include/have_utf32.inc --source include/load_sysvars.inc ################################################### diff --git a/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test b/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test index 928d02ee46b..6bb2de35429 100644 --- a/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test +++ b/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test @@ -27,6 +27,9 @@ --source include/have_sjis.inc --source include/have_utf8.inc --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source include/have_utf32.inc --source include/load_sysvars.inc diff --git a/mysql-test/suite/sys_vars/t/character_set_results_basic.test b/mysql-test/suite/sys_vars/t/character_set_results_basic.test index 98635c00287..1bede9e493c 100644 --- a/mysql-test/suite/sys_vars/t/character_set_results_basic.test +++ b/mysql-test/suite/sys_vars/t/character_set_results_basic.test @@ -27,6 +27,9 @@ --source include/have_sjis.inc --source include/have_utf8.inc --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source include/have_utf32.inc --source include/load_sysvars.inc ################################################ diff --git a/mysql-test/t/ctype_ldml.test b/mysql-test/t/ctype_ldml.test index 99d9cd5a866..d2f06a84a8c 100644 --- a/mysql-test/t/ctype_ldml.test +++ b/mysql-test/t/ctype_ldml.test @@ -1,4 +1,7 @@ --source include/have_ucs2.inc +--source include/have_utf8mb4.inc +--source include/have_utf16.inc +--source include/have_utf32.inc --disable_warnings drop table if exists t1; @@ -40,6 +43,24 @@ insert into t1 values ('a'); select * from t1 where c1='b'; drop table t1; +show collation like 'utf8mb4_test_ci'; +create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +drop table t1; + +show collation like 'utf16_test_ci'; +create table t1 
(c1 char(1) character set utf16 collate utf16_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +drop table t1; + +show collation like 'utf32_test_ci'; +create table t1 (c1 char(1) character set utf32 collate utf32_test_ci); +insert into t1 values ('a'); +select * from t1 where c1='b'; +drop table t1; + # # Bug#41084 full-text index added to custom UCA collation not working diff --git a/mysql-test/t/ctype_many.test b/mysql-test/t/ctype_many.test index 0903c3dd7fa..67726c53585 100644 --- a/mysql-test/t/ctype_many.test +++ b/mysql-test/t/ctype_many.test @@ -1,4 +1,7 @@ -- source include/have_ucs2.inc +-- source include/have_utf8mb4.inc +-- source include/have_utf16.inc +-- source include/have_utf32.inc --disable_warnings DROP TABLE IF EXISTS t1; @@ -211,3 +214,73 @@ SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f; DROP TABLE t1; # End of 4.1 tests + + +--echo # +--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32 +--echo # Testing that only utf8mb4 is superset for utf8 +--echo # No other Unicode character set pairs have superset/subset relations +--echo # + +CREATE TABLE t1 ( + utf8 CHAR CHARACTER SET utf8, + utf8mb4 CHAR CHARACTER SET utf8mb4, + ucs2 CHAR CHARACTER SET ucs2, + utf16 CHAR CHARACTER SET utf16, + utf32 CHAR CHARACTER SET utf32 +); +INSERT INTO t1 VALUES ('','','','',''); + +# utf8mb4 is superset only for utf8 +SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1; + + +# utf8mb4 is superset only for utf8 +SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT 
CHARSET(CONCAT(utf8mb4, utf32)) FROM t1; + + +# ucs2 is not a superset for the other Unicode character sets +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1; + + +# utf16 is not a superset for the other Unicode character sets +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1; + + +# utf32 is not a superset for the other Unicode character sets +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1; + +DROP TABLE t1; diff --git a/mysql-test/t/ctype_utf16.test b/mysql-test/t/ctype_utf16.test new file mode 100644 index 00000000000..2e0a9e644b1 --- /dev/null +++ b/mysql-test/t/ctype_utf16.test @@ -0,0 +1,731 @@ +-- source include/have_utf16.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +SET NAMES latin1; +SET character_set_connection=utf16; +select hex('a'), hex('a '); +-- source include/endspace.inc + + +# Check that incomplete utf16 characters in HEX notation +# are left-padded with zeros +# +select hex(_utf16 0x44); +select hex(_utf16 0x3344); +select hex(_utf16 0x113344); + + +# Check that 0x20 is only trimmed when it is +# a part of 
real SPACE character, not just a part +# of a multibyte sequence. +# Note, CYRILLIC LETTER ER is used as an example, which +# is stored as 0x0420 in utf16, thus contains 0x20 in the +# low byte. The second character is THREE-PER-M, U+2004, +# which contains 0x20 in the high byte. + +CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16; +INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DELETE FROM t1; + +# +# Check that real spaces are correctly trimmed. +# +INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DROP TABLE t1; + + +# +# Check LPAD/RPAD +# +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421')); +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422')); +SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423')); +SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')); +SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')); +SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')); + +SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421')); +SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422')); +SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423')); +SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423')); +SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421')); +SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00')); + +CREATE TABLE t1 SELECT +LPAD(_utf16 X'0420',10,_utf16 X'0421') l, +RPAD(_utf16 X'0420',10,_utf16 X'0421') r; +SHOW CREATE TABLE t1; +select hex(l), hex(r) from t1; +DROP TABLE t1; + +create table t1 (f1 char(30)); +insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000"); +select lpad(f1, 12, "-o-/") from t1; +drop table t1; + +###################################################### 
+# +# Test of like +# + +SET NAMES latin1; +SET character_set_connection=utf16; +--source include/ctype_like.inc + +SET NAMES utf8; +SET character_set_connection=utf16; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16); +INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывÐ'),('ФЫВÐ'); +INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж'); +INSERT INTO t1 VALUES ('фывÐпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж'); +INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВÐПРОЛДЖ'); +SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a; +DROP TABLE t1; + +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +ENGINE=MyISAM CHARACTER SET utf16; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +SELECT * FROM t1 WHERE word LIKE "ca_"; +SELECT * FROM t1 WHERE word LIKE "cat"; +SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025'; # "c%" +SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; # "ca_" +DROP TABLE t1; + + +# +# Check that INSERT() works fine. +# This invokes charpos() function. +select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066); +select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066); + +######################################################## +# +# Bug 1264 +# +# Description: +# +# When using a ucs2 table in MySQL, +# either with ucs2_general_ci or ucs2_bin collation, +# words are returned in an incorrect order when using ORDER BY +# on an _indexed_ CHAR or VARCHAR column. They are sorted with +# the longest word *first* instead of last. I.E. The word "aardvark" +# is in the results before the word "a". +# +# If there is no index for the column, the problem does not occur. 
+# +# Interestingly, if there is no second column, the words are returned +# in the correct order. +# +# According to EXPLAIN, it looks like when the output includes columns that +# are not part of the index sorted on, it does a filesort, which fails. +# Using a straight index yields correct results. + +SET NAMES latin1; + +# +# Two fields, index +# + +CREATE TABLE t1 ( + word VARCHAR(64), + bar INT(11) default 0, + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf16 + COLLATE utf16_general_ci ; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER by word; +DROP TABLE t1; + + +# +# One field, index +# + +CREATE TABLE t1 ( + word VARCHAR(64) , + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf16 + COLLATE utf16_general_ci; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY WORD; +SELECT * FROM t1 ORDER BY word; +DROP TABLE t1; + + +# +# Two fields, no index +# + +CREATE TABLE t1 ( + word TEXT, + bar INT(11) AUTO_INCREMENT, + PRIMARY KEY (bar)) + ENGINE=MyISAM + CHARSET utf16 + COLLATE utf16_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a" ); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER 
BY word; +DROP TABLE t1; + +# +# END OF Bug 1264 test +# +######################################################## + + +# +# Check alignment for from-binary-conversion with CAST and CONVERT +# +SELECT hex(cast(0xAA as char character set utf16)); +SELECT hex(convert(0xAA using utf16)); + +# +# Check alignment for string types +# +CREATE TABLE t1 (a char(10) character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a varchar(10) character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a text character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a mediumtext character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a longtext character set utf16); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +## +## Bug #5024 Server crashes with queries on fields +## with certain charset/collation settings +## +## +#create table t1 (s1 char character set utf16 collate utf16_czech_ci); +#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c'); +#select s1 from t1 where s1 > 'a' order by s1; +#drop table t1; +# + +# +# Bug #5081 : UCS2 fields are filled with '0x2020' +# after extending field length +# +create table t1(a char(1)) default charset utf16; +insert into t1 values ('a'),('b'),('c'); +alter table t1 modify a char(5); +select a, hex(a) from t1; +drop table t1; + +# +# Check prepare statement from an UTF16 string +# +set @ivar= 1234; +set @str1 = 'select ?'; +set @str2 = convert(@str1 using utf16); +prepare stmt1 from @str2; +execute stmt1 using @ivar; + +# +# Check that utf16 works with ENUM and SET type +# +set names utf8; +create table t1 (a enum('x','y','z') 
character set utf16); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +select a, hex(a) from t1 order by a; +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16; +show create table t1; +insert into t1 values ('D'); +insert into t1 values ('E '); +insert into t1 values ('ä'); +insert into t1 values ('ö'); +insert into t1 values ('ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +insert into t1 values ('x,y'); +insert into t1 values ('x,y,z,ä,ö,ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +# +# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added +# +create table t1(a enum('a','b','c')) default character set utf16; +insert into t1 values('a'),('b'),('c'); +alter table t1 add b char(1); +show warnings; +select * from t1 order by a; +drop table t1; + +SET NAMES latin1; +SET collation_connection='utf16_general_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc +SET NAMES latin1; +SET collation_connection='utf16_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc + +# +# Bug#10344 Some string functions fail for UCS2 +# +select hex(substr(_utf16 0x00e400e50068,1)); +select hex(substr(_utf16 0x00e400e50068,2)); +select hex(substr(_utf16 0x00e400e50068,3)); +select hex(substr(_utf16 0x00e400e50068,-1)); +select hex(substr(_utf16 0x00e400e50068,-2)); +select hex(substr(_utf16 0x00e400e50068,-3)); +select hex(substr(_utf16 0x00e400e5D800DC00,1)); +select hex(substr(_utf16 0x00e400e5D800DC00,2)); +select hex(substr(_utf16 0x00e400e5D800DC00,3)); +select hex(substr(_utf16 0x00e400e5D800DC00,-1)); +select hex(substr(_utf16 0x00e400e5D800DC00,-2)); +select hex(substr(_utf16 
0x00e400e5D800DC00,-3)); + +SET NAMES latin1; + +## +## Bug#8235 +## +## This bug also helped to find another problem that +## INSERT of a UCS2 string containing a negative number +## into a unsigned int column didn't produce warnings. +## This test covers both problems. +## +##SET collation_connection='ucs2_swedish_ci'; +##CREATE TABLE t1 (Field1 int(10) default '0'); +### no warnings, negative numbers are allowed +##INSERT INTO t1 VALUES ('-1'); +##SELECT * FROM t1; +##DROP TABLE t1; +##CREATE TABLE t1 (Field1 int(10) unsigned default '0'); +### this should generate a "Data truncated" warning +##INSERT INTO t1 VALUES ('-1'); +##DROP TABLE t1; +##SET NAMES latin1; + +### +### Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation +### +##--disable_warnings +##create table t1(f1 varchar(5) CHARACTER SET utf16 COLLATE utf16_bin NOT NULL) engine=InnoDB; +##--enable_warnings +##insert into t1 values('a'); +##create index t1f1 on t1(f1); +##select f1 from t1 where f1 like 'a%'; +##drop table t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf16); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index +# +create table t1 ( + a char(10) character set utf16 not null, + index a (a) +) engine=myisam; +insert into t1 values (repeat(0x201f, 10)); +insert into t1 values (repeat(0x2020, 10)); +insert into t1 values (repeat(0x2021, 10)); +# make sure "index read" is used +explain select hex(a) from t1 order by a; +select hex(a) from t1 order by a; +alter table t1 drop index a; +select hex(a) from t1 order by a; +drop table t1; + +## +## Bug #20076: server 
crashes for a query with GROUP BY if MIN/MAX aggregation +## over a 'ucs2' field uses a temporary table +## +##CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci); +##INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ'); +##SELECT id, MIN(s) FROM t1 GROUP BY id; +##DROP TABLE t1; + +### +### Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb +### +## +##--disable_warnings +##drop table if exists bug20536; +##--enable_warnings +## +##set names latin1; +##create table bug20536 (id bigint not null auto_increment primary key, name +##varchar(255) character set ucs2 not null); +##insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'"); +##select md5(name) from bug20536; +##select sha1(name) from bug20536; +##select make_set(3, name, upper(name)) from bug20536; +##select export_set(5, name, upper(name)) from bug20536; +##select export_set(5, name, upper(name), ",", 5) from bug20536; + +# +# Bug #20108: corrupted default enum value for a ucs2 field +# + +CREATE TABLE t1 ( + status enum('active','passive') character set utf16 collate utf16_general_ci + NOT NULL default 'passive' +); +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD a int NOT NULL AFTER status; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +##CREATE TABLE t2 ( +## status enum('active','passive') collate ucs2_turkish_ci +## NOT NULL default 'passive' +##); +##SHOW CREATE TABLE t2; +##ALTER TABLE t2 ADD a int NOT NULL AFTER status; +##DROP TABLE t2; + + +--echo End of 4.1 tests + +# +# Conversion from an UTF16 string to a decimal column +# +CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3)); +INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); +update t1 set b=a; +SELECT *, hex(a) FROM t1; +DROP TABLE t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf16); +insert into t1 values ("lily"); +insert into t1 values 
("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22638 SOUNDEX broken for international characters +# +set names latin1; +set character_set_connection=utf16; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +select 'mood' sounds like 'mud'; +# Cyrillic A, BE, VE +select hex(soundex(_utf16 0x041004110412)); +# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter +select hex(soundex(_utf16 0x00BF00C0)); +set names latin1; + +# +# Bug #14290: character_maximum_length for text fields +# +create table t1(a blob, b text charset utf16); +select data_type, character_octet_length, character_maximum_length + from information_schema.columns where table_name='t1'; +drop table t1; + + +set names latin1; +set collation_connection=utf16_general_ci; +# +# Testing cs->coll->instr() +# +select position('bb' in 'abba'); + +# +# Testing cs->coll->hash_sort() +# +create table t1 (a varchar(10) character set utf16) engine=heap; +insert into t1 values ('a'),('A'),('b'),('B'); +select * from t1 where a='a' order by binary a; +select hex(min(binary a)),count(*) from t1 group by a; +drop table t1; + +# +# Testing cs->cset->numchars() +# +select char_length('abcd'), octet_length('abcd'); +select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00); +select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF); + +# +# Testing cs->cset->charpos() +# +select left('abcd',2); +select hex(left(_utf16 0xD800DC00D87FDFFF, 1)); +select hex(right(_utf16 0xD800DC00D87FDFFF, 1)); + +# +# Testing cs->cset->well_formed_length() +# +create table t1 (a varchar(10) character set utf16); +# Bad sequences +--error 
ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf16 0xD800); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf16 0xDC00); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf16 0xD800D800); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf16 0xD800E800); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf16 0xD8000800); +# Good sequences +insert into t1 values (_utf16 0xD800DC00); +insert into t1 values (_utf16 0xD800DCFF); +insert into t1 values (_utf16 0xDBFFDC00); +insert into t1 values (_utf16 0xDBFFDCFF); +select hex(a) from t1; +drop table t1; + +# +# Bug#32393 Character sets: illegal characters in utf16 columns +# +# Tests that cs->cset->wc_mb() doesn't accept surrogate parts +# +# via alter +# +create table t1 (s1 varchar(50) character set ucs2); +insert into t1 values (0xdf84); +alter table t1 modify column s1 varchar(50) character set utf16; +select hex(s1) from t1; +drop table t1; +# +# via update +# +create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16); +insert into t1 (s1) values (0xdf84); +update t1 set s2 = s1; +select hex(s2) from t1; +drop table t1; + + + +# +# Testing cs->cset->lengthsp() +# +create table t1 (a char(10)) character set utf16; +insert into t1 values ('a '); +select hex(a) from t1; +drop table t1; + +# +# Testing cs->cset->caseup() and cs->cset->casedn() +# +select upper('abcd'), lower('ABCD'); + +# +# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf16 +# Testing cs->cset->snprintf() +# +#create table t1 (a date); +#insert into t1 values ('2007-09-16'); +#select * from t1; +#drop table t1; + +# +# Testing cs->cset->l10tostr +# !!! 
Not used in the code + +# +# Testing cs->cset->ll10tostr +# +create table t1 (a varchar(10) character set utf16); +insert into t1 values (123456); +select a, hex(a) from t1; +drop table t1; + + +# Testing cs->cset->fill +# SOUNDEX fills strings with DIGIT ZERO up to four characters +select hex(soundex('a')); + +# +# Testing cs->cset->strntol +# !!! Not used in the code + +# +# Testing cs->cset->strntoul +# +create table t1 (a enum ('a','b','c')) character set utf16; +insert into t1 values ('1'); +select * from t1; +drop table t1; + +# +# Testing cs->cset->strntoll and cs->cset->strntoull +# +set names latin1; +select hex(conv(convert('123' using utf16), -10, 16)); +select hex(conv(convert('123' using utf16), 10, 16)); + +# +# Testing cs->cset->strntod +# +set names latin1; +set character_set_connection=utf16; +select 1.1 + '1.2'; +select 1.1 + '1.2xxx'; + +# Testing strntoll10_utf16 +# Testing cs->cset->strtoll10 +select left('aaa','1'); + +# +# Testing cs->cset->strntoull10rnd +# +create table t1 (a int); +insert into t1 values ('-1234.1e2'); +insert into t1 values ('-1234.1e2xxxx'); +insert into t1 values ('-1234.1e2 '); +select * from t1; +drop table t1; + +# +# Testing cs->cset->scan +# +create table t1 (a int); +insert into t1 values ('1 '); +insert into t1 values ('1 x'); +select * from t1; +drop table t1; + +# +# Testing auto-conversion to TEXT +# +create table t1 (a varchar(17000) character set utf16); +show create table t1; +drop table t1; + +# +# Testing that maximum possible key length is 1000 bytes +# +create table t1 (a varchar(250) character set utf16 primary key); +show create table t1; +drop table t1; +--error ER_TOO_LONG_KEY +create table t1 (a varchar(334) character set utf16 primary key); + +# +# Conversion to utf8 +# +create table t1 (a char(1) character set utf16); +insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF); +insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF); +select hex(a), hex(@a:=convert(a using 
utf8mb4)), hex(convert(@a using utf16)) from t1; +drop table t1; + +# +# Test basic regex functionality +# +set collation_connection=utf16_general_ci; +--source include/ctype_regex.inc +set names latin1; + +# +# Test how character set works with date/time +# +SET collation_connection=utf16_general_ci; +--source include/ctype_datetime.inc +SET NAMES latin1; + +# +# Bug#33073 Character sets: ordering fails with utf32 +# +SET collation_connection=utf16_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE'); +SELECT * FROM t1 ORDER BY s1; +SET max_sort_length=4; +SELECT * FROM t1 ORDER BY s1; +DROP TABLE t1; +SET max_sort_length=DEFAULT; +SET NAMES latin1; + + +# +## TODO: add tests for all engines +# + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf16_uca.test b/mysql-test/t/ctype_utf16_uca.test new file mode 100644 index 00000000000..5314777c6f4 --- /dev/null +++ b/mysql-test/t/ctype_utf16_uca.test @@ -0,0 +1,290 @@ +-- source include/have_utf16.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +set names utf8; +set collation_connection=utf16_unicode_ci; +select hex('a'), hex('a '); +-- source include/endspace.inc + +# +# Bug #6787 LIKE not working properly with _ and utf8 data +# +select 'c' like '\_' as want0; + +# +# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters +# +CREATE TABLE t ( + c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +DROP TABLE t; + + +create table t1 (c1 char(10) character set utf16 collate utf16_bin); + +# +# Basic Latin +# +insert into t1 values ('A'),('a'); +insert into t1 values ('B'),('b'); +insert into t1 values ('C'),('c'); +insert into t1 values ('D'),('d'); +insert into 
t1 values ('E'),('e'); +insert into t1 values ('F'),('f'); +insert into t1 values ('G'),('g'); +insert into t1 values ('H'),('h'); +insert into t1 values ('I'),('i'); +insert into t1 values ('J'),('j'); +insert into t1 values ('K'),('k'); +insert into t1 values ('L'),('l'); +insert into t1 values ('M'),('m'); +insert into t1 values ('N'),('n'); +insert into t1 values ('O'),('o'); +insert into t1 values ('P'),('p'); +insert into t1 values ('Q'),('q'); +insert into t1 values ('R'),('r'); +insert into t1 values ('S'),('s'); +insert into t1 values ('T'),('t'); +insert into t1 values ('U'),('u'); +insert into t1 values ('V'),('v'); +insert into t1 values ('W'),('w'); +insert into t1 values ('X'),('x'); +insert into t1 values ('Y'),('y'); +insert into t1 values ('Z'),('z'); + +# +# Latin1 supplement +# +insert into t1 values (0x00e0),(0x00c0); +insert into t1 values (0x00e1),(0x00c1); +insert into t1 values (0x00e2),(0x00c2); +insert into t1 values (0x00e3),(0x00c3); +insert into t1 values (0x00e4),(0x00c4); +insert into t1 values (0x00e5),(0x00c5); +insert into t1 values (0x00e6),(0x00c6); +insert into t1 values (0x00e7),(0x00c7); +insert into t1 values (0x00e8),(0x00c8); +insert into t1 values (0x00e9),(0x00c9); +insert into t1 values (0x00ea),(0x00ca); +insert into t1 values (0x00eb),(0x00cb); +insert into t1 values (0x00ec),(0x00cc); +insert into t1 values (0x00ed),(0x00cd); +insert into t1 values (0x00ee),(0x00ce); +insert into t1 values (0x00ef),(0x00cf); + +insert into t1 values (0x00f0),(0x00d0); +insert into t1 values (0x00f1),(0x00d1); +insert into t1 values (0x00f2),(0x00d2); +insert into t1 values (0x00f3),(0x00d3); +insert into t1 values (0x00f4),(0x00d4); +insert into t1 values (0x00f5),(0x00d5); +insert into t1 values (0x00f6),(0x00d6); +insert into t1 values (0x00f7),(0x00d7); +insert into t1 values (0x00f8),(0x00d8); +insert into t1 values (0x00f9),(0x00d9); +insert into t1 values (0x00fa),(0x00da); +insert into t1 values (0x00fb),(0x00db); +insert into 
t1 values (0x00fc),(0x00dc); +insert into t1 values (0x00fd),(0x00dd); +insert into t1 values (0x00fe),(0x00de); +insert into t1 values (0x00ff),(0x00df); + +# +# Latin extended-A, 0100-017F +# +insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103); +insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107); +insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b); +insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f); +insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113); +insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117); +insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b); +insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f); +insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123); +insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127); +insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b); +insert into t1 values (0x012c),(0x012d),(0x012e),(0x012f); +insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133); +insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137); +insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b); +insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f); +insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143); +insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147); +insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b); +insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f); +insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153); +insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157); +insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b); +insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f); +insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163); +insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167); +insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b); +insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f); +insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173); +insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177); 
+insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b); +insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f); + +# +# Latin extended-B, 0180-024F +# +insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183); +insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187); +insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b); +insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f); +insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193); +insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197); +insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b); +insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f); +insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3); +insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7); +insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab); +insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af); +insert into t1 values (0x01b0),(0x01b1),(0x01b2),(0x01b3); +insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7); +insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb); +insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf); +insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3); +insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7); +insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb); +insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf); +insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3); +insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7); +insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db); +insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df); +insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3); +insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7); +insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb); +insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef); +insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3); +insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7); +insert into t1 values 
(0x01f8),(0x01f9),(0x01fa),(0x01fb); +insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff); + + +insert into t1 values ('AA'),('Aa'),('aa'),('aA'); +insert into t1 values ('CH'),('Ch'),('ch'),('cH'); +insert into t1 values ('DZ'),('Dz'),('dz'),('dZ'); +insert into t1 values ('IJ'),('Ij'),('ij'),('iJ'); +insert into t1 values ('LJ'),('Lj'),('lj'),('lJ'); +insert into t1 values ('LL'),('Ll'),('ll'),('lL'); +insert into t1 values ('NJ'),('Nj'),('nj'),('nJ'); +insert into t1 values ('OE'),('Oe'),('oe'),('oE'); +insert into t1 values ('SS'),('Ss'),('ss'),('sS'); +insert into t1 values ('RR'),('Rr'),('rr'),('rR'); + +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci; +select group_concat(c1 order by 
binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci; + +drop table t1; + +# +# Bug#5324 +# +SET NAMES utf8; + +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci; +INSERT INTO t1 VALUES (0x039C03C903B4); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_general_ci ORDER BY c; +DROP TABLE t1; + +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci; +INSERT INTO t1 VALUES (0x039C03C903B4); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_unicode_ci ORDER BY c; +DROP TABLE t1; + +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci; +INSERT INTO t1 VALUES (0x039C03C903B4); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 +COLLATE utf16_unicode_ci ORDER BY c; +DROP TABLE t1; + + +SET NAMES utf8; +SET @test_character_set='utf16'; +SET @test_collation='utf16_swedish_ci'; +-- source include/ctype_common.inc + + +SET collation_connection='utf16_unicode_ci'; +-- source include/ctype_filesort.inc +-- source 
include/ctype_like_escape.inc + +--echo End of 4.1 tests + +# +# Check UPPER/LOWER changing length +# +# Result shorter than argument +CREATE TABLE t1 (id int, a varchar(30) character set utf16); +INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131); +INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049); +INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049); +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci; +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +DROP TABLE t1; + +# +# Bug #27079 Crash while grouping empty ucs2 strings +# +CREATE TABLE t1 ( + c1 text character set utf16 collate utf16_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +DROP TABLE IF EXISTS t1; + + +# +# Test basic regex functionality +# +set collation_connection=utf16_unicode_ci; +--source include/ctype_regex.inc + + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test new file mode 100644 index 00000000000..f1e17532b88 --- /dev/null +++ b/mysql-test/t/ctype_utf32.test @@ -0,0 +1,784 @@ +-- source include/have_utf32.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +SET NAMES latin1; +SET character_set_connection=utf32; +select hex('a'), hex('a '); +-- source include/endspace.inc + +# +# Check that incomplete utf32 characters in HEX notation +# are left-padded with zeros +# +select hex(_utf32 0x44); +select hex(_utf32 0x3344); +select hex(_utf32 0x103344); + +select hex(_utf32 X'44'); +select hex(_utf32 X'3344'); +select hex(_utf32 X'103344'); + + +# +# Check that 0x20 is only trimmed when it is +# a part of real SPACE character, not just a part +# of a multibyte sequence. 
+# Note, CYRILLIC LETTER ER is used as an example, which +# is stored as 0x0420 in UCS2, thus contains 0x20 in the +# low byte. The second character is THREE-PER-M, U+2004, +# which contains 0x20 in the high byte. +# + +CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32; +INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DELETE FROM t1; + +# +# Check that real spaces are correctly trimmed. +# + +INSERT INTO t1 VALUES + (X'000004200000002000000020',X'000004200000002000000020'), + (X'000020040000002000000020',X'000020040000002000000020'); +SELECT hex(word) FROM t1 ORDER BY word; +SELECT hex(word2) FROM t1 ORDER BY word2; +DROP TABLE t1; + +# +# Check LPAD/RPAD +# +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421')); +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); + +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421')); +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422')); +SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423')); +SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423')); + +CREATE TABLE t1 SELECT +LPAD(_utf32 X'0420',10,_utf32 X'0421') l, +RPAD(_utf32 X'0420',10,_utf32 X'0421') r; +SHOW CREATE TABLE t1; +select hex(l), hex(r) from t1; +DROP TABLE t1; + +create table t1 (f1 char(30)); +insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000"); +select lpad(f1, 12, "-o-/") from t1; +drop table t1; + +###################################################### +# +# Test of like +# + +SET NAMES latin1; +SET character_set_connection=utf32; +--source 
include/ctype_like.inc + +SET NAMES utf8; +SET character_set_connection=utf32; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32); +INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА'); +INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж'); +INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж'); +INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ'); +SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a; +SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a; +DROP TABLE t1; + +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +ENGINE=MyISAM CHARACTER SET utf32; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +SELECT * FROM t1 WHERE word LIKE "ca_"; +SELECT * FROM t1 WHERE word LIKE "cat"; +SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%" +SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_" +DROP TABLE t1; + + +# +# Check that INSERT() works fine. +# This invokes charpos() function. +select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066); +select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066); + +####################################################### + +# +# Bug 1264 +# +# Description: +# +# When using a ucs2 table in MySQL, +# either with ucs2_general_ci or ucs2_bin collation, +# words are returned in an incorrect order when using ORDER BY +# on an _indexed_ CHAR or VARCHAR column. They are sorted with +# the longest word *first* instead of last. I.E. The word "aardvark" +# is in the results before the word "a". +# +# If there is no index for the column, the problem does not occur. 
+# +# Interestingly, if there is no second column, the words are returned +# in the correct order. +# +# According to EXPLAIN, it looks like when the output includes columns that +# are not part of the index sorted on, it does a filesort, which fails. +# Using a straight index yields correct results. + +SET NAMES latin1; + +# +# Two fields, index +# + +CREATE TABLE t1 ( + word VARCHAR(64), + bar INT(11) default 0, + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci ; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER by word; +DROP TABLE t1; + + +# +# One field, index +# + +CREATE TABLE t1 ( + word VARCHAR(64) , + PRIMARY KEY (word)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci; + +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a"); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY WORD; +SELECT * FROM t1 ORDER BY word; +DROP TABLE t1; + + +# +# Two fields, no index +# + +CREATE TABLE t1 ( + word TEXT, + bar INT(11) AUTO_INCREMENT, + PRIMARY KEY (bar)) + ENGINE=MyISAM + CHARSET utf32 + COLLATE utf32_general_ci ; +INSERT INTO t1 (word) VALUES ("aar"); +INSERT INTO t1 (word) VALUES ("a" ); +INSERT INTO t1 (word) VALUES ("aardvar"); +INSERT INTO t1 (word) VALUES ("aardvark"); +INSERT INTO t1 (word) VALUES ("aardvara"); +INSERT INTO t1 (word) VALUES ("aardvarz"); +EXPLAIN SELECT * FROM t1 ORDER BY word; +SELECT * FROM t1 ORDER BY word; +EXPLAIN SELECT word FROM t1 ORDER BY word; +SELECT word FROM t1 ORDER 
BY word; +DROP TABLE t1; + +# +# END OF Bug 1264 test +# +######################################################## + + +# +# Check alignment for from-binary-conversion with CAST and CONVERT +# +SELECT hex(cast(0xAA as char character set utf32)); +SELECT hex(convert(0xAA using utf32)); + +# +# Check alignment for string types +# +CREATE TABLE t1 (a char(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a varchar(10) character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a text character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a mediumtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a longtext character set utf32); +INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +## +## Bug #5024 Server crashes with queries on fields +## with certain charset/collation settings +## +# +#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`); +#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c'); +#select s1 from t1 where s1 > 'a' order by s1; +#drop table t1; + +# +# Bug #5081 : UCS2 fields are filled with '0x2020' +# after extending field length +# +create table t1(a char(1)) default charset utf32; +insert into t1 values ('a'),('b'),('c'); +alter table t1 modify a char(5); +select a, hex(a) from t1; +drop table t1; + +# +# Check prepare statement from an UTF32 string +# +set @ivar= 1234; +set @str1 = 'select ?'; +set @str2 = convert(@str1 using utf32); +prepare stmt1 from @str2; +execute stmt1 using @ivar; + +# +# Check that utf32 works with ENUM and SET type +# +set names utf8; +create table t1 (a enum('x','y','z') 
character set utf32); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +select a, hex(a) from t1 order by a; +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32; +show create table t1; +insert into t1 values ('D'); +insert into t1 values ('E '); +insert into t1 values ('ä'); +insert into t1 values ('ö'); +insert into t1 values ('ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32); +show create table t1; +insert into t1 values ('x'); +insert into t1 values ('y'); +insert into t1 values ('z'); +insert into t1 values ('x,y'); +insert into t1 values ('x,y,z,ä,ö,ü'); +select a, hex(a) from t1 order by a; +drop table t1; + +# +# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added +# +create table t1(a enum('a','b','c')) default character set utf32; +insert into t1 values('a'),('b'),('c'); +alter table t1 add b char(1); +show warnings; +select * from t1 order by a; +drop table t1; + +SET NAMES latin1; +SET collation_connection='utf32_general_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc +SET NAMES latin1; +SET collation_connection='utf32_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc + +# +# Bug#10344 Some string functions fail for UCS2 +# +select hex(substr(_utf32 0x000000e4000000e500000068,1)); +select hex(substr(_utf32 0x000000e4000000e500000068,2)); +select hex(substr(_utf32 0x000000e4000000e500000068,3)); +select hex(substr(_utf32 0x000000e4000000e500000068,-1)); +select hex(substr(_utf32 0x000000e4000000e500000068,-2)); +select hex(substr(_utf32 0x000000e4000000e500000068,-3)); + +#SET NAMES latin1; +# +# Bug#8235 +# +# This bug also helped to find another problem that +# INSERT of a UCS2 string containing a negative number +# into a unsigned int column didn't produce warnings. 
+# This test covers both problems. +# +#SET collation_connection='ucs2_swedish_ci'; +#CREATE TABLE t1 (Field1 int(10) default '0'); +## no warnings, negative numbers are allowed +#INSERT INTO t1 VALUES ('-1'); +#SELECT * FROM t1; +#DROP TABLE t1; +#CREATE TABLE t1 (Field1 int(10) unsigned default '0'); +## this should generate a "Data truncated" warning +#INSERT INTO t1 VALUES ('-1'); +#DROP TABLE t1; +#SET NAMES latin1; + +# +## +## Bug#18691 Converting number to UNICODE string returns invalid result +## +#SELECT CONVERT(103, CHAR(50) UNICODE); +#SELECT CONVERT(103.0, CHAR(50) UNICODE); +#SELECT CONVERT(-103, CHAR(50) UNICODE); +#SELECT CONVERT(-103.0, CHAR(50) UNICODE); + +# +# Bug#9557 MyISAM utf8 table crash +# +CREATE TABLE t1 ( + a varchar(250) NOT NULL default '', + KEY a (a) +) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci; +insert into t1 values (0x803d); +insert into t1 values (0x005b); +select hex(a) from t1; +drop table t1; + +## +## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation +## +#--disable_warnings +#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB; +#--enable_warnings +#insert into t1 values('a'); +#create index t1f1 on t1(f1); +#select f1 from t1 where f1 like 'a%'; +#drop table t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index +# +create table t1 ( + a char(10) character set utf32 not null, + index a (a) +) engine=myisam; +insert into t1 values (repeat(0x0000201f, 10)); +insert into t1 values 
(repeat(0x00002020, 10)); +insert into t1 values (repeat(0x00002021, 10)); +# make sure "index read" is used +explain select hex(a) from t1 order by a; +select hex(a) from t1 order by a; +alter table t1 drop index a; +select hex(a) from t1 order by a; +drop table t1; + +# +# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation +# over a 'ucs2' field uses a temporary table +# +#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci); +#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ'); +#SELECT id, MIN(s) FROM t1 GROUP BY id; +#DROP TABLE t1; + +## +## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb +## +# +#--disable_warnings +#drop table if exists bug20536; +#--enable_warnings +# +#set names latin1; +#create table bug20536 (id bigint not null auto_increment primary key, name +#varchar(255) character set ucs2 not null); +#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'"); +#select md5(name) from bug20536; +#select sha1(name) from bug20536; +#select make_set(3, name, upper(name)) from bug20536; +#select export_set(5, name, upper(name)) from bug20536; +#select export_set(5, name, upper(name), ",", 5) from bug20536; + +# +# Bug #20108: corrupted default enum value for a ucs2 field +# + +CREATE TABLE t1 ( + status enum('active','passive') character set utf32 collate utf32_general_ci + NOT NULL default 'passive' +); +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD a int NOT NULL AFTER status; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +#CREATE TABLE t2 ( +# status enum('active','passive') collate ucs2_turkish_ci +# NOT NULL default 'passive' +#); +#SHOW CREATE TABLE t2; +#ALTER TABLE t2 ADD a int NOT NULL AFTER status; +#DROP TABLE t2; + + +## Some broken functions: add these tests just to document current behavior. 
+# +## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would +## not be backwards compatible in all cases, so it's best to leave it alone +#select password(name) from bug20536; +#select old_password(name) from bug20536; +# +## Disable test case as encrypt relies on 'crypt' function. +## "decrypt" is normally tested in func_crypt.test which has a +## "have_crypt.inc" test +#--disable_parsing +## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it +## doesn't return good results for strings with embedded 0 bytes. It won't be +## fixed unless we choose to re-implement the crypt() function ourselves to take +## an extra size_t string_length argument. +#select encrypt(name, 'SALT') from bug20536; +#--enable_parsing +# +## QUOTE doesn't work with UCS2 data. It would require a total rewrite +## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is +## supported fully as a client character set. +#select quote(name) from bug20536; +# +#drop table bug20536; +# +--echo End of 4.1 tests + + +# +# Conversion from an UTF32 string to a decimal column +# +CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3)); +INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); +update t1 set b=a; +SELECT *, hex(a) FROM t1; +DROP TABLE t1; + +# +# Bug#9442 Set parameter make query fail if column character set is UCS2 +# +create table t1 (utext varchar(20) character set utf32); +insert into t1 values ("lily"); +insert into t1 values ("river"); +prepare stmt from 'select utext from t1 where utext like ?'; +set @param1='%%'; +execute stmt using @param1; +execute stmt using @param1; +select utext from t1 where utext like '%%'; +drop table t1; +deallocate prepare stmt; + +# +# Bug#22638 SOUNDEX broken for international characters +# +set names latin1; +set character_set_connection=utf32; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +select 
hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +select 'mood' sounds like 'mud'; +# Cyrillic A, BE, VE +select hex(soundex(_utf32 0x000004100000041100000412)); +# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter +select hex(soundex(_utf32 0x000000BF000000C0)); +set names latin1; + +# +# Bug #14290: character_maximum_length for text fields +# +create table t1(a blob, b text charset utf32); +select data_type, character_octet_length, character_maximum_length + from information_schema.columns where table_name='t1'; +drop table t1; + + +set names latin1; +set collation_connection=utf32_general_ci; +# +# Testing cs->coll->instr() +# +select position('bb' in 'abba'); + +# +# Testing cs->coll->hash_sort() +# +create table t1 (a varchar(10) character set utf32) engine=heap; +insert into t1 values ('a'),('A'),('b'),('B'); +select * from t1 where a='a' order by binary a; +select hex(min(binary a)),count(*) from t1 group by a; +drop table t1; + +# +# Testing cs->cset->numchars() +# +select char_length('abcd'), octet_length('abcd'); + +# +# Testing cs->cset->charpos() +# +select left('abcd',2); + +# +# Testing cs->cset->well_formed_length() +# +create table t1 (a varchar(10) character set utf32); +insert into t1 values (_utf32 0x0010FFFF); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x00110000); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x00110101); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x01000101); +--error ER_INVALID_CHARACTER_STRING +insert into t1 values (_utf32 0x11000101); +select hex(a) from t1; +drop table t1; + +# +# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns +# +create table t1 (utf32 varchar(2) character set utf32); +--echo Wrong character with pad +insert into t1 values (0x110000); +--echo Wrong chsaracter without pad +insert into t1 values (0x00110000); +--echo Wrong character with 
pad followed by another wrong character +insert into t1 values (0x11000000110000); +--echo Good character with pad followed by bad character +insert into t1 values (0x10000000110000); +--echo Good character without pad followed by bad character +insert into t1 values (0x0010000000110000); +--echo Wrong character with the second byte higher than 0x10 +insert into t1 values (0x00800037); +--echo Wrong character with pad with the second byte higher than 0x10 +insert into t1 values (0x00800037); +drop table t1; + +# +# Bug#32394 Character sets: crash if comparison with 0xfffd +# +select _utf32'a' collate utf32_general_ci = 0xfffd; +select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61)); +create table t1 (s1 varchar(5) character set utf32); +insert into t1 values (0xfffd); +select case when s1 = 0xfffd then 1 else 0 end from t1; +select hex(s1) from t1 where s1 = 0xfffd; +drop table t1; + +# +# Testing cs->cset->lengthsp() +# +create table t1 (a char(10)) character set utf32; +insert into t1 values ('a '); +select hex(a) from t1; +drop table t1; + +# +# Testing cs->cset->caseup() and cs->cset->casedn() +# +select upper('abcd'), lower('ABCD'); + +# +# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32 +# Testing cs->cset->snprintf() +# +#create table t1 (a date); +#insert into t1 values ('2007-09-16'); +#select * from t1; +#drop table t1; + +# +# Testing cs->cset->l10tostr +# !!! Not used in the code + +# +# Testing cs->cset->ll10tostr +# +create table t1 (a varchar(10) character set utf32); +insert into t1 values (123456); +select a, hex(a) from t1; +drop table t1; + +# +# Testing cs->cset->fill +# SOUNDEX fills strings with DIGIT ZERO up to four characters +select hex(soundex('a')); + +# +# Testing cs->cset->strntol +# !!! 
Not used in the code + +# +# Testing cs->cset->strntoul +# +create table t1 (a enum ('a','b','c')) character set utf32; +insert into t1 values ('1'); +select * from t1; +drop table t1; + +# +# Testing cs->cset->strntoll and cs->cset->strntoull +# +set names latin1; +select hex(conv(convert('123' using utf32), -10, 16)); +select hex(conv(convert('123' using utf32), 10, 16)); + +# +# Testing cs->cset->strntod +# +set names latin1; +set character_set_connection=utf32; +select 1.1 + '1.2'; +select 1.1 + '1.2xxx'; + +# Testing strntoll10_utf32 +# Testing cs->cset->strtoll10 +select left('aaa','1'); + +# +# Testing cs->cset->strntoull10rnd +# +create table t1 (a int); +insert into t1 values ('-1234.1e2'); +insert into t1 values ('-1234.1e2xxxx'); +insert into t1 values ('-1234.1e2 '); +select * from t1; +drop table t1; + +# +# Testing cs->cset->scan +# +create table t1 (a int); +insert into t1 values ('1 '); +insert into t1 values ('1 x'); +select * from t1; +drop table t1; + +# +# Testing auto-conversion to TEXT +# +create table t1 (a varchar(17000) character set utf32); +show create table t1; +drop table t1; + +# +# Testing that maximum possible key length is 1332 bytes +# +create table t1 (a varchar(250) character set utf32 primary key); +show create table t1; +drop table t1; +--error ER_TOO_LONG_KEY +create table t1 (a varchar(334) character set utf32 primary key); + +# +# Testing mi_check with long key values +# +create table t1 (a varchar(333) character set utf32, key(a)); +insert into t1 values (repeat('a',333)), (repeat('b',333)); +flush tables; +check table t1; +drop table t1; + +# +# Test how character set works with date/time +# +SET collation_connection=utf32_general_ci; +--source include/ctype_datetime.inc +SET NAMES latin1; + +# +# Test basic regex functionality +# +set collation_connection=utf32_general_ci; +--source include/ctype_regex.inc +set names latin1; + + +# TODO: add tests for all engines + +# +# Bug #36418 Character sets: crash if char(256 using 
utf32) +# +select hex(char(0x01 using utf32)); +select hex(char(0x0102 using utf32)); +select hex(char(0x010203 using utf32)); +select hex(char(0x01020304 using utf32)); +create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32); +create index i on t1 (s1); +insert into t1 values (char(256 using utf32), char(256 using utf32)); +select hex(s1), hex(s2) from t1; +drop table t1; + + +# +# Bug#33073 Character sets: ordering fails with utf32 +# +SET collation_connection=utf32_general_ci; +CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE'); +SELECT * FROM t1 ORDER BY s1; +SET max_sort_length=4; +SELECT * FROM t1 ORDER BY s1; +DROP TABLE t1; +SET max_sort_length=DEFAULT; +SET NAMES latin1; + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf32_uca.test b/mysql-test/t/ctype_utf32_uca.test new file mode 100644 index 00000000000..9386cc9e65e --- /dev/null +++ b/mysql-test/t/ctype_utf32_uca.test @@ -0,0 +1,291 @@ +-- source include/have_ucs2.inc +-- source include/have_utf32.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +set names utf8; +set collation_connection=utf32_unicode_ci; +select hex('a'), hex('a '); +-- source include/endspace.inc + +# +# Bug #6787 LIKE not working properly with _ and utf8 data +# +select 'c' like '\_' as want0; + +# +# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters +# +CREATE TABLE t ( + c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +DROP TABLE t; + + +create table t1 (c1 char(10) character set utf32 collate utf32_bin); + +# +# Basic Latin +# +insert into t1 values ('A'),('a'); +insert into t1 values ('B'),('b'); +insert into t1 values ('C'),('c'); +insert into t1 
values ('D'),('d'); +insert into t1 values ('E'),('e'); +insert into t1 values ('F'),('f'); +insert into t1 values ('G'),('g'); +insert into t1 values ('H'),('h'); +insert into t1 values ('I'),('i'); +insert into t1 values ('J'),('j'); +insert into t1 values ('K'),('k'); +insert into t1 values ('L'),('l'); +insert into t1 values ('M'),('m'); +insert into t1 values ('N'),('n'); +insert into t1 values ('O'),('o'); +insert into t1 values ('P'),('p'); +insert into t1 values ('Q'),('q'); +insert into t1 values ('R'),('r'); +insert into t1 values ('S'),('s'); +insert into t1 values ('T'),('t'); +insert into t1 values ('U'),('u'); +insert into t1 values ('V'),('v'); +insert into t1 values ('W'),('w'); +insert into t1 values ('X'),('x'); +insert into t1 values ('Y'),('y'); +insert into t1 values ('Z'),('z'); + +# +# Latin1 supplement +# +insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0); +insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1); +insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2); +insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3); +insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4); +insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5); +insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6); +insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7); +insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8); +insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9); +insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca); +insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb); +insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc); +insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd); +insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce); +insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf); + +insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0); +insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1); +insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2); +insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3); +insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4); +insert into t1 values 
(_ucs2 0x00f5),(_ucs2 0x00d5); +insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6); +insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7); +insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8); +insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9); +insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da); +insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db); +insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc); +insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd); +insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de); +insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df); + +# +# Latin extended-A, 0100-017F +# +insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103); +insert into t1 values (_ucs2 0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107); +insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b); +insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f); +insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113); +insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117); +insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b); +insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f); +insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123); +insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127); +insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b); +insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f); +insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133); +insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137); +insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b); +insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f); +insert into t1 values (_ucs2 0x0140),(_ucs2 
0x0141),(_ucs2 0x0142),(_ucs2 0x0143); +insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147); +insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b); +insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f); +insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153); +insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157); +insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b); +insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f); +insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163); +insert into t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167); +insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b); +insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f); +insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173); +insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177); +insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b); +insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f); + +# +# Latin extended-B, 0180-024F +# +insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183); +insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187); +insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b); +insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f); +insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193); +insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197); +insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b); +insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 
0x019e),(_ucs2 0x019f); +insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3); +insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7); +insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab); +insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af); +insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3); +insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7); +insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb); +insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf); +insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 0x01c3); +insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7); +insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb); +insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf); +insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3); +insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7); +insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db); +insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df); +insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3); +insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7); +insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb); +insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef); +insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3); +insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7); +insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb); +insert into t1 values (_ucs2 
0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff); + + +insert into t1 values ('AA'),('Aa'),('aa'),('aA'); +insert into t1 values ('CH'),('Ch'),('ch'),('cH'); +insert into t1 values ('DZ'),('Dz'),('dz'),('dZ'); +insert into t1 values ('IJ'),('Ij'),('ij'),('iJ'); +insert into t1 values ('LJ'),('Lj'),('lj'),('lJ'); +insert into t1 values ('LL'),('Ll'),('ll'),('lL'); +insert into t1 values ('NJ'),('Nj'),('nj'),('nJ'); +insert into t1 values ('OE'),('Oe'),('oe'),('oE'); +insert into t1 values ('SS'),('Ss'),('ss'),('sS'); +insert into t1 values ('RR'),('Rr'),('rr'),('rR'); + +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by 
c1 collate utf32_slovak_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci; +select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci; + +drop table t1; + +# +# Bug#5324 +# +SET NAMES utf8; +#test1 +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci; +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 +COLLATE utf32_general_ci ORDER BY c; +DROP TABLE t1; +#test2 +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci; +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 +COLLATE utf32_unicode_ci ORDER BY c; +DROP TABLE t1; +#test 3 +CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci; +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +#Check two rows +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) +COLLATE utf32_unicode_ci ORDER BY c; +DROP TABLE t1; + + +SET NAMES utf8; +SET @test_character_set='utf32'; +SET @test_collation='utf32_swedish_ci'; +-- source 
include/ctype_common.inc + + +SET collation_connection='utf32_unicode_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc + +--echo End of 4.1 tests + +# +# Check UPPER/LOWER changing length +# +# Result shorter than argument +CREATE TABLE t1 (id int, a varchar(30) character set utf32); +INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131); +INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049); +INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049); +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci; +SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu +FROM t1 ORDER BY id; +DROP TABLE t1; + +# +# Bug #27079 Crash while grouping empty ucs2 strings +# +CREATE TABLE t1 ( + c1 text character set utf32 collate utf32_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +DROP TABLE IF EXISTS t1; + + +# +# Test basic regex functionality +# +set collation_connection=utf32_unicode_ci; +--source include/ctype_regex.inc + + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 1a560554d3d..201e96b0b09 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -1440,6 +1440,17 @@ DROP TABLE t1; --echo Start of 5.4 tests + +# +# WL#1213: utf8mb3 is an alias for utf8 +# +SET NAMES utf8mb3; +SHOW VARIABLES LIKE 'character_set_results%'; +CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin); +SHOW CREATE TABLE t1; +DROP TABLE t1; +SELECT _utf8mb3'test'; + # # Bug#26180: Can't add columns to tables created with utf8 text indexes # diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test new file mode 100644 index 00000000000..2c166364b1a --- /dev/null +++ 
b/mysql-test/t/ctype_utf8mb4.test @@ -0,0 +1,1670 @@ +# +# Tests with the utf8mb4 character set +# + +--source include/have_innodb.inc + +--disable_warnings +drop table if exists t1,t2; +--enable_warnings + +--echo # +--echo # Start of 5.5 tests +--echo # + +set names utf8mb4; + +select left(_utf8mb4 0xD0B0D0B1D0B2,1); +select right(_utf8mb4 0xD0B0D0B2D0B2,1); + +select locate('he','hello'); +select locate('he','hello',2); +select locate('lo','hello',2); +select locate('HE','hello'); +select locate('HE','hello',2); +select locate('LO','hello',2); +select locate('HE','hello' collate utf8mb4_bin); +select locate('HE','hello' collate utf8mb4_bin,2); +select locate('LO','hello' collate utf8mb4_bin,2); + +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D0B1D0B2); +select locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2); +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2); +select locate(_utf8mb4 0xD091, _utf8mb4 0xD0B0D0B1D0B2 collate utf8mb4_bin); +select locate(_utf8mb4 0xD0B1, _utf8mb4 0xD0B0D091D0B2 collate utf8mb4_bin); + +select length(_utf8mb4 0xD0B1), bit_length(_utf8mb4 0xD0B1), char_length(_utf8mb4 0xD0B1); + +select 'a' like 'a'; +select 'A' like 'a'; +select 'A' like 'a' collate utf8mb4_bin; +select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD0B1,_utf8mb4 '%'); + +# Bug #6040: can't retrieve records with umlaut +# characters in case insensitive manner. 
+# Case insensitive search LIKE comparison +# was broken for multibyte characters: +select convert(_latin1'Günter André' using utf8mb4) like CONVERT(_latin1'GÜNTER%' USING utf8mb4); +select CONVERT(_koi8r'×ÁÓÑ' USING utf8mb4) LIKE CONVERT(_koi8r'÷áóñ' USING utf8mb4); +select CONVERT(_koi8r'÷áóñ' USING utf8mb4) LIKE CONVERT(_koi8r'×ÁÓÑ' USING utf8mb4); + +# +# Check the following: +# "a" == "a " +# "a\0" < "a" +# "a\0" < "a " + +SELECT 'a' = 'a '; +SELECT 'a\0' < 'a'; +SELECT 'a\0' < 'a '; +SELECT 'a\t' < 'a'; +SELECT 'a\t' < 'a '; + +# +# The same for binary collation +# +SELECT 'a' = 'a ' collate utf8mb4_bin; +SELECT 'a\0' < 'a' collate utf8mb4_bin; +SELECT 'a\0' < 'a ' collate utf8mb4_bin; +SELECT 'a\t' < 'a' collate utf8mb4_bin; +SELECT 'a\t' < 'a ' collate utf8mb4_bin; + +CREATE TABLE t1 (a char(10) character set utf8mb4 not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +DROP TABLE t1; + +# +# Fix this, it should return 1: +# +#select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD091,_utf8mb4 '%'); +# + +# +# Bug 2367: INSERT() behaviour is different for different charsets. +# +select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es'); +select insert("aa",100,1,"b"),insert("aa",1,3,"b"); + +# +# LELF() didn't work well with utf8mb4 in some cases too. +# +select char_length(left(@a:='теÑÑ‚',5)), length(@a), @a; + + +# +# CREATE ... 
SELECT +# +create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d"); +show create table t1; +select * from t1; +drop table t1; + +# +# Bug#22646 LC_TIME_NAMES: Assignment to non-UTF8 target fails +# +set names utf8mb4; +set LC_TIME_NAMES='fr_FR'; +create table t1 (s1 char(20) character set latin1); +insert into t1 values (date_format('2004-02-02','%M')); +select hex(s1) from t1; +drop table t1; +create table t1 (s1 char(20) character set koi8r); +set LC_TIME_NAMES='ru_RU'; +insert into t1 values (date_format('2004-02-02','%M')); +insert into t1 values (date_format('2004-02-02','%b')); +insert into t1 values (date_format('2004-02-02','%W')); +insert into t1 values (date_format('2004-02-02','%a')); +select hex(s1), s1 from t1; +drop table t1; +set LC_TIME_NAMES='en_US'; + + +# +# Bug #2366 Wrong utf8mb4 behaviour when data is truncated +# +set names koi8r; +create table t1 (s1 char(1) character set utf8mb4); +insert into t1 values (_koi8r'ÁÂ'); +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; +drop table t1; + +create table t1 (s1 tinytext character set utf8mb4); +insert into t1 select repeat('a',300); +insert into t1 select repeat('Ñ',300); +insert into t1 select repeat('aÑ',300); +insert into t1 select repeat('Ña',300); +insert into t1 select repeat('ÑÑ',300); +select hex(s1) from t1; +select length(s1),char_length(s1) from t1; +drop table t1; + +create table t1 (s1 text character set utf8mb4); +insert into t1 select repeat('a',66000); +insert into t1 select repeat('Ñ',66000); +insert into t1 select repeat('aÑ',66000); +insert into t1 select repeat('Ña',66000); +insert into t1 select repeat('ÑÑ',66000); +select length(s1),char_length(s1) from t1; +drop table t1; + +# +# Bug #2368 Multibyte charsets do not check that incoming data is well-formed +# +create table t1 (s1 char(10) character set utf8mb4); +insert into t1 values (0x41FF); +select hex(s1) from t1; +drop table t1; + +create table t1 (s1 varchar(10) character set utf8mb4); +insert 
into t1 values (0x41FF); +select hex(s1) from t1; +drop table t1; + +create table t1 (s1 text character set utf8mb4); +insert into t1 values (0x41FF); +select hex(s1) from t1; +drop table t1; + +# +# Bug 2699 +# UTF8 breaks primary keys for cols > 333 characters +# +--error 1071 +create table t1 (a text character set utf8mb4, primary key(a(371))); + + +# +# Bug 2959 +# UTF8 charset breaks joins with mixed column/string constant +# +CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES ( 'test' ); +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a; +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = 'test' and b.a = 'test'; +SELECT a.a, b.a FROM t1 a, t1 b WHERE a.a = b.a and a.a = 'test'; +DROP TABLE t1; + +create table t1 (a char(255) character set utf8mb4); +insert into t1 values('b'),('b'); +select * from t1 where a = 'b'; +select * from t1 where a = 'b' and a = 'b'; +select * from t1 where a = 'b' and a != 'b'; +drop table t1; + +# +# Testing regexp +# +set collation_connection=utf8mb4_general_ci; +--source include/ctype_regex.inc +set names utf8mb4; + +# +# Bug #3928 regexp [[:>:]] and UTF-8 +# +set names utf8mb4; + +# This should return TRUE +select 'ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +select 'ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +select ' ваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +select ' ваÑÑ ' rlike '[[:<:]]ваÑÑ[[:>:]]'; + +# This should return FALSE +select 'ваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]'; +select 'zваÑÑ' rlike '[[:<:]]ваÑÑ[[:>:]]'; +select 'zваÑÑz' rlike '[[:<:]]ваÑÑ[[:>:]]'; + +# +# Bug #4555 +# ALTER TABLE crashes mysqld with enum column collated utf8mb4_unicode_ci +# +CREATE TABLE t1 (a enum ('Y', 'N') DEFAULT 'N' COLLATE utf8mb4_unicode_ci); +ALTER TABLE t1 ADD COLUMN b CHAR(20); +DROP TABLE t1; + +# Customer Support Center issue # 3299 +# ENUM and SET multibyte fields computed their length wronly +# when converted into a char field +set names utf8mb4; +create table t1 (a enum('aaaa','проба') character set utf8mb4); +show create table t1; 
+insert into t1 values ('проба'); +select * from t1; +create table t2 select ifnull(a,a) from t1; +show create table t2; +select * from t2; +drop table t1; +drop table t2; + +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# MYISAM: keys with prefix compression, case insensitive collation. +# +create table t1 (c varchar(30) character set utf8mb4, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1; + +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# InnoDB: keys with prefix compression, case insensitive collation. 
+# +--disable_warnings +create table t1 (c varchar(30) character set utf8mb4, unique(c(10))) engine=innodb; +--enable_warnings +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1; +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# MYISAM: fixed length keys, case insensitive collation +# +create table t1 (c char(3) character set utf8mb4, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('b'); +insert into t1 values ('bb'); +--error ER_DUP_ENTRY +insert into t1 values ('bbb'); +insert into t1 values ('а'); +insert into t1 values ('аа'); +--error ER_DUP_ENTRY +insert into t1 values ('ааа'); +insert into t1 values ('б'); +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +--error ER_DUP_ENTRY +insert into t1 values ('ꪪꪪꪪ'); +drop table t1; +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# InnoDB: fixed length keys, case insensitive collation +# +--disable_warnings +create table t1 (c char(3) character set utf8mb4, unique (c(2))) engine=innodb; +--enable_warnings +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values 
('b'); +insert into t1 values ('bb'); +--error ER_DUP_ENTRY +insert into t1 values ('bbb'); +insert into t1 values ('а'); +insert into t1 values ('аа'); +--error ER_DUP_ENTRY +insert into t1 values ('ааа'); +insert into t1 values ('б'); +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +--error ER_DUP_ENTRY +insert into t1 values ('ꪪꪪꪪ'); +drop table t1; +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check HEAP+HASH, case insensitive collation +# +create table t1 ( +c char(10) character set utf8mb4, +unique key a using hash (c(1)) +) engine=heap; +show create table t1; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check HEAP+BTREE, case insensitive collation +# +create table t1 ( +c char(10) character set utf8mb4, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check BDB, case insensitive collation +# +--disable_warnings +create table t1 ( +c char(10) character set utf8mb4, +unique 
key a (c(1)) +) engine=innodb; +--enable_warnings +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# MYISAM: keys with prefix compression, binary collation. +# +create table t1 (c varchar(30) character set utf8mb4 collate utf8mb4_bin, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1; + +# +# Bug 4521: unique key prefix interacts poorly with utf8mb4 +# MYISAM: fixed length keys, binary collation +# +create table t1 (c char(3) character set utf8mb4 collate utf8mb4_bin, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('b'); +insert into t1 values ('bb'); +--error ER_DUP_ENTRY +insert into t1 values ('bbb'); +insert into t1 values ('а'); +insert into t1 values ('аа'); +--error ER_DUP_ENTRY +insert into t1 values ('ааа'); +insert into t1 values ('б'); +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 
values ('ббб'); +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +--error ER_DUP_ENTRY +insert into t1 values ('ꪪꪪꪪ'); +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check HEAP+HASH, binary collation +# +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a using hash (c(1)) +) engine=heap; +show create table t1; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check HEAP+BTREE, binary collation +# +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8mb4 +# Check BDB, binary collation +# +--disable_warnings +create table t1 ( +c char(10) character set utf8mb4 collate utf8mb4_bin, +unique key a (c(1)) +) engine=innodb; +--enable_warnings +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error ER_DUP_ENTRY +insert into t1 values ('aa'); +--error ER_DUP_ENTRY +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error ER_DUP_ENTRY +insert 
into t1 values ('бб'); +--error ER_DUP_ENTRY +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; + + +# Bug#4594: column index make = failed for gbk, but like works +# Check MYISAM +# +create table t1 ( + str varchar(255) character set utf8mb4 not null, + key str (str(2)) +) engine=myisam; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + +# Bug#4594: column index make = failed for gbk, but like works +# Check InnoDB +# +--disable_warnings +create table t1 ( + str varchar(255) character set utf8mb4 not null, + key str (str(2)) +) engine=innodb; +--enable_warnings +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + +# the same for HEAP+BTREE +# + +create table t1 ( + str varchar(255) character set utf8mb4 not null, + key str using btree (str(2)) +) engine=heap; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + +# the same for HEAP+HASH +# + +create table t1 ( + str varchar(255) character set utf8mb4 not null, + key str using hash (str(2)) +) engine=heap; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + +# the same for BDB +# + +--disable_warnings +create table t1 ( + str varchar(255) character set utf8mb4 not null, + key str (str(2)) +) engine=innodb; +--enable_warnings +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + +# +# Bug #5397: Crash with varchar binary and LIKE +# +CREATE TABLE t1 (a varchar(32) BINARY) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES ('test'); +SELECT a FROM t1 WHERE a LIKE '%te'; +DROP TABLE t1; + +# +# Bug #5723: length(<varchar utf8mb4 field>) returns varying results +# +--disable_warnings +SET 
NAMES utf8mb4; +--disable_warnings +CREATE TABLE t1 ( + subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci, + p varchar(15) character set utf8mb4 +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +--enable_warnings +INSERT INTO t1 VALUES ('è°·å·ä¿ŠäºŒã¨ç”³ã—ã¾ã™ãŒã€ã‚¤ãƒ³ã‚¿ãƒ¼ãƒãƒƒãƒˆäºˆç´„ã®ä¼šå“¡ç™»éŒ²ã‚’ã—ã¾ã—ãŸã¨ã“ã‚ã€ãƒ¡ãƒ¼ãƒ«ã‚¢ãƒ‰ãƒ¬ã‚¹ã‚’é–“é•ãˆã¦ã—ã¾ã„会員IDãŒå—ã‘å–ã‚‹ã“ã¨ãŒå‡ºæ¥ã¾ã›ã‚“ã§ã—ãŸã€‚é–“é•ãˆã‚¢ãƒ‰ãƒ¬ã‚¹ã¯tani-shun@n.vodafone.ne.jpを書ãè¾¼ã¿ã¾ã—ãŸã€‚ã©ã†ã™ã‚Œã°ã‚ˆã„ã§ã™ã‹ï¼Ÿ ãã®ä»–ã€ä½æ‰€ç‰ã¯é–“é•ãˆã‚ã‚Šã¾ã›ã‚“。連絡ãã ã•ã„。よã‚ã—ããŠé¡˜ã„ã—ã¾ã™ã€‚m(__)m','040312-000057'); +INSERT INTO t1 VALUES ('aaa','bbb'); +SELECT length(subject) FROM t1; +SELECT length(subject) FROM t1 ORDER BY 1; +DROP TABLE t1; + +# +# Bug #5832 SELECT doesn't return records in some cases +# +CREATE TABLE t1 ( + id int unsigned NOT NULL auto_increment, + list_id smallint unsigned NOT NULL, + term TEXT NOT NULL, + PRIMARY KEY(id), + INDEX(list_id, term(4)) +) ENGINE=MYISAM CHARSET=utf8mb4; +INSERT INTO t1 SET list_id = 1, term = "letterc"; +INSERT INTO t1 SET list_id = 1, term = "letterb"; +INSERT INTO t1 SET list_id = 1, term = "lettera"; +INSERT INTO t1 SET list_id = 1, term = "letterd"; +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterc"); +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterb"); +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "lettera"); +SELECT id FROM t1 WHERE (list_id = 1) AND (term = "letterd"); +DROP TABLE t1; + + +# +# Bug #6043 erratic searching for diacriticals in indexed MyISAM UTF-8 table +# +SET NAMES latin1; +CREATE TABLE t1 ( + id int unsigned NOT NULL auto_increment, + list_id smallint unsigned NOT NULL, + term text NOT NULL, + PRIMARY KEY(id), + INDEX(list_id, term(19)) +) ENGINE=MyISAM CHARSET=utf8mb4; +INSERT INTO t1 set list_id = 1, term = "testétest"; +INSERT INTO t1 set list_id = 1, term = "testetest"; +INSERT INTO t1 set list_id = 1, term = "testètest"; +SELECT id, term FROM t1 where (list_id = 1) AND (term = 
"testétest"); +SELECT id, term FROM t1 where (list_id = 1) AND (term = "testetest"); +SELECT id, term FROM t1 where (list_id = 1) AND (term = "testètest"); +DROP TABLE t1; + +# +# Bug #6019 SELECT tries to use too short prefix index on utf8mb4 data +# +set names utf8mb4; +--disable_warnings +create table t1 ( + a int primary key, + b varchar(6), + index b3(b(3)) +) engine=innodb character set=utf8mb4; +--enable_warnings +insert into t1 values(1,'foo'),(2,'foobar'); +select * from t1 where b like 'foob%'; +--disable_warnings +alter table t1 engine=innodb; +--enable_warnings +select * from t1 where b like 'foob%'; +drop table t1; + +# +# Test for calculate_interval_lengths() function +# +create table t1 ( + a enum('петÑ','ваÑÑ','анюта') character set utf8mb4 not null default 'анюта', + b set('петÑ','ваÑÑ','анюта') character set utf8mb4 not null default 'анюта' +); +create table t2 select concat(a,_utf8mb4'') as a, concat(b,_utf8mb4'')as b from t1; +show create table t2; +drop table t2; +drop table t1; + +# +# Bug #6787 LIKE not working properly with _ and utf8mb4 data +# +select 'c' like '\_' as want0; + +# +# SUBSTR with negative offset didn't work with multi-byte strings +# +SELECT SUBSTR('ваÑÑ',-2); + + +# +# Bug #7730 Server crash using soundex on an utf8mb4 table +# +create table t1 (id integer, a varchar(100) character set utf8mb4 collate utf8mb4_unicode_ci); +insert into t1 values (1, 'Test'); +select * from t1 where soundex(a) = soundex('Test'); +select * from t1 where soundex(a) = soundex('TEST'); +select * from t1 where soundex(a) = soundex('test'); +drop table t1; + +# +# Bug#22638 SOUNDEX broken for international characters +# +select soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB); +select hex(soundex(_utf8mb4 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)); +select soundex(_utf8mb4 0xD091D092D093); +select hex(soundex(_utf8mb4 0xD091D092D093)); + + +SET collation_connection='utf8mb4_general_ci'; +-- source 
include/ctype_filesort.inc +-- source include/ctype_like_escape.inc +-- source include/ctype_german.inc +SET collation_connection='utf8mb4_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_like_escape.inc + +# +# Bug #7874 CONCAT() gives wrong results mixing +# latin1 field and utf8mb4 string literals +# +CREATE TABLE t1 ( + user varchar(255) NOT NULL default '' +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES ('one'),('two'); +SELECT CHARSET('a'); +SELECT user, CONCAT('<', user, '>') AS c FROM t1; +DROP TABLE t1; + +# +# Bug#8785 +# the same problem with the above, but with nested CONCATs +# +create table t1 (f1 varchar(1) not null) default charset utf8mb4; +insert into t1 values (''), (''); +select concat(concat(_latin1'->',f1),_latin1'<-') from t1; +drop table t1; + +# +# Bug#8385: utf8mb4_general_ci treats Cyrillic letters I and SHORT I as the same +# +select convert(_koi8r'É' using utf8mb4) < convert(_koi8r'Ê' using utf8mb4); + +# +# Bugs#5980: NULL requires a characterset in a union +# +set names latin1; +create table t1 (a varchar(10)) character set utf8mb4; +insert into t1 values ('test'); +select ifnull(a,'') from t1; +drop table t1; +select repeat(_utf8mb4'+',3) as h union select NULL; +select ifnull(NULL, _utf8mb4'string'); + +# +# Bug#9509 Optimizer: wrong result after AND with comparisons +# +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_lithuanian_ci); +insert into t1 values ('I'),('K'),('Y'); +select * from t1 where s1 < 'K' and s1 = 'Y'; +select * from t1 where 'K' > s1 and s1 = 'Y'; +drop table t1; + +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_czech_ci); +insert into t1 values ('c'),('d'),('h'),('ch'),('CH'),('cH'),('Ch'),('i'); +select * from t1 where s1 > 'd' and s1 = 'CH'; +select * from t1 where 'd' < s1 and s1 = 'CH'; +select * from t1 where s1 = 'cH' and s1 <> 'ch'; +select * from t1 where 'cH' = s1 and s1 <> 'ch'; +drop table t1; + +# +# 
Bug#10714: Inserting double value into utf8mb4 column crashes server +# +create table t1 (a varchar(255)) default character set utf8mb4; +insert into t1 values (1.0); +drop table t1; + +# +# Bug#10253 compound index length and utf8mb4 char set +# produces invalid query results +# +create table t1 ( + id int not null, + city varchar(20) not null, + key (city(7),id) +) character set=utf8mb4; +insert into t1 values (1,'Durban North'); +insert into t1 values (2,'Durban'); +select * from t1 where city = 'Durban'; +select * from t1 where city = 'Durban '; +drop table t1; + +# +# Bug #11819 CREATE TABLE with a SET DEFAULT 0 and UTF8 crashes server. +# +--error 1067 +create table t1 (x set('A', 'B') default 0) character set utf8mb4; +--error 1067 +create table t1 (x enum('A', 'B') default 0) character set utf8mb4; + + +# +# Test for bug #11167: join for utf8mb4 varchar value longer than 255 bytes +# + +SET NAMES UTF8; + +CREATE TABLE t1 ( + `id` int(20) NOT NULL auto_increment, + `country` varchar(100) NOT NULL default '', + `shortcode` varchar(100) NOT NULL default '', + `operator` varchar(100) NOT NULL default '', + `momid` varchar(30) NOT NULL default '', + `keyword` varchar(160) NOT NULL default '', + `content` varchar(160) NOT NULL default '', + `second_token` varchar(160) default NULL, + `gateway_id` int(11) NOT NULL default '0', + `created` datetime NOT NULL default '0000-00-00 00:00:00', + `msisdn` varchar(15) NOT NULL default '', + PRIMARY KEY (`id`), + UNIQUE KEY `MSCCSPK_20030521130957121` (`momid`), + KEY `IX_mobile_originated_message_keyword` (`keyword`), + KEY `IX_mobile_originated_message_created` (`created`), + KEY `IX_mobile_originated_message_support` (`msisdn`,`momid`,`keyword`,`gateway_id`,`created`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; + +INSERT INTO t1 VALUES +(1,'blah','464','aaa','fkc1c9ilc20x0hgae7lx6j09','ERR','ERR Имри.Ðфимим.Ðеимимримдмримрмрирор имримримримр имридм ирбднримрфмририримрфмфмим.Ðд.Д имдимримрад.Ðдимримримрмдиримримримр 
м.Дадимфшьмримд им.Ðдимимрн имадми','ИМРИ.ÐФИМИМ.ÐЕИМИМРИМДМРИМРМРИРОР',3,'2005-06-01 17:30:43','1234567890'), +(2,'blah','464','aaa','haxpl2ilc20x00bj4tt2m5ti','11','11 g','G',3,'2005-06-02 22:43:10','1234567890'); + +--disable_warnings +CREATE TABLE t2 ( + `msisdn` varchar(15) NOT NULL default '', + `operator_id` int(11) NOT NULL default '0', + `created` datetime NOT NULL default '0000-00-00 00:00:00', + UNIQUE KEY `PK_user` (`msisdn`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +--enable_warnings + +INSERT INTO t2 VALUES ('1234567890',2,'2005-05-24 13:53:25'); + +SELECT content, t2.msisdn FROM t1, t2 WHERE t1.msisdn = '1234567890'; + +DROP TABLE t1,t2; + +# +# Bug#11591: CHAR column with utf8mb4 does not work properly +# (more chars than expected) +# +create table t1 (a char(20) character set utf8mb4); +insert into t1 values ('123456'),('андрей'); +alter table t1 modify a char(2) character set utf8mb4; +select char_length(a), length(a), a from t1 order by a; +drop table t1; + +# +# Bugs#12611 +# ESCAPE + LIKE do not work when the escape char is a multibyte one +# +set names utf8mb4; +select 'andre%' like 'andreñ%' escape 'ñ'; + +# +# Bugs#11754: SET NAMES utf8mb4 followed by SELECT "A\\" LIKE "A\\" returns 0 +# +set names utf8mb4; +select 'a\\' like 'a\\'; +select 'aa\\' like 'a%\\'; + +create table t1 (a char(10), key(a)) character set utf8mb4; +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +select * from t1 where a like "abc%"; +select * from t1 where a like concat("abc","%"); +select * from t1 where a like "ABC%"; +select * from t1 where a like "test%"; +select * from t1 where a like "te_t"; +select * from t1 where a like "%a%"; +select * from t1 where a like "%abcd%"; +select * from t1 where a like "%abc\d%"; +drop table t1; + + +# +# Bug#9557 MyISAM utf8mb4 table crash +# +CREATE TABLE t1 ( + a varchar(255) NOT NULL default '', + KEY a (a) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_general_ci; +insert into t1 values (_utf8mb4 
0xe880bd); +insert into t1 values (_utf8mb4 0x5b); +select hex(a) from t1; +drop table t1; + +# +# Bug#13751 find_in_set: Illegal mix of collations +# +set names 'latin1'; +create table t1 (a varchar(255)) default charset=utf8mb4; +select * from t1 where find_in_set('-1', a); +drop table t1; + +# +# Bug#13233: select distinct char(column) fails with utf8mb4 +# +create table t1 (a int); +insert into t1 values (48),(49),(50); +set names utf8mb4; +select distinct char(a) from t1; +drop table t1; + +# +# Bug#15581: COALESCE function truncates mutli-byte TINYTEXT values +# +CREATE TABLE t1 (t TINYTEXT CHARACTER SET utf8mb4); +INSERT INTO t1 VALUES(REPEAT('a', 100)); +CREATE TEMPORARY TABLE t2 SELECT COALESCE(t) AS bug FROM t1; +SELECT LENGTH(bug) FROM t2; +DROP TABLE t2; +DROP TABLE t1; + +# +# Bug#17313: N'xxx' and _utf8mb4'xxx' are not equivalent +# +CREATE TABLE t1 (item varchar(255)) default character set utf8mb4; +INSERT INTO t1 VALUES (N'\\'); +INSERT INTO t1 VALUES (_utf8mb4'\\'); +INSERT INTO t1 VALUES (N'Cote d\'Ivoire'); +INSERT INTO t1 VALUES (_utf8mb4'Cote d\'Ivoire'); +SELECT item FROM t1 ORDER BY item; +DROP TABLE t1; + +# +# Bug#17705: Corruption of compressed index when index length changes between +# 254 and 256 +# + +SET NAMES utf8mb4; +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(a VARCHAR(255), KEY(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'); +INSERT INTO t1 VALUES('uu'); +check table t1; +INSERT INTO t1 VALUES('uU'); +check table t1; +INSERT INTO t1 VALUES('uu'); +check table t1; +INSERT INTO t1 VALUES('uuABC'); +check table t1; +INSERT INTO t1 VALUES('UuABC'); +check table t1; +INSERT INTO t1 VALUES('uuABC'); +check table t1; +alter table t1 add b int; +INSERT INTO t1 
VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',1); +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',2); +delete from t1 where b=1; +INSERT INTO t1 VALUES('UUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',1); +check table t1; +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',3); +INSERT INTO t1 VALUES('uuABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',4); +delete from t1 where b=3; +INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',3); +check table t1; +drop table t1; + +# +# Bug#20471 LIKE search fails with indexed utf8mb4 char column +# +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4); +insert into t1 values 
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; +drop table t1; + +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_unicode_ci); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; +drop table t1; + +set names utf8mb4; +create table t1 (s1 char(5) character set utf8mb4 collate utf8mb4_bin); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; +drop table t1; + +# additional tests from duplicate bug#20744 MySQL return no result + +set names utf8mb4; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_general_ci; +--enable_warnings +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as gci1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +select a as gci2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +drop table t1; + +set names utf8mb4; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_unicode_ci; +--enable_warnings +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as uci1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +select a as uci2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +drop table t1; + +set names 
utf8mb4; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8mb4 collate utf8mb4_bin; +--enable_warnings +insert into t1 values ('ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'); +insert into t1 values ('ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ'); +select a as bin1 from t1 where a like 'ã•ã—ã™ã›ãã‹ããã‘ã“ã‚ã„ã†ãˆãŠ%'; +select a as bin2 from t1 where a like 'ã‚ã„ã†ãˆãŠã‹ããã‘ã“ã•ã—ã™ã›ã'; +drop table t1; + + + +# +# Bug#14896: Comparison with a key in a partial index over mb chararacter field +# + +SET NAMES utf8mb4; +CREATE TABLE t1 (id int PRIMARY KEY, + a varchar(16) collate utf8mb4_unicode_ci NOT NULL default '', + b int, + f varchar(128) default 'XXX', + INDEX (a(4)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +INSERT INTO t1(id, a, b) VALUES + (1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30), + (4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40), + (7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50), + (10, 'eeeee', 40), (11, 'bbbbbb', 60); + +SELECT id, a, b FROM t1; + +SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb'; + +SELECT id, a FROM t1 WHERE a='bbbbbb'; +SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b; + +DROP TABLE t1; + +# +# Bug#16674: LIKE predicate for a utf8mb4 character set column +# + +SET NAMES utf8mb4; + +CREATE TABLE t1 ( + a CHAR(13) DEFAULT '', + INDEX(a) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES + ('Käli Käli 2-4'), ('Käli Käli 2-4'), + ('Käli Käli 2+4'), ('Käli Käli 2+4'), + ('Käli Käli 2-6'), ('Käli Käli 2-6'); +INSERT INTO t1 SELECT * FROM t1; + +CREATE TABLE t2 ( + a CHAR(13) DEFAULT '', + INDEX(a) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci; + +INSERT INTO t2 VALUES + ('Kali Kali 2-4'), ('Kali Kali 2-4'), + ('Kali Kali 2+4'), ('Kali Kali 2+4'), + ('Kali Kali 2-6'), ('Kali Kali 2-6'); +INSERT INTO t2 SELECT * FROM t2; + +SELECT a FROM t1 WHERE a LIKE 'Käli Käli 2+4'; +SELECT a FROM t2 WHERE a LIKE 
'Kali Kali 2+4'; + +EXPLAIN SELECT a FROM t1 WHERE a LIKE 'Käli Käli 2+4'; +EXPLAIN SELECT a FROM t1 WHERE a = 'Käli Käli 2+4'; +EXPLAIN SELECT a FROM t2 WHERE a LIKE 'Kali Kali 2+4'; +EXPLAIN SELECT a FROM t2 WHERE a = 'Kali Kali 2+4'; + +DROP TABLE t1,t2; + +CREATE TABLE t1 ( + a char(255) DEFAULT '', + KEY(a(10)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +DROP TABLE t1; + +CREATE TABLE t1 ( + a char(255) DEFAULT '' +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +INSERT INTO t1 VALUES ('Käli Käli 2-4'); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +ALTER TABLE t1 ADD KEY (a(10)); +SELECT * FROM t1 WHERE a LIKE 'Käli Käli 2%'; +DROP TABLE t1; + +# +# Bug#18359: LIKE predicate for a 'utf8mb4' text column with a partial index +# (see bug #16674 as well) +# + +SET NAMES latin2; + +CREATE TABLE t1 ( + id int(11) NOT NULL default '0', + tid int(11) NOT NULL default '0', + val text NOT NULL, + INDEX idx(tid, val(10)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; + +INSERT INTO t1 VALUES + (40988,72,'VOLNÝ ADSL'),(41009,72,'VOLNÝ ADSL'), + (41032,72,'VOLNÝ ADSL'),(41038,72,'VOLNÝ ADSL'), + (41063,72,'VOLNÝ ADSL'),(41537,72,'VOLNÝ ADSL Office'), + (42141,72,'VOLNÝ ADSL'),(42565,72,'VOLNÝ ADSL Combi'), + (42749,72,'VOLNÝ ADSL'),(44205,72,'VOLNÝ ADSL'); + +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNY ADSL'; +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNÝ ADSL'; +SELECT * FROM t1 WHERE tid=72 and val LIKE '%VOLNÝ ADSL'; + +ALTER TABLE t1 DROP KEY idx; +ALTER TABLE t1 ADD KEY idx (tid,val(11)); + +SELECT * FROM t1 WHERE tid=72 and val LIKE 'VOLNÝ ADSL'; + +DROP TABLE t1; + +# +# Bug 20709: problem with utf8mb4 fields in temporary tables +# + +create table t1(a char(200) collate utf8mb4_unicode_ci NOT NULL 
default '') + default charset=utf8mb4 collate=utf8mb4_unicode_ci; +insert into t1 values (unhex('65')), (unhex('C3A9')), (unhex('65')); +explain select distinct a from t1; +select distinct a from t1; +explain select a from t1 group by a; +select a from t1 group by a; +drop table t1; + +# +# Bug #20204: "order by" changes the results returned +# + +create table t1(a char(10)) default charset utf8mb4; +insert into t1 values ('123'), ('456'); +explain + select substr(Z.a,-1), Z.a from t1 as Y join t1 as Z on Y.a=Z.a order by 1; +select substr(Z.a,-1), Z.a from t1 as Y join t1 as Z on Y.a=Z.a order by 1; +drop table t1; + +# +# Bug #34349: Passing invalid parameter to CHAR() in an ORDER BY causes +# MySQL to hang +# + +SET CHARACTER SET utf8mb4; +SHOW VARIABLES LIKE 'character\_set\_%'; +CREATE DATABASE crashtest DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; +USE crashtest; +CREATE TABLE crashtest (crash char(10)) DEFAULT CHARSET=utf8mb4; +INSERT INTO crashtest VALUES ('35'), ('36'), ('37'); +SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +INSERT INTO crashtest VALUES ('-1000'); +EXPLAIN SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +SELECT * FROM crashtest ORDER BY CHAR(crash USING utf8mb4); +DROP TABLE crashtest; +DROP DATABASE crashtest; +USE test; +SET CHARACTER SET default; + +# End of 4.1 tests + +# +# Test for bug #11484: wrong results for a DISTINCT varchar column in uft8. 
+# + +CREATE TABLE t1(id varchar(20) NOT NULL) DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES ('xxx'), ('aa'), ('yyy'), ('aa'); + +SELECT id FROM t1; +SELECT DISTINCT id FROM t1; +SELECT DISTINCT id FROM t1 ORDER BY id; + +DROP TABLE t1; + +# +# Bug#20095 Changing length of VARCHAR field with UTF8 +# collation does not truncate values +# +create table t1 ( + a varchar(26) not null +) default character set utf8mb4; +insert into t1 (a) values ('abcdefghijklmnopqrstuvwxyz'); +select * from t1; +# varchar to varchar +alter table t1 change a a varchar(20) character set utf8mb4 not null; +select * from t1; +# varchar to char +alter table t1 change a a char(15) character set utf8mb4 not null; +select * from t1; +# char to char +alter table t1 change a a char(10) character set utf8mb4 not null; +select * from t1; +# char to varchar +alter table t1 change a a varchar(5) character set utf8mb4 not null; +select * from t1; +drop table t1; + +# +# Check that do_varstring2_mb produces a warning +# +create table t1 ( + a varchar(4000) not null +) default character set utf8mb4; +insert into t1 values (repeat('a',4000)); +alter table t1 change a a varchar(3000) character set utf8mb4 not null; +select length(a) from t1; +drop table t1; + + +# +# Bug#10504: Character set does not support traditional mode +# Bug#14146: CHAR(...USING ...) and CONVERT(CHAR(...) USING...) 
+# produce different results +# +set names utf8mb4; +# correct value +select hex(char(1 using utf8mb4)); +select char(0xd1,0x8f using utf8mb4); +select char(0xd18f using utf8mb4); +select char(53647 using utf8mb4); +# incorrect value: return with warning +select char(0xff,0x8f using utf8mb4); +select convert(char(0xff,0x8f) using utf8mb4); +# incorrect value in strict mode: return NULL with "Error" level warning +set sql_mode=traditional; +select char(0xff,0x8f using utf8mb4); +select char(195 using utf8mb4); +select char(196 using utf8mb4); +select char(2557 using utf8mb4); +select convert(char(0xff,0x8f) using utf8mb4); + +# +# Check convert + char + using +# +select hex(convert(char(2557 using latin1) using utf8mb4)); + +# +# char() without USING returns "binary" by default, any argument is ok +# +select hex(char(195)); +select hex(char(196)); +select hex(char(2557)); + + + +# +# Bug#12891: UNION doesn't return DISTINCT result for multi-byte characters +# +set names utf8mb4; +create table t1 (a char(1)) default character set utf8mb4; +create table t2 (a char(1)) default character set utf8mb4; +insert into t1 values('a'),('a'),(0xE38182),(0xE38182); +insert into t1 values('i'),('i'),(0xE38184),(0xE38184); +select * from t1 union distinct select * from t2; +drop table t1,t2; + + +# +# Bug#12371: executing prepared statement fails (illegal mix of collations) +# +set names utf8mb4; +create table t1 (a char(10), b varchar(10)); +insert into t1 values ('bar','kostja'); +insert into t1 values ('kostja','bar'); +prepare my_stmt from "select * from t1 where a=?"; +set @a:='bar'; +execute my_stmt using @a; +set @a:='kostja'; +execute my_stmt using @a; +set @a:=null; +execute my_stmt using @a; +drop table if exists t1; + + +# +# Bug#21505 Create view - illegal mix of collation for operation 'UNION' +# +--disable_warnings +drop table if exists t1; +drop view if exists v1, v2; +--enable_warnings +set names utf8mb4; +create table t1(col1 varchar(12) character set utf8mb4 
collate utf8mb4_unicode_ci); +insert into t1 values('t1_val'); +create view v1 as select 'v1_val' as col1; +select coercibility(col1), collation(col1) from v1; +create view v2 as select col1 from v1 union select col1 from t1; +select coercibility(col1), collation(col1)from v2; +drop view v1, v2; +create view v1 as select 'v1_val' collate utf8mb4_swedish_ci as col1; +select coercibility(col1), collation(col1) from v1; +create view v2 as select col1 from v1 union select col1 from t1; +select coercibility(col1), collation(col1) from v2; +drop view v1, v2; +drop table t1; + +# +# Check conversion of NCHAR strings to subset (e.g. latin1). +# Conversion is possible if string repertoire is ASCII. +# Conversion is not possible if the string have extended characters +# +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, N'x', N'y')) from t1; +--error 1267 +select concat(a, if(b>10, N'æ', N'ß')) from t1; +drop table t1; + +# Conversion tests for character set introducers +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, _utf8mb4'x', _utf8mb4'y')) from t1; +--error 1267 +select concat(a, if(b>10, _utf8mb4'æ', _utf8mb4'ß')) from t1; +drop table t1; + +# Conversion tests for introducer + HEX string +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, _utf8mb4 0x78, _utf8mb4 0x79)) from t1; +--error 1267 +select concat(a, if(b>10, _utf8mb4 0xC3A6, _utf8mb4 0xC3AF)) from t1; +drop table t1; + +# Conversion tests for "text_literal TEXT_STRING_literal" syntax structure +set names utf8mb4; +create table t1 (a varchar(10) character set latin1, b int); +insert into t1 values ('a',1); +select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1; +--error 1267 +select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1; +drop table t1; 
+ + +# +# Bug#19960: Inconsistent results when joining +# InnoDB tables using partial UTF8 indexes +# +SHOW ENGINES; +--disable_warnings +CREATE TABLE t1 ( + colA int(11) NOT NULL, + colB varchar(255) character set utf8mb4 NOT NULL, + PRIMARY KEY (colA) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +--enable_warnings +INSERT INTO t1 (colA, colB) VALUES (1, 'foo'), (2, 'foo bar'); +--disable_warnings +CREATE TABLE t2 ( + colA int(11) NOT NULL, + colB varchar(255) character set utf8mb4 NOT NULL, + KEY bad (colA,colB(3)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +--enable_warnings +INSERT INTO t2 (colA, colB) VALUES (1, 'foo'),(2, 'foo bar'); +SELECT * FROM t1 JOIN t2 ON t1.colA=t2.colA AND t1.colB=t2.colB +WHERE t1.colA < 3; +DROP TABLE t1, t2; + +# +# Bug#29205: truncation of UTF8 values when the UNION statement +# forces collation to the binary charset +# + +SELECT 'н1234567890' UNION SELECT _binary '1'; +SELECT 'н1234567890' UNION SELECT 1; + +SELECT '1' UNION SELECT 'н1234567890'; +SELECT 1 UNION SELECT 'н1234567890'; + +CREATE TABLE t1 (c VARCHAR(11)) CHARACTER SET utf8mb4; +CREATE TABLE t2 (b CHAR(1) CHARACTER SET binary, i INT); + +INSERT INTO t1 (c) VALUES ('н1234567890'); +INSERT INTO t2 (b, i) VALUES ('1', 1); + +SELECT c FROM t1 UNION SELECT b FROM t2; +SELECT c FROM t1 UNION SELECT i FROM t2; + +SELECT b FROM t2 UNION SELECT c FROM t1; +SELECT i FROM t2 UNION SELECT c FROM t1; + +DROP TABLE t1, t2; + +# +# Bug#30982: CHAR(..USING..) 
can return a not-well-formed string +# Bug #30986: Character set introducer followed by a HEX string can return bad result +# +set sql_mode=traditional; +select hex(char(0xFF using utf8mb4)); +select hex(convert(0xFF using utf8mb4)); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 0x616263FF); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 X'616263FF'); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 B'001111111111'); +--error ER_INVALID_CHARACTER_STRING +select (_utf8mb4 X'616263FF'); +set sql_mode=default; +select hex(char(0xFF using utf8mb4)); +select hex(convert(0xFF using utf8mb4)); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 0x616263FF); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 X'616263FF'); +--error ER_INVALID_CHARACTER_STRING +select hex(_utf8mb4 B'001111111111'); +--error ER_INVALID_CHARACTER_STRING +select (_utf8mb4 X'616263FF'); + +# +# Bug #36772: When using UTF8, CONVERT with GROUP BY returns truncated results +# +CREATE TABLE t1 (a INT NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES (70000, 1092), (70001, 1085), (70002, 1065); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1; +ALTER TABLE t1 ADD UNIQUE (b); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +DROP INDEX b ON t1; +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b; +ALTER TABLE t1 ADD INDEX (b); +SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b; +DROP TABLE t1; + +--echo # +--echo # Bug#26474: Add Sinhala script (Sri Lanka) collation to MySQL +--echo # +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings +CREATE TABLE t1 ( + predicted_order int NOT NULL, + utf8mb4_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb4; +INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, 
x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682'); +SELECT predicted_order, hex(utf8mb4_encoding) FROM t1 ORDER BY utf8mb4_encoding COLLATE utf8mb4_sinhala_ci; +DROP TABLE t1; + +--echo # +--echo # Bug#32914 Character sets: illegal characters in utf8mb4 and utf32 columns +--echo # +create table t1 (utf8mb4 char(1) character set utf8mb4); +--echo Testing 
[F0][90..BF][80..BF][80..BF] +insert into t1 values (0xF0908080); +insert into t1 values (0xF0BFBFBF); +insert into t1 values (0xF08F8080); +select hex(utf8mb4) from t1; +delete from t1; + +--echo Testing [F2..F3][80..BF][80..BF][80..BF] +insert into t1 values (0xF2808080); +insert into t1 values (0xF2BFBFBF); +select hex(utf8mb4) from t1; +delete from t1; + +--echo Testing [F4][80..8F][80..BF][80..BF] +insert into t1 values (0xF4808080); +insert into t1 values (0xF48F8080); +insert into t1 values (0xF4908080); +select hex(utf8mb4) from t1; +drop table t1; + + +--echo # +--echo # Check strnxfrm() with odd length +--echo # +set max_sort_length=5; +select @@max_sort_length; +create table t1 (a varchar(128) character set utf8mb4 collate utf8mb4_general_ci); +insert into t1 values ('a'),('b'),('c'); +select * from t1 order by a; +alter table t1 modify a varchar(128) character set utf8mb4 collate utf8mb4_bin; +select * from t1 order by a; +drop table t1; +set max_sort_length=default; + +--echo # +--echo # Bug#26180: Can't add columns to tables created with utf8mb4 text indexes +--echo # +CREATE TABLE t1 ( + clipid INT NOT NULL, + Tape TINYTEXT, + PRIMARY KEY (clipid), + KEY tape(Tape(255)) +) CHARACTER SET=utf8mb4; +ALTER TABLE t1 ADD mos TINYINT DEFAULT 0 AFTER clipid; +SHOW CREATE TABLE t1; +DROP TABLE t1; + + +#--echo # +#--echo # Check that supplementary characters are not allowed in identifiers +#--echo # +# TODO: activate this when system_charset_info is changed to utf8mb4 +#--error 1300 +#CREATE DATABASE `ð€€`; +#--error 1300 +#CREATE TABLE `ð€€` (a int); +#--error 1166 +#CREATE TABLE test.t1 SELECT 'ð€€'; +#--error 1300 +#CREATE USER `ð€€`; + +--echo # +--echo # Testing 4-byte values. 
+--echo # + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings +CREATE TABLE t1 ( + u_decimal int NOT NULL, + utf8mb4_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb4; +# Source of the following values: http://www.fileformat.info/info/unicode/block/index.htm +# SINGLE BARLINE +INSERT INTO t1 VALUES (119040, x'f09d8480'), +# G CLEF + (119070, x'f09d849e'), +# HALF NOTE + (119134, x'f09d859e'), +# MUSICAL SYMBOL CROIX + (119247, x'f09d878f'), +# MATHEMATICAL BOLD ITALIC CAPITAL DELTA + (120607, x'f09d9c9f'), +# SANS-SERIF BOLD ITALIC CAPITAL PI + (120735, x'f09d9e9f'), +# <Plane 16 Private Use, Last> (last 4 byte character) + (1114111, x'f48fbfbf'), +# VARIATION SELECTOR-256 + (917999, x'f3a087af'); +# All from musical chars +INSERT INTO t1 VALUES (119070, x'f09d849ef09d859ef09d859ef09d8480f09d859ff09d859ff09d859ff09d85a0f09d85a0f09d8480'); +# Mix of 3-byte and 4-byte chars +INSERT INTO t1 VALUES (65131, x'efb9abf09d849ef09d859ef09d859ef09d8480f09d859fefb9abefb9abf09d85a0efb9ab'); +# All from musical chars, but 11 instead of 10 chars. 
truncated +INSERT INTO t1 VALUES (119070, x'f09d849ef09d859ef09d859ef09d8480f09d859ff09d859ff09d859ff09d85a0f09d85a0f09d8480f09d85a0'); + +SELECT u_decimal, hex(utf8mb4_encoding) FROM t1 ORDER BY utf8mb4_encoding COLLATE utf8mb4_general_ci, BINARY utf8mb4_encoding; + +# First invalid 4 byte value +INSERT INTO t1 VALUES (1114111, x'f5808080'); + + +--disable_warnings +DROP TABLE IF EXISTS t2; +--enable_warnings +CREATE TABLE t2 ( + u_decimal int NOT NULL, + utf8mb3_encoding VARCHAR(10) NOT NULL +) CHARACTER SET utf8mb3; +# LATIN CAPITAL LETTER VEND +INSERT INTO t2 VALUES (42856, x'ea9da8'); +# SMALL COMMERCIAL AT +INSERT INTO t2 VALUES (65131, x'efb9ab'); +# <Plane 16 Private Use, Last> (last 4 byte character) +INSERT INTO t2 VALUES (1114111, x'f48fbfbf'); + +SHOW CREATE TABLE t1; +SHOW CREATE TABLE t2; + +DROP TABLE t1; +DROP TABLE t2; + +--echo # +--echo # Testing that mixing utf8 and utf8mb4 collations returns utf8mb4 +--echo # +SELECT CHARSET(CONCAT(_utf8mb4'a',_utf8'b')); + +CREATE TABLE t1 (utf8mb4 VARCHAR(10) CHARACTER SET utf8mb4 NOT NULL); +INSERT INTO t1 VALUES (x'ea9da8'),(x'f48fbfbf'); +SELECT CONCAT(utf8mb4, _utf8 x'ea9da8') FROM t1 LIMIT 0; + +CREATE TABLE t2 (utf8mb3 VARCHAR(10) CHARACTER SET utf8mb3 NOT NULL); +INSERT INTO t2 VALUES (x'ea9da8'); + +SELECT HEX(CONCAT(utf8mb4, utf8mb3)) FROM t1,t2 ORDER BY 1; +SELECT CHARSET(CONCAT(utf8mb4, utf8mb3)) FROM t1, t2 LIMIT 1; + +CREATE TEMPORARY TABLE t3 AS SELECT *, concat(utf8mb4,utf8mb3) FROM t1, t2; +SHOW CREATE TABLE t3; +DROP TEMPORARY TABLE t3; + +SELECT * FROM t1, t2 WHERE t1.utf8mb4 > t2.utf8mb3; +SELECT * FROM t1, t2 WHERE t1.utf8mb4 = t2.utf8mb3; +SELECT * FROM t1, t2 WHERE t1.utf8mb4 < t2.utf8mb3; + +DROP TABLE t1; +DROP TABLE t2; + +--echo # +--echo # Check that mixing utf8mb4 with an invalid utf8 constant returns error +--echo # +# This should perhaps be changed to return ER_INVALID_CHARACTER_STRING +CREATE TABLE t1 (utf8mb4 VARCHAR(10) CHARACTER SET utf8mb4); +INSERT INTO t1 VALUES 
(x'f48fbfbf'); +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CONCAT(utf8mb4, _utf8 '¿') FROM t1; +--error ER_CANT_AGGREGATE_2COLLATIONS +SELECT CONCAT('a', _utf8 '¿') FROM t1; +DROP TABLE t1; + +--echo # +--echo # End of 5.5 tests +--echo # + +--echo # +--echo # End of tests +--echo # diff --git a/mysys/charset-def.c b/mysys/charset-def.c index bf2576621ce..9089347f002 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci; extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci; #endif + +#ifdef HAVE_CHARSET_utf32 +extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci; +extern CHARSET_INFO my_charset_utf32_latvian_uca_ci; +extern CHARSET_INFO my_charset_utf32_romanian_uca_ci; +extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci; +extern CHARSET_INFO my_charset_utf32_polish_uca_ci; +extern CHARSET_INFO my_charset_utf32_estonian_uca_ci; +extern CHARSET_INFO my_charset_utf32_spanish_uca_ci; +extern CHARSET_INFO my_charset_utf32_swedish_uca_ci; +extern CHARSET_INFO my_charset_utf32_turkish_uca_ci; +extern CHARSET_INFO my_charset_utf32_czech_uca_ci; +extern CHARSET_INFO my_charset_utf32_danish_uca_ci; +extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci; +extern CHARSET_INFO my_charset_utf32_slovak_uca_ci; +extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci; +extern CHARSET_INFO my_charset_utf32_roman_uca_ci; +extern CHARSET_INFO my_charset_utf32_persian_uca_ci; +extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci; +extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci; +extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci; +#endif /* HAVE_CHARSET_utf32 */ + + +#ifdef HAVE_CHARSET_utf16 +extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci; +extern CHARSET_INFO my_charset_utf16_latvian_uca_ci; +extern CHARSET_INFO my_charset_utf16_romanian_uca_ci; +extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci; +extern CHARSET_INFO my_charset_utf16_polish_uca_ci; +extern CHARSET_INFO 
my_charset_utf16_estonian_uca_ci; +extern CHARSET_INFO my_charset_utf16_spanish_uca_ci; +extern CHARSET_INFO my_charset_utf16_swedish_uca_ci; +extern CHARSET_INFO my_charset_utf16_turkish_uca_ci; +extern CHARSET_INFO my_charset_utf16_czech_uca_ci; +extern CHARSET_INFO my_charset_utf16_danish_uca_ci; +extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci; +extern CHARSET_INFO my_charset_utf16_slovak_uca_ci; +extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci; +extern CHARSET_INFO my_charset_utf16_roman_uca_ci; +extern CHARSET_INFO my_charset_utf16_persian_uca_ci; +extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci; +extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci; +extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci; +#endif /* HAVE_CHARSET_utf16 */ + + #ifdef HAVE_CHARSET_utf8 extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci; extern CHARSET_INFO my_charset_utf8_latvian_uca_ci; @@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs; #endif #endif +#ifdef HAVE_CHARSET_utf8mb4 +extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci; +extern CHARSET_INFO 
my_charset_utf8mb4_hungarian_uca_ci; +extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci; +#endif /* HAVE_CHARSET_utf8mb4 */ + #endif /* HAVE_UCA_COLLATIONS */ my_bool init_compiled_charsets(myf flags __attribute__((unused))) @@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_utf8_hungarian_uca_ci); add_compiled_collation(&my_charset_utf8_sinhala_uca_ci); #endif -#endif +#endif /* HAVE_CHARSET_utf8 */ + + +#ifdef HAVE_CHARSET_utf8mb4 + add_compiled_collation(&my_charset_utf8mb4_general_ci); + add_compiled_collation(&my_charset_utf8mb4_bin); +#ifdef HAVE_UCA_COLLATIONS + add_compiled_collation(&my_charset_utf8mb4_unicode_ci); + add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci); + add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci); +#endif /* HAVE_UCA_COLLATIONS */ +#endif /* HAVE_CHARSET_utf8mb4 */ + + +#ifdef HAVE_CHARSET_utf16 + 
add_compiled_collation(&my_charset_utf16_general_ci); + add_compiled_collation(&my_charset_utf16_bin); +#ifdef HAVE_UCA_COLLATIONS + add_compiled_collation(&my_charset_utf16_unicode_ci); + add_compiled_collation(&my_charset_utf16_icelandic_uca_ci); + add_compiled_collation(&my_charset_utf16_latvian_uca_ci); + add_compiled_collation(&my_charset_utf16_romanian_uca_ci); + add_compiled_collation(&my_charset_utf16_slovenian_uca_ci); + add_compiled_collation(&my_charset_utf16_polish_uca_ci); + add_compiled_collation(&my_charset_utf16_estonian_uca_ci); + add_compiled_collation(&my_charset_utf16_spanish_uca_ci); + add_compiled_collation(&my_charset_utf16_swedish_uca_ci); + add_compiled_collation(&my_charset_utf16_turkish_uca_ci); + add_compiled_collation(&my_charset_utf16_czech_uca_ci); + add_compiled_collation(&my_charset_utf16_danish_uca_ci); + add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci); + add_compiled_collation(&my_charset_utf16_slovak_uca_ci); + add_compiled_collation(&my_charset_utf16_spanish2_uca_ci); + add_compiled_collation(&my_charset_utf16_roman_uca_ci); + add_compiled_collation(&my_charset_utf16_persian_uca_ci); + add_compiled_collation(&my_charset_utf16_esperanto_uca_ci); + add_compiled_collation(&my_charset_utf16_hungarian_uca_ci); + add_compiled_collation(&my_charset_utf16_sinhala_uca_ci); +#endif /* HAVE_UCA_COLLATIOINS */ +#endif /* HAVE_CHARSET_utf16 */ + + +#ifdef HAVE_CHARSET_utf32 + add_compiled_collation(&my_charset_utf32_general_ci); + add_compiled_collation(&my_charset_utf32_bin); +#ifdef HAVE_UCA_COLLATIONS + add_compiled_collation(&my_charset_utf32_unicode_ci); + add_compiled_collation(&my_charset_utf32_icelandic_uca_ci); + add_compiled_collation(&my_charset_utf32_latvian_uca_ci); + add_compiled_collation(&my_charset_utf32_romanian_uca_ci); + add_compiled_collation(&my_charset_utf32_slovenian_uca_ci); + add_compiled_collation(&my_charset_utf32_polish_uca_ci); + add_compiled_collation(&my_charset_utf32_estonian_uca_ci); + 
add_compiled_collation(&my_charset_utf32_spanish_uca_ci); + add_compiled_collation(&my_charset_utf32_swedish_uca_ci); + add_compiled_collation(&my_charset_utf32_turkish_uca_ci); + add_compiled_collation(&my_charset_utf32_czech_uca_ci); + add_compiled_collation(&my_charset_utf32_danish_uca_ci); + add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci); + add_compiled_collation(&my_charset_utf32_slovak_uca_ci); + add_compiled_collation(&my_charset_utf32_spanish2_uca_ci); + add_compiled_collation(&my_charset_utf32_roman_uca_ci); + add_compiled_collation(&my_charset_utf32_persian_uca_ci); + add_compiled_collation(&my_charset_utf32_esperanto_uca_ci); + add_compiled_collation(&my_charset_utf32_hungarian_uca_ci); + add_compiled_collation(&my_charset_utf32_sinhala_uca_ci); +#endif /* HAVE_UCA_COLLATIONS */ +#endif /* HAVE_CHARSET_utf32 */ /* Copy compiled charsets */ for (cs=compiled_charsets; cs->name; cs++) diff --git a/mysys/charset.c b/mysys/charset.c index 0cd4fcff56c..b4991555263 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -252,7 +252,7 @@ static int add_collation(CHARSET_INFO *cs) newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; #endif } - else if (!strcmp(cs->csname, "utf8")) + else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3")) { #if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS) copy_uca_collation(newcs, &my_charset_utf8_unicode_ci); @@ -261,6 +261,28 @@ static int add_collation(CHARSET_INFO *cs) return MY_XML_ERROR; #endif } + else if (!strcmp(cs->csname, "utf8mb4")) + { +#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci); + newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype; + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; +#endif + } + else if (!strcmp(cs->csname, "utf16")) + { +#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, &my_charset_utf16_unicode_ci); + newcs->state|= 
MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; +#endif + } + else if (!strcmp(cs->csname, "utf32")) + { +#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, &my_charset_utf32_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; +#endif + } else { uchar *sort_order= all_charsets[cs->number]->sort_order; @@ -433,17 +455,35 @@ static void init_available_charsets(void) } +static const char* +get_collation_name_alias(const char *name, char *buf, size_t bufsize) +{ + if (!strncasecmp(name, "utf8mb3_", 8)) + { + my_snprintf(buf, bufsize, "utf8_%s", name + 8); + return buf; + } + return NULL; +} + + uint get_collation_number(const char *name) { + uint id; + char alias[64]; my_pthread_once(&charsets_initialized, init_available_charsets); - return get_collation_number_internal(name); + if ((id= get_collation_number_internal(name))) + return id; + if ((name= get_collation_name_alias(name, alias, sizeof(alias)))) + return get_collation_number_internal(name); + return 0; } -uint get_charset_number(const char *charset_name, uint cs_flags) +static uint +get_charset_number_internal(const char *charset_name, uint cs_flags) { CHARSET_INFO **cs; - my_pthread_once(&charsets_initialized, init_available_charsets); for (cs= all_charsets; cs < all_charsets + array_elements(all_charsets); @@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags) } +static const char* +get_charset_name_alias(const char *name) +{ + if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3")) + return "utf8"; + return NULL; +} + + +uint get_charset_number(const char *charset_name, uint cs_flags) +{ + uint id; + my_pthread_once(&charsets_initialized, init_available_charsets); + if ((id= get_charset_number_internal(charset_name, cs_flags))) + return id; + if ((charset_name= get_charset_name_alias(charset_name))) + return get_charset_number_internal(charset_name, cs_flags); + return 0; +} + + const char 
*get_charset_name(uint charset_number) { CHARSET_INFO *cs; diff --git a/sql/field.cc b/sql/field.cc index 51bb527fc85..766aaba9ce1 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1836,7 +1836,9 @@ int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg) ASSERT_COLUMN_MARKED_FOR_WRITE; char buff[MAX_DATE_STRING_REP_LENGTH]; uint length= (uint) my_TIME_to_str(ltime, buff); - return store(buff, length, &my_charset_bin); + return store(buff, length, + (charset()->state & MY_CS_NONASCII) ? + &my_charset_latin1 : &my_charset_bin); } diff --git a/sql/item.cc b/sql/item.cc index e785f0addde..e9ac44eeba8 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -854,7 +854,7 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs) cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen; return cnvitem; } - return NULL; + return Item::safe_charset_converter(tocs); } @@ -1436,7 +1436,12 @@ left_is_superset(DTCollation *left, DTCollation *right) if (left->collation->state & MY_CS_UNICODE && (left->derivation < right->derivation || (left->derivation == right->derivation && - !(right->collation->state & MY_CS_UNICODE)))) + (!(right->collation->state & MY_CS_UNICODE) || + /* The code below makes 4-byte utf8 a superset over 3-byte utf8 */ + (left->collation->state & MY_CS_UNICODE_SUPPLEMENT && + !(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) && + left->collation->mbmaxlen > right->collation->mbmaxlen && + left->collation->mbminlen == right->collation->mbminlen))))) return TRUE; /* Allow convert from ASCII */ if (right->repertoire == MY_REPERTOIRE_ASCII && @@ -1695,7 +1700,7 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname, { Item* conv; uint32 dummy_offset; - if (!String::needs_conversion(0, (*arg)->collation.collation, + if (!String::needs_conversion(1, (*arg)->collation.collation, coll.collation, &dummy_offset)) continue; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index c33e0f4c6fb..1130e4c9ffc 100644 --- 
a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -2371,17 +2371,27 @@ String *Item_func_char::val_str(String *str) int32 num=(int32) args[i]->val_int(); if (!args[i]->null_value) { - char char_num= (char) num; - if (num&0xFF000000L) { - str->append((char)(num>>24)); - goto b2; - } else if (num&0xFF0000L) { - b2: str->append((char)(num>>16)); - goto b1; - } else if (num&0xFF00L) { - b1: str->append((char)(num>>8)); + char tmp[4]; + if (num & 0xFF000000L) + { + mi_int4store(tmp, num); + str->append(tmp, 4, &my_charset_bin); + } + else if (num & 0xFF0000L) + { + mi_int3store(tmp, num); + str->append(tmp, 3, &my_charset_bin); + } + else if (num & 0xFF00L) + { + mi_int2store(tmp, num); + str->append(tmp, 2, &my_charset_bin); + } + else + { + tmp[0]= (char) num; + str->append(tmp, 1, &my_charset_bin); } - str->append(&char_num, 1); } } str->realloc(str->length()); // Add end 0 (for Purify) @@ -2769,7 +2779,8 @@ String *Item_func_conv_charset::val_str(String *str) void Item_func_conv_charset::fix_length_and_dec() { collation.set(conv_charset, DERIVATION_IMPLICIT); - max_length = args[0]->max_length*conv_charset->mbmaxlen; + max_length = args[0]->max_length / args[0]->collation.collation->mbmaxlen * + conv_charset->mbmaxlen; } void Item_func_conv_charset::print(String *str, enum_query_type query_type) diff --git a/sql/sql_string.cc b/sql/sql_string.cc index e4e51aba622..75e8ca30cf0 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -412,11 +412,25 @@ bool String::append(const char *s) bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs) { - uint32 dummy_offset; + uint32 offset; - if (needs_conversion(arg_length, cs, str_charset, &dummy_offset)) + if (needs_conversion(arg_length, cs, str_charset, &offset)) { - uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen; + uint32 add_length; + if ((cs == &my_charset_bin) && offset) + { + DBUG_ASSERT(str_charset->mbminlen > offset); + offset= str_charset->mbminlen - offset; // How many 
characters to pad + add_length= arg_length + offset; + if (realloc(str_length + add_length)) + return TRUE; + bzero((char*) Ptr + str_length, offset); + memcpy(Ptr + str_length + offset, s, arg_length); + str_length+= add_length; + return FALSE; + } + + add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen; uint dummy_errors; if (realloc(str_length + add_length)) return TRUE; @@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, uint pad_length= to_cs->mbminlen - from_offset; bzero(to, pad_length); memmove(to + pad_length, from, from_offset); + /* + In some cases left zero-padding can create an incorrect character. + For example: + INSERT INTO t1 (utf32_column) VALUES (0x110000); + We'll pad the value to 0x00110000, which is a wrong UTF32 sequence! + The valid characters range is limited to 0x00000000..0x0010FFFF. + + Make sure we didn't pad to an incorrect character. + */ + if (to_cs->cset->well_formed_len(to_cs, + to, to + to_cs->mbminlen, 1, + &well_formed_error) != + to_cs->mbminlen) + { + *from_end_pos= *well_formed_error_pos= from; + *cannot_convert_error_pos= NULL; + return 0; + } nchars--; from+= from_offset; from_length-= from_offset; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 2e057d6a731..d154c238229 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, sql_field->interval_list); List_iterator<String> int_it(sql_field->interval_list); String conv, *tmp; - char comma_buf[2]; + char comma_buf[4]; /* 4 bytes for utf32 */ int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf, (uchar*) comma_buf + sizeof(comma_buf)); diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 252c5a08b8c..98b598c3c2c 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs, /* BINARY collations handlers for MB charsets */ -static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - 
const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) +int +my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) { size_t len=min(slen,tlen); int cmp= memcmp(s,t,len); @@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), 0 if strings are equal */ -static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) +int +my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + my_bool diff_if_only_endspace_difference) { const uchar *end; size_t length; @@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } -static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const char *s, const char *t) +int +my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const char *s, const char *t) { return strcmp(s,t); } -static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), - const uchar *key, size_t len,ulong *nr1, ulong *nr2) + +void +my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2) { const uchar *pos = key; @@ -787,10 +792,11 @@ fill_max_and_min: } -static int my_wildcmp_mb_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) +int +my_wildcmp_mb_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) { int result= -1; /* Not found, using wildcards */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 56f3ddccae4..7dbec5a1321 100644 --- a/strings/ctype-uca.c +++ 
b/strings/ctype-uca.c @@ -9464,4 +9464,1990 @@ CHARSET_INFO my_charset_utf8_sinhala_uca_ci= #endif /* HAVE_CHARSET_utf8 */ + +#ifdef HAVE_CHARSET_utf8mb4 + +extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; + +#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT) + +CHARSET_INFO my_charset_utf8mb4_unicode_ci= +{ + 224,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_unicode_ci",/* name */ + "", /* comment */ + "", /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci= +{ + 225,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + 
+CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci= +{ + 226,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci= +{ + 227,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci= +{ + 228,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, 
/* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_polish_uca_ci= +{ + 229,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci= +{ + 230,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 
0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci= +{ + 231,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci= +{ + 232,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci= +{ + 233,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_turkish_ci", /* 
name */ + "", /* comment */ + turkish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_czech_uca_ci= +{ + 234,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_danish_uca_ci= +{ + 235,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 
8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci= +{ + 236,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci= +{ + 237,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO 
my_charset_utf8mb4_spanish2_uca_ci= +{ + 238,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish2_ci", /* name */ + "", /* comment */ + spanish2, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_roman_uca_ci= +{ + 239,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_persian_uca_ci= +{ + 240,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + 
NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci= +{ + 241,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci= +{ + 242,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* 
max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci= +{ + 243,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_sinhala_ci",/* name */ + "", /* comment */ + sinhala, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +#endif /* HAVE_CHARSET_utf8mb4 */ + + +#ifdef HAVE_CHARSET_utf32 + +MY_COLLATION_HANDLER my_collation_utf32_uca_handler = +{ + my_coll_init_uca, /* init */ + my_strnncoll_any_uca, + my_strnncollsp_any_uca, + my_strnxfrm_any_uca, + my_strnxfrmlen_simple, + my_like_range_utf32, + my_wildcmp_uca, + NULL, + my_instr_mb, + my_hash_sort_any_uca, + my_propagate_complex +}; + +extern MY_CHARSET_HANDLER my_charset_utf32_handler; + +#define MY_CS_UTF32_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII) + +CHARSET_INFO my_charset_utf32_unicode_ci= +{ + 160,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_unicode_ci", /* name */ + "", /* comment */ + "", /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* 
ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + + +CHARSET_INFO my_charset_utf32_icelandic_uca_ci= +{ + 161,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_latvian_uca_ci= +{ + 162,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO 
my_charset_utf32_romanian_uca_ci= +{ + 163,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_slovenian_uca_ci= +{ + 164,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_polish_uca_ci= +{ + 165,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + 
NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_estonian_uca_ci= +{ + 166,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_spanish_uca_ci= +{ + 167,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + 
&my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_swedish_uca_ci= +{ + 168,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_turkish_uca_ci= +{ + 169,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_turkish_ci", /* name */ + "", /* comment */ + turkish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_czech_uca_ci= +{ + 170,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* 
contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + + +CHARSET_INFO my_charset_utf32_danish_uca_ci= +{ + 171,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_lithuanian_uca_ci= +{ + 172,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* 
pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_slovak_uca_ci= +{ + 173,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_spanish2_uca_ci= +{ + 174,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_spanish2_ci", /* name */ + "", /* comment */ + spanish2, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_roman_uca_ci= +{ + 175,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ 
+ NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_persian_uca_ci= +{ + 176,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_esperanto_uca_ci= +{ + 177,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* 
min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_hungarian_uca_ci= +{ + 178,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_sinhala_uca_ci= +{ + 179,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_sinhala_ci", /* name */ + "", /* comment */ + sinhala, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +#endif /* HAVE_CHARSET_utf32 */ + + +#ifdef HAVE_CHARSET_utf16 + + +MY_COLLATION_HANDLER my_collation_utf16_uca_handler = +{ + my_coll_init_uca, /* init */ + my_strnncoll_any_uca, + 
my_strnncollsp_any_uca, + my_strnxfrm_any_uca, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_uca, + NULL, + my_instr_mb, + my_hash_sort_any_uca, + my_propagate_complex +}; + +extern MY_CHARSET_HANDLER my_charset_utf16_handler; + +#define MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII) + +CHARSET_INFO my_charset_utf16_unicode_ci= +{ + 101,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* csname */ + "utf16_unicode_ci", /* name */ + "", /* comment */ + "", /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + + +CHARSET_INFO my_charset_utf16_icelandic_uca_ci= +{ + 102,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* csname */ + "utf16_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO 
my_charset_utf16_latvian_uca_ci= +{ + 103,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_romanian_uca_ci= +{ + 104,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_slovenian_uca_ci= +{ + 105,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* 
tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_polish_uca_ci= +{ + 106,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_estonian_uca_ci= +{ + 107,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + 
&my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_spanish_uca_ci= +{ + 108,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_swedish_uca_ci= +{ + 109,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_turkish_uca_ci= +{ + 110,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_turkish_ci", /* name */ + "", /* comment */ + turkish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + 
NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_czech_uca_ci= +{ + 111,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + + +CHARSET_INFO my_charset_utf16_danish_uca_ci= +{ + 112,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad 
char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_lithuanian_uca_ci= +{ + 113,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_slovak_uca_ci= +{ + 114,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_spanish2_uca_ci= +{ + 115,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_spanish2_ci",/* name */ + "", /* comment */ + spanish2, /* tailoring */ + NULL, /* ctype */ + NULL, /* 
to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_roman_uca_ci= +{ + 116,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_persian_uca_ci= +{ + 117,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* 
min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_esperanto_uca_ci= +{ + 118,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_hungarian_uca_ci= +{ + 119,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default,/* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_sinhala_uca_ci= +{ + 120,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_sinhala_ci",/* name */ + "", /* 
comment */ + sinhala, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default,/* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +#endif /* HAVE_CHARSET_utf16 */ + + + #endif /* HAVE_UCA_COLLATIONS */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 069131ba354..6de0ea8f7e8 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -25,479 +25,81 @@ #include <stdarg.h> -#ifdef HAVE_CHARSET_ucs2 - -#ifndef EILSEQ -#define EILSEQ ENOENT +#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2) +#define HAVE_CHARSET_mb2 #endif -static uchar ctype_ucs2[] = { - 0, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, - 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, - 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static 
uchar to_lower_ucs2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, - 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 -}; +#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32) +#define HAVE_CHARSET_mb2_or_mb4 +#endif -static uchar to_upper_ucs2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 -}; +#ifndef EILSEQ +#define EILSEQ ENOENT +#endif -static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) -{ - if (s+2 > e) /* Need 2 characters */ - return MY_CS_TOOSMALL2; - - *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]); - return 2; -} +#undef ULONGLONG_MAX +#define ULONGLONG_MAX (~(ulonglong) 0) +#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000)) +#define INIT_CNT 9 +#define LFACTOR ULL(1000000000) +#define LFACTOR1 ULL(10000000000) +#define LFACTOR2 ULL(100000000000) -static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , - my_wc_t wc, uchar *r, uchar *e) -{ - if ( r+2 > e ) - return MY_CS_TOOSMALL2; - - r[0]= (uchar) (wc >> 8); - r[1]= (uchar) (wc & 0xFF); - return 2; -} +static unsigned long lfactor[9]= +{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L }; +#define REPLACEMENT_CHAR 0xFFFD; -static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, - char *dst __attribute__((unused)), - size_t dstlen __attribute__((unused))) -{ - my_wc_t wc; - int res; - char *srcend= src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - DBUG_ASSERT(src == dst && srclen == dstlen); - - while ((src < srcend) && - (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) - { - int plane= (wc>>8) & 0xFF; - wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].toupper : wc; - if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) - break; - src+= res; - } - return srclen; -} -static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *n1, ulong *n2) +#ifdef HAVE_CHARSET_mb2_or_mb4 +static inline int +my_bincmp(const uchar *s, const uchar *se, + const uchar *t, const uchar *te) { - my_wc_t wc; - int res; - const uchar *e=s+slen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while (e > s+1 && e[-1] == ' ' && e[-2] == '\0') - e-= 2; - - while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) - { - int plane = (wc>>8) & 0xFF; - wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; - n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); - n2[0]+=3; - n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); - n2[0]+=3; - s+=res; - } + int slen= (int) (se - s), tlen= (int) (te - t); + int len= min(slen, tlen); + int cmp= memcmp(s, t, len); + return cmp ? cmp : slen - tlen; } -static size_t my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), - char * s __attribute__((unused))) +static size_t +my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)), + char * s __attribute__((unused))) { + DBUG_ASSERT(0); return 0; } -static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, - char *dst __attribute__((unused)), - size_t dstlen __attribute__((unused))) -{ - my_wc_t wc; - int res; - char *srcend= src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - DBUG_ASSERT(src == dst && srclen == dstlen); - - while ((src < srcend) && - (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) - { - int plane= (wc>>8) & 0xFF; - wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].tolower : wc; - if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) - break; - src+= res; - } - return srclen; -} - - -static size_t my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char * s __attribute__((unused))) +static size_t +my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + char * s __attribute__((unused))) { + DBUG_ASSERT(0); return 0; } -static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) +static int +my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + const char *s __attribute__((unused)), + const char *t __attribute__((unused))) { - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),t_wc; - const uchar *se=s+slen; - const uchar *te=t+tlen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - int plane; - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - plane=(s_wc>>8) & 0xFF; - s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; - plane=(t_wc>>8) & 0xFF; - t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -/* - Compare strings, discarding end space - - SYNOPSIS - my_strnncollsp_ucs2() - cs character set handler - a First string to compare - a_length Length of 'a' - b Second string to compare - b_length Length of 'b' - - IMPLEMENTATION - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. 
- - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - RETURN - < 0 a < b - = 0 a == b - > 0 a > b -*/ - -static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - /* extra safety to make sure the lengths are even numbers */ - slen&= ~1; - tlen&= ~1; - - se= s + slen; - te= t + tlen; - - for (minlen= min(slen, tlen); minlen; minlen-= 2) - { - int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort : - (((int) s[0]) << 8) + (int) s[1]; - - int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort : - (((int) t[0]) << 8) + (int) t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } + DBUG_ASSERT(0); return 0; } -static int my_strncasecmp_ucs2(CHARSET_INFO *cs, - const char *s, const char *t, size_t len) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),t_wc; - const char *se=s+len; - const char *te=t+len; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - int plane; - - s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se); - t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - plane=(s_wc>>8) & 0xFF; - s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc; - - plane=(t_wc>>8) & 0xFF; - t_wc = uni_plane[plane] ? 
uni_plane[plane][t_wc & 0xFF].tolower : t_wc; - - if ( s_wc != t_wc ) - return ((int) s_wc) - ((int) t_wc); - - s+=s_res; - t+=t_res; - } - return (int) ( (se-s) - (te-t) ); -} - - -static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) -{ - size_t s_len= strlen(s); - size_t t_len= strlen(t); - size_t len = (s_len > t_len) ? s_len : t_len; - return my_strncasecmp_ucs2(cs, s, t, len); -} - - -static size_t my_strnxfrm_ucs2(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, const uchar *src, - size_t srclen) -{ - my_wc_t wc; - int res; - int plane; - uchar *de = dst + dstlen; - const uchar *se = src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while( src < se && dst < de ) - { - if ((res=my_ucs2_uni(cs,&wc, src, se))<0) - { - break; - } - src+=res; - srclen-=res; - - plane=(wc>>8) & 0xFF; - wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; - - if ((res=my_uni_ucs2(cs,wc,dst,de)) <0) - { - break; - } - dst+=res; - } - if (dst < de) - cs->cset->fill(cs, (char*) dst, (size_t) (de - dst), ' '); - return dstlen; -} - - -static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *b __attribute__((unused)), - const char *e __attribute__((unused))) -{ - return 2; -} - - -static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , - uint c __attribute__((unused))) -{ - return 2; -} - - -static int my_vsnprintf_ucs2(char *dst, size_t n, const char* fmt, va_list ap) +static long +my_strntol_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { - char *start=dst, *end=dst+n-1; - for (; *fmt ; fmt++) - { - if (fmt[0] != '%') - { - if (dst == end) /* End of buffer */ - break; - - *dst++='\0'; *dst++= *fmt; /* Copy ordinary char */ - continue; - } - - fmt++; - - /* Skip if max size is used (to be compatible with printf) */ - while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' 
|| *fmt == '-') - fmt++; - - if (*fmt == 'l') - fmt++; - - if (*fmt == 's') /* String parameter */ - { - reg2 char *par = va_arg(ap, char *); - size_t plen; - size_t left_len = (size_t)(end-dst); - if (!par) par = (char*)"(null)"; - plen= strlen(par); - if (left_len <= plen*2) - plen = left_len/2 - 1; - - for ( ; plen ; plen--, dst+=2, par++) - { - dst[0]='\0'; - dst[1]=par[0]; - } - continue; - } - else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ - { - register int iarg; - char nbuf[16]; - char *pbuf=nbuf; - - if ((size_t) (end-dst) < 32) - break; - iarg = va_arg(ap, int); - if (*fmt == 'd') - int10_to_str((long) iarg, nbuf, -10); - else - int10_to_str((long) (uint) iarg,nbuf,10); - - for (; pbuf[0]; pbuf++) - { - *dst++='\0'; - *dst++=*pbuf; - } - continue; - } - - /* We come here on '%%', unknown code or too long parameter */ - if (dst == end) - break; - *dst++='\0'; - *dst++='%'; /* % used as % or unknown code */ - } - - DBUG_ASSERT(dst <= end); - *dst='\0'; /* End of errmessage */ - return (size_t) (dst - start); -} - -static size_t my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char* to, size_t n, const char* fmt, ...) -{ - va_list args; - va_start(args,fmt); - return my_vsnprintf_ucs2(to, n, fmt, args); -} - - -long my_strntol_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) -{ - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -511,7 +113,7 @@ long my_strntol_ucs2(CHARSET_INFO *cs, *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0) { switch (wc) { @@ -524,12 +126,12 @@ long my_strntol_ucs2(CHARSET_INFO *cs, } else /* No more characters or bad multibyte sequence */ { - if (endptr !=NULL ) - *endptr = (char*)s; - err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM; + if (endptr != NULL ) + *endptr= (char*) s; + err[0]= (cnv==MY_CS_ILSEQ) ? 
EILSEQ : EDOM; return 0; } - s+=cnv; + s+= cnv; } while (1); bs: @@ -539,39 +141,39 @@ bs: base = 10; #endif - overflow = 0; - res = 0; - save = s; - cutoff = ((uint32)~0L) / (uint32) base; - cutlim = (uint) (((uint32)~0L) % (uint32) base); + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { - s+=cnv; - if ( wc>='0' && wc<='9') - wc -= '0'; - else if ( wc>='A' && wc<='Z') - wc = wc - 'A' + 10; - else if ( wc>='a' && wc<='z') - wc = wc - 'a' + 10; + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; else break; if ((int)wc >= base) break; if (res > cutoff || (res == cutoff && wc > cutlim)) - overflow = 1; + overflow= 1; else { - res *= (uint32) base; - res += wc; + res*= (uint32) base; + res+= wc; } } - else if (cnv==MY_CS_ILSEQ) + else if (cnv == MY_CS_ILSEQ) { if (endptr !=NULL ) - *endptr = (char*)s; - err[0]=EILSEQ; + *endptr = (char*) s; + err[0]= EILSEQ; return 0; } else @@ -586,21 +188,21 @@ bs: if (s == save) { - err[0]=EDOM; + err[0]= EDOM; return 0L; } if (negative) { if (res > (uint32) INT_MIN32) - overflow = 1; + overflow= 1; } else if (res > INT_MAX32) - overflow = 1; + overflow= 1; if (overflow) { - err[0]=ERANGE; + err[0]= ERANGE; return negative ? 
INT_MIN32 : INT_MAX32; } @@ -608,11 +210,12 @@ bs: } -ulong my_strntoul_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static ulong +my_strntoul_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -620,13 +223,13 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs, register uint32 cutoff; register uint32 res; register const uchar *s= (const uchar*) nptr; - register const uchar *e= (const uchar*) nptr+l; + register const uchar *e= (const uchar*) nptr + l; const uchar *save; *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { switch (wc) { @@ -640,11 +243,11 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs, else /* No more characters or bad multibyte sequence */ { if (endptr !=NULL ) - *endptr = (char*)s; - err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM; + *endptr= (char*)s; + err[0]= (cnv == MY_CS_ILSEQ) ? 
EILSEQ : EDOM; return 0; } - s+=cnv; + s+= cnv; } while (1); bs: @@ -654,40 +257,40 @@ bs: base = 10; #endif - overflow = 0; - res = 0; - save = s; - cutoff = ((uint32)~0L) / (uint32) base; - cutlim = (uint) (((uint32)~0L) % (uint32) base); + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { - s+=cnv; - if ( wc>='0' && wc<='9') - wc -= '0'; - else if ( wc>='A' && wc<='Z') - wc = wc - 'A' + 10; - else if ( wc>='a' && wc<='z') - wc = wc - 'a' + 10; + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; else break; - if ((int)wc >= base) + if ((int) wc >= base) break; if (res > cutoff || (res == cutoff && wc > cutlim)) overflow = 1; else { - res *= (uint32) base; - res += wc; + res*= (uint32) base; + res+= wc; } } - else if (cnv==MY_CS_ILSEQ) + else if (cnv == MY_CS_ILSEQ) { - if (endptr !=NULL ) - *endptr = (char*)s; - err[0]=EILSEQ; + if (endptr != NULL ) + *endptr= (char*)s; + err[0]= EILSEQ; return 0; } else @@ -698,17 +301,17 @@ bs: } while(1); if (endptr != NULL) - *endptr = (char *) s; + *endptr= (char *) s; if (s == save) { - err[0]=EDOM; + err[0]= EDOM; return 0L; } if (overflow) { - err[0]=(ERANGE); + err[0]= (ERANGE); return (~(uint32) 0); } @@ -716,10 +319,10 @@ bs: } - -longlong my_strntoll_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static longlong +my_strntoll_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -832,13 +435,12 @@ bs: } - - -ulonglong my_strntoull_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static ulonglong +my_strntoull_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, 
+ char **endptr, int *err) { - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -846,13 +448,13 @@ ulonglong my_strntoull_ucs2(CHARSET_INFO *cs, register unsigned int cutlim; register ulonglong res; register const uchar *s= (const uchar*) nptr; - register const uchar *e= (const uchar*) nptr+l; + register const uchar *e= (const uchar*) nptr + l; const uchar *save; *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0) { switch (wc) { @@ -942,49 +544,51 @@ bs: } -double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *nptr, size_t length, - char **endptr, int *err) +static double +my_strntod_mb2_or_mb4(CHARSET_INFO *cs, + char *nptr, size_t length, + char **endptr, int *err) { char buf[256]; double res; - register char *b=buf; + register char *b= buf; register const uchar *s= (const uchar*) nptr; const uchar *end; my_wc_t wc; - int cnv; + int cnv; *err= 0; /* Cut too long strings */ if (length >= sizeof(buf)) - length= sizeof(buf)-1; - end= s+length; + length= sizeof(buf) - 1; + end= s + length; - while ((cnv=cs->cset->mb_wc(cs,&wc,s,end)) > 0) + while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0) { - s+=cnv; + s+= cnv; if (wc > (int) (uchar) 'e' || !wc) - break; /* Can't be part of double */ + break; /* Can't be part of double */ *b++= (char) wc; } *endptr= b; res= my_strtod(buf, endptr, err); - *endptr= nptr + (size_t) (*endptr- buf); + *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf); return res; } -ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *nptr, size_t length, - int unsign_fl, - char **endptr, int *err) +static ulonglong +my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t length, + int unsign_fl, + char **endptr, int *err) { - char buf[256], *b= buf; + char buf[256], *b= buf; ulonglong res; const uchar *end, *s= (const uchar*) nptr; my_wc_t wc; - int cnv; + int cnv; /* Cut too long strings */ if (length 
>= sizeof(buf)) @@ -1000,7 +604,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), } res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err); - *endptr= (char*) nptr + 2 * (size_t) (*endptr- buf); + *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf); return res; } @@ -1009,23 +613,24 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), This is a fast version optimized for the case of radix 10 / -10 */ -size_t my_l10tostr_ucs2(CHARSET_INFO *cs, - char *dst, size_t len, int radix, long int val) +static size_t +my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, long int val) { char buffer[66]; register char *p, *db, *de; long int new_val; - int sl=0; + int sl= 0; unsigned long int uval = (unsigned long int) val; - p = &buffer[sizeof(buffer)-1]; - *p='\0'; + p= &buffer[sizeof(buffer) - 1]; + *p= '\0'; if (radix < 0) { if (val < 0) { - sl = 1; + sl= 1; /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). 
*/ uval = (unsigned long int)0 - uval; } @@ -1033,57 +638,58 @@ size_t my_l10tostr_ucs2(CHARSET_INFO *cs, new_val = (long) (uval / 10); *--p = '0'+ (char) (uval - (unsigned long) new_val * 10); - val = new_val; + val= new_val; while (val != 0) { - new_val=val/10; - *--p = '0' + (char) (val-new_val*10); + new_val= val / 10; + *--p= '0' + (char) (val - new_val * 10); val= new_val; } if (sl) { - *--p='-'; + *--p= '-'; } - for ( db=dst, de=dst+len ; (dst<de) && *p ; p++) + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) { - int cnvres=cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de); - if (cnvres>0) - dst+=cnvres; + int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; else break; } - return (int) (dst-db); + return (int) (dst - db); } -size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *dst, size_t len, int radix, longlong val) +static size_t +my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, longlong val) { char buffer[65]; register char *p, *db, *de; long long_val; - int sl=0; + int sl= 0; ulonglong uval= (ulonglong) val; if (radix < 0) { if (val < 0) { - sl=1; + sl= 1; /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). 
*/ uval = (ulonglong)0 - uval; } } - p = &buffer[sizeof(buffer)-1]; + p= &buffer[sizeof(buffer)-1]; *p='\0'; if (uval == 0) { - *--p='0'; + *--p= '0'; goto cnv; } @@ -1091,7 +697,7 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), { ulonglong quo= uval/(uint) 10; uint rem= (uint) (uval- quo* (uint) 10); - *--p = '0' + rem; + *--p= '0' + rem; uval= quo; } @@ -1099,44 +705,34 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), while (long_val != 0) { long quo= long_val/10; - *--p = (char) ('0' + (long_val - quo*10)); + *--p= (char) ('0' + (long_val - quo*10)); long_val= quo; } cnv: if (sl) { - *--p='-'; + *--p= '-'; } - for ( db=dst, de=dst+len ; (dst<de) && *p ; p++) + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) { - int cnvres=cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); - if (cnvres>0) - dst+=cnvres; + int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; else break; } - return (int) (dst-db); + return (int) (dst -db); } +#endif /* HAVE_CHARSET_mb2_or_mb4 */ -#undef ULONGLONG_MAX -#define ULONGLONG_MAX (~(ulonglong) 0) -#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000)) -#define INIT_CNT 9 -#define LFACTOR ULL(1000000000) -#define LFACTOR1 ULL(10000000000) -#define LFACTOR2 ULL(100000000000) -static unsigned long lfactor[9]= -{ - 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L -}; - - -longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *nptr, char **endptr, int *error) +#ifdef HAVE_CHARSET_mb2 +static longlong +my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) { const char *s, *end, *start, *n_end, *true_end; uchar c; @@ -1162,11 +758,11 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), goto no_conv; } - /* Check for a sign. */ + /* Check for a sign. 
*/ negative= 0; if (!s[0] && s[1] == '-') { - *error= -1; /* Mark as negative number */ + *error= -1; /* Mark as negative number */ negative= 1; s+= 2; if (s == end) @@ -1182,7 +778,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), { s+= 2; if (s == end) - goto no_conv; + goto no_conv; } cutoff= ULONGLONG_MAX / LFACTOR2; cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; @@ -1197,7 +793,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), { s+= 2; if (s == end) - goto end_i; /* Return 0 */ + goto end_i; /* Return 0 */ } while (!s[0] && s[1] == '0'); n_end= s + 2 * INIT_CNT; @@ -1226,7 +822,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), /* Handle next 9 digits and store them in j */ j= 0; - start= s; /* Used to know how much to shift i */ + start= s; /* Used to know how much to shift i */ n_end= true_end= s + 2 * INIT_CNT; if (n_end > end) n_end= end; @@ -1266,7 +862,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), li=i*LFACTOR2+ (ulonglong) j*100 + k; return (longlong) li; -overflow: /* *endptr is set here */ +overflow: /* *endptr is set here */ *error= MY_ERRNO_ERANGE; return negative ? 
LONGLONG_MIN : (longlong) ULONGLONG_MAX; @@ -1303,6 +899,2242 @@ no_conv: } +static size_t +my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + end--; /* for easier loop condition, because of two bytes per character */ + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for ( ; str < end; str+= 2) + { + if (str[0] != '\0' || str[1] != ' ') + break; + } + return (size_t) (str - str0); + default: + return 0; + } +} + + +static void +my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)), + char *s, size_t l, int fill) +{ + for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2); +} + + +static int +my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start=dst, *end= dst + n - 1; + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + if (dst == end) /* End of buffer */ + break; + + *dst++='\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' 
|| *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end-dst); + if (!par) + par= (char*) "(null)"; + plen= strlen(par); + if (left_len <= plen * 2) + plen = left_len / 2 - 1; + + for ( ; plen ; plen--, dst+=2, par++) + { + dst[0]= '\0'; + dst[1]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + int iarg; + char nbuf[16]; + char *pbuf= nbuf; + + if ((size_t) (end - dst) < 32) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg, nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if (dst == end) + break; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code */ + } + + DBUG_ASSERT(dst <= end); + *dst='\0'; /* End of errmessage */ + return (size_t) (dst - start); +} + + +static size_t +my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) 
+{ + va_list args; + va_start(args,fmt); + return my_vsnprintf_mb2(to, n, fmt, args); +} + + +static size_t +my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0') + end-= 2; + return (size_t) (end - ptr); +} + +#endif /* HAVE_CHARSET_mb2*/ + + + + +#ifdef HAVE_CHARSET_utf16 + +/* + D800..DB7F - Non-private surrogate high (896 pages) + DB80..DBFF - Private surrogate high (128 pages) + DC00..DFFF - Surrogate low (1024 codes in a page) +*/ + +#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) +#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) +#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) + +static int +my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s, const uchar *e) +{ + if (s + 2 > e) + return MY_CS_TOOSMALL2; + + /* + High bytes: 0xD[89AB] = B'110110??' + Low bytes: 0xD[CDEF] = B'110111??' + Surrogate mask: 0xFC = B'11111100' + */ + + if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */ + { + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrogate pair */ + return MY_CS_ILSEQ; + + /* + s[0]= 110110?? (<< 18) + s[1]= ???????? (<< 10) + s[2]= 110111?? (<< 8) + s[3]= ???????? 
(<< 0) + */ + + *pwc= ((s[0] & 3) << 18) + (s[1] << 10) + + ((s[2] & 3) << 8) + s[3] + 0x10000; + + return 4; + } + + if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */ + return MY_CS_ILSEQ; + + *pwc= (s[0] << 8) + s[1]; + return 2; +} + + +static int +my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (wc <= 0xFFFF) + { + if (s + 2 > e) + return MY_CS_TOOSMALL2; + if (MY_UTF16_SURROGATE(wc)) + return MY_CS_ILUNI; + *s++= (uchar) (wc >> 8); + *s= (uchar) (wc & 0xFF); + return 2; + } + + if (wc <= 0x10FFFF) + { + if (s + 4 > e) + return MY_CS_TOOSMALL4; + *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8; + *s++= (uchar) (wc >> 10) & 0xFF; + *s++= (uchar) ((wc >> 8) & 3) | 0xDC; + *s= (uchar) wc & 0xFF; + return 4; + } + + return MY_CS_ILUNI; +} + + +static inline void +my_tolower_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static inline void +my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + + +static size_t +my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf16_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) + { + my_toupper_utf16(uni_plane, &wc); + if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + 
+ +static void +my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s+slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while (e > s + 1 && e[-1] == ' ' && e[-2] == '\0') + e-= 2; + + while ((s < e) && (res= my_utf16_uni(cs, &wc, (uchar *)s, (uchar*)e)) > 0) + { + my_tosort_utf16(uni_plane, &wc); + n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8); + n2[0]+= 3; + n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8); + n2[0]+= 3; + s+= res; + } +} + + +static size_t +my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf16_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf16(uni_plane, &wc); + if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int +my_strnncoll_utf16(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res, t_res; + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + s_res= my_utf16_uni(cs, &s_wc, s, se); + t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf16(uni_plane, &s_wc); + my_tosort_utf16(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? 
(t - te) : ((se - s) - (te - t))); +} + + +/** + Compare strings, discarding end space + + If one string is shorter than the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pointer. + @param s First string to compare. + @param slen Length of 's'. + @param t Second string to compare. + @param tlen Length of 't'. + + IMPLEMENTATION + + @return Comparison result. + @retval Negative number, if s is less than t. + @retval 0, if s is equal to t + @retval Positive number, if s > t +*/ + +static int +my_strnncollsp_utf16(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 2) == 0); + DBUG_ASSERT((tlen % 2) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while (s < se && t < te) + { + int s_res= my_utf16_uni(cs, &s_wc, s, se); + int t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf16(uni_plane, &s_wc); + my_tosort_utf16(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? 
-swap : swap; + } + } + return res; +} + + +static uint +my_ismbchar_utf16(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + if (b + 2 > e) + return 0; + + if (MY_UTF16_HIGH_HEAD(*b)) + { + return (b + 4 <= e) && MY_UTF16_LOW_HEAD(b[2]) ? 4 : 0; + } + + if (MY_UTF16_LOW_HEAD(*b)) + return 0; + + return 2; +} + + +static uint +my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)), + uint c __attribute__((unused))) +{ + return MY_UTF16_HIGH_HEAD(c) ? 4 : 2; +} + + +static size_t +my_numchars_utf16(CHARSET_INFO *cs, + const char *b, const char *e) +{ + size_t nchars= 0; + for ( ; ; nchars++) + { + size_t charlen= my_ismbchar_utf16(cs, b, e); + if (!charlen) + break; + b+= charlen; + } + return nchars; +} + + +static size_t +my_charpos_utf16(CHARSET_INFO *cs, + const char *b, const char *e, size_t pos) +{ + const char *b0= b; + uint charlen; + + for ( ; pos; b+= charlen, pos--) + { + if (!(charlen= my_ismbchar(cs, b, e))) + return (e + 2 - b0); /* Error, return pos outside the string */ + } + return (size_t) (pos ? (e + 2 - b0) : (b - b0)); +} + + +static size_t +my_well_formed_len_utf16(CHARSET_INFO *cs, + const char *b, const char *e, + size_t nchars, int *error) +{ + const char *b0= b; + uint charlen; + *error= 0; + + for ( ; nchars; b+= charlen, nchars--) + { + if (!(charlen= my_ismbchar(cs, b, e))) + { + *error= b < e ? 
1 : 0; + break; + } + } + return (size_t) (b - b0); +} + + +static int +my_wildcmp_utf16_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + + +static int +my_wildcmp_utf16_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + + +static int +my_strnncoll_utf16_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while ( s < se && t < te ) + { + s_res= my_utf16_uni(cs,&s_wc, s, se); + t_res= my_utf16_uni(cs,&t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? 
(t - te) : ((se - s) - (te - t))); +} + + +static int +my_strnncollsp_utf16_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 2) == 0); + DBUG_ASSERT((tlen % 2) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while (s < se && t < te) + { + int s_res= my_utf16_uni(cs, &s_wc, s, se); + int t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? -swap : swap; + } + } + return res; +} + + +static void +my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2) +{ + const uchar *pos = key; + + key+= len; + + while (key > pos + 1 && key[-1] == ' ' && key[-2] == '\0') + key-= 2; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+= 3; + } +} + + +/** + Calculate min_str and max_str that ranges a LIKE string. + + @param ptr Pointer to LIKE pattern. + @param ptr_length Length of LIKE pattern. + @param escape Escape character in LIKE. (Normally '\'). 
+ All escape characters should be removed + from min_str and max_str. + @param res_length Length of min_str and max_str. + @param min_str Smallest case sensitive string that ranges LIKE. + Should be space padded to res_length. + @param max_str Largest case sensitive string that ranges LIKE. + Normally padded with the biggest character sort value. + + @return Optimization status. + @retval FALSE if LIKE pattern can be optimized + @retval TRUE if LIKE can't be optimized. +*/ + +my_bool +my_like_range_utf16(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str,char *max_str, + size_t *min_length,size_t *max_length) +{ + const char *end=ptr+ptr_length; + char *min_org=min_str; + char *min_end=min_str+res_length; + size_t charlen= res_length / cs->mbmaxlen; + + for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 + ; ptr+=2, charlen--) + { + if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end) + { + ptr+=2; /* Skip escape */ + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */ + { + *min_str++= (char) (cs->min_sort_char >> 8); + *min_str++= (char) (cs->min_sort_char & 255); + *max_str++= (char) (cs->max_sort_char >> 8); + *max_str++= (char) (cs->max_sort_char & 255); + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ + { + /* + Calculate length of keys: + 'a\0\0... is the smallest possible string when we have space expand + a\ff\ff... is the biggest possible string + */ + *min_length= ((cs->state & MY_CS_BINSORT) ? 
(size_t) (min_str - min_org) : + res_length); + *max_length= res_length; + do { + *min_str++ = 0; + *min_str++ = 0; + *max_str++ = (char) (cs->max_sort_char >> 8); + *max_str++ = (char) (cs->max_sort_char & 255); + } while (min_str + 1 < min_end); + return FALSE; + } + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + } + + /* Temporary fix for handling w_one at end of string (key compression) */ + { + char *tmp; + for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';) + { + *--tmp=' '; + *--tmp='\0'; + } + } + + *min_length= *max_length = (size_t) (min_str - min_org); + while (min_str + 1 < min_end) + { + *min_str++ = *max_str++ = '\0'; + *min_str++ = *max_str++ = ' '; /* Because if key compression */ + } + return FALSE; +} + + +static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16, + my_strnncollsp_utf16, + my_strnxfrm_unicode, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_bin, + my_strnncollsp_utf16_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf16_handler= +{ + NULL, /* init */ + my_ismbchar_utf16, /* ismbchar */ + my_mbcharlen_utf16, /* mbcharlen */ + my_numchars_utf16, + my_charpos_utf16, + my_well_formed_len_utf16, + my_lengthsp_mb2, + my_numcells_mb, + my_utf16_uni, /* mb_wc */ + my_uni_utf16, /* wc_mb */ + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf16, + my_casedn_utf16, + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + 
my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2 +}; + + +CHARSET_INFO my_charset_utf16_general_ci= +{ + 54,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + "utf16", /* cs name */ + "utf16_general_ci", /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf16_bin= +{ + 55,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, + "utf16", /* cs name */ + "utf16_bin", /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_bin_handler +}; + +#endif /* HAVE_CHARSET_utf16 */ + + +#ifdef HAVE_CHARSET_utf32 + +static int +my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, 
const uchar *s, const uchar *e) +{ + if (s + 4 > e) + return MY_CS_TOOSMALL4; + *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]); + return 4; +} + + +static int +my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + s[0]= (uchar) (wc >> 24); + s[1]= (uchar) (wc >> 16) & 0xFF; + s[2]= (uchar) (wc >> 8) & 0xFF; + s[3]= (uchar) wc & 0xFF; + return 4; +} + + +static inline void +my_tolower_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static inline void +my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + + +static size_t +my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) + { + my_toupper_utf32(uni_plane, &wc); + if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static inline void +my_hash_add(ulong *n1, ulong *n2, uint ch) +{ + n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8); + n2[0]+= 3; +} + + +static void +my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s + slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* Skip 
trailing spaces */ + while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4]) + e-= 4; + + while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0) + { + my_tosort_utf32(uni_plane, &wc); + my_hash_add(n1, n2, (uint) (wc >> 24)); + my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); + my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF); + my_hash_add(n1, n2, (uint) (wc & 0xFF)); + s+= res; + } +} + + +static size_t +my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf32(uni_plane,&wc); + if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int +my_strnncoll_utf32(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf32(uni_plane, &s_wc); + my_tosort_utf32(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); +} + + +/** + Compare strings, discarding end space + + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. 
+ + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pinter. + @param a First string to compare. + @param a_length Length of 'a'. + @param b Second string to compare. + @param b_length Length of 'b'. + + IMPLEMENTATION + + @return Comparison result. + @retval Negative number, if a less than b. + @retval 0, if a is equal to b + @retval Positive number, if a > b +*/ + + +static int +my_strnncollsp_utf32(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 4) == 0); + DBUG_ASSERT((tlen % 4) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while ( s < se && t < te ) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf32(uni_plane, &s_wc); + my_tosort_utf32(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? 
-swap : swap; + } + } + return res; +} + + +static size_t +my_strnxfrmlen_utf32(CHARSET_INFO *cs __attribute__((unused)), size_t len) +{ + return len / 2; +} + + +static uint +my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + return 4; +} + + +static uint +my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) , + uint c __attribute__((unused))) +{ + return 4; +} + + +static int +my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start= dst, *end= dst + n; + DBUG_ASSERT((n % 4) == 0); + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + if (dst >= end) /* End of buffer */ + break; + + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + reg2 char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end - dst); + if (!par) par= (char*)"(null)"; + plen= strlen(par); + if (left_len <= plen*4) + plen= left_len / 4 - 1; + + for ( ; plen ; plen--, dst+= 4, par++) + { + dst[0]= '\0'; + dst[1]= '\0'; + dst[2]= '\0'; + dst[3]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + register int iarg; + char nbuf[16]; + char *pbuf= nbuf; + + if ((size_t) (end - dst) < 64) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg,nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if (dst == end) + break; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code 
*/ + } + + DBUG_ASSERT(dst < end); + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; /* End of errmessage */ + return (size_t) (dst - start - 4); +} + + +static size_t +my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) +{ + va_list args; + va_start(args,fmt); + return my_vsnprintf_utf32(to, n, fmt, args); +} + + +static longlong +my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) +{ + const char *s, *end, *start, *n_end, *true_end; + uchar c; + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + + s= nptr; + /* If fixed length string */ + if (endptr) + { + /* Make sure string length is even */ + end= s + ((*endptr - s) / 4) * 4; + while (s < end && !s[0] && !s[1] && !s[2] && + (s[3] == ' ' || s[3] == '\t')) + s+= 4; + if (s == end) + goto no_conv; + } + else + { + /* We don't support null terminated strings in UCS2 */ + goto no_conv; + } + + /* Check for a sign. 
*/ + negative= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '-') + { + *error= -1; /* Mark as negative number */ + negative= 1; + s+= 4; + if (s == end) + goto no_conv; + cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3= MAX_NEGATIVE_NUMBER % 100; + } + else + { + *error= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '+') + { + s+= 4; + if (s == end) + goto no_conv; + } + cutoff= ULONGLONG_MAX / LFACTOR2; + cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; + cutoff3= ULONGLONG_MAX % 100; + } + + /* Handle case where we have a lot of pre-zero */ + if (!s[0] && !s[1] && !s[2] && s[3] == '0') + { + i= 0; + do + { + s+= 4; + if (s == end) + goto end_i; /* Return 0 */ + } + while (!s[0] && !s[1] && !s[2] && s[3] == '0'); + n_end= s + 4 * INIT_CNT; + } + else + { + /* Read first digit to check that it's a valid number */ + if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto no_conv; + i= c; + s+= 4; + n_end= s + 4 * (INIT_CNT-1); + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) + n_end= end; + for (; s != n_end ; s+= 4) + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i; + i= i * 10 + c; + } + if (s == end) + goto end_i; + + /* Handle next 9 digits and store them in j */ + j= 0; + start= s; /* Used to know how much to shift i */ + n_end= true_end= s + 4 * INIT_CNT; + if (n_end > end) + n_end= end; + do + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i_and_j; + j= j * 10 + c; + s+= 4; + } while (s != n_end); + if (s == end) + { + if (s != true_end) + goto end_i_and_j; + goto end3; + } + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end3; + + /* Handle the next 1 or 2 digits and store them in k */ + k=c; + s+= 4; + if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto end4; + k= k * 10 + c; + s+= 2; + *endptr= (char*) s; + + /* number string should have ended here */ + if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9) + 
goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) && + k > cutoff3))) + goto overflow; + li= i * LFACTOR2+ (ulonglong) j * 100 + k; + return (longlong) li; + +overflow: /* *endptr is set here */ + *error= MY_ERRNO_ERANGE; + return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX; + +end_i: + *endptr= (char*) s; + return (negative ? ((longlong) -(long) i) : (longlong) i); + +end_i_and_j: + li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end3: + li= (ulonglong) i*LFACTOR+ (ulonglong) j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end4: + li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k; + *endptr= (char*) s; + if (negative) + { + if (li > MAX_NEGATIVE_NUMBER) + goto overflow; + return -((longlong) li); + } + return (longlong) li; + +no_conv: + /* There was no number to convert. */ + *error= MY_ERRNO_EDOM; + *endptr= (char *) nptr; + return 0; +} + + +static size_t +my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e) +{ + return (size_t) (e - b) / 4; +} + + +static size_t +my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, size_t pos) +{ + size_t string_length= (size_t) (e - b); + return pos * 4 > string_length ? 
string_length + 4 : pos * 4; +} + + +static size_t +my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + size_t nchars, int *error) +{ + /* Ensure string length is divisible by 4 */ + const char *b0= b; + size_t length= e - b; + DBUG_ASSERT((length % 4) == 0); + *error= 0; + nchars*= 4; + if (length > nchars) + { + length= nchars; + e= b + nchars; + } + for (; b < e; b+= 4) + { + /* Don't accept characters greater than U+10FFFF */ + if (b[0] || (uchar) b[1] > 0x10) + { + *error= 1; + return b - b0; + } + } + return length; +} + + +static +void my_fill_utf32(CHARSET_INFO *cs, + char *s, size_t slen, int fill) +{ + char buf[10]; + uint buflen; + char *e= s + slen; + + DBUG_ASSERT((slen % 4) == 0); + + buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf, + (uchar*) buf + sizeof(buf)); + DBUG_ASSERT(buflen == 4); + while (s < e) + { + memcpy(s, buf, 4); + s+= 4; + } +} + + +static size_t +my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + DBUG_ASSERT((length % 4) == 0); + while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4]) + end-= 4; + return (size_t) (end - ptr); +} + + +static int +my_wildcmp_utf32_ci(CHARSET_INFO *cs, + const char *str, const char *str_end, + const char *wildstr, const char *wildend, + int escape, int w_one, int w_many) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + + +static int +my_wildcmp_utf32_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + + +static int +my_strnncoll_utf32_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t 
s_wc, t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t))); +} + + +static inline my_wc_t +my_utf32_get(const uchar *s) +{ + return + ((my_wc_t) s[0] << 24) + + ((my_wc_t) s[1] << 16) + + ((my_wc_t) s[2] << 8) + + s[3]; +} + + +static int +my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference + __attribute__((unused))) +{ + const uchar *se, *te; + size_t minlen; + + DBUG_ASSERT((slen % 4) == 0); + DBUG_ASSERT((tlen % 4) == 0); + + se= s + slen; + te= t + tlen; + + for (minlen= min(slen, tlen); minlen; minlen-= 4) + { + my_wc_t s_wc= my_utf32_get(s); + my_wc_t t_wc= my_utf32_get(t); + if (s_wc != t_wc) + return s_wc > t_wc ? 1 : -1; + + s+= 4; + t+= 4; + } + + if (slen != tlen) + { + int swap= 1; + if (slen < tlen) + { + s= t; + se= te; + swap= -1; + } + + for ( ; s < se ; s+= 4) + { + my_wc_t s_wc= my_utf32_get(s); + if (s_wc != ' ') + return (s_wc < ' ') ? -swap : swap; + } + } + return 0; +} + + +/** + Calculate min_str and max_str that ranges a LIKE string. + + @param ptr Pointer to LIKE pattern. + @param ptr_length Length of LIKE pattern. + @param escape Escape character in LIKE. (Normally '\'). + All escape characters should be removed + from min_str and max_str. + @param res_length Length of min_str and max_str. + @param min_str Smallest case sensitive string that ranges LIKE. + Should be space padded to res_length. + @param max_str Largest case sensitive string that ranges LIKE. 
+ Normally padded with the biggest character sort value. + + @return Optimization status. + @retval FALSE if LIKE pattern can be optimized + @rerval TRUE if LIKE can't be optimized. +*/ + +my_bool +my_like_range_utf32(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str,char *max_str, + size_t *min_length,size_t *max_length) +{ + const char *end= ptr + ptr_length; + char *min_org= min_str; + char *min_end= min_str + res_length; + char *max_end= max_str + res_length; + size_t charlen= res_length / cs->mbmaxlen; + + DBUG_ASSERT((res_length % 4) == 0); + + for ( ; charlen > 0; ptr+= 4, charlen--) + { + my_wc_t wc; + int res; + if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0) + { + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char); + /* min_length and max_legnth are not important */ + return TRUE; + } + + if (wc == (my_wc_t) escape) + { + ptr+= 4; /* Skip escape */ + if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0) + { + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char); + /* min_length and max_length are not important */ + return TRUE; + } + if (my_uni_utf32(cs, wc, min_str, min_end) != 4 || + my_uni_utf32(cs, wc, max_str, max_end) != 4) + goto pad_set_lengths; + *min_str++= 4; + *max_str++= 4; + continue; + } + + if (wc == (my_wc_t) w_one) + { + if (my_uni_utf32(cs, cs->min_sort_char, min_str, min_end) != 4 || + my_uni_utf32(cs, cs->max_sort_char, max_str, max_end) != 4) + goto pad_set_lengths; + min_str+= 4; + max_str+= 4; + continue; + } + + if (wc == (my_wc_t) w_many) + { + /* + Calculate length of keys: + 'a\0\0... is the smallest possible string when we have space expand + a\ff\ff... is the biggest possible string + */ + *min_length= ((cs->state & MY_CS_BINSORT) ? 
+ (size_t) (min_str - min_org) : + res_length); + *max_length= res_length; + goto pad_min_max; + } + + /* Normal character */ + if (my_uni_utf32(cs, wc, min_str, min_end) != 4 || + my_uni_utf32(cs, wc, max_str, max_end) != 4) + goto pad_set_lengths; + min_str+= 4; + max_str+= 4; + } + +pad_set_lengths: + *min_length= *max_length= (size_t) (min_str - min_org); + +pad_min_max: + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char); + return FALSE; +} + + +static size_t +my_scan_utf32(CHARSET_INFO *cs, + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for ( ; str < end; ) + { + my_wc_t wc; + int res= my_utf32_uni(cs, &wc, str, end); + if (res < 0 || wc != ' ') + break; + str+= res; + } + return (size_t) (str - str0); + default: + return 0; + } +} + + +static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf32, + my_strnncollsp_utf32, + my_strnxfrm_unicode, + my_strnxfrmlen_utf32, + my_like_range_utf32, + my_wildcmp_utf32_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf32, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf32_bin, + my_strnncollsp_utf32_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_utf32, + my_like_range_utf32, + my_wildcmp_utf32_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf32, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf32_handler= +{ + NULL, /* init */ + my_ismbchar_utf32, + my_mbcharlen_utf32, + my_numchars_utf32, + my_charpos_utf32, + my_well_formed_len_utf32, + my_lengthsp_utf32, + my_numcells_mb, + my_utf32_uni, + my_uni_utf32, + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf32, + my_casedn_utf32, + my_snprintf_utf32, + my_l10tostr_mb2_or_mb4, + 
my_ll10tostr_mb2_or_mb4, + my_fill_utf32, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_utf32, + my_strntoull10rnd_mb2_or_mb4, + my_scan_utf32 +}; + + +CHARSET_INFO my_charset_utf32_general_ci= +{ + 60,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + "utf32", /* cs name */ + "utf32_general_ci", /* name */ + "UTF-32 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf32_bin= +{ + 61,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, + "utf32", /* cs name */ + "utf32_bin", /* name */ + "UTF-32 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_bin_handler +}; + + +#endif /* HAVE_CHARSET_utf32 */ + + +#ifdef 
HAVE_CHARSET_ucs2 + +static uchar ctype_ucs2[] = { + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static uchar to_lower_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + +static uchar to_upper_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) +{ + if (s+2 > e) /* Need 2 characters */ + return MY_CS_TOOSMALL2; + + *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]); + return 2; +} + +static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) +{ + if ( r+2 > e ) + return MY_CS_TOOSMALL2; + + r[0]= (uchar) (wc >> 8); + r[1]= (uchar) (wc & 0xFF); + return 2; +} + + +static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_ucs2_uni(cs, &wc, 
(uchar *)src, (uchar*) srcend)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc; + if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e=s+slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while (e > s+1 && e[-1] == ' ' && e[-2] == '\0') + e-= 2; + + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); + n2[0]+=3; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); + n2[0]+=3; + s+=res; + } +} + + +static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].tolower : wc; + if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int my_strnncoll_ucs2(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t UNINIT_VAR(s_wc),t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while ( s < se && t < te ) + { + int plane; + s_res=my_ucs2_uni(cs,&s_wc, s, se); + t_res=my_ucs2_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+=s_res; + t+=t_res; + } + return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); +} + +/* + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_ucs2() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. 
+ + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b +*/ + +static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference + __attribute__((unused))) +{ + const uchar *se, *te; + size_t minlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* extra safety to make sure the lengths are even numbers */ + slen&= ~1; + tlen&= ~1; + + se= s + slen; + te= t + tlen; + + for (minlen= min(slen, tlen); minlen; minlen-= 2) + { + int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort : + (((int) s[0]) << 8) + (int) s[1]; + + int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort : + (((int) t[0]) << 8) + (int) t[1]; + if ( s_wc != t_wc ) + return s_wc > t_wc ? 1 : -1; + + s+= 2; + t+= 2; + } + + if (slen != tlen) + { + int swap= 1; + if (slen < tlen) + { + s= t; + se= te; + swap= -1; + } + + for ( ; s < se ; s+= 2) + { + if (s[0] || s[1] != ' ') + return (s[0] == 0 && s[1] < ' ') ? 
-swap : swap; + } + } + return 0; +} + + +static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + return 2; +} + + +static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + uint c __attribute__((unused))) +{ + return 2; +} + + static size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) @@ -1336,25 +3168,6 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)), static -void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *s, size_t l, int fill) -{ - for ( ; l >= 2; s[0]= 0, s[1]= fill, s+=2, l-=2); -} - - -static -size_t my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr, size_t length) -{ - const char *end= ptr+length; - while (end > ptr+1 && end[-1] == ' ' && end[-2] == '\0') - end-=2; - return (size_t) (end-ptr); -} - - -static int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, @@ -1457,29 +3270,6 @@ static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), static -int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t) -{ - size_t s_len= strlen(s); - size_t t_len= strlen(t); - size_t len = (s_len > t_len) ? 
s_len : t_len; - return my_strncasecmp_ucs2(cs, s, t, len); -} - - -static -size_t my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) -{ - if (dst != src) - memcpy(dst,src,srclen= min(dstlen,srclen)); - if (dstlen > srclen) - cs->cset->fill(cs, (char*) dst + srclen, dstlen - srclen, ' '); - return dstlen; -} - - -static void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len,ulong *nr1, ulong *nr2) { @@ -1613,38 +3403,16 @@ fill_max_and_min: -size_t my_scan_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *str, const char *end, int sequence_type) -{ - const char *str0= str; - end--; /* for easier loop condition, because of two bytes per character */ - - switch (sequence_type) - { - case MY_SEQ_SPACES: - for ( ; str < end; str+= 2) - { - if (str[0] != '\0' || str[1] != ' ') - break; - } - return (size_t) (str - str0); - default: - return 0; - } -} - - - static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { NULL, /* init */ my_strnncoll_ucs2, my_strnncollsp_ucs2, - my_strnxfrm_ucs2, + my_strnxfrm_unicode, my_strnxfrmlen_simple, my_like_range_ucs2, my_wildcmp_ucs2_ci, - my_strcasecmp_ucs2, + my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, my_propagate_simple @@ -1656,11 +3424,11 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_bin, - my_strnxfrm_ucs2_bin, + my_strnxfrm_unicode, my_strnxfrmlen_simple, my_like_range_ucs2, my_wildcmp_ucs2_bin, - my_strcasecmp_ucs2_bin, + my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_bin, my_propagate_simple @@ -1675,27 +3443,27 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= my_numchars_ucs2, my_charpos_ucs2, my_well_formed_len_ucs2, - my_lengthsp_ucs2, + my_lengthsp_mb2, my_numcells_mb, my_ucs2_uni, /* mb_wc */ my_uni_ucs2, /* wc_mb */ my_mb_ctype_mb, - my_caseup_str_ucs2, - my_casedn_str_ucs2, + 
my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, my_caseup_ucs2, my_casedn_ucs2, - my_snprintf_ucs2, - my_l10tostr_ucs2, - my_ll10tostr_ucs2, - my_fill_ucs2, - my_strntol_ucs2, - my_strntoul_ucs2, - my_strntoll_ucs2, - my_strntoull_ucs2, - my_strntod_ucs2, - my_strtoll10_ucs2, - my_strntoull10rnd_ucs2, - my_scan_ucs2 + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2 }; @@ -1764,4 +3532,4 @@ CHARSET_INFO my_charset_ucs2_bin= }; -#endif +#endif /* HAVE_CHARSET_ucs2 */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 91f633e45ce..7de5cdd00ee 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -27,6 +27,16 @@ #define EILSEQ ENOENT #endif + +#define MY_UTF8MB3_GENERAL_CI MY_UTF8MB3 "_general_ci" +#define MY_UTF8MB3_GENERAL_CS MY_UTF8MB3 "_general_cs" +#define MY_UTF8MB3_BIN MY_UTF8MB3 "_bin" +#define MY_UTF8MB4_GENERAL_CI MY_UTF8MB4 "_general_ci" +#define MY_UTF8MB4_GENERAL_CS MY_UTF8MB4 "_general_cs" +#define MY_UTF8MB4_BIN MY_UTF8MB4 "_bin" + + + #ifndef HAVE_CHARSET_utf8 #define HAVE_CHARSET_utf8 #endif @@ -39,6 +49,14 @@ #define HAVE_UNIDATA #endif +#ifdef HAVE_CHARSET_utf16 +#define HAVE_UNIDATA +#endif + +#ifdef HAVE_CHARSET_utf32 +#define HAVE_UNIDATA +#endif + #ifdef HAVE_UNIDATA #include "my_uctype.h" @@ -1702,6 +1720,24 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= }; +#define REPLACEMENT_CHAR 0xFFFD; + + +static inline void +my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + /* ** Compare string against string with wildcard @@ -1712,13 +1748,14 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= ** 1 if matched with wildcard */ -int 
my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, - MY_UNICASE_INFO **weights) +int +my_wildcmp_unicode(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + MY_UNICASE_INFO **weights) { - int result= -1; /* Not found, using wildcards */ + int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; int (*mb_wc)(struct charset_info_st *, my_wc_t *, @@ -1734,14 +1771,14 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, (const uchar*)wildend)) <= 0) return 1; - if (w_wc == (my_wc_t)w_many) + if (w_wc == (my_wc_t) w_many) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ break; } wildstr+= scan; - if (w_wc == (my_wc_t)escape && wildstr < wildend) + if (w_wc == (my_wc_t) escape && wildstr < wildend) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) @@ -1755,29 +1792,27 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return 1; str+= scan; - if (!escaped && w_wc == (my_wc_t)w_one) + if (!escaped && w_wc == (my_wc_t) w_one) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ } else { if (weights) { - plane=(s_wc>>8) & 0xFF; - s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; - plane=(w_wc>>8) & 0xFF; - w_wc = weights[plane] ? 
weights[plane][w_wc & 0xFF].sort : w_wc; + my_tosort_unicode(weights, &s_wc); + my_tosort_unicode(weights, &w_wc); } if (s_wc != w_wc) - return 1; /* No match */ + return 1; /* No match */ } if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ + return (str != str_end); /* Match if both are at end */ } - if (w_wc == (my_wc_t)w_many) - { /* Found w_many */ + if (w_wc == (my_wc_t) w_many) + { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) @@ -1786,29 +1821,29 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, (const uchar*)wildend)) <= 0) return 1; - if (w_wc == (my_wc_t)w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t)w_one) - { - wildstr+= scan; + if (w_wc == (my_wc_t)w_many) + { + wildstr+= scan; + continue; + } + + if (w_wc == (my_wc_t)w_one) + { + wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; - continue; - } - break; /* Not a wild character */ + continue; + } + break; /* Not a wild character */ } if (wildstr == wildend) - return 0; /* Ok if w_many is last */ + return 0; /* Ok if w_many is last */ if (str == str_end) - return -1; + return -1; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) @@ -1836,10 +1871,8 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return 1; if (weights) { - plane=(s_wc>>8) & 0xFF; - s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; - plane=(w_wc>>8) & 0xFF; - w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; + my_tosort_unicode(weights, &s_wc); + my_tosort_unicode(weights, &w_wc); } if (s_wc == w_wc) @@ -1861,8 +1894,53 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return (str != str_end ? 
1 : 0); } -#endif +/* + This function is shared between utf8mb3/utf8mb4/ucs2/utf16/utf32 +*/ +size_t +my_strnxfrm_unicode(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, + const uchar *src, size_t srclen) +{ + my_wc_t wc; + int res; + uchar *de= dst + dstlen; + uchar *de_beg= de - 1; + const uchar *se = src + srclen; + MY_UNICASE_INFO **uni_plane= (cs->state & MY_CS_BINSORT) ? + NULL : cs->caseinfo; + LINT_INIT(wc); + DBUG_ASSERT(src); + + while (dst < de_beg) + { + if ((res= cs->cset->mb_wc(cs,&wc, src, se)) <= 0) + break; + src+=res; + + if (uni_plane) + my_tosort_unicode(uni_plane, &wc); + + *dst++= (uchar) (wc >> 8); + if (dst < de) + *dst++= (uchar) (wc & 0xFF); + } + + while (dst < de_beg) /* Fill the tail with keys for space character */ + { + *dst++= 0x00; + *dst++= 0x20; + } + + if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ + *dst= 0x00; + + return dstlen; +} + + +#endif /* HAVE_UNIDATA */ #ifdef HAVE_CHARSET_utf8 @@ -2569,44 +2647,6 @@ size_t my_strnxfrmlen_utf8(CHARSET_INFO *cs __attribute__((unused)), } -static size_t my_strnxfrm_utf8(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) -{ - my_wc_t wc; - int res; - int plane; - uchar *de= dst + dstlen; - uchar *de_beg= de - 1; - const uchar *se = src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while (dst < de_beg) - { - if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0) - break; - src+=res; - - plane=(wc>>8) & 0xFF; - wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].sort : wc; - - *dst++= (uchar)(wc >> 8); - *dst++= (uchar)(wc & 0xFF); - - } - - while (dst < de_beg) /* Fill the tail with keys for space character */ - { - *dst++= 0x00; - *dst++= 0x20; - } - - if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ - *dst= 0x00; - - return dstlen; -} - static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e) { my_wc_t wc; @@ -2642,7 +2682,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_mb, my_wildcmp_utf8, @@ -2891,7 +2931,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = NULL, /* init */ my_strnncoll_utf8_cs, my_strnncollsp_utf8_cs, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_simple, my_wildcmp_mb, @@ -4154,7 +4194,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_mb, my_wildcmp_utf8, @@ -4284,3 +4324,859 @@ int main() +#ifdef HAVE_CHARSET_utf8mb4 + +/* + We consider bytes with code more than 127 as a letter. + This garantees that word boundaries work fine with regular + expressions. Note, there is no need to mark byte 255 as a + letter, it is illegal byte in UTF8. 
+*/ +static uchar ctype_utf8mb4[]= +{ + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 +}; + + +static uchar to_lower_utf8mb4[]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static uchar to_upper_utf8mb4[]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static inline int +bincmp_utf8mb4(const uchar *s, const uchar *se, + const uchar *t, const uchar *te) +{ + int slen= (int) (se - s), tlen= (int) (te - t); + int len= min(slen, tlen); + int cmp= memcmp(s, t, len); + return cmp ? 
cmp : slen - tlen; +} + + +static int +my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) +{ + uchar c; + + if (s >= e) + return MY_CS_TOOSMALL; + + c= s[0]; + if (c < 0x80) + { + *pwc= c; + return 1; + } + else if (c < 0xc2) + return MY_CS_ILSEQ; + else if (c < 0xe0) + { + if (s + 2 > e) /* We need 2 characters */ + return MY_CS_TOOSMALL2; + + if (!((s[1] ^ 0x80) < 0x40)) + return MY_CS_ILSEQ; + + *pwc= ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); + return 2; + } + else if (c < 0xf0) + { + if (s + 3 > e) /* We need 3 characters */ + return MY_CS_TOOSMALL3; + + if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && + (c >= 0xe1 || s[1] >= 0xa0))) + return MY_CS_ILSEQ; + + *pwc= ((my_wc_t) (c & 0x0f) << 12) | + ((my_wc_t) (s[1] ^ 0x80) << 6) | + (my_wc_t) (s[2] ^ 0x80); + return 3; + } + else if (c < 0xf5) + { + if (s + 4 > e) /* We need 4 characters */ + return MY_CS_TOOSMALL4; + + /* + UTF-8 quick four-byte mask: + 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + Encoding allows to encode U+00010000..U+001FFFFF + + The maximum character defined in the Unicode standard is U+0010FFFF. + Higher characters U+00110000..U+001FFFFF are not used. 
+ + 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min) + 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max) + + Valid codes: + [F0][90..BF][80..BF][80..BF] + [F1][80..BF][80..BF][80..BF] + [F2][80..BF][80..BF][80..BF] + [F3][80..BF][80..BF][80..BF] + [F4][80..8F][80..BF][80..BF] + */ + + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (s[3] ^ 0x80) < 0x40 && + (c >= 0xf1 || s[1] >= 0x90) && + (c <= 0xf3 || s[1] <= 0x8F))) + return MY_CS_ILSEQ; + *pwc = ((my_wc_t) (c & 0x07) << 18) | + ((my_wc_t) (s[1] ^ 0x80) << 12) | + ((my_wc_t) (s[2] ^ 0x80) << 6) | + (my_wc_t) (s[3] ^ 0x80); + return 4; + } + return MY_CS_ILSEQ; +} + + +/* + The same as above, but without range check + for example, for a null-terminated string +*/ +static int +my_mb_wc_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s) +{ + uchar c; + + c= s[0]; + if (c < 0x80) + { + *pwc = c; + return 1; + } + + if (c < 0xc2) + return MY_CS_ILSEQ; + + if (c < 0xe0) + { + if (!((s[1] ^ 0x80) < 0x40)) + return MY_CS_ILSEQ; + + *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); + return 2; + } + + if (c < 0xf0) + { + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (c >= 0xe1 || s[1] >= 0xa0))) + return MY_CS_ILSEQ; + *pwc= ((my_wc_t) (c & 0x0f) << 12) | + ((my_wc_t) (s[1] ^ 0x80) << 6) | + (my_wc_t) (s[2] ^ 0x80); + + return 3; + } + else if (c < 0xf5) + { + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (s[3] ^ 0x80) < 0x40 && + (c >= 0xf1 || s[1] >= 0x90) && + (c <= 0xf3 || s[1] <= 0x8F))) + return MY_CS_ILSEQ; + *pwc = ((my_wc_t) (c & 0x07) << 18) | + ((my_wc_t) (s[1] ^ 0x80) << 12) | + ((my_wc_t) (s[2] ^ 0x80) << 6) | + (my_wc_t) (s[3] ^ 0x80); + return 4; + } + return MY_CS_ILSEQ; +} + + +static int +my_wc_mb_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r, uchar *e) +{ + int count; + + if (r >= e) + return MY_CS_TOOSMALL; + + if (wc < 0x80) + count= 1; + else if 
(wc < 0x800) + count= 2; + else if (wc < 0x10000) + count= 3; + else if (wc < 0x200000) + count= 4; + else return MY_CS_ILUNI; + + if (r + count > e) + return MY_CS_TOOSMALLN(count); + + switch (count) { + /* Fall through all cases!!! */ + case 4: r[3] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000; + case 3: r[2] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800; + case 2: r[1] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0; + case 1: r[0] = (uchar) wc; + } + return count; +} + + +/* + The same as above, but without range check. +*/ +static int +my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r) +{ + int count; + + if (wc < 0x80) + count= 1; + else if (wc < 0x800) + count= 2; + else if (wc < 0x10000) + count= 3; + else if (wc < 0x200000) + count= 4; + else + return MY_CS_ILUNI; + + switch (count) + { + /* Fall through all cases!!! */ + case 4: r[3]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x10000; + case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800; + case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0; + case 1: r[0]= (uchar) wc; + } + return count; +} + + +static inline void +my_tolower_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static size_t +my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int srcres, dstres; + char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); + + while ((src < srcend) && + (srcres= my_mb_wc_utf8mb4(cs, &wc, + (uchar *) src, (uchar*) srcend)) > 0) + 
{ + my_toupper_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + return (size_t) (dst - dst0); +} + + +static inline void +my_hash_add(ulong *n1, ulong *n2, uint ch) +{ + n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8); + n2[0]+= 3; +} + + +static void +my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s + slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* + Remove end space. We do this to be able to compare + 'A ' and 'A' as identical + */ + while (e > s && e[-1] == ' ') + e--; + + while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0) + { + my_tosort_unicode(uni_plane, &wc); + my_hash_add(n1, n2, (uint) (wc & 0xFF)); + my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF); + if (wc > 0xFFFF) + { + /* + Put the highest byte only if it is non-zero, + to make hash functions for utf8mb3 and utf8mb4 + compatible for BMP characters. + This is useful to keep order of records in + test results, e.g. for "SHOW GRANTS". 
+ */ + my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); + } + s+= res; + } +} + + +static size_t +my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src) +{ + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->caseup_multiply == 1); + + while (*src && + (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) + { + my_toupper_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + *dst= '\0'; + return (size_t) (dst - dst0); +} + + +static size_t +my_casedn_utf8mb4(CHARSET_INFO *cs, + char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int srcres, dstres; + char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); + + while ((src < srcend) && + (srcres= my_mb_wc_utf8mb4(cs, &wc, + (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + return (size_t) (dst - dst0); +} + + +static size_t +my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src) +{ + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->casedn_multiply == 1); + + while (*src && + (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) + { + my_tolower_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + + /* + In rare cases lower string can be shorter than + the original string, for example: + + "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" + (which is 0xC4B0 in utf8, i.e. two bytes) + + is converted into + + "U+0069 LATIN SMALL LETTER I" + (which is 0x69 in utf8, i.e. 
one byte) + + So, we need to put '\0' terminator after converting. + */ + + *dst= '\0'; + return (size_t) (dst - dst0); +} + + +static int +my_strnncoll_utf8mb4(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while ( s < se && t < te ) + { + int s_res= my_mb_wc_utf8mb4(cs, &s_wc, s, se); + int t_res= my_mb_wc_utf8mb4(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return bincmp_utf8mb4(s, se, t, te); + } + + my_tosort_unicode(uni_plane, &s_wc); + my_tosort_unicode(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); +} + + +/** + + Compare strings, discarding end space + + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pinter. + @param a First string to compare. + @param a_length Length of 'a'. + @param b Second string to compare. + @param b_length Length of 'b'. + @param diff_if_only_endspace_difference + Set to 1 if the strings should be regarded as different + if they only difference in end space + + @return Comparison result. + @retval Negative number, if a less than b. 
+ @retval 0, if a is equal to b + @retval Positive number, if a > b +*/ + +static int +my_strnncollsp_utf8mb4(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while ( s < se && t < te ) + { + int s_res= my_mb_wc_utf8mb4(cs, &s_wc, s, se); + int t_res= my_mb_wc_utf8mb4(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return bincmp_utf8mb4(s, se, t, te); + } + + my_tosort_unicode(uni_plane, &s_wc); + my_tosort_unicode(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+=s_res; + t+=t_res; + } + + slen= (size_t) (se-s); + tlen= (size_t) (te-t); + res= 0; + + if (slen != tlen) + { + int swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 'a' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + /* + This following loop uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibute + sequences. + */ + for ( ; s < se; s++) + { + if (*s != ' ') + return (*s < ' ') ? -swap : swap; + } + } + return res; +} + + +/** + Compare 0-terminated UTF8 strings. + + @param cs character set handler + @param s First 0-terminated string to compare + @param t Second 0-terminated string to compare + + @return Comparison result. 
+ @retval negative number if s < t + @retval positive number if s > t + @retval 0 is the strings are equal +*/ + +static int +my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + while (s[0] && t[0]) + { + my_wc_t s_wc,t_wc; + + if ((uchar) s[0] < 128) + { + /* + s[0] is between 0 and 127. + It represents a single byte character. + Convert it into weight according to collation. + */ + s_wc= plane00[(uchar) s[0]].tolower; + s++; + } + else + { + int res= my_mb_wc_utf8mb4_no_range(cs, &s_wc, (const uchar*) s); + + /* + In the case of wrong multibyte sequence we will + call strcmp() for byte-to-byte comparison. + */ + if (res <= 0) + return strcmp(s, t); + s+= res; + + my_tolower_utf8mb4(uni_plane, &s_wc); + } + + + /* Do the same for the second string */ + + if ((uchar) t[0] < 128) + { + /* Convert single byte character into weight */ + t_wc= plane00[(uchar) t[0]].tolower; + t++; + } + else + { + int res= my_mb_wc_utf8mb4_no_range(cs, &t_wc, (const uchar*) t); + if (res <= 0) + return strcmp(s, t); + t+= res; + + my_tolower_utf8mb4(uni_plane, &t_wc); + } + + /* Now we have two weights, let's compare them */ + if ( s_wc != t_wc ) + return ((int) s_wc) - ((int) t_wc); + } + return ((int) (uchar) s[0]) - ((int) (uchar) t[0]); +} + + +static int +my_wildcmp_utf8mb4(CHARSET_INFO *cs, + const char *str, const char *strend, + const char *wildstr, const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, strend, wildstr, wildend, + escape, w_one, w_many, cs->caseinfo); +} + + +static size_t +my_strnxfrmlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), size_t len) +{ + /* TODO: fix when working on WL "Unicode new version" */ + return (len * 2 + 2) / 4; +} + + +static uint +my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e) +{ + my_wc_t wc; + int res= my_mb_wc_utf8mb4(cs,&wc, (const uchar*)b, (const uchar*)e); + return (res > 1) ? 
res : 0; +} + + +static uint +my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c) +{ + if (c < 0x80) + return 1; + if (c < 0xc2) + return 0; /* Illegal mb head */ + if (c < 0xe0) + return 2; + if (c < 0xf0) + return 3; + if (c < 0xf8) + return 4; + return 0; /* Illegal mb head */; +} + + +static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= +{ + NULL, /* init */ + my_strnncoll_utf8mb4, + my_strnncollsp_utf8mb4, + my_strnxfrm_unicode, + my_strnxfrmlen_utf8mb4, + my_like_range_mb, + my_wildcmp_utf8mb4, + my_strcasecmp_utf8mb4, + my_instr_mb, + my_hash_sort_utf8mb4, + my_propagate_complex +}; + + +static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = +{ + NULL, /* init */ + my_strnncoll_mb_bin, + my_strnncollsp_mb_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_utf8mb4, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf8mb4_handler= +{ + NULL, /* init */ + my_ismbchar_utf8mb4, + my_mbcharlen_utf8mb4, + my_numchars_mb, + my_charpos_mb, + my_well_formed_len_mb, + my_lengthsp_8bit, + my_numcells_mb, + my_mb_wc_utf8mb4, + my_wc_mb_utf8mb4, + my_mb_ctype_mb, + my_caseup_str_utf8mb4, + my_casedn_str_utf8mb4, + my_caseup_utf8mb4, + my_casedn_utf8mb4, + my_snprintf_8bit, + my_long10_to_str_8bit, + my_longlong10_to_str_8bit, + my_fill_8bit, + my_strntol_8bit, + my_strntoul_8bit, + my_strntoll_8bit, + my_strntoull_8bit, + my_strntod_8bit, + my_strtoll10_8bit, + my_strntoull10rnd_8bit, + my_scan_8bit +}; + + + +CHARSET_INFO my_charset_utf8mb4_general_ci= +{ + 45,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ + MY_UTF8MB4, /* cs name */ + MY_UTF8MB4_GENERAL_CI,/* name */ + "UTF-8 Unicode", /* comment */ + NULL, /* tailoring */ + ctype_utf8mb4, /* ctype */ + to_lower_utf8mb4, /* to_lower */ + to_upper_utf8mb4, /* to_upper */ + to_upper_utf8mb4, /* sort_order 
*/ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_utf8mb4_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_bin= +{ + 46,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ + MY_UTF8MB4, /* cs name */ + MY_UTF8MB4_BIN, /* name */ + "UTF-8 Unicode", /* comment */ + NULL, /* tailoring */ + ctype_utf8mb4, /* ctype */ + to_lower_utf8mb4, /* to_lower */ + to_upper_utf8mb4, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_utf8mb4_bin_handler +}; + +#endif /* HAVE_CHARSET_utf8mb4 */ |