diff options
author | Alexander Barkov <bar@mariadb.com> | 2022-03-12 15:38:44 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2022-03-14 14:42:59 +0400 |
commit | 03c3dc63655aabcfc309208188e44c200f680404 (patch) | |
tree | 208f97da028dcc135b0d6668e35ea956f1ea87d2 | |
parent | ed6e271f786504916dbcbd3d55ee17cd3f2566ef (diff) | |
download | mariadb-git-03c3dc63655aabcfc309208188e44c200f680404.tar.gz |
MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
Problem:
Parse-time conversion from binary to tricky character sets like utf32
produced ill-formed strings. So, later a chash happened in debug builds,
or a wrong SHOW CREATE TABLE was returned in release builds.
Fix:
1. Backporting a few methods from 10.3:
- THD::check_string_for_wellformedness()
- THD::convert_string() overloads
- THD::make_text_string_connection()
2. Adding a new method THD::reinterpret_string_from_binary(),
which makes sure to either returns a well-formed string
(optionally prepending with zero bytes), or returns an error.
-rw-r--r-- | mysql-test/r/ctype_utf32.result | 23 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf32_uca.result | 15 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf32.test | 19 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf32_uca.test | 13 | ||||
-rw-r--r-- | sql/sql_class.cc | 69 | ||||
-rw-r--r-- | sql/sql_class.h | 63 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 10 |
7 files changed, 202 insertions, 10 deletions
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 584ca12f8c3..143fff9e419 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -2890,5 +2890,28 @@ HEX(c1) 0000006100000063 DROP TABLE t1; # +# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT +# +CREATE TABLE t1 (a CHAR(1)); +SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary; +ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32; +ERROR HY000: Column 'a' has duplicated value 'a' in ENUM +ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32; +ERROR HY000: Invalid utf32 character string: '\x00aaa' +ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('慡') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +SET NAMES utf8; +# # End of 10.2 tests # diff --git a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result index 46ca6e7baee..2f6e44dc402 100644 --- a/mysql-test/r/ctype_utf32_uca.result +++ b/mysql-test/r/ctype_utf32_uca.result @@ -7941,5 +7941,20 @@ EXECUTE s; DEALLOCATE PREPARE s; SET NAMES utf8; # +# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT +# +CREATE TABLE t1 (a CHAR(1)); +SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary; +ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32; +ERROR HY000: Column 'a' has duplicated value 'a' in ENUM +ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +SET NAMES utf8; +# # End of 10.2 tests # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 891fd14d15f..46ff333b5f7 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -1048,6 +1048,25 @@ INSERT INTO t1 (c1) VALUES (1),(2),(3); SELECT HEX(c1) FROM t1 ORDER BY c1; DROP TABLE t1; + +--echo # +--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT +--echo # + +CREATE TABLE t1 (a CHAR(1)); +SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary; +--error ER_DUPLICATED_VALUE_IN_TYPE +ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32; +--error ER_INVALID_CHARACTER_STRING +ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32; +ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +DROP TABLE t1; +SET NAMES utf8; + + --echo # --echo # End of 10.2 tests --echo # diff --git a/mysql-test/t/ctype_utf32_uca.test b/mysql-test/t/ctype_utf32_uca.test index 2969480b0ef..9073d8c57f5 100644 --- a/mysql-test/t/ctype_utf32_uca.test +++ b/mysql-test/t/ctype_utf32_uca.test @@ -290,6 +290,19 @@ EXECUTE s; DEALLOCATE PREPARE s; SET NAMES utf8; +--echo # +--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT +--echo # + +CREATE TABLE t1 (a CHAR(1)); +SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary; +--error ER_DUPLICATED_VALUE_IN_TYPE +ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32; +ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32; +SHOW CREATE TABLE t1; +DROP TABLE t1; +SET NAMES utf8; + --echo # --echo # End of 10.2 tests diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 479578679f1..4edf573e596 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -2148,7 +2148,7 @@ void THD::cleanup_after_query() */ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, - const char *from, uint from_length, + const char *from, size_t from_length, CHARSET_INFO *from_cs) { DBUG_ENTER("THD::convert_string"); @@ -2171,6 +2171,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, /* + Reinterpret a binary string to a character string + + @param[OUT] to The result will be written here, + either the original string as is, + or a newly alloced fixed string with + some zero bytes prepended. + @param cs The destination character set + @param str The binary string + @param length The length of the binary string + + @return false on success + @return true on error +*/ + +bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs, + const char *str, size_t length) +{ + /* + When reinterpreting from binary to tricky character sets like + UCS2, UTF16, UTF32, we may need to prepend some zero bytes. + This is possible in scenarios like this: + SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary; + This code is similar to String::copy_aligned(). + */ + size_t incomplete= length % cs->mbminlen; // Bytes in an incomplete character + if (incomplete) + { + size_t zeros= cs->mbminlen - incomplete; + size_t aligned_length= zeros + length; + char *dst= (char*) alloc(aligned_length + 1); + if (!dst) + { + to->str= NULL; // Safety + to->length= 0; + return true; + } + bzero(dst, zeros); + memcpy(dst + zeros, str, length); + dst[aligned_length]= '\0'; + to->str= dst; + to->length= aligned_length; + } + else + { + to->str= str; + to->length= length; + } + return check_string_for_wellformedness(to->str, to->length, cs); +} + + +/* Convert a string between two character sets. dstcs and srccs cannot be &my_charset_bin. */ @@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) } +bool THD::check_string_for_wellformedness(const char *str, + size_t length, + CHARSET_INFO *cs) const +{ + size_t wlen= Well_formed_prefix(cs, str, length).length(); + if (wlen < length) + { + ErrConvString err(str, length, &my_charset_bin); + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr()); + return true; + } + return false; +} + + /* Update some cache variables when character set changes */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 3f0fba8fc10..a748def9b56 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3503,8 +3503,31 @@ public: return true; // EOM } bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, - const char *from, uint from_length, + const char *from, size_t from_length, CHARSET_INFO *from_cs); + bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length); + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs) + { + LEX_STRING tmp; + bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs); + to->str= tmp.str; + to->length= tmp.length; + return rc; + } + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs, + const LEX_CSTRING *from, CHARSET_INFO *fromcs, + bool simple_copy_is_possible) + { + if (!simple_copy_is_possible) + return unlikely(convert_string(to, tocs, from->str, from->length, fromcs)); + if (fromcs == &my_charset_bin) + return reinterpret_string_from_binary(to, tocs, from->str, from->length); + *to= *from; + return false; + } /* Convert a strings between character sets. Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally. @@ -3540,6 +3563,44 @@ public: bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs); + /* + Check if the string is wellformed, raise an error if not wellformed. + @param str - The string to check. + @param length - the string length. + */ + bool check_string_for_wellformedness(const char *str, + size_t length, + CHARSET_INFO *cs) const; + + bool make_text_string_connection(LEX_CSTRING *to, + const LEX_CSTRING *from) + { + return convert_string(to, variables.collation_connection, + from, charset(), charset_is_collation_connection); + } +#if MYSQL_VERSION_ID < 100300 + /* + A wrapper method for 10.2. It fixes the problem + that various fields in bison %union use LEX_STRING. + In 10.3 those fields are fixed to use LEX_CSTRING. + Please remove this wrapper when mering to 10.3. + */ + bool make_text_string_connection(LEX_STRING *to, + const LEX_STRING *from) + { + LEX_CSTRING cto; + LEX_CSTRING cfrom; + bool rc; + cfrom.str= from->str; + cfrom.length= from->length; + rc= make_text_string_connection(&cto, &cfrom); + to->str= (char*) cto.str; + to->length= cto.length; + return rc; + } +#else +#error Remove the above wrapper +#endif void add_changed_table(TABLE *table); void add_changed_table(const char *key, long key_length); CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 6f3274aced5..34f37efafdb 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -14571,14 +14571,8 @@ TEXT_STRING_sys: TEXT_STRING_literal: TEXT_STRING { - if (thd->charset_is_collation_connection) - $$= $1; - else - { - if (thd->convert_string(&$$, thd->variables.collation_connection, - $1.str, $1.length, thd->charset())) - MYSQL_YYABORT; - } + if (thd->make_text_string_connection(&$$, &$1)) + MYSQL_YYABORT; } ; |