summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.com> 2022-03-12 15:38:44 +0400
committer Alexander Barkov <bar@mariadb.com> 2022-03-14 14:42:59 +0400
commit 03c3dc63655aabcfc309208188e44c200f680404 (patch)
tree 208f97da028dcc135b0d6668e35ea956f1ea87d2
parent ed6e271f786504916dbcbd3d55ee17cd3f2566ef (diff)
download mariadb-git-03c3dc63655aabcfc309208188e44c200f680404.tar.gz
MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
Problem: Parse-time conversion from binary to tricky character sets like utf32 produced ill-formed strings. As a result, a crash happened later in debug builds, or a wrong SHOW CREATE TABLE was returned in release builds.
Fix:
1. Backporting a few methods from 10.3:
   - THD::check_string_for_wellformedness()
   - THD::convert_string() overloads
   - THD::make_text_string_connection()
2. Adding a new method THD::reinterpret_string_from_binary(), which makes sure to either return a well-formed string (optionally prepended with zero bytes) or return an error.
-rw-r--r-- mysql-test/r/ctype_utf32.result 23
-rw-r--r-- mysql-test/r/ctype_utf32_uca.result 15
-rw-r--r-- mysql-test/t/ctype_utf32.test 19
-rw-r--r-- mysql-test/t/ctype_utf32_uca.test 13
-rw-r--r-- sql/sql_class.cc 69
-rw-r--r-- sql/sql_class.h 63
-rw-r--r-- sql/sql_yacc.yy 10
7 files changed, 202 insertions, 10 deletions
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 584ca12f8c3..143fff9e419 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -2890,5 +2890,28 @@ HEX(c1)
0000006100000063
DROP TABLE t1;
#
+# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
+#
+CREATE TABLE t1 (a CHAR(1));
+SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
+ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
+ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
+ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
+ERROR HY000: Invalid utf32 character string: '\x00aaa'
+ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('慡') CHARACTER SET utf32 DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+SET NAMES utf8;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result
index 46ca6e7baee..2f6e44dc402 100644
--- a/mysql-test/r/ctype_utf32_uca.result
+++ b/mysql-test/r/ctype_utf32_uca.result
@@ -7941,5 +7941,20 @@ EXECUTE s;
DEALLOCATE PREPARE s;
SET NAMES utf8;
#
+# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
+#
+CREATE TABLE t1 (a CHAR(1));
+SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
+ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
+ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
+ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+SET NAMES utf8;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index 891fd14d15f..46ff333b5f7 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -1048,6 +1048,25 @@ INSERT INTO t1 (c1) VALUES (1),(2),(3);
SELECT HEX(c1) FROM t1 ORDER BY c1;
DROP TABLE t1;
+
+--echo #
+--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
+--echo #
+
+CREATE TABLE t1 (a CHAR(1));
+SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
+--error ER_DUPLICATED_VALUE_IN_TYPE
+ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
+--error ER_INVALID_CHARACTER_STRING
+ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
+ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+SET NAMES utf8;
+
+
--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/mysql-test/t/ctype_utf32_uca.test b/mysql-test/t/ctype_utf32_uca.test
index 2969480b0ef..9073d8c57f5 100644
--- a/mysql-test/t/ctype_utf32_uca.test
+++ b/mysql-test/t/ctype_utf32_uca.test
@@ -290,6 +290,19 @@ EXECUTE s;
DEALLOCATE PREPARE s;
SET NAMES utf8;
+--echo #
+--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
+--echo #
+
+CREATE TABLE t1 (a CHAR(1));
+SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
+--error ER_DUPLICATED_VALUE_IN_TYPE
+ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
+ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+SET NAMES utf8;
+
--echo #
--echo # End of 10.2 tests
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 479578679f1..4edf573e596 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -2148,7 +2148,7 @@ void THD::cleanup_after_query()
*/
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
- const char *from, uint from_length,
+ const char *from, size_t from_length,
CHARSET_INFO *from_cs)
{
DBUG_ENTER("THD::convert_string");
@@ -2171,6 +2171,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
/*
+ Reinterpret a binary string to a character string
+
+ @param[OUT] to The result will be written here,
+ either the original string as is,
+ or a newly alloced fixed string with
+ some zero bytes prepended.
+ @param cs The destination character set
+ @param str The binary string
+ @param length The length of the binary string
+
+ @return false on success
+ @return true on error
+*/
+
+bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs,
+ const char *str, size_t length)
+{
+ /*
+ When reinterpreting from binary to tricky character sets like
+ UCS2, UTF16, UTF32, we may need to prepend some zero bytes.
+ This is possible in scenarios like this:
+ SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
+ This code is similar to String::copy_aligned().
+ */
+ size_t incomplete= length % cs->mbminlen; // Bytes in an incomplete character
+ if (incomplete)
+ {
+ size_t zeros= cs->mbminlen - incomplete;
+ size_t aligned_length= zeros + length;
+ char *dst= (char*) alloc(aligned_length + 1);
+ if (!dst)
+ {
+ to->str= NULL; // Safety
+ to->length= 0;
+ return true;
+ }
+ bzero(dst, zeros);
+ memcpy(dst + zeros, str, length);
+ dst[aligned_length]= '\0';
+ to->str= dst;
+ to->length= aligned_length;
+ }
+ else
+ {
+ to->str= str;
+ to->length= length;
+ }
+ return check_string_for_wellformedness(to->str, to->length, cs);
+}
+
+
+/*
Convert a string between two character sets.
dstcs and srccs cannot be &my_charset_bin.
*/
@@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
}
+bool THD::check_string_for_wellformedness(const char *str,
+ size_t length,
+ CHARSET_INFO *cs) const
+{
+ size_t wlen= Well_formed_prefix(cs, str, length).length();
+ if (wlen < length)
+ {
+ ErrConvString err(str, length, &my_charset_bin);
+ my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
+ return true;
+ }
+ return false;
+}
+
+
/*
Update some cache variables when character set changes
*/
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 3f0fba8fc10..a748def9b56 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3503,8 +3503,31 @@ public:
return true; // EOM
}
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
- const char *from, uint from_length,
+ const char *from, size_t from_length,
CHARSET_INFO *from_cs);
+ bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs,
+ const char *from, size_t from_length);
+ bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
+ const char *from, size_t from_length,
+ CHARSET_INFO *from_cs)
+ {
+ LEX_STRING tmp;
+ bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
+ to->str= tmp.str;
+ to->length= tmp.length;
+ return rc;
+ }
+ bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
+ const LEX_CSTRING *from, CHARSET_INFO *fromcs,
+ bool simple_copy_is_possible)
+ {
+ if (!simple_copy_is_possible)
+ return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
+ if (fromcs == &my_charset_bin)
+ return reinterpret_string_from_binary(to, tocs, from->str, from->length);
+ *to= *from;
+ return false;
+ }
/*
Convert a strings between character sets.
Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
@@ -3540,6 +3563,44 @@ public:
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
+ /*
+ Check if the string is wellformed, raise an error if not wellformed.
+ @param str - The string to check.
+ @param length - the string length.
+ */
+ bool check_string_for_wellformedness(const char *str,
+ size_t length,
+ CHARSET_INFO *cs) const;
+
+ bool make_text_string_connection(LEX_CSTRING *to,
+ const LEX_CSTRING *from)
+ {
+ return convert_string(to, variables.collation_connection,
+ from, charset(), charset_is_collation_connection);
+ }
+#if MYSQL_VERSION_ID < 100300
+ /*
+ A wrapper method for 10.2. It fixes the problem
+ that various fields in bison %union use LEX_STRING.
+ In 10.3 those fields are fixed to use LEX_CSTRING.
+ Please remove this wrapper when merging to 10.3.
+ */
+ bool make_text_string_connection(LEX_STRING *to,
+ const LEX_STRING *from)
+ {
+ LEX_CSTRING cto;
+ LEX_CSTRING cfrom;
+ bool rc;
+ cfrom.str= from->str;
+ cfrom.length= from->length;
+ rc= make_text_string_connection(&cto, &cfrom);
+ to->str= (char*) cto.str;
+ to->length= cto.length;
+ return rc;
+ }
+#else
+#error Remove the above wrapper
+#endif
void add_changed_table(TABLE *table);
void add_changed_table(const char *key, long key_length);
CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length);
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 6f3274aced5..34f37efafdb 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -14571,14 +14571,8 @@ TEXT_STRING_sys:
TEXT_STRING_literal:
TEXT_STRING
{
- if (thd->charset_is_collation_connection)
- $$= $1;
- else
- {
- if (thd->convert_string(&$$, thd->variables.collation_connection,
- $1.str, $1.length, thd->charset()))
- MYSQL_YYABORT;
- }
+ if (thd->make_text_string_connection(&$$, &$1))
+ MYSQL_YYABORT;
}
;