diff options
author | Alexander Barkov <bar@mariadb.com> | 2022-03-12 15:38:44 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2022-03-14 14:42:59 +0400 |
commit | 03c3dc63655aabcfc309208188e44c200f680404 (patch) | |
tree | 208f97da028dcc135b0d6668e35ea956f1ea87d2 /sql/sql_class.h | |
parent | ed6e271f786504916dbcbd3d55ee17cd3f2566ef (diff) | |
download | mariadb-git-03c3dc63655aabcfc309208188e44c200f680404.tar.gz |
MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
Problem:
Parse-time conversion from binary to tricky character sets like utf32
produced ill-formed strings. So, later a crash happened in debug builds,
or a wrong SHOW CREATE TABLE was returned in release builds.
Fix:
1. Backporting a few methods from 10.3:
- THD::check_string_for_wellformedness()
- THD::convert_string() overloads
- THD::make_text_string_connection()
2. Adding a new method THD::reinterpret_string_from_binary(),
which makes sure to either return a well-formed string
(optionally prepending it with zero bytes) or return an error.
Diffstat (limited to 'sql/sql_class.h')
-rw-r--r-- | sql/sql_class.h | 63 |
1 files changed, 62 insertions, 1 deletions
diff --git a/sql/sql_class.h b/sql/sql_class.h index 3f0fba8fc10..a748def9b56 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3503,8 +3503,31 @@ public: return true; // EOM } bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, - const char *from, uint from_length, + const char *from, size_t from_length, CHARSET_INFO *from_cs); + bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length); + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs) + { + LEX_STRING tmp; + bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs); + to->str= tmp.str; + to->length= tmp.length; + return rc; + } + bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs, + const LEX_CSTRING *from, CHARSET_INFO *fromcs, + bool simple_copy_is_possible) + { + if (!simple_copy_is_possible) + return unlikely(convert_string(to, tocs, from->str, from->length, fromcs)); + if (fromcs == &my_charset_bin) + return reinterpret_string_from_binary(to, tocs, from->str, from->length); + *to= *from; + return false; + } /* Convert a strings between character sets. Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally. @@ -3540,6 +3563,44 @@ public: bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs); + /* + Check if the string is wellformed, raise an error if not wellformed. + @param str - The string to check. + @param length - the string length. + */ + bool check_string_for_wellformedness(const char *str, + size_t length, + CHARSET_INFO *cs) const; + + bool make_text_string_connection(LEX_CSTRING *to, + const LEX_CSTRING *from) + { + return convert_string(to, variables.collation_connection, + from, charset(), charset_is_collation_connection); + } +#if MYSQL_VERSION_ID < 100300 + /* + A wrapper method for 10.2. It fixes the problem + that various fields in bison %union use LEX_STRING. 
+ In 10.3 those fields are fixed to use LEX_CSTRING. + Please remove this wrapper when mering to 10.3. + */ + bool make_text_string_connection(LEX_STRING *to, + const LEX_STRING *from) + { + LEX_CSTRING cto; + LEX_CSTRING cfrom; + bool rc; + cfrom.str= from->str; + cfrom.length= from->length; + rc= make_text_string_connection(&cto, &cfrom); + to->str= (char*) cto.str; + to->length= cto.length; + return rc; + } +#else +#error Remove the above wrapper +#endif void add_changed_table(TABLE *table); void add_changed_table(const char *key, long key_length); CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length); |