diff options
author | Alexander Barkov <bar@mariadb.com> | 2021-10-22 18:41:35 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2021-10-27 06:09:57 +0400 |
commit | e97b785d764f85009412947600195001be01a706 (patch) | |
tree | 6ef5ec6254429167b78f7d4af2314e15630c6343 /sql/item.cc | |
parent | f9b856b0525dd9d82a9b42be690a26a295c73c64 (diff) | |
download | mariadb-git-e97b785d764f85009412947600195001be01a706.tar.gz |
MDEV-22380: Assertion `name.length == strlen(name.str)' failed ...bb-10.3-bar-MDEV-22380
Also fixes:
MDEV-25399 Assertion `name.length == strlen(name.str)' failed in Item_func_sp::make_send_field
Also fixes a problem that in this scenario:
SET NAMES binary;
SELECT 'some not well-formed utf8 string';
the auto-generated column name copied the binary string value directly
to the Item name, without checking utf8 well-formedness.
After this change auto-generated column names work as follows:
- Zero bytes 0x00 are copied to the name using HEX notation
- In case of "SET NAMES binary", all bytes sequences that do not make
well-formed utf8 characters are copied to the name using HEX notation.
Diffstat (limited to 'sql/item.cc')
-rw-r--r-- | sql/item.cc | 96 |
1 files changed, 76 insertions, 20 deletions
diff --git a/sql/item.cc b/sql/item.cc index c79e706d875..1319d5c4b62 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1217,6 +1217,80 @@ bool Item::check_type_scalar(const char *opname) const } +extern "C" { + +/* + All values greater than MY_NAME_BINARY_VALUE are + interpreted as binary bytes. + The exact constant value does not matter, + but it must be greater than 0x10FFFF, + which is the maximum possible character in Unicode. +*/ +#define MY_NAME_BINARY_VALUE 0x200000 + +/* + Print all binary bytes as well as zero character U+0000 in hex notation. + Print other characters normally. +*/ +static int +my_wc_mb_item_name(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) +{ + if (wc == 0 || wc >= MY_NAME_BINARY_VALUE) + { + if (str + 4 >= end) + return MY_CS_TOOSMALL3; + str[0]= '\\'; + str[1]= 'x'; + str[2]= _dig_vec_upper[(uchar) (wc >> 4)]; + str[3]= _dig_vec_upper[(uchar) wc & 0x0F]; + return 4; + } + return my_charset_utf8_handler.wc_mb(cs, wc, str, end); +} + + +/* + Scan characters and mark all illegal sequences as binary byte values, + to have my_wc_mb_utf8_escape_name() print them using HEX notation. +*/ +static int +my_mb_wc_item_name(CHARSET_INFO *cs, my_wc_t *pwc, + const uchar *str, const uchar *end) +{ + int rc= cs->cset->mb_wc(cs, pwc, str, end); + if (rc == MY_CS_ILSEQ) + { + *pwc= MY_NAME_BINARY_VALUE + *str; + return 1; + } + return rc; +} + +} + + +static LEX_CSTRING +make_name(THD *thd, + const char *str, size_t length, CHARSET_INFO *cs, + size_t max_octet_length) +{ + uint errors; + size_t dst_nbytes= length * system_charset_info->mbmaxlen; + set_if_smaller(dst_nbytes, max_octet_length); + char *dst= (char*) thd->alloc(dst_nbytes + 1); + if (!dst) + return null_clex_str; + uint32 cnv_length= my_convert_using_func(dst, dst_nbytes, system_charset_info, + my_wc_mb_item_name, + str, length, + cs == &my_charset_bin ? + system_charset_info : cs, + my_mb_wc_item_name, &errors); + dst[cnv_length]= '\0'; + return Lex_cstring(dst, cnv_length); +} + + void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs) { if (!length) @@ -1267,32 +1341,14 @@ void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs) ER_REMOVED_SPACES, ER_THD(thd, ER_REMOVED_SPACES), buff); } - if (!my_charset_same(cs, system_charset_info)) - { - size_t res_length; - name.str= sql_strmake_with_convert(thd, str, length, cs, - MAX_ALIAS_NAME, system_charset_info, - &res_length); - name.length= res_length; - } - else - name.str= thd->strmake(str, (name.length= MY_MIN(length,MAX_ALIAS_NAME))); + name= make_name(thd, str, length, cs, MAX_ALIAS_NAME - 1); } void Item::set_name_no_truncate(THD *thd, const char *str, uint length, CHARSET_INFO *cs) { - if (!my_charset_same(cs, system_charset_info)) - { - size_t res_length; - name.str= sql_strmake_with_convert(thd, str, length, cs, - UINT_MAX, system_charset_info, - &res_length); - name.length= res_length; - } - else - name.str= thd->strmake(str, (name.length= length)); + name= make_name(thd, str, length, cs, UINT_MAX - 1); } |