summaryrefslogtreecommitdiff
path: root/sql/item.cc
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2021-10-22 18:41:35 +0400
committerAlexander Barkov <bar@mariadb.com>2021-10-27 06:09:57 +0400
commite97b785d764f85009412947600195001be01a706 (patch)
tree6ef5ec6254429167b78f7d4af2314e15630c6343 /sql/item.cc
parentf9b856b0525dd9d82a9b42be690a26a295c73c64 (diff)
downloadmariadb-git-e97b785d764f85009412947600195001be01a706.tar.gz
MDEV-22380: Assertion `name.length == strlen(name.str)' failed ...bb-10.3-bar-MDEV-22380
Also fixes: MDEV-25399 Assertion `name.length == strlen(name.str)' failed in Item_func_sp::make_send_field Also fixes a problem that in this scenario: SET NAMES binary; SELECT 'some not well-formed utf8 string'; the auto-generated column name copied the binary string value directly to the Item name, without checking utf8 well-formedness. After this change auto-generated column names work as follows: - Zero bytes 0x00 are copied to the name using HEX notation - In case of "SET NAMES binary", all bytes sequences that do not make well-formed utf8 characters are copied to the name using HEX notation.
Diffstat (limited to 'sql/item.cc')
-rw-r--r--sql/item.cc96
1 files changed, 76 insertions, 20 deletions
diff --git a/sql/item.cc b/sql/item.cc
index c79e706d875..1319d5c4b62 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1217,6 +1217,80 @@ bool Item::check_type_scalar(const char *opname) const
}
+extern "C" {
+
+/*
+ All values greater than MY_NAME_BINARY_VALUE are
+ interpreted as binary bytes.
+ The exact constant value does not matter,
+ but it must be greater than 0x10FFFF,
+ which is the maximum possible character in Unicode.
+*/
+#define MY_NAME_BINARY_VALUE 0x200000
+
+/*
+ Print all binary bytes as well as zero character U+0000 in hex notation.
+ Print other characters normally.
+*/
+static int
+my_wc_mb_item_name(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end)
+{
+ if (wc == 0 || wc >= MY_NAME_BINARY_VALUE)
+ {
+ if (str + 4 >= end)
+ return MY_CS_TOOSMALL3;
+ str[0]= '\\';
+ str[1]= 'x';
+ str[2]= _dig_vec_upper[(uchar) (wc >> 4)];
+ str[3]= _dig_vec_upper[(uchar) wc & 0x0F];
+ return 4;
+ }
+ return my_charset_utf8_handler.wc_mb(cs, wc, str, end);
+}
+
+
+/*
+ Scan characters and mark all illegal sequences as binary byte values,
+ to have my_wc_mb_utf8_escape_name() print them using HEX notation.
+*/
+static int
+my_mb_wc_item_name(CHARSET_INFO *cs, my_wc_t *pwc,
+ const uchar *str, const uchar *end)
+{
+ int rc= cs->cset->mb_wc(cs, pwc, str, end);
+ if (rc == MY_CS_ILSEQ)
+ {
+ *pwc= MY_NAME_BINARY_VALUE + *str;
+ return 1;
+ }
+ return rc;
+}
+
+}
+
+
+static LEX_CSTRING
+make_name(THD *thd,
+ const char *str, size_t length, CHARSET_INFO *cs,
+ size_t max_octet_length)
+{
+ uint errors;
+ size_t dst_nbytes= length * system_charset_info->mbmaxlen;
+ set_if_smaller(dst_nbytes, max_octet_length);
+ char *dst= (char*) thd->alloc(dst_nbytes + 1);
+ if (!dst)
+ return null_clex_str;
+ uint32 cnv_length= my_convert_using_func(dst, dst_nbytes, system_charset_info,
+ my_wc_mb_item_name,
+ str, length,
+ cs == &my_charset_bin ?
+ system_charset_info : cs,
+ my_mb_wc_item_name, &errors);
+ dst[cnv_length]= '\0';
+ return Lex_cstring(dst, cnv_length);
+}
+
+
void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs)
{
if (!length)
@@ -1267,32 +1341,14 @@ void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs)
ER_REMOVED_SPACES, ER_THD(thd, ER_REMOVED_SPACES),
buff);
}
- if (!my_charset_same(cs, system_charset_info))
- {
- size_t res_length;
- name.str= sql_strmake_with_convert(thd, str, length, cs,
- MAX_ALIAS_NAME, system_charset_info,
- &res_length);
- name.length= res_length;
- }
- else
- name.str= thd->strmake(str, (name.length= MY_MIN(length,MAX_ALIAS_NAME)));
+ name= make_name(thd, str, length, cs, MAX_ALIAS_NAME - 1);
}
void Item::set_name_no_truncate(THD *thd, const char *str, uint length,
CHARSET_INFO *cs)
{
- if (!my_charset_same(cs, system_charset_info))
- {
- size_t res_length;
- name.str= sql_strmake_with_convert(thd, str, length, cs,
- UINT_MAX, system_charset_info,
- &res_length);
- name.length= res_length;
- }
- else
- name.str= thd->strmake(str, (name.length= length));
+ name= make_name(thd, str, length, cs, UINT_MAX - 1);
}