diff options
author | Alexander Barkov <bar@mariadb.com> | 2021-10-27 07:14:45 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2021-10-27 07:14:45 +0400 |
commit | 7b75242918518faea0e50207bde2052168a28897 (patch) | |
tree | 7657486877261d8bb7ac4a159d8b73c1e78fb8a0 /sql/item.cc | |
parent | d74d95961a31b47986d943216489513896108782 (diff) | |
parent | e97b785d764f85009412947600195001be01a706 (diff) | |
download | mariadb-git-7b75242918518faea0e50207bde2052168a28897.tar.gz |
Merge remote-tracking branch 'origin/10.3' into 10.4
Diffstat (limited to 'sql/item.cc')
-rw-r--r-- | sql/item.cc | 96 |
1 files changed, 76 insertions, 20 deletions
diff --git a/sql/item.cc b/sql/item.cc index 7a00af71b2f..c45212af617 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1096,6 +1096,80 @@ bool Item::check_type_scalar(const char *opname) const } +extern "C" { + +/* + All values greater than MY_NAME_BINARY_VALUE are + interpreted as binary bytes. + The exact constant value does not matter, + but it must be greater than 0x10FFFF, + which is the maximum possible character in Unicode. +*/ +#define MY_NAME_BINARY_VALUE 0x200000 + +/* + Print all binary bytes as well as zero character U+0000 in hex notation. + Print other characters normally. +*/ +static int +my_wc_mb_item_name(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) +{ + if (wc == 0 || wc >= MY_NAME_BINARY_VALUE) + { + if (str + 4 >= end) + return MY_CS_TOOSMALL3; + str[0]= '\\'; + str[1]= 'x'; + str[2]= _dig_vec_upper[(uchar) (wc >> 4)]; + str[3]= _dig_vec_upper[(uchar) wc & 0x0F]; + return 4; + } + return my_charset_utf8_handler.wc_mb(cs, wc, str, end); +} + + +/* + Scan characters and mark all illegal sequences as binary byte values, + to have my_wc_mb_utf8_escape_name() print them using HEX notation. +*/ +static int +my_mb_wc_item_name(CHARSET_INFO *cs, my_wc_t *pwc, + const uchar *str, const uchar *end) +{ + int rc= cs->cset->mb_wc(cs, pwc, str, end); + if (rc == MY_CS_ILSEQ) + { + *pwc= MY_NAME_BINARY_VALUE + *str; + return 1; + } + return rc; +} + +} + + +static LEX_CSTRING +make_name(THD *thd, + const char *str, size_t length, CHARSET_INFO *cs, + size_t max_octet_length) +{ + uint errors; + size_t dst_nbytes= length * system_charset_info->mbmaxlen; + set_if_smaller(dst_nbytes, max_octet_length); + char *dst= (char*) thd->alloc(dst_nbytes + 1); + if (!dst) + return null_clex_str; + uint32 cnv_length= my_convert_using_func(dst, dst_nbytes, system_charset_info, + my_wc_mb_item_name, + str, length, + cs == &my_charset_bin ? + system_charset_info : cs, + my_mb_wc_item_name, &errors); + dst[cnv_length]= '\0'; + return Lex_cstring(dst, cnv_length); +} + + void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs) { if (!length) @@ -1146,32 +1220,14 @@ void Item::set_name(THD *thd, const char *str, size_t length, CHARSET_INFO *cs) ER_REMOVED_SPACES, ER_THD(thd, ER_REMOVED_SPACES), buff); } - if (!my_charset_same(cs, system_charset_info)) - { - size_t res_length; - name.str= sql_strmake_with_convert(thd, str, length, cs, - MAX_ALIAS_NAME, system_charset_info, - &res_length); - name.length= res_length; - } - else - name.str= thd->strmake(str, (name.length= MY_MIN(length,MAX_ALIAS_NAME))); + name= make_name(thd, str, length, cs, MAX_ALIAS_NAME - 1); } void Item::set_name_no_truncate(THD *thd, const char *str, uint length, CHARSET_INFO *cs) { - if (!my_charset_same(cs, system_charset_info)) - { - size_t res_length; - name.str= sql_strmake_with_convert(thd, str, length, cs, - UINT_MAX, system_charset_info, - &res_length); - name.length= res_length; - } - else - name.str= thd->strmake(str, (name.length= length)); + name= make_name(thd, str, length, cs, UINT_MAX - 1); } |