diff options
-rw-r--r-- | mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result | bin | 112951 -> 113476 bytes | |||
-rw-r--r-- | mysql-test/suite/sys_vars/r/character_set_connection_func.result | 6 | ||||
-rw-r--r-- | sql/item.cc | 63 | ||||
-rw-r--r-- | sql/item.h | 9 | ||||
-rw-r--r-- | sql/item_strfunc.cc | 10 | ||||
-rw-r--r-- | sql/sql_class.cc | 38 | ||||
-rw-r--r-- | sql/sql_string.cc | 68 | ||||
-rw-r--r-- | sql/sql_string.h | 6 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 10 |
9 files changed, 164 insertions, 46 deletions
diff --git a/mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result b/mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result Binary files differindex 08eecb1c17d..b862e23e3d3 100644 --- a/mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result +++ b/mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result diff --git a/mysql-test/suite/sys_vars/r/character_set_connection_func.result b/mysql-test/suite/sys_vars/r/character_set_connection_func.result index 6fc33a4f369..9e1dbc62565 100644 --- a/mysql-test/suite/sys_vars/r/character_set_connection_func.result +++ b/mysql-test/suite/sys_vars/r/character_set_connection_func.result @@ -23,6 +23,8 @@ SET @@session.character_set_connection = latin1; SELECT 'ЁЂЃЄ' AS utf_text; utf_text ???? +Warnings: +Warning 1105 Can't convert the character string from utf8 to latin1: '\xD0\x81\xD0\x82\xD0\x83...' SET @@session.character_set_connection = utf8; SELECT 'ЁЂЃЄ' AS utf_text; utf_text @@ -30,6 +32,8 @@ utf_text '---now inserting utf8 string with different character_set_connection--' SET @@session.character_set_connection = ascii; INSERT INTO t1 VALUES('ЁЂЃЄ'); +Warnings: +Warning 1105 Can't convert the character string from utf8 to ascii: '\xD0\x81\xD0\x82\xD0\x83...' SELECT * FROM t1; b ???? @@ -39,6 +43,8 @@ SET @@session.character_set_connection = ascii; SET @@session.character_set_client = latin1; SET @@session.character_set_results = latin1; INSERT INTO t1 VALUES('ЁЂЃЄ'); +Warnings: +Warning 1105 Can't convert the character string from latin1 to ascii: '\xD0\x81\xD0\x82\xD0\x83...' SELECT * FROM t1; b ???????? diff --git a/sql/item.cc b/sql/item.cc index bd80d73ebfb..207c94feabd 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4807,39 +4807,60 @@ bool Item::is_datetime() } -String *Item::check_well_formed_result(String *str, bool send_error) +/** + Verifies that the input string is well-formed according to its character set. + @param send_error If true, call my_error if string is not well-formed. + @param truncate If true, set to null/truncate if not well-formed. + + @return + If well-formed: input string. + If not well-formed: + if truncate is true and strict mode: NULL pointer and we set this + Item's value to NULL. + if truncate is true and not strict mode: input string truncated up to + last good character. + if truncate is false: input string is returned. + */ +String *Item::check_well_formed_result(String *str, + bool send_error, + bool truncate) { /* Check whether we got a well-formed string */ CHARSET_INFO *cs= str->charset(); - int well_formed_error; - uint wlen= cs->cset->well_formed_len(cs, - str->ptr(), str->ptr() + str->length(), - str->length(), &well_formed_error); - if (wlen < str->length()) + + size_t valid_length; + bool length_error; + + if (validate_string(cs, str->ptr(), str->length(), + &valid_length, &length_error)) { + const char *str_end= str->ptr() + str->length(); + const char *print_byte= str->ptr() + valid_length; THD *thd= current_thd; char hexbuf[7]; - enum MYSQL_ERROR::enum_warning_level level; - uint diff= str->length() - wlen; + enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN; + uint diff= str_end - print_byte; set_if_smaller(diff, 3); - octet2hex(hexbuf, str->ptr() + wlen, diff); - if (send_error) + octet2hex(hexbuf, print_byte, diff); + if (send_error && length_error) { my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, hexbuf); return 0; } - if ((thd->variables.sql_mode & - (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))) + if (truncate && length_error) { - level= 
MYSQL_ERROR::WARN_LEVEL_ERROR; - null_value= 1; - str= 0; - } - else - { - level= MYSQL_ERROR::WARN_LEVEL_WARN; - str->length(wlen); + if ((thd->variables.sql_mode & + (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))) + { + level= MYSQL_ERROR::WARN_LEVEL_ERROR; + null_value= 1; + str= 0; + } + else + { + str->length(valid_length); + } } push_warning_printf(thd, level, ER_INVALID_CHARACTER_STRING, ER(ER_INVALID_CHARACTER_STRING), cs->csname, hexbuf); diff --git a/sql/item.h b/sql/item.h index c82d23b6d5a..15fe7ce5afa 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1052,7 +1052,9 @@ public: bool is_datetime(); virtual Field::geometry_type get_geometry_type() const { return Field::GEOM_GEOMETRY; }; - String *check_well_formed_result(String *str, bool send_error= 0); + String *check_well_formed_result(String *str, + bool send_error, + bool truncate); bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); /** @@ -1929,6 +1931,11 @@ public: decimals=NOT_FIXED_DEC; // it is constant => can be used without fix_fields (and frequently used) fixed= 1; + /* + Check if the string has any character that can't be + interpreted using the relevant charset. + */ + check_well_formed_result(&str_value, false, false); } /* Just create an item and do not fill string representation */ Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 25473815b9c..5e6da3b1300 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2373,7 +2373,9 @@ String *Item_func_char::val_str(String *str) } } str->realloc(str->length()); // Add end 0 (for Purify) - return check_well_formed_result(str); + return check_well_formed_result(str, + false, // send warning + true); // truncate } @@ -2773,7 +2775,9 @@ String *Item_func_conv_charset::val_str(String *str) } null_value= tmp_value.copy(arg->ptr(), arg->length(), arg->charset(), conv_charset, &dummy_errors); - return null_value ? 0 : check_well_formed_result(&tmp_value); + return null_value ? 0 : check_well_formed_result(&tmp_value, + false, // send warning + true); // truncate } void Item_func_conv_charset::fix_length_and_dec() diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d71da6403ae..39d6316a512 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1313,21 +1313,17 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str, /* Convert a string to another character set - SYNOPSIS - convert_string() - to Store new allocated string here - to_cs New character set for allocated string - from String to convert - from_length Length of string to convert - from_cs Original character set + @param to Store new allocated string here + @param to_cs New character set for allocated string + @param from String to convert + @param from_length Length of string to convert + @param from_cs Original character set - NOTES - to will be 0-terminated to make it easy to pass to system funcs + @note to will be 0-terminated to make it easy to pass to system funcs - RETURN - 0 ok - 1 End of memory. 
- In this case to->str will point to 0 and to->length will be 0. + @retval false ok + @retval true End of memory. + In this case to->str will point to 0 and to->length will be 0. */ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, @@ -1336,15 +1332,25 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, { DBUG_ENTER("convert_string"); size_t new_length= to_cs->mbmaxlen * from_length; - uint dummy_errors; + uint errors= 0; if (!(to->str= (char*) alloc(new_length+1))) { to->length= 0; // Safety fix DBUG_RETURN(1); // EOM } to->length= copy_and_convert((char*) to->str, new_length, to_cs, - from, from_length, from_cs, &dummy_errors); + from, from_length, from_cs, &errors); to->str[to->length]=0; // Safety + if (errors != 0) + { + char printable_buff[32]; + convert_to_printable(printable_buff, sizeof(printable_buff), + from, from_length, from_cs, 6); + push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Can't convert the character string from %s to %s: '%.64s'", + from_cs->csname, to_cs->csname, printable_buff); + } + DBUG_RETURN(0); } diff --git a/sql/sql_string.cc b/sql/sql_string.cc index f692014011c..511bf3c9547 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1231,3 +1231,69 @@ uint convert_to_printable(char *to, size_t to_len, *t= '\0'; return t - to; } + +/** + Check if an input byte sequence is a valid character string of a given charset + + @param cs The input character set. + @param str The input byte sequence to validate. + @param length A byte length of the str. + @param [out] valid_length A byte length of a valid prefix of the str. 
+ @param [out] length_error True in the case of a character length error: + some byte[s] in the input is not a valid + prefix for a character, i.e. the byte length + of that invalid character is undefined. + + @retval false if the whole input byte sequence is a valid character string. + The length_error output parameter is undefined. + + @return + if the whole input byte sequence is a valid character string + then + return false + else + if the length of some character in the input is undefined (MY_CS_ILSEQ) + or the last character is truncated (MY_CS_TOOSMALL) + then + *length_error= true; // fatal error! + else + *length_error= false; // non-fatal error: there is no wide character + // encoding for some input character + return true +*/ +bool validate_string(CHARSET_INFO *cs, const char *str, uint32 length, + size_t *valid_length, bool *length_error) +{ + if (cs->mbmaxlen > 1) + { + int well_formed_error; + *valid_length= cs->cset->well_formed_len(cs, str, str + length, + length, &well_formed_error); + *length_error= well_formed_error; + return well_formed_error; + } + + /* + well_formed_len() is not functional on single-byte character sets, + so use mb_wc() instead: + */ + *length_error= false; + + const uchar *from= reinterpret_cast<const uchar *>(str); + const uchar *from_end= from + length; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + + while (from < from_end) + { + my_wc_t wc; + int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end); + if (cnvres <= 0) + { + *valid_length= from - reinterpret_cast<const uchar *>(str); + return true; + } + from+= cnvres; + } + *valid_length= length; + return false; +} diff --git a/sql/sql_string.h b/sql/sql_string.h index c65560dd1d1..e06180b108f 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -413,3 +413,7 @@ static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str, { return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end; } + +bool +validate_string(CHARSET_INFO *cs, const char *str, uint32 length, + size_t *valid_length, bool *length_error); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 29516d34855..d9a32f2dd35 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -11070,7 +11070,9 @@ literal: str ? str->length() : 0, $1); if (!item_str || - !item_str->check_well_formed_result(&item_str->str_value, TRUE)) + !item_str->check_well_formed_result(&item_str->str_value, + true, //send error + true)) //truncate { MYSQL_YYABORT; } @@ -11099,7 +11101,9 @@ literal: str ? str->length() : 0, $1); if (!item_str || - !item_str->check_well_formed_result(&item_str->str_value, TRUE)) + !item_str->check_well_formed_result(&item_str->str_value, + true, //send error + true)) //truncate { MYSQL_YYABORT; } |