diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.cc | 54 | ||||
-rw-r--r-- | sql/item.h | 9 | ||||
-rw-r--r-- | sql/item_strfunc.cc | 18 | ||||
-rw-r--r-- | sql/sql_class.cc | 37 | ||||
-rw-r--r-- | sql/sql_string.cc | 68 | ||||
-rw-r--r-- | sql/sql_string.h | 5 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 8 |
7 files changed, 152 insertions, 47 deletions
diff --git a/sql/item.cc b/sql/item.cc index 96f15b92a54..fd590574e56 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -5146,45 +5146,55 @@ enum_field_types Item::field_type() const /** Verifies that the input string is well-formed according to its character set. @param send_error If true, call my_error if string is not well-formed. - - Will truncate input string if it is not well-formed. + @param truncate If true, set to null/truncate if not well-formed. @return If well-formed: input string. If not well-formed: - if strict mode: NULL pointer and we set this Item's value to NULL - if not strict mode: input string truncated up to last good character + if truncate is true and strict mode: NULL pointer and we set this + Item's value to NULL. + if truncate is true and not strict mode: input string truncated up to + last good character. + if truncate is false: input string is returned. 
*/ -String *Item::check_well_formed_result(String *str, bool send_error) +String *Item::check_well_formed_result(String *str, + bool send_error, + bool truncate) { /* Check whether we got a well-formed string */ CHARSET_INFO *cs= str->charset(); - int well_formed_error; - uint wlen= cs->cset->well_formed_len(cs, - str->ptr(), str->ptr() + str->length(), - str->length(), &well_formed_error); - if (wlen < str->length()) + + size_t valid_length; + bool length_error; + + if (validate_string(cs, str->ptr(), str->length(), + &valid_length, &length_error)) { + const char *str_end= str->ptr() + str->length(); + const char *print_byte= str->ptr() + valid_length; THD *thd= current_thd; char hexbuf[7]; - uint diff= str->length() - wlen; + uint diff= str_end - print_byte; set_if_smaller(diff, 3); - octet2hex(hexbuf, str->ptr() + wlen, diff); - if (send_error) + octet2hex(hexbuf, print_byte, diff); + if (send_error && length_error) { my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, hexbuf); return 0; } - if ((thd->variables.sql_mode & - (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))) - { - null_value= 1; - str= 0; - } - else + if (truncate && length_error) { - str->length(wlen); + if ((thd->variables.sql_mode & + (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))) + { + null_value= 1; + str= 0; + } + else + { + str->length(valid_length); + } } push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_INVALID_CHARACTER_STRING, ER(ER_INVALID_CHARACTER_STRING), cs->csname, hexbuf); diff --git a/sql/item.h b/sql/item.h index a4c0f4b7393..629f9afa241 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1214,7 +1214,9 @@ public: } virtual Field::geometry_type get_geometry_type() const { return Field::GEOM_GEOMETRY; }; - String *check_well_formed_result(String *str, bool send_error= 0); + String *check_well_formed_result(String *str, + bool send_error, + bool truncate); bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); uint32 max_char_length() const { 
return max_length / collation.collation->mbmaxlen; } @@ -2193,6 +2195,11 @@ public: decimals=NOT_FIXED_DEC; // it is constant => can be used without fix_fields (and frequently used) fixed= 1; + /* + Check if the string has any character that can't be + interpreted using the relevant charset. + */ + check_well_formed_result(&str_value, false, false); } /* Just create an item and do not fill string representation */ Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index eab8b4ddb00..7496b4f3cf4 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2652,7 +2652,9 @@ String *Item_func_char::val_str(String *str) } } str->realloc(str->length()); // Add end 0 (for Purify) - return check_well_formed_result(str); + return check_well_formed_result(str, + false, // send warning + true); // truncate } @@ -2818,7 +2820,9 @@ String *Item_func_rpad::val_str(String *str) if (use_mb(rpad->charset())) { // This will chop off any trailing illegal characters from rpad. - String *well_formed_pad= args[2]->check_well_formed_result(rpad, false); + String *well_formed_pad= args[2]->check_well_formed_result(rpad, + false, //send warning + true); //truncate if (!well_formed_pad) goto err; } @@ -2931,7 +2935,9 @@ String *Item_func_lpad::val_str(String *str) if (use_mb(pad->charset())) { // This will chop off any trailing illegal characters from pad. 
- String *well_formed_pad= args[2]->check_well_formed_result(pad, false); + String *well_formed_pad= args[2]->check_well_formed_result(pad, + false, // send warning + true); // truncate if (!well_formed_pad) goto err; } @@ -3047,7 +3053,9 @@ String *Item_func_conv_charset::val_str(String *str) } null_value= tmp_value.copy(arg->ptr(), arg->length(), arg->charset(), conv_charset, &dummy_errors); - return null_value ? 0 : check_well_formed_result(&tmp_value); + return null_value ? 0 : check_well_formed_result(&tmp_value, + false, // send warning + true); // truncate } void Item_func_conv_charset::fix_length_and_dec() diff --git a/sql/sql_class.cc b/sql/sql_class.cc index cbdaca0fac5..c3d2a092fd6 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1805,21 +1805,17 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str, /* Convert a string to another character set - SYNOPSIS - convert_string() - to Store new allocated string here - to_cs New character set for allocated string - from String to convert - from_length Length of string to convert - from_cs Original character set + @param to Store new allocated string here + @param to_cs New character set for allocated string + @param from String to convert + @param from_length Length of string to convert + @param from_cs Original character set - NOTES - to will be 0-terminated to make it easy to pass to system funcs + @note to will be 0-terminated to make it easy to pass to system funcs - RETURN - 0 ok - 1 End of memory. - In this case to->str will point to 0 and to->length will be 0. + @retval false ok + @retval true End of memory. + In this case to->str will point to 0 and to->length will be 0. 
*/ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, @@ -1828,15 +1824,26 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, { DBUG_ENTER("convert_string"); size_t new_length= to_cs->mbmaxlen * from_length; - uint dummy_errors; + uint errors= 0; if (!(to->str= (char*) alloc(new_length+1))) { to->length= 0; // Safety fix DBUG_RETURN(1); // EOM } to->length= copy_and_convert((char*) to->str, new_length, to_cs, - from, from_length, from_cs, &dummy_errors); + from, from_length, from_cs, &errors); to->str[to->length]=0; // Safety + if (errors != 0) + { + char printable_buff[32]; + convert_to_printable(printable_buff, sizeof(printable_buff), + from, from_length, from_cs, 6); + push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_INVALID_CHARACTER_STRING, + ER_THD(this, ER_INVALID_CHARACTER_STRING), + from_cs->csname, printable_buff); + } + DBUG_RETURN(0); } diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 41725128415..edde1b8d8e0 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1281,3 +1281,69 @@ uint convert_to_printable(char *to, size_t to_len, *t= '\0'; return t - to; } + +/** + Check if an input byte sequence is a valid character string of a given charset + + @param cs The input character set. + @param str The input byte sequence to validate. + @param length A byte length of the str. + @param [out] valid_length A byte length of a valid prefix of the str. + @param [out] length_error True in the case of a character length error: + some byte[s] in the input is not a valid + prefix for a character, i.e. the byte length + of that invalid character is undefined. 
+ + @retval false if the whole input byte sequence is a valid character string. + The length_error output parameter is undefined. + + @return + if the whole input byte sequence is a valid character string + then + return false + else + if the length of some character in the input is undefined (MY_CS_ILSEQ) + or the last character is truncated (MY_CS_TOOSMALL) + then + *length_error= true; // fatal error! + else + *length_error= false; // non-fatal error: there is no wide character + // encoding for some input character + return true +*/ +bool validate_string(CHARSET_INFO *cs, const char *str, uint32 length, + size_t *valid_length, bool *length_error) +{ + if (cs->mbmaxlen > 1) + { + int well_formed_error; + *valid_length= cs->cset->well_formed_len(cs, str, str + length, + length, &well_formed_error); + *length_error= well_formed_error; + return well_formed_error; + } + + /* + well_formed_len() is not functional on single-byte character sets, + so use mb_wc() instead: + */ + *length_error= false; + + const uchar *from= reinterpret_cast<const uchar *>(str); + const uchar *from_end= from + length; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + + while (from < from_end) + { + my_wc_t wc; + int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end); + if (cnvres <= 0) + { + *valid_length= from - reinterpret_cast<const uchar *>(str); + return true; + } + from+= cnvres; + } + *valid_length= length; + return false; +} diff --git a/sql/sql_string.h b/sql/sql_string.h index 971af9ea91a..95b39f5e15e 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -1,7 +1,7 @@ #ifndef SQL_STRING_INCLUDED #define SQL_STRING_INCLUDED -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -432,4 +432,7 @@ static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str, return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end; } +bool +validate_string(CHARSET_INFO *cs, const char *str, uint32 length, + size_t *valid_length, bool *length_error); #endif /* SQL_STRING_INCLUDED */ diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index b32f7d26cf3..cd86f96900e 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11932,7 +11932,9 @@ literal: str ? str->length() : 0, $1); if (!item_str || - !item_str->check_well_formed_result(&item_str->str_value, TRUE)) + !item_str->check_well_formed_result(&item_str->str_value, + true, //send error + true)) //truncate { MYSQL_YYABORT; } @@ -11961,7 +11963,9 @@ literal: str ? str->length() : 0, $1); if (!item_str || - !item_str->check_well_formed_result(&item_str->str_value, TRUE)) + !item_str->check_well_formed_result(&item_str->str_value, + true, //send error + true)) //truncate { MYSQL_YYABORT; } |