diff options
author | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-10-30 10:14:03 +0400 |
---|---|---|
committer | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-10-30 10:14:03 +0400 |
commit | 00c1c19e9410a942ed7b2b49e7dc7916a9d8010f (patch) | |
tree | 56d83c3d4412f22521f650d31f0aad70abfa1ade /sql | |
parent | 2bfeecca15d4301522b7c38250c1dfe30e039f3b (diff) | |
download | mariadb-git-00c1c19e9410a942ed7b2b49e7dc7916a9d8010f.tar.gz |
Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8
Problem: The error message was too confusing when a value could not be
converted between the string and column character sets on INSERT and UPDATE.
Fix: Produce a better error message, instead of "Data too long",
in such cases.
Additional changes: Add "DROP TABLE IF EXISTS" to several
tests so that they are safe against failures in previous tests.
mysql-test/include/strict_autoinc.inc:
Adding DROP TABLE to be safe against failures in previous tests.
mysql-test/r/ctype_recoding.result:
Fixing test results
mysql-test/r/ctype_utf8.result:
Fixing test results
mysql-test/r/fulltext.result:
Fixing test results
mysql-test/r/strict_autoinc_1myisam.result:
Adding DROP TABLE to be safe against failures in previous tests.
mysql-test/r/strict_autoinc_2innodb.result:
Adding DROP TABLE to be safe against failures in previous tests.
mysql-test/r/strict_autoinc_3heap.result:
Adding DROP TABLE to be safe against failures in previous tests.
mysql-test/r/strict_autoinc_4bdb.result:
Adding DROP TABLE to be safe against failures in previous tests.
mysql-test/r/strict_autoinc_5ndb.result:
Adding DROP TABLE to be safe against failures in previous tests.
sql/field.cc:
- producing better error messages than
"DATA TRUNCATED" or "DATA TOO LONG" (in strict mode)
in case of "not well formed source" and
"cannot convert to field character set"
- Performance improvements: copying directly to
the target, instead of using an intermediate
String.
- Moving duplicate code into report_data_too_long() function.
sql/sql_string.cc:
Adding a new function that converts strings between character sets,
copying no more than "nchars" characters - a helper function for
Field_string::store(), Field_varstring::store() and Field_blob::store().
sql/sql_string.h:
Adding new function prototype.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/field.cc | 353 | ||||
-rw-r--r-- | sql/sql_string.cc | 156 | ||||
-rw-r--r-- | sql/sql_string.h | 8 |
3 files changed, 381 insertions, 136 deletions
diff --git a/sql/field.cc b/sql/field.cc index 4860f6ea3da..50b16e70cf2 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -5799,37 +5799,148 @@ void Field_datetime::sql_type(String &res) const ** A string may be varchar or binary ****************************************************************************/ +/* + Report "not well formed" or "cannot convert" error + after storing a character string info a field. + + SYNOPSIS + check_string_copy_error() + field - Field + well_formed_error_pos - where not well formed data was first met + cannot_convert_error_pos - where a not-convertable character was first met + end - end of the string + + NOTES + As of version 5.0 both cases return the same error: + + "Invalid string value: 'xxx' for column 't' at row 1" + + Future versions will possibly introduce a new error message: + + "Cannot convert character string: 'xxx' for column 't' at row 1" + + RETURN + FALSE - If errors didn't happen + TRUE - If an error happened +*/ + +static bool +check_string_copy_error(Field_str *field, + const char *well_formed_error_pos, + const char *cannot_convert_error_pos, + const char *end) +{ + const char *pos, *end_orig; + char tmp[64], *t; + + if (!(pos= well_formed_error_pos) && + !(pos= cannot_convert_error_pos)) + return FALSE; + + end_orig= end; + set_if_smaller(end, pos + 6); + + for (t= tmp; pos < end; pos++) + { + if (((unsigned char) *pos) >= 0x20 && + ((unsigned char) *pos) <= 0x7F) + { + *t++= *pos; + } + else + { + *t++= '\\'; + *t++= 'x'; + *t++= _dig_vec_upper[((unsigned char) *pos) >> 4]; + *t++= _dig_vec_upper[((unsigned char) *pos) & 15]; + } + } + if (end_orig > end) + { + *t++= '.'; + *t++= '.'; + *t++= '.'; + } + *t= '\0'; + push_warning_printf(field->table->in_use, + field->table->in_use->abort_on_warning ? 
+ MYSQL_ERROR::WARN_LEVEL_ERROR : + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, + ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), + "string", tmp, field->field_name, + (ulong) field->table->in_use->row_count); + return TRUE; +} + + + +/* + Send a truncation warning or a truncation error + after storing a too long character string info a field. + + SYNOPSIS + report_data_too_long() + field - Field + + RETURN + N/A +*/ + +inline void +report_data_too_long(Field_str *field) +{ + if (field->table->in_use->abort_on_warning) + field->set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1); + else + field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); +} + + +/* + Test if the given string contains important data: + not spaces for character string, + or any data for binary string. + + SYNOPSIS + test_if_important_data() + cs Character set + str String to test + strend String end + + RETURN + FALSE - If string does not have important data + TRUE - If string has some important data +*/ + +static bool +test_if_important_data(CHARSET_INFO *cs, const char *str, const char *strend) +{ + if (cs != &my_charset_bin) + str+= cs->cset->scan(cs, str, strend, MY_SEQ_SPACES); + return (str < strend); +} + + /* Copy a string and fill with space */ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) { - int error= 0, well_formed_error; - uint32 not_used; - char buff[STRING_BUFFER_USUAL_SIZE]; - String tmpstr(buff,sizeof(buff), &my_charset_bin); uint copy_length; + const char *well_formed_error_pos; + const char *cannot_convert_error_pos; + const char *from_end_pos; /* See the comment for Field_long::store(long long) */ DBUG_ASSERT(table->in_use == current_thd); - /* Convert character set if necessary */ - if (String::needs_conversion(length, cs, field_charset, ¬_used)) - { - uint conv_errors; - tmpstr.copy(from, length, cs, field_charset, &conv_errors); - from= tmpstr.ptr(); - length= tmpstr.length(); - if (conv_errors) - 
error= 2; - } - - /* Make sure we don't break a multibyte sequence or copy malformed data. */ - copy_length= field_charset->cset->well_formed_len(field_charset, - from,from+length, - field_length/ - field_charset->mbmaxlen, - &well_formed_error); - memmove(ptr, from, copy_length); + copy_length= well_formed_copy_nchars(field_charset, + ptr, field_length, + cs, from, length, + field_length / field_charset->mbmaxlen, + &well_formed_error_pos, + &cannot_convert_error_pos, + &from_end_pos); /* Append spaces if the string was shorter than the field. */ if (copy_length < field_length) @@ -5837,32 +5948,23 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) field_length-copy_length, field_charset->pad_char); + if (check_string_copy_error(this, well_formed_error_pos, + cannot_convert_error_pos, from + length)) + return 2; + /* Check if we lost any important data (anything in a binary string, or any non-space in others). */ - if ((copy_length < length) && table->in_use->count_cuted_fields) + if ((from_end_pos < from + length) && table->in_use->count_cuted_fields) { - if (binary()) - error= 2; - else + if (test_if_important_data(field_charset, from_end_pos, from + length)) { - const char *end=from+length; - from+= copy_length; - from+= field_charset->cset->scan(field_charset, from, end, - MY_SEQ_SPACES); - if (from != end) - error= 2; + report_data_too_long(this); + return 2; } } - if (error) - { - if (table->in_use->abort_on_warning) - set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1); - else - set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); - } - return error; + return 0; } @@ -6195,58 +6297,35 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table, int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs) { - uint32 not_used, copy_length; - char buff[STRING_BUFFER_USUAL_SIZE]; - String tmpstr(buff,sizeof(buff), &my_charset_bin); - int error_code= 0, well_formed_error; - enum 
MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN; + uint copy_length; + const char *well_formed_error_pos; + const char *cannot_convert_error_pos; + const char *from_end_pos; + + copy_length= well_formed_copy_nchars(field_charset, + ptr + length_bytes, field_length, + cs, from, length, + field_length / field_charset->mbmaxlen, + &well_formed_error_pos, + &cannot_convert_error_pos, + &from_end_pos); - /* Convert character set if necessary */ - if (String::needs_conversion(length, cs, field_charset, ¬_used)) - { - uint conv_errors; - tmpstr.copy(from, length, cs, field_charset, &conv_errors); - from= tmpstr.ptr(); - length= tmpstr.length(); - if (conv_errors) - error_code= WARN_DATA_TRUNCATED; - } - /* - Make sure we don't break a multibyte sequence - as well as don't copy a malformed data. - */ - copy_length= field_charset->cset->well_formed_len(field_charset, - from,from+length, - field_length/ - field_charset->mbmaxlen, - &well_formed_error); - memmove(ptr + length_bytes, from, copy_length); if (length_bytes == 1) *ptr= (uchar) copy_length; else int2store(ptr, copy_length); + if (check_string_copy_error(this, well_formed_error_pos, + cannot_convert_error_pos, from + length)) + return 2; + // Check if we lost something other than just trailing spaces - if ((copy_length < length) && table->in_use->count_cuted_fields && - !error_code) - { - if (!binary()) - { - const char *end= from + length; - from+= copy_length; - from+= field_charset->cset->scan(field_charset, from, end, MY_SEQ_SPACES); - /* If we lost only spaces then produce a NOTE, not a WARNING */ - if (from == end) - level= MYSQL_ERROR::WARN_LEVEL_NOTE; - } - error_code= WARN_DATA_TRUNCATED; - } - if (error_code) + if ((from_end_pos < from + length) && table->in_use->count_cuted_fields) { - if (level == MYSQL_ERROR::WARN_LEVEL_WARN && - table->in_use->abort_on_warning) - error_code= ER_DATA_TOO_LONG; - set_warning(level, error_code, 1); + if (test_if_important_data(field_charset, 
from_end_pos, from + length)) + report_data_too_long(this); + else /* If we lost only spaces then produce a NOTE, not a WARNING */ + set_warning(MYSQL_ERROR::WARN_LEVEL_NOTE, WARN_DATA_TRUNCATED, 1); return 2; } return 0; @@ -6828,68 +6907,70 @@ void Field_blob::put_length(char *pos, uint32 length) int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs) { - int error= 0, well_formed_error; + uint copy_length, new_length; + const char *well_formed_error_pos; + const char *cannot_convert_error_pos; + const char *from_end_pos, *tmp; + char buff[STRING_BUFFER_USUAL_SIZE]; + String tmpstr(buff,sizeof(buff), &my_charset_bin); + if (!length) { bzero(ptr,Field_blob::pack_length()); + return 0; } - else - { - bool was_conversion; - char buff[STRING_BUFFER_USUAL_SIZE]; - String tmpstr(buff,sizeof(buff), &my_charset_bin); - uint copy_length; - uint32 not_used; - /* Convert character set if necessary */ - if ((was_conversion= String::needs_conversion(length, cs, field_charset, - ¬_used))) - { - uint conv_errors; - if (tmpstr.copy(from, length, cs, field_charset, &conv_errors)) - { - /* Fatal OOM error */ - bzero(ptr,Field_blob::pack_length()); - return -1; - } - from= tmpstr.ptr(); - length= tmpstr.length(); - if (conv_errors) - error= 2; - } - - copy_length= max_data_length(); - /* - copy_length is OK as last argument to well_formed_len as this is never - used to limit the length of the data. The cut of long data is done with - the 'min()' call below. 
- */ - copy_length= field_charset->cset->well_formed_len(field_charset, - from,from + - min(length, copy_length), - copy_length, - &well_formed_error); - if (copy_length < length) - error= 2; - Field_blob::store_length(copy_length); - if (was_conversion || table->copy_blobs || copy_length <= MAX_FIELD_WIDTH) - { // Must make a copy - if (from != value.ptr()) // For valgrind - { - value.copy(from,copy_length,charset()); - from=value.ptr(); - } + if (from == value.ptr()) + { + uint32 dummy_offset; + if (!String::needs_conversion(length, cs, field_charset, &dummy_offset)) + { + Field_blob::store_length(length); + bmove(ptr+packlength,(char*) &from,sizeof(char*)); + return 0; } - bmove(ptr+packlength,(char*) &from,sizeof(char*)); + if (tmpstr.copy(from, length, cs)) + goto oom_error; + from= tmpstr.ptr(); } - if (error) + + new_length= min(max_data_length(), field_charset->mbmaxlen * length); + if (value.alloc(new_length)) + goto oom_error; + + /* + "length" is OK as "nchars" argument to well_formed_copy_nchars as this + is never used to limit the length of the data. The cut of long data + is done with the new_length value. 
+ */ + copy_length= well_formed_copy_nchars(field_charset, + (char*) value.ptr(), new_length, + cs, from, length, + length, + &well_formed_error_pos, + &cannot_convert_error_pos, + &from_end_pos); + + Field_blob::store_length(copy_length); + tmp= value.ptr(); + bmove(ptr+packlength,(char*) &tmp,sizeof(char*)); + + if (check_string_copy_error(this, well_formed_error_pos, + cannot_convert_error_pos, from + length)) + return 2; + + if (copy_length < length) { - if (table->in_use->abort_on_warning) - set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1); - else - set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); + report_data_too_long(this); + return 2; } + return 0; + +oom_error: + /* Fatal OOM error */ + bzero(ptr,Field_blob::pack_length()); + return -1; } diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 7aaca809113..85ff1fddc45 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -854,6 +854,162 @@ outp: } +/* + copy a string, + with optional character set conversion, + with optional left padding (for binary -> UCS2 conversion) + + SYNOPSIS + well_formed_copy_nhars() + to Store result here + to_length Maxinum length of "to" string + to_cs Character set of "to" string + from Copy from here + from_length Length of from string + from_cs From character set + nchars Copy not more that nchars characters + well_formed_error_pos Return position when "from" is not well formed + or NULL otherwise. + cannot_convert_error_pos Return position where a not convertable + character met, or NULL otherwise. + from_end_pos Return position where scanning of "from" + string stopped. 
+ NOTES + + RETURN + length of bytes copied to 'to' +*/ + + +uint32 +well_formed_copy_nchars(CHARSET_INFO *to_cs, + char *to, uint to_length, + CHARSET_INFO *from_cs, + const char *from, uint from_length, + uint nchars, + const char **well_formed_error_pos, + const char **cannot_convert_error_pos, + const char **from_end_pos) +{ + uint res; + + if ((to_cs == &my_charset_bin) || + (from_cs == &my_charset_bin) || + (to_cs == from_cs) || + my_charset_same(from_cs, to_cs)) + { + if (to_length < to_cs->mbminlen || !nchars) + { + *from_end_pos= from; + *cannot_convert_error_pos= NULL; + *well_formed_error_pos= NULL; + return 0; + } + + if (to_cs == &my_charset_bin) + { + res= min(min(nchars, to_length), from_length); + memmove(to, from, res); + *from_end_pos= from + res; + *well_formed_error_pos= NULL; + *cannot_convert_error_pos= NULL; + } + else + { + int well_formed_error; + uint from_offset; + + if ((from_offset= (from_length % to_cs->mbminlen)) && + (from_cs == &my_charset_bin)) + { + /* + Copying from BINARY to UCS2 needs to prepend zeros sometimes: + INSERT INTO t1 (ucs2_column) VALUES (0x01); + 0x01 -> 0x0001 + */ + uint pad_length= to_cs->mbminlen - from_offset; + bzero(to, pad_length); + memmove(to + pad_length, from, from_offset); + nchars--; + from+= from_offset; + from_length-= from_offset; + to+= to_cs->mbminlen; + to_length-= to_cs->mbminlen; + } + + set_if_smaller(from_length, to_length); + res= to_cs->cset->well_formed_len(to_cs, from, from + from_length, + nchars, &well_formed_error); + memmove(to, from, res); + *from_end_pos= from + res; + *well_formed_error_pos= well_formed_error ? 
from + res : NULL; + *cannot_convert_error_pos= NULL; + if (from_offset) + res+= to_cs->mbminlen; + } + } + else + { + int cnvres; + my_wc_t wc; + int (*mb_wc)(struct charset_info_st *, my_wc_t *, + const uchar *, const uchar *)= from_cs->cset->mb_wc; + int (*wc_mb)(struct charset_info_st *, my_wc_t, + uchar *s, uchar *e)= to_cs->cset->wc_mb; + const uchar *from_end= (const uchar*) from + from_length; + uchar *to_end= (uchar*) to + to_length; + char *to_start= to; + *well_formed_error_pos= NULL; + *cannot_convert_error_pos= NULL; + + for ( ; nchars; nchars--) + { + const char *from_prev= from; + if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0) + from+= cnvres; + else if (cnvres == MY_CS_ILSEQ) + { + if (!*well_formed_error_pos) + *well_formed_error_pos= from; + from++; + wc= '?'; + } + else if (cnvres > MY_CS_TOOSMALL) + { + /* + A correct multibyte sequence detected + But it doesn't have Unicode mapping. + */ + if (!*cannot_convert_error_pos) + *cannot_convert_error_pos= from; + from+= (-cnvres); + wc= '?'; + } + else + break; // Not enough characters + +outp: + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) + to+= cnvres; + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + if (!*cannot_convert_error_pos) + *cannot_convert_error_pos= from_prev; + wc= '?'; + goto outp; + } + else + break; + } + *from_end_pos= from; + res= to - to_start; + } + return (uint32) res; +} + + + + void String::print(String *str) { char *st= (char*)Ptr, *end= st+str_length; diff --git a/sql/sql_string.h b/sql/sql_string.h index 0659f684afe..09b8478adf8 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, const char *from, uint32 from_length, CHARSET_INFO *from_cs, uint *errors); +uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs, + char *to, uint to_length, + CHARSET_INFO *from_cs, + const char 
*from, uint from_length, + uint nchars, + const char **well_formed_error_pos, + const char **cannot_convert_error_pos, + const char **from_end_pos); class String { |