diff options
author | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-10-30 10:14:03 +0400 |
---|---|---|
committer | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-10-30 10:14:03 +0400 |
commit | 00c1c19e9410a942ed7b2b49e7dc7916a9d8010f (patch) | |
tree | 56d83c3d4412f22521f650d31f0aad70abfa1ade /sql/sql_string.cc | |
parent | 2bfeecca15d4301522b7c38250c1dfe30e039f3b (diff) | |
download | mariadb-git-00c1c19e9410a942ed7b2b49e7dc7916a9d8010f.tar.gz |
Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8
Problem: Too confusing error message when cannot convert
between string and column character sets on INSERT and UPDATE.
Fix: producing a better error message, instead of "Data too long"
in such cases
Additional changes: Adding "DROP TABLE IF EXISTS" into several
tests to be safe against failures in previous tests.
mysql-test/include/strict_autoinc.inc:
Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/ctype_recoding.result:
Fixing test results
mysql-test/r/ctype_utf8.result:
Fixing test results
mysql-test/r/fulltext.result:
Fixing test results
mysql-test/r/strict_autoinc_1myisam.result:
Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_2innodb.result:
Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_3heap.result:
Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_4bdb.result:
Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_5ndb.result:
Adding DROP TABLE to be safe against previous tests failure.
sql/field.cc:
- producing better error messages than
"DATA TRUNCATED" or "DATA TOO LONG" (in strict mode)
in case of "not well formed source" and
"cannot convert to field character set"
- Performance improvements: copying directly to
the target, instead of using an intermediate
String.
- Moving duplicate code into report_data_too_long() function.
sql/sql_string.cc:
Adding a new function to convert strings between character sets,
but not more than "nchar" characters - a helper function for
Field_string::store(), Field_varstring::store() and Field_blob::store().
sql/sql_string.h:
Adding new function prototype.
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- | sql/sql_string.cc | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 7aaca809113..85ff1fddc45 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -854,6 +854,162 @@ outp: } +/* + copy a string, + with optional character set conversion, + with optional left padding (for binary -> UCS2 conversion) + + SYNOPSIS + well_formed_copy_nhars() + to Store result here + to_length Maxinum length of "to" string + to_cs Character set of "to" string + from Copy from here + from_length Length of from string + from_cs From character set + nchars Copy not more that nchars characters + well_formed_error_pos Return position when "from" is not well formed + or NULL otherwise. + cannot_convert_error_pos Return position where a not convertable + character met, or NULL otherwise. + from_end_pos Return position where scanning of "from" + string stopped. + NOTES + + RETURN + length of bytes copied to 'to' +*/ + + +uint32 +well_formed_copy_nchars(CHARSET_INFO *to_cs, + char *to, uint to_length, + CHARSET_INFO *from_cs, + const char *from, uint from_length, + uint nchars, + const char **well_formed_error_pos, + const char **cannot_convert_error_pos, + const char **from_end_pos) +{ + uint res; + + if ((to_cs == &my_charset_bin) || + (from_cs == &my_charset_bin) || + (to_cs == from_cs) || + my_charset_same(from_cs, to_cs)) + { + if (to_length < to_cs->mbminlen || !nchars) + { + *from_end_pos= from; + *cannot_convert_error_pos= NULL; + *well_formed_error_pos= NULL; + return 0; + } + + if (to_cs == &my_charset_bin) + { + res= min(min(nchars, to_length), from_length); + memmove(to, from, res); + *from_end_pos= from + res; + *well_formed_error_pos= NULL; + *cannot_convert_error_pos= NULL; + } + else + { + int well_formed_error; + uint from_offset; + + if ((from_offset= (from_length % to_cs->mbminlen)) && + (from_cs == &my_charset_bin)) + { + /* + Copying from BINARY to UCS2 needs to prepend zeros sometimes: + INSERT INTO t1 (ucs2_column) VALUES (0x01); + 0x01 -> 0x0001 + */ + uint pad_length= to_cs->mbminlen - from_offset; + bzero(to, pad_length); + memmove(to + pad_length, from, from_offset); + nchars--; + from+= from_offset; + from_length-= from_offset; + to+= to_cs->mbminlen; + to_length-= to_cs->mbminlen; + } + + set_if_smaller(from_length, to_length); + res= to_cs->cset->well_formed_len(to_cs, from, from + from_length, + nchars, &well_formed_error); + memmove(to, from, res); + *from_end_pos= from + res; + *well_formed_error_pos= well_formed_error ? from + res : NULL; + *cannot_convert_error_pos= NULL; + if (from_offset) + res+= to_cs->mbminlen; + } + } + else + { + int cnvres; + my_wc_t wc; + int (*mb_wc)(struct charset_info_st *, my_wc_t *, + const uchar *, const uchar *)= from_cs->cset->mb_wc; + int (*wc_mb)(struct charset_info_st *, my_wc_t, + uchar *s, uchar *e)= to_cs->cset->wc_mb; + const uchar *from_end= (const uchar*) from + from_length; + uchar *to_end= (uchar*) to + to_length; + char *to_start= to; + *well_formed_error_pos= NULL; + *cannot_convert_error_pos= NULL; + + for ( ; nchars; nchars--) + { + const char *from_prev= from; + if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0) + from+= cnvres; + else if (cnvres == MY_CS_ILSEQ) + { + if (!*well_formed_error_pos) + *well_formed_error_pos= from; + from++; + wc= '?'; + } + else if (cnvres > MY_CS_TOOSMALL) + { + /* + A correct multibyte sequence detected + But it doesn't have Unicode mapping. + */ + if (!*cannot_convert_error_pos) + *cannot_convert_error_pos= from; + from+= (-cnvres); + wc= '?'; + } + else + break; // Not enough characters + +outp: + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) + to+= cnvres; + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + if (!*cannot_convert_error_pos) + *cannot_convert_error_pos= from_prev; + wc= '?'; + goto outp; + } + else + break; + } + *from_end_pos= from; + res= to - to_start; + } + return (uint32) res; +} + + + + void String::print(String *str) { char *st= (char*)Ptr, *end= st+str_length; |