summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <bar@mysql.com/bar.intranet.mysql.r18.ru>2006-10-30 10:14:03 +0400
committerunknown <bar@mysql.com/bar.intranet.mysql.r18.ru>2006-10-30 10:14:03 +0400
commit00c1c19e9410a942ed7b2b49e7dc7916a9d8010f (patch)
tree56d83c3d4412f22521f650d31f0aad70abfa1ade /sql
parent2bfeecca15d4301522b7c38250c1dfe30e039f3b (diff)
downloadmariadb-git-00c1c19e9410a942ed7b2b49e7dc7916a9d8010f.tar.gz
Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8
Problem: Too confusing error message when cannot convert between string and column character sets on INSERT and UPDATE. Fix: producing a better error message, instead of "Data too long" in such cases Additional changes: Adding "DROP TABLE IF EXISTS" into several tests to be safe against failures in previous tests. mysql-test/include/strict_autoinc.inc: Adding DROP TABLE to be safe against previous tests failure. mysql-test/r/ctype_recoding.result: Fixing test results mysql-test/r/ctype_utf8.result: Fixing test results mysql-test/r/fulltext.result: Fixing test results mysql-test/r/strict_autoinc_1myisam.result: Adding DROP TABLE to be safe against previous tests failure. mysql-test/r/strict_autoinc_2innodb.result: Adding DROP TABLE to be safe against previous tests failure. mysql-test/r/strict_autoinc_3heap.result: Adding DROP TABLE to be safe against previous tests failure. mysql-test/r/strict_autoinc_4bdb.result: Adding DROP TABLE to be safe against previous tests failure. mysql-test/r/strict_autoinc_5ndb.result: Adding DROP TABLE to be safe against previous tests failure. sql/field.cc: - producing better error messages than "DATA TRUNCATED" or "DATA TOO LONG" (in strict mode) in case of "not well formed source" and "cannot convert to field character set" - Performance improvements: copying directly to the target, instead of using an intermediate String. - Moving duplicate code into report_data_too_long() function. sql/sql_string.cc: Adding a new function to convert strings between character sets, but not more than "nchar" characters - a helper function for Field_string::store(), Field_varstring::store() and Field_blob::store(). sql/sql_string.h: Adding new function prototype.
Diffstat (limited to 'sql')
-rw-r--r--sql/field.cc353
-rw-r--r--sql/sql_string.cc156
-rw-r--r--sql/sql_string.h8
3 files changed, 381 insertions, 136 deletions
diff --git a/sql/field.cc b/sql/field.cc
index 4860f6ea3da..50b16e70cf2 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -5799,37 +5799,148 @@ void Field_datetime::sql_type(String &res) const
** A string may be varchar or binary
****************************************************************************/
+/*
+ Report "not well formed" or "cannot convert" error
+ after storing a character string into a field.
+
+ SYNOPSIS
+ check_string_copy_error()
+ field - Field
+ well_formed_error_pos - where not well formed data was first met
+ cannot_convert_error_pos - where a non-convertible character was first met
+ end - end of the string
+
+ NOTES
+ As of version 5.0 both cases return the same error:
+
+ "Invalid string value: 'xxx' for column 't' at row 1"
+
+ Future versions will possibly introduce a new error message:
+
+ "Cannot convert character string: 'xxx' for column 't' at row 1"
+
+ RETURN
+ FALSE - If errors didn't happen
+ TRUE - If an error happened
+*/
+
+static bool
+check_string_copy_error(Field_str *field,
+ const char *well_formed_error_pos,
+ const char *cannot_convert_error_pos,
+ const char *end)
+{
+ const char *pos, *end_orig;
+ char tmp[64], *t;
+
+ if (!(pos= well_formed_error_pos) &&
+ !(pos= cannot_convert_error_pos))
+ return FALSE;
+
+ end_orig= end;
+ set_if_smaller(end, pos + 6);
+
+ for (t= tmp; pos < end; pos++)
+ {
+ if (((unsigned char) *pos) >= 0x20 &&
+ ((unsigned char) *pos) <= 0x7F)
+ {
+ *t++= *pos;
+ }
+ else
+ {
+ *t++= '\\';
+ *t++= 'x';
+ *t++= _dig_vec_upper[((unsigned char) *pos) >> 4];
+ *t++= _dig_vec_upper[((unsigned char) *pos) & 15];
+ }
+ }
+ if (end_orig > end)
+ {
+ *t++= '.';
+ *t++= '.';
+ *t++= '.';
+ }
+ *t= '\0';
+ push_warning_printf(field->table->in_use,
+ field->table->in_use->abort_on_warning ?
+ MYSQL_ERROR::WARN_LEVEL_ERROR :
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
+ ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
+ "string", tmp, field->field_name,
+ (ulong) field->table->in_use->row_count);
+ return TRUE;
+}
+
+
+
+/*
+ Send a truncation warning or a truncation error
+ after storing a too long character string into a field.
+
+ SYNOPSIS
+ report_data_too_long()
+ field - Field
+
+ RETURN
+ N/A
+*/
+
+inline void
+report_data_too_long(Field_str *field)
+{
+ if (field->table->in_use->abort_on_warning)
+ field->set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
+ else
+ field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
+}
+
+
+/*
+ Test if the given string contains important data:
+ not spaces for character string,
+ or any data for binary string.
+
+ SYNOPSIS
+ test_if_important_data()
+ cs Character set
+ str String to test
+ strend String end
+
+ RETURN
+ FALSE - If string does not have important data
+ TRUE - If string has some important data
+*/
+
+static bool
+test_if_important_data(CHARSET_INFO *cs, const char *str, const char *strend)
+{
+ if (cs != &my_charset_bin)
+ str+= cs->cset->scan(cs, str, strend, MY_SEQ_SPACES);
+ return (str < strend);
+}
+
+
/* Copy a string and fill with space */
int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
{
- int error= 0, well_formed_error;
- uint32 not_used;
- char buff[STRING_BUFFER_USUAL_SIZE];
- String tmpstr(buff,sizeof(buff), &my_charset_bin);
uint copy_length;
+ const char *well_formed_error_pos;
+ const char *cannot_convert_error_pos;
+ const char *from_end_pos;
/* See the comment for Field_long::store(long long) */
DBUG_ASSERT(table->in_use == current_thd);
- /* Convert character set if necessary */
- if (String::needs_conversion(length, cs, field_charset, &not_used))
- {
- uint conv_errors;
- tmpstr.copy(from, length, cs, field_charset, &conv_errors);
- from= tmpstr.ptr();
- length= tmpstr.length();
- if (conv_errors)
- error= 2;
- }
-
- /* Make sure we don't break a multibyte sequence or copy malformed data. */
- copy_length= field_charset->cset->well_formed_len(field_charset,
- from,from+length,
- field_length/
- field_charset->mbmaxlen,
- &well_formed_error);
- memmove(ptr, from, copy_length);
+ copy_length= well_formed_copy_nchars(field_charset,
+ ptr, field_length,
+ cs, from, length,
+ field_length / field_charset->mbmaxlen,
+ &well_formed_error_pos,
+ &cannot_convert_error_pos,
+ &from_end_pos);
/* Append spaces if the string was shorter than the field. */
if (copy_length < field_length)
@@ -5837,32 +5948,23 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
field_length-copy_length,
field_charset->pad_char);
+ if (check_string_copy_error(this, well_formed_error_pos,
+ cannot_convert_error_pos, from + length))
+ return 2;
+
/*
Check if we lost any important data (anything in a binary string,
or any non-space in others).
*/
- if ((copy_length < length) && table->in_use->count_cuted_fields)
+ if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
- if (binary())
- error= 2;
- else
+ if (test_if_important_data(field_charset, from_end_pos, from + length))
{
- const char *end=from+length;
- from+= copy_length;
- from+= field_charset->cset->scan(field_charset, from, end,
- MY_SEQ_SPACES);
- if (from != end)
- error= 2;
+ report_data_too_long(this);
+ return 2;
}
}
- if (error)
- {
- if (table->in_use->abort_on_warning)
- set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
- else
- set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
- }
- return error;
+ return 0;
}
@@ -6195,58 +6297,35 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table,
int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
{
- uint32 not_used, copy_length;
- char buff[STRING_BUFFER_USUAL_SIZE];
- String tmpstr(buff,sizeof(buff), &my_charset_bin);
- int error_code= 0, well_formed_error;
- enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN;
+ uint copy_length;
+ const char *well_formed_error_pos;
+ const char *cannot_convert_error_pos;
+ const char *from_end_pos;
+
+ copy_length= well_formed_copy_nchars(field_charset,
+ ptr + length_bytes, field_length,
+ cs, from, length,
+ field_length / field_charset->mbmaxlen,
+ &well_formed_error_pos,
+ &cannot_convert_error_pos,
+ &from_end_pos);
- /* Convert character set if necessary */
- if (String::needs_conversion(length, cs, field_charset, &not_used))
- {
- uint conv_errors;
- tmpstr.copy(from, length, cs, field_charset, &conv_errors);
- from= tmpstr.ptr();
- length= tmpstr.length();
- if (conv_errors)
- error_code= WARN_DATA_TRUNCATED;
- }
- /*
- Make sure we don't break a multibyte sequence
- as well as don't copy a malformed data.
- */
- copy_length= field_charset->cset->well_formed_len(field_charset,
- from,from+length,
- field_length/
- field_charset->mbmaxlen,
- &well_formed_error);
- memmove(ptr + length_bytes, from, copy_length);
if (length_bytes == 1)
*ptr= (uchar) copy_length;
else
int2store(ptr, copy_length);
+ if (check_string_copy_error(this, well_formed_error_pos,
+ cannot_convert_error_pos, from + length))
+ return 2;
+
// Check if we lost something other than just trailing spaces
- if ((copy_length < length) && table->in_use->count_cuted_fields &&
- !error_code)
- {
- if (!binary())
- {
- const char *end= from + length;
- from+= copy_length;
- from+= field_charset->cset->scan(field_charset, from, end, MY_SEQ_SPACES);
- /* If we lost only spaces then produce a NOTE, not a WARNING */
- if (from == end)
- level= MYSQL_ERROR::WARN_LEVEL_NOTE;
- }
- error_code= WARN_DATA_TRUNCATED;
- }
- if (error_code)
+ if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
- if (level == MYSQL_ERROR::WARN_LEVEL_WARN &&
- table->in_use->abort_on_warning)
- error_code= ER_DATA_TOO_LONG;
- set_warning(level, error_code, 1);
+ if (test_if_important_data(field_charset, from_end_pos, from + length))
+ report_data_too_long(this);
+ else /* If we lost only spaces then produce a NOTE, not a WARNING */
+ set_warning(MYSQL_ERROR::WARN_LEVEL_NOTE, WARN_DATA_TRUNCATED, 1);
return 2;
}
return 0;
@@ -6828,68 +6907,70 @@ void Field_blob::put_length(char *pos, uint32 length)
int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
{
- int error= 0, well_formed_error;
+ uint copy_length, new_length;
+ const char *well_formed_error_pos;
+ const char *cannot_convert_error_pos;
+ const char *from_end_pos, *tmp;
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ String tmpstr(buff,sizeof(buff), &my_charset_bin);
+
if (!length)
{
bzero(ptr,Field_blob::pack_length());
+ return 0;
}
- else
- {
- bool was_conversion;
- char buff[STRING_BUFFER_USUAL_SIZE];
- String tmpstr(buff,sizeof(buff), &my_charset_bin);
- uint copy_length;
- uint32 not_used;
- /* Convert character set if necessary */
- if ((was_conversion= String::needs_conversion(length, cs, field_charset,
- &not_used)))
- {
- uint conv_errors;
- if (tmpstr.copy(from, length, cs, field_charset, &conv_errors))
- {
- /* Fatal OOM error */
- bzero(ptr,Field_blob::pack_length());
- return -1;
- }
- from= tmpstr.ptr();
- length= tmpstr.length();
- if (conv_errors)
- error= 2;
- }
-
- copy_length= max_data_length();
- /*
- copy_length is OK as last argument to well_formed_len as this is never
- used to limit the length of the data. The cut of long data is done with
- the 'min()' call below.
- */
- copy_length= field_charset->cset->well_formed_len(field_charset,
- from,from +
- min(length, copy_length),
- copy_length,
- &well_formed_error);
- if (copy_length < length)
- error= 2;
- Field_blob::store_length(copy_length);
- if (was_conversion || table->copy_blobs || copy_length <= MAX_FIELD_WIDTH)
- { // Must make a copy
- if (from != value.ptr()) // For valgrind
- {
- value.copy(from,copy_length,charset());
- from=value.ptr();
- }
+ if (from == value.ptr())
+ {
+ uint32 dummy_offset;
+ if (!String::needs_conversion(length, cs, field_charset, &dummy_offset))
+ {
+ Field_blob::store_length(length);
+ bmove(ptr+packlength,(char*) &from,sizeof(char*));
+ return 0;
}
- bmove(ptr+packlength,(char*) &from,sizeof(char*));
+ if (tmpstr.copy(from, length, cs))
+ goto oom_error;
+ from= tmpstr.ptr();
}
- if (error)
+
+ new_length= min(max_data_length(), field_charset->mbmaxlen * length);
+ if (value.alloc(new_length))
+ goto oom_error;
+
+ /*
+ "length" is OK as "nchars" argument to well_formed_copy_nchars as this
+ is never used to limit the length of the data. The cut of long data
+ is done with the new_length value.
+ */
+ copy_length= well_formed_copy_nchars(field_charset,
+ (char*) value.ptr(), new_length,
+ cs, from, length,
+ length,
+ &well_formed_error_pos,
+ &cannot_convert_error_pos,
+ &from_end_pos);
+
+ Field_blob::store_length(copy_length);
+ tmp= value.ptr();
+ bmove(ptr+packlength,(char*) &tmp,sizeof(char*));
+
+ if (check_string_copy_error(this, well_formed_error_pos,
+ cannot_convert_error_pos, from + length))
+ return 2;
+
+ if (copy_length < length)
{
- if (table->in_use->abort_on_warning)
- set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
- else
- set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
+ report_data_too_long(this);
+ return 2;
}
+
return 0;
+
+oom_error:
+ /* Fatal OOM error */
+ bzero(ptr,Field_blob::pack_length());
+ return -1;
}
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 7aaca809113..85ff1fddc45 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -854,6 +854,162 @@ outp:
}
+/*
+ copy a string,
+ with optional character set conversion,
+ with optional left padding (for binary -> UCS2 conversion)
+
+ SYNOPSIS
+ well_formed_copy_nchars()
+ to Store result here
+ to_length Maximum length of "to" string
+ to_cs Character set of "to" string
+ from Copy from here
+ from_length Length of from string
+ from_cs From character set
+ nchars Copy not more than nchars characters
+ well_formed_error_pos Return position when "from" is not well formed
+ or NULL otherwise.
+ cannot_convert_error_pos Return position where a non-convertible
+ character was met, or NULL otherwise.
+ from_end_pos Return position where scanning of "from"
+ string stopped.
+ NOTES
+
+ RETURN
+ number of bytes copied to 'to'
+*/
+
+
+uint32
+well_formed_copy_nchars(CHARSET_INFO *to_cs,
+ char *to, uint to_length,
+ CHARSET_INFO *from_cs,
+ const char *from, uint from_length,
+ uint nchars,
+ const char **well_formed_error_pos,
+ const char **cannot_convert_error_pos,
+ const char **from_end_pos)
+{
+ uint res;
+
+ if ((to_cs == &my_charset_bin) ||
+ (from_cs == &my_charset_bin) ||
+ (to_cs == from_cs) ||
+ my_charset_same(from_cs, to_cs))
+ {
+ if (to_length < to_cs->mbminlen || !nchars)
+ {
+ *from_end_pos= from;
+ *cannot_convert_error_pos= NULL;
+ *well_formed_error_pos= NULL;
+ return 0;
+ }
+
+ if (to_cs == &my_charset_bin)
+ {
+ res= min(min(nchars, to_length), from_length);
+ memmove(to, from, res);
+ *from_end_pos= from + res;
+ *well_formed_error_pos= NULL;
+ *cannot_convert_error_pos= NULL;
+ }
+ else
+ {
+ int well_formed_error;
+ uint from_offset;
+
+ if ((from_offset= (from_length % to_cs->mbminlen)) &&
+ (from_cs == &my_charset_bin))
+ {
+ /*
+ Copying from BINARY to UCS2 needs to prepend zeros sometimes:
+ INSERT INTO t1 (ucs2_column) VALUES (0x01);
+ 0x01 -> 0x0001
+ */
+ uint pad_length= to_cs->mbminlen - from_offset;
+ bzero(to, pad_length);
+ memmove(to + pad_length, from, from_offset);
+ nchars--;
+ from+= from_offset;
+ from_length-= from_offset;
+ to+= to_cs->mbminlen;
+ to_length-= to_cs->mbminlen;
+ }
+
+ set_if_smaller(from_length, to_length);
+ res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
+ nchars, &well_formed_error);
+ memmove(to, from, res);
+ *from_end_pos= from + res;
+ *well_formed_error_pos= well_formed_error ? from + res : NULL;
+ *cannot_convert_error_pos= NULL;
+ if (from_offset)
+ res+= to_cs->mbminlen;
+ }
+ }
+ else
+ {
+ int cnvres;
+ my_wc_t wc;
+ int (*mb_wc)(struct charset_info_st *, my_wc_t *,
+ const uchar *, const uchar *)= from_cs->cset->mb_wc;
+ int (*wc_mb)(struct charset_info_st *, my_wc_t,
+ uchar *s, uchar *e)= to_cs->cset->wc_mb;
+ const uchar *from_end= (const uchar*) from + from_length;
+ uchar *to_end= (uchar*) to + to_length;
+ char *to_start= to;
+ *well_formed_error_pos= NULL;
+ *cannot_convert_error_pos= NULL;
+
+ for ( ; nchars; nchars--)
+ {
+ const char *from_prev= from;
+ if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+ from+= cnvres;
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ if (!*well_formed_error_pos)
+ *well_formed_error_pos= from;
+ from++;
+ wc= '?';
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ if (!*cannot_convert_error_pos)
+ *cannot_convert_error_pos= from;
+ from+= (-cnvres);
+ wc= '?';
+ }
+ else
+ break; // Not enough characters
+
+outp:
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+ to+= cnvres;
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ if (!*cannot_convert_error_pos)
+ *cannot_convert_error_pos= from_prev;
+ wc= '?';
+ goto outp;
+ }
+ else
+ break;
+ }
+ *from_end_pos= from;
+ res= to - to_start;
+ }
+ return (uint32) res;
+}
+
+
+
+
void String::print(String *str)
{
char *st= (char*)Ptr, *end= st+str_length;
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 0659f684afe..09b8478adf8 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length,
CHARSET_INFO *from_cs, uint *errors);
+uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
+ char *to, uint to_length,
+ CHARSET_INFO *from_cs,
+ const char *from, uint from_length,
+ uint nchars,
+ const char **well_formed_error_pos,
+ const char **cannot_convert_error_pos,
+ const char **from_end_pos);
class String
{