diff options
-rw-r--r-- | include/m_ctype.h | 4 | ||||
-rw-r--r-- | mysql-test/r/ctype_mb.result | 2 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8.result | 10 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf8.test | 7 | ||||
-rw-r--r-- | sql/field.cc | 48 | ||||
-rw-r--r-- | sql/item_strfunc.h | 3 | ||||
-rw-r--r-- | sql/item_timefunc.cc | 5 | ||||
-rw-r--r-- | strings/ctype-big5.c | 1 | ||||
-rw-r--r-- | strings/ctype-bin.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 1 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 1 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 1 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-mb.c | 16 | ||||
-rw-r--r-- | strings/ctype-simple.c | 10 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 1 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 7 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 12 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 1 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 1 |
20 files changed, 105 insertions, 28 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 4a9415f43f9..88c3418fc0d 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -144,6 +144,8 @@ typedef struct my_charset_handler_st int (*mbcharlen)(struct charset_info_st *, uint); uint (*numchars)(struct charset_info_st *, const char *b, const char *e); uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos); + uint (*wellformedlen)(struct charset_info_st *, + const char *b,const char *e, uint nchars); uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length); /* Unicode convertion */ @@ -311,6 +313,7 @@ int my_wildcmp_8bit(CHARSET_INFO *, uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e); uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); +uint my_wellformedlen_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); int my_mbcharlen_8bit(CHARSET_INFO *, uint c); @@ -327,6 +330,7 @@ int my_wildcmp_mb(CHARSET_INFO *, int escape, int w_one, int w_many); uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); +uint my_wellformedlen_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); uint my_instr_mb(struct charset_info_st *, const char *b, uint b_length, const char *s, uint s_length, diff --git a/mysql-test/r/ctype_mb.result b/mysql-test/r/ctype_mb.result index 288033e7530..edccb047c85 100644 --- a/mysql-test/r/ctype_mb.result +++ b/mysql-test/r/ctype_mb.result @@ -22,7 +22,7 @@ Warning 1264 Data truncated for column 'c2' at row 1 Warning 1264 Data truncated for column 'c3' at row 1 SELECT * FROM t1; c1 c2 c3 -aaaabbbbcccc aaaabbbbcccc aaaabbbbcccc +aaaa aaaa aaaa DROP TABLE t1; CREATE TABLE t1 (a CHAR(4) CHARACTER SET utf8, KEY key_a(a(3))); SHOW CREATE TABLE t1; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 7c05b1ea446..31f26c421b6 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -73,9 +73,17 @@ create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d"); show create table t1; Table Create Table t1 CREATE TABLE `t1` ( - `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(4) character set utf8 default NULL + `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(10) character set utf8 default NULL ) ENGINE=MyISAM DEFAULT CHARSET=latin1 select * from t1; date_format("2004-01-19 10:10:10", "%Y-%m-%d") 2004-01-19 drop table t1; +set names koi8r; +create table t1 (s1 char(1) character set utf8); +insert into t1 values (_koi8r'ÁÂ'); +Warnings: +Warning 1264 Data truncated for column 's1' at row 1 +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; +s1 hex(s1) char_length(s1) octet_length(s1) +Á D0B0 1 2 diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 5e9324dd68f..c2ea1ed20a4 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -53,3 +53,10 @@ show create table t1; select * from t1; drop table t1; +# +# Bug #2366 Wrong utf8 behaviour when data is trancated +# +set names koi8r; +create table t1 (s1 char(1) character set utf8); +insert into t1 values (_koi8r'ÁÂ'); +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; diff --git a/sql/field.cc b/sql/field.cc index 9f25b770ab0..57b77693275 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -4060,6 +4060,8 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) int error= 0; char buff[80]; String tmpstr(buff,sizeof(buff), &my_charset_bin); + uint copy_length; + /* Convert character set if nesessary */ if (String::needs_conversion(from, length, cs, field_charset)) { @@ -4067,27 +4069,31 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) from= tmpstr.ptr(); length= tmpstr.length(); } - if (length <= field_length) - { - memcpy(ptr,from,length); - if (length < field_length) - field_charset->cset->fill(field_charset,ptr+length,field_length-length, - ' '); - } - else - { - memcpy(ptr,from,field_length); - if (current_thd->count_cuted_fields) - { // Check if we loosed some info - const char *end=from+length; - from+= field_length; - from+= field_charset->cset->scan(field_charset, from, end, - MY_SEQ_SPACES); - if (from != end) - { - set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED); - error=1; - } + + /* + Make sure we don't break a multybite sequence + as well as don't copy a malformed data. + */ + copy_length= field_charset->cset->wellformedlen(field_charset, + from,from+length, + field_length/ + field_charset->mbmaxlen); + memcpy(ptr,from,copy_length); + if (copy_length < field_length) // Append spaces if shorter + field_charset->cset->fill(field_charset,ptr+copy_length, + field_length-copy_length,' '); + + + if (current_thd->count_cuted_fields && (copy_length < length)) + { // Check if we loosed some info + const char *end=from+length; + from+= copy_length; + from+= field_charset->cset->scan(field_charset, from, end, + MY_SEQ_SPACES); + if (from != end) + { + set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED); + error=1; } } return error; diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index fd0afb19726..465300e721e 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -518,7 +518,8 @@ public: void fix_length_and_dec() { collation.set(default_charset()); - decimals=0; max_length=args[0]->max_length*2; + decimals=0; + max_length=args[0]->max_length*2*collation.collation->mbmaxlen; } }; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index ed5b9ecc0db..062e7fc7b44 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -1286,12 +1286,13 @@ void Item_func_date_format::fix_length_and_dec() if (args[1]->type() == STRING_ITEM) { // Optimize the normal case fixed_length=1; - max_length=format_length(((Item_string*) args[1])->const_string()); + max_length= format_length(((Item_string*) args[1])->const_string())* + collation.collation->mbmaxlen; } else { fixed_length=0; - max_length=args[1]->max_length*10; + max_length=args[1]->max_length*10*collation.collation->mbmaxlen; set_if_smaller(max_length,MAX_BLOB_WIDTH); } maybe_null=1; // If wrong date diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 8d4081fb2aa..574156a99ed 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6245,6 +6245,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler= mbcharlen_big5, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_big5, /* mb_wc */ my_wc_mb_big5, /* wc_mb */ diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 43575bbc277..fc22938d46e 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -347,6 +347,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_bin, my_wc_mb_bin, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 366a5d500ed..278e8529e83 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8653,6 +8653,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_euc_kr, /* mb_wc */ my_wc_mb_euc_kr, /* wc_mb */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 44a58b2b906..722f00f0f7a 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5704,6 +5704,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_gb2312, /* mb_wc */ my_wc_mb_gb2312, /* wc_mb */ diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 5475c3bd363..9e71a18e531 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9900,6 +9900,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gbk, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_gbk, my_wc_mb_gbk, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index c00ded21575..933737b5f61 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -180,6 +180,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_8bit, my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_latin1, my_wc_mb_latin1, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index b5e8c4598a0..377bf311d38 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -274,6 +274,22 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)), return pos ? e+2-b0 : b-b0; } +uint my_wellformedlen_mb(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, uint pos) +{ + uint mblen; + const char *b0=b; + + while (pos && b<e) + { + b+= (mblen= my_ismbchar(cs,b,e)) ? mblen : 1; + pos--; + } + return b-b0; +} + + + uint my_instr_mb(CHARSET_INFO *cs, const char *b, uint b_length, const char *s, uint s_length, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 62eb1418970..fb2dee99ba5 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1020,6 +1020,15 @@ uint my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)), return pos; } +uint my_wellformedlen_8bit(CHARSET_INFO *cs __attribute__((unused)), + const char *b, + const char *e, + uint nchars) +{ + uint nbytes= e-b; + return nbytes < nchars ? nbytes : nchars; +} + uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)), const char *ptr, uint length) { @@ -1096,6 +1105,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler= my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_8bit, my_wc_mb_8bit, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 42f32fe739b..feff0fff227 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4489,6 +4489,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_sjis, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_sjis, /* mb_wc */ my_wc_mb_sjis, /* wc_mb */ diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 09552a0dc23..d577b964405 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -874,7 +874,8 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,plFF static -int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc, +int my_mb_wc_tis620(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *wc, const unsigned char *str, const unsigned char *end __attribute__((unused))) { @@ -886,7 +887,8 @@ int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc, } static -int my_wc_mb_tis620(CHARSET_INFO *cs,my_wc_t wc, +int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, unsigned char *str, unsigned char *end __attribute__((unused))) { @@ -919,6 +921,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_tis620, /* mb_wc */ my_wc_mb_tis620, /* wc_mb */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 775501027d0..bb74e0cf56b 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1002,6 +1002,17 @@ uint my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)), } static +uint my_wellformedlen_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b, + const char *e, + uint nchars) +{ + uint nbytes= (e-b) & ~ (uint)1; + nchars*= 2; + return nbytes < nchars ? nbytes : nchars; +} + +static void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), char *s, uint l, int fill) { @@ -1287,6 +1298,7 @@ static MY_CHARSET_HANDLER my_charset_ucs2_handler= my_mbcharlen_ucs2, /* mbcharlen */ my_numchars_ucs2, my_charpos_ucs2, + my_wellformedlen_ucs2, my_lengthsp_ucs2, my_ucs2_uni, /* mb_wc */ my_uni_ucs2, /* wc_mb */ diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index f6928e9426e..f27ddcf3e30 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8444,6 +8444,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_ujis, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_euc_jp, /* mb_wc */ my_wc_mb_euc_jp, /* wc_mb */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 8004fba75b7..ef9719bf040 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1969,6 +1969,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_utf8, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_utf8_uni, my_uni_utf8, |