summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/m_ctype.h4
-rw-r--r--mysql-test/r/ctype_mb.result2
-rw-r--r--mysql-test/r/ctype_utf8.result10
-rw-r--r--mysql-test/t/ctype_utf8.test7
-rw-r--r--sql/field.cc48
-rw-r--r--sql/item_strfunc.h3
-rw-r--r--sql/item_timefunc.cc5
-rw-r--r--strings/ctype-big5.c1
-rw-r--r--strings/ctype-bin.c1
-rw-r--r--strings/ctype-euc_kr.c1
-rw-r--r--strings/ctype-gb2312.c1
-rw-r--r--strings/ctype-gbk.c1
-rw-r--r--strings/ctype-latin1.c1
-rw-r--r--strings/ctype-mb.c16
-rw-r--r--strings/ctype-simple.c10
-rw-r--r--strings/ctype-sjis.c1
-rw-r--r--strings/ctype-tis620.c7
-rw-r--r--strings/ctype-ucs2.c12
-rw-r--r--strings/ctype-ujis.c1
-rw-r--r--strings/ctype-utf8.c1
20 files changed, 105 insertions, 28 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 4a9415f43f9..88c3418fc0d 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -144,6 +144,8 @@ typedef struct my_charset_handler_st
int (*mbcharlen)(struct charset_info_st *, uint);
uint (*numchars)(struct charset_info_st *, const char *b, const char *e);
uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos);
+ uint (*wellformedlen)(struct charset_info_st *,
+ const char *b,const char *e, uint nchars);
uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length);
/* Unicode convertion */
@@ -311,6 +313,7 @@ int my_wildcmp_8bit(CHARSET_INFO *,
uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
+uint my_wellformedlen_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
@@ -327,6 +330,7 @@ int my_wildcmp_mb(CHARSET_INFO *,
int escape, int w_one, int w_many);
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
+uint my_wellformedlen_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
uint my_instr_mb(struct charset_info_st *,
const char *b, uint b_length,
const char *s, uint s_length,
diff --git a/mysql-test/r/ctype_mb.result b/mysql-test/r/ctype_mb.result
index 288033e7530..edccb047c85 100644
--- a/mysql-test/r/ctype_mb.result
+++ b/mysql-test/r/ctype_mb.result
@@ -22,7 +22,7 @@ Warning 1264 Data truncated for column 'c2' at row 1
Warning 1264 Data truncated for column 'c3' at row 1
SELECT * FROM t1;
c1 c2 c3
-aaaabbbbcccc aaaabbbbcccc aaaabbbbcccc
+aaaa aaaa aaaa
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(4) CHARACTER SET utf8, KEY key_a(a(3)));
SHOW CREATE TABLE t1;
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index 7c05b1ea446..31f26c421b6 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -73,9 +73,17 @@ create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d");
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
- `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(4) character set utf8 default NULL
+ `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(10) character set utf8 default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
select * from t1;
date_format("2004-01-19 10:10:10", "%Y-%m-%d")
2004-01-19
drop table t1;
+set names koi8r;
+create table t1 (s1 char(1) character set utf8);
+insert into t1 values (_koi8r'ÁÂ');
+Warnings:
+Warning 1264 Data truncated for column 's1' at row 1
+select s1,hex(s1),char_length(s1),octet_length(s1) from t1;
+s1 hex(s1) char_length(s1) octet_length(s1)
+Á D0B0 1 2
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index 5e9324dd68f..c2ea1ed20a4 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -53,3 +53,10 @@ show create table t1;
select * from t1;
drop table t1;
+#
+# Bug #2366 Wrong utf8 behaviour when data is truncated
+#
+set names koi8r;
+create table t1 (s1 char(1) character set utf8);
+insert into t1 values (_koi8r'ÁÂ');
+select s1,hex(s1),char_length(s1),octet_length(s1) from t1;
diff --git a/sql/field.cc b/sql/field.cc
index 9f25b770ab0..57b77693275 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -4060,6 +4060,8 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
int error= 0;
char buff[80];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
+ uint copy_length;
+
/* Convert character set if nesessary */
if (String::needs_conversion(from, length, cs, field_charset))
{
@@ -4067,27 +4069,31 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
from= tmpstr.ptr();
length= tmpstr.length();
}
- if (length <= field_length)
- {
- memcpy(ptr,from,length);
- if (length < field_length)
- field_charset->cset->fill(field_charset,ptr+length,field_length-length,
- ' ');
- }
- else
- {
- memcpy(ptr,from,field_length);
- if (current_thd->count_cuted_fields)
- { // Check if we loosed some info
- const char *end=from+length;
- from+= field_length;
- from+= field_charset->cset->scan(field_charset, from, end,
- MY_SEQ_SPACES);
- if (from != end)
- {
- set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED);
- error=1;
- }
+
+ /*
+ Make sure we don't break a multibyte sequence
+ and that we don't copy malformed data.
+ */
+ copy_length= field_charset->cset->wellformedlen(field_charset,
+ from,from+length,
+ field_length/
+ field_charset->mbmaxlen);
+ memcpy(ptr,from,copy_length);
+ if (copy_length < field_length) // Append spaces if shorter
+ field_charset->cset->fill(field_charset,ptr+copy_length,
+ field_length-copy_length,' ');
+
+
+ if (current_thd->count_cuted_fields && (copy_length < length))
+ { // Check if we lost some info
+ const char *end=from+length;
+ from+= copy_length;
+ from+= field_charset->cset->scan(field_charset, from, end,
+ MY_SEQ_SPACES);
+ if (from != end)
+ {
+ set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED);
+ error=1;
}
}
return error;
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index fd0afb19726..465300e721e 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -518,7 +518,8 @@ public:
void fix_length_and_dec()
{
collation.set(default_charset());
- decimals=0; max_length=args[0]->max_length*2;
+ decimals=0;
+ max_length=args[0]->max_length*2*collation.collation->mbmaxlen;
}
};
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index ed5b9ecc0db..062e7fc7b44 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -1286,12 +1286,13 @@ void Item_func_date_format::fix_length_and_dec()
if (args[1]->type() == STRING_ITEM)
{ // Optimize the normal case
fixed_length=1;
- max_length=format_length(((Item_string*) args[1])->const_string());
+ max_length= format_length(((Item_string*) args[1])->const_string())*
+ collation.collation->mbmaxlen;
}
else
{
fixed_length=0;
- max_length=args[1]->max_length*10;
+ max_length=args[1]->max_length*10*collation.collation->mbmaxlen;
set_if_smaller(max_length,MAX_BLOB_WIDTH);
}
maybe_null=1; // If wrong date
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 8d4081fb2aa..574156a99ed 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6245,6 +6245,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_big5, /* mb_wc */
my_wc_mb_big5, /* wc_mb */
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 43575bbc277..fc22938d46e 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -347,6 +347,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
+ my_wellformedlen_8bit,
my_lengthsp_8bit,
my_mb_wc_bin,
my_wc_mb_bin,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 366a5d500ed..278e8529e83 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8653,6 +8653,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_euc_kr, /* mb_wc */
my_wc_mb_euc_kr, /* wc_mb */
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 44a58b2b906..722f00f0f7a 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5704,6 +5704,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_gb2312, /* mb_wc */
my_wc_mb_gb2312, /* wc_mb */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 5475c3bd363..9e71a18e531 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9900,6 +9900,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gbk,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_gbk,
my_wc_mb_gbk,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index c00ded21575..933737b5f61 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -180,6 +180,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
+ my_wellformedlen_8bit,
my_lengthsp_8bit,
my_mb_wc_latin1,
my_wc_mb_latin1,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index b5e8c4598a0..377bf311d38 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -274,6 +274,22 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
return pos ? e+2-b0 : b-b0;
}
+uint my_wellformedlen_mb(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b, const char *e, uint pos)
+{
+ uint mblen;
+ const char *b0=b;
+
+ while (pos && b<e)
+ {
+ b+= (mblen= my_ismbchar(cs,b,e)) ? mblen : 1;
+ pos--;
+ }
+ return b-b0;
+}
+
+
+
uint my_instr_mb(CHARSET_INFO *cs,
const char *b, uint b_length,
const char *s, uint s_length,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 62eb1418970..fb2dee99ba5 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1020,6 +1020,15 @@ uint my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
return pos;
}
+uint my_wellformedlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b,
+ const char *e,
+ uint nchars)
+{
+ uint nbytes= e-b;
+ return nbytes < nchars ? nbytes : nchars;
+}
+
uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
const char *ptr, uint length)
{
@@ -1096,6 +1105,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
+ my_wellformedlen_8bit,
my_lengthsp_8bit,
my_mb_wc_8bit,
my_wc_mb_8bit,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 42f32fe739b..feff0fff227 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4489,6 +4489,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_sjis,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_sjis, /* mb_wc */
my_wc_mb_sjis, /* wc_mb */
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 09552a0dc23..d577b964405 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -874,7 +874,8 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,plFF
static
-int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc,
+int my_mb_wc_tis620(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t *wc,
const unsigned char *str,
const unsigned char *end __attribute__((unused)))
{
@@ -886,7 +887,8 @@ int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc,
}
static
-int my_wc_mb_tis620(CHARSET_INFO *cs,my_wc_t wc,
+int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc,
unsigned char *str,
unsigned char *end __attribute__((unused)))
{
@@ -919,6 +921,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
+ my_wellformedlen_8bit,
my_lengthsp_8bit,
my_mb_wc_tis620, /* mb_wc */
my_wc_mb_tis620, /* wc_mb */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 775501027d0..bb74e0cf56b 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1002,6 +1002,17 @@ uint my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
static
+uint my_wellformedlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b,
+ const char *e,
+ uint nchars)
+{
+ uint nbytes= (e-b) & ~ (uint)1;
+ nchars*= 2;
+ return nbytes < nchars ? nbytes : nchars;
+}
+
+static
void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char *s, uint l, int fill)
{
@@ -1287,6 +1298,7 @@ static MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_mbcharlen_ucs2, /* mbcharlen */
my_numchars_ucs2,
my_charpos_ucs2,
+ my_wellformedlen_ucs2,
my_lengthsp_ucs2,
my_ucs2_uni, /* mb_wc */
my_uni_ucs2, /* wc_mb */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f6928e9426e..f27ddcf3e30 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8444,6 +8444,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_ujis,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_mb_wc_euc_jp, /* mb_wc */
my_wc_mb_euc_jp, /* wc_mb */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 8004fba75b7..ef9719bf040 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1969,6 +1969,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
+ my_wellformedlen_mb,
my_lengthsp_8bit,
my_utf8_uni,
my_uni_utf8,