diff options
author | unknown <bar@mysql.com> | 2005-12-09 16:37:58 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2005-12-09 16:37:58 +0400 |
commit | 7063bd4d2bfe4688db60b28a15843406299a58f0 (patch) | |
tree | 2868d73a9285634c6a4b999453f6fb27a07e03eb | |
parent | 5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff) | |
download | mariadb-git-7063bd4d2bfe4688db60b28a15843406299a58f0.tar.gz |
Bug#15377 Valid multibyte sequences are truncated on INSERT
ctype-euc_kr.c:
ctype-gb2312.c:
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set
specifications but just don't have Unicode mapping.
Previously only those which have Unicode mapping
could be stored, while unassigned characters lead
to data truncation.
Many files:
new file
strings/ctype-gb2312.c:
Bug#15377 Valid multibyte sequences are truncated on INSERT
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set.
Previously only those which have Unicode mapping
could be stored.
strings/ctype-euc_kr.c:
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set.
Previously only those which have Unicode mapping
could be stored.
-rw-r--r-- | mysql-test/include/have_euckr.inc | 4 | ||||
-rw-r--r-- | mysql-test/include/have_gb2312.inc | 4 | ||||
-rw-r--r-- | mysql-test/r/ctype_euckr.result | 167 | ||||
-rw-r--r-- | mysql-test/r/ctype_gb2312.result | 167 | ||||
-rw-r--r-- | mysql-test/r/have_euckr.require | 2 | ||||
-rw-r--r-- | mysql-test/r/have_gb2312.require | 2 | ||||
-rw-r--r-- | mysql-test/t/ctype_euckr.test | 33 | ||||
-rw-r--r-- | mysql-test/t/ctype_gb2312.test | 33 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 37 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 37 |
10 files changed, 484 insertions, 2 deletions
diff --git a/mysql-test/include/have_euckr.inc b/mysql-test/include/have_euckr.inc new file mode 100644 index 00000000000..af794aafc04 --- /dev/null +++ b/mysql-test/include/have_euckr.inc @@ -0,0 +1,4 @@ +-- require r/have_euckr.require +disable_query_log; +show collation like "euckr_korean_ci"; +enable_query_log; diff --git a/mysql-test/include/have_gb2312.inc b/mysql-test/include/have_gb2312.inc new file mode 100644 index 00000000000..4328bc67639 --- /dev/null +++ b/mysql-test/include/have_gb2312.inc @@ -0,0 +1,4 @@ +-- require r/have_gb2312.require +disable_query_log; +show collation like "gb2312_chinese_ci"; +enable_query_log; diff --git a/mysql-test/r/ctype_euckr.result b/mysql-test/r/ctype_euckr.result new file mode 100644 index 00000000000..6017bc07763 --- /dev/null +++ b/mysql-test/r/ctype_euckr.result @@ -0,0 +1,167 @@ +drop table if exists t1; +SET @test_character_set= 'euckr'; +SET @test_collation= 'euckr_korean_ci'; +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; +CREATE TABLE t1 (c CHAR(10), KEY(c)); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c char(10) euckr_korean_ci YES MUL NULL +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +want3results +aaa +aaaa +aaaaa +DROP TABLE t1; +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c1 varchar(15) euckr_korean_ci YES MUL NULL +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +want3results +location +loberge +lotre +SELECT c1 as want3results from t1 where c1 like 'lo%'; +want3results +location +loberge +lotre +SELECT c1 as want1result from t1 where c1 like 'loc%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'loca%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locat%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locati%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'location%'; +want1result +location +DROP TABLE t1; +DROP DATABASE d1; +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; +SET NAMES euckr; +SET collation_connection='euckr_korean_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +euckr_korean_ci 6109 +euckr_korean_ci 61 +euckr_korean_ci 6120 +drop table t1; +create table t1 engine=innodb select repeat('a',50) as c1; +alter table t1 add index(c1(5)); +insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); +select collation(c1) from t1 limit 1; +collation(c1) +euckr_korean_ci +select c1 from t1 where c1 like 'abcdef%' order by c1; +c1 +abcdefg +select c1 from t1 where c1 like 'abcde1%' order by c1; +c1 +abcde100 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde11%' order by c1; +c1 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde111%' order by c1; +c1 +abcde111 +drop table t1; +select @@collation_connection; +@@collation_connection +euckr_korean_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET collation_connection='euckr_bin'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +euckr_bin 6109 +euckr_bin 61 +euckr_bin 6120 +drop table t1; +create table t1 engine=innodb select repeat('a',50) as c1; +alter table t1 add index(c1(5)); +insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); +select collation(c1) from t1 limit 1; +collation(c1) +euckr_bin +select c1 from t1 where c1 like 'abcdef%' order by c1; +c1 +abcdefg +select c1 from t1 where c1 like 'abcde1%' order by c1; +c1 +abcde100 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde11%' order by c1; +c1 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde111%' order by c1; +c1 +abcde111 +drop table t1; +select @@collation_connection; +@@collation_connection +euckr_bin +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET NAMES euckr; +CREATE TABLE t1 (a text) character set euckr; +INSERT INTO t1 VALUES (0xA2E6),(0xFEF7); +SELECT hex(a) FROM t1 ORDER BY a; +hex(a) +A2E6 +FEF7 +DROP TABLE t1; diff --git a/mysql-test/r/ctype_gb2312.result b/mysql-test/r/ctype_gb2312.result new file mode 100644 index 00000000000..314c336bab9 --- /dev/null +++ b/mysql-test/r/ctype_gb2312.result @@ -0,0 +1,167 @@ +drop table if exists t1; +SET @test_character_set= 'gb2312'; +SET @test_collation= 'gb2312_chinese_ci'; +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; +CREATE TABLE t1 (c CHAR(10), KEY(c)); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c char(10) gb2312_chinese_ci YES MUL NULL +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +want3results +aaa +aaaa +aaaaa +DROP TABLE t1; +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c1 varchar(15) gb2312_chinese_ci YES MUL NULL +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +want3results +location +loberge +lotre +SELECT c1 as want3results from t1 where c1 like 'lo%'; +want3results +location +loberge +lotre +SELECT c1 as want1result from t1 where c1 like 'loc%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'loca%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locat%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locati%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'location%'; +want1result +location +DROP TABLE t1; +DROP DATABASE d1; +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; +SET NAMES gb2312; +SET collation_connection='gb2312_chinese_ci'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +gb2312_chinese_ci 6109 +gb2312_chinese_ci 61 +gb2312_chinese_ci 6120 +drop table t1; +create table t1 engine=innodb select repeat('a',50) as c1; +alter table t1 add index(c1(5)); +insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); +select collation(c1) from t1 limit 1; +collation(c1) +gb2312_chinese_ci +select c1 from t1 where c1 like 'abcdef%' order by c1; +c1 +abcdefg +select c1 from t1 where c1 like 'abcde1%' order by c1; +c1 +abcde100 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde11%' order by c1; +c1 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde111%' order by c1; +c1 +abcde111 +drop table t1; +select @@collation_connection; +@@collation_connection +gb2312_chinese_ci +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET collation_connection='gb2312_bin'; +create table t1 select repeat('a',4000) a; +delete from t1; +insert into t1 values ('a'), ('a '), ('a\t'); +select collation(a),hex(a) from t1 order by a; +collation(a) hex(a) +gb2312_bin 6109 +gb2312_bin 61 +gb2312_bin 6120 +drop table t1; +create table t1 engine=innodb select repeat('a',50) as c1; +alter table t1 add index(c1(5)); +insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); +select collation(c1) from t1 limit 1; +collation(c1) +gb2312_bin +select c1 from t1 where c1 like 'abcdef%' order by c1; +c1 +abcdefg +select c1 from t1 where c1 like 'abcde1%' order by c1; +c1 +abcde100 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde11%' order by c1; +c1 +abcde110 +abcde111 +select c1 from t1 where c1 like 'abcde111%' order by c1; +c1 +abcde111 +drop table t1; +select @@collation_connection; +@@collation_connection +gb2312_bin +create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; +insert into t1 values('abcdef'); +insert into t1 values('_bcdef'); +insert into t1 values('a_cdef'); +insert into t1 values('ab_def'); +insert into t1 values('abc_ef'); +insert into t1 values('abcd_f'); +insert into t1 values('abcde_'); +select c1 as c1u from t1 where c1 like 'ab\_def'; +c1u +ab_def +select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; +c2h +ab_def +drop table t1; +SET NAMES gb2312; +CREATE TABLE t1 (a text) character set gb2312; +INSERT INTO t1 VALUES (0xA2A1),(0xD7FE); +SELECT hex(a) FROM t1 ORDER BY a; +hex(a) +A2A1 +D7FE +DROP TABLE t1; diff --git a/mysql-test/r/have_euckr.require b/mysql-test/r/have_euckr.require new file mode 100644 index 00000000000..0771ceec570 --- /dev/null +++ b/mysql-test/r/have_euckr.require @@ -0,0 +1,2 @@ +Collation Charset Id Default Compiled Sortlen +euckr_korean_ci euckr 19 Yes Yes 1 diff --git a/mysql-test/r/have_gb2312.require b/mysql-test/r/have_gb2312.require new file mode 100644 index 00000000000..9bcb7c94a14 --- /dev/null +++ b/mysql-test/r/have_gb2312.require @@ -0,0 +1,2 @@ +Collation Charset Id Default Compiled Sortlen +gb2312_chinese_ci gb2312 24 Yes Yes 1 diff --git a/mysql-test/t/ctype_euckr.test b/mysql-test/t/ctype_euckr.test new file mode 100644 index 00000000000..56939817b2f --- /dev/null +++ b/mysql-test/t/ctype_euckr.test @@ -0,0 +1,33 @@ +-- source include/have_euckr.inc + +# +# Tests with the euckr character set +# +--disable_warnings +drop table if exists t1; +--enable_warnings + +SET @test_character_set= 'euckr'; +SET @test_collation= 'euckr_korean_ci'; +-- source include/ctype_common.inc + +SET NAMES euckr; +SET collation_connection='euckr_korean_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_innodb_like.inc +-- source include/ctype_like_escape.inc +SET collation_connection='euckr_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_innodb_like.inc +-- source include/ctype_like_escape.inc + +# +# Bug#15377 Valid multibyte sequences are truncated on INSERT +# +SET NAMES euckr; +CREATE TABLE t1 (a text) character set euckr; +INSERT INTO t1 VALUES (0xA2E6),(0xFEF7); +SELECT hex(a) FROM t1 ORDER BY a; +DROP TABLE t1; + +# End of 4.1 tests diff --git a/mysql-test/t/ctype_gb2312.test b/mysql-test/t/ctype_gb2312.test new file mode 100644 index 00000000000..835818d441c --- /dev/null +++ b/mysql-test/t/ctype_gb2312.test @@ -0,0 +1,33 @@ +-- source include/have_gb2312.inc + +# +# Tests with the gb2312 character set +# +--disable_warnings +drop table if exists t1; +--enable_warnings + +SET @test_character_set= 'gb2312'; +SET @test_collation= 'gb2312_chinese_ci'; +-- source include/ctype_common.inc + +SET NAMES gb2312; +SET collation_connection='gb2312_chinese_ci'; +-- source include/ctype_filesort.inc +-- source include/ctype_innodb_like.inc +-- source include/ctype_like_escape.inc +SET collation_connection='gb2312_bin'; +-- source include/ctype_filesort.inc +-- source include/ctype_innodb_like.inc +-- source include/ctype_like_escape.inc + +# +# Bug#15377 Valid multibyte sequences are truncated on INSERT +# +SET NAMES gb2312; +CREATE TABLE t1 (a text) character set gb2312; +INSERT INTO t1 VALUES (0xA2A1),(0xD7FE); +SELECT hex(a) FROM t1 ORDER BY a; +DROP TABLE t1; + +# End of 4.1 tests diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index f15e97de5be..2863b192f50 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), } +/* + Returns well formed length of a EUC-KR string. +*/ +static uint +my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_euckr, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_euc_kr, /* mb_wc */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 0cbad2d1c55..52dd61a8462 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), } +/* + Returns well formed length of a EUC-KR string. +*/ +static uint +my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_gb2312, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_gb2312, /* mb_wc */ |