summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <bar@mysql.com>2005-12-09 16:37:58 +0400
committerunknown <bar@mysql.com>2005-12-09 16:37:58 +0400
commit7063bd4d2bfe4688db60b28a15843406299a58f0 (patch)
tree2868d73a9285634c6a4b999453f6fb27a07e03eb
parent5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff)
downloadmariadb-git-7063bd4d2bfe4688db60b28a15843406299a58f0.tar.gz
Bug#15377 Valid multibyte sequences are truncated on INSERT
ctype-euc_kr.c: ctype-gb2312.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set specifications but just don't have Unicode mapping. Previously only those which have Unicode mapping could be stored, while unassigned characters lead to data truncation. Many files: new file strings/ctype-gb2312.c: Bug#15377 Valid multibyte sequences are truncated on INSERT Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have Unicode mapping could be stored. strings/ctype-euc_kr.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have Unicode mapping could be stored.
-rw-r--r--mysql-test/include/have_euckr.inc4
-rw-r--r--mysql-test/include/have_gb2312.inc4
-rw-r--r--mysql-test/r/ctype_euckr.result167
-rw-r--r--mysql-test/r/ctype_gb2312.result167
-rw-r--r--mysql-test/r/have_euckr.require2
-rw-r--r--mysql-test/r/have_gb2312.require2
-rw-r--r--mysql-test/t/ctype_euckr.test33
-rw-r--r--mysql-test/t/ctype_gb2312.test33
-rw-r--r--strings/ctype-euc_kr.c37
-rw-r--r--strings/ctype-gb2312.c37
10 files changed, 484 insertions, 2 deletions
diff --git a/mysql-test/include/have_euckr.inc b/mysql-test/include/have_euckr.inc
new file mode 100644
index 00000000000..af794aafc04
--- /dev/null
+++ b/mysql-test/include/have_euckr.inc
@@ -0,0 +1,4 @@
+-- require r/have_euckr.require
+disable_query_log;
+show collation like "euckr_korean_ci";
+enable_query_log;
diff --git a/mysql-test/include/have_gb2312.inc b/mysql-test/include/have_gb2312.inc
new file mode 100644
index 00000000000..4328bc67639
--- /dev/null
+++ b/mysql-test/include/have_gb2312.inc
@@ -0,0 +1,4 @@
+-- require r/have_gb2312.require
+disable_query_log;
+show collation like "gb2312_chinese_ci";
+enable_query_log;
diff --git a/mysql-test/r/ctype_euckr.result b/mysql-test/r/ctype_euckr.result
new file mode 100644
index 00000000000..6017bc07763
--- /dev/null
+++ b/mysql-test/r/ctype_euckr.result
@@ -0,0 +1,167 @@
+drop table if exists t1;
+SET @test_character_set= 'euckr';
+SET @test_collation= 'euckr_korean_ci';
+SET @safe_character_set_server= @@character_set_server;
+SET @safe_collation_server= @@collation_server;
+SET character_set_server= @test_character_set;
+SET collation_server= @test_collation;
+CREATE DATABASE d1;
+USE d1;
+CREATE TABLE t1 (c CHAR(10), KEY(c));
+SHOW FULL COLUMNS FROM t1;
+Field Type Collation Null Key Default Extra Privileges Comment
+c char(10) euckr_korean_ci YES MUL NULL
+INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
+SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
+want3results
+aaa
+aaaa
+aaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
+SHOW FULL COLUMNS FROM t1;
+Field Type Collation Null Key Default Extra Privileges Comment
+c1 varchar(15) euckr_korean_ci YES MUL NULL
+INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
+SELECT c1 as want3results from t1 where c1 like 'l%';
+want3results
+location
+loberge
+lotre
+SELECT c1 as want3results from t1 where c1 like 'lo%';
+want3results
+location
+loberge
+lotre
+SELECT c1 as want1result from t1 where c1 like 'loc%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'loca%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locat%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locati%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locatio%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'location%';
+want1result
+location
+DROP TABLE t1;
+DROP DATABASE d1;
+USE test;
+SET character_set_server= @safe_character_set_server;
+SET collation_server= @safe_collation_server;
+SET NAMES euckr;
+SET collation_connection='euckr_korean_ci';
+create table t1 select repeat('a',4000) a;
+delete from t1;
+insert into t1 values ('a'), ('a '), ('a\t');
+select collation(a),hex(a) from t1 order by a;
+collation(a) hex(a)
+euckr_korean_ci 6109
+euckr_korean_ci 61
+euckr_korean_ci 6120
+drop table t1;
+create table t1 engine=innodb select repeat('a',50) as c1;
+alter table t1 add index(c1(5));
+insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
+select collation(c1) from t1 limit 1;
+collation(c1)
+euckr_korean_ci
+select c1 from t1 where c1 like 'abcdef%' order by c1;
+c1
+abcdefg
+select c1 from t1 where c1 like 'abcde1%' order by c1;
+c1
+abcde100
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde11%' order by c1;
+c1
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde111%' order by c1;
+c1
+abcde111
+drop table t1;
+select @@collation_connection;
+@@collation_connection
+euckr_korean_ci
+create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
+insert into t1 values('abcdef');
+insert into t1 values('_bcdef');
+insert into t1 values('a_cdef');
+insert into t1 values('ab_def');
+insert into t1 values('abc_ef');
+insert into t1 values('abcd_f');
+insert into t1 values('abcde_');
+select c1 as c1u from t1 where c1 like 'ab\_def';
+c1u
+ab_def
+select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
+c2h
+ab_def
+drop table t1;
+SET collation_connection='euckr_bin';
+create table t1 select repeat('a',4000) a;
+delete from t1;
+insert into t1 values ('a'), ('a '), ('a\t');
+select collation(a),hex(a) from t1 order by a;
+collation(a) hex(a)
+euckr_bin 6109
+euckr_bin 61
+euckr_bin 6120
+drop table t1;
+create table t1 engine=innodb select repeat('a',50) as c1;
+alter table t1 add index(c1(5));
+insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
+select collation(c1) from t1 limit 1;
+collation(c1)
+euckr_bin
+select c1 from t1 where c1 like 'abcdef%' order by c1;
+c1
+abcdefg
+select c1 from t1 where c1 like 'abcde1%' order by c1;
+c1
+abcde100
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde11%' order by c1;
+c1
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde111%' order by c1;
+c1
+abcde111
+drop table t1;
+select @@collation_connection;
+@@collation_connection
+euckr_bin
+create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
+insert into t1 values('abcdef');
+insert into t1 values('_bcdef');
+insert into t1 values('a_cdef');
+insert into t1 values('ab_def');
+insert into t1 values('abc_ef');
+insert into t1 values('abcd_f');
+insert into t1 values('abcde_');
+select c1 as c1u from t1 where c1 like 'ab\_def';
+c1u
+ab_def
+select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
+c2h
+ab_def
+drop table t1;
+SET NAMES euckr;
+CREATE TABLE t1 (a text) character set euckr;
+INSERT INTO t1 VALUES (0xA2E6),(0xFEF7);
+SELECT hex(a) FROM t1 ORDER BY a;
+hex(a)
+A2E6
+FEF7
+DROP TABLE t1;
diff --git a/mysql-test/r/ctype_gb2312.result b/mysql-test/r/ctype_gb2312.result
new file mode 100644
index 00000000000..314c336bab9
--- /dev/null
+++ b/mysql-test/r/ctype_gb2312.result
@@ -0,0 +1,167 @@
+drop table if exists t1;
+SET @test_character_set= 'gb2312';
+SET @test_collation= 'gb2312_chinese_ci';
+SET @safe_character_set_server= @@character_set_server;
+SET @safe_collation_server= @@collation_server;
+SET character_set_server= @test_character_set;
+SET collation_server= @test_collation;
+CREATE DATABASE d1;
+USE d1;
+CREATE TABLE t1 (c CHAR(10), KEY(c));
+SHOW FULL COLUMNS FROM t1;
+Field Type Collation Null Key Default Extra Privileges Comment
+c char(10) gb2312_chinese_ci YES MUL NULL
+INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
+SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
+want3results
+aaa
+aaaa
+aaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
+SHOW FULL COLUMNS FROM t1;
+Field Type Collation Null Key Default Extra Privileges Comment
+c1 varchar(15) gb2312_chinese_ci YES MUL NULL
+INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
+SELECT c1 as want3results from t1 where c1 like 'l%';
+want3results
+location
+loberge
+lotre
+SELECT c1 as want3results from t1 where c1 like 'lo%';
+want3results
+location
+loberge
+lotre
+SELECT c1 as want1result from t1 where c1 like 'loc%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'loca%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locat%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locati%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'locatio%';
+want1result
+location
+SELECT c1 as want1result from t1 where c1 like 'location%';
+want1result
+location
+DROP TABLE t1;
+DROP DATABASE d1;
+USE test;
+SET character_set_server= @safe_character_set_server;
+SET collation_server= @safe_collation_server;
+SET NAMES gb2312;
+SET collation_connection='gb2312_chinese_ci';
+create table t1 select repeat('a',4000) a;
+delete from t1;
+insert into t1 values ('a'), ('a '), ('a\t');
+select collation(a),hex(a) from t1 order by a;
+collation(a) hex(a)
+gb2312_chinese_ci 6109
+gb2312_chinese_ci 61
+gb2312_chinese_ci 6120
+drop table t1;
+create table t1 engine=innodb select repeat('a',50) as c1;
+alter table t1 add index(c1(5));
+insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
+select collation(c1) from t1 limit 1;
+collation(c1)
+gb2312_chinese_ci
+select c1 from t1 where c1 like 'abcdef%' order by c1;
+c1
+abcdefg
+select c1 from t1 where c1 like 'abcde1%' order by c1;
+c1
+abcde100
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde11%' order by c1;
+c1
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde111%' order by c1;
+c1
+abcde111
+drop table t1;
+select @@collation_connection;
+@@collation_connection
+gb2312_chinese_ci
+create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
+insert into t1 values('abcdef');
+insert into t1 values('_bcdef');
+insert into t1 values('a_cdef');
+insert into t1 values('ab_def');
+insert into t1 values('abc_ef');
+insert into t1 values('abcd_f');
+insert into t1 values('abcde_');
+select c1 as c1u from t1 where c1 like 'ab\_def';
+c1u
+ab_def
+select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
+c2h
+ab_def
+drop table t1;
+SET collation_connection='gb2312_bin';
+create table t1 select repeat('a',4000) a;
+delete from t1;
+insert into t1 values ('a'), ('a '), ('a\t');
+select collation(a),hex(a) from t1 order by a;
+collation(a) hex(a)
+gb2312_bin 6109
+gb2312_bin 61
+gb2312_bin 6120
+drop table t1;
+create table t1 engine=innodb select repeat('a',50) as c1;
+alter table t1 add index(c1(5));
+insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
+select collation(c1) from t1 limit 1;
+collation(c1)
+gb2312_bin
+select c1 from t1 where c1 like 'abcdef%' order by c1;
+c1
+abcdefg
+select c1 from t1 where c1 like 'abcde1%' order by c1;
+c1
+abcde100
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde11%' order by c1;
+c1
+abcde110
+abcde111
+select c1 from t1 where c1 like 'abcde111%' order by c1;
+c1
+abcde111
+drop table t1;
+select @@collation_connection;
+@@collation_connection
+gb2312_bin
+create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
+insert into t1 values('abcdef');
+insert into t1 values('_bcdef');
+insert into t1 values('a_cdef');
+insert into t1 values('ab_def');
+insert into t1 values('abc_ef');
+insert into t1 values('abcd_f');
+insert into t1 values('abcde_');
+select c1 as c1u from t1 where c1 like 'ab\_def';
+c1u
+ab_def
+select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
+c2h
+ab_def
+drop table t1;
+SET NAMES gb2312;
+CREATE TABLE t1 (a text) character set gb2312;
+INSERT INTO t1 VALUES (0xA2A1),(0xD7FE);
+SELECT hex(a) FROM t1 ORDER BY a;
+hex(a)
+A2A1
+D7FE
+DROP TABLE t1;
diff --git a/mysql-test/r/have_euckr.require b/mysql-test/r/have_euckr.require
new file mode 100644
index 00000000000..0771ceec570
--- /dev/null
+++ b/mysql-test/r/have_euckr.require
@@ -0,0 +1,2 @@
+Collation Charset Id Default Compiled Sortlen
+euckr_korean_ci euckr 19 Yes Yes 1
diff --git a/mysql-test/r/have_gb2312.require b/mysql-test/r/have_gb2312.require
new file mode 100644
index 00000000000..9bcb7c94a14
--- /dev/null
+++ b/mysql-test/r/have_gb2312.require
@@ -0,0 +1,2 @@
+Collation Charset Id Default Compiled Sortlen
+gb2312_chinese_ci gb2312 24 Yes Yes 1
diff --git a/mysql-test/t/ctype_euckr.test b/mysql-test/t/ctype_euckr.test
new file mode 100644
index 00000000000..56939817b2f
--- /dev/null
+++ b/mysql-test/t/ctype_euckr.test
@@ -0,0 +1,33 @@
+-- source include/have_euckr.inc
+
+#
+# Tests with the euckr character set
+#
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+SET @test_character_set= 'euckr';
+SET @test_collation= 'euckr_korean_ci';
+-- source include/ctype_common.inc
+
+SET NAMES euckr;
+SET collation_connection='euckr_korean_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_innodb_like.inc
+-- source include/ctype_like_escape.inc
+SET collation_connection='euckr_bin';
+-- source include/ctype_filesort.inc
+-- source include/ctype_innodb_like.inc
+-- source include/ctype_like_escape.inc
+
+#
+# Bug#15377 Valid multibyte sequences are truncated on INSERT
+#
+SET NAMES euckr;
+CREATE TABLE t1 (a text) character set euckr;
+INSERT INTO t1 VALUES (0xA2E6),(0xFEF7);
+SELECT hex(a) FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+# End of 4.1 tests
diff --git a/mysql-test/t/ctype_gb2312.test b/mysql-test/t/ctype_gb2312.test
new file mode 100644
index 00000000000..835818d441c
--- /dev/null
+++ b/mysql-test/t/ctype_gb2312.test
@@ -0,0 +1,33 @@
+-- source include/have_gb2312.inc
+
+#
+# Tests with the gb2312 character set
+#
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+SET @test_character_set= 'gb2312';
+SET @test_collation= 'gb2312_chinese_ci';
+-- source include/ctype_common.inc
+
+SET NAMES gb2312;
+SET collation_connection='gb2312_chinese_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_innodb_like.inc
+-- source include/ctype_like_escape.inc
+SET collation_connection='gb2312_bin';
+-- source include/ctype_filesort.inc
+-- source include/ctype_innodb_like.inc
+-- source include/ctype_like_escape.inc
+
+#
+# Bug#15377 Valid multibyte sequences are truncated on INSERT
+#
+SET NAMES gb2312;
+CREATE TABLE t1 (a text) character set gb2312;
+INSERT INTO t1 VALUES (0xA2A1),(0xD7FE);
+SELECT hex(a) FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+# End of 4.1 tests
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index f15e97de5be..2863b192f50 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ Returns well formed length of a EUC-KR string.
+*/
+static uint
+my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b, const char *e,
+ uint pos, int *error)
+{
+ const char *b0= b;
+ const char *emb= e - 1; /* Last possible end of an MB character */
+
+ *error= 0;
+ while (pos-- && b < e)
+ {
+ if ((uchar) b[0] < 128)
+ {
+ /* Single byte ascii character */
+ b++;
+ }
+ else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1]))
+ {
+ /* Double byte character */
+ b+= 2;
+ }
+ else
+ {
+ /* Wrong byte sequence */
+ *error= 1;
+ break;
+ }
+ }
+ return (uint) (b - b0);
+}
+
+
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
@@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
- my_well_formed_len_mb,
+ my_well_formed_len_euckr,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_euc_kr, /* mb_wc */
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 0cbad2d1c55..52dd61a8462 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ Returns well formed length of a EUC-KR string.
+*/
+static uint
+my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b, const char *e,
+ uint pos, int *error)
+{
+ const char *b0= b;
+ const char *emb= e - 1; /* Last possible end of an MB character */
+
+ *error= 0;
+ while (pos-- && b < e)
+ {
+ if ((uchar) b[0] < 128)
+ {
+ /* Single byte ascii character */
+ b++;
+ }
+ else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
+ {
+ /* Double byte character */
+ b+= 2;
+ }
+ else
+ {
+ /* Wrong byte sequence */
+ *error= 1;
+ break;
+ }
+ }
+ return (uint) (b - b0);
+}
+
+
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
@@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
- my_well_formed_len_mb,
+ my_well_formed_len_gb2312,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_gb2312, /* mb_wc */