diff options
author | unknown <bar@mysql.com> | 2005-03-25 16:08:54 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2005-03-25 16:08:54 +0400 |
commit | 90aa6e00a730483e9aa324990d28780571601b58 (patch) | |
tree | 0b1831ee34ec7e5f948d0f95d9bceb93087c90db | |
parent | cdf8e293d9e71ea00d346a4e29aeafe241ec6fc5 (diff) | |
download | mariadb-git-90aa6e00a730483e9aa324990d28780571601b58.tar.gz |
Allow inserting of extra HKSCS and cp950 characters into a Big5 column.
-rw-r--r-- | mysql-test/r/ctype_big5.result | 7 | ||||
-rw-r--r-- | mysql-test/t/ctype_big5.test | 9 | ||||
-rw-r--r-- | strings/ctype-big5.c | 39 |
3 files changed, 54 insertions, 1 deletions
diff --git a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
index 8f4ee3d0558..c63704f6d9d 100644
--- a/mysql-test/r/ctype_big5.result
+++ b/mysql-test/r/ctype_big5.result
@@ -77,3 +77,10 @@ big5_bin 6109
 big5_bin 61
 big5_bin 6120
 drop table t1;
+SET NAMES big5;
+CREATE TABLE t1 (a text) character set big5;
+INSERT INTO t1 VALUES ('ùØ');
+SELECT * FROM t1;
+a
+ùØ
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
index 8b75123ca32..b5cf610d941 100644
--- a/mysql-test/t/ctype_big5.test
+++ b/mysql-test/t/ctype_big5.test
@@ -16,3 +16,12 @@ SET collation_connection='big5_chinese_ci';
 -- source include/ctype_filesort.inc
 SET collation_connection='big5_bin';
 -- source include/ctype_filesort.inc
+
+#
+# Bugs#9357: TEXT columns break string with special word in BIG5 charset.
+#
+SET NAMES big5;
+CREATE TABLE t1 (a text) character set big5;
+INSERT INTO t1 VALUES ('ùØ');
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 270b02212af..58847a96591 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6271,6 +6271,43 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
   return 2;
 }
 
+
+/*
+  Returns the length in bytes of the well formed BIG5 prefix of the
+  string [b, e), examining at most "pos" characters.
+  CP950 and HKSCS additional characters are also accepted.
+  Scanning stops early at the first wrong or truncated byte sequence.
+  The "cs" argument is not used.
+*/
+static
+uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
+                             const char *b, const char *e, uint pos)
+{
+  const char *b0= b;
+  /* Each iteration examines one character and uses up one unit of "pos" */
+  while (pos-- && b < e)
+  {
+    /* Compare as uchar: plain "char" may be signed or unsigned */
+    if (((uchar) b[0]) < 0x80)
+    {
+      /* Single byte ascii character */
+      b++;
+    }
+    else if ((e - b) >= 2 && isbig5code((uchar)*b, (uchar)b[1]))
+    {
+      /* Double byte character, both bytes lie inside [b, e) */
+      b+= 2;
+    }
+    else
+    {
+      /* Wrong byte sequence */
+      break;
+    }
+  }
+  return (uint) (b - b0);
+}
+
+
 static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 {
     NULL, /* init */
@@ -6291,7 +6328,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
     mbcharlen_big5,
     my_numchars_mb,
     my_charpos_mb,
-    my_well_formed_len_mb,
+    my_well_formed_len_big5,
     my_lengthsp_8bit,
     my_numcells_8bit,
     my_mb_wc_big5, /* mb_wc */