summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/r/ctype_euckr.result41
-rw-r--r--mysql-test/t/ctype_euckr.test23
-rw-r--r--strings/ctype-euc_kr.c27
3 files changed, 87 insertions, 4 deletions
diff --git a/mysql-test/r/ctype_euckr.result b/mysql-test/r/ctype_euckr.result
index 6017bc07763..57e3e2ed8f8 100644
--- a/mysql-test/r/ctype_euckr.result
+++ b/mysql-test/r/ctype_euckr.result
@@ -165,3 +165,44 @@ hex(a)
A2E6
FEF7
DROP TABLE t1;
+create table t1 (s1 varchar(5) character set euckr);
+insert into t1 values (0xA141);
+insert into t1 values (0xA15A);
+insert into t1 values (0xA161);
+insert into t1 values (0xA17A);
+insert into t1 values (0xA181);
+insert into t1 values (0xA1FE);
+insert into t1 values (0xA140);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1@' for column 's1' at row 1
+insert into t1 values (0xA15B);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1[' for column 's1' at row 1
+insert into t1 values (0xA160);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1`' for column 's1' at row 1
+insert into t1 values (0xA17B);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1{' for column 's1' at row 1
+insert into t1 values (0xA180);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1\x80' for column 's1' at row 1
+insert into t1 values (0xA1FF);
+Warnings:
+Warning 1366 Incorrect string value: '\xA1\xFF' for column 's1' at row 1
+select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
+hex(s1) hex(convert(s1 using utf8))
+
+
+
+
+
+
+A141 ECA2A5
+A15A ECA381
+A161 ECA382
+A17A ECA3A5
+A181 ECA3A6
+A1FE EFBFA2
+drop table t1;
+End of 5.0 tests
diff --git a/mysql-test/t/ctype_euckr.test b/mysql-test/t/ctype_euckr.test
index 56939817b2f..05e4b04eded 100644
--- a/mysql-test/t/ctype_euckr.test
+++ b/mysql-test/t/ctype_euckr.test
@@ -31,3 +31,26 @@ SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;
# End of 4.1 tests
+
+#
+#Bug #30315 Character sets: insertion of euckr code value 0xa141 fails
+#
+create table t1 (s1 varchar(5) character set euckr);
+# Insert some valid characters
+insert into t1 values (0xA141);
+insert into t1 values (0xA15A);
+insert into t1 values (0xA161);
+insert into t1 values (0xA17A);
+insert into t1 values (0xA181);
+insert into t1 values (0xA1FE);
+# Insert some invalid characters
+insert into t1 values (0xA140);
+insert into t1 values (0xA15B);
+insert into t1 values (0xA160);
+insert into t1 values (0xA17B);
+insert into t1 values (0xA180);
+insert into t1 values (0xA1FF);
+select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
+drop table t1;
+
+--echo End of 5.0 tests
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index abaa8f1a516..1aaf65c98b3 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -179,20 +179,39 @@ static uchar NEAR sort_order_euc_kr[]=
/* Support for Korean(EUC_KR) characters, by powerm90@tinc.co.kr and mrpark@tinc.co.kr */
-#define iseuc_kr(c) ((0xa1<=(uchar)(c) && (uchar)(c)<=0xfe))
+/*
+ Unicode mapping is done according to:
+ ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSC5601.TXT
+
+ Valid multi-byte characters:
+
+ [A1..FE][41..5A,61..7A,81..FE]
+
+ Note, 0x5C is not a valid MB tail,
+ so escape_with_backslash_is_dangerous is not set.
+*/
+
+#define iseuc_kr_head(c) ((0xa1<=(uchar)(c) && (uchar)(c)<=0xfe))
+
+#define iseuc_kr_tail1(c) ((uchar) (c) >= 0x41 && (uchar) (c) <= 0x5A)
+#define iseuc_kr_tail2(c) ((uchar) (c) >= 0x61 && (uchar) (c) <= 0x7A)
+#define iseuc_kr_tail3(c) ((uchar) (c) >= 0x81 && (uchar) (c) <= 0xFE)
+#define iseuc_kr_tail(c) (iseuc_kr_tail1(c) || \
+ iseuc_kr_tail2(c) || \
+ iseuc_kr_tail3(c))
static int ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
{
return ((*(uchar*)(p)<0x80)? 0:\
- iseuc_kr(*(p)) && (e)-(p)>1 && iseuc_kr(*((p)+1))? 2:\
+ iseuc_kr_head(*(p)) && (e)-(p)>1 && iseuc_kr_tail(*((p)+1))? 2:\
0);
}
static int mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
- return (iseuc_kr(c) ? 2 : 1);
+ return (iseuc_kr_head(c) ? 2 : 1);
}
@@ -8653,7 +8672,7 @@ my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
/* Single byte ascii character */
b++;
}
- else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1]))
+ else if (b < emb && iseuc_kr_head(*b) && iseuc_kr_tail(b[1]))
{
/* Double byte character */
b+= 2;