diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-07-03 10:33:17 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-07-03 10:33:17 +0400 |
commit | 95d07ee408abd98769093759a076f4665a176d77 (patch) | |
tree | 24b4b0c016574b97b0fae5067f009119aa744f3c | |
parent | 302bf7c4664b904482ecc133476e822d497b114d (diff) | |
download | mariadb-git-95d07ee408abd98769093759a076f4665a176d77.tar.gz |
MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
-rw-r--r-- | include/m_ctype.h | 1 | ||||
-rw-r--r-- | mysql-test/r/ctype_eucjpms.result | 42 | ||||
-rw-r--r-- | mysql-test/r/ctype_ujis.result | 42 | ||||
-rw-r--r-- | mysql-test/t/ctype_eucjpms.test | 25 | ||||
-rw-r--r-- | mysql-test/t/ctype_ujis.test | 25 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 45 | ||||
-rw-r--r-- | strings/ctype-mb.c | 16 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 45 | ||||
-rw-r--r-- | strings/strcoll.ic | 12 | ||||
-rw-r--r-- | unittest/strings/strings-t.c | 30 |
10 files changed, 256 insertions, 27 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 8300619d5c7..9db8fca12b3 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -351,7 +351,6 @@ struct my_collation_handler_st my_bool (*propagate)(CHARSET_INFO *cs, const uchar *str, size_t len); }; -extern MY_COLLATION_HANDLER my_collation_mb_bin_handler; extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; diff --git a/mysql-test/r/ctype_eucjpms.result b/mysql-test/r/ctype_eucjpms.result index 131e7043e58..df1f79fc218 100644 --- a/mysql-test/r/ctype_eucjpms.result +++ b/mysql-test/r/ctype_eucjpms.result @@ -33841,3 +33841,45 @@ ERROR HY000: Invalid eucjpms character string: '8EA0' # # End of 10.0 tests # +# +# Start of 10.1 tests +# +# +# MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character" +# +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms, KEY(a)); +INSERT INTO t1 VALUES ('a'),(0x7F); +INSERT INTO t1 VALUES (0x8EA1),(0x8EDF); +INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE); +INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE); +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +61 +7F +8EA1 +8EDF +8FA1A1 +8FFEFE +A1A1 +DEDE +DFDF +E0E0 +FEFE +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET eucjpms COLLATE eucjpms_bin; +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +61 +7F +8EA1 +8EDF +8FA1A1 +8FFEFE +A1A1 +DEDE +DFDF +E0E0 +FEFE +DROP TABLE t1; +# +# End of 10.1 tests +# diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result index 4ad47cbc84a..b578e558c0a 100644 --- a/mysql-test/r/ctype_ujis.result +++ b/mysql-test/r/ctype_ujis.result @@ -26144,3 +26144,45 @@ ERROR HY000: Invalid ujis character string: '8EA0' # # End of 10.0 tests # +# +# Start of 10.1 tests +# +# +# MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character" +# +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis, KEY(a)); +INSERT INTO t1 VALUES ('a'),(0x7F); +INSERT INTO t1 VALUES (0x8EA1),(0x8EDF); +INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE); +INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE); +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +61 +7F +8EA1 +8EDF +8FA1A1 +8FFEFE +A1A1 +DEDE +DFDF +E0E0 +FEFE +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ujis COLLATE ujis_bin; +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +61 +7F +8EA1 +8EDF +8FA1A1 +8FFEFE +A1A1 +DEDE +DFDF +E0E0 +FEFE +DROP TABLE t1; +# +# End of 10.1 tests +# diff --git a/mysql-test/t/ctype_eucjpms.test b/mysql-test/t/ctype_eucjpms.test index 2dd806ed027..d533e38b2a2 100644 --- a/mysql-test/t/ctype_eucjpms.test +++ b/mysql-test/t/ctype_eucjpms.test @@ -541,3 +541,28 @@ SELECT _eucjpms 0x8EA0; --echo # --echo # End of 10.0 tests --echo # + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character" +--echo # +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms, KEY(a)); +# [x00-x7F] # ASCII/JIS-Roman +# [x8E][xA1-xDF] # half-width katakana +# [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 +# [xA1-xFE][xA1-xFE] # JIS X 0208:1997 +INSERT INTO t1 VALUES ('a'),(0x7F); +INSERT INTO t1 VALUES (0x8EA1),(0x8EDF); +INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE); +INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE); +SELECT HEX(a) FROM t1 ORDER BY a; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET eucjpms COLLATE eucjpms_bin; +SELECT HEX(a) FROM t1 ORDER BY a; +DROP TABLE t1; + +--echo # +--echo # End of 10.1 tests +--echo # diff --git a/mysql-test/t/ctype_ujis.test b/mysql-test/t/ctype_ujis.test index 94fc7ffe4c0..6fc928c7e60 100644 --- a/mysql-test/t/ctype_ujis.test +++ b/mysql-test/t/ctype_ujis.test @@ -1369,3 +1369,28 @@ SELECT _ujis 0x8EA0; --echo # --echo # End of 10.0 tests --echo # + + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character" +--echo # +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis, KEY(a)); +# [x00-x7F] # ASCII/JIS-Roman +# [x8E][xA1-xDF] # half-width katakana +# [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 +# [xA1-xFE][xA1-xFE] # JIS X 0208:1997 +INSERT INTO t1 VALUES ('a'),(0x7F); +INSERT INTO t1 VALUES (0x8EA1),(0x8EDF); +INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE); +INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE); +SELECT HEX(a) FROM t1 ORDER BY a; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ujis COLLATE ujis_bin; +SELECT HEX(a) FROM t1 ORDER BY a;DROP TABLE t1; + +--echo # +--echo # End of 10.1 tests +--echo # diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 827feda927b..d331f643079 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -194,6 +194,7 @@ static const uchar sort_order_eucjpms[]= #define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms +#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_JIS(x,y) (iseucjpms(x) && iseucjpms(y)) #define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y)) #define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y)) @@ -201,6 +202,23 @@ static const uchar sort_order_eucjpms[]= #define DEFINE_ASIAN_ROUTINES #include "ctype-mb.ic" +#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_japanese_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB1(x) ((int) sort_order_eucjpms[(uchar) (x)]) +#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ + (((uint) (uchar) (y)) << 8)) +#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) +#include "strcoll.ic" + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB1(x) ((int) (uchar) (x)) +#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ + (((uint) (uchar) (y)) << 8)) +#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) +#include "strcoll.ic" + static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)), const char* p, const char *e) @@ -67467,11 +67485,11 @@ size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)), } -static MY_COLLATION_HANDLER my_collation_ci_handler = +static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler = { NULL, /* init */ - my_strnncoll_simple,/* strnncoll */ - my_strnncollsp_simple, + my_strnncoll_eucjpms_japanese_ci, + my_strnncollsp_eucjpms_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67482,6 +67500,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_propagate_simple }; + +static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = +{ + NULL, /* init */ + my_strnncoll_eucjpms_bin, + my_strnncollsp_eucjpms_bin, + my_strnxfrm_mb, + my_strnxfrmlen_simple, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + + static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ @@ -67547,7 +67582,7 @@ struct charset_info_st my_charset_eucjpms_japanese_ci= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_handler, - &my_collation_ci_handler + &my_collation_eucjpms_japanese_ci_handler }; @@ -67580,7 +67615,7 @@ struct charset_info_st my_charset_eucjpms_bin= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_handler, - &my_collation_mb_bin_handler + &my_collation_eucjpms_bin_handler }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 5947c3d4f4a..a7f9e144fe8 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -1560,20 +1560,4 @@ int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype, } -MY_COLLATION_HANDLER my_collation_mb_bin_handler = -{ - NULL, /* init */ - my_strnncoll_mb_bin, - my_strnncollsp_mb_bin, - my_strnxfrm_mb, - my_strnxfrmlen_simple, - my_like_range_mb, - my_wildcmp_mb_bin, - my_strcasecmp_mb_bin, - my_instr_mb, - my_hash_sort_mb_bin, - my_propagate_simple -}; - - #endif diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index cb000a2afa0..497ad67da05 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -193,6 +193,7 @@ static const uchar sort_order_ujis[]= #define isujis_ss3(c) ((uchar) (c) == 0x8f) #define MY_FUNCTION_NAME(x) my_ ## x ## _ujis +#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) #define IS_MB2_JIS(x,y) (isujis(x) && isujis(y)) #define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y)) #define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y)) @@ -200,6 +201,23 @@ static const uchar sort_order_ujis[]= #define DEFINE_ASIAN_ROUTINES #include "ctype-mb.ic" +#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_japanese_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB1(x) ((int) sort_order_ujis[(uchar) (x)]) +#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ + (((uint) (uchar) (y)) << 8)) +#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) +#include "strcoll.ic" + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB1(x) ((int) (uchar) (x)) +#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \ + (((uint) (uchar) (y)) << 8)) +#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) z)) +#include "strcoll.ic" + static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)), const char* p, const char *e) @@ -67211,11 +67229,11 @@ my_caseup_ujis(CHARSET_INFO * cs, char *src, size_t srclen, #ifdef HAVE_CHARSET_ujis -static MY_COLLATION_HANDLER my_collation_ci_handler = +static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler = { NULL, /* init */ - my_strnncoll_simple,/* strnncoll */ - my_strnncollsp_simple, + my_strnncoll_ujis_japanese_ci, + my_strnncollsp_ujis_japanese_ci, my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ @@ -67226,6 +67244,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_propagate_simple }; + +static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = +{ + NULL, /* init */ + my_strnncoll_ujis_bin, + my_strnncollsp_ujis_bin, + my_strnxfrm_mb, + my_strnxfrmlen_simple, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + + static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ @@ -67291,7 +67326,7 @@ struct charset_info_st my_charset_ujis_japanese_ci= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_handler, - &my_collation_ci_handler + &my_collation_ujis_japanese_ci_handler }; @@ -67324,7 +67359,7 @@ struct charset_info_st my_charset_ujis_bin= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_handler, - &my_collation_mb_bin_handler + &my_collation_ujis_bin_handler }; diff --git a/strings/strcoll.ic b/strings/strcoll.ic index f230c4f7411..693252b3052 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -95,6 +95,17 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) return 2; /* A valid two-byte character */ } +#ifdef IS_MB3_CHAR + if (str + 3 > end) /* Incomplete three-byte character */ + goto bad; + + if (IS_MB3_CHAR(str[0], str[1], str[2])) + { + *weight= WEIGHT_MB3(str[0], str[1], str[2]); + return 3; /* A valid three-byte character */ + } +#endif + bad: *weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */ return 1; @@ -228,4 +239,5 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), #undef WEIGHT_ILSEQ #undef WEIGHT_MB1 #undef WEIGHT_MB2 +#undef WEIGHT_MB3 #undef WEIGHT_PAD_SPACE diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index c7824d07047..0f5f8c48cec 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -333,6 +333,20 @@ STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]= }; +/* + A shared test for eucjpms and ujis. +*/ +STRNNCOLL_PARAM strcoll_ujis[]= +{ + {CSTR("\x8E\xA1"), CSTR("\x8E"), -1}, /* Good MB2 vs incomplete MB2 */ + {CSTR("\x8E\xA1"), CSTR("\x8F\xA1"), -1}, /* Good MB2 vs incomplete MB3 */ + {CSTR("\x8E\xA1"), CSTR("\x8F\xA1\xA1"), -1}, /* Good MB2 vs good MB3 */ + {CSTR("\xA1\xA1"), CSTR("\x8F\xA1\xA1"), 1}, /* Good MB2 vs good MB3 */ + {CSTR("\x8E"), CSTR("\x8F\xA1"), -1}, /* Incomplete MB2 vs incomplete MB3 */ + {NULL, 0, NULL, 0, 0} +}; + + static void str2hex(char *dst, size_t dstlen, const char *src, size_t srclen) { @@ -415,6 +429,14 @@ test_strcollsp() failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0); failed+= strcollsp(&my_charset_cp932_bin, strcoll_8181_A1_E0E0); #endif +#ifdef HAVE_CHARSET_eucjpms + failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_mb2_common); + failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_mb2_common); + failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_mb2_A1A1_mb2_F9FE); + failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_mb2_A1A1_mb2_F9FE); + failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_ujis); + failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_ujis); +#endif #ifdef HAVE_CHARSET_euckr failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_common); failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE); @@ -441,6 +463,14 @@ test_strcollsp() failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0); failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0); #endif +#ifdef HAVE_CHARSET_ujis + failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_common); + failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_common); + failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_A1A1_mb2_F9FE); + failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_A1A1_mb2_F9FE); + failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_ujis); + failed+= strcollsp(&my_charset_ujis_bin, strcoll_ujis); +#endif return failed; } |