summary refs log tree commit diff
path: root/strings
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.org> 2015-07-03 10:33:17 +0400
committer Alexander Barkov <bar@mariadb.org> 2015-07-03 10:33:17 +0400
commit 95d07ee408abd98769093759a076f4665a176d77 (patch)
tree 24b4b0c016574b97b0fae5067f009119aa744f3c /strings
parent 302bf7c4664b904482ecc133476e822d497b114d (diff)
download mariadb-git-95d07ee408abd98769093759a076f4665a176d77.tar.gz
MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-eucjpms.c | 45
-rw-r--r-- strings/ctype-mb.c | 16
-rw-r--r-- strings/ctype-ujis.c | 45
-rw-r--r-- strings/strcoll.ic | 12
4 files changed, 92 insertions, 26 deletions
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 827feda927b..d331f643079 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -194,6 +194,7 @@ static const uchar sort_order_eucjpms[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms
+#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) /* true when byte x, read as unsigned, is below 0x80 */
#define IS_MB2_JIS(x,y) (iseucjpms(x) && iseucjpms(y))
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
@@ -201,6 +202,23 @@ static const uchar sort_order_eucjpms[]=
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
+#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_japanese_ci /* name pattern for the functions strcoll.ic defines */
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) /* weight of a bad byte; per the commit subject, meant to sort above any valid character */
+#define WEIGHT_MB1(x) ((int) sort_order_eucjpms[(uchar) (x)]) /* weight looked up in sort_order_eucjpms */
+#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \
+ (((uint) (uchar) (y)) << 8))
+#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z))) /* z is parenthesized so the cast covers a whole expression argument */
+#include "strcoll.ic" /* consumes the macros above; WEIGHT_* are #undef'ed at its end */
+
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_bin /* name pattern for the functions strcoll.ic defines */
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) /* weight of a bad byte; per the commit subject, meant to sort above any valid character */
+#define WEIGHT_MB1(x) ((int) (uchar) (x)) /* binary collation: the weight is the unsigned byte value itself */
+#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \
+ (((uint) (uchar) (y)) << 8))
+#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z))) /* z is parenthesized so the cast covers a whole expression argument */
+#include "strcoll.ic" /* consumes the macros above; WEIGHT_* are #undef'ed at its end */
+
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
@@ -67467,11 +67485,11 @@ size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
}
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler =
{
NULL, /* init */
- my_strnncoll_simple,/* strnncoll */
- my_strnncollsp_simple,
+ my_strnncoll_eucjpms_japanese_ci,
+ my_strnncollsp_eucjpms_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -67482,6 +67500,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
+
+static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = /* collation handler that my_charset_eucjpms_bin points at */
+{
+ NULL, /* init */
+ my_strnncoll_eucjpms_bin, /* strnncoll */
+ my_strnncollsp_eucjpms_bin, /* NOTE(review): presumably the strnncollsp slot -- confirm against MY_COLLATION_HANDLER */
+ my_strnxfrm_mb, /* strnxfrm */
+ my_strnxfrmlen_simple,
+ my_like_range_mb, /* like_range */
+ my_wildcmp_mb_bin,
+ my_strcasecmp_mb_bin,
+ my_instr_mb,
+ my_hash_sort_mb_bin,
+ my_propagate_simple
+};
+
+
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@@ -67547,7 +67582,7 @@ struct charset_info_st my_charset_eucjpms_japanese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
- &my_collation_ci_handler
+ &my_collation_eucjpms_japanese_ci_handler
};
@@ -67580,7 +67615,7 @@ struct charset_info_st my_charset_eucjpms_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
- &my_collation_mb_bin_handler
+ &my_collation_eucjpms_bin_handler
};
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 5947c3d4f4a..a7f9e144fe8 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -1560,20 +1560,4 @@ int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
}
-MY_COLLATION_HANDLER my_collation_mb_bin_handler =
-{
- NULL, /* init */
- my_strnncoll_mb_bin,
- my_strnncollsp_mb_bin,
- my_strnxfrm_mb,
- my_strnxfrmlen_simple,
- my_like_range_mb,
- my_wildcmp_mb_bin,
- my_strcasecmp_mb_bin,
- my_instr_mb,
- my_hash_sort_mb_bin,
- my_propagate_simple
-};
-
-
#endif
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index cb000a2afa0..497ad67da05 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -193,6 +193,7 @@ static const uchar sort_order_ujis[]=
#define isujis_ss3(c) ((uchar) (c) == 0x8f)
#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis
+#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) /* true when byte x, read as unsigned, is below 0x80 */
#define IS_MB2_JIS(x,y) (isujis(x) && isujis(y))
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
@@ -200,6 +201,23 @@ static const uchar sort_order_ujis[]=
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_japanese_ci /* name pattern for the functions strcoll.ic defines */
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) /* weight of a bad byte; per the commit subject, meant to sort above any valid character */
+#define WEIGHT_MB1(x) ((int) sort_order_ujis[(uchar) (x)]) /* weight looked up in sort_order_ujis */
+#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \
+ (((uint) (uchar) (y)) << 8))
+#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z))) /* z is parenthesized so the cast covers a whole expression argument */
+#include "strcoll.ic" /* consumes the macros above; WEIGHT_* are #undef'ed at its end */
+
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_bin /* name pattern for the functions strcoll.ic defines */
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) /* weight of a bad byte; per the commit subject, meant to sort above any valid character */
+#define WEIGHT_MB1(x) ((int) (uchar) (x)) /* binary collation: the weight is the unsigned byte value itself */
+#define WEIGHT_MB2(x,y) ((((uint) (uchar)(x)) << 16) | \
+ (((uint) (uchar) (y)) << 8))
+#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z))) /* z is parenthesized so the cast covers a whole expression argument */
+#include "strcoll.ic" /* consumes the macros above; WEIGHT_* are #undef'ed at its end */
+
static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
@@ -67211,11 +67229,11 @@ my_caseup_ujis(CHARSET_INFO * cs, char *src, size_t srclen,
#ifdef HAVE_CHARSET_ujis
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler =
{
NULL, /* init */
- my_strnncoll_simple,/* strnncoll */
- my_strnncollsp_simple,
+ my_strnncoll_ujis_japanese_ci,
+ my_strnncollsp_ujis_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@@ -67226,6 +67244,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
+
+static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = /* collation handler that my_charset_ujis_bin points at */
+{
+ NULL, /* init */
+ my_strnncoll_ujis_bin, /* strnncoll */
+ my_strnncollsp_ujis_bin, /* NOTE(review): presumably the strnncollsp slot -- confirm against MY_COLLATION_HANDLER */
+ my_strnxfrm_mb, /* strnxfrm */
+ my_strnxfrmlen_simple,
+ my_like_range_mb, /* like_range */
+ my_wildcmp_mb_bin,
+ my_strcasecmp_mb_bin,
+ my_instr_mb,
+ my_hash_sort_mb_bin,
+ my_propagate_simple
+};
+
+
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@@ -67291,7 +67326,7 @@ struct charset_info_st my_charset_ujis_japanese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
- &my_collation_ci_handler
+ &my_collation_ujis_japanese_ci_handler
};
@@ -67324,7 +67359,7 @@ struct charset_info_st my_charset_ujis_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
- &my_collation_mb_bin_handler
+ &my_collation_ujis_bin_handler
};
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index f230c4f7411..693252b3052 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -95,6 +95,17 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
return 2; /* A valid two-byte character */
}
+#ifdef IS_MB3_CHAR
+ if (str + 3 > end) /* Incomplete three-byte character */
+ goto bad;
+
+ if (IS_MB3_CHAR(str[0], str[1], str[2]))
+ {
+ *weight= WEIGHT_MB3(str[0], str[1], str[2]);
+ return 3; /* A valid three-byte character */
+ }
+#endif
+
bad:
*weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */
return 1;
@@ -228,4 +239,5 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
#undef WEIGHT_ILSEQ
#undef WEIGHT_MB1
#undef WEIGHT_MB2
+#undef WEIGHT_MB3
#undef WEIGHT_PAD_SPACE