summary | refs | log | tree | commit | diff
path: root/strings/ctype-sjis.c
diff options
context:
space:
mode:
Diffstat (limited to 'strings/ctype-sjis.c')
-rw-r--r-- strings/ctype-sjis.c 239
1 files changed, 76 insertions, 163 deletions
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index dc6b234cf5c..f0f005685fe 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -176,10 +176,20 @@ static const uchar sort_order_sjis[]=
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
};
-#define issjishead(c) ((0x81<=(c) && (c)<=0x9f) || \
- ((0xe0<=(c)) && (c)<=0xfc))
-#define issjistail(c) ((0x40<=(c) && (c)<=0x7e) || \
- (0x80<=(c) && (c)<=0xfc))
+#define issjishead(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
+ (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+#define issjistail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
+ (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+
+#define issjiskata(c) ((0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF))
+
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
+#define IS_8BIT_CHAR(x) issjiskata(x)
+#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x))
+#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
+#define DEFINE_ASIAN_ROUTINES
+#include "ctype-mb.ic"
static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
@@ -197,7 +207,7 @@ static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
-static MY_UNICASE_INFO c81[256]=
+static MY_UNICASE_CHARACTER c81[256]=
{
/* 8100-810F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -407,7 +417,7 @@ static MY_UNICASE_INFO c81[256]=
};
-static MY_UNICASE_INFO c82[256]=
+static MY_UNICASE_CHARACTER c82[256]=
{
/* 8200-820F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -615,7 +625,7 @@ static MY_UNICASE_INFO c82[256]=
};
-static MY_UNICASE_INFO c83[256]=
+static MY_UNICASE_CHARACTER c83[256]=
{
/* 8300-830F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -825,7 +835,7 @@ static MY_UNICASE_INFO c83[256]=
};
-static MY_UNICASE_INFO c84[256]=
+static MY_UNICASE_CHARACTER c84[256]=
{
/* 8400-840F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1035,7 +1045,7 @@ static MY_UNICASE_INFO c84[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_sjis[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1072,111 +1082,11 @@ static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
};
-static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
- const uchar **a_res, size_t a_length,
- const uchar **b_res, size_t b_length)
-{
- const uchar *a= *a_res, *b= *b_res;
- const uchar *a_end= a + a_length;
- const uchar *b_end= b + b_length;
- while (a < a_end && b < b_end)
- {
- if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
- ismbchar_sjis(cs,(char*) b, (char*) b_end))
- {
- uint a_char= sjiscode(*a, *(a+1));
- uint b_char= sjiscode(*b, *(b+1));
- if (a_char != b_char)
- return (int) a_char - (int) b_char;
- a += 2;
- b += 2;
- } else
- {
- if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
- return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
- a++;
- b++;
- }
- }
- *a_res= a;
- *b_res= b;
- return 0;
-}
-
-
-static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool b_is_prefix)
+static MY_UNICASE_INFO my_caseinfo_sjis=
{
- int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
- if (b_is_prefix && a_length > b_length)
- a_length= b_length;
- return res ? res : (int) (a_length - b_length);
-}
-
-
-static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
-{
- const uchar *a_end= a + a_length, *b_end= b + b_length;
- int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
-
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
- if (!res && (a != a_end || b != b_end))
- {
- int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- /*
- Check the next not space character of the longer key. If it's < ' ',
- then it's smaller than the other key.
- */
- if (a == a_end)
- {
- /* put shorter key in a */
- a_end= b_end;
- a= b;
- swap= -1; /* swap sign of result */
- res= -res;
- }
- for (; a < a_end ; a++)
- {
- if (*a != ' ')
- return (*a < ' ') ? -swap : swap;
- }
- }
- return res;
-}
-
-
-
-static size_t my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)),
- uchar *dest, size_t len,
- const uchar *src, size_t srclen)
-{
- uchar *d_end = dest + len;
- uchar *s_end = (uchar*) src + srclen;
- while (dest < d_end && src < s_end)
- {
- if (ismbchar_sjis(cs,(char*) src, (char*) s_end))
- {
- *dest++ = *src++;
- if (dest < d_end && src < s_end)
- *dest++ = *src++;
- }
- else
- *dest++ = sort_order_sjis[(uchar)*src++];
- }
- if (len > srclen)
- bfill(dest, len - srclen, ' ');
- return len;
-}
+ 0xFFFF,
+ my_caseinfo_pages_sjis
+};
/* SJIS->Unicode conversion table */
@@ -34105,54 +34015,37 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
return clen;
}
+
/*
- Returns a well formed length of a SJIS string.
- CP932 additional characters are also accepted.
+ sjis_japanese_ci and sjis_bin sort character blocks in this order:
+ 1. [00..7F] - 7BIT characters (ASCII)
+ 2. [81..9F][40..7E,80..FC] - MB2 characters, part1
+ 3. [A1..DF] - 8BIT characters (Kana)
+ 4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
*/
-static
-size_t my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
- {
- /* Half width kana */
- b++;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
+#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_japanese_ci
+#define WEIGHT_PAD_SPACE (256 * (int) ' ')
+#define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)])
+#define WEIGHT_MB2(x,y) (sjiscode(x, y))
+#include "strcoll.ic"
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin
+#define WEIGHT_PAD_SPACE (256 * (int) ' ')
+#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
+#define WEIGHT_MB2(x,y) (sjiscode(x, y))
+#include "strcoll.ic"
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+
+static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
{
- NULL, /* init */
- my_strnncoll_sjis,
- my_strnncollsp_sjis,
- my_strnxfrm_sjis,
+ NULL, /* init */
+ my_strnncoll_sjis_japanese_ci,
+ my_strnncollsp_sjis_japanese_ci,
+ my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
- my_wildcmp_mb, /* wildcmp */
+ my_wildcmp_mb,
my_strcasecmp_8bit,
my_instr_mb,
my_hash_sort_simple,
@@ -34160,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
};
+static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
+{
+ NULL, /* init */
+ my_strnncoll_sjis_bin,
+ my_strnncollsp_sjis_bin,
+ my_strnxfrm_mb,
+ my_strnxfrmlen_simple,
+ my_like_range_mb,
+ my_wildcmp_mb_bin,
+ my_strcasecmp_mb_bin,
+ my_instr_mb,
+ my_hash_sort_mb_bin,
+ my_propagate_simple
+};
+
+
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@@ -34188,7 +34097,11 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_charlen_sjis,
+ my_well_formed_char_length_sjis,
+ my_copy_fix_mb,
+ my_native_to_mb_sjis,
};
@@ -34204,11 +34117,10 @@ struct charset_info_st my_charset_sjis_japanese_ci=
to_lower_sjis,
to_upper_sjis,
sort_order_sjis,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_sjis, /* caseinfo */
+ &my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -34220,8 +34132,9 @@ struct charset_info_st my_charset_sjis_japanese_ci=
0xFCFC, /* max_sort_char */
' ', /* pad char */
1, /* escape_with_backslash_is_dangerous */
+ 1, /* levels_for_order */
&my_charset_handler,
- &my_collation_ci_handler
+ &my_collation_handler_sjis_japanese_ci
};
struct charset_info_st my_charset_sjis_bin=
@@ -34236,11 +34149,10 @@ struct charset_info_st my_charset_sjis_bin=
to_lower_sjis,
to_upper_sjis,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_sjis, /* caseinfo */
+ &my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -34252,8 +34164,9 @@ struct charset_info_st my_charset_sjis_bin=
0xFCFC, /* max_sort_char */
' ', /* pad char */
1, /* escape_with_backslash_is_dangerous */
+ 1, /* levels_for_order */
&my_charset_handler,
- &my_collation_mb_bin_handler
+ &my_collation_handler_sjis_bin
};
#endif