diff options
Diffstat (limited to 'strings/ctype-sjis.c')
-rw-r--r-- | strings/ctype-sjis.c | 239 |
1 files changed, 76 insertions, 163 deletions
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index dc6b234cf5c..f0f005685fe 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -176,10 +176,20 @@ static const uchar sort_order_sjis[]= (uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377' }; -#define issjishead(c) ((0x81<=(c) && (c)<=0x9f) || \ - ((0xe0<=(c)) && (c)<=0xfc)) -#define issjistail(c) ((0x40<=(c) && (c)<=0x7e) || \ - (0x80<=(c) && (c)<=0xfc)) +#define issjishead(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \ + (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc)) +#define issjistail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \ + (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc)) + +#define issjiskata(c) ((0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF)) + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis +#define IS_8BIT_CHAR(x) issjiskata(x) +#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x)) +#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y)) +#define DEFINE_ASIAN_ROUTINES +#include "ctype-mb.ic" static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)), @@ -197,7 +207,7 @@ static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) -static MY_UNICASE_INFO c81[256]= +static MY_UNICASE_CHARACTER c81[256]= { /* 8100-810F */ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, @@ -407,7 +417,7 @@ static MY_UNICASE_INFO c81[256]= }; -static MY_UNICASE_INFO c82[256]= +static MY_UNICASE_CHARACTER c82[256]= { /* 8200-820F */ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, @@ -615,7 +625,7 @@ static MY_UNICASE_INFO c82[256]= }; -static MY_UNICASE_INFO c83[256]= +static MY_UNICASE_CHARACTER c83[256]= { /* 8300-830F */ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, @@ -825,7 +835,7 @@ static MY_UNICASE_INFO c83[256]= }; -static MY_UNICASE_INFO c84[256]= +static MY_UNICASE_CHARACTER c84[256]= { /* 8400-840F */ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, @@ -1035,7 +1045,7 @@ static MY_UNICASE_INFO c84[256]= }; -static MY_UNICASE_INFO *my_caseinfo_sjis[256]= +static MY_UNICASE_CHARACTER *my_caseinfo_pages_sjis[256]= { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1072,111 +1082,11 @@ static MY_UNICASE_INFO *my_caseinfo_sjis[256]= }; -static int my_strnncoll_sjis_internal(CHARSET_INFO *cs, - const uchar **a_res, size_t a_length, - const uchar **b_res, size_t b_length) -{ - const uchar *a= *a_res, *b= *b_res; - const uchar *a_end= a + a_length; - const uchar *b_end= b + b_length; - while (a < a_end && b < b_end) - { - if (ismbchar_sjis(cs,(char*) a, (char*) a_end) && - ismbchar_sjis(cs,(char*) b, (char*) b_end)) - { - uint a_char= sjiscode(*a, *(a+1)); - uint b_char= sjiscode(*b, *(b+1)); - if (a_char != b_char) - return (int) a_char - (int) b_char; - a += 2; - b += 2; - } else - { - if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b]) - return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b]; - a++; - b++; - } - } - *a_res= a; - *b_res= b; - return 0; -} - - -static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool b_is_prefix) +static MY_UNICASE_INFO my_caseinfo_sjis= { - int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); - if (b_is_prefix && a_length > b_length) - a_length= b_length; - return res ? res : (int) (a_length - b_length); -} - - -static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) -{ - const uchar *a_end= a + a_length, *b_end= b + b_length; - int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - - if (!res && (a != a_end || b != b_end)) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (a == a_end) - { - /* put shorter key in a */ - a_end= b_end; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for (; a < a_end ; a++) - { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; - } - } - return res; -} - - - -static size_t my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) -{ - uchar *d_end = dest + len; - uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) - { - if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) - { - *dest++ = *src++; - if (dest < d_end && src < s_end) - *dest++ = *src++; - } - else - *dest++ = sort_order_sjis[(uchar)*src++]; - } - if (len > srclen) - bfill(dest, len - srclen, ' '); - return len; -} + 0xFFFF, + my_caseinfo_pages_sjis +}; /* SJIS->Unicode conversion table */ @@ -34105,54 +34015,37 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)), return clen; } + /* - Returns a well formed length of a SJIS string. - CP932 additional characters are also accepted. + sjis_chinese_ci and sjis_bin sort character blocks in this order: + 1. [00..7F] - 7BIT characters (ASCII) + 2. [81..9F][40..7E,80..FC] - MB2 characters, part1 + 3. [A1..DF] - 8BIT characters (Kana) + 4. [E0..FC][40..7E,80..FC] - MB2 characters, part2 */ -static -size_t my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char *b, const char *e, - size_t pos, int *error) -{ - const char *b0= b; - *error= 0; - while (pos-- && b < e) - { - if ((uchar) b[0] < 128) - { - /* Single byte ascii character */ - b++; - } - else if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1])) - { - /* Double byte character */ - b+= 2; - } - else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF) - { - /* Half width kana */ - b++; - } - else - { - /* Wrong byte sequence */ - *error= 1; - break; - } - } - return (size_t) (b - b0); -} +#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_japanese_ci +#define WEIGHT_PAD_SPACE (256 * (int) ' ') +#define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)]) +#define WEIGHT_MB2(x,y) (sjiscode(x, y)) +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin +#define WEIGHT_PAD_SPACE (256 * (int) ' ') +#define WEIGHT_MB1(x) (256 * (int) (uchar) (x)) +#define WEIGHT_MB2(x,y) (sjiscode(x, y)) +#include "strcoll.ic" -static MY_COLLATION_HANDLER my_collation_ci_handler = + +static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci= { - NULL, /* init */ - my_strnncoll_sjis, - my_strnncollsp_sjis, - my_strnxfrm_sjis, + NULL, /* init */ + my_strnncoll_sjis_japanese_ci, + my_strnncollsp_sjis_japanese_ci, + my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, - my_wildcmp_mb, /* wildcmp */ + my_wildcmp_mb, my_strcasecmp_8bit, my_instr_mb, my_hash_sort_simple, @@ -34160,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = }; +static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= +{ + NULL, /* init */ + my_strnncoll_sjis_bin, + my_strnncollsp_sjis_bin, + my_strnxfrm_mb, + my_strnxfrmlen_simple, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + + static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ @@ -34188,7 +34097,11 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntod_8bit, my_strtoll10_8bit, my_strntoull10rnd_8bit, - my_scan_8bit + my_scan_8bit, + my_charlen_sjis, + my_well_formed_char_length_sjis, + my_copy_fix_mb, + my_native_to_mb_sjis, }; @@ -34204,11 +34117,10 @@ struct charset_info_st my_charset_sjis_japanese_ci= to_lower_sjis, to_upper_sjis, sort_order_sjis, - NULL, /* contractions */ - NULL, /* sort_order_big*/ + NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - my_caseinfo_sjis, /* caseinfo */ + &my_caseinfo_sjis, /* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -34220,8 +34132,9 @@ struct charset_info_st my_charset_sjis_japanese_ci= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, - &my_collation_ci_handler + &my_collation_handler_sjis_japanese_ci }; struct charset_info_st my_charset_sjis_bin= @@ -34236,11 +34149,10 @@ struct charset_info_st my_charset_sjis_bin= to_lower_sjis, to_upper_sjis, NULL, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ + NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - my_caseinfo_sjis, /* caseinfo */ + &my_caseinfo_sjis, /* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -34252,8 +34164,9 @@ struct charset_info_st my_charset_sjis_bin= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, - &my_collation_mb_bin_handler + &my_collation_handler_sjis_bin }; #endif |