diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-03-04 09:16:43 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-03-04 09:16:43 +0400 |
commit | a7ed8523e35ff2e82701cd1f483c8f665f322f3b (patch) | |
tree | 436d9089a3028f07661d85fe28ed65bde23a4045 /strings/ctype-ujis.c | |
parent | d8c1165c28ae6ce2e29ecd5492c2540bfd6b2177 (diff) | |
download | mariadb-git-a7ed8523e35ff2e82701cd1f483c8f665f322f3b.tar.gz |
Adding a shared include file ctype-mb.ic and removing a number
of very similar copies of my_well_formed_len_xxx(), implemented
for big5, cp932, euckr, eucjpms, gb2312, gbk, sjis, ujis.
Diffstat (limited to 'strings/ctype-ujis.c')
-rw-r--r-- | strings/ctype-ujis.c | 81 |
1 files changed, 20 insertions, 61 deletions
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index f208d15f364..99f5be3fa38 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -179,10 +179,26 @@ static const uchar sort_order_ujis[]= }; -#define isujis(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe)) -#define iskata(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf)) -#define isujis_ss2(c) (((c)&0xff) == 0x8e) -#define isujis_ss3(c) (((c)&0xff) == 0x8f) +/* + EUC-JP encoding subcomponents: + [x00-x7F] # ASCII/JIS-Roman (one-byte/character) + [x8E][xA1-xDF] # half-width katakana (two bytes/char) + [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char) + [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char) +*/ + +#define isujis(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe) +#define iskata(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf) +#define isujis_ss2(c) ((uchar) (c) == 0x8e) +#define isujis_ss3(c) ((uchar) (c) == 0x8f) + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis +#define IS_MB2_JIS(x,y) (isujis(x) && isujis(y)) +#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y)) +#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y)) +#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z)) +#define WELL_FORMED_LEN +#include "ctype-mb.ic" static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)), @@ -201,63 +217,6 @@ static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c) } -/* - EUC-JP encoding subcomponents: - [x00-x7F] # ASCII/JIS-Roman (one-byte/character) - [x8E][xA1-xDF] # half-width katakana (two bytes/char) - [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char) - [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char) -*/ - -static -size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)), - const char *beg, const char *end, - size_t pos, int *error) -{ - const uchar *b= (uchar *) beg; - - for ( *error= 0 ; pos && b < (uchar*) end; pos--, b++) - { - char *chbeg; - uint ch= *b; - - if (ch <= 0x7F) /* one byte */ - continue; - - 
chbeg= (char *) b++; - if (b >= (uchar *) end) /* need more bytes */ - { - *error= 1; - return (size_t) (chbeg - beg); /* unexpected EOL */ - } - - if (isujis_ss2(ch)) /* [x8E][xA1-xDF] */ - { - if (iskata(*b)) - continue; - *error= 1; - return (size_t) (chbeg - beg); /* invalid sequence */ - } - - if (isujis_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */ - { - ch= *b++; - if (b >= (uchar*) end) - { - *error= 1; - return (size_t) (chbeg - beg); /* unexpected EOL */ - } - } - - if (isujis(ch) && isujis(*b)) /* [xA1-xFE][xA1-xFE] */ - continue; - *error= 1; - return (size_t) (chbeg - beg); /* invalid sequence */ - } - return (size_t) (b - (uchar *) beg); -} - - static size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)), const char *str, const char *str_end) |