summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.org> 2015-03-04 09:16:43 +0400
committer Alexander Barkov <bar@mariadb.org> 2015-03-04 09:16:43 +0400
commit a7ed8523e35ff2e82701cd1f483c8f665f322f3b (patch)
tree 436d9089a3028f07661d85fe28ed65bde23a4045
parent d8c1165c28ae6ce2e29ecd5492c2540bfd6b2177 (diff)
download mariadb-git-a7ed8523e35ff2e82701cd1f483c8f665f322f3b.tar.gz
Adding a shared include file ctype-mb.ic and removing a number
of very similar copies of my_well_formed_len_xxx(), implemented for big5, cp932, euckr, eucjpms, gb2312, gbk, sjis, ujis.
-rw-r--r-- strings/ctype-big5.c 43
-rw-r--r-- strings/ctype-cp932.c 60
-rw-r--r-- strings/ctype-euc_kr.c 41
-rw-r--r-- strings/ctype-eucjpms.c 79
-rw-r--r-- strings/ctype-gb2312.c 41
-rw-r--r-- strings/ctype-gbk.c 43
-rw-r--r-- strings/ctype-mb.ic 94
-rw-r--r-- strings/ctype-sjis.c 55
-rw-r--r-- strings/ctype-ujis.c 81
9 files changed, 184 insertions, 353 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index a9eb2b1b318..d631bd0a34e 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -34,6 +34,7 @@
/*
Support for Chinese(BIG5) characters, by jou@nematic.ieo.nctu.edu.tw
+ CP950 and HKSCS additional characters are also accepted.
modified by Wei He (hewei@mail.ied.ac.cn)
modified by Alex Barkov <bar@udm.net>
*/
@@ -47,6 +48,12 @@
#define big5head(e) ((uchar)(e>>8))
#define big5tail(e) ((uchar)(e&0xff))
+#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
+#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
static const uchar ctype_big5[257] =
{
0, /* For standard library */
@@ -6843,42 +6850,6 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
- Returns a well formed length of a BIG5 string.
- CP950 and HKSCS additional characters are also accepted.
-*/
-static
-size_t my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- const char *emb= e - 1; /* Last possible end of an MB character */
-
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if ((b < emb) && isbig5code((uchar)*b, (uchar)b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
-
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
{
NULL, /* init */
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 66b352721db..13129a6a874 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -176,10 +176,18 @@ static const uchar sort_order_cp932[]=
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
};
-#define iscp932head(c) ((0x81<=(c) && (c)<=0x9f) || \
- ((0xe0<=(c)) && (c)<=0xfc))
-#define iscp932tail(c) ((0x40<=(c) && (c)<=0x7e) || \
- (0x80<=(c) && (c)<=0xfc))
+#define iscp932head(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
+ (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+#define iscp932tail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
+ (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+
+#define iscp932kata(c) (0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF)
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
+#define IS_8BIT_CHAR(x) iscp932kata(x)
+#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
@@ -34711,50 +34719,6 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
return clen;
}
-/*
- Returns a well formed length of a cp932 string.
- cp932 additional characters are also accepted.
-*/
-
-static
-size_t my_well_formed_len_cp932(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- *error= 0;
- while (pos-- && b < e)
- {
- /*
- Cast to int8 for extra safety.
- "char" can be unsigned by default
- on some platforms.
- */
- if (((int8)b[0]) >= 0)
- {
- /* Single byte ascii character */
- b++;
- }
- else if (iscp932head((uchar)*b) && (e-b)>1 && iscp932tail((uchar)b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
- {
- /* Half width kana */
- b++;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 36d99eec375..eab9539ad45 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -202,6 +202,12 @@ static const uchar sort_order_euc_kr[]=
iseuc_kr_tail3(c))
+#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
+#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
{
@@ -9929,41 +9935,6 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
- Returns well formed length of a EUC-KR string.
-*/
-static size_t
-my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- const char *emb= e - 1; /* Last possible end of an MB character */
-
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if (b < emb && iseuc_kr_head(*b) && iseuc_kr_tail(b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
-
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 8c47b666cf4..52873c2f87e 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -180,10 +180,26 @@ static const uchar sort_order_eucjpms[]=
};
-#define iseucjpms(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
-#define iskata(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
-#define iseucjpms_ss2(c) (((c)&0xff) == 0x8e)
-#define iseucjpms_ss3(c) (((c)&0xff) == 0x8f)
+/*
+ EUCJPMS encoding subcomponents:
+ [x00-x7F] # ASCII/JIS-Roman (one-byte/character)
+ [x8E][xA1-xDF] # half-width katakana (two bytes/char)
+ [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
+ [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
+*/
+#define iseucjpms(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
+#define iskata(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
+#define iseucjpms_ss2(c) ((uchar) (c) == 0x8e)
+#define iseucjpms_ss3(c) ((uchar) (c) == 0x8f)
+
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms
+#define IS_MB2_JIS(x,y) (iseucjpms(x) && iseucjpms(y))
+#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
+#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
+#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
@@ -67416,61 +67432,6 @@ my_wc_mb_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
- EUCJPMS encoding subcomponents:
- [x00-x7F] # ASCII/JIS-Roman (one-byte/character)
- [x8E][xA1-xDF] # half-width katakana (two bytes/char)
- [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
- [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
-*/
-
-static
-size_t my_well_formed_len_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
- const char *beg, const char *end, size_t pos,
- int *error)
-{
- const uchar *b= (uchar *) beg;
- *error=0;
-
- for ( ; pos && b < (uchar*) end; pos--, b++)
- {
- char *chbeg;
- uint ch= *b;
-
- if (ch <= 0x7F) /* one byte */
- continue;
-
- chbeg= (char *) b++;
- if (b >= (uchar *) end) /* need more bytes */
- return (uint) (chbeg - beg); /* unexpected EOL */
-
- if (iseucjpms_ss2(ch)) /* [x8E][xA1-xDF] */
- {
- if (iskata(*b))
- continue;
- *error=1;
- return (uint) (chbeg - beg); /* invalid sequence */
- }
-
- if (iseucjpms_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
- {
- ch= *b++;
- if (b >= (uchar*) end)
- {
- *error= 1;
- return (uint)(chbeg - beg); /* unexpected EOL */
- }
- }
-
- if (iseucjpms(ch) && iseucjpms(*b)) /* [xA1-xFE][xA1-xFE] */
- continue;
- *error=1;
- return (size_t) (chbeg - beg); /* invalid sequence */
- }
- return (size_t) (b - (uchar *) beg);
-}
-
-
static
size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
const char *str, const char *str_end)
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index b5aeed2088f..a4268b8fd68 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -165,6 +165,12 @@ static const uchar sort_order_gb2312[]=
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
+#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
+#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
{
@@ -6332,41 +6338,6 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
- Returns well formed length of a EUC-KR string.
-*/
-static size_t
-my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- const char *emb= e - 1; /* Last possible end of an MB character */
-
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
-
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index d282d96145d..392fdb487b6 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -43,6 +43,12 @@
#define gbkhead(e) ((uchar)(e>>8))
#define gbktail(e) ((uchar)(e&0xff))
+#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
+#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
static const uchar ctype_gbk[257] =
{
0, /* For standard library */
@@ -10726,43 +10732,6 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
- Returns well formed length of a GBK string.
-*/
-static
-size_t my_well_formed_len_gbk(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- const char *emb= e - 1; /* Last possible end of an MB character */
-
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if ((b < emb) && isgbkcode((uchar)*b, (uchar)b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
-
-
-
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic
new file mode 100644
index 00000000000..70cc89c9af0
--- /dev/null
+++ b/strings/ctype-mb.ic
@@ -0,0 +1,94 @@
+/*
+ Copyright (c) 2015, MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef MY_FUNCTION_NAME
+#error MY_FUNCTION_NAME is not defined
+#endif
+
+#if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR)
+#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
+#endif
+
+#if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR)
+#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
+#endif
+
+
+#ifdef WELL_FORMED_LEN
+/**
+ Returns well formed length of a character string with
+ variable character length for character sets with:
+ - mbminlen == 1
+ - mbmaxlen == 2, 3, or 4
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b, const char *e,
+ size_t nchars, int *error)
+{
+ const char *b0= b;
+
+ DBUG_ASSERT(cs->mbminlen == 1);
+ DBUG_ASSERT(cs->mbmaxlen <= 4);
+
+ for (*error= 0 ; b < e && nchars-- ; )
+ {
+ if ((uchar) b[0] < 128)
+ {
+ b++; /* Single byte ASCII character */
+ continue;
+ }
+
+ if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
+ {
+ b+= 2; /* Double byte character */
+ continue;
+ }
+
+#ifdef IS_MB3_CHAR
+ if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
+ {
+ b+= 3; /* Three-byte character */
+ continue;
+ }
+#endif
+
+#ifdef IS_MB4_CHAR
+ if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+ {
+ b+= 4; /* Four-byte character */
+ continue;
+ }
+#endif
+
+#ifdef IS_8BIT_CHAR
+ if (IS_8BIT_CHAR(b[0]))
+ {
+ b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
+ continue;
+ }
+#endif
+
+ /* Wrong byte sequence */
+ *error= 1;
+ break;
+ }
+ return b - b0;
+}
+
+#endif /* WELL_FORMED_LEN */
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 2038632c9d3..432e2e5e823 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -176,10 +176,19 @@ static const uchar sort_order_sjis[]=
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
};
-#define issjishead(c) ((0x81<=(c) && (c)<=0x9f) || \
- ((0xe0<=(c)) && (c)<=0xfc))
-#define issjistail(c) ((0x40<=(c) && (c)<=0x7e) || \
- (0x80<=(c) && (c)<=0xfc))
+#define issjishead(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
+ (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+#define issjistail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
+ (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+
+#define issjiskata(c) ((0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF))
+
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
+#define IS_8BIT_CHAR(x) issjiskata(x)
+#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
@@ -34089,44 +34098,6 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
return clen;
}
-/*
- Returns a well formed length of a SJIS string.
- CP932 additional characters are also accepted.
-*/
-static
-size_t my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const char *b, const char *e,
- size_t pos, int *error)
-{
- const char *b0= b;
- *error= 0;
- while (pos-- && b < e)
- {
- if ((uchar) b[0] < 128)
- {
- /* Single byte ascii character */
- b++;
- }
- else if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1]))
- {
- /* Double byte character */
- b+= 2;
- }
- else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
- {
- /* Half width kana */
- b++;
- }
- else
- {
- /* Wrong byte sequence */
- *error= 1;
- break;
- }
- }
- return (size_t) (b - b0);
-}
-
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f208d15f364..99f5be3fa38 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -179,10 +179,26 @@ static const uchar sort_order_ujis[]=
};
-#define isujis(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
-#define iskata(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
-#define isujis_ss2(c) (((c)&0xff) == 0x8e)
-#define isujis_ss3(c) (((c)&0xff) == 0x8f)
+/*
+ EUC-JP encoding subcomponents:
+ [x00-x7F] # ASCII/JIS-Roman (one-byte/character)
+ [x8E][xA1-xDF] # half-width katakana (two bytes/char)
+ [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
+ [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
+*/
+
+#define isujis(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
+#define iskata(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
+#define isujis_ss2(c) ((uchar) (c) == 0x8e)
+#define isujis_ss3(c) ((uchar) (c) == 0x8f)
+
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis
+#define IS_MB2_JIS(x,y) (isujis(x) && isujis(y))
+#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
+#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
+#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
@@ -201,63 +217,6 @@ static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
}
-/*
- EUC-JP encoding subcomponents:
- [x00-x7F] # ASCII/JIS-Roman (one-byte/character)
- [x8E][xA1-xDF] # half-width katakana (two bytes/char)
- [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
- [xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
-*/
-
-static
-size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)),
- const char *beg, const char *end,
- size_t pos, int *error)
-{
- const uchar *b= (uchar *) beg;
-
- for ( *error= 0 ; pos && b < (uchar*) end; pos--, b++)
- {
- char *chbeg;
- uint ch= *b;
-
- if (ch <= 0x7F) /* one byte */
- continue;
-
- chbeg= (char *) b++;
- if (b >= (uchar *) end) /* need more bytes */
- {
- *error= 1;
- return (size_t) (chbeg - beg); /* unexpected EOL */
- }
-
- if (isujis_ss2(ch)) /* [x8E][xA1-xDF] */
- {
- if (iskata(*b))
- continue;
- *error= 1;
- return (size_t) (chbeg - beg); /* invalid sequence */
- }
-
- if (isujis_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
- {
- ch= *b++;
- if (b >= (uchar*) end)
- {
- *error= 1;
- return (size_t) (chbeg - beg); /* unexpected EOL */
- }
- }
-
- if (isujis(ch) && isujis(*b)) /* [xA1-xFE][xA1-xFE] */
- continue;
- *error= 1;
- return (size_t) (chbeg - beg); /* invalid sequence */
- }
- return (size_t) (b - (uchar *) beg);
-}
-
-
static
size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
const char *str, const char *str_end)