Adding a shared include file ctype-mb.ic and removing a number

of very similar copies of my_well_formed_len_xxx(), implemented for big5, cp932, euckr, eucjpms, gb2312, gbk, sjis, ujis.
author: Alexander Barkov <bar@mariadb.org> 2015-03-04 09:16:43 +0400
committer: Alexander Barkov <bar@mariadb.org> 2015-03-04 09:16:43 +0400
commit: a7ed8523e35ff2e82701cd1f483c8f665f322f3b (patch)
tree: 436d9089a3028f07661d85fe28ed65bde23a4045
parent: d8c1165c28ae6ce2e29ecd5492c2540bfd6b2177 (diff)
download: mariadb-git-a7ed8523e35ff2e82701cd1f483c8f665f322f3b.tar.gz
9 files changed, 184 insertions, 353 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index a9eb2b1b318..d631bd0a34e 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -34,6 +34,7 @@
 
 /* 
   Support for Chinese(BIG5) characters, by jou@nematic.ieo.nctu.edu.tw
+  CP950 and HKSCS additional characters are also accepted.
   modified by Wei He (hewei@mail.ied.ac.cn) 
   modified by Alex Barkov <bar@udm.net>
 */
@@ -47,6 +48,12 @@
 #define big5head(e)	((uchar)(e>>8))
 #define big5tail(e)	((uchar)(e&0xff))
 
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _big5
+#define IS_MB2_CHAR(x,y)      (isbig5head(x) && isbig5tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
 static const uchar ctype_big5[257] =
 {
   0,				/* For standard library */
@@ -6843,42 +6850,6 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
-/*
-  Returns a well formed length of a BIG5 string.
-  CP950 and HKSCS additional characters are also accepted.
-*/
-static
-size_t my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
-                               const char *b, const char *e,
-                               size_t pos, int *error)
-{
-  const char *b0= b;
-  const char *emb= e - 1; /* Last possible end of an MB character */
-
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    if ((uchar) b[0] < 128)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if ((b < emb) && isbig5code((uchar)*b, (uchar)b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
-
 static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
 {
   NULL,			/* init */
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 66b352721db..13129a6a874 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -176,10 +176,18 @@ static const uchar sort_order_cp932[]=
   (uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
 };
 
-#define iscp932head(c) ((0x81<=(c) && (c)<=0x9f) || \
-                       ((0xe0<=(c)) && (c)<=0xfc))
-#define iscp932tail(c) ((0x40<=(c) && (c)<=0x7e) || \
-                       (0x80<=(c) && (c)<=0xfc))
+#define iscp932head(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
+                        (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+#define iscp932tail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
+                        (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+
+#define iscp932kata(c)  (0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF)
+
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _cp932
+#define IS_8BIT_CHAR(x)       iscp932kata(x)
+#define IS_MB2_CHAR(x,y)      (iscp932head(x) && iscp932tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
 
 
 static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
@@ -34711,50 +34719,6 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
   return clen;
 }
 
-/*
-  Returns a well formed length of a cp932 string.
-  cp932 additional characters are also accepted.
-*/
-
-static
-size_t my_well_formed_len_cp932(CHARSET_INFO *cs __attribute__((unused)),
-                                const char *b, const char *e,
-                                size_t pos, int *error)
-{
-  const char *b0= b;
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    /*
-      Cast to int8 for extra safety.
-      "char" can be unsigned by default
-      on some platforms.
-    */
-    if (((int8)b[0]) >= 0)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if (iscp932head((uchar)*b) && (e-b)>1 && iscp932tail((uchar)b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
-    {
-      /* Half width kana */
-      b++;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 36d99eec375..eab9539ad45 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -202,6 +202,12 @@ static const uchar sort_order_euc_kr[]=
                               iseuc_kr_tail3(c))
 
 
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _euckr
+#define IS_MB2_CHAR(x,y)      (iseuc_kr_head(x) && iseuc_kr_tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
 static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
                             const char* p, const char *e)
 {
@@ -9929,41 +9935,6 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
-/*
-  Returns well formed length of a EUC-KR string.
-*/
-static size_t
-my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
-                         const char *b, const char *e,
-                         size_t pos, int *error)
-{
-  const char *b0= b;
-  const char *emb= e - 1; /* Last possible end of an MB character */
-
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    if ((uchar) b[0] < 128)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if (b < emb && iseuc_kr_head(*b) && iseuc_kr_tail(b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
-
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
   NULL,			/* init */
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 8c47b666cf4..52873c2f87e 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -180,10 +180,26 @@ static const uchar sort_order_eucjpms[]=
 };
 
 
-#define iseucjpms(c)     ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
-#define iskata(c)     ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
-#define iseucjpms_ss2(c) (((c)&0xff) == 0x8e)
-#define iseucjpms_ss3(c) (((c)&0xff) == 0x8f)
+/*
+  EUCJPMS encoding subcomponents:
+  [x00-x7F]                     # ASCII/JIS-Roman (one-byte/character)
+  [x8E][xA1-xDF]                # half-width katakana (two bytes/char)
+  [x8F][xA1-xFE][xA1-xFE]       # JIS X 0212-1990 (three bytes/char)
+  [xA1-xFE][xA1-xFE]            # JIS X 0208:1997 (two bytes/char)
+*/
+#define iseucjpms(c)     (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
+#define iskata(c)        (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
+#define iseucjpms_ss2(c) ((uchar) (c) == 0x8e)
+#define iseucjpms_ss3(c) ((uchar) (c) == 0x8f)
+
+
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _eucjpms
+#define IS_MB2_JIS(x,y)       (iseucjpms(x)     && iseucjpms(y))
+#define IS_MB2_KATA(x,y)      (iseucjpms_ss2(x) && iskata(y))
+#define IS_MB2_CHAR(x,y)      (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
+#define IS_MB3_CHAR(x,y,z)    (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
 
 
 static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
@@ -67416,61 +67432,6 @@ my_wc_mb_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
-/*
-  EUCJPMS encoding subcomponents:
-  [x00-x7F]                     # ASCII/JIS-Roman (one-byte/character)
-  [x8E][xA1-xDF]                # half-width katakana (two bytes/char)
-  [x8F][xA1-xFE][xA1-xFE]       # JIS X 0212-1990 (three bytes/char)
-  [xA1-xFE][xA1-xFE]            # JIS X 0208:1997 (two bytes/char)
-*/
-
-static
-size_t my_well_formed_len_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
-                                  const char *beg, const char *end, size_t pos,
-                                  int *error)
-{
-  const uchar *b= (uchar *) beg;
-  *error=0;
-
-  for ( ; pos && b < (uchar*) end; pos--, b++)
-  {
-    char *chbeg;
-    uint ch= *b;
-
-    if (ch <= 0x7F)                 /* one byte */
-      continue;
-
-    chbeg= (char *) b++;
-    if (b >= (uchar *) end)         /* need more bytes */
-      return (uint) (chbeg - beg);  /* unexpected EOL  */
-
-    if (iseucjpms_ss2(ch))          /* [x8E][xA1-xDF] */
-    {
-      if (iskata(*b))
-        continue;
-      *error=1;
-      return (uint) (chbeg - beg);  /* invalid sequence */
-    }
-
-    if (iseucjpms_ss3(ch))          /* [x8F][xA1-xFE][xA1-xFE] */
-    {
-      ch= *b++;
-      if (b >= (uchar*) end)
-      {
-        *error= 1;
-        return (uint)(chbeg - beg); /* unexpected EOL */
-      }
-    }
-
-    if (iseucjpms(ch) && iseucjpms(*b)) /* [xA1-xFE][xA1-xFE] */
-      continue;
-    *error=1;
-    return (size_t) (chbeg - beg);    /* invalid sequence */
-  }
-  return (size_t) (b - (uchar *) beg);
-}
-
-
 static
 size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
                            const char *str, const char *str_end)
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index b5aeed2088f..a4268b8fd68 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -165,6 +165,12 @@ static const uchar sort_order_gb2312[]=
 #define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
 
 
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _gb2312
+#define IS_MB2_CHAR(x,y)      (isgb2312head(x) && isgb2312tail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
 static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)),
 		    const char* p, const char *e)
 {
@@ -6332,41 +6338,6 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
 }
 
 
-/*
-  Returns well formed length of a EUC-KR string.
-*/
-static size_t
-my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
-                          const char *b, const char *e,
-                          size_t pos, int *error)
-{
-  const char *b0= b;
-  const char *emb= e - 1; /* Last possible end of an MB character */
-
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    if ((uchar) b[0] < 128)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
-
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
   NULL,			/* init */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index d282d96145d..392fdb487b6 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -43,6 +43,12 @@
 #define gbkhead(e)     ((uchar)(e>>8))
 #define gbktail(e)     ((uchar)(e&0xff))
 
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _gbk
+#define IS_MB2_CHAR(x,y)      (isgbkhead(x) && isgbktail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
+
+
 static const uchar ctype_gbk[257] =
 {
   0,				/* For standard library */
@@ -10726,43 +10732,6 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
-/*
-  Returns well formed length of a GBK string.
-*/
-static
-size_t my_well_formed_len_gbk(CHARSET_INFO *cs __attribute__((unused)),
-                              const char *b, const char *e,
-                              size_t pos, int *error)
-{
-  const char *b0= b;
-  const char *emb= e - 1; /* Last possible end of an MB character */
-
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    if ((uchar) b[0] < 128)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if ((b < emb) && isgbkcode((uchar)*b, (uchar)b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
-
-                             
-
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
   NULL,			/* init */
diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic
new file mode 100644
index 00000000000..70cc89c9af0
--- /dev/null
+++ b/strings/ctype-mb.ic
@@ -0,0 +1,94 @@
+/*
+   Copyright (c) 2015, MariaDB Foundation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+
+#ifndef MY_FUNCTION_NAME
+#error MY_FUNCTION_NAME is not defined
+#endif
+
+#if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR)
+#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
+#endif
+
+#if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR)
+#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
+#endif
+
+
+#ifdef WELL_FORMED_LEN
+/**
+  Returns the byte length of the longest well formed prefix of [b, e) that
+  holds at most nchars characters. *error is set to 1 if the scan stopped
+  at a wrong byte sequence, and to 0 otherwise. Meant for variable-width
+  character sets with mbminlen == 1 and mbmaxlen == 2, 3, or 4.
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
+                                  const char *b, const char *e,
+                                  size_t nchars, int *error)
+{
+  const char *b0= b; /* Start of the string, used to compute the result */
+
+  DBUG_ASSERT(cs->mbminlen == 1);
+  DBUG_ASSERT(cs->mbmaxlen <= 4);
+
+  for (*error= 0 ; b < e && nchars-- ; ) /* One character per iteration */
+  {
+    if ((uchar) b[0] < 128)
+    {
+      b++; /* Single byte ASCII character */
+      continue;
+    }
+
+    if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
+    {
+      b+= 2; /* Double byte character */
+      continue;
+    }
+
+#ifdef IS_MB3_CHAR
+    if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
+    {
+      b+= 3; /* Three-byte character */
+      continue;
+    }
+#endif
+
+#ifdef IS_MB4_CHAR
+    if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+    {
+      b+= 4; /* Four-byte character */
+      continue;
+    }
+#endif
+
+#ifdef IS_8BIT_CHAR
+    if (IS_8BIT_CHAR(b[0]))
+    {      
+      b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
+      continue;
+    }
+#endif
+
+    /* Wrong byte sequence, or a multi-byte character cut off by the end */
+    *error= 1;
+    break;
+  }
+  return b - b0; /* Bytes accepted before the end, the limit, or an error */
+}
+
+#endif /* WELL_FORMED_LEN */
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 2038632c9d3..432e2e5e823 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -176,10 +176,19 @@ static const uchar sort_order_sjis[]=
   (uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
 };
 
-#define issjishead(c) ((0x81<=(c) && (c)<=0x9f) || \
-                       ((0xe0<=(c)) && (c)<=0xfc))
-#define issjistail(c) ((0x40<=(c) && (c)<=0x7e) || \
-                       (0x80<=(c) && (c)<=0xfc))
+#define issjishead(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
+                       (0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+#define issjistail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
+                       (0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
+
+#define issjiskata(c) ((0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF))
+
+
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _sjis
+#define IS_8BIT_CHAR(x)       issjiskata(x)
+#define IS_MB2_CHAR(x,y)      (issjishead(x) && issjistail(y))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
 
 
 static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
@@ -34089,44 +34098,6 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
   return clen;
 }
 
-/*
-  Returns a well formed length of a SJIS string.
-  CP932 additional characters are also accepted.
-*/
-static
-size_t my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
-                               const char *b, const char *e,
-                               size_t pos, int *error)
-{
-  const char *b0= b;
-  *error= 0;
-  while (pos-- && b < e)
-  {
-    if ((uchar) b[0] < 128)
-    {
-      /* Single byte ascii character */
-      b++;
-    }
-    else  if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1]))
-    {
-      /* Double byte character */
-      b+= 2;
-    }
-    else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
-    {
-      /* Half width kana */
-      b++;
-    }
-    else
-    {
-      /* Wrong byte sequence */
-      *error= 1;
-      break;
-    }
-  }
-  return (size_t) (b - b0);
-}
-
 
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index f208d15f364..99f5be3fa38 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -179,10 +179,26 @@ static const uchar sort_order_ujis[]=
 };
 
 
-#define isujis(c)     ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
-#define iskata(c)     ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
-#define isujis_ss2(c) (((c)&0xff) == 0x8e)
-#define isujis_ss3(c) (((c)&0xff) == 0x8f)
+/*
+  EUC-JP encoding subcomponents:
+  [x00-x7F]                        # ASCII/JIS-Roman (one-byte/character)  
+  [x8E][xA1-xDF]                   # half-width katakana (two bytes/char)  
+  [x8F][xA1-xFE][xA1-xFE]          # JIS X 0212-1990 (three bytes/char)  
+  [xA1-xFE][xA1-xFE]               # JIS X 0208:1997 (two bytes/char)
+*/
+
+#define isujis(c)             (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
+#define iskata(c)             (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
+#define isujis_ss2(c)         ((uchar) (c) == 0x8e)
+#define isujis_ss3(c)         ((uchar) (c) == 0x8f)
+
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _ujis
+#define IS_MB2_JIS(x,y)       (isujis(x)        && isujis(y))
+#define IS_MB2_KATA(x,y)      (isujis_ss2(x)    && iskata(y))
+#define IS_MB2_CHAR(x, y)     (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
+#define IS_MB3_CHAR(x, y, z)  (isujis_ss3(x)    && IS_MB2_JIS(y,z))
+#define WELL_FORMED_LEN
+#include "ctype-mb.ic"
 
 
 static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
@@ -201,63 +217,6 @@ static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
 }
 
 
-/*
-  EUC-JP encoding subcomponents:
-  [x00-x7F]                        # ASCII/JIS-Roman (one-byte/character)  
-  [x8E][xA1-xDF]                   # half-width katakana (two bytes/char)  
-  [x8F][xA1-xFE][xA1-xFE]          # JIS X 0212-1990 (three bytes/char)  
-  [xA1-xFE][xA1-xFE]               # JIS X 0208:1997 (two bytes/char)
-*/
-
-static
-size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)),
-                               const char *beg, const char *end,
-                               size_t pos, int *error)
-{
-  const uchar *b= (uchar *) beg;
-  
-  for ( *error= 0 ; pos && b < (uchar*) end; pos--, b++)
-  {
-    char *chbeg;
-    uint ch= *b;
-    
-    if (ch <= 0x7F)                 /* one byte */
-      continue;
-    
-    chbeg= (char *) b++;
-    if (b >= (uchar *) end)         /* need more bytes */
-    {
-      *error= 1;
-      return (size_t) (chbeg - beg);            /* unexpected EOL  */ 
-    }
-    
-    if (isujis_ss2(ch))            /* [x8E][xA1-xDF] */
-    {
-      if (iskata(*b))
-        continue;
-      *error= 1;
-      return (size_t) (chbeg - beg);  /* invalid sequence */
-    }
-    
-    if (isujis_ss3(ch))           /* [x8F][xA1-xFE][xA1-xFE] */
-    {
-      ch= *b++;
-      if (b >= (uchar*) end)
-      {
-        *error= 1;
-        return (size_t) (chbeg - beg); /* unexpected EOL */
-      }
-    }
-    
-    if (isujis(ch) && isujis(*b)) /* [xA1-xFE][xA1-xFE] */
-      continue;
-    *error= 1;
-    return (size_t) (chbeg - beg);    /* invalid sequence */
-  }
-  return (size_t) (b - (uchar *) beg);
-}
-
-
 static
 size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
                        const char *str, const char *str_end)
author	Alexander Barkov <bar@mariadb.org>	2015-03-04 09:16:43 +0400
committer	Alexander Barkov <bar@mariadb.org>	2015-03-04 09:16:43 +0400
commit	a7ed8523e35ff2e82701cd1f483c8f665f322f3b (patch)
tree	436d9089a3028f07661d85fe28ed65bde23a4045
parent	d8c1165c28ae6ce2e29ecd5492c2540bfd6b2177 (diff)
download	mariadb-git-a7ed8523e35ff2e82701cd1f483c8f665f322f3b.tar.gz