Bug#15375 Unassigned multibyte codes are broken

into parts when converting to Unicode. m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character specifications, but don't have a Unicode mapping). Many files: Fixing conversion function to return new codes. ctype_ujis.test, ctype_gbk.test, ctype_big5.test: Adding a test case. ctype_ujis.result, ctype_gbk.result, ctype_big5.result: Fixing results accordingly. include/m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode. mysql-test/r/ctype_big5.result: Fixing results accordingly. mysql-test/r/ctype_gbk.result: Fixing results accordingly. mysql-test/r/ctype_ujis.result: Fixing results accordingly. mysql-test/t/ctype_big5.test: Adding a test case. mysql-test/t/ctype_gbk.test: Adding a test case. mysql-test/t/ctype_ujis.test: Adding a test case. sql/sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character specifications, but don't have a Unicode mapping). strings/ctype-big5.c: Fixing conversion function to return new codes. strings/ctype-bin.c: Fixing conversion function to return new codes. strings/ctype-cp932.c: Fixing conversion function to return new codes. strings/ctype-euc_kr.c: Fixing conversion function to return new codes. strings/ctype-gb2312.c: Fixing conversion function to return new codes. strings/ctype-gbk.c: Fixing conversion function to return new codes. strings/ctype-latin1.c: Fixing conversion function to return new codes. strings/ctype-simple.c: Fixing conversion function to return new codes. strings/ctype-sjis.c: Fixing conversion function to return new codes. 
strings/ctype-tis620.c: Fixing conversion function to return new codes. strings/ctype-ucs2.c: Fixing conversion function to return new codes. strings/ctype-ujis.c: Fixing conversion function to return new codes. strings/ctype-utf8.c: Fixing conversion function to return new codes.
author: unknown <bar@mysql.com> 2005-12-12 21:42:09 +0400
committer: unknown <bar@mysql.com> 2005-12-12 21:42:09 +0400
commit: a464e01713d19d358eb7feab2cfdf88bb274d5df (patch)
tree: 6c15cf077b53c2157ad83b68ecbe604863e5aabf
parent: 5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff)
download: mariadb-git-a464e01713d19d358eb7feab2cfdf88bb274d5df.tar.gz
21 files changed, 122 insertions, 56 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index a5dc7cc00c6..a6ffbaa713f 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -44,10 +44,19 @@ typedef struct unicase_info_st
   uint16 sort;
 } MY_UNICASE_INFO;
 
-#define MY_CS_ILSEQ	0
-#define MY_CS_ILUNI	0
-#define MY_CS_TOOSMALL	-1
-#define MY_CS_TOOFEW(n)	(-1-(n))
+
+/* mb_wc and wc_mb return codes */
+#define MY_CS_ILSEQ	0     /* Wrong byte sequence: mb_wc                 */
+#define MY_CS_ILUNI	0     /* Cannot encode Unicode to charset: wc_mb    */
+#define MY_CS_TOOSMALL  -101  /* Need at least one byte:    wc_mb and mb_wc */
+#define MY_CS_TOOSMALL2 -102  /* Need at least two bytes:   wc_mb and mb_wc */
+#define MY_CS_TOOSMALL3 -103  /* Need at least three bytes: wc_mb and mb_wc */
+/* These following three are currently not really used */
+#define MY_CS_TOOSMALL4 -104  /* Need at least 4 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL5 -105  /* Need at least 5 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL6 -106  /* Need at least 6 bytes: wc_mb and mb_wc */
+/* A helper macro for "need at least n bytes" */
+#define MY_CS_TOOSMALLN(n)    (-100-(n))
 
 #define MY_SEQ_INTTAIL	1
 #define MY_SEQ_SPACES	2
diff --git a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
index 4c5832a57e9..6574908101c 100644
--- a/mysql-test/r/ctype_big5.result
+++ b/mysql-test/r/ctype_big5.result
@@ -189,3 +189,6 @@ select hex(a) from t1 where a = _big5 0xF9DC;
 hex(a)
 E5ABBA
 drop table t1;
+select hex(convert(_big5 0xC84041 using ucs2));
+hex(convert(_big5 0xC84041 using ucs2))
+003F0041
diff --git a/mysql-test/r/ctype_gbk.result b/mysql-test/r/ctype_gbk.result
index aaffe692126..241539ecf42 100644
--- a/mysql-test/r/ctype_gbk.result
+++ b/mysql-test/r/ctype_gbk.result
@@ -165,3 +165,6 @@ hex(a)
 A1A1
 A3A0
 DROP TABLE t1;
+select hex(convert(_gbk 0xA14041 using ucs2));
+hex(convert(_gbk 0xA14041 using ucs2))
+003F0041
diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result
index c86cbe9ce53..77258ba1730 100644
--- a/mysql-test/r/ctype_ujis.result
+++ b/mysql-test/r/ctype_ujis.result
@@ -2307,3 +2307,9 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
 c2h
 ab_def
 drop table t1;
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+hex(convert(_ujis 0xA5FE41 using ucs2))
+003F0041
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+hex(convert(_ujis 0x8FABF841 using ucs2))
+003F0041
diff --git a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
index ffe2a12234e..200002cd235 100644
--- a/mysql-test/t/ctype_big5.test
+++ b/mysql-test/t/ctype_big5.test
@@ -53,4 +53,14 @@ alter table t1 convert to character set utf8;
 select hex(a) from t1 where a = _big5 0xF9DC;
 drop table t1;
 
+#
+# Bug#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xC840, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_big5 0xC84041 using ucs2));
+
 # End of 4.1 tests
diff --git a/mysql-test/t/ctype_gbk.test b/mysql-test/t/ctype_gbk.test
index 5eeade96186..7aec48586d8 100644
--- a/mysql-test/t/ctype_gbk.test
+++ b/mysql-test/t/ctype_gbk.test
@@ -31,4 +31,14 @@ INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
 SELECT hex(a) FROM t1 ORDER BY a;
 DROP TABLE t1;
 
+#
+# Bug#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA140, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_gbk 0xA14041 using ucs2));
+
 # End of 4.1 tests
diff --git a/mysql-test/t/ctype_ujis.test b/mysql-test/t/ctype_ujis.test
index bf74371510b..12d05f44a94 100644
--- a/mysql-test/t/ctype_ujis.test
+++ b/mysql-test/t/ctype_ujis.test
@@ -1152,4 +1152,19 @@ SET collation_connection='ujis_bin';
 -- source include/ctype_innodb_like.inc
 -- source include/ctype_like_escape.inc
 
+#
+# Bug#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA5FE, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+# This one should return 0x003F0041:
+# scan unassigned three-byte character 0x8FABF8,
+# convert it as QUESTION MARK 0x003F and then scan
+# the next character, which is a single byte character 0x41.
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+
 # End of 4.1 tests
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index ab2db4aaf53..939ffe8d9d2 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -806,8 +806,18 @@ copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
       from++;
       wc= '?';
     }
+    else if (cnvres > MY_CS_TOOSMALL)
+    {
+      /*
+        A correct multibyte sequence detected
+        But it doesn't have Unicode mapping.
+      */
+      error_count++;
+      from+= (-cnvres);
+      wc= '?';
+    }
     else
-      break;					// Impossible char.
+      break;  // Not enough characters
 
 outp:
     if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index e15554fa576..89a40b15288 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6259,7 +6259,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -6268,10 +6268,10 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
 
   if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 1ac79ac9ca0..7d0932e77bc 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -220,7 +220,7 @@ static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
 			const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=str[0];
   return 1;
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 63f95a28037..e8c62b0315e 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs  __attribute__((unused)),
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi < 0x80)
   {
@@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs  __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index f15e97de5be..8ebcb34c0ab 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8601,7 +8601,7 @@ my_wc_mb_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -8617,7 +8617,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -8626,10 +8626,10 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 0cbad2d1c55..4009b032dd8 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5651,7 +5651,7 @@ my_wc_mb_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   code|=0x8080;
   s[0]=code>>8;
@@ -5668,7 +5668,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
   hi=(int) s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -5677,10 +5677,10 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index b5b86984794..a58c99fa1d6 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9889,7 +9889,7 @@ my_wc_mb_gbk(CHARSET_INFO *cs  __attribute__((unused)),
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
     
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -9903,7 +9903,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
   int hi;
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   hi=s[0];
   
@@ -9914,10 +9914,10 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
     
   if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
   
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index e5deba885e7..5d3fc75f337 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -363,10 +363,10 @@ int my_mb_wc_latin1(CHARSET_INFO *cs  __attribute__((unused)),
 		    const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 static
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 095b2f3a4ac..ccdfb5936b7 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -207,10 +207,10 @@ int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
 		  const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs->tab_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index da79f1796b8..38a9c9a6428 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4501,7 +4501,7 @@ my_wc_mb_sjis(CHARSET_INFO *cs  __attribute__((unused)),
 
 mb:
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -4515,7 +4515,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs  __attribute__((unused)),
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi < 0x80)
   {
@@ -4530,10 +4530,10 @@ my_mb_wc_sjis(CHARSET_INFO *cs  __attribute__((unused)),
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 35cf10f1bcc..35c712b65e8 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -820,10 +820,10 @@ int my_mb_wc_tis620(CHARSET_INFO *cs  __attribute__((unused)),
 		  const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 static
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index ad07fd9903c..2dd906d7953 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -95,7 +95,7 @@ static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
 		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
   if (s+2 > e) /* Need 2 characters */
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   *pwc= ((unsigned char)s[0]) * 256  + ((unsigned char)s[1]);
   return 2;
@@ -105,7 +105,7 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
 		       my_wc_t wc, uchar *r, uchar *e)
 {
   if ( r+2 > e ) 
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   r[0]= (uchar) (wc >> 8);
   r[1]= (uchar) (wc & 0xFF);
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index b3bba85968e..e7a5cc97867 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -242,7 +242,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs  __attribute__((unused)),
 		  const uchar *e __attribute__((unused)))
 {
   wc[0]=tab_jisx0201_uni[*s];
-  return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && s[0]) ? -1 : 1;
 }
 
 
@@ -8341,7 +8341,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
   int c1,c2,c3;
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   c1=s[0];
   
@@ -8353,7 +8353,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
     
   c2=s[1];
   
@@ -8368,7 +8368,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
     {
       pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
       if (!pwc[0])
-        return MY_CS_ILSEQ;
+        return -2;
     }
     else
     {
@@ -8388,7 +8388,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
     
     ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
     if (ret!=1)
-      return ret;
+      return -2;
     return 2;
   }
   
@@ -8399,7 +8399,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
       return MY_CS_ILSEQ;
     
     if (s+3>e)
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL3;
     
     c3=s[2];
     if (c3 < 0xA1 || c3>=0xFF)
@@ -8408,8 +8408,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
     if (c2<0xF5)
     {
       pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
-      if (!pwc)
-        return MY_CS_ILSEQ;
+      if (!pwc[0])
+        return -3;
     }
     else
     {
@@ -8440,7 +8440,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
   if ((jp=my_uni_jisx0208_onechar(wc)))
   {
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
       
     jp+=0x8080;
     s[0]=jp>>8;
@@ -8452,7 +8452,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
   if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
   {
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
     s[1]= s[0];
     s[0]= 0x8E;
     return 2;
@@ -8462,7 +8462,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
   if ((jp=my_uni_jisx0212_onechar(wc)))
   {
     if (s+3>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL3;
       
     jp+=0x8080;
     s[0]=0x8F;
@@ -8476,7 +8476,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
   if (wc>=0xE000 && wc<0xE3AC)
   { 
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
       
     c1=((unsigned)(wc-0xE000)/94)+0xF5;
     s[0]=c1;
@@ -8490,7 +8490,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
   if (wc>=0xE3AC && wc<0xE758)
   {
     if (s+3>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL3;
       
     s[0]=0x8F;
     c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index b3e78ce27e9..94e8e6ba797 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1765,7 +1765,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   unsigned char c;
 
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
 
   c= s[0];
   if (c < 0x80)
@@ -1778,7 +1778,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xe0)
   {
     if (s+2 > e) /* We need 2 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL2;
 
     if (!((s[1] ^ 0x80) < 0x40))
       return MY_CS_ILSEQ;
@@ -1789,7 +1789,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xf0)
   {
     if (s+3 > e) /* We need 3 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL3;
 
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
       return MY_CS_ILSEQ;
@@ -1804,7 +1804,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+4 > e) /* We need 4 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL4;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1822,7 +1822,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
    else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+5 >e) /* We need 5 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL5;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1841,7 +1841,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
   {
     if ( s+6 >e ) /* We need 6 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL6;
 
     if (!((s[1] ^ 0x80) < 0x40   &&
           (s[2] ^ 0x80) < 0x40   &&
@@ -1892,7 +1892,7 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
     Because of it (r+count > e), not (r+count-1 >e )
    */
   if ( r+count > e )
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALLN(count);
 
   switch (count) {
     /* Fall through all cases!!! */
author	unknown <bar@mysql.com>	2005-12-12 21:42:09 +0400
committer	unknown <bar@mysql.com>	2005-12-12 21:42:09 +0400
commit	a464e01713d19d358eb7feab2cfdf88bb274d5df (patch)
tree	6c15cf077b53c2157ad83b68ecbe604863e5aabf
parent	5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff)
download	mariadb-git-a464e01713d19d358eb7feab2cfdf88bb274d5df.tar.gz