Bug#15377 Valid multibyte sequences are truncated on INSERT

ctype-euc_kr.c, ctype-gb2312.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set specifications but just don't have a Unicode mapping. Previously only those which have a Unicode mapping could be stored, while unassigned characters led to data truncation. Many files: new file. strings/ctype-gb2312.c: Bug#15377 Valid multibyte sequences are truncated on INSERT. Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have a Unicode mapping could be stored. strings/ctype-euc_kr.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have a Unicode mapping could be stored.
author: unknown <bar@mysql.com> 2005-12-09 16:37:58 +0400
committer: unknown <bar@mysql.com> 2005-12-09 16:37:58 +0400
commit: 7063bd4d2bfe4688db60b28a15843406299a58f0 (patch)
tree: 2868d73a9285634c6a4b999453f6fb27a07e03eb /strings
parent: 5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff)
download: mariadb-git-7063bd4d2bfe4688db60b28a15843406299a58f0.tar.gz
2 files changed, 72 insertions, 2 deletions
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index f15e97de5be..2863b192f50 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+/* Returns the well formed length, in bytes, of an EUC-KR string [b, e):
+   at most pos characters are scanned; *error is set to 1 when a wrong
+   byte sequence stops the scan and to 0 otherwise. */
+static uint
+my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
+                         const char *b, const char *e,
+                         uint pos, int *error)
+{
+  const char *b0= b; /* Start of the string, kept to compute the result */
+  const char *emb= e - 1; /* Last possible end of an MB character */
+
+  *error= 0;
+  while (pos-- && b < e) /* Stop after pos characters or at end of input */
+  {
+    if ((uchar) b[0] < 128)
+    {
+      /* Single byte ascii character */
+      b++;
+    }
+    else  if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1])) /* b < emb: b[1] is inside [b, e) */
+    {
+      /* Double byte character */
+      b+= 2;
+    }
+    else
+    {
+      /* Wrong byte sequence */
+      *error= 1;
+      break;
+    }
+  }
+  return (uint) (b - b0); /* Bytes accepted before the scan stopped */
+}
+
+
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
   NULL,			/* init */
@@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
   mbcharlen_euc_kr,
   my_numchars_mb,
   my_charpos_mb,
-  my_well_formed_len_mb,
+  my_well_formed_len_euckr,
   my_lengthsp_8bit,
   my_numcells_8bit,
   my_mb_wc_euc_kr,	/* mb_wc   */
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 0cbad2d1c55..52dd61a8462 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
 }
 
 
+/* Returns the well formed length, in bytes, of a GB2312 string [b, e):
+   at most pos characters are scanned; *error is set to 1 when a wrong
+   byte sequence stops the scan and to 0 otherwise. */
+static uint
+my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
+                          const char *b, const char *e,
+                          uint pos, int *error)
+{
+  const char *b0= b; /* Start of the string, kept to compute the result */
+  const char *emb= e - 1; /* Last possible end of an MB character */
+
+  *error= 0;
+  while (pos-- && b < e) /* Stop after pos characters or at end of input */
+  {
+    if ((uchar) b[0] < 128)
+    {
+      /* Single byte ascii character */
+      b++;
+    }
+    else  if (b < emb && isgb2312head(*b) && isgb2312tail(b[1])) /* b < emb: b[1] is inside [b, e) */
+    {
+      /* Double byte character */
+      b+= 2;
+    }
+    else
+    {
+      /* Wrong byte sequence */
+      *error= 1;
+      break;
+    }
+  }
+  return (uint) (b - b0); /* Bytes accepted before the scan stopped */
+}
+
+
 static MY_COLLATION_HANDLER my_collation_ci_handler =
 {
   NULL,			/* init */
@@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
   mbcharlen_gb2312,
   my_numchars_mb,
   my_charpos_mb,
-  my_well_formed_len_mb,
+  my_well_formed_len_gb2312,
   my_lengthsp_8bit,
   my_numcells_8bit,
   my_mb_wc_gb2312,	/* mb_wc      */
author	unknown <bar@mysql.com>	2005-12-09 16:37:58 +0400
committer	unknown <bar@mysql.com>	2005-12-09 16:37:58 +0400
commit	7063bd4d2bfe4688db60b28a15843406299a58f0 (patch)
tree	2868d73a9285634c6a4b999453f6fb27a07e03eb /strings
parent	5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff)
download	mariadb-git-7063bd4d2bfe4688db60b28a15843406299a58f0.tar.gz