After merge fixes

Use the server character set if --default-character-set is not used. Added convert_string() for more efficient allocation plus character-set conversion of strings.
author: monty@mashka.mysql.fi <> 2003-08-19 00:08:08 +0300
committer: monty@mashka.mysql.fi <> 2003-08-19 00:08:08 +0300
commit: 4f7512160bfd467e5d53c3e133e1842fac2737ce (patch)
tree: df4f8c8cf4d399e322333c5516d5c19507b1fda8 /strings
parent: 2263e3e51faba531a0a7055dbf706a6a8719ad70 (diff)
download: mariadb-git-4f7512160bfd467e5d53c3e133e1842fac2737ce.tar.gz
1 files changed, 117 insertions, 164 deletions
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index b0c60e2e3eb..cf21f129664 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -262,13 +262,42 @@ static uchar sort_order_latin1_de[] = {
    68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
 };
 
-#define L1_AE 196
-#define L1_ae 228
-#define L1_OE 214
-#define L1_oe 246
-#define L1_UE 220
-#define L1_ue 252
-#define L1_ss 223
+
+/*
+  Same as sort_order_latin1_de, but maps ALL accented chars to unaccented ones
+*/
+
+uchar combo1map[]={
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
+   68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
+   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
+   68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
+};
+
+uchar combo2map[]={
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
+};
 
 
 /*
@@ -285,101 +314,48 @@ static uchar sort_order_latin1_de[] = {
 */
 
 
-#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent)   \
-  /* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */ \
-  if (ch2 != accent)							\
-  {									\
-    ch1= fst;								\
-    goto normal;							\
-  }									\
-  if (str1 == str1_end)							\
-    return res_if_str1_smaller;						\
-  {									\
-     int diff = (int) sort_order_latin1_de[*str1] - snd;		\
-     if (diff)								\
-        return diff*(-(res_if_str1_smaller));				\
-      /* They are equal (e.g., "Ae" == 'ä') */				\
-     str1++;								\
-  }
-
-
 static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
-                           const uchar * s1, uint len1,
-                           const uchar * s2, uint len2)
+				  const uchar *s1, uint len1,
+				  const uchar *s2, uint len2)
 {
   const uchar *e1 = s1 + len1;
   const uchar *e2 = s2 + len2;
+  uchar c1, c12=0, c2, c22=0;
 
-  while (s1 < e1 && s2 < e2)
+  while ((s1 < e1 || c12) && (s2 < e2 || c22))
   {
-    /*
-      Because sort_order_latin1_de doesn't convert 'Ä', Ü or ß we
-      can use it here.
-    */
-    uchar c1 = sort_order_latin1_de[*s1++];
-    uchar c2 = sort_order_latin1_de[*s2++];
-    if (c1 != c2)
+    if (c12)
+    {
+      c1=c12; c12=0;
+    }
+    else
     {
-      switch (c1) {
-      case 'A':
-	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'A', 'E', L1_AE);
-	break;
-      case 'O':
-	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'O', 'E', L1_OE);
-	break;
-      case 'U':
-	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'U', 'E', L1_UE);
-	break;
-      case 'S':
-	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'S', 'S', L1_ss);
-	break;
-      case L1_AE:
-	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'A', 'E', 'A');
-	break;
-      case L1_OE:
-	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'O', 'E', 'O');
-	break;
-      case L1_UE:
-	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'U', 'E', 'U');
-	break;
-      case L1_ss:
-	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'S', 'S', 'S');
-	break;
-      default:
-	/*
-	  Handle the case where 'c2' is a special character
-	  If this is true, we know that c1 can't match this character.
-	*/
-    normal:
-	switch (c2) {
-	case L1_AE:
-	  return  (int) c1 - (int) 'A';
-	case L1_OE:
-	  return  (int) c1 - (int) 'O';
-	case L1_UE:
-	  return  (int) c1 - (int) 'U';
-	case L1_ss:
-	  return  (int) c1 - (int) 'S';
-	default:
-	{
-	  int diff= (int) c1 - (int) c2;
-	  if (diff)
-	    return diff;
-	}
-	break;
-	}
-      }
+      c12=combo2map[*s1];
+      c1=combo1map[*s1++];
     }
+    if (c22)
+    {
+      c2=c22; c22=0;
+    }
+    else
+    {
+      c22=combo2map[*s2];
+      c2=combo1map[*s2++];
+    }
+    if (c1 != c2) return (int)c1 - (int)c2;
   }
-  /* A simple test of string lengths won't work -- we test to see
-   * which string ran out first */
-  return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
+
+  /*
+    A simple test of string lengths won't work -- we test to see
+    which string ran out first
+  */
+  return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0;
 }
 
-static
-int my_strnncollsp_latin1_de(CHARSET_INFO * cs, 
-			const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
+
+static int my_strnncollsp_latin1_de(CHARSET_INFO *cs,
+				    const uchar *s, uint slen,
+				    const uchar *t, uint tlen)
 {
   for ( ; slen && my_isspace(cs, s[slen-1]) ; slen--);
   for ( ; tlen && my_isspace(cs, t[tlen-1]) ; tlen--);
@@ -388,99 +364,76 @@ int my_strnncollsp_latin1_de(CHARSET_INFO * cs,
 
 
 static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
-                          uchar * dest, uint len,
-                          const uchar * src, uint srclen)
+				 uchar * dest, uint len,
+				 const uchar * src, uint srclen)
 {
   const uchar *dest_orig = dest;
   const uchar *de = dest + len;
   const uchar *se = src + srclen;
-  while (src < se && dest < de)
+  for ( ; src < se && dest < de ; src++)
   {
-    uchar chr=sort_order_latin1_de[*src];
-    switch (chr) {
-    case L1_AE:
-      *dest++ = 'A';
-      if (dest < de)
-	*dest++ = 'E';
-      break;
-    case L1_OE:
-      *dest++ = 'O';
-      if (dest < de)
-	*dest++ = 'E';
-      break;
-    case L1_UE:
-      *dest++ = 'U';
-      if (dest < de)
-	*dest++ = 'E';
-      break;
-    case L1_ss:
-      *dest++ = 'S';
-      if (dest < de)
-	*dest++ = 'S';
-      break;
-    default:
-      *dest++= chr;
-      break;
-    }
-    ++src;
+    uchar chr=combo1map[*src];
+    *dest++=chr;
+    if ((chr=combo2map[*src]) && dest < de)
+      *dest++=chr;
   }
-  return dest - dest_orig;
+  return (int) (dest - dest_orig);
 }
 
 
 static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
 {
-    my_strnncoll_latin1_de,
-    my_strnncollsp_latin1_de,
-    my_strnxfrm_latin1_de,
-    my_like_range_simple,
-    my_wildcmp_8bit,
-    my_strcasecmp_8bit,
-    my_hash_sort_simple
+  my_strnncoll_latin1_de,
+  my_strnncollsp_latin1_de,
+  my_strnxfrm_latin1_de,
+  my_like_range_simple,
+  my_wildcmp_8bit,
+  my_strcasecmp_8bit,
+  my_hash_sort_simple
 };
 
 
 CHARSET_INFO my_charset_latin1_german2_ci=
 {
-    31,0,0,				/* number    */
-    MY_CS_COMPILED|MY_CS_STRNXFRM,	/* state     */
-    "latin1",				/* cs name    */
-    "latin1_german2_ci",		/* name      */
-    "",					/* comment   */
-    ctype_latin1,
-    to_lower_latin1,
-    to_upper_latin1,
-    sort_order_latin1_de,
-    latin1_uni,				/* tab_to_uni   */
-    NULL,				/* tab_from_uni */
-    "","",
-    2,					/* strxfrm_multiply */
-    1,					/* mbmaxlen  */
-    0,
-    &my_charset_handler,
-    &my_collation_german2_ci_handler
+  31,0,0,				/* number    */
+  MY_CS_COMPILED|MY_CS_STRNXFRM,	/* state     */
+  "latin1",				/* cs name    */
+  "latin1_german2_ci",			/* name      */
+  "",					/* comment   */
+  ctype_latin1,
+  to_lower_latin1,
+  to_upper_latin1,
+  sort_order_latin1_de,
+  latin1_uni,				/* tab_to_uni   */
+  NULL,					/* tab_from_uni */
+  "","",
+  2,					/* strxfrm_multiply */
+  1,					/* mbmaxlen  */
+  0,
+  &my_charset_handler,
+  &my_collation_german2_ci_handler
 };
 
 
 CHARSET_INFO my_charset_latin1_bin=
 {
-    47,0,0,				/* number    */
-    MY_CS_COMPILED|MY_CS_BINSORT,	/* state     */
-    "latin1",				/* cs name    */
-    "latin1_bin",			/* name      */
-    "",					/* comment   */
-    ctype_latin1,
-    to_lower_latin1,
-    to_upper_latin1,
-    sort_order_latin1_de,
-    latin1_uni,				/* tab_to_uni   */
-    NULL,				/* tab_from_uni */
-    "",
-    "",
-    0,					/* strxfrm_multiply */
-    1,					/* mbmaxlen  */
-    0,
-    &my_charset_handler,
-    &my_collation_bin_handler
+  47,0,0,				/* number    */
+  MY_CS_COMPILED|MY_CS_BINSORT,		/* state     */
+  "latin1",				/* cs name    */
+  "latin1_bin",				/* name      */
+  "",					/* comment   */
+  ctype_latin1,
+  to_lower_latin1,
+  to_upper_latin1,
+  sort_order_latin1_de,
+  latin1_uni,				/* tab_to_uni   */
+  NULL,					/* tab_from_uni */
+  "",
+  "",
+  0,					/* strxfrm_multiply */
+  1,					/* mbmaxlen  */
+  0,
+  &my_charset_handler,
+  &my_collation_bin_handler
 };
author	monty@mashka.mysql.fi <>	2003-08-19 00:08:08 +0300
committer	monty@mashka.mysql.fi <>	2003-08-19 00:08:08 +0300
commit	4f7512160bfd467e5d53c3e133e1842fac2737ce (patch)
tree	df4f8c8cf4d399e322333c5516d5c19507b1fda8 /strings
parent	2263e3e51faba531a0a7055dbf706a6a8719ad70 (diff)
download	mariadb-git-4f7512160bfd467e5d53c3e133e1842fac2737ce.tar.gz