Fixes for German sorting order.

Docs/manual.texi: Update for German sorting
configure.in: Don't make the German sort order default
myisam/mi_delete_all.c: Truncate files on DELETE FROM table_name to not get warnings when checking files
myisam/mi_search.c: Fix for multi-byte character sets.
sql/item_cmpfunc.cc: Use current character set when using STRCMP()
strings/ctype-latin1_de.c: F [message truncated in source]
author: unknown <monty@hundin.mysql.fi> 2001-09-11 01:40:52 +0300
committer: unknown <monty@hundin.mysql.fi> 2001-09-11 01:40:52 +0300
commit: 34925f8d823c700c939f0622b6af85001458a226 (patch)
tree: 6a962f88c6b25e60b6f09ab8b8c7eb0cbb47bc22 /strings
parent: 354882c8ce232770b2f3604cbcd5142aad6b002c (diff)
download: mariadb-git-34925f8d823c700c939f0622b6af85001458a226.tar.gz
1 files changed, 73 insertions, 145 deletions
diff --git a/strings/ctype-latin1_de.c b/strings/ctype-latin1_de.c
index c73c89cef46..a0a9686e0c1 100644
--- a/strings/ctype-latin1_de.c
+++ b/strings/ctype-latin1_de.c
@@ -99,12 +99,10 @@ uchar to_upper_latin1_de[] = {
  * This is a simple latin1 mapping table, which maps all accented
  * characters to their non-accented equivalents.  Note: in this
  * table, 'á' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
- * accented characters are treated the same way.
- *
- * SPECIAL NOTE: 'ß' (the sz ligature), which isn't really an
- * accented 's', is mapped to 'S', to simplify the sorting
- * functions.
+ * accented characters except the following are treated the same way.
+ * Ü, ü, É, é, Ö, ö, Ä, ä
  */
+
 uchar sort_order_latin1_de[] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -118,10 +116,10 @@ uchar sort_order_latin1_de[] = {
   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
-   68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
-   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
-   68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
+   65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
+   68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
+   65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
+   68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
 };
 
 #define L1_AE 196
@@ -132,6 +130,39 @@ uchar sort_order_latin1_de[] = {
 #define L1_ue 252
 #define L1_ss 223
 
+
+/*
+  Some notes about the following comparison rules:
+  By definition, my_strnncoll_latin1_de() must work exactly as if we had called
+  my_strnxfrm_latin1_de() on both strings and compared the resulting strings.
+
+  This means that:
+  Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will convert
+  both to AE.
+
+  The other option would be to not do any accent removal in
+  sort_order_latin1_de[] at all.
+*/
+
+
+#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent)   \
+  /* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */ \
+  if (ch2 != accent)							\
+  {									\
+    ch1= fst;								\
+    goto normal;							\
+  }									\
+  if (str1 == str1_end)							\
+    return res_if_str1_smaller;						\
+  {									\
+     int diff = (int) sort_order_latin1_de[*str1] - snd;		\
+     if (diff)								\
+        return diff*(-(res_if_str1_smaller));				\
+      /* They are equal (e.g., "Ae" == 'Ä') */				\
+     str1++;								\
+  }
+
+
 int my_strnncoll_latin1_de(const uchar * s1, int len1,
                            const uchar * s2, int len2)
 {
@@ -140,172 +171,71 @@ int my_strnncoll_latin1_de(const uchar * s1, int len1,
 
   while (s1 < e1 && s2 < e2)
   {
-    /* to_upper is used instead of sort_order, because we don't want
-     * '�' to match "�E", only "AE".  This couples the to_upper and
-     * sort_order tables together, but that is acceptable. */
-    uchar c1 = to_upper_latin1_de[*s1];
-    uchar c2 = to_upper_latin1_de[*s2];
+    /*
+      Because sort_order_latin1_de doesn't convert 'Ä', Ü or ß, we
+      can use it here.
+    */
+    uchar c1 = sort_order_latin1_de[*s1++];
+    uchar c2 = sort_order_latin1_de[*s2++];
     if (c1 != c2)
     {
-      switch (c1)
-      {
-
-#define CHECK_S1_COMBO(fst, snd, accent)                                  \
-  /* Invariant: c1 == fst == sort_order_latin1_de[accent] && c1 != c2 */  \
-  if (c2 == accent)                                                       \
-  {                                                                       \
-    if (s1 + 1 < e1)                                                      \
-    {                                                                     \
-      if (to_upper_latin1_de[*(s1 + 1)] == snd)                           \
-      {                                                                   \
-	/* They are equal (e.g., "Ae" == '�') */                          \
-	s1 += 2;                                                          \
-	s2 += 1;                                                          \
-      }                                                                   \
-      else                                                                \
-      {                                                                   \
-	int diff = sort_order_latin1_de[*(s1 + 1)] - snd;                 \
-	if (diff)                                                         \
-	  return diff;                                                    \
-	else                                                              \
-	  /* Comparison between, e.g., "A�" and '�' */                    \
-	  return 1;                                                       \
-      }                                                                   \
-    }                                                                     \
-    else                                                                  \
-      return -1;                                                          \
-  }                                                                       \
-  else                                                                    \
-    /* The following should work even if c2 is [����] */                  \
-    return fst - sort_order_latin1_de[c2]
-
+      switch (c1) {
       case 'A':
-	CHECK_S1_COMBO('A', 'E', L1_AE);
+	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'A', 'E', L1_AE);
 	break;
       case 'O':
-	CHECK_S1_COMBO('O', 'E', L1_OE);
+	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'O', 'E', L1_OE);
 	break;
       case 'U':
-	CHECK_S1_COMBO('U', 'E', L1_UE);
+	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'U', 'E', L1_UE);
 	break;
       case 'S':
-	CHECK_S1_COMBO('S', 'S', L1_ss);
+	CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'S', 'S', L1_ss);
 	break;
-
-#define CHECK_S2_COMBO(fst, snd)                                          \
-  /* Invariant: sort_order_latin1_de[c1] == fst && c1 != c2 */            \
-  if (c2 == fst)                                                          \
-  {                                                                       \
-    if (s2 + 1 < e2)                                                      \
-    {                                                                     \
-      if (to_upper_latin1_de[*(s2 + 1)] == snd)                           \
-      {                                                                   \
-	/* They are equal (e.g., '�' == "Ae") */                          \
-	s1 += 1;                                                          \
-	s2 += 2;                                                          \
-      }                                                                   \
-      else                                                                \
-      {                                                                   \
-	int diff = sort_order_latin1_de[*(s1 + 1)] - snd;                 \
-	if (diff)                                                         \
-	  return diff;                                                    \
-	else                                                              \
-	  /* Comparison between, e.g., '�' and "A�" */                    \
-	  return -1;                                                      \
-      }                                                                   \
-    }                                                                     \
-    else                                                                  \
-      return 1;                                                           \
-  }                                                                       \
-  else                                                                    \
-    /* The following should work even if c2 is [����] */                  \
-    return fst - sort_order_latin1_de[c2]
-
       case L1_AE:
-	CHECK_S2_COMBO('A', 'E');
+	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'A', 'E', 'A');
 	break;
       case L1_OE:
-	CHECK_S2_COMBO('O', 'E');
+	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'O', 'E', 'O');
 	break;
       case L1_UE:
-	CHECK_S2_COMBO('U', 'E');
+	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'U', 'E', 'U');
 	break;
       case L1_ss:
-	CHECK_S2_COMBO('S', 'S');
+	CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'S', 'S', 'S');
 	break;
       default:
+	/*
+	  Handle the case where 'c2' is a special character.
+	  If this is true, we know that c1 can't match this character.
+	*/
+    normal:
 	switch (c2) {
 	case L1_AE:
+	  return  (int) c1 - (int) 'A';
 	case L1_OE:
+	  return  (int) c1 - (int) 'O';
 	case L1_UE:
+	  return  (int) c1 - (int) 'U';
 	case L1_ss:
-	  /* Make sure these do not match (e.g., "�" != "�") */
-	  return sort_order_latin1_de[c1] - sort_order_latin1_de[c2];
-	  break;
+	  return  (int) c1 - (int) 'S';
 	default:
-	  if (sort_order_latin1_de[*s1] != sort_order_latin1_de[*s2])
-	    return sort_order_latin1_de[*s1] - sort_order_latin1_de[*s2];
-	  ++s1;
-	  ++s2;
-	  break;
+	{
+	  int diff= (int) c1 - (int) c2;
+	  if (diff)
+	    return diff;
 	}
 	break;
-
-#undef CHECK_S1_COMBO
-#undef CHECK_S2_COMBO
-
-      }
-    }
-    else
-    {
-      /* In order to consistently treat "ae" == '�', but to NOT allow
-       * "a�" == '�', we must look ahead here to ensure that the second
-       * letter in a combo really is the unaccented 'e' (or 's' for
-       * "ss") and is not an accented character with the same sort_order. */
-      ++s1;
-      ++s2;
-      if (s1 < e1 && s2 < e2)
-      {
-	switch (c1)
-	{
-	case 'A':
-	case 'O':
-	case 'U':
-	  if (sort_order_latin1_de[*s1] == 'E' &&
-	      to_upper_latin1_de[*s1] != 'E' &&
-	      to_upper_latin1_de[*s2] == 'E')
-	    /* Comparison between, e.g., "A�" and "AE" */
-	    return 1;
-	  if (sort_order_latin1_de[*s2] == 'E' &&
-	      to_upper_latin1_de[*s2] != 'E' &&
-	      to_upper_latin1_de[*s1] == 'E')
-	    /* Comparison between, e.g., "AE" and "A�" */
-	    return -1;
-	  break;
-	case 'S':
-	  if (sort_order_latin1_de[*s1] == 'S' &&
-	      to_upper_latin1_de[*s1] != 'S' &&
-	      to_upper_latin1_de[*s2] == 'S')
-	    /* Comparison between, e.g., "S�" and "SS" */
-	    return 1;
-	  if (sort_order_latin1_de[*s2] == 'S' &&
-	      to_upper_latin1_de[*s2] != 'S' &&
-	      to_upper_latin1_de[*s1] == 'S')
-	    /* Comparison between, e.g., "SS" and "S�" */
-	    return -1;
-	  break;
-	default:
-	  break;
 	}
       }
     }
   }
-
   /* A simple test of string lengths won't work -- we test to see
    * which string ran out first */
   return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
 }
 
+
 int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
 {
   const uchar *dest_orig = dest;
@@ -313,22 +243,19 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
   const uchar *se = src + srclen;
   while (src < se && dest < de)
   {
-    switch (*src)
-    {
+    uchar chr=sort_order_latin1_de[*src];
+    switch (chr) {
     case L1_AE:
-    case L1_ae:
       *dest++ = 'A';
       if (dest < de)
 	*dest++ = 'E';
       break;
     case L1_OE:
-    case L1_oe:
       *dest++ = 'O';
       if (dest < de)
 	*dest++ = 'E';
       break;
     case L1_UE:
-    case L1_ue:
       *dest++ = 'U';
       if (dest < de)
 	*dest++ = 'E';
@@ -339,7 +266,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
 	*dest++ = 'S';
       break;
     default:
-      *dest++ = sort_order_latin1_de[*src];
+      *dest++= chr;
       break;
     }
     ++src;
@@ -347,6 +274,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
   return dest - dest_orig;
 }
 
+
 int my_strcoll_latin1_de(const uchar * s1, const uchar * s2)
 {
   /* XXX QQ: This should be fixed to not call strlen */
author	unknown <monty@hundin.mysql.fi>	2001-09-11 01:40:52 +0300
committer	unknown <monty@hundin.mysql.fi>	2001-09-11 01:40:52 +0300
commit	34925f8d823c700c939f0622b6af85001458a226 (patch)
tree	6a962f88c6b25e60b6f09ab8b8c7eb0cbb47bc22 /strings
parent	354882c8ce232770b2f3604cbcd5142aad6b002c (diff)
download	mariadb-git-34925f8d823c700c939f0622b6af85001458a226.tar.gz