Merge bk-internal.mysql.com:/home/bk/mysql-4.1

into mysql.com:/home/my/mysql-4.1 myisam/mi_check.c: Auto merged sql/field.cc: Auto merged strings/ctype-sjis.c: Auto merged strings/ctype-ucs2.c: Auto merged
author: unknown <monty@mysql.com> 2004-03-25 15:05:42 +0200
committer: unknown <monty@mysql.com> 2004-03-25 15:05:42 +0200
commit: fe596dee5869ac1f99a9d88061bc9dff402849f6 (patch)
tree: b090f762169aabf4fa3602b52d4463cd9c66106b /strings
parent: 23e480a80c64ca9b390a6fa82f68d9f8bbb1fa67 (diff)
parent: 887d6f144d85b9a869e4f8030c41816bbd32771b (diff)
download: mariadb-git-fe596dee5869ac1f99a9d88061bc9dff402849f6.tar.gz
11 files changed, 557 insertions, 345 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index ee55cfda6c1..2bde29ecc47 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i)
   return 0xA140;
 }
 
-static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), 
-                      const uchar * s1, uint len1, 
-                      const uchar * s2, uint len2)
+/* Compare the first 'length' bytes of *a_res and *b_res in big5 sort order. */
+/* Equal: return 0 and advance both pointers; else return the difference.    */
+static int my_strnncoll_big5_internal(const uchar **a_res,
+				      const uchar **b_res, uint length)
 {
-  uint len;
+  const uchar *a= *a_res, *b= *b_res;	/* same type as the caller's buffers */
 
-  len = min(len1,len2);
-  while (len--)
+  while (length--)
   {
-    if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1)))
+    if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
     {
-      if (*s1 != *s2 || *(s1+1) != *(s2+1))
-	return ((int) big5code(*s1,*(s1+1)) -
-		(int) big5code(*s2,*(s2+1)));
-      s1 +=2;
-      s2 +=2;
-      len--;
-    } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++])
-      return ((int) sort_order_big5[(uchar) s1[-1]] -
-	      (int) sort_order_big5[(uchar) s2[-1]]);
+      if (*a != *b || *(a+1) != *(b+1))
+	return ((int) big5code(*a,*(a+1)) -
+		(int) big5code(*b,*(b+1)));
+      a+= 2;
+      b+= 2;
+      length--;				/* account for the second byte */
+    }
+    else if (sort_order_big5[(uchar) *a++] !=
+	     sort_order_big5[(uchar) *b++])
+      return ((int) sort_order_big5[(uchar) a[-1]] -
+	      (int) sort_order_big5[(uchar) b[-1]]);
   }
-  return (int) (len1-len2);
+  *a_res= a;				/* report where the comparison stopped */
+  *b_res= b;
+  return 0;
 }
 
-static
-int my_strnncollsp_big5(CHARSET_INFO * cs, 
-			const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
+
+/* Compare strings; if the common prefix is equal, use the length difference */
+
+static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), 
+			     const uchar *a, uint a_length,
+			     const uchar *b, uint b_length)
 {
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  return my_strnncoll_big5(cs,s,slen,t,tlen);
+  const uint common= min(a_length, b_length);
+  const int diff= my_strnncoll_big5_internal(&a, &b, common);
+  return diff != 0 ? diff : (int) (a_length - b_length);
 }
 
+
+/* Compare strings, ignoring end space: the shorter key is treated as if */
+/* it were padded with spaces. Returns < 0, 0 or > 0.                    */
+static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), 
+			       const uchar *a, uint a_length, 
+			       const uchar *b, uint b_length)
+{
+  uint length= min(a_length, b_length);
+  int res= my_strnncoll_big5_internal(&a, &b, length);
+  if (!res && a_length != b_length)
+  {
+    const uchar *end;
+    int swap= 0;
+    /*
+      Check the next not space character of the longer key. If it's < ' ',
+      then the longer key is smaller than the other key.
+    */
+    if (a_length < b_length)
+    {
+      /* put longer key in a */
+      a_length= b_length;
+      a= b;
+      swap= -1;				/* negate the result */
+    }
+    for (end= a + a_length-length; a < end ; a++)
+    {
+      if (*a != ' ')			/* exact negation; x ^ -1 would be ~x */
+	return swap ? (int) ' ' - (int) *a : (int) *a - (int) ' ';
+    }
+  }
+  return res;
+}
+
+
 static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)),
                      uchar * dest, uint len, 
                      const uchar * src, uint srclen)
@@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
       *min_length= (uint) (min_str-min_org);
       *max_length= res_length;
       do {
-	*min_str++ = ' ';		/* Because if key compression */
+	*min_str++ = 0;
 	*max_str++ = max_sort_char;
       } while (min_str != min_end);
       return 0;
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index ed8c0b5b415..5094a7c45da 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -165,169 +165,144 @@ static struct wordvalue doubles[] = {
 	Na konci p�ipoj�me znak 0
  */
 
-#define ADD_TO_RESULT(dest, len, totlen, value)				\
-	if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
-
-#define NEXT_CMP_VALUE(src, p, store, pass, value, len)			\
-	while (1)		/* we will make a loop */		\
-		{							\
-		if (IS_END(p, src, len))				\
-				/* when we are at the end of string */	\
-			{	/* return either 0 for end of string */	\
-					/* or 1 for end of pass */	\
-			if (pass == 3)	{ value = 0; break; }		\
-			if (pass == 0)	p = store;			\
-			else		p = src;			\
-			value = 1; pass++; break;			\
-			}						\
-					/* not at end of string */	\
-		value = CZ_SORT_TABLE[pass][*p];			\
-									\
-		if (value == 0)	{ p++; continue; } /* ignore value */	\
-		if (value == 2)		/* space */			\
-			{						\
-			const uchar * tmp;				\
-			const uchar * runner = ++p;			\
-			while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2))							\
-				runner++;	/* skip all spaces */	\
-			if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES)										\
-				p = runner;				\
-			if ((pass <= 2) && !(IS_END(runner, src, len)))	\
-				p = runner;				\
-			if (IS_END(p, src, len))			\
-				continue;				\
-				/* we switch passes */			\
-			if (pass > 1)					\
-				break;					\
-			tmp = p;					\
-			if (pass == 0)	pass = 1;			\
-			else		pass = 0;			\
-			p = store; store = tmp;				\
-			break;						\
-			}						\
-		if (value == 255)					\
-			{						\
-			int i;						\
-			for (i = 0; i < (int) sizeof(doubles); i++)	\
-				{					\
-				const char * pattern = doubles[i].word;	\
-				const char * q = (const char *) p;	\
-				int j = 0;				\
-				while (pattern[j])			\
-					{				\
-					if (IS_END(q, src, len) || (*q != pattern[j]))									\
-						{ break ; }		\
-					j++; q++;			\
-					}				\
-				if (!(pattern[j]))			\
-					{				\
-					value = (int)(doubles[i].outvalue[pass]);									\
-					p = (const uchar *) q - 1;			\
-					break;				\
-					}				\
-				}					\
-			}						\
-		p++;							\
-		break;							\
-		}
-
-#define IS_END(p, src, len)	(!(*p))
-
-#if 0
-/* Function strcoll, with Czech sorting, for zero terminated strings */
-static int my_strcoll_czech(const uchar * s1, const uchar * s2)
-	{
-	int v1, v2;
-	const uchar * p1, * p2, * store1, * store2;
-	int pass1 = 0, pass2 = 0;
-	int diff;
-
-	p1 = s1;	p2 = s2;
-	store1 = s1;	store2 = s2;
-
-	do
-		{
-		NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0);
-		NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0);
-		diff = v1 - v2;
-		if (diff != 0)		return diff;
-		}
-	while (v1);
-	return 0;
-	}
-#endif
+#define ADD_TO_RESULT(dest, len, totlen, value)			\
+if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
+#define IS_END(p, src, len)	(((char *)p - (char *)src) >= (len))
 
-#if 0
-/* Function strxfrm, with Czech sorting, for zero terminated strings */
-static int my_strxfrm_czech(uchar * dest, const uchar * src, int len)
+#define NEXT_CMP_VALUE(src, p, store, pass, value, len)		\
+while (1)	/* leaves the next sort value of 'src' in 'value' */ \
+{							\
+  if (IS_END(p, src, len))				\
+  {							\
+    /* when we are at the end of string */		\
+    /* return either 0 for end of string */		\
+   /* or 1 for end of pass */				\
+   value= 0;						\
+   if (pass != 3)					\
+   {							\
+     p= (pass++ == 0) ? store : src;			\
+     value = 1;						\
+   }							\
+   break;						\
+  }							\
+  /* not at end of string */				\
+  value = CZ_SORT_TABLE[pass][*p];			\
+  if (value == 0)					\
+  { p++; continue; } /* ignore value */			\
+  if (value == 2) /* space */				\
+  {							\
+    const uchar * tmp;					\
+    const uchar * runner = ++p;				\
+    while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \
+     runner++;	/* skip all spaces */			\
+    if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \
+      p = runner;					\
+    if ((pass <= 2) && !(IS_END(runner, src, len)))	\
+      p = runner;					\
+    if (IS_END(p, src, len))				\
+      continue;						\
+    /* we switch passes */				\
+    if (pass > 1)					\
+      break;						\
+    tmp = p;						\
+    pass= 1-pass;					\
+    p = store; store = tmp;				\
+    break;						\
+  }							\
+  if (value == 255) /* look for a multi-char sequence in doubles[] */ \
+  {							\
+    int i;	/* bound is the element count, not the byte size */ \
+    for (i = 0; i < (int) (sizeof(doubles)/sizeof(doubles[0])); i++) \
+    {							\
+      const char * pattern = doubles[i].word;		\
+      const char * q = (const char *) p;		\
+      int j = 0;					\
+      while (pattern[j])				\
+      {							\
+	if (IS_END(q, src, len) || (*q != pattern[j]))	\
+	 break;						\
+	j++; q++;					\
+      }							\
+      if (!(pattern[j]))				\
+      {							\
+	value = (int)(doubles[i].outvalue[pass]);	\
+	p= (const uchar *) q - 1;			\
+	break;						\
+      }							\
+    }							\
+  }							\
+  p++;							\
+  break;						\
+}
+
+/*
+  Compare two strings of given lengths with Czech sorting (a length-aware
+  strcoll): returns the first non-zero difference of sort values, else 0
+*/
+
+static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
+			      const uchar * s1, uint len1, 
+			      const uchar * s2, uint len2)
 {
-	int value;
-	const uchar * p, * store;
-	int pass = 0;
-	int totlen = 0;
-	p = store = src;
-
-	do
-		{
-		NEXT_CMP_VALUE(src, p, store, pass, value, 0);
-		ADD_TO_RESULT(dest, len, totlen, value);
-		}
-	while (value);
-	return totlen;
-	}
-#endif
+  int v1, v2;				/* current sort value of each string */
+  const uchar * p1, * p2, * store1, * store2;
+  int pass1 = 0, pass2 = 0;
 
+  p1 = s1;	p2 = s2;
+  store1 = s1;	store2 = s2;
 
-#undef IS_END
+  do
+  {
+    int diff;
+    NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
+    NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
+    if ((diff = v1 - v2))
+      return diff;
+  }
+  while (v1);				/* value 0 marks end of string */
+  return 0;
+}
 
-#define IS_END(p, src, len)	(((char *)p - (char *)src) >= (len))
 
-/* Function strnncoll, actually strcoll, with Czech sorting, which expect
-   the length of the strings being specified */
-static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
-                       const uchar * s1, uint len1, 
-                       const uchar * s2, uint len2)
-	{
-	int v1, v2;
-	const uchar * p1, * p2, * store1, * store2;
-	int pass1 = 0, pass2 = 0;
-	int diff;
-
-	p1 = s1;	p2 = s2;
-	store1 = s1;	store2 = s2;
-
-	do
-		{
-		NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
-		NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
-		diff = v1 - v2;
-
-		if (diff != 0)		return diff;
-		}
-	while (v1);
-	return 0;
-	}
-
-/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect
-   the length of the strings being specified */
+
+/*
+  Strips end spaces, then compares. TODO: compare as in ctype-simple.c
+*/
+
+static
+int my_strnncollsp_czech(CHARSET_INFO * cs, 
+			const uchar *s, uint slen, 
+			const uchar *t, uint tlen)
+{
+  for ( ; slen && s[slen-1] == ' ' ; slen--);
+  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
+  return my_strnncoll_czech(cs,s,slen,t,tlen);
+}
+
+
+/*
+  Function strnxfrm, actually strxfrm, with Czech sorting, which expect
+  the length of the strings being specified
+*/
+
 static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), 
-                      uchar * dest, uint len,
-                      const uchar * src, uint srclen)
-	{
-	int value;
-	const uchar * p, * store;
-	int pass = 0;
-	int totlen = 0;
-	p = src;	store = src;
-
-	do
-		{
-		NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
-		ADD_TO_RESULT(dest, (int)len, totlen, value);
-		}
-	while (value);
-	return totlen;
-	}
+			     uchar * dest, uint len,
+			     const uchar * src, uint srclen)
+{
+  int value;
+  const uchar * p, * store;
+  int pass = 0;
+  int totlen = 0;
+  p = src;	store = src;
+
+  do
+  {
+    NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
+    ADD_TO_RESULT(dest, (int)len, totlen, value);
+  }
+  while (value);
+  return totlen;
+}
 
 #undef IS_END
 
@@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={
 };
 
 
-static
-int my_strnncollsp_czech(CHARSET_INFO * cs, 
-			const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
-{
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  return my_strnncoll_czech(cs,s,slen,t,tlen);
-}
-
 static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
 {
   my_strnncoll_czech,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 98511406ba9..1990060e67b 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i)
 }
 
 
-int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
-                     const uchar * s1, uint len1, 
-                     const uchar * s2, uint len2)
+int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
+			      uint length)	/* bytes to compare */
 {
-  uint len,c1,c2; 
+  const uchar *a= *a_res, *b= *b_res;	/* same type as the caller's buffers */
+  uint a_char,b_char; 
 
-  len = min(len1,len2);
-  while (len--)
+  while (length--)
   {
-    if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1)))
+    if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
     {
-      c1=gbkcode(*s1,*(s1+1));
-      c2=gbkcode(*s2,*(s2+1));
-      if (c1!=c2)
-        return ((int) gbksortorder((uint16) c1) -
-		(int) gbksortorder((uint16) c2));
-      s1+=2;
-      s2+=2;
-      --len;
-    } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++])
-      return ((int) sort_order_gbk[(uchar) s1[-1]] -
-	      (int) sort_order_gbk[(uchar) s2[-1]]);
+      a_char= gbkcode(*a,*(a+1));
+      b_char= gbkcode(*b,*(b+1));
+      if (a_char != b_char)
+        return ((int) gbksortorder((uint16) a_char) -
+		(int) gbksortorder((uint16) b_char));
+      a+= 2;
+      b+= 2;
+      length--;				/* account for the second byte */
+    }
+    else if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++])
+      return ((int) sort_order_gbk[(uchar) a[-1]] -
+	      (int) sort_order_gbk[(uchar) b[-1]]);
   }
-  return (int) (len1-len2);
+  *a_res= a;				/* equal: report where we stopped */
+  *b_res= b;
+  return 0;
 }
 
-static
-int my_strnncollsp_gbk(CHARSET_INFO * cs, 
-			const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
+
+/* Compare strings; if the common prefix is equal, use the length difference */
+int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
+		     const uchar *a, uint a_length,
+		     const uchar *b, uint b_length)
 {
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  return my_strnncoll_gbk(cs,s,slen,t,tlen);
+  const uint common= min(a_length, b_length);
+  const int diff= my_strnncoll_gbk_internal(&a, &b, common);
+  return diff != 0 ? diff : (int) (a_length - b_length);
+}
+
+/* Compare strings, ignoring end space (shorter key acts as space padded) */
+static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
+			      const uchar *a, uint a_length, 
+			      const uchar *b, uint b_length)
+{
+  uint length= min(a_length, b_length);
+  int res= my_strnncoll_gbk_internal(&a, &b, length);
+  if (!res && a_length != b_length)
+  {
+    const uchar *end;
+    int swap= 0;
+    /*
+      Check the next not space character of the longer key. If it's < ' ',
+      then the longer key is smaller than the other key.
+    */
+    if (a_length < b_length)
+    {
+      /* put longer key in a */
+      a_length= b_length;
+      a= b;
+      swap= -1;				/* negate the result */
+    }
+    for (end= a + a_length-length; a < end ; a++)
+    {
+      if (*a != ' ')			/* exact negation; x ^ -1 would be ~x */
+	return swap ? (int) ' ' - (int) *a : (int) *a - (int) ' ';
+    }
+  }
+  return res;
 }
 
 
@@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
       *min_length= (uint) (min_str - min_org);
       *max_length= res_length;
       do {
-	*min_str++ = '\0';		/* Because if key compression */
+	*min_str++= 0;
 	*max_str++ = max_sort_char;
       } while (min_str != min_end);
       return 0;
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 0682b15d135..7a010c3bef8 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -319,51 +319,105 @@ uchar combo2map[]={
 
 
 static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
-				  const uchar *s1, uint len1,
-				  const uchar *s2, uint len2)
+				  const uchar *a, uint a_length,
+				  const uchar *b, uint b_length)
 {
-  const uchar *e1 = s1 + len1;
-  const uchar *e2 = s2 + len2;
-  uchar c1, c12=0, c2, c22=0;
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  uchar a_char, a_extend= 0, b_char, b_extend= 0;
 
-  while ((s1 < e1 || c12) && (s2 < e2 || c22))
+  while ((a < a_end || a_extend) && (b < b_end || b_extend))
   {
-    if (c12)
+    if (a_extend)
     {
-      c1=c12; c12=0;
+      a_char=a_extend; a_extend=0;
     }
     else
     {
-      c12=combo2map[*s1];
-      c1=combo1map[*s1++];
+      a_extend=combo2map[*a];
+      a_char=combo1map[*a++];
     }
-    if (c22)
+    if (b_extend)
     {
-      c2=c22; c22=0;
+      b_char=b_extend; b_extend=0;
     }
     else
     {
-      c22=combo2map[*s2];
-      c2=combo1map[*s2++];
+      b_extend=combo2map[*b];
+      b_char=combo1map[*b++];
     }
-    if (c1 != c2) return (int)c1 - (int)c2;
+    if (a_char != b_char)
+      return (int) a_char - (int) b_char;
   }
-
   /*
     A simple test of string lengths won't work -- we test to see
     which string ran out first
   */
-  return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0;
+  return ((a < a_end || a_extend) ? 1 :
+	  (b < b_end || b_extend) ? -1 : 0);
 }
 
 
-static int my_strnncollsp_latin1_de(CHARSET_INFO *cs,
-				    const uchar *s, uint slen,
-				    const uchar *t, uint tlen)
+static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+				    const uchar *a, uint a_length,
+				    const uchar *b, uint b_length)
 {
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  return my_strnncoll_latin1_de(cs,s,slen,t,tlen);
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  uchar a_char, a_extend= 0, b_char, b_extend= 0; /* *_extend: pending 2nd value */
+
+  while ((a < a_end || a_extend) && (b < b_end || b_extend))
+  {
+    if (a_extend)
+    {
+      a_char=a_extend;
+      a_extend= 0;
+    }
+    else
+    {
+      a_extend= combo2map[*a];
+      a_char=   combo1map[*a++];
+    }
+    if (b_extend)
+    {
+      b_char= b_extend;
+      b_extend= 0;
+    }
+    else
+    {
+      b_extend= combo2map[*b];
+      b_char=   combo1map[*b++];
+    }
+    if (a_char != b_char)
+      return (int) a_char - (int) b_char;
+  }
+  /* A key that still has a pending second value is the greater one */
+  if (a_extend)
+    return 1;
+  if (b_extend)
+    return -1;
+
+  if (a != a_end || b != b_end)
+  {
+    int swap= 0;
+    /*
+      Check the next not space character of the longer key. If it's < ' ',
+      then the longer key is smaller than the other key.
+    */
+    if (a == a_end)
+    {
+      /* put longer key in a */
+      a_end= b_end;
+      a= b;
+      swap= -1;					/* negate the result */
+    }
+    for ( ; a < a_end ; a++)
+    {
+      if (*a != ' ')			/* exact negation; x ^ -1 would be ~x */
+	return swap ? (int) ' ' - (int) *a : (int) *a - (int) ' ';
+    }
+  }
+  return 0;
 }
 
 
@@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+			    const uchar *key, uint len,
+			    ulong *nr1, ulong *nr2)
+{
+  const uchar *end= key+len;
+  /*
+    Remove end space. We have to do this to be able to compare
+    'AE' and 'Ä' (A-umlaut) as identical
+  */
+  while (end > key && end[-1] == ' ')
+    end--;
+
+  for (; key < end ; key++)
+  {
+    uint X= (uint) combo1map[(uint) *key];	/* first value for this byte */
+    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+    nr2[0]+=3;
+    if ((X= combo2map[*key]))		/* optional second value, hashed too */
+    {
+      nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+      nr2[0]+=3;
+    }
+  }
+}
+
+
 static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
 {
   my_strnncoll_latin1_de,
@@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
   my_wildcmp_8bit,
   my_strcasecmp_8bit,
   my_instr_simple,
-  my_hash_sort_simple
+  my_hash_sort_latin1_de
 };
 
 
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 2f7cf698664..ed772a68845 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs,
   return 0;
 }
 
+
 /* BINARY collations handlers for MB charsets */
 
 static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
@@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
   return cmp ? cmp : (int) (slen - tlen);
 }
 
-static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
-				 const uchar *s, uint slen,
-				 const uchar *t, uint tlen)
-{
-  int len, cmp;
-
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-
-  len  = ( slen > tlen ) ? tlen : slen;
-
-  cmp= memcmp(s,t,len);
-  return cmp ? cmp : (int) (slen - tlen);
-}
 
 static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
 			    uchar * dest, uint len,
@@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
 MY_COLLATION_HANDLER my_collation_mb_bin_handler =
 {
     my_strnncoll_mb_bin,
-    my_strnncollsp_mb_bin,
+    my_strnncoll_mb_bin,
     my_strnxfrm_mb_bin,
     my_like_range_simple,
     my_wildcmp_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 0aae60a0b56..c8eb3c07a3f 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
 }
 
 
-int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
+/*
+  Compare strings, discarding end space
+
+  SYNOPSIS
+    my_strnncollsp_simple()
+    cs			character set handler
+    a			First string to compare
+    a_length		Length of 'a'
+    b			Second string to compare
+    b_length		Length of 'b'
+
+  IMPLEMENTATION
+    If one string is shorter than the other, then we space extend the
+    shorter one so that the strings have equal length.
+
+    This will ensure that the following things hold:
+
+    "a"  == "a "
+    "a\0" < "a"
+    "a\0" < "a "
+
+  RETURN
+    < 0	 a <  b
+    = 0	 a == b
+    > 0	 a > b
+*/
+
+int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, 
+			  const uchar *b, uint b_length)
 {
-  uchar *map= cs->sort_order;
-  int len;
-  
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  
-  len  = ( slen > tlen ) ? tlen : slen;
-  
-  while (len--)
+  const uchar *map= cs->sort_order, *end;
+  uint length;
+
+  end= a + (length= min(a_length, b_length));
+  while (a < end)
   {
-    if (map[*s++] != map[*t++])
-      return ((int) map[s[-1]] - (int) map[t[-1]]);
+    if (map[*a++] != map[*b++])
+      return ((int) map[a[-1]] - (int) map[b[-1]]);
   }
-  return (int) (slen-tlen);
+  if (a_length != b_length)
+  {
+    int swap= 0;
+    /*
+      Check the next not space character of the longer key. If it's < ' ',
+      then the longer key is smaller than the other key.
+    */
+    if (a_length < b_length)
+    {
+      /* put longer key in a */
+      a_length= b_length;
+      a= b;
+      swap= -1;					/* negate the result */
+    }
+    for (end= a + a_length-length; a < end ; a++)
+    {
+      if (*a != ' ')			/* exact negation; x ^ -1 would be ~x */
+	return swap ? (int) ' ' - (int) *a : (int) *a - (int) ' ';
+    }
+  }
+  return 0;
 }
 
+
 void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
 {
   register uchar *map=cs->to_upper;
@@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs  __attribute__((unused)),
 
 
 void my_hash_sort_simple(CHARSET_INFO *cs,
-				const uchar *key, uint len,
-				ulong *nr1, ulong *nr2)
+			 const uchar *key, uint len,
+			 ulong *nr1, ulong *nr2)
 {
   register uchar *sort_order=cs->sort_order;
   const uchar *pos = key;
@@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
     {
       *min_length= (uint) (min_str - min_org);
       *max_length=res_length;
-      do {
-	*min_str++ = ' ';		/* Because if key compression */
-	*max_str++ = (char) cs->max_sort_char;
+      do
+      {
+	*min_str++= 0;
+	*max_str++= (char) cs->max_sort_char;
       } while (min_str != min_end);
       return 0;
     }
@@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
   }
   *min_length= *max_length = (uint) (min_str - min_org);
 
-  /* Temporary fix for handling w_one at end of string (key compression) */
-  {
-    char *tmp;
-    for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';)
-      *--tmp=' ';
-  }
-
   while (min_str != min_end)
     *min_str++ = *max_str++ = ' ';	/* Because if key compression */
   return 0;
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 8ba650d1486..68cd77f96fc 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]=
 
 
 static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
-		  const char* p, const char *e)
+			 const char* p, const char *e)
 {
   return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0);
 }
@@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
 
 #define sjiscode(c,d)	((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
 
-static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
-                      const uchar *s1, uint len1,
-                      const uchar *s2, uint len2)
+
+static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
+				      const uchar **a_res, uint a_length,
+				      const uchar **b_res, uint b_length)
 {
-  const uchar *e1 = s1 + len1;
-  const uchar *e2 = s2 + len2;
-  while (s1 < e1 && s2 < e2) {
-    if (ismbchar_sjis(cs,(char*) s1, (char*) e1) &&
-	ismbchar_sjis(cs,(char*) s2, (char*) e2)) {
-      uint c1 = sjiscode(*s1, *(s1+1));
-      uint c2 = sjiscode(*s2, *(s2+1));
-      if (c1 != c2)
-	return c1 - c2;
-      s1 += 2;
-      s2 += 2;
-    } else {
-      if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2])
-	return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2];
-      s1++;
-      s2++;
+  const uchar *a= *a_res, *b= *b_res;
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  while (a < a_end && b < b_end)
+  {
+    if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
+	ismbchar_sjis(cs,(char*) b, (char*) b_end))
+    {
+      uint a_char= sjiscode(*a, *(a+1));
+      uint b_char= sjiscode(*b, *(b+1));
+      if (a_char != b_char)
+	return a_char - b_char;
+      a += 2;
+      b += 2;
+    } else
+    {
+      if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
+	return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
+      a++;
+      b++;
     }
   }
-  return len1 - len2;
+  *a_res= a;
+  *b_res= b;
+  return 0;
+}
+
+/* Compare strings; if the common prefix is equal, use the length difference */
+static int my_strnncoll_sjis(CHARSET_INFO *cs,
+			      const uchar *a, uint a_length, 
+			      const uchar *b, uint b_length)
+{
+  int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+  return res ? res : (int) (a_length - b_length);
+}
 
-static
-int my_strnncollsp_sjis(CHARSET_INFO * cs, 
-			const uchar *s, uint slen, 
-			const uchar *t, uint tlen)
+
+static int my_strnncollsp_sjis(CHARSET_INFO *cs,
+			       const uchar *a, uint a_length, 
+			       const uchar *b, uint b_length)
 {
-  for ( ; slen && s[slen-1] == ' ' ; slen--);
-  for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-  return my_strnncoll_sjis(cs,s,slen,t,tlen);
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+  if (!res && (a != a_end || b != b_end))
+  {
+    int swap= 0;
+    /*
+      Check the next not space character of the longer key. If it's < ' ',
+      then the longer key is smaller than the other key.
+    */
+    if (a == a_end)
+    {
+      /* put longer key in a */
+      a_end= b_end;
+      a= b;
+      swap= -1;				/* negate the result */
+    }
+    for (; a < a_end ; a++)
+    {
+      if (*a != ' ')			/* exact negation; x ^ -1 would be ~x */
+	return swap ? (int) ' ' - (int) *a : (int) *a - (int) ' ';
+    }
+  }
+  return res;
 }
 
+
+
 static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)),
                      uchar *dest, uint len,
                      const uchar *src, uint srclen)
 {
   uchar *d_end = dest + len;
   uchar *s_end = (uchar*) src + srclen;
-  while (dest < d_end && src < s_end) {
-    if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) {
+  while (dest < d_end && src < s_end)
+  {
+    if (ismbchar_sjis(cs,(char*) src, (char*) s_end))
+    {
       *dest++ = *src++;
       if (dest < d_end && src < s_end)
 	*dest++ = *src++;
-    } else {
-      *dest++ = sort_order_sjis[(uchar)*src++];
     }
+    else
+      *dest++ = sort_order_sjis[(uchar)*src++];
   }
   return srclen;
 }
 
+
 /*
 ** Calculate min_str and max_str that ranges a LIKE string.
 ** Arguments:
@@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)),
       ptr++;
       continue;
     }
-    if (*ptr == w_many) {		/* '%' in SQL */
+    if (*ptr == w_many)
+    {						/* '%' in SQL */
       *min_length = (uint)(min_str - min_org);
       *max_length = res_length;
-      do {
-	*min_str++ = ' ';		/* Because if key compression */
-	*max_str++ = max_sort_char;
+      do
+      {
+	*min_str++= 0;
+	*max_str++= max_sort_char;
       } while (min_str < min_end);
       return 0;
     }
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 59be820863a..954a3768536 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len)
   strncoll() replacement, compare 2 string, both are converted to sortable
   string
 
+  NOTE:
+    We can't cut strings at end \0 as this would break comparision with
+    LIKE characters, where the min range is stored as end \0
+
   Arg: 2 Strings and it compare length
   Ret: strcmp result
 */
@@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
   uchar *tc1, *tc2;
   int i;
 
-  /* Cut strings at end \0 */
-  len1= (int) strnlen((char*) s1,len1);
-  len2= (int) strnlen((char*) s2,len2);
   tc1= buf;
   if ((len1 + len2 +2) > (int) sizeof(buf))
     tc1= (uchar*) malloc(len1+len2);
@@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+/*
+  TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
 static
 int my_strnncollsp_tis620(CHARSET_INFO * cs, 
 			  const uchar *s, uint slen, 
@@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)),
     {
       *min_length= (uint) (min_str - min_org);
       *max_length=res_length;
-      do {
-	*min_str++ = ' ';		/* Because of key compression */
+      do
+      {
+	*min_str++ = 0;
 	*max_str++ = max_sort_chr;
       } while (min_str != min_end);
       return 0;
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 26d7568e6a2..99d97a9614b 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -90,8 +90,8 @@ static uchar to_upper_ucs2[] = {
 };
 
 
-static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
-                 my_wc_t * pwc, const uchar *s, const uchar *e)
+static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
   if (s+2 > e) /* Need 2 characters */
     return MY_CS_TOOFEW(0);
@@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
   return 2;
 }
 
-static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
-                 my_wc_t wc, uchar *r, uchar *e)
+static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+		       my_wc_t wc, uchar *r, uchar *e)
 {
   if ( r+2 > e ) 
     return MY_CS_TOOSMALL;
@@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
   }
 }
 
-static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
+
+static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
+			      ulong *n1, ulong *n2)
 {
   my_wc_t wc;
   int res;
   const uchar *e=s+slen;
 
-  while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
+  while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
   {
     int plane = (wc>>8) & 0xFF;
     wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
@@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong
 
 
 static void my_caseup_str_ucs2(CHARSET_INFO * cs  __attribute__((unused)), 
-                        char * s __attribute__((unused)))
+			       char * s __attribute__((unused)))
 {
 }
 
@@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
 }
 
 static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), 
-                        char * s __attribute__((unused)))
+			       char * s __attribute__((unused)))
 {
 }
 
 
 static int my_strnncoll_ucs2(CHARSET_INFO *cs, 
-		const uchar *s, uint slen, const uchar *t, uint tlen)
+			     const uchar *s, uint slen, 
+			     const uchar *t, uint tlen)
 {
   int s_res,t_res;
   my_wc_t s_wc,t_wc;
@@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
   return ( (se-s) - (te-t) );
 }
 
+
 static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
-		const char *s, const char *t,  uint len)
+			       const char *s, const char *t,  uint len)
 {
   int s_res,t_res;
   my_wc_t s_wc,t_wc;
@@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
   return ( (se-s) - (te-t) );
 }
 
+
 static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
 {
   uint s_len=strlen(s);
@@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
   return  my_strncasecmp_ucs2(cs, s, t, len);
 }
 
+
 static int my_strnxfrm_ucs2(CHARSET_INFO *cs, 
 	uchar *dst, uint dstlen, const uchar *src, uint srclen)
 {
@@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
   return dst - dst_orig;
 }
 
+
 static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                      const char *b __attribute__((unused)),
                      const char *e __attribute__((unused)))
@@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
   return 2;
 }
 
+
 static int my_mbcharlen_ucs2(CHARSET_INFO *cs  __attribute__((unused)) , 
                       uint c __attribute__((unused)))
 {
@@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap)
   return (uint) (dst - start);
 }
 
-static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
-			    ,char* to, uint n, const char* fmt, ...)
+static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+			    char* to, uint n, const char* fmt, ...)
 {
   va_list args;
   va_start(args,fmt);
@@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
 }
 
 
-long        my_strntol_ucs2(CHARSET_INFO *cs,
-			   const char *nptr, uint l, int base,
-			   char **endptr, int *err)
+long my_strntol_ucs2(CHARSET_INFO *cs,
+		     const char *nptr, uint l, int base,
+		     char **endptr, int *err)
 {
   int      negative=0;
   int      overflow;
@@ -504,9 +512,9 @@ bs:
 }
 
 
-ulong      my_strntoul_ucs2(CHARSET_INFO *cs,
-			   const char *nptr, uint l, int base, 
-			   char **endptr, int *err)
+ulong my_strntoul_ucs2(CHARSET_INFO *cs,
+		       const char *nptr, uint l, int base, 
+		       char **endptr, int *err)
 {
   int      negative=0;
   int      overflow;
@@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
       *min_length= (uint) (min_str - min_org);
       *max_length=res_length;
       do {
-        *min_str++ = '\0';
-	*min_str++ = ' ';		/* Because if key compression */
+        *min_str++ = 0;
+	*min_str++ = 0;
 	*max_str++ = (char) cs->max_sort_char >>8;
 	*max_str++ = (char) cs->max_sort_char & 255;
       } while (min_str + 1 < min_end);
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index dca73e5a79f..886ecfbd0c9 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
 
 
 static int my_strnncoll_utf8(CHARSET_INFO *cs, 
-		const uchar *s, uint slen, const uchar *t, uint tlen)
+			     const uchar *s, uint slen,
+			     const uchar *t, uint tlen)
 {
   int s_res,t_res;
   my_wc_t s_wc,t_wc;
@@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
   return ( (se-s) - (te-t) );
 }
 
+
+/*
+  TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
 static
 int my_strnncollsp_utf8(CHARSET_INFO * cs, 
 			const uchar *s, uint slen, 
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index bda349f1988..8fd4e612713 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+/*
+  TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
 static
 int my_strnncollsp_win1250ch(CHARSET_INFO * cs, 
 			     const uchar *s, uint slen,
author	unknown <monty@mysql.com>	2004-03-25 15:05:42 +0200
committer	unknown <monty@mysql.com>	2004-03-25 15:05:42 +0200
commit	fe596dee5869ac1f99a9d88061bc9dff402849f6 (patch)
tree	b090f762169aabf4fa3602b52d4463cd9c66106b /strings
parent	23e480a80c64ca9b390a6fa82f68d9f8bbb1fa67 (diff)
parent	887d6f144d85b9a869e4f8030c41816bbd32771b (diff)
download	mariadb-git-fe596dee5869ac1f99a9d88061bc9dff402849f6.tar.gz