Bug #6040: can't retrieve records with umlaut characters in a case-insensitive manner

author: unknown <bar@mysql.com> 2004-10-18 15:23:24 +0500
committer: unknown <bar@mysql.com> 2004-10-18 15:23:24 +0500
commit: 5267ec8a5ac0ce18857ace639382e06631e0a62f (patch)
tree: 8f676b681ea799bf7fbdb31b3bfb29430f1640d6 /strings
parent: 2310f00af2bedf78a98836ab953f7dfc71654d3d (diff)
download: mariadb-git-5267ec8a5ac0ce18857ace639382e06631e0a62f.tar.gz
2 files changed, 171 insertions, 163 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 3247e1d7424..851c2044f47 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
-/*
-** Compare string against string with wildcard
-**	0 if matched
-**	-1 if not matched with wildcard
-**	 1 if matched with wildcard
-*/
-
-static
-int my_wildcmp_ucs2(CHARSET_INFO *cs,
-		    const char *str,const char *str_end,
-		    const char *wildstr,const char *wildend,
-		    int escape, int w_one, int w_many,
-		    MY_UNICASE_INFO **weights)
-{
-  int result= -1;			/* Not found, using wildcards */
-  my_wc_t s_wc, w_wc;
-  int scan, plane;
-  
-  while (wildstr != wildend)
-  {
-    
-    while (1)
-    {
-      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
-			(const uchar*)wildend);
-      if (scan <= 0)
-        return 1;
-      
-      if (w_wc ==  (my_wc_t)escape)
-      {
-        wildstr+= scan;
-        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
-			  (const uchar*)wildend);
-        if (scan <= 0)
-          return 1;
-      }
-      
-      if (w_wc == (my_wc_t)w_many)
-      {
-        result= 1;				/* Found an anchor char */
-        break;
-      }
-      
-      wildstr+= scan;
-      scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
-      if (scan <=0)
-        return 1;
-      str+= scan;
-      
-      if (w_wc == (my_wc_t)w_one)
-      {
-        result= 1;				/* Found an anchor char */
-      }
-      else
-      {
-        if (weights)
-        {
-          plane=(s_wc>>8) & 0xFF;
-          s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
-          plane=(w_wc>>8) & 0xFF;
-          w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
-        }
-        if (s_wc != w_wc)
-          return 1;				/* No match */
-      }
-      if (wildstr == wildend)
-	return (str != str_end);		/* Match if both are at end */
-    }
-    
-    
-    if (w_wc == (my_wc_t)w_many)
-    {						/* Found w_many */
-    
-      /* Remove any '%' and '_' from the wild search string */
-      for ( ; wildstr != wildend ; )
-      {
-        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
-			  (const uchar*)wildend);
-        if (scan <= 0)
-          return 1;
-        
-	if (w_wc == (my_wc_t)w_many)
-	{
-	  wildstr+= scan;
-	  continue;
-	} 
-	
-	if (w_wc == (my_wc_t)w_one)
-	{
-	  wildstr+= scan;
-	  scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str,
-			    (const uchar*)str_end);
-          if (scan <=0)
-            return 1;
-          str+= scan;
-	  continue;
-	}
-	break;					/* Not a wild character */
-      }
-      
-      if (wildstr == wildend)
-	return 0;				/* Ok if w_many is last */
-      
-      if (str == str_end)
-	return -1;
-      
-      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
-			(const uchar*)wildend);
-      if (scan <= 0)
-        return 1;
-      
-      if (w_wc ==  (my_wc_t)escape)
-      {
-        wildstr+= scan;
-        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
-			  (const uchar*)wildend);
-        if (scan <= 0)
-          return 1;
-      }
-      
-      while (1)
-      {
-        /* Skip until the first character from wildstr is found */
-        while (str != str_end)
-        {
-          scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str,
-			    (const uchar*)str_end);
-          if (scan <= 0)
-            return 1;
-          if (weights)
-          {
-            plane=(s_wc>>8) & 0xFF;
-            s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
-            plane=(w_wc>>8) & 0xFF;
-            w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
-          }
-          
-          if (s_wc == w_wc)
-            break;
-          str+= scan;
-        }
-        if (str == str_end)
-          return -1;
-        
-        result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
-                                w_one,w_many,weights);
-        
-        if (result <= 0)
-          return result;
-        
-        str+= scan;
-      } 
-    }
-  }
-  return (str != str_end ? 1 : 0);
-}
-
-
 static
 int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
 		    const char *str,const char *str_end,
 		    const char *wildstr,const char *wildend,
 		    int escape, int w_one, int w_many)
 {
-  return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
-                         escape,w_one,w_many,uni_plane); 
+  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+                            escape,w_one,w_many,uni_plane); 
 }
 
 
@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
 		    const char *wildstr,const char *wildend,
 		    int escape, int w_one, int w_many)
 {
-  return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
-                         escape,w_one,w_many,NULL); 
+  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+                            escape,w_one,w_many,NULL); 
 }
 
 
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index fd6610b72b1..f5d40fb8ded 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
 
 };
 
+
+/*
+** Compare a string against a wildcard pattern.
+** This function is shared by the UTF8 and UCS2 collations.
+**
+**	 0 if matched
+**	-1 if not matched and str ran out after a w_many wildcard
+**	 1 if not matched otherwise (including a character decoding failure)
+*/
+
+int my_wildcmp_unicode(CHARSET_INFO *cs,
+		       const char *str,const char *str_end,
+		       const char *wildstr,const char *wildend,
+		       int escape, int w_one, int w_many,
+		       MY_UNICASE_INFO **weights)
+{
+  int result= -1;			/* Not found, using wildcards */
+  my_wc_t s_wc, w_wc;
+  int scan, plane;
+  int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc,
+               const unsigned char *s,const unsigned char *e);
+  mb_wc= cs->cset->mb_wc;
+  
+  while (wildstr != wildend)
+  {
+    while (1)
+    {
+      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+                       (const uchar*)wildend)) <= 0)
+        return 1;
+      
+      if (w_wc ==  (my_wc_t)escape)
+      {
+        wildstr+= scan;
+        if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr,
+                         (const uchar*)wildend)) <= 0)
+          return 1;
+      }
+      
+      if (w_wc == (my_wc_t)w_many)
+      {
+        result= 1;				/* Found an anchor char */
+        break;
+      }
+      
+      wildstr+= scan;
+      if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+                       (const uchar*)str_end)) <=0)
+        return 1;
+      str+= scan;
+      
+      if (w_wc == (my_wc_t)w_one)
+      {
+        result= 1;				/* Found an anchor char */
+      }
+      else
+      {
+        if (weights)
+        {
+          plane=(s_wc>>8) & 0xFF;
+          s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+          plane=(w_wc>>8) & 0xFF;
+          w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+        }
+        if (s_wc != w_wc)
+          return 1;				/* No match */
+      }
+      if (wildstr == wildend)
+	return (str != str_end);		/* Match if both are at end */
+    }
+    
+    
+    if (w_wc == (my_wc_t)w_many)
+    {						/* Found w_many */
+    
+      /* Remove any '%' and '_' from the wild search string */
+      for ( ; wildstr != wildend ; )
+      {
+        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+                         (const uchar*)wildend)) <= 0)
+          return 1;
+        
+	if (w_wc == (my_wc_t)w_many)
+	{
+	  wildstr+= scan;
+	  continue;
+	} 
+	
+	if (w_wc == (my_wc_t)w_one)
+	{
+	  wildstr+= scan;
+          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+                           (const uchar*)str_end)) <=0)
+            return 1;
+          str+= scan;
+	  continue;
+	}
+	break;					/* Not a wild character */
+      }
+      
+      if (wildstr == wildend)
+	return 0;				/* Ok if w_many is last */
+      
+      if (str == str_end)
+	return -1;
+      
+      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+                       (const uchar*)wildend)) <=0)
+        return 1;
+      
+      if (w_wc ==  (my_wc_t)escape)
+      {
+        wildstr+= scan;
+        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+                         (const uchar*)wildend)) <=0)
+          return 1;
+      }
+      
+      while (1)
+      {
+        /* Skip until the first character from wildstr is found */
+        while (str != str_end)
+        {
+          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+                           (const uchar*)str_end)) <=0)
+            return 1;
+          if (weights)
+          {
+            plane=(s_wc>>8) & 0xFF;
+            s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+            plane=(w_wc>>8) & 0xFF;
+            w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+          }
+          
+          if (s_wc == w_wc)
+            break;
+          str+= scan;
+        }
+        if (str == str_end)
+          return -1;
+        
+        result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+                                   escape, w_one, w_many,
+                                   weights);
+        
+        if (result <= 0)
+          return result;
+        
+        str+= scan;
+      } 
+    }
+  }
+  return (str != str_end ? 1 : 0);
+}
+
 #endif
 
 
@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
   return  my_strncasecmp_utf8(cs, s, t, len);
 }
 
+static
+int my_wildcmp_utf8(CHARSET_INFO *cs,
+		    const char *str,const char *str_end,
+		    const char *wildstr,const char *wildend,
+		    int escape, int w_one, int w_many)
+{
+  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+                            escape,w_one,w_many,uni_plane); 
+}
+
+
 static int my_strnxfrm_utf8(CHARSET_INFO *cs,
                             uchar *dst, uint dstlen,
                             const uchar *src, uint srclen)
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
     my_strnncollsp_utf8,
     my_strnxfrm_utf8,
     my_like_range_mb,
-    my_wildcmp_mb,
+    my_wildcmp_utf8,
     my_strcasecmp_utf8,
     my_instr_mb,
     my_hash_sort_utf8
author	unknown <bar@mysql.com>	2004-10-18 15:23:24 +0500
committer	unknown <bar@mysql.com>	2004-10-18 15:23:24 +0500
commit	5267ec8a5ac0ce18857ace639382e06631e0a62f (patch)
tree	8f676b681ea799bf7fbdb31b3bfb29430f1640d6 /strings
parent	2310f00af2bedf78a98836ab953f7dfc71654d3d (diff)
download	mariadb-git-5267ec8a5ac0ce18857ace639382e06631e0a62f.tar.gz