MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx

Synchronizing the sources of:
- my_wildcmp_uca_impl(), which handles utf8_unicode_ci
- my_wildcmp_unicode_impl(), which handles utf8_general_ci

The latter already had a fix for a similar MySQL bug in utf8_general_ci:
  Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0

That fix is now propagated to utf8_unicode_ci.
author: Alexander Barkov <bar@mariadb.com> 2018-10-15 13:22:18 +0400
committer: Alexander Barkov <bar@mariadb.com> 2018-10-15 13:22:18 +0400
commit: 34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1 (patch)
tree: 1cf8f34a2bdac4e293da332f3e0aba863238b51e /strings
parent: ae3fe14c17b4982fd576f65dccc5aba2e630cb31 (diff)
download: mariadb-git-34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1.tar.gz
2 files changed, 80 insertions, 82 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 2351ee9d932..4ccf8170c3e 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -21069,11 +21069,11 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
                         const char *wildstr,const char *wildend,
                         int escape, int w_one, int w_many, int recurse_level)
 {
-  int result= -1;			/* Not found, using wildcards */
+  int result= -1;                             /* Not found, using wildcards */
   my_wc_t s_wc, w_wc;
   int scan;
   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
-  
+
   if (my_string_stack_guard && my_string_stack_guard(recurse_level))
     return 1;
   while (wildstr != wildend)
@@ -21082,119 +21082,121 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
     {
       my_bool escaped= 0;
       if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-		       (const uchar*)wildend)) <= 0)
-	return 1;
+                       (const uchar*)wildend)) <= 0)
+        return 1;
 
-      if (w_wc == (my_wc_t)w_many)
+      if (w_wc == (my_wc_t) w_many)
       {
-        result= 1;				/* Found an anchor char */
+        result= 1;                                /* Found an anchor char */
         break;
       }
 
       wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape && wildstr < wildend)
       {
         if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-			(const uchar*)wildend)) <= 0)
+                         (const uchar*)wildend)) <= 0)
           return 1;
         wildstr+= scan;
         escaped= 1;
       }
-      
+
       if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-      		       (const uchar*)str_end)) <= 0)
+                       (const uchar*)str_end)) <= 0)
         return 1;
       str+= scan;
-      
-      if (!escaped && w_wc == (my_wc_t)w_one)
+
+      if (!escaped && w_wc == (my_wc_t) w_one)
       {
-        result= 1;				/* Found an anchor char */
+        result= 1;                                /* Found an anchor char */
       }
       else
       {
         if (my_uca_charcmp(cs,s_wc,w_wc))
-          return 1;
+          return 1;                               /* No match */
       }
       if (wildstr == wildend)
-	return (str != str_end);		/* Match if both are at end */
+        return (str != str_end);                  /* Match if both are at end */
     }
-    
-    
-    if (w_wc == (my_wc_t)w_many)
-    {						/* Found w_many */
-    
+
+    if (w_wc == (my_wc_t) w_many)
+    {                                             /* Found w_many */
       /* Remove any '%' and '_' from the wild search string */
       for ( ; wildstr != wildend ; )
       {
         if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-			 (const uchar*)wildend)) <= 0)
+                         (const uchar*)wildend)) <= 0)
           return 1;
-        
-	if (w_wc == (my_wc_t)w_many)
-	{
-	  wildstr+= scan;
-	  continue;
-	} 
-	
-	if (w_wc == (my_wc_t)w_one)
-	{
-	  wildstr+= scan;
-	  if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-			   (const uchar*)str_end)) <= 0)
+
+        if (w_wc == (my_wc_t) w_many)
+        {
+          wildstr+= scan;
+          continue;
+        }
+
+        if (w_wc == (my_wc_t) w_one)
+        {
+          wildstr+= scan;
+          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+                           (const uchar*)str_end)) <= 0)
             return 1;
           str+= scan;
-	  continue;
-	}
-	break;					/* Not a wild character */
+          continue;
+        }
+        break;                                        /* Not a wild character */
       }
-      
+
       if (wildstr == wildend)
-	return 0;				/* Ok if w_many is last */
-      
+        return 0;                                /* Ok if w_many is last */
+
       if (str == str_end)
-	return -1;
-      
+        return -1;
+
       if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-		       (const uchar*)wildend)) <= 0)
+                       (const uchar*)wildend)) <= 0)
         return 1;
-      
-      if (w_wc ==  (my_wc_t)escape)
+      wildstr+= scan;
+
+      if (w_wc ==  (my_wc_t) escape)
       {
-        wildstr+= scan;
-        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-			 (const uchar*)wildend)) <= 0)
-          return 1;
+        if (wildstr < wildend)
+        {
+          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+                           (const uchar*)wildend)) <= 0)
+            return 1;
+          wildstr+= scan;
+        }
       }
-      
+
       while (1)
       {
         /* Skip until the first character from wildstr is found */
         while (str != str_end)
         {
           if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-			   (const uchar*)str_end)) <= 0)
+                           (const uchar*)str_end)) <= 0)
             return 1;
-          
+
           if (!my_uca_charcmp(cs,s_wc,w_wc))
             break;
           str+= scan;
         }
         if (str == str_end)
           return -1;
-        
+
+        str+= scan;
         result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
-                                    escape, w_one, w_many, recurse_level+1);
-        
+                                    escape, w_one, w_many,
+                                    recurse_level + 1);
         if (result <= 0)
           return result;
-        
-        str+= scan;
-      } 
+      }
     }
   }
   return (str != str_end ? 1 : 0);
 }
 
+
 int my_wildcmp_uca(CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 7d7e61ce511..3cb832c5414 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
   int result= -1;                             /* Not found, using wildcards */
   my_wc_t s_wc, w_wc;
   int scan;
-  int (*mb_wc)(CHARSET_INFO *, my_wc_t *,
-               const uchar *, const uchar *);
-  mb_wc= cs->cset->mb_wc;
+  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
 
   if (my_string_stack_guard && my_string_stack_guard(recurse_level))
     return 1;
@@ -4430,12 +4428,12 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
         wildstr+= scan;
         escaped= 1;
       }
-      
+
       if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
                        (const uchar*)str_end)) <= 0)
         return 1;
       str+= scan;
-      
+
       if (!escaped && w_wc == (my_wc_t) w_one)
       {
         result= 1;                                /* Found an anchor char */
@@ -4453,86 +4451,84 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
       if (wildstr == wildend)
         return (str != str_end);                  /* Match if both are at end */
     }
-    
-    
+
     if (w_wc == (my_wc_t) w_many)
     {                                             /* Found w_many */
-    
       /* Remove any '%' and '_' from the wild search string */
       for ( ; wildstr != wildend ; )
       {
         if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                          (const uchar*)wildend)) <= 0)
           return 1;
-        
-        if (w_wc == (my_wc_t)w_many)
+
+        if (w_wc == (my_wc_t) w_many)
         {
           wildstr+= scan;
           continue;
         } 
-        
-        if (w_wc == (my_wc_t)w_one)
+
+        if (w_wc == (my_wc_t) w_one)
         {
           wildstr+= scan;
           if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
             return 1;
           str+= scan;
           continue;
         }
         break;                                        /* Not a wild character */
       }
-      
+
       if (wildstr == wildend)
         return 0;                                /* Ok if w_many is last */
-      
+
       if (str == str_end)
         return -1;
-      
+
       if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                       (const uchar*)wildend)) <=0)
+                       (const uchar*)wildend)) <= 0)
         return 1;
       wildstr+= scan;
-      
-      if (w_wc ==  (my_wc_t)escape)
+
+      if (w_wc ==  (my_wc_t) escape)
       {
         if (wildstr < wildend)
         {
           if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                           (const uchar*)wildend)) <=0)
+                           (const uchar*)wildend)) <= 0)
             return 1;
           wildstr+= scan;
         }
       }
-      
+
       while (1)
       {
         /* Skip until the first character from wildstr is found */
         while (str != str_end)
         {
           if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
             return 1;
           if (weights)
           {
             my_tosort_unicode(weights, &s_wc, cs->state);
             my_tosort_unicode(weights, &w_wc, cs->state);
           }
-          
+
           if (s_wc == w_wc)
             break;
           str+= scan;
         }
         if (str == str_end)
           return -1;
-        
+
         str+= scan;
         result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend,
                                         escape, w_one, w_many,
                                         weights, recurse_level + 1);
         if (result <= 0)
           return result;
-      } 
+      }
     }
   }
   return (str != str_end ? 1 : 0);
author	Alexander Barkov <bar@mariadb.com>	2018-10-15 13:22:18 +0400
committer	Alexander Barkov <bar@mariadb.com>	2018-10-15 13:22:18 +0400
commit	34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1 (patch)
tree	1cf8f34a2bdac4e293da332f3e0aba863238b51e /strings
parent	ae3fe14c17b4982fd576f65dccc5aba2e630cb31 (diff)
download	mariadb-git-34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1.tar.gz