1 files changed, 192 insertions, 7 deletions
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 76af3ef2cdf..8d864dfaad5 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000 MySQL AB
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
   DBUG_ASSERT(buflen > 0);
   do
   {
-    if ((str + buflen) < end)
+    if ((str + buflen) <= end)
     {
       /* Enough space for the characer */
       memcpy(str, buf, buflen);
@@ -683,7 +683,6 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   size_t maxcharlen= res_length / cs->mbmaxlen;
-  my_bool have_contractions= my_uca_have_contractions(cs);
 
   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
   {
@@ -751,8 +750,8 @@ fill_max_and_min:
         'ab\min\min\min\min' and 'ab\max\max\max\max'.
 
       */
-      if (have_contractions && ptr + 1 < end &&
-          my_uca_can_be_contraction_head(cs, (uchar) *ptr))
+      if (contraction_flags && ptr + 1 < end &&
+          contraction_flags[(uchar) *ptr])
       {
         /* Ptr[0] is a contraction head. */
         
@@ -774,8 +773,8 @@ fill_max_and_min:
           is not a contraction, then we put only ptr[0],
           and continue with ptr[1] on the next loop.
         */
-        if (my_uca_can_be_contraction_tail(cs, (uchar) ptr[1]) &&
-            my_uca_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1]))
+        if (contraction_flags[(uchar) ptr[1]] &&
+            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
         {
           /* Contraction found */
           if (maxcharlen == 1 || min_str + 1 >= min_end)
@@ -801,6 +800,192 @@ fill_max_and_min:
 }
 
 
+/**
+   Calculate min_str and max_str that ranges a LIKE string.
+   Generic function, currently used for ucs2, utf16, utf32,
+   but should be suitable for any other character sets with
+   cs->min_sort_char and cs->max_sort_char represented in
+   Unicode code points.
+
+   @param cs           Character set and collation pointer
+   @param ptr          Pointer to LIKE pattern.
+   @param ptr_length   Length of LIKE pattern.
+   @param escape       Escape character pattern,  typically '\'.
+   @param w_one        'One character' pattern,   typically '_'.
+   @param w_many       'Many characters' pattern, typically '%'.
+   @param res_length   Length of min_str and max_str.
+
+   @param[out] min_str Smallest string that ranges LIKE.
+   @param[out] max_str Largest string that ranges LIKE.
+   @param[out] min_length Length of min_str
+   @param[out] max_length Length of max_str
+
+   @return Optimization status.
+   @retval FALSE if LIKE pattern can be optimized
+   @retval TRUE if LIKE can't be optimized.
+*/
+my_bool
+my_like_range_generic(CHARSET_INFO *cs,
+                      const char *ptr, size_t ptr_length,
+                      pbool escape, pbool w_one, pbool w_many,
+                      size_t res_length,
+                      char *min_str,char *max_str,
+                      size_t *min_length,size_t *max_length)
+{
+  const char *end= ptr + ptr_length;
+  const char *min_org= min_str;
+  const char *max_org= max_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
+  size_t charlen= res_length / cs->mbmaxlen; /* Character budget of result */
+  size_t res_length_diff;                    /* res_length % cs->mbminlen */
+  my_bool have_contractions= my_cs_have_contractions(cs);
+
+  for ( ; charlen > 0; charlen--)
+  {
+    my_wc_t wc, wc2;
+    int res;
+    if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+    {
+      if (res == MY_CS_ILSEQ) /* Bad sequence */
+        return TRUE; /* min_length and max_length are not important */
+      break; /* End of the string */
+    }
+    ptr+= res;
+
+    if (wc == (my_wc_t) escape)
+    {
+      if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+      {
+        if (res == MY_CS_ILSEQ)
+          return TRUE; /* min_length and max_length are not important */
+        /*
+           End of the string: Escape is the last character.
+           Put escape as a normal character.
+           We will leave the loop on the next iteration.
+        */
+      }
+      else
+        ptr+= res;
+
+      /* Put escaped character (or trailing escape) to min_str and max_str */
+      if ((res= cs->cset->wc_mb(cs, wc,
+                                (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      min_str+= res;
+
+      if ((res= cs->cset->wc_mb(cs, wc,
+                                (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_one)
+    {
+      if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
+                                (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths;
+      min_str+= res;
+
+      if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
+                                (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths;
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_many)
+    {
+      /*
+        Calculate length of keys (min is prefix-only for MY_CS_BINSORT):
+        a\min\min... is the smallest possible string
+        a\max\max... is the biggest possible string
+      */
+      *min_length= ((cs->state & MY_CS_BINSORT) ?
+                    (size_t) (min_str - min_org) :
+                    res_length);
+      *max_length= res_length;
+      goto pad_min_max;
+    }
+
+    if (have_contractions &&
+        my_cs_can_be_contraction_head(cs, wc) &&
+        (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
+    {
+      uint16 *weight;
+      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
+      {
+        /* Contraction head followed by a wildcard */
+        *min_length= *max_length= res_length;
+        goto pad_min_max;
+      }
+
+      if (my_cs_can_be_contraction_tail(cs, wc2) &&
+          (weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
+      {
+        /* Contraction found */
+        if (charlen == 1)
+        {
+          /* Contraction does not fit into the result */
+          *min_length= *max_length= res_length;
+          goto pad_min_max;
+        }
+
+        ptr+= res;
+        charlen--; /* Contraction consumes two characters of the budget */
+
+        /* Put contraction head */
+        if ((res= cs->cset->wc_mb(cs, wc,
+                                  (uchar*) min_str, (uchar*) min_end)) <= 0)
+          goto pad_set_lengths;
+        min_str+= res;
+
+        if ((res= cs->cset->wc_mb(cs, wc,
+                                  (uchar*) max_str, (uchar*) max_end)) <= 0)
+          goto pad_set_lengths;
+        max_str+= res;
+        wc= wc2; /* Prepare to put contraction tail */
+      }
+    }
+
+    /* Normal character, or contraction tail */
+    if ((res= cs->cset->wc_mb(cs, wc,
+                              (uchar*) min_str, (uchar*) min_end)) <= 0)
+      goto pad_set_lengths;
+    min_str+= res;
+    if ((res= cs->cset->wc_mb(cs, wc,
+                              (uchar*) max_str, (uchar*) max_end)) <= 0)
+      goto pad_set_lengths;
+    max_str+= res;
+  }
+
+pad_set_lengths: /* Lengths are the number of bytes written so far */
+  *min_length= (size_t) (min_str - min_org);
+  *max_length= (size_t) (max_str - max_org);
+
+pad_min_max:
+  /*
+    Fill up max_str and min_str to res_length.
+    fill() cannot set incomplete characters and
+    requires that "length" argument is divisible by mbminlen.
+    Make sure to call fill() with proper "length" argument.
+  */
+  res_length_diff= res_length % cs->mbminlen;
+  cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
+                 cs->min_sort_char);
+  cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
+                 cs->max_sort_char);
+
+  /* In case of incomplete characters, set the remainder to 0x00 bytes */
+  if (res_length_diff)
+  {
+    /* Example: odd res_length for ucs2 */
+    memset(min_end - res_length_diff, 0, res_length_diff);
+    memset(max_end - res_length_diff, 0, res_length_diff);
+  }
+  return FALSE;
+}
+
+
 int
 my_wildcmp_mb_bin(CHARSET_INFO *cs,
                   const char *str,const char *str_end,