9 files changed, 221 insertions, 39 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index e9de0ade557..f9d29ca1739 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -271,6 +271,29 @@ static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
+		      const uchar *key, uint len,ulong *nr1, ulong *nr2)
+{
+  const uchar *pos = key;
+  /* Hashes key[0..len), minus trailing spaces, into nr1[0] and nr2[0] (in/out). */
+  key+= len;
+  
+  /*
+    Step key back over trailing spaces so that 'A ' and 'A'
+    yield the same hash; only bytes in [pos, key) are hashed below.
+  */
+  while (key > pos && key[-1] == ' ')
+    key--;
+  /* Mix each remaining byte into nr1[0]; nr2[0] grows by 3 per byte. */
+  for (; pos < (uchar*) key ; pos++)
+  {
+    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
+	     ((uint)*pos)) + (nr1[0] << 8);
+    nr2[0]+=3;
+  }
+}
+
+
 void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
 		      const uchar *key, uint len,ulong *nr1, ulong *nr2)
 {
@@ -471,7 +494,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
     my_wildcmp_bin,
     my_strcasecmp_bin,
     my_instr_bin,
-    my_hash_sort_bin,
+    my_hash_sort_8bit_bin,
     my_propagate_simple
 };
 
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 0ece0ef1270..42325648037 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -250,9 +250,16 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
   const uchar *a_end= a + a_length;
   const uchar *b_end= b + b_length;
   int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+  diff_if_only_endspace_difference= 0;
+#endif
+
   if (!res && (a != a_end || b != b_end))
   {
-    int swap= 0;
+    int swap= 1;
+    if (diff_if_only_endspace_difference)
+      res= 1;                                   /* Assume 'a' is bigger */
     /*
       Check the next not space character of the longer key. If it's < ' ',
       then it's smaller than the other key.
@@ -263,11 +270,12 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
       a_end= b_end;
       a= b;
       swap= -1;				/* swap sign of result */
+      res= -res;
     }
     for (; a < a_end ; a++)
     {
-      if (*a != ' ')
-	return ((int) *a - (int) ' ') ^ swap;
+      if (*a != (uchar) ' ')
+	return (*a < (uchar) ' ') ? -swap : swap;
     }
   }
   return res;
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index 2a7fcbd383e..38aa3a05adf 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -923,7 +923,7 @@ uint16 to_uni_ascii_general_ci[] = {
 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
-0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
+0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
@@ -4604,7 +4604,7 @@ uint16 to_uni_ascii_bin[] = {
 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
-0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
+0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 40cec669766..b370714e464 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -467,6 +467,13 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
   
   key+= len;
   
+  /*
+    Remove trailing spaces. We have to do this so that
+    'A ' and 'A' hash identically.
+  */
+  while (key > pos && key[-1] == ' ')
+    key--;
+  
   for (; pos < (uchar*) key ; pos++)
   {
     nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
@@ -556,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   uint maxcharlen= res_length / cs->mbmaxlen;
+  const char *contraction_flags= cs->contractions ? 
+              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
 
   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
   {
@@ -564,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
       ptr++;                                    /* Skip escape */
     else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
     {      
+fill_max_and_min:
       /*
         Calculate length of keys:
         'a\0\0... is the smallest possible string when we have space expand
@@ -595,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
        *min_str++= *max_str++= *ptr++;
     }
     else
-       *min_str++= *max_str++= *ptr++;    
+    {
+      /*
+        Special case for collations with contractions.
+        For example, in Czech, 'ch' is a separate letter
+        which is sorted between 'h' and 'i'.
+        In the pattern 'abc%', the 'c' at the end can mean:
+        - letter 'c' itself,
+        - beginning of the contraction 'ch'.
+
+        If we simply return this LIKE range:
+
+         'abc\min\min\min' and 'abc\max\max\max'
 
+        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
+        will only find values starting from 'abc[^h]',
+        but won't find values starting from 'abch'.
+
+        We must ignore contraction heads followed by w_one or w_many.
+        ('Contraction head' means any letter which can be the first
+        letter in a contraction)
+
+        For example, for Czech 'abc%', we will return LIKE range,
+        which is equal to LIKE range for 'ab%':
+
+        'ab\min\min\min\min' and 'ab\max\max\max\max'.
+
+      */
+      if (contraction_flags && ptr + 1 < end &&
+          contraction_flags[(uchar) *ptr])
+      {
+        /* Ptr[0] is a contraction head. */
+        
+        if (ptr[1] == w_one || ptr[1] == w_many)
+        {
+          /* Contraction head followed by a wildcard, quit. */
+          goto fill_max_and_min;
+        }
+        
+        /*
+          Some letters can be both contraction heads and contraction tails.
+          For example, in Danish 'aa' is a separate single letter which
+          is sorted after 'z'. So 'a' can be both head and tail.
+          
+          If ptr[0]+ptr[1] is a contraction,
+          then put both letters together.
+          
+          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
+          is not a contraction, then we put only ptr[0],
+          and continue with ptr[1] on the next loop.
+        */
+        if (contraction_flags[(uchar) ptr[1]] &&
+            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
+        {
+          /* Contraction found */
+          if (maxcharlen == 1 || min_str + 1 >= min_end)
+          {
+            /* Both contraction parts don't fit, quit */
+            goto fill_max_and_min;
+          }
+          
+          /* Put contraction head */
+          *min_str++= *max_str++= *ptr++;
+          maxcharlen--;
+        }
+      }
+      /* Put contraction tail, or a single character */
+      *min_str++= *max_str++= *ptr++;    
+    }
   }
 
   *min_length= *max_length = (uint) (min_str - min_org);
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e57204f8d33..8b1b0d6790d 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -179,8 +179,8 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
     }
     for (end= a + a_length-length; a < end ; a++)
     {
-      if (*a != ' ')
-	return (*a < ' ') ? -swap : swap;
+      if (map[*a] != ' ')
+	return (map[*a] < ' ') ? -swap : swap;
     }
   }
   return res;
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 4dbda0b9239..81fb9ee1970 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
   /* Now process contractions */
   if (ncontractions)
   {
-    uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */
+    /*
+      8K for weights for basic latin letter pairs,
+      plus 256 bytes for "is contraction part" flags.
+    */
+    uint size= 0x40*0x40*sizeof(uint16) + 256;
+    char *contraction_flags;
     if (!(cs->contractions= (uint16*) (*alloc)(size)))
         return 1;
     bzero((void*)cs->contractions, size);
+    contraction_flags= ((char*) cs->contractions) + 0x40*0x40;
     for (i=0; i < rc; i++)
     {
       if (rule[i].curr[1])
@@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
         
         /* Copy base weight applying primary difference */
         cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
+        /* Mark both letters as "is contraction part" */
+        contraction_flags[rule[i].curr[0]]= 1;
+        contraction_flags[rule[i].curr[1]]= 1;
       }
     }
   }
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 6b1ba3c1ef6..b5353c55e4c 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1484,7 +1484,10 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
   const uchar *pos = key;
   
   key+= len;
-  
+
+  while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
+    key-= 2;
+
   for (; pos < (uchar*) key ; pos++)
   {
     nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
@@ -1521,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
   char *min_org=min_str;
   char *min_end=min_str+res_length;
   uint charlen= res_length / cs->mbmaxlen;
+  const char *contraction_flags= cs->contractions ?
+             ((const char*) cs->contractions) + 0x40*0x40 : NULL;
   
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
@@ -1542,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
     }
     if (ptr[0] == '\0' && ptr[1] == w_many)	/* '%' in SQL */
     {
+fill_max_and_min:
       /*
         Calculate length of keys:
         'a\0\0... is the smallest possible string when we have space expand
@@ -1558,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
       } while (min_str + 1 < min_end);
       return 0;
     }
+
+    if (contraction_flags && ptr + 3 < end &&
+        ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
+    {
+      /* Contraction head found */
+      if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
+      {
+        /* Contraction head followed by a wildcard, quit */
+        goto fill_max_and_min;
+      }
+      
+      /*
+        Check if the second letter can be contraction part,
+        and if two letters really produce a contraction.
+      */
+      if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
+          cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
+      {
+        /* Contraction found */
+        if (charlen == 1 || min_str + 2 >= min_end)
+        {
+          /* Full contraction doesn't fit, quit */
+          goto fill_max_and_min;
+        }
+        
+        /* Put contraction head */
+        *min_str++= *max_str++= *ptr++;
+        *min_str++= *max_str++= *ptr++;
+        charlen--;
+      }
+    }
+    /* Put contraction tail, or a single character */
     *min_str++= *max_str++ = ptr[0];
     *min_str++= *max_str++ = ptr[1];
   }
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 387ce16a43d..4682868562f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2802,16 +2802,19 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
 static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, 
                                   const uchar *s, uint slen,
                                   const uchar *t, uint tlen,
-                                  my_bool diff_if_only_endspace_difference
-                                  __attribute__((unused)))
+                                  my_bool diff_if_only_endspace_difference)
 {
-  int s_res,t_res;
-  my_wc_t s_wc,t_wc;
-  const uchar *se= s+slen;
-  const uchar *te= t+tlen;
-  int save_diff = 0;
+  int s_res, t_res, res;
+  my_wc_t s_wc, t_wc;
+  const uchar *se= s + slen;
+  const uchar *te= t + tlen;
+  int save_diff= 0;
   MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-  
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+  diff_if_only_endspace_difference= 0;
+#endif
+    
   while ( s < se && t < te )
   {
     int plane;
@@ -2843,16 +2846,20 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
   
   slen= se-s;
   tlen= te-t;
+  res= 0;
   
   if (slen != tlen)
   {
-    int swap= 0;
+    int swap= 1;
+    if (diff_if_only_endspace_difference)
+      res= 1;                                   /* Assume 'a' is bigger */
     if (slen < tlen)
     {
       slen= tlen;
       s= t;
       se= te;
       swap= -1;
+      res= -res;
     }
     /*
       This following loop uses the fact that in UTF-8
@@ -2866,8 +2873,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
     */
     for ( ; s < se; s++)
     {
-      if (*s != ' ')
-        return ((int)*s -  (int) ' ') ^ swap;
+      if (*s != (uchar) ' ')
+        return (*s < (uchar) ' ') ? -swap : swap;
     }
   }
   return save_diff;
diff --git a/strings/strtod.c b/strings/strtod.c
index 7196cafb2c9..5fe59d10bd2 100644
--- a/strings/strtod.c
+++ b/strings/strtod.c
@@ -31,13 +31,40 @@
 
 #define MAX_DBL_EXP	308
 #define MAX_RESULT_FOR_MAX_EXP 1.7976931348623157
-static double scaler10[] = {
-  1.0, 1e10, 1e20, 1e30, 1e40, 1e50, 1e60, 1e70, 1e80, 1e90
-};
-static double scaler1[] = {
-  1.0, 10.0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9
-};
 
+const double log_10[] = {	/* Powers of ten, not logarithms: log_10[i] is 1e<i>, i = 0..308 */
+  1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
+  1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019,
+  1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029,
+  1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039,
+  1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049,
+  1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059,
+  1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069,
+  1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079,
+  1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089,
+  1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099,
+  1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
+  1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
+  1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
+  1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
+  1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
+  1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
+  1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
+  1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179,
+  1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189,
+  1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199,
+  1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209,
+  1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219,
+  1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229,
+  1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239,
+  1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
+  1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
+  1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269,
+  1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279,
+  1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289,
+  1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
+  1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308
+};
 
 /*
   Convert string to double (string doesn't have to be null terminated)
@@ -57,7 +84,7 @@ double my_strtod(const char *str, char **end_ptr, int *error)
 {
   double result= 0.0;
   uint negative= 0, ndigits, dec_digits= 0, neg_exp= 0;
-  int exponent= 0, digits_after_dec_point= 0, tmp_exp;
+  int exponent= 0, digits_after_dec_point= 0, tmp_exp, step;
   const char *old_str, *end= *end_ptr, *start_of_number;
   char next_char;
   my_bool overflow=0;
@@ -179,16 +206,10 @@ double my_strtod(const char *str, char **end_ptr, int *error)
       exponent= -exponent;
       neg_exp= 1;                               /* neg_exp was 0 before */
     }
-    while (exponent >= 100)
-    {
-      result= neg_exp ? result/1.0e100 : result*1.0e100;
-      exponent-= 100;
-    }
-    scaler= scaler10[exponent/10]*scaler1[exponent%10];
-    if (neg_exp)
-      result/= scaler;
-    else
-      result*= scaler;
+    step= array_elements(log_10) - 1;
+    for (; exponent > step; exponent-= step)
+      result= neg_exp ? result / log_10[step] : result * log_10[step];
+    result= neg_exp ? result / log_10[exponent] : result * log_10[exponent];
   }
 
 done: