8 files changed, 133 insertions, 77 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 4beb7047d00..3ca4ba2b430 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -288,9 +288,7 @@ void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
 
   for (; key < end ; key++)
   {
-    tmp1^= (ulong) ((((uint) tmp1 & 63) + tmp2) *
-                    ((uint) *key)) + (tmp1 << 8);
-    tmp2+= 3;
+    MY_HASH_ADD(tmp1, tmp2, (uint) *key);
   }
 
   *nr1= tmp1;
@@ -307,9 +305,7 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
 
   for (; key < end ; key++)
   {
-    tmp1^= (ulong) ((((uint) tmp1 & 63) + tmp2) *
-                    ((uint) *key)) + (tmp1 << 8);
-    tmp2+= 3;
+    MY_HASH_ADD(tmp1, tmp2, (uint) *key);
   }
 
   *nr1= tmp1;
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 2c84f86fad0..babf74599ea 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -691,6 +691,8 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
 			    ulong *nr1, ulong *nr2)
 {
   const uchar *end;
+  register ulong m1= *nr1, m2= *nr2;
+    
   /*
     Remove end space. We have to do this to be able to compare
     'AE' and '�' as identical
@@ -700,14 +702,14 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
   for (; key < end ; key++)
   {
     uint X= (uint) combo1map[(uint) *key];
-    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
-    nr2[0]+=3;
+    MY_HASH_ADD(m1, m2, X);
     if ((X= combo2map[*key]))
     {
-      nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
-      nr2[0]+=3;
+      MY_HASH_ADD(m1, m2, X);
     }
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index f4e70fd1dd5..1ef772e1d5e 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -680,6 +680,8 @@ void
 my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
                     const uchar *key, size_t len,ulong *nr1, ulong *nr2)
 {
+  register ulong m1= *nr1, m2= *nr2;
+
   /*
      Remove trailing spaces. We have to do this to be able to compare
     'A ' and 'A' as identical
@@ -688,10 +690,10 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
   
   for (; key < end ; key++)
   {
-    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
-	     ((uint)*key)) + (nr1[0] << 8);
-    nr2[0]+=3;
+    MY_HASH_ADD(m1, m2, (uint)*key);
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index f5484965314..0785ba35700 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -306,24 +306,48 @@ void my_hash_sort_simple(CHARSET_INFO *cs,
 {
   register const uchar *sort_order=cs->sort_order;
   const uchar *end;
-  ulong n1, n2;
+  register ulong m1= *nr1, m2= *nr2;
+  uint16 space_weight= sort_order[' '];
 
   /*
-    Remove end space. We have to do this to be able to compare
-    'A ' and 'A' as identical
+    Remove all trailing characters that are equal to space.
+    We have to do this to be able to compare 'A ' and 'A' as identical.
+
+    If the key is long enough, cut the trailing spaces (0x20) using an
+    optimized function implemented in skip_trailing_spaces().
+
+    "len > 16" is just some heuristic here.
+    Calling skip_triling_space() for short values is not desirable,
+    because its initialization block may be more expensive than the
+    performance gained.
+  */
+
+  end= len > 16 ? skip_trailing_space(key, len) : key + len;
+
+  /*
+    We removed all trailing characters that are binary equal to space 0x20.
+    Now remove all trailing characters that have weights equal to space.
+    Some 8bit simple collations may have such characters:
+    - cp1250_general_ci    0xA0 NO-BREAK SPACE == 0x20 SPACE
+    - cp1251_ukrainian_ci  0x60 GRAVE ACCENT   == 0x20 SPACE
+    - koi8u_general_ci     0x60 GRAVE ACCENT   == 0x20 SPACE
   */
-  end= skip_trailing_space(key, len);
 
-  n1= *nr1;
-  n2= *nr2;
+  for ( ; key < end ; )
+  {
+    if (sort_order[*--end] != space_weight)
+    {
+      end++;
+      break;
+    }
+  }
+
   for (; key < (uchar*) end ; key++)
   {
-    n1^=(ulong) ((((uint) n1 & 63)+n2) *
-	     ((uint) sort_order[(uint) *key])) + (n1 << 8);
-    n2+=3;
+    MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
   }
-  *nr1= n1;
-  *nr2= n2;
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 5165a43e852..19ce17c3856 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -20873,21 +20873,45 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
 static void my_hash_sort_uca(CHARSET_INFO *cs,
                              my_uca_scanner_handler *scanner_handler,
 			     const uchar *s, size_t slen,
-			     ulong *n1, ulong *n2)
+			     ulong *nr1, ulong *nr2)
 {
   int   s_res;
   my_uca_scanner scanner;
-  
-  slen= cs->cset->lengthsp(cs, (char*) s, slen);
+  int space_weight= my_space_weight(cs);
+  register ulong m1= *nr1, m2= *nr2;
+
   scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
   
   while ((s_res= scanner_handler->next(&scanner)) >0)
   {
-    n1[0]^= (((n1[0] & 63)+n2[0])*(s_res >> 8))+ (n1[0] << 8);
-    n2[0]+=3;
-    n1[0]^= (((n1[0] & 63)+n2[0])*(s_res & 0xFF))+ (n1[0] << 8);
-    n2[0]+=3;
+    if (s_res == space_weight)
+    {
+      /* Combine all spaces to be able to skip end spaces */
+      uint count= 0;
+      do
+      {
+        count++;
+        if ((s_res= scanner_handler->next(&scanner)) <= 0)
+        {
+          /* Skip strings at end of string */
+          goto end;
+        }
+      }
+      while (s_res == space_weight);
+
+      /* Add back that has for the space characters */
+      do
+      {
+        MY_HASH_ADD_16(m1, m2, space_weight);
+      }
+      while (--count != 0);
+
+    }
+    MY_HASH_ADD_16(m1, m2, s_res);
   }
+end:
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index d2f7d34f0aa..593f9a12950 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1222,23 +1222,23 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
 
 static void
 my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
-                   ulong *n1, ulong *n2)
+                   ulong *nr1, ulong *nr2)
 {
   my_wc_t wc;
   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
   int res;
   const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
+  register ulong m1= *nr1, m2= *nr2;
 
   while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
   {
     my_tosort_utf16(uni_plane, &wc);
-    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
-    n2[0]+= 3;
-    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
-    n2[0]+= 3;
+    MY_HASH_ADD_16(m1, m2, wc);
     s+= res;
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
@@ -1611,12 +1611,14 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs,
                        const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
 {
   const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len);
+  register ulong m1= *nr1, m2= *nr2;
+
   for ( ; pos < end ; pos++)
   {
-    nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * 
-              ((uint)*pos)) + (nr1[0] << 8);
-    nr2[0]+= 3;
+    MY_HASH_ADD(m1, m2, (uint)*pos);
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
@@ -2007,22 +2009,15 @@ my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
 }
 
 
-static inline void
-my_hash_add(ulong *n1, ulong *n2, uint ch)
-{
-  n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
-  n2[0]+= 3;
-}
-
-
 static void
 my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
-                   ulong *n1, ulong *n2)
+                   ulong *nr1, ulong *nr2)
 {
   my_wc_t wc;
   int res;
   const uchar *e= s + slen;
   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
+  register ulong m1= *nr1, m2= *nr2;
 
   /* Skip trailing spaces */
   while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
@@ -2031,12 +2026,14 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
   while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
   {
     my_tosort_utf32(uni_plane, &wc);
-    my_hash_add(n1, n2, (uint) (wc >> 24));
-    my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
-    my_hash_add(n1, n2, (uint) (wc >> 8)  & 0xFF);
-    my_hash_add(n1, n2, (uint) (wc & 0xFF));
+    MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
+    MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
+    MY_HASH_ADD(m1, m2, (uint) (wc >> 8)  & 0xFF);
+    MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF));
     s+= res;
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
@@ -2976,12 +2973,13 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
 
 
 static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
-			      ulong *n1, ulong *n2)
+			      ulong *nr1, ulong *nr2)
 {
   my_wc_t wc;
   int res;
   const uchar *e=s+slen;
   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
+  register ulong m1= *nr1, m2= *nr2;
 
   while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
     e-= 2;
@@ -2989,12 +2987,11 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
   while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
   {
     my_tosort_ucs2(uni_plane, &wc);
-    n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
-    n2[0]+=3;
-    n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
-    n2[0]+=3;
+    MY_HASH_ADD_16(m1, m2, wc);
     s+=res;
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
@@ -3312,16 +3309,17 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
 			   const uchar *key, size_t len,ulong *nr1, ulong *nr2)
 {
   const uchar *end = key + len;
+  register ulong m1= *nr1, m2= *nr2;
 
   while (end > key+1 && end[-1] == ' ' && end[-2] == '\0')
     end-= 2;
 
   for (; key < (uchar*) end ; key++)
   {
-    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
-	     ((uint)*key)) + (nr1[0] << 8);
-    nr2[0]+=3;
+    MY_HASH_ADD(m1, m2, (uint)*key);
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 96d5ea26a3c..d0a64d11c84 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5087,12 +5087,13 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
 
 
 static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
-                              ulong *n1, ulong *n2)
+                              ulong *nr1, ulong *nr2)
 {
   my_wc_t wc;
   int res;
   const uchar *e=s+slen;
   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
+  register ulong m1= *nr1, m2= *nr2;
 
   /*
     Remove end space. We have to do this to be able to compare
@@ -5104,12 +5105,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
   while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
   {
     my_tosort_unicode(uni_plane, &wc, cs->state);
-    n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
-    n2[0]+=3;
-    n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
-    n2[0]+=3;
+    MY_HASH_ADD_16(m1, m2, wc);
     s+=res;
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
@@ -7597,22 +7597,15 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
 }
 
 
-static inline void
-my_hash_add(ulong *n1, ulong *n2, uint ch)
-{
-  n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
-  n2[0]+= 3;
-}
-
-
 static void
 my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
-                     ulong *n1, ulong *n2)
+                     ulong *nr1, ulong *nr2)
 {
   my_wc_t wc;
   int res;
   const uchar *e= s + slen;
   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
+  register ulong m1= *nr1, m2= *nr2;
 
   /*
     Remove end space. We do this to be able to compare
@@ -7624,8 +7617,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
   while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
   {
     my_tosort_unicode(uni_plane, &wc, cs->state);
-    my_hash_add(n1, n2, (uint) (wc & 0xFF));
-    my_hash_add(n1, n2, (uint) (wc >> 8)  & 0xFF);
+    MY_HASH_ADD_16(m1, m2, (uint) (wc & 0xFFFF));
     if (wc > 0xFFFF)
     {
        /*
@@ -7635,10 +7627,12 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
         This is useful to keep order of records in
         test results, e.g. for "SHOW GRANTS".
       */
-      my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
+      MY_HASH_ADD(m1, m2, (uint) ((wc >> 16) & 0xFF));
     }
     s+= res;
   }
+  *nr1= m1;
+  *nr2= m2;
 }
 
 
diff --git a/strings/strings_def.h b/strings/strings_def.h
index 6bd8e8f5575..d601f5ca697 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -100,4 +100,20 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
     end--;
   return (end);
 }
+
+/* Macros for hashing characters */
+
+#define MY_HASH_ADD(A, B, value) \
+  do { A^= (((A & 63)+B)*((value)))+ (A << 8); B+=3; } while(0)
+
+#define MY_HASH_ADD_16(A, B, value) \
+  do { MY_HASH_ADD(A, B, ((value) & 0xFF)) ; MY_HASH_ADD(A, B, ((value >>8 ))); } while(0) 
+
+/*
+  This one is needed to ensure we get the exact same hash as MariaDB 5.1
+  This is needed to ensure that old partitioned tables still work as before.
+*/
+#define MY_HASH_ADD_16_INV(A, B, value) \
+  do { MY_HASH_ADD(A, B, ((value >> 8))) ; MY_HASH_ADD(A, B, ((value & 0xFF ))); } while(0) 
+
 #endif