Bug #25207522: INCORRECT ORDER-BY BEHAVIOR ON A PARTITIONED TABLE WITH A COMPOSITE PREFIX INDEX

Fix prefix key comparison in partitioning. Comparisons must take into account no more than prefix_len characters. It used to compare prefix_len*mbmaxlen bytes.
author: Sergei Golubchik <serg@mariadb.org> 2020-07-24 17:43:10 +0200
committer: Sergei Golubchik <serg@mariadb.org> 2020-07-29 14:56:24 +0200
commit: 0b5b2f864153bf236a844e225ed6f04d79c757d8 (patch)
tree: 775b7ba966f61c99fa569cf95879ef98ea63c80f
parent: d5970779fac361a9ba56fccf0e9ed5b492b17d7e (diff)
download: mariadb-git-0b5b2f864153bf236a844e225ed6f04d79c757d8.tar.gz
5 files changed, 136 insertions, 24 deletions
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result
index 6cea712e482..57fa374d4f1 100644
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@@ -2756,5 +2756,45 @@ SELECT 1 FROM t1 WHERE a XOR 'a';
 1
 DROP TABLE t1;
 #
+# Bug #25207522: INCORRECT ORDER-BY BEHAVIOR ON A PARTITIONED TABLE
+# WITH A COMPOSITE PREFIX INDEX
+#
+create table t1(id int unsigned not null,
+data varchar(2) default null,
+key data_idx (data(1),id)
+) default charset=utf8
+partition by range (id) (
+partition p10 values less than (10),
+partition p20 values less than (20)
+);
+insert t1 values (6, 'ab'), (4, 'ab'), (5, 'ab'), (16, 'ab'), (14, 'ab'), (15, 'ab'), (5, 'ac'), (15, 'aa') ;
+select id from t1 where data = 'ab' order by id;
+id
+4
+5
+6
+14
+15
+16
+drop table t1;
+create table t1(id int unsigned not null,
+data text default null,
+key data_idx (data(1),id)
+) default charset=utf8
+partition by range (id) (
+partition p10 values less than (10),
+partition p20 values less than (20)
+);
+insert t1 values (6, 'ab'), (4, 'ab'), (5, 'ab'), (16, 'ab'), (14, 'ab'), (15, 'ab'), (5, 'ac'), (15, 'aa') ;
+select id from t1 where data = 'ab' order by id;
+id
+4
+5
+6
+14
+15
+16
+drop table t1;
+#
 # End of 10.1 tests
 #
diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test
index 0f965cc810a..23f3fa0b4d8 100644
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@@ -2971,5 +2971,33 @@ SELECT 1 FROM t1 WHERE a XOR 'a';
 DROP TABLE t1;
 
 --echo #
+--echo # Bug #25207522: INCORRECT ORDER-BY BEHAVIOR ON A PARTITIONED TABLE
+--echo # WITH A COMPOSITE PREFIX INDEX
+--echo #
+create table t1(id int unsigned not null,
+  data varchar(2) default null,
+  key data_idx (data(1),id)
+) default charset=utf8
+partition by range (id) (
+  partition p10 values less than (10),
+  partition p20 values less than (20)
+);
+insert t1 values (6, 'ab'), (4, 'ab'), (5, 'ab'), (16, 'ab'), (14, 'ab'), (15, 'ab'), (5, 'ac'), (15, 'aa') ;
+select id from t1 where data = 'ab' order by id;
+drop table t1;
+
+create table t1(id int unsigned not null,
+  data text default null,
+  key data_idx (data(1),id)
+) default charset=utf8
+partition by range (id) (
+  partition p10 values less than (10),
+  partition p20 values less than (20)
+);
+insert t1 values (6, 'ab'), (4, 'ab'), (5, 'ab'), (16, 'ab'), (14, 'ab'), (15, 'ab'), (5, 'ac'), (15, 'aa') ;
+select id from t1 where data = 'ab' order by id;
+drop table t1;
+
+--echo #
 --echo # End of 10.1 tests
 --echo #
diff --git a/sql/field.cc b/sql/field.cc
index 32edff3dd2d..1ec5e8e5e41 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -7537,8 +7537,7 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value)
 }
 
 
-int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
-                             uint max_len)
+int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   uint a_length, b_length;
   int diff;
@@ -7553,8 +7552,8 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
     a_length= uint2korr(a_ptr);
     b_length= uint2korr(b_ptr);
   }
-  set_if_smaller(a_length, max_len);
-  set_if_smaller(b_length, max_len);
+  set_if_smaller(a_length, field_length);
+  set_if_smaller(b_length, field_length);
   diff= field_charset->coll->strnncollsp(field_charset,
                                          a_ptr+
                                          length_bytes,
@@ -7566,6 +7565,43 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
 }
 
 
+static int cmp_str_prefix(const uchar *ua, size_t alen, const uchar *ub,
+                          size_t blen, size_t prefix, CHARSET_INFO *cs)
+{
+  const char *a= (char*)ua, *b= (char*)ub;
+  MY_STRCOPY_STATUS status;
+  prefix/= cs->mbmaxlen;
+  alen= cs->cset->well_formed_char_length(cs, a, a + alen, prefix, &status);
+  blen= cs->cset->well_formed_char_length(cs, b, b + blen, prefix, &status);
+  return cs->coll->strnncollsp(cs, ua, alen, ub, blen, 0);
+}
+
+
+
+int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
+                                size_t prefix_len)
+{
+  /* avoid expensive well_formed_char_length if possible */
+  if (prefix_len == table->field[field_index]->field_length)
+    return Field_varstring::cmp(a_ptr, b_ptr);
+
+  size_t a_length, b_length;
+
+  if (length_bytes == 1)
+  {
+    a_length= *a_ptr;
+    b_length= *b_ptr;
+  }
+  else
+  {
+    a_length= uint2korr(a_ptr);
+    b_length= uint2korr(b_ptr);
+  }
+  return cmp_str_prefix(a_ptr+length_bytes, a_length, b_ptr+length_bytes,
+                        b_length, prefix_len, field_charset);
+}
+
+
 /**
   @note
     varstring and blob keys are ALWAYS stored with a 2 byte length prefix
@@ -8114,16 +8150,24 @@ int Field_blob::cmp(const uchar *a,uint32 a_length, const uchar *b,
 }
 
 
-int Field_blob::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
-                        uint max_length)
+int Field_blob::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  uchar *blob1,*blob2;
+  memcpy(&blob1, a_ptr+packlength, sizeof(char*));
+  memcpy(&blob2, b_ptr+packlength, sizeof(char*));
+  size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr);
+  return cmp(blob1, a_len, blob2, b_len);
+}
+
+
+int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
+                           size_t prefix_len)
 {
   uchar *blob1,*blob2;
   memcpy(&blob1, a_ptr+packlength, sizeof(char*));
   memcpy(&blob2, b_ptr+packlength, sizeof(char*));
-  uint a_len= get_length(a_ptr), b_len= get_length(b_ptr);
-  set_if_smaller(a_len, max_length);
-  set_if_smaller(b_len, max_length);
-  return Field_blob::cmp(blob1,a_len,blob2,b_len);
+  size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr);
+  return cmp_str_prefix(blob1, a_len, blob2, b_len, prefix_len, field_charset);
 }
 
 
@@ -9407,7 +9451,7 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
     The a and b pointer must be pointers to the field in a record
     (not the table->record[0] necessarily)
 */
-int Field_bit::cmp_max(const uchar *a, const uchar *b, uint max_len)
+int Field_bit::cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len)
 {
   my_ptrdiff_t a_diff= a - ptr;
   my_ptrdiff_t b_diff= b - ptr;
diff --git a/sql/field.h b/sql/field.h
index e21994e3087..27a3f130aa7 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -954,9 +954,13 @@ public:
     return type();
   }
   inline  int cmp(const uchar *str) { return cmp(ptr,str); }
-  virtual int cmp_max(const uchar *a, const uchar *b, uint max_len)
-    { return cmp(a, b); }
   virtual int cmp(const uchar *,const uchar *)=0;
+  /*
+    The following method is used for comparing prefix keys.
+    Currently it's only used in partitioning.
+  */
+  virtual int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len)
+  { return cmp(a, b); }
   virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0L)
   { return memcmp(a,b,pack_length()); }
   virtual int cmp_offset(uint row_offset)
@@ -2991,11 +2995,8 @@ public:
   longlong val_int(void);
   String *val_str(String*,String *);
   my_decimal *val_decimal(my_decimal *);
-  int cmp_max(const uchar *, const uchar *, uint max_length);
-  int cmp(const uchar *a,const uchar *b)
-  {
-    return cmp_max(a, b, ~0L);
-  }
+  int cmp(const uchar *a,const uchar *b);
+  int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
   void sort_string(uchar *buff,uint length);
   uint get_key_image(uchar *buff,uint length, imagetype type);
   void set_key_image(const uchar *buff,uint length);
@@ -3077,9 +3078,8 @@ public:
   longlong val_int(void);
   String *val_str(String*,String *);
   my_decimal *val_decimal(my_decimal *);
-  int cmp_max(const uchar *, const uchar *, uint max_length);
-  int cmp(const uchar *a,const uchar *b)
-    { return cmp_max(a, b, ~0L); }
+  int cmp(const uchar *a,const uchar *b);
+  int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
   int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length);
   int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0L);
   int key_cmp(const uchar *,const uchar*);
@@ -3399,7 +3399,7 @@ public:
   }
   int cmp_binary_offset(uint row_offset)
   { return cmp_offset(row_offset); }
-  int cmp_max(const uchar *a, const uchar *b, uint max_length);
+  int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
   int key_cmp(const uchar *a, const uchar *b)
   { return cmp_binary((uchar *) a, (uchar *) b); }
   int key_cmp(const uchar *str, uint length);
diff --git a/sql/key.cc b/sql/key.cc
index 013733db62a..9b09387885b 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -623,8 +623,8 @@ int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec)
         max length. The exceptions are the BLOB and VARCHAR field types
         that take the max length into account.
       */
-      if ((result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
-                             key_part->length)))
+      if ((result= field->cmp_prefix(field->ptr+first_diff, field->ptr+sec_diff,
+                                     key_part->length)))
         DBUG_RETURN(result);
 next_loop:
       key_part++;
author	Sergei Golubchik <serg@mariadb.org>	2020-07-24 17:43:10 +0200
committer	Sergei Golubchik <serg@mariadb.org>	2020-07-29 14:56:24 +0200
commit	0b5b2f864153bf236a844e225ed6f04d79c757d8 (patch)
tree	775b7ba966f61c99fa569cf95879ef98ea63c80f
parent	d5970779fac361a9ba56fccf0e9ed5b492b17d7e (diff)
download	mariadb-git-0b5b2f864153bf236a844e225ed6f04d79c757d8.tar.gz