diff options
-rw-r--r-- | mysql-test/r/partition.result | 39 | ||||
-rw-r--r-- | mysql-test/r/partition_innodb.result | 39 | ||||
-rw-r--r-- | mysql-test/r/partition_order.result | 6 | ||||
-rw-r--r-- | mysql-test/t/partition.test | 46 | ||||
-rw-r--r-- | mysql-test/t/partition_innodb.test | 47 | ||||
-rw-r--r-- | sql/ha_partition.cc | 145 | ||||
-rw-r--r-- | sql/ha_partition.h | 21 |
7 files changed, 211 insertions, 132 deletions
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result index 521d31ae114..4379cf17ec0 100644 --- a/mysql-test/r/partition.result +++ b/mysql-test/r/partition.result @@ -2494,45 +2494,6 @@ i 4 DROP TABLE t1; # -# MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703) -# -create table t1 ( -a int not null, -b int not null, -pk int not null, -primary key (pk), -key(a), -key(b) -) partition by hash(pk) partitions 10; -insert into t1 values (1,2,4); -insert into t1 values (1,0,17); -insert into t1 values (1,2,25); -insert into t1 values (10,20,122); -insert into t1 values (10,20,123); -create table t2 (a int); -insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C; -insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a, -10+A.a + 10*B.a + 100*C.a + 1000*D.a, -2000 + A.a + 10*B.a + 100*C.a + 1000*D.a -from t2 A, t2 B, t2 C ,t2 D; -explain select * from t1 where a=1 and b=2 and pk between 1 and 999999 ; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref PRIMARY,a,b b 4 const 982 Using where -create temporary table t3 as -select * from t1 where a=1 and b=2 and pk between 1 and 999 ; -select count(*) from t3; -count(*) -802 -drop table t3; -create temporary table t3 as -select * from t1 ignore index(a,b) where a=1 and b=2 and pk between 1 and 999 ; -select count(*) from t3; -count(*) -802 -drop table t3; -drop table t1,t2; -# # MDEV-5555: Incorrect index_merge on BTREE indices # CREATE TABLE t1 ( diff --git a/mysql-test/r/partition_innodb.result b/mysql-test/r/partition_innodb.result index 45b406a5ad0..30a225cadeb 100644 --- a/mysql-test/r/partition_innodb.result +++ b/mysql-test/r/partition_innodb.result @@ -639,4 +639,43 @@ col1 col2 col3 1 2 2013-03-11 16:33:04 1 2 2013-03-11 16:33:24 DROP TABLE t1; +# +# MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703) +# +create table t1 ( +a int not null, +b int not null, +pk int not null, +primary key (pk), +key(a), +key(b) +) engine=innodb partition by hash(pk) partitions 10; +insert into t1 values (1,2,4); +insert into t1 values (1,0,17); +insert into t1 values (1,2,25); +insert into t1 values (10,20,122); +insert into t1 values (10,20,123); +create table t2 (a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C; +insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a, +10+A.a + 10*B.a + 100*C.a + 1000*D.a, +2000 + A.a + 10*B.a + 100*C.a + 1000*D.a +from t2 A, t2 B, t2 C ,t2 D; +explain select * from t1 where a=1 and b=2 and pk between 1 and 999999 ; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge PRIMARY,a,b b,a 4,4 NULL # Using intersect(b,a); Using where; Using index +create temporary table t3 as +select * from t1 where a=1 and b=2 and pk between 1 and 999 ; +select count(*) from t3; +count(*) +802 +drop table t3; +create temporary table t3 as +select * from t1 ignore index(a,b) where a=1 and b=2 and pk between 1 and 999 ; +select count(*) from t3; +count(*) +802 +drop table t3; +drop table t1,t2; set global default_storage_engine=default; diff --git a/mysql-test/r/partition_order.result b/mysql-test/r/partition_order.result index 06c1b63a382..cecfc90eefb 100644 --- a/mysql-test/r/partition_order.result +++ b/mysql-test/r/partition_order.result @@ -734,8 +734,8 @@ a b 7 1 35 2 3 3 -2 4 30 4 +2 4 4 5 6 6 select * from t1 force index (b) where b < 10 ORDER BY b; @@ -744,16 +744,16 @@ a b 7 1 35 2 3 3 -2 4 30 4 +2 4 4 5 6 6 select * from t1 force index (b) where b < 10 ORDER BY b DESC; a b 6 6 4 5 -30 4 2 4 +30 4 3 3 35 2 7 1 diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test index 63f29ffd978..bb67dddf14a 100644 --- a/mysql-test/t/partition.test +++ b/mysql-test/t/partition.test @@ -2496,52 +2496,6 @@ SELECT * from t1 order by i; DROP TABLE t1; --echo # ---echo # MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703) ---echo # -create table t1 ( - a int not null, - b int not null, - pk int not null, - primary key (pk), - key(a), - key(b) -) partition by hash(pk) partitions 10; - -insert into t1 values (1,2,4); # both -insert into t1 values (1,0,17); # left -insert into t1 values (1,2,25); # both - -insert into t1 values (10,20,122); -insert into t1 values (10,20,123); - -# Now, fill in some data so that the optimizer choses index_merge -create table t2 (a int); -insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); - -insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C; - -insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a, - 10+A.a + 10*B.a + 100*C.a + 1000*D.a, - 2000 + A.a + 10*B.a + 100*C.a + 1000*D.a - from t2 A, t2 B, t2 C ,t2 D; - -# This should show index_merge, using intersect -explain select * from t1 where a=1 and b=2 and pk between 1 and 999999 ; -# 794 rows in output -create temporary table t3 as -select * from t1 where a=1 and b=2 and pk between 1 and 999 ; -select count(*) from t3; -drop table t3; - -# 802 rows in output -create temporary table t3 as -select * from t1 ignore index(a,b) where a=1 and b=2 and pk between 1 and 999 ; -select count(*) from t3; -drop table t3; - -drop table t1,t2; - ---echo # --echo # MDEV-5555: Incorrect index_merge on BTREE indices --echo # diff --git a/mysql-test/t/partition_innodb.test b/mysql-test/t/partition_innodb.test index 7badc143c8a..f6611255eb7 100644 --- a/mysql-test/t/partition_innodb.test +++ b/mysql-test/t/partition_innodb.test @@ -716,5 +716,52 @@ GROUP BY 1, 2, 3; DROP TABLE t1; +--echo # +--echo # MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703) +--echo # +create table t1 ( + a int not null, + b int not null, + pk int not null, + primary key (pk), + key(a), + key(b) +) engine=innodb partition by hash(pk) partitions 10; + +insert into t1 values (1,2,4); # both +insert into t1 values (1,0,17); # left +insert into t1 values (1,2,25); # both + +insert into t1 values (10,20,122); +insert into t1 values (10,20,123); + +# Now, fill in some data so that the optimizer choses index_merge +create table t2 (a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C; + +insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a, + 10+A.a + 10*B.a + 100*C.a + 1000*D.a, + 2000 + A.a + 10*B.a + 100*C.a + 1000*D.a + from t2 A, t2 B, t2 C ,t2 D; + +# This should show index_merge, using intersect +--replace_column 9 # +explain select * from t1 where a=1 and b=2 and pk between 1 and 999999 ; +# 794 rows in output +create temporary table t3 as +select * from t1 where a=1 and b=2 and pk between 1 and 999 ; +select count(*) from t3; +drop table t3; + +# 802 rows in output +create temporary table t3 as +select * from t1 ignore index(a,b) where a=1 and b=2 and pk between 1 and 999 ; +select count(*) from t3; +drop table t3; + +drop table t1,t2; + set global default_storage_engine=default; diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 0d30265ce9a..37c3b172bc4 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -80,7 +80,8 @@ static handler *partition_create_handler(handlerton *hton, static uint partition_flags(); static uint alter_table_flags(uint flags); -extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2); +extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); +extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); static int partition_initialize(void *p) { @@ -4514,7 +4515,10 @@ bool ha_partition::init_record_priority_queue() uint alloc_len; uint used_parts= bitmap_bits_set(&m_part_info->used_partitions); /* Allocate record buffer for each used partition. */ - alloc_len= used_parts * (m_rec_length + PARTITION_BYTES_IN_POS); + m_priority_queue_rec_len= m_rec_length + PARTITION_BYTES_IN_POS; + if (!m_using_extended_keys) + m_priority_queue_rec_len += m_file[0]->ref_length; + alloc_len= used_parts * m_priority_queue_rec_len; /* Allocate a key for temporary use when setting up the scan. */ alloc_len+= table_share->max_key_length; @@ -4536,13 +4540,25 @@ bool ha_partition::init_record_priority_queue() { DBUG_PRINT("info", ("init rec-buf for part %u", i)); int2store(ptr, i); - ptr+= m_rec_length + PARTITION_BYTES_IN_POS; + ptr+= m_priority_queue_rec_len; } } while (++i < m_tot_parts); m_start_key.key= (const uchar*)ptr; + /* Initialize priority queue, initialized to reading forward. */ - if (init_queue(&m_queue, used_parts, 0, - 0, cmp_key_then_part_id, (void*)m_curr_key_info, 0, 0)) + int (*cmp_func)(void *, uchar *, uchar *); + void *cmp_arg; + if (!m_using_extended_keys) + { + cmp_func= cmp_key_rowid_part_id; + cmp_arg= (void*)this; + } + else + { + cmp_func= cmp_key_part_id; + cmp_arg= (void*)m_curr_key_info; + } + if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0)) { my_free(m_ordered_rec_buffer); m_ordered_rec_buffer= NULL; @@ -4609,9 +4625,13 @@ int ha_partition::index_init(uint inx, bool sorted) DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp")); m_curr_key_info[1]= table->key_info+table->s->primary_key; m_curr_key_info[2]= NULL; + m_using_extended_keys= TRUE; } else + { m_curr_key_info[1]= NULL; + m_using_extended_keys= FALSE; + } if (init_record_priority_queue()) DBUG_RETURN(HA_ERR_OUT_OF_MEM); @@ -4738,36 +4758,12 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key, } -/* - @brief - Provide ordering by (key_value, partition_id). - - @detail - Ordering by partition id is required so that key scans on key=const - return rows in rowid order (this is required for some variants of - index_merge to work). - - In ha_partition, rowid is a (partition_id, underlying_table_rowid). - handle_ordered_index_scan must return rows ordered by (key, rowid). - - If two rows have the same key value and come from different partitions, - it is sufficient to return them in the order of their partition_id. -*/ - -extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2) +/* Compare two part_no partition numbers */ +static int cmp_part_ids(uchar *ref1, uchar *ref2) { - my_ptrdiff_t diff1, diff2; - int res; - - if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS, - ref2 + PARTITION_BYTES_IN_POS))) - { - return res; - } - /* The following was taken from ha_partition::cmp_ref */ - diff1= ref2[1] - ref1[1]; - diff2= ref2[0] - ref1[0]; + my_ptrdiff_t diff1= ref2[1] - ref1[1]; + my_ptrdiff_t diff2= ref2[0] - ref1[0]; if (!diff1 && !diff2) return 0; @@ -4784,6 +4780,45 @@ extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2) } +/* + @brief + Provide ordering by (key_value, part_no). +*/ + +extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2) +{ + int res; + if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS, + ref2 + PARTITION_BYTES_IN_POS))) + { + return res; + } + return cmp_part_ids(ref1, ref2); +} + +/* + @brief + Provide ordering by (key_value, underying_table_rowid, part_no). +*/ +extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2) +{ + ha_partition *file= (ha_partition*)ptr; + int res; + + if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS, + ref2 + PARTITION_BYTES_IN_POS))) + { + return res; + } + if ((res= file->m_file[0]->cmp_ref(ref1 + PARTITION_BYTES_IN_POS + file->m_rec_length, + ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length))) + { + return res; + } + return cmp_part_ids(ref1, ref2); +} + + /** Common routine for a number of index_read variants @@ -5484,7 +5519,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) for (; first_used_part < m_part_spec.start_part; first_used_part++) { if (bitmap_is_set(&(m_part_info->used_partitions), first_used_part)) - part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS; + part_rec_buf_ptr+= m_priority_queue_rec_len; } DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u", m_part_spec.start_part, first_used_part)); @@ -5539,6 +5574,11 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) if (!error) { found= TRUE; + if (!m_using_extended_keys) + { + file->position(rec_buf_ptr); + memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length); + } /* Initialize queue without order first, simply insert */ @@ -5555,7 +5595,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) m_key_not_found= true; saved_error= error; } - part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS; + part_rec_buf_ptr+= m_priority_queue_rec_len; } if (found) { @@ -5564,7 +5604,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) after that read the first entry and copy it to the buffer to return in. */ queue_set_max_at_top(&m_queue, reverse_order); - queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info); + queue_set_cmp_arg(&m_queue, m_using_extended_keys? m_curr_key_info : (void*)this); m_queue.elements= j - queue_first_element(&m_queue); queue_fix(&m_queue); return_top_record(buf); @@ -5640,7 +5680,7 @@ int ha_partition::handle_ordered_index_scan_key_not_found() else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND) DBUG_RETURN(error); } - part_buf+= m_rec_length + PARTITION_BYTES_IN_POS; + part_buf += m_priority_queue_rec_len; } DBUG_ASSERT(curr_rec_buf); bitmap_clear_all(&m_key_not_found_partitions); @@ -5724,6 +5764,13 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same) else error= file->ha_index_next_same(rec_buf, m_start_key.key, m_start_key.length); + + if (!m_using_extended_keys) + { + file->position(rec_buf); + memcpy(rec_buf + m_rec_length, file->ref, file->ref_length); + } + if (error) { if (error == HA_ERR_END_OF_FILE) @@ -7711,19 +7758,29 @@ uint ha_partition::min_record_length(uint options) const int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2) { - uint part_id; + int cmp; my_ptrdiff_t diff1, diff2; - handler *file; DBUG_ENTER("ha_partition::cmp_ref"); + cmp = m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), + (ref2 + PARTITION_BYTES_IN_POS)); + if (cmp) + DBUG_RETURN(cmp); + if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1])) { - part_id= uint2korr(ref1); - file= m_file[part_id]; - DBUG_ASSERT(part_id < m_tot_parts); - DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), - (ref2 + PARTITION_BYTES_IN_POS))); + /* This means that the references are same and are in same partition.*/ + DBUG_RETURN(0); } + + /* + In Innodb we compare with either primary key value or global DB_ROW_ID so + it is not possible that the two references are equal and are in different + partitions, but in myisam it is possible since we are comparing offsets. + Remove this assert if DB_ROW_ID is changed to be per partition. + */ + DBUG_ASSERT(!m_innodb); + diff1= ref2[1] - ref1[1]; diff2= ref2[0] - ref1[0]; if (diff1 > 0) diff --git a/sql/ha_partition.h b/sql/ha_partition.h index cec377b9e29..e8e3858d076 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -49,6 +49,8 @@ enum partition_keywords /* offset to the engines array */ #define PAR_ENGINES_OFFSET 12 +extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); + class ha_partition :public handler { private: @@ -88,6 +90,22 @@ private: uchar *m_rec0; // table->record[0] const uchar *m_err_rec; // record which gave error QUEUE m_queue; // Prio queue used by sorted read + + /* + Length of an element in m_ordered_rec_buffer. The elements are composed of + + [part_no] [table->record copy] [underlying_table_rowid] + + underlying_table_rowid is only stored when the table has no extended keys. + */ + uint m_priority_queue_rec_len; + + /* + If true, then sorting records by key value also sorts them by their + underlying_table_rowid. + */ + bool m_using_extended_keys; + /* Since the partition handler is a handler on top of other handlers, it is necessary to keep information about what the underlying handler @@ -1172,6 +1190,9 @@ public: DBUG_ASSERT(h == m_file[i]->ht); return h; } + + + friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); }; #endif /* HA_PARTITION_INCLUDED */ |