summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIgor Babaev <igor@askmonty.org>2021-05-26 23:41:59 -0700
committerIgor Babaev <igor@askmonty.org>2021-06-03 14:44:03 -0700
commit0b797130c674a4dd8b8dcf35d3ade38353e19284 (patch)
treeb76f8e64ea4b4eebf16df750cf35dabb51507ff2
parentfa0bbff032bc5717715fdf32ce4c8ebdfcf73944 (diff)
downloadmariadb-git-0b797130c674a4dd8b8dcf35d3ade38353e19284.tar.gz
MDEV-25714 Join using derived with aggregation returns incorrect results
If a join query uses a derived table (view / CTE) with GROUP BY clause then the execution plan for such join may employ split optimization. When this optimization is employed the derived table is not materialized. Rather only some partitions of the derived table are subject to grouping. Split optimization can be applied only if: - there are some indexes over the tables used in the join specifying the derived table whose prefixes partially cover the field items used in the GROUP BY list (such indexes are called splitting indexes) - the WHERE condition of the join query contains conjunctive equalities between columns of the derived table that comprise major parts of splitting indexes and columns of the other join tables. When the optimizer evaluates extending of a partial join by the rows of the derived table it always considers a possibility of using split optimization. Different splitting indexes can be used depending on the extended partial join. At some rare conditions, for example, when there is a non-splitting covering index for a table joined in the join specifying the derived table usage of a splitting index to produce rows needed for grouping may be still less beneficial than usage of such covering index without any splitting technique. The function JOIN_TAB::choose_best_splitting() must take this into account. Approved by Oleksandr Byelkin <sanja@mariadb.com>
-rw-r--r--mysql-test/main/derived_cond_pushdown.result2
-rw-r--r--mysql-test/main/derived_split_innodb.result61
-rw-r--r--mysql-test/main/derived_split_innodb.test37
-rw-r--r--sql/opt_split.cc27
4 files changed, 121 insertions, 6 deletions
diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result
index 7644e65a868..52d8ccd1b80 100644
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -16703,7 +16703,7 @@ EXPLAIN EXTENDED
SELECT * FROM v1 JOIN v2 ON v1.f = v2.f;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
-3 LATERAL DERIVED NULL NULL NULL NULL NULL NULL NULL NULL no matching row in const table
+3 DERIVED NULL NULL NULL NULL NULL NULL NULL NULL no matching row in const table
Warnings:
Note 1003 /* select#1 */ select NULL AS `f`,`v2`.`f` AS `f` from `test`.`t1` `a` straight_join `test`.`t1` `b` join `test`.`v2` where 0
DROP VIEW v1,v2;
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index ff71b7df097..6119145105b 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -175,3 +175,64 @@ id select_type table type possible_keys key key_len ref rows Extra
3 LATERAL DERIVED t1 ref a,a_2 a 5 test.t1.a 1 Using where; Using temporary; Using filesort
3 LATERAL DERIVED t2 ref c c 5 test.t1.b 1 Using index
DROP TABLE t1, t2;
+#
+# Bug mdev-25714: usage non-splitting covering index is cheaper than
+# usage of the best splitting index for one group
+#
+create table t1 (
+id int not null, itemid int not null, index idx (itemid)
+) engine=innodb;
+insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
+create table t2 (id int not null) engine=innodb;
+insert into t2 values (2);
+create table t3 (
+id int not null, itemid int not null, userid int not null, primary key (id),
+index idx1 (userid, itemid), index idx2 (itemid)
+) engine innodb;
+insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
+analyze table t1,t2,t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+set optimizer_switch='split_materialized=on';
+explain select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 1
+1 PRIMARY t1 ref idx idx 4 test.t2.id 1
+1 PRIMARY <derived2> ref key0 key0 9 test.t2.id,test.t1.id 2
+2 DERIVED t3 ref idx1,idx2 idx1 4 const 5 Using where; Using index
+select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id itemid id id
+4 2 4 2
+4 2 4 2
+set optimizer_switch='split_materialized=off';
+explain select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 1
+1 PRIMARY t1 ref idx idx 4 test.t2.id 1
+1 PRIMARY <derived2> ref key0 key0 9 test.t2.id,test.t1.id 2
+2 DERIVED t3 ref idx1 idx1 4 const 5 Using where; Using index
+select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id itemid id id
+4 2 4 2
+4 2 4 2
+drop table t1,t2,t3;
+set optimizer_switch='split_materialized=default';
+# End of 10.3 tests
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 19a6ecf216f..6f33c71eede 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -150,3 +150,40 @@ eval set statement optimizer_switch='split_materialized=on' for $query;
DROP TABLE t1, t2;
+--echo #
+--echo # Bug mdev-25714: usage non-splitting covering index is cheaper than
+--echo # usage of the best splitting index for one group
+--echo #
+
+create table t1 (
+ id int not null, itemid int not null, index idx (itemid)
+) engine=innodb;
+insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
+create table t2 (id int not null) engine=innodb;
+insert into t2 values (2);
+create table t3 (
+ id int not null, itemid int not null, userid int not null, primary key (id),
+ index idx1 (userid, itemid), index idx2 (itemid)
+) engine innodb;
+insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
+analyze table t1,t2,t3;
+
+let $q=
+select t1.id, t1.itemid, dt.id, t2.id
+ from t1,
+ (select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+ t2
+ where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+
+set optimizer_switch='split_materialized=on';
+eval explain $q;
+eval $q;
+
+set optimizer_switch='split_materialized=off';
+eval explain $q;
+eval $q;
+
+drop table t1,t2,t3;
+set optimizer_switch='split_materialized=default';
+
+--echo # End of 10.3 tests
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 395422de3c3..d272638f00c 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -960,11 +960,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
in the cache
*/
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
- if (!spl_plan &&
- (spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) &&
- (spl_plan->best_positions=
- (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) &&
- !spl_opt_info->plan_cache.push_back(spl_plan))
+ if (!spl_plan)
{
/*
The plan for the chosen key has not been found in the cache.
@@ -974,6 +970,27 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, remaining_tables, true);
choose_plan(join, all_table_map & ~join->const_table_map);
+
+ /*
+ Check that the chosen plan is really a splitting plan.
+ If not or if there is not enough memory to save the plan in the cache
+ then just return with no splitting plan.
+ */
+ POSITION *first_non_const_pos= join->best_positions + join->const_tables;
+ TABLE *table= first_non_const_pos->table->table;
+ key_map spl_keys= table->keys_usable_for_splitting;
+ if (!(first_non_const_pos->key &&
+ spl_keys.is_set(first_non_const_pos->key->key)) ||
+ !(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) ||
+ !(spl_plan->best_positions=
+ (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) ||
+ spl_opt_info->plan_cache.push_back(spl_plan))
+ {
+ reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
+ best_key, remaining_tables, false);
+ return 0;
+ }
+
spl_plan->keyuse_ext_start= best_key_keyuse_ext_start;
spl_plan->table= best_table;
spl_plan->key= best_key;