From 5aeaba11c7dcbda40fd9cd68ab060a446ab6eb77 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Wed, 10 Oct 2018 04:40:30 -0700 Subject: MDEV-17382 Hash join algorithm should not be used to join materialized derived table / view by equality Now rows of a materialized derived table are always put into a temporary table before join operation. If BNLH is used to join this table with the result of a partial join then both operands of the join are actually put into main memory. In most cases this is not efficient. We could avoid this by sending the rows of the derived table directly to the join operation. However this kind of data flow is not supported yet. Fixed by not allowing usage of hash join algorithm to join a materialized derived table if it's joined by an equality predicate of the form f=e where f is a field of the derived table. Adjusted the results of the test case for 10.2-compatibility. --- mysql-test/r/derived_opt.result | 35 ++++++++++++++++++++++++++++++++++- mysql-test/r/innodb_mrr_cpk.result | 2 +- mysql-test/t/derived_opt.test | 38 ++++++++++++++++++++++++++++++++++++++ sql/sql_select.cc | 8 ++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/derived_opt.result b/mysql-test/r/derived_opt.result index 6e4ea1b5d36..e43243c909a 100644 --- a/mysql-test/r/derived_opt.result +++ b/mysql-test/r/derived_opt.result @@ -499,9 +499,42 @@ where D1.a= t1.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 10 Using where -1 PRIMARY hash_ALL key0 #hash#key0 5 test.t1.a 100 Using join buffer (flat, BNLH join) +1 PRIMARY ref key0 key0 5 test.t1.a 10 2 DERIVED t2 ALL NULL NULL NULL NULL 100 Using filesort set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +# +# Bug mdev-17382: equi-join of derived table with join_cache_level=4 +# +CREATE TABLE t1 ( +id int NOT NULL, +amount decimal DEFAULT NULL, +PRIMARY KEY (id) +); +CREATE TABLE t2 ( +id int NOT NULL, +name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); +set join_cache_level=4; +EXPLAIN +SELECT t2.id,t2.name,t.total_amt +FROM t2 +LEFT JOIN +(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t +ON t2.id=t.id +WHERE t2.id < 3; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 range PRIMARY PRIMARY 4 NULL 3 Using index condition +1 PRIMARY ref key0 key0 5 test.t2.id 2 +2 LATERAL DERIVED t1 eq_ref PRIMARY PRIMARY 4 test.t2.id 1 +set join_cache_level=default; +DROP TABLE t1,t2; set optimizer_switch=@exit_optimizer_switch; diff --git a/mysql-test/r/innodb_mrr_cpk.result b/mysql-test/r/innodb_mrr_cpk.result index 28d7dd51df8..a2e43d7d127 100644 --- a/mysql-test/r/innodb_mrr_cpk.result +++ b/mysql-test/r/innodb_mrr_cpk.result @@ -226,7 +226,7 @@ set join_cache_level=3; explain SELECT 1 FROM (SELECT url, id FROM t2 LIMIT 1 OFFSET 20) derived RIGHT JOIN t1 ON t1.id = derived.id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL # -1 PRIMARY hash_ALL key0 #hash#key0 25 test.t1.id # Using join buffer (flat, BNLH join) +1 PRIMARY ref key0 key0 25 test.t1.id # 2 DERIVED t2 ALL NULL NULL NULL NULL # set join_cache_level= @tmp_mdev5037; drop table t0,t1,t2; diff --git a/mysql-test/t/derived_opt.test b/mysql-test/t/derived_opt.test index 7f19553e4e5..aab95f69f26 100644 --- a/mysql-test/t/derived_opt.test +++ b/mysql-test/t/derived_opt.test @@ -363,5 +363,43 @@ set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +--echo # +--echo # Bug mdev-17382: equi-join of derived table with join_cache_level=4 +--echo # + +CREATE TABLE t1 ( + id int NOT NULL, + amount decimal DEFAULT NULL, +PRIMARY KEY (id) +); + +CREATE TABLE t2 ( + id int NOT NULL, + name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); + +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); + +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); + +set join_cache_level=4; + +EXPLAIN +SELECT t2.id,t2.name,t.total_amt + FROM t2 + LEFT JOIN + (SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t + ON t2.id=t.id + WHERE t2.id < 3; + +set join_cache_level=default; + +DROP TABLE t1,t2; + # The following command must be the last one the file set optimizer_switch=@exit_optimizer_switch; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 6fd161f7160..2e123df75f0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -11415,7 +11415,15 @@ uint check_join_cache_usage(JOIN_TAB *tab, effort now. */ if (tab->table->pos_in_table_list->is_materialized_derived()) + { no_bka_cache= true; + /* + Don't use hash join algorithm if the temporary table for the rows + of the derived table will be created with an equi-join key. + */ + if (tab->table->s->keys) + no_hashed_cache= true; + } /* Don't use join buffering if we're dictated not to by no_jbuf_after -- cgit v1.2.1