From b1ab2ba5833ad466ea9c08b89d89bb51e1d445ed Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 7 Nov 2019 15:24:21 +0530 Subject: MDEV-20519: Query plan regression with optimizer_use_condition_selectivity > 1 The issue here is the wrong estimate of the cardinality of a partial join, the cardinality is too high because the function table_cond_selectivity() returns an absurd number 100 while selectivity cannot be greater than 1. When accessing table t by outer reference t1.a via index we do not perform any range analysis for t. Yet we see TABLE::quick_key_parts[key] and TABLE->quick_rows[key] contain a non-zero value though these should have been remained untouched and equal to 0. Thus real cause of the problem is that TABLE::init does not clean the arrays TABLE::quick_key_parts[] and TABLE::>quick_rows[]. It should have done it because the TABLE structure created for any instance of a table can be reused for many queries. --- mysql-test/r/selectivity.result | 56 ++++++++++++++++++++++++++++++++++ mysql-test/r/selectivity_innodb.result | 56 ++++++++++++++++++++++++++++++++++ mysql-test/t/selectivity.test | 32 +++++++++++++++++++ sql/sql_select.cc | 4 --- sql/table.cc | 21 +++++++++++++ sql/table.h | 1 + 6 files changed, 166 insertions(+), 4 deletions(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 5fe6986e9ff..dc7e9494946 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1810,4 +1810,60 @@ b a a b 9 9 10 10 set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; +# +# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4 +# +create table t1 (id int, a int, PRIMARY KEY(id), key(a)); +insert into t1 select seq,seq from seq_1_to_100; +create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b)); +insert into t2 select seq,seq,seq from seq_1_to_100; +set optimizer_switch='exists_to_in=off'; +set optimizer_use_condition_selectivity=2; +SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id a +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +explain SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 +2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE A const PRIMARY,a PRIMARY 4 const 1 +1 SIMPLE B ref a a 5 const 1 +explain SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 +2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +set optimizer_switch= @save_optimizer_switch; +set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +drop table t1,t2; # End of 10.1 tests diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 70ddd5bf1a9..f0cbf1662aa 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -1820,6 +1820,62 @@ b a a b 9 9 10 10 set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; +# +# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4 +# +create table t1 (id int, a int, PRIMARY KEY(id), key(a)); +insert into t1 select seq,seq from seq_1_to_100; +create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b)); +insert into t2 select seq,seq,seq from seq_1_to_100; +set optimizer_switch='exists_to_in=off'; +set optimizer_use_condition_selectivity=2; +SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id a +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +explain SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index NULL a 5 NULL 100 Using where; Using index +2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 Using index +2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE A const PRIMARY,a PRIMARY 4 const 1 +1 SIMPLE B ref a a 5 const 1 Using index +explain SELECT * FROM t1 +WHERE +EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id +WHERE A.a=t1.a AND t2.b < 20); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index NULL a 5 NULL 100 Using where; Using index +2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 Using index +2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +set optimizer_switch= @save_optimizer_switch; +set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +drop table t1,t2; # End of 10.1 tests set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index a93ad5efb07..7df326edb44 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -1234,5 +1234,37 @@ set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; + +--echo # +--echo # MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4 +--echo # + + + +create table t1 (id int, a int, PRIMARY KEY(id), key(a)); +insert into t1 select seq,seq from seq_1_to_100; + +create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b)); +insert into t2 select seq,seq,seq from seq_1_to_100; + +set optimizer_switch='exists_to_in=off'; +set optimizer_use_condition_selectivity=2; + +let $query= SELECT * FROM t1 + WHERE + EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id + WHERE A.a=t1.a AND t2.b < 20); + +eval $query; +eval explain $query; + +EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65; + +eval explain $query; + +set optimizer_switch= @save_optimizer_switch; +set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +drop table t1,t2; + --echo # End of 10.1 tests diff --git a/sql/sql_select.cc b/sql/sql_select.cc index dfc9f729118..55d371bdbfd 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7654,7 +7654,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, something went wrong. */ sel /= (double)table->quick_rows[key] / (double) table->stat_records(); - DBUG_ASSERT(0 < sel && sel <= 2.0); set_if_smaller(sel, 1.0); used_range_selectivity= true; } @@ -7703,7 +7702,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, if (table->field[fldno]->cond_selectivity > 0) { sel /= table->field[fldno]->cond_selectivity; - DBUG_ASSERT(0 < sel && sel <= 2.0); set_if_smaller(sel, 1.0); } /* @@ -7761,7 +7759,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, if (field->cond_selectivity > 0) { sel/= field->cond_selectivity; - DBUG_ASSERT(0 < sel && sel <= 2.0); set_if_smaller(sel, 1.0); } break; @@ -7773,7 +7770,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, keyparts, ref_keyuse_steps); - DBUG_ASSERT(0.0 < sel && sel <= 1.0); return sel; } diff --git a/sql/table.cc b/sql/table.cc index 94cd174ffd7..52e63ea7389 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4162,6 +4162,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; + quick_condition_rows=0; + initialize_quick_structures(); #ifdef HAVE_REPLICATION /* used in RBR Triggers */ master_had_triggers= 0; @@ -7546,3 +7548,22 @@ bool fk_modifies_child(enum_fk_option opt) static bool can_write[]= { false, false, true, true, false, true }; return can_write[opt]; } + + +/* + @brief + Initialize all the quick structures that are used to stored the + estimates when the range optimizer is run. + @details + This is specifically needed when we read the TABLE structure from the + table cache. There can be some garbage data from previous queries + that need to be reset here. +*/ + +void TABLE::initialize_quick_structures() +{ + bzero(quick_rows, sizeof(quick_rows)); + bzero(quick_key_parts, sizeof(quick_key_parts)); + bzero(quick_costs, sizeof(quick_costs)); + bzero(quick_n_ranges, sizeof(quick_n_ranges)); +} diff --git a/sql/table.h b/sql/table.h index 98ec9f005ea..44803b5aacd 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1450,6 +1450,7 @@ public: } bool update_const_key_parts(COND *conds); + void initialize_quick_structures(); my_ptrdiff_t default_values_offset() const { return (my_ptrdiff_t) (s->default_values - record[0]); } -- cgit v1.2.1