diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2020-05-20 18:59:52 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2020-11-26 01:20:49 +0530 |
commit | efb171c2eac06508489804601f20c702bca1954c (patch) | |
tree | 7da131efa4776cce13a51b632dad3f6df961ec5e | |
parent | c498250888ec126fddda2867d1239b2a7734482f (diff) | |
download | mariadb-git-10.6-mdev22360.tar.gz |
MDEV-22360: Sufficient conditions for accurate calculation of join cardinality10.6-mdev22360
The aim of this task is to check if the estimate of join cardinality are accurate or not.
The implementation to check if we have the accurate estimate of the join cardinality is a
simple one, we have to walk over the WHERE clause.
The approach can be broken into 2 cases:
Case 1: WHERE clause is an AND conjunct
For an AND item at the top level, we need to walk over all the top level conjuncts and call walk
individually on them. This is done in such a way because for an AND conjunct at the top
level we may have accurate selectivity, even if the predicate belongs to a different column.
Eg: t1.a > 10 and t2.a < 5.
For this AND item we will have accurate selectivities.
For AND conjuncts (not at the top level), the entire conjunct needs to be resolved to one column.
Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0)
Case 2:
2a) OR item
For an OR item at the top level, we need to make sure that all the columns inside the OR
conjunct need to belong to one column directly or indirectly.
This needs to happen for an OR conjunct even if it is not at the
top level.
Eg: (t1.a > 5 or t1.a < 0);
2b) Single predicate at the top level
Eg:
t1.a= t2.a [ For this case we need to make sure we know number of distinct values for t1.a and t2.a ]
t1.a > 5 [ sargable predicate, get the estimate from the range optimizer ]
We need to make sure that for the predicates in the WHERE clause we have estimates either
from the first component of the index or from the EITS.
The implementation of these is covered with the callback
function passed to walk function.
-rw-r--r-- | mysql-test/include/join_cardinality.inc | 131 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.result | 960 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.test | 310 | ||||
-rw-r--r-- | mysql-test/main/opt_trace.result | 18 | ||||
-rw-r--r-- | mysql-test/main/opt_trace_index_merge.result | 1 | ||||
-rw-r--r-- | mysql-test/main/opt_trace_index_merge_innodb.result | 1 | ||||
-rw-r--r-- | sql/field.cc | 200 | ||||
-rw-r--r-- | sql/field.h | 22 | ||||
-rw-r--r-- | sql/item.cc | 213 | ||||
-rw-r--r-- | sql/item.h | 40 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 113 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 10 | ||||
-rw-r--r-- | sql/item_func.h | 6 | ||||
-rw-r--r-- | sql/opt_range.cc | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 37 | ||||
-rw-r--r-- | sql/sql_select.h | 1 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 38 | ||||
-rw-r--r-- | sql/sql_statistics.h | 1 | ||||
-rw-r--r-- | sql/table.cc | 1 |
19 files changed, 2067 insertions, 40 deletions
diff --git a/mysql-test/include/join_cardinality.inc b/mysql-test/include/join_cardinality.inc new file mode 100644 index 00000000000..1ab6edb75cb --- /dev/null +++ b/mysql-test/include/join_cardinality.inc @@ -0,0 +1,131 @@ +--echo # +--echo # BASIC CASES +--echo # + +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # CASE where selectivity estimates are not accurate + +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # The cardinality here would be not accurate because we don't have +--echo # selectivity for the predicate t1.b=10 +--echo # + +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # BASIC CASES FOR AND CONJUNCT +--echo # + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # BASIC CASES FOR OR CONJUNCT +--echo # + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # The cardinality here would be not accurate because for the +--echo # OR conjunct all predicates can't be resolved to one column + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # COMPLEX CASES WITH AND-OR CONJUNCTS +--echo # + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Test with subquery +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result new file mode 100644 index 00000000000..f1379c53e1e --- /dev/null +++ b/mysql-test/main/join_cardinality.result @@ -0,0 +1,960 @@ +CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; +CREATE TABLE t2(a INT, b INT, key(b)); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_trace=1; +SET optimizer_switch='rowid_filter=off'; +# +# RUNNING the join cardinality tests when statistics are +# available only from indexes +# +# +# BASIC CASES +# +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b b 5 NULL 5 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref b b 5 const 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# CASE where selectivity estimates are not accurate +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# The cardinality here would be not accurate because we don't have +# selectivity for the predicate t1.b=10 +# +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# BASIC CASES FOR AND CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range b b 5 NULL 5 Using index condition +1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# BASIC CASES FOR OR CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 4 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# The cardinality here would be not accurate because for the +# OR conjunct all predicates can't be resolved to one column +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b,a NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# COMPLEX CASES WITH AND-OR CONJUNCTS +# +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 6 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ref_or_null a a 5 const 2 100.00 Using index condition +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Test with subquery +# +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ref a a 5 const 1 Using where +2 SUBQUERY t2 ALL NULL NULL NULL NULL 100 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY t2 ref b b 5 test.t1.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false, + true +] +DROP TABLE t1,t2; +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; +# +# RUNNING the join cardinality tests when only statistics +# are available from stat tables +# +# +# BASIC CASES +# +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# CASE where selectivity estimates are not accurate +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# The cardinality here would be not accurate because we don't have +# selectivity for the predicate t1.b=10 +# +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# BASIC CASES FOR AND CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# BASIC CASES FOR OR CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# The cardinality here would be not accurate because for the +# OR conjunct all predicates can't be resolved to one column +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# COMPLEX CASES WITH AND-OR CONJUNCTS +# +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 1.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Test with subquery +# +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 SUBQUERY t2 ALL NULL NULL NULL NULL 100 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false, + true +] +DROP TABLE t1,t2; +# +# Combination with statistics from stat tables and +# statistics from indexes +# +CREATE TABLE t1(a INT, b INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 98.00 97.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ref a a 5 const 1 1.00 97.66 100.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 95.70 97.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +DROP TABLE t1; +# +# Tests with simple predicates +# +CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +# +# Tests with LIKE PREDICATES +# +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Tests with NULL predicates +# +EXPLAIN SELECT * from t1 WHERE t1.a IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 5 const 1 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Tests with </<=/>/>=/BETWEEN +# +EXPLAIN SELECT * from t1 WHERE t1.a > 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 5 < t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 5 NULL 1 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 5 NULL 1 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# Tests with IN predicate +# +EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE +t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 +WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b b 5 NULL 3 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# Tests using different functional predicates +# +EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE abs(a) > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE a > 10+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 1 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1; +# +# Tests when updating stats tables manually +# +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; +INSERT INTO +mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0); +# +# estimates from stat tables not available, all the fields should be NOT NULL +# +EXPLAIN SELECT * from t1 WHERE a < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +DROP TABLE t1; +# +# Using multiple equalities at the top level +# +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +# +# Tests with views (using Item_direct_view_ref) +# +CREATE VIEW v1 AS SELECT a, b FROM t1; +EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1,t2; +DROP VIEW v1; +# +# TESTS to get ndv from rec_per_key +# +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +ALTER TABLE t1 ADD KEY k1(a); +set @save_use_stat_tables= @@use_stat_tables; +set use_stat_tables='never'; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +CREATE TABLE t2 like t1; +INSERT INTO t2 SELECT * FROM t1; +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range k1 k1 5 NULL 2 Using index condition +1 SIMPLE t2 ref k1 k1 5 test.t1.a 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +set use_stat_tables=@save_use_stat_tables; +DROP TABLE t1,t2; +# +# Tests with multi-component keys +# +CREATE TABLE t1 (a INT, b INT, KEY(a,b)); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +ALTER TABLE t1 ADD KEY k1(a); +EXPLAIN SELECT * FROM t1 where t1.a<3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,k1 a 5 NULL 2 Using where; Using index +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1; +# +# Tests when statistics from stat tables cannot be used because of the system variables +# use_stat_tables and optimizer_use_condition_selectivity +# +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +SELECT * FROM t1; +a b c +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 +9 9 9 +10 10 10 +SELECT * FROM t2; +a b +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +SET @save_use_stat_tables= @@use_stat_tables; +SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; +SET use_stat_tables='never'; +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +SET use_stat_tables= @save_use_stat_tables; +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +set optimizer_use_condition_selectivity=1; +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +DROP TABLE t1,t2; +SET optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test new file mode 100644 index 00000000000..ae67350edca --- /dev/null +++ b/mysql-test/main/join_cardinality.test @@ -0,0 +1,310 @@ +--source include/have_sequence.inc + +CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; + +CREATE TABLE t2(a INT, b INT, key(b)); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; + + +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_trace=1; +SET optimizer_switch='rowid_filter=off'; + +--echo # +--echo # RUNNING the join cardinality tests when statistics are +--echo # available only from indexes +--echo # + +--source include/join_cardinality.inc + +DROP TABLE t1,t2; + +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; + +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; + +--disable_result_log +--disable_query_log +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(a,b) INDEXES(); +ANALYZE TABLE t2 PERSISTENT FOR ALL; +--enable_query_log +--enable_result_log + +--echo # +--echo # RUNNING the join cardinality tests when only statistics +--echo # are available from stat tables +--echo # + +--source include/join_cardinality.inc + +DROP TABLE t1,t2; + + +--echo # +--echo # Combination with statistics from stat tables and +--echo # statistics from indexes +--echo # + +CREATE TABLE t1(a INT, b INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES (); + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests with simple predicates +--echo # + +CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; + +--echo # +--echo # Tests with LIKE PREDICATES +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%'; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%'; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with NULL predicates +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a IS NULL; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with </<=/>/>=/BETWEEN +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a > 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 5 < t1.a; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with IN predicate +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE +t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 +WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests using different functional predicates +--echo # + +EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE abs(a) > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE a > 10+1; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests when updating stats tables manually +--echo # + +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; + +INSERT INTO +mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0); + +--echo # +--echo # estimates from stat tables not available, all the fields should be NOT NULL +--echo # + +EXPLAIN SELECT * from t1 WHERE a < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Using multiple equalities at the top level +--echo # + +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; + +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE TABLE t2 PERSISTENT FOR ALL; + +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; + +--echo # +--echo # Tests with views (using Item_direct_view_ref) +--echo # + +CREATE VIEW v1 AS SELECT a, b FROM t1; + +EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1,t2; +DROP VIEW v1; + + +--echo # +--echo # TESTS to get ndv from rec_per_key +--echo # + +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +ALTER TABLE t1 ADD KEY k1(a); + +set @save_use_stat_tables= @@use_stat_tables; +set use_stat_tables='never'; +analyze table t1; + +CREATE TABLE t2 like t1; +INSERT INTO t2 SELECT * FROM t1; +ANALYZE TABLE t2 PERSISTENT FOR ALL; +EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +set use_stat_tables=@save_use_stat_tables; + +DROP TABLE t1,t2; + +--echo # +--echo # Tests with multi-component keys +--echo # + +CREATE TABLE t1 (a INT, b INT, KEY(a,b)); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; + +EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ALTER TABLE t1 ADD KEY k1(a); +EXPLAIN SELECT * FROM t1 where t1.a<3; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests when statistics from stat tables cannot be used because of the system variables +--echo # use_stat_tables and optimizer_use_condition_selectivity +--echo # + +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; + +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE TABLE t2 PERSISTENT FOR ALL; + +SELECT * FROM t1; +SELECT * FROM t2; + +SET @save_use_stat_tables= @@use_stat_tables; +SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; +SET use_stat_tables='never'; + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET use_stat_tables= @save_use_stat_tables; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +set optimizer_use_condition_selectivity=1; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; + +DROP TABLE t1,t2; + +SET optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 8f4f6ec97ce..c10018adab6 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -124,6 +124,7 @@ select * from v1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -270,6 +271,7 @@ select * from (select * from t1 where t1.a=1)q { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -421,6 +423,7 @@ select * from v2 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -985,6 +988,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -1434,6 +1438,7 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -1628,6 +1633,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -1811,6 +1817,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -2076,6 +2083,7 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -2848,6 +2856,7 @@ explain extended select * from t1 where a in (select pk from t10) { "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -2878,6 +2887,7 @@ explain extended select * from t1 where a in (select pk from t10) { } }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -3296,6 +3306,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -3924,6 +3935,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4422,6 +4434,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_ "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4499,6 +4512,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_ } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4948,6 +4962,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6521,6 +6536,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6595,6 +6611,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6672,6 +6689,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/mysql-test/main/opt_trace_index_merge.result b/mysql-test/main/opt_trace_index_merge.result index 3b79a7a44e8..da2d0591002 100644 --- a/mysql-test/main/opt_trace_index_merge.result +++ b/mysql-test/main/opt_trace_index_merge.result @@ -200,6 +200,7 @@ explain select * from t1 where a=1 or b=1 { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/mysql-test/main/opt_trace_index_merge_innodb.result b/mysql-test/main/opt_trace_index_merge_innodb.result index 509569021a5..49c59c96058 100644 --- a/mysql-test/main/opt_trace_index_merge_innodb.result +++ b/mysql-test/main/opt_trace_index_merge_innodb.result @@ -198,6 +198,7 @@ explain select * from t1 where pk1 != 0 and key1 = 1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/sql/field.cc b/sql/field.cc index fe3aebce05d..91dc1deb360 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -36,6 +36,8 @@ #include "tztime.h" // struct Time_zone #include "filesort.h" // change_double_for_sort #include "log_event.h" // class Table_map_log_event +#include "sql_statistics.h" +#include "sql_partition.h" #include <m_ctype.h> // Maximum allowed exponent value for converting string to decimal @@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, field_index= 0; cond_selectivity= 1.0; next_equal_field= NULL; + stats_available= 0; } @@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length) } +/* + @brief + Check if statistics for a column are available via keys + + @details + If the column is the first component of a key, then statistics + for the column are available from the range optimizer. + Sets the bit in Field::stats_table + a) NDV is available + b) Statistics are available for the non-const argument of a + range predicate +*/ + +void Field::statistics_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (keyinfo->usable_key_parts == 1 && + field_index + 1 == keyinfo->key_part->fieldnr) + { + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + return; + } + } +} + + +/* + @brief + Check if statistics for a column are available via stat tables +*/ + +void Field::statistics_available_via_stat_tables() +{ + THD *thd= table->in_use; + if (!(check_eits_preferred(thd) && + thd->variables.optimizer_use_condition_selectivity > 2)) + return; + if (!(table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided())) + return; + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)) + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); +} + + +/* + @brief + Check if statistics for a column are available via indexes or stat tables + + @retval + TRUE : statistics available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_statistics_available_for_range_predicates() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE)); +} + + +void Field::is_statistics_available() +{ + statistics_available_via_keys(); + statistics_available_via_stat_tables(); + is_ndv_available_via_keys(); + is_ndv_available_via_stat_tables(); +} + +/* + @brief + Check if ndv for a column are available via indexes or stat tables + + @retval + TRUE : ndv available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_ndv_available() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE)); +} + + +/* + @brief + Check if number of distinct values(ndv) for a column are available via keys + + @retval + TRUE : ndv available from keys + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0)) + { + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; + } + } + return false; +} + + +/* + @brief + Check if ndv for a column are available via statistical tables + + @retval + TRUE : ndv available from statistical tables + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_stat_tables() +{ + if (!check_eits_preferred(table->in_use)) + return false; + if (!(read_stats && !read_stats->no_stat_values_provided() && + !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))) + return false; + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; +} + + +/* + @brief + Checks if a field is the first component of a given key + + @param + key given key + + @retval + TRUE : field is the first component of the given key + FALSE : otherwise +*/ + +bool Field::is_first_component_of_key(KEY *key) +{ + DBUG_ASSERT(key->usable_key_parts >= 1); + return field_index + 1 == key->key_part->fieldnr; +} + + +/* + Check whether EITS statistics for a field are usable or not + + TRUE : Use EITS for the columns + FALSE: Otherwise +*/ + +bool Field::is_eits_usable() +{ + // check if column_statistics was allocated for this field + if (!read_stats) + return false; + + DBUG_ASSERT(table->stats_is_read); + + /* + (1): checks if we have EITS statistics for a particular column + (2): Don't use EITS for GEOMETRY columns + (3): Disabling reading EITS statistics for columns involved in the + partition list of a table. We assume the selectivity for + such columns would be handled during partition pruning. + */ + + return !read_stats->no_stat_values_provided() && //(1) + type() != MYSQL_TYPE_GEOMETRY && //(2) +#ifdef WITH_PARTITION_STORAGE_ENGINE + (!table->part_info || + !table->part_info->field_in_partition_expr(this)) && //(3) +#endif + true; +} + + Virtual_column_info* Virtual_column_info::clone(THD *thd) { Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this); diff --git a/sql/field.h b/sql/field.h index dfc02149f9d..c60c5e3bece 100644 --- a/sql/field.h +++ b/sql/field.h @@ -869,6 +869,18 @@ public: */ Column_statistics_collected *collected_stats; + enum stats_availability + { + STATISTICS_CACHED=0, + STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE, + STATISTICS_FOR_NDV_AVAILABLE + }; + + /* + Caches the value of whether statistics are available for a field or not. + */ + uint stats_available; + /* This is additional data provided for any computed(virtual) field, default function or check constraint. @@ -1907,6 +1919,16 @@ public: /* Mark field in read map. Updates also virtual fields */ void register_field_in_read_map(); + bool is_first_component_of_key(KEY *key); + void statistics_available_via_keys(); + void statistics_available_via_stat_tables(); + void is_statistics_available(); + bool is_statistics_available_for_range_predicates(); + bool is_ndv_available(); + bool is_ndv_available_via_stat_tables(); + bool is_ndv_available_via_keys(); + bool is_eits_usable(); + virtual Compression_method *compression_method() const { return 0; } virtual Virtual_tmp_table **virtual_tmp_table_addr() diff --git a/sql/item.cc b/sql/item.cc index 52274380cd1..f80db684bb3 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd, } +/* + @brief + Check if selectivity estimates are accurate for a conditional formula + + @details + This function checks whether this item belongs to a certain class of + condition for which we can calculate an accurate selectivity estimate. + + The definition of the class of condition is recursive. + 1. Simple formula + a. Formula in the form of range predicates: + + The predicate would be of type: + col op const + where op can be + + op: + | > + | >= + | < + | <= + | = + | <> + Also the other cases are with + [NOT] IN predicate, + [NOT] NULL predicate and + LIKE predicate. + The predicate should have only one non-constant argument and + this argument will be a reference to a column that is used either + as the first component of an index or statistics are available via + statistical tables. + + b. Equalities: + For an equality to have accurate selectivity estimates, + the number of distinct values for each column in the equality + needs to be known. + Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a) + For this case we need to make sure we know number of distinct + values for t1.a and t2.a + + The number of distinct values for a column can be known by + 1) from indexes via rec_per_key + 2) from statistical tables via avg_frequency. + + 2. AND / OR formula over formulas defined in section 1 of the definition. + + a) AND Formula + For AND formula the check for accurate selectivity estimates depends + whether or not the AND formula is at the top level. + + i) Top level + For an AND formula at the top level, we need to check if + accurate estimates are available for all the predicates + inside an AND formula. + If this is true then accurate selectivity estimates are available + for the AND formula. + + Eg: t1.a > 10 and t2.a < 5 + + if we have accurate selectivity estimates + for t1.a > 10 and t2.a < 5 via indexes or statistical tables, + then selectivity estimates for this AND formula are accurate + + ii) Non-top level + For all the predicates inside an AND formula + accurate selectivity estimates are needed + and each predicate need to be resolved by one + column (table column). If this scenario is satisfied then + accurate selectivity estimates is available for the AND formula. + Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0) + + b) OR Formula + + For an OR predicate, we need to make sure that the + whole OR predicate can be resolved by one column + directly or indirectly (that is via multiple equalities). + If this is possible then for the resolved column we need to have + statistics either from the first component of an index or + via statistical tables. + + Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0); + + In the end for all fields we may have selectivity from an index or + statistical tables. + + @notes + The implementation for this function use the 'walk' method to traverse + the tree of this item with predicate_selectivity_checker() as the + call-back parameter of the method. + + + @retval + TRUE selectivity estimates are accurate + FALSE OTHERWISE +*/ + +bool Item::with_accurate_selectivity_estimation() +{ + if (type() == Item::COND_ITEM && + ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item *item; + while ((item= li++)) + { + SAME_FIELD arg= {NULL, false}; + if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) + return false; + } + return true; + } + SAME_FIELD arg= {NULL, false}; + return !walk(&Item::predicate_selectivity_checker, 0, &arg); +} + + static Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel) { @@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } +/* + @brief + Checks if a formula of a condition contains the same column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + + @retval + TRUE : the formula does not depend on one column + FALSE : OTHERWISE +*/ + +bool Item_field::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + +bool Item_direct_view_ref::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + Item_field *field_item= (Item_field*)real_item(); + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field_item->field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field_item->field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) { table_map used= used_tables(); diff --git a/sql/item.h b/sql/item.h index fb480b4c578..8a6b16ad82d 100644 --- a/sql/item.h +++ b/sql/item.h @@ -446,6 +446,26 @@ typedef struct replace_equal_field_arg struct st_join_table *context_tab; } REPLACE_EQUAL_FIELD_ARG; + +/* + Structure storing information for a field on which the entire predicate is + dependent on (directly or indirectly via equalities) +*/ +typedef struct same_field +{ + /* + field item for the first encountered column while traversing + over the conditional predicate + */ + Item *item; + /* + Set to true if the statistics for the field are available + directly (via keys or stat tables) or indirectly (via equalities) + */ + bool is_stats_available; +}SAME_FIELD; + + class Settable_routine_parameter { public: @@ -1968,7 +1988,25 @@ public: virtual bool count_sargable_conds(void *arg) { return 0; } virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; } virtual bool exists2in_processor(void *arg) { return 0; } + virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } + + bool with_accurate_selectivity_estimation(); + + /* + @brief + Check if selectivity of a predicate is available via indexes or EITS + + @param + arg Structure storing information whether the AND/OR conjunct + can be resolved via a single column. + + @retval + FALSE : SUCCESS + TRUE : OTHERWISE + */ + virtual bool predicate_selectivity_checker(void *arg) { return FALSE; } + virtual bool dep_on_one_column(void *arg) { return true; } bool cleanup_is_expensive_cache_processor(void *arg) { is_expensive_cache= (int8)(-1); @@ -3576,6 +3614,7 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg); + bool dep_on_one_column(void *arg); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; @@ -5946,6 +5985,7 @@ public: Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) { return this; } Item *remove_item_direct_ref() { return this; } + bool dep_on_one_column(void *arg); }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 83eb605f463..8084af72f60 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2106,6 +2106,31 @@ bool Item_func_between::count_sargable_conds(void *arg) return 0; } +bool Item_func_between::predicate_selectivity_checker(void *arg) +{ + if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) + { + if (is_range_predicate(args[0], args[1]) && + is_range_predicate(args[0], args[2])) + { + return args[0]->dep_on_one_column(arg); + } + return true; + } + + for (uint i= 1 ; i < arg_count ; i++) + { + if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM) + { + if (!is_range_predicate(args[i], args[0])) + return true; + if (args[i]->dep_on_one_column(arg)) + return true; + } + } + return false; +} + void Item_func_between::fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) @@ -4290,6 +4315,16 @@ bool Item_func_in::count_sargable_conds(void *arg) } +bool Item_func_in::predicate_selectivity_checker(void *arg) +{ + if (!all_items_are_consts(args + 1, arg_count - 1)) + return true; + if (key_item()->dep_on_one_column(arg)) + return true; + return false; +} + + bool Item_func_in::list_contains_null() { Item **arg,**arg_end; @@ -5516,6 +5551,16 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg) } +bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], NULL)) + { + return args[0]->dep_on_one_column(arg); + } + return true; +} + + longlong Item_func_isnull::val_int() { DBUG_ASSERT(fixed == 1); @@ -5597,6 +5642,17 @@ bool Item_bool_func2::count_sargable_conds(void *arg) return 0; } + +bool Item_bool_func2::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], args[1])) + return args[0]->dep_on_one_column(arg); + if (is_range_predicate(args[1], args[0])) + return args[1]->dep_on_one_column(arg); + return true; +} + + void Item_func_like::print(String *str, enum_query_type query_type) { args[0]->print_parenthesised(str, query_type, precedence()); @@ -5696,8 +5752,18 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) bool sargable_pattern= with_sargable_pattern(); param->thd->mem_root= tmp_root; return sargable_pattern ? - Item_bool_func2::get_mm_tree(param, cond_ptr) : - Item_func::get_mm_tree(param, cond_ptr); + Item_bool_func2::get_mm_tree(param, cond_ptr) : + Item_func::get_mm_tree(param, cond_ptr); +} + + +bool Item_func_like::predicate_selectivity_checker(void *arg) +{ + if (with_sargable_pattern()) + { + return args[0]->dep_on_one_column(arg); + } + return true; } @@ -7139,6 +7205,49 @@ bool Item_equal::count_sargable_conds(void *arg) } +bool Item_equal::predicate_selectivity_checker(void *arg) +{ + /* + For equality conditions like tbl1.col = tbl2.col + we only want to know if the number of distinct values (ndv) is + available for all the fields in the multiple equality or not. + */ + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (!(field->is_ndv_available())) + return true; + } + + it.rewind(); + Item *item; + SAME_FIELD *same_field= (SAME_FIELD *) arg; + while ((item= it++)) + { + if (same_field->item) + return item->dep_on_one_column(arg); + item->dep_on_one_column(arg); + break; + } + return false; +} + + +bool Item_equal::is_statistics_available_for_range_predicates() +{ + bool found= false; + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (field->is_statistics_available_for_range_predicates()) + found= true; + } + return found; +} + + /** @brief Evaluate multiple equality diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index fa715badfc7..f467efd0e10 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -227,6 +227,7 @@ public: bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; } uint decimal_precision() const { return 1; } bool need_parentheses_in_default() { return true; } + bool predicate_selectivity_checker(void *arg) { return TRUE; } }; @@ -418,6 +419,7 @@ public: COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); /* Specifies which result type the function uses to compare its arguments. This method is used in equal field propagation. @@ -936,6 +938,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, table_map usable_tables, SARGABLE_PARAM **sargables); @@ -2473,6 +2476,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); Item *get_copy(THD *thd) { return get_item_copy<Item_func_in>(thd, this); } Item *build_clone(THD *thd) @@ -2570,6 +2574,7 @@ public: return FALSE; } bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); }; @@ -2813,6 +2818,7 @@ public: Item *get_copy(THD *thd) { return get_item_copy<Item_func_like>(thd, this); } + bool predicate_selectivity_checker(void *arg); }; @@ -3220,6 +3226,8 @@ public: uint elements_count() { return equal_items.elements; } friend class Item_equal_fields_iterator; bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); + bool is_statistics_available_for_range_predicates(); Item *multiple_equality_transformer(THD *thd, uchar *arg); friend class Item_equal_iterator<List_iterator_fast,Item>; friend class Item_equal_iterator<List_iterator,Item>; @@ -3371,6 +3379,7 @@ public: SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_and>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; inline bool is_cond_and(Item *item) @@ -3395,6 +3404,7 @@ public: Item *neg_transformer(THD *thd); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_or>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; class Item_func_dyncol_check :public Item_bool_func diff --git a/sql/item_func.h b/sql/item_func.h index 6a4a9fa5dae..d04cc8643cf 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -399,6 +399,12 @@ public: Item_func *get_item_func() { return this; } bool is_simplified_cond_processor(void *arg) { return const_item() && !val_int(); } + bool predicate_selectivity_checker(void *arg) + { + if (const_item() && !is_expensive()) + return false; + return true; + } }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index adc0572cb1c..d3f49097189 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3105,7 +3105,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, { Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index) && - is_eits_usable(field)) + field->is_eits_usable()) parts++; } @@ -3126,7 +3126,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index)) { - if (!is_eits_usable(field)) + if (!field->is_eits_usable()) continue; uint16 store_length; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bb9d82a7039..3553ee78c5b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8296,6 +8296,11 @@ choose_plan(JOIN *join, table_map join_tables) jtab_sort_func, (void*)join->emb_sjm_nest); Json_writer_object wrapper(thd); + + if (join->conds) + wrapper.add("cardinality_accurate", + join->conds->with_accurate_selectivity_estimation()); + Json_writer_array trace_plan(thd,"considered_execution_plans"); if (!join->emb_sjm_nest) @@ -29449,6 +29454,38 @@ void unpack_to_base_table_fields(TABLE *table) } + +/* + @brief + Checks if a predicate is a range predicate with a constant part + + @param + + @item the item referring to the field of the table + @value the item referring to the expression on the + rhs of a predicate + + @details + Range predicate is defined as the form of field op const + where op can be operators like </<=/=/>/>=/BETWEEN etc. + Also the statistics for the field should be available via + an index or statistical tables. + + @retval + TRUE : Success + FALSE : Otherwise +*/ + +bool is_range_predicate(Item *item, Item *value) +{ + Item *field= item->real_item(); + if (field->type() == Item::FIELD_ITEM && !field->const_item() && + (!value || !value->is_expensive())) + return true; + return false; +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 2aba63ddc79..4d5a6e0abf5 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2531,5 +2531,6 @@ void propagate_new_equalities(THD *thd, Item *cond, List<Item_equal> *new_equalities, COND_EQUAL *inherited, bool *is_simplifiable_cond); +bool is_range_predicate(Item *item, Item *value); #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 2636299e330..71d472feaba 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -31,7 +31,6 @@ #include "opt_range.h" #include "uniques.h" #include "sql_show.h" -#include "sql_partition.h" /* The system variable 'use_stat_tables' can take one of the @@ -3159,7 +3158,10 @@ static void dump_stats_from_share_to_table(TABLE *table) Field **field_ptr= table_share->field; Field **table_field_ptr= table->field; for ( ; *field_ptr; field_ptr++, table_field_ptr++) + { (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; + (*table_field_ptr)->stats_available= (*field_ptr)->stats_available; + } table->stats_is_read= true; } @@ -3978,37 +3980,3 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table) } return false; } - -/* - Check wheter we can use EITS statistics for a field or not - - TRUE : Use EITS for the columns - FALSE: Otherwise -*/ - -bool is_eits_usable(Field *field) -{ - Column_statistics* col_stats= field->read_stats; - - // check if column_statistics was allocated for this field - if (!col_stats) - return false; - - DBUG_ASSERT(field->table->stats_is_read); - - /* - (1): checks if we have EITS statistics for a particular column - (2): Don't use EITS for GEOMETRY columns - (3): Disabling reading EITS statistics for columns involved in the - partition list of a table. We assume the selecticivity for - such columns would be handled during partition pruning. - */ - - return !col_stats->no_stat_values_provided() && //(1) - field->type() != MYSQL_TYPE_GEOMETRY && //(2) -#ifdef WITH_PARTITION_STORAGE_ENGINE - (!field->table->part_info || - !field->table->part_info->field_in_partition_expr(field)) && //(3) -#endif - true; -} diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 20ecf06bfee..34471fd5270 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -138,7 +138,6 @@ double get_column_range_cardinality(Field *field, key_range *max_endp, uint range_flag); bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); -bool is_eits_usable(Field* field); class Histogram { diff --git a/sql/table.cc b/sql/table.cc index c48a6fed89a..02f4ece9c40 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5435,6 +5435,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) { (*f_ptr)->next_equal_field= NULL; (*f_ptr)->cond_selectivity= 1.0; + (*f_ptr)->stats_available= 0; } notnull_cond= 0; |