diff options
-rw-r--r-- | mysql-test/include/join_cardinality.inc | 131 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.result | 960 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.test | 310 | ||||
-rw-r--r-- | mysql-test/main/opt_trace.result | 18 | ||||
-rw-r--r-- | mysql-test/main/opt_trace_index_merge.result | 1 | ||||
-rw-r--r-- | mysql-test/main/opt_trace_index_merge_innodb.result | 1 | ||||
-rw-r--r-- | sql/field.cc | 200 | ||||
-rw-r--r-- | sql/field.h | 22 | ||||
-rw-r--r-- | sql/item.cc | 213 | ||||
-rw-r--r-- | sql/item.h | 40 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 113 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 10 | ||||
-rw-r--r-- | sql/item_func.h | 6 | ||||
-rw-r--r-- | sql/opt_range.cc | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 37 | ||||
-rw-r--r-- | sql/sql_select.h | 1 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 38 | ||||
-rw-r--r-- | sql/sql_statistics.h | 1 | ||||
-rw-r--r-- | sql/table.cc | 1 |
19 files changed, 2067 insertions, 40 deletions
diff --git a/mysql-test/include/join_cardinality.inc b/mysql-test/include/join_cardinality.inc new file mode 100644 index 00000000000..1ab6edb75cb --- /dev/null +++ b/mysql-test/include/join_cardinality.inc @@ -0,0 +1,131 @@ +--echo # +--echo # BASIC CASES +--echo # + +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # CASE where selectivity estimates are not accurate + +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # The cardinality here would be not accurate because we don't have +--echo # selectivity for the predicate t1.b=10 +--echo # + +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # BASIC CASES FOR AND CONJUNCT +--echo # + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # BASIC CASES FOR OR CONJUNCT +--echo # + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # The cardinality here would be not accurate because for the +--echo # OR conjunct all predicates can't be resolved to one column + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # COMPLEX CASES WITH AND-OR CONJUNCTS +--echo # + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Test with subquery +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result new file mode 100644 index 00000000000..f1379c53e1e --- /dev/null +++ b/mysql-test/main/join_cardinality.result @@ -0,0 +1,960 @@ +CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; +CREATE TABLE t2(a INT, b INT, key(b)); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_trace=1; +SET optimizer_switch='rowid_filter=off'; +# +# RUNNING the join cardinality tests when statistics are +# available only from indexes +# +# +# BASIC CASES +# +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b b 5 NULL 5 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref b b 5 const 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# CASE where selectivity estimates are not accurate +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# The cardinality here would be not accurate because we don't have +# selectivity for the predicate t1.b=10 +# +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# BASIC CASES FOR AND CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range b b 5 NULL 5 Using index condition +1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# BASIC CASES FOR OR CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 4 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# The cardinality here would be not accurate because for the +# OR conjunct all predicates can't be resolved to one column +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b,a NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# COMPLEX CASES WITH AND-OR CONJUNCTS +# +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where +1 SIMPLE t1 ref a a 5 test.t2.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 6 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ref_or_null a a 5 const 2 100.00 Using index condition +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Test with subquery +# +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ref a a 5 const 1 Using where +2 SUBQUERY t2 ALL NULL NULL NULL NULL 100 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY t2 ref b b 5 test.t1.b 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false, + true +] +DROP TABLE t1,t2; +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; +# +# RUNNING the join cardinality tests when only statistics +# are available from stat tables +# +# +# BASIC CASES +# +EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# Equi-join condition +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# CASE where selectivity estimates are not accurate +EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# The cardinality here would be not accurate because we don't have +# selectivity for the predicate t1.b=10 +# +EXPLAIN SELECT * FROM t1 WHERE t1.c = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# BASIC CASES FOR AND CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# BASIC CASES FOR OR CONJUNCT +# +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# The cardinality here would be not accurate because for the +# OR conjunct all predicates can't be resolved to one column +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# COMPLEX CASES WITH AND-OR CONJUNCTS +# +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 +WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND +((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR +((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND +((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 1.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Test with subquery +# +EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 SUBQUERY t2 ALL NULL NULL NULL NULL 100 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 +WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where +2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false, + true +] +DROP TABLE t1,t2; +# +# Combination with statistics from stat tables and +# statistics from indexes +# +CREATE TABLE t1(a INT, b INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 98.00 97.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ref a a 5 const 1 1.00 97.66 100.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 95.70 97.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +DROP TABLE t1; +# +# Tests with simple predicates +# +CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +# +# Tests with LIKE PREDICATES +# +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Tests with NULL predicates +# +EXPLAIN SELECT * from t1 WHERE t1.a IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 5 const 1 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +# +# Tests with </<=/>/>=/BETWEEN +# +EXPLAIN SELECT * from t1 WHERE t1.a > 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 5 < t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 5 NULL 1 Using index condition; Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 5 NULL 1 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# Tests with IN predicate +# +EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE +t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 +WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b b 5 NULL 3 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +# +# Tests using different functional predicates +# +EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE abs(a) > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * from t1 WHERE a > 10+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 1 Using index condition +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1; +# +# Tests when updating stats tables manually +# +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; +INSERT INTO +mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0); +# +# estimates from stat tables not available, all the fields should be NOT NULL +# +EXPLAIN SELECT * from t1 WHERE a < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +DROP TABLE t1; +# +# Using multiple equalities at the top level +# +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +# +# Tests with views (using Item_direct_view_ref) +# +CREATE VIEW v1 AS SELECT a, b FROM t1; +EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1,t2; +DROP VIEW v1; +# +# TESTS to get ndv from rec_per_key +# +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +ALTER TABLE t1 ADD KEY k1(a); +set @save_use_stat_tables= @@use_stat_tables; +set use_stat_tables='never'; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +CREATE TABLE t2 like t1; +INSERT INTO t2 SELECT * FROM t1; +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range k1 k1 5 NULL 2 Using index condition +1 SIMPLE t2 ref k1 k1 5 test.t1.a 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +set use_stat_tables=@save_use_stat_tables; +DROP TABLE t1,t2; +# +# Tests with multi-component keys +# +CREATE TABLE t1 (a INT, b INT, KEY(a,b)); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +ALTER TABLE t1 ADD KEY k1(a); +EXPLAIN SELECT * FROM t1 where t1.a<3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,k1 a 5 NULL 2 Using where; Using index +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +DROP TABLE t1; +# +# Tests when statistics from stat tables cannot be used because of the system variables +# use_stat_tables and optimizer_use_condition_selectivity +# +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +SELECT * FROM t1; +a b c +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 +9 9 9 +10 10 10 +SELECT * FROM t2; +a b +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +SET @save_use_stat_tables= @@use_stat_tables; +SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; +SET use_stat_tables='never'; +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +SET use_stat_tables= @save_use_stat_tables; +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +set optimizer_use_condition_selectivity=1; +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +DROP TABLE t1,t2; +SET optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test new file mode 100644 index 00000000000..ae67350edca --- /dev/null +++ b/mysql-test/main/join_cardinality.test @@ -0,0 +1,310 @@ +--source include/have_sequence.inc + +CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; + +CREATE TABLE t2(a INT, b INT, key(b)); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; + + +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_trace=1; +SET optimizer_switch='rowid_filter=off'; + +--echo # +--echo # RUNNING the join cardinality tests when statistics are +--echo # available only from indexes +--echo # + +--source include/join_cardinality.inc + +DROP TABLE t1,t2; + +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; + +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_100; + +--disable_result_log +--disable_query_log +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(a,b) INDEXES(); +ANALYZE TABLE t2 PERSISTENT FOR ALL; +--enable_query_log +--enable_result_log + +--echo # +--echo # RUNNING the join cardinality tests when only statistics +--echo # are available from stat tables +--echo # + +--source include/join_cardinality.inc + +DROP TABLE t1,t2; + + +--echo # +--echo # Combination with statistics from stat tables and +--echo # statistics from indexes +--echo # + +CREATE TABLE t1(a INT, b INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES (); + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests with simple predicates +--echo # + +CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b)); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; + +--echo # +--echo # Tests with LIKE PREDICATES +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%'; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%'; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with NULL predicates +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a IS NULL; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with </<=/>/>=/BETWEEN +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a > 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 5 < t1.a; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests with IN predicate +--echo # + +EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE +t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 +WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # +--echo # Tests using different functional predicates +--echo # + +EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE abs(a) > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * from t1 WHERE a > 10+1; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests when updating stats tables manually +--echo # + +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_100; + +INSERT INTO +mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0); + +--echo # +--echo # estimates from stat tables not available, all the fields should be NOT NULL +--echo # + +EXPLAIN SELECT * from t1 WHERE a < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Using multiple equalities at the top level +--echo # + +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 SELECT seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; + +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE TABLE t2 PERSISTENT FOR ALL; + +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN +SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b; + +--echo # +--echo # Tests with views (using Item_direct_view_ref) +--echo # + +CREATE VIEW v1 AS SELECT a, b FROM t1; + +EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1,t2; +DROP VIEW v1; + + +--echo # +--echo # TESTS to get ndv from rec_per_key +--echo # + +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; +ALTER TABLE t1 ADD KEY k1(a); + +set @save_use_stat_tables= @@use_stat_tables; +set use_stat_tables='never'; +analyze table t1; + +CREATE TABLE t2 like t1; +INSERT INTO t2 SELECT * FROM t1; +ANALYZE TABLE t2 PERSISTENT FOR ALL; +EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +set use_stat_tables=@save_use_stat_tables; + +DROP TABLE t1,t2; + +--echo # +--echo # Tests with multi-component keys +--echo # + +CREATE TABLE t1 (a INT, b INT, KEY(a,b)); +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10; + +EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +ALTER TABLE t1 ADD KEY k1(a); +EXPLAIN SELECT * FROM t1 where t1.a<3; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests when statistics from stat tables cannot be used because of the system variables +--echo # use_stat_tables and optimizer_use_condition_selectivity +--echo # + +CREATE TABLE t1(a INT, b INT, c INT); +INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10; +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT seq, seq from seq_1_to_10; + +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE TABLE t2 PERSISTENT FOR ALL; + +SELECT * FROM t1; +SELECT * FROM t2; + +SET @save_use_stat_tables= @@use_stat_tables; +SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; +SET use_stat_tables='never'; + +EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET use_stat_tables= @save_use_stat_tables; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +set optimizer_use_condition_selectivity=1; + +EXPLAIN SELECT * FROM t1 WHERE t1.a > 10; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; + +DROP TABLE t1,t2; + +SET optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 8f4f6ec97ce..c10018adab6 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -124,6 +124,7 @@ select * from v1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -270,6 +271,7 @@ select * from (select * from t1 where t1.a=1)q { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -421,6 +423,7 @@ select * from v2 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -985,6 +988,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -1434,6 +1438,7 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -1628,6 +1633,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -1811,6 +1817,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -2076,6 +2083,7 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -2848,6 +2856,7 @@ explain extended select * from t1 where a in (select pk from t10) { "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -2878,6 +2887,7 @@ explain extended select * from t1 where a in (select pk from t10) { } }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -3296,6 +3306,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], @@ -3924,6 +3935,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4422,6 +4434,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_ "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4499,6 +4512,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_ } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -4948,6 +4962,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6521,6 +6536,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "execution_plan_for_potential_materialization": { "steps": [ { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6595,6 +6611,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], @@ -6672,6 +6689,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { } }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/mysql-test/main/opt_trace_index_merge.result b/mysql-test/main/opt_trace_index_merge.result index 3b79a7a44e8..da2d0591002 100644 --- a/mysql-test/main/opt_trace_index_merge.result +++ b/mysql-test/main/opt_trace_index_merge.result @@ -200,6 +200,7 @@ explain select * from t1 where a=1 or b=1 { ] }, { + "cardinality_accurate": false, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/mysql-test/main/opt_trace_index_merge_innodb.result b/mysql-test/main/opt_trace_index_merge_innodb.result index 509569021a5..49c59c96058 100644 --- a/mysql-test/main/opt_trace_index_merge_innodb.result +++ b/mysql-test/main/opt_trace_index_merge_innodb.result @@ -198,6 +198,7 @@ explain select * from t1 where pk1 != 0 and key1 = 1 { ] }, { + "cardinality_accurate": true, "considered_execution_plans": [ { "plan_prefix": [], diff --git a/sql/field.cc b/sql/field.cc index fe3aebce05d..91dc1deb360 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -36,6 +36,8 @@ #include "tztime.h" // struct Time_zone #include "filesort.h" // change_double_for_sort #include "log_event.h" // class Table_map_log_event +#include "sql_statistics.h" +#include "sql_partition.h" #include <m_ctype.h> // Maximum allowed exponent value for converting string to decimal @@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, field_index= 0; cond_selectivity= 1.0; next_equal_field= NULL; + stats_available= 0; } @@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length) } +/* + @brief + Check if statistics for a column are available via keys + + @details + If the column is the first component of a key, then statistics + for the column are available from the range optimizer. + Sets the bit in Field::stats_table + a) NDV is available + b) Statistics are available for the non-const argument of a + range predicate +*/ + +void Field::statistics_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (keyinfo->usable_key_parts == 1 && + field_index + 1 == keyinfo->key_part->fieldnr) + { + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + return; + } + } +} + + +/* + @brief + Check if statistics for a column are available via stat tables +*/ + +void Field::statistics_available_via_stat_tables() +{ + THD *thd= table->in_use; + if (!(check_eits_preferred(thd) && + thd->variables.optimizer_use_condition_selectivity > 2)) + return; + if (!(table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided())) + return; + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)) + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); +} + + +/* + @brief + Check if statistics for a column are available via indexes or stat tables + + @retval + TRUE : statistics available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_statistics_available_for_range_predicates() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE)); +} + + +void Field::is_statistics_available() +{ + statistics_available_via_keys(); + statistics_available_via_stat_tables(); + is_ndv_available_via_keys(); + is_ndv_available_via_stat_tables(); +} + +/* + @brief + Check if ndv for a column are available via indexes or stat tables + + @retval + TRUE : ndv available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_ndv_available() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE)); +} + + +/* + @brief + Check if number of distinct values(ndv) for a column are available via keys + + @retval + TRUE : ndv available from keys + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0)) + { + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; + } + } + return false; +} + + +/* + @brief + Check if ndv for a column are available via statistical tables + + @retval + TRUE : ndv available from statistical tables + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_stat_tables() +{ + if (!check_eits_preferred(table->in_use)) + return false; + if (!(read_stats && !read_stats->no_stat_values_provided() && + !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))) + return false; + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; +} + + +/* + @brief + Checks if a field is the first component of a given key + + @param + key given key + + @retval + TRUE : field is the first component of the given key + FALSE : otherwise +*/ + +bool Field::is_first_component_of_key(KEY *key) +{ + DBUG_ASSERT(key->usable_key_parts >= 1); + return field_index + 1 == key->key_part->fieldnr; +} + + +/* + Check whether EITS statistics for a field are usable or not + + TRUE : Use EITS for the columns + FALSE: Otherwise +*/ + +bool Field::is_eits_usable() +{ + // check if column_statistics was allocated for this field + if (!read_stats) + return false; + + DBUG_ASSERT(table->stats_is_read); + + /* + (1): checks if we have EITS statistics for a particular column + (2): Don't use EITS for GEOMETRY columns + (3): Disabling reading EITS statistics for columns involved in the + partition list of a table. We assume the selectivity for + such columns would be handled during partition pruning. + */ + + return !read_stats->no_stat_values_provided() && //(1) + type() != MYSQL_TYPE_GEOMETRY && //(2) +#ifdef WITH_PARTITION_STORAGE_ENGINE + (!table->part_info || + !table->part_info->field_in_partition_expr(this)) && //(3) +#endif + true; +} + + Virtual_column_info* Virtual_column_info::clone(THD *thd) { Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this); diff --git a/sql/field.h b/sql/field.h index dfc02149f9d..c60c5e3bece 100644 --- a/sql/field.h +++ b/sql/field.h @@ -869,6 +869,18 @@ public: */ Column_statistics_collected *collected_stats; + enum stats_availability + { + STATISTICS_CACHED=0, + STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE, + STATISTICS_FOR_NDV_AVAILABLE + }; + + /* + Caches the value of whether statistics are available for a field or not. + */ + uint stats_available; + /* This is additional data provided for any computed(virtual) field, default function or check constraint. @@ -1907,6 +1919,16 @@ public: /* Mark field in read map. Updates also virtual fields */ void register_field_in_read_map(); + bool is_first_component_of_key(KEY *key); + void statistics_available_via_keys(); + void statistics_available_via_stat_tables(); + void is_statistics_available(); + bool is_statistics_available_for_range_predicates(); + bool is_ndv_available(); + bool is_ndv_available_via_stat_tables(); + bool is_ndv_available_via_keys(); + bool is_eits_usable(); + virtual Compression_method *compression_method() const { return 0; } virtual Virtual_tmp_table **virtual_tmp_table_addr() diff --git a/sql/item.cc b/sql/item.cc index 52274380cd1..f80db684bb3 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd, } +/* + @brief + Check if selectivity estimates are accurate for a conditional formula + + @details + This function checks whether this item belongs to a certain class of + condition for which we can calculate an accurate selectivity estimate. + + The definition of the class of condition is recursive. + 1. Simple formula + a. Formula in the form of range predicates: + + The predicate would be of type: + col op const + where op can be + + op: + | > + | >= + | < + | <= + | = + | <> + Also the other cases are with + [NOT] IN predicate, + [NOT] NULL predicate and + LIKE predicate. + The predicate should have only one non-constant argument and + this argument will be a reference to a column that is used either + as the first component of an index or statistics are available via + statistical tables. + + b. Equalities: + For an equality to have accurate selectivity estimates, + the number of distinct values for each column in the equality + needs to be known. + Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a) + For this case we need to make sure we know number of distinct + values for t1.a and t2.a + + The number of distinct values for a column can be known by + 1) from indexes via rec_per_key + 2) from statistical tables via avg_frequency. + + 2. AND / OR formula over formulas defined in section 1 of the definition. + + a) AND Formula + For AND formula the check for accurate selectivity estimates depends + whether or not the AND formula is at the top level. + + i) Top level + For an AND formula at the top level, we need to check if + accurate estimates are available for all the predicates + inside an AND formula. + If this is true then accurate selectivity estimates are available + for the AND formula. + + Eg: t1.a > 10 and t2.a < 5 + + if we have accurate selectivity estimates + for t1.a > 10 and t2.a < 5 via indexes or statistical tables, + then selectivity estimates for this AND formula are accurate + + ii) Non-top level + For all the predicates inside an AND formula + accurate selectivity estimates are needed + and each predicate need to be resolved by one + column (table column). If this scenario is satisfied then + accurate selectivity estimates is available for the AND formula. + Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0) + + b) OR Formula + + For an OR predicate, we need to make sure that the + whole OR predicate can be resolved by one column + directly or indirectly (that is via multiple equalities). + If this is possible then for the resolved column we need to have + statistics either from the first component of an index or + via statistical tables. + + Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0); + + In the end for all fields we may have selectivity from an index or + statistical tables. + + @notes + The implementation for this function use the 'walk' method to traverse + the tree of this item with predicate_selectivity_checker() as the + call-back parameter of the method. + + + @retval + TRUE selectivity estimates are accurate + FALSE OTHERWISE +*/ + +bool Item::with_accurate_selectivity_estimation() +{ + if (type() == Item::COND_ITEM && + ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item *item; + while ((item= li++)) + { + SAME_FIELD arg= {NULL, false}; + if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) + return false; + } + return true; + } + SAME_FIELD arg= {NULL, false}; + return !walk(&Item::predicate_selectivity_checker, 0, &arg); +} + + static Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel) { @@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } +/* + @brief + Checks if a formula of a condition contains the same column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + + @retval + TRUE : the formula does not depend on one column + FALSE : OTHERWISE +*/ + +bool Item_field::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + +bool Item_direct_view_ref::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + Item_field *field_item= (Item_field*)real_item(); + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field_item->field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field_item->field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) { table_map used= used_tables(); diff --git a/sql/item.h b/sql/item.h index fb480b4c578..8a6b16ad82d 100644 --- a/sql/item.h +++ b/sql/item.h @@ -446,6 +446,26 @@ typedef struct replace_equal_field_arg struct st_join_table *context_tab; } REPLACE_EQUAL_FIELD_ARG; + +/* + Structure storing information for a field on which the entire predicate is + dependent on (directly or indirectly via equalities) +*/ +typedef struct same_field +{ + /* + field item for the first encountered column while traversing + over the conditional predicate + */ + Item *item; + /* + Set to true if the statistics for the field are available + directly (via keys or stat tables) or indirectly (via equalities) + */ + bool is_stats_available; +}SAME_FIELD; + + class Settable_routine_parameter { public: @@ -1968,7 +1988,25 @@ public: virtual bool count_sargable_conds(void *arg) { return 0; } virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; } virtual bool exists2in_processor(void *arg) { return 0; } + virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } + + bool with_accurate_selectivity_estimation(); + + /* + @brief + Check if selectivity of a predicate is available via indexes or EITS + + @param + arg Structure storing information whether the AND/OR conjunct + can be resolved via a single column. + + @retval + FALSE : SUCCESS + TRUE : OTHERWISE + */ + virtual bool predicate_selectivity_checker(void *arg) { return FALSE; } + virtual bool dep_on_one_column(void *arg) { return true; } bool cleanup_is_expensive_cache_processor(void *arg) { is_expensive_cache= (int8)(-1); @@ -3576,6 +3614,7 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg); + bool dep_on_one_column(void *arg); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; @@ -5946,6 +5985,7 @@ public: Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) { return this; } Item *remove_item_direct_ref() { return this; } + bool dep_on_one_column(void *arg); }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 83eb605f463..8084af72f60 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2106,6 +2106,31 @@ bool Item_func_between::count_sargable_conds(void *arg) return 0; } +bool Item_func_between::predicate_selectivity_checker(void *arg) +{ + if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) + { + if (is_range_predicate(args[0], args[1]) && + is_range_predicate(args[0], args[2])) + { + return args[0]->dep_on_one_column(arg); + } + return true; + } + + for (uint i= 1 ; i < arg_count ; i++) + { + if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM) + { + if (!is_range_predicate(args[i], args[0])) + return true; + if (args[i]->dep_on_one_column(arg)) + return true; + } + } + return false; +} + void Item_func_between::fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) @@ -4290,6 +4315,16 @@ bool Item_func_in::count_sargable_conds(void *arg) } +bool Item_func_in::predicate_selectivity_checker(void *arg) +{ + if (!all_items_are_consts(args + 1, arg_count - 1)) + return true; + if (key_item()->dep_on_one_column(arg)) + return true; + return false; +} + + bool Item_func_in::list_contains_null() { Item **arg,**arg_end; @@ -5516,6 +5551,16 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg) } +bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], NULL)) + { + return args[0]->dep_on_one_column(arg); + } + return true; +} + + longlong Item_func_isnull::val_int() { DBUG_ASSERT(fixed == 1); @@ -5597,6 +5642,17 @@ bool Item_bool_func2::count_sargable_conds(void *arg) return 0; } + +bool Item_bool_func2::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], args[1])) + return args[0]->dep_on_one_column(arg); + if (is_range_predicate(args[1], args[0])) + return args[1]->dep_on_one_column(arg); + return true; +} + + void Item_func_like::print(String *str, enum_query_type query_type) { args[0]->print_parenthesised(str, query_type, precedence()); @@ -5696,8 +5752,18 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) bool sargable_pattern= with_sargable_pattern(); param->thd->mem_root= tmp_root; return sargable_pattern ? - Item_bool_func2::get_mm_tree(param, cond_ptr) : - Item_func::get_mm_tree(param, cond_ptr); + Item_bool_func2::get_mm_tree(param, cond_ptr) : + Item_func::get_mm_tree(param, cond_ptr); +} + + +bool Item_func_like::predicate_selectivity_checker(void *arg) +{ + if (with_sargable_pattern()) + { + return args[0]->dep_on_one_column(arg); + } + return true; } @@ -7139,6 +7205,49 @@ bool Item_equal::count_sargable_conds(void *arg) } +bool Item_equal::predicate_selectivity_checker(void *arg) +{ + /* + For equality conditions like tbl1.col = tbl2.col + we only want to know if the number of distinct values (ndv) is + available for all the fields in the multiple equality or not. + */ + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (!(field->is_ndv_available())) + return true; + } + + it.rewind(); + Item *item; + SAME_FIELD *same_field= (SAME_FIELD *) arg; + while ((item= it++)) + { + if (same_field->item) + return item->dep_on_one_column(arg); + item->dep_on_one_column(arg); + break; + } + return false; +} + + +bool Item_equal::is_statistics_available_for_range_predicates() +{ + bool found= false; + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (field->is_statistics_available_for_range_predicates()) + found= true; + } + return found; +} + + /** @brief Evaluate multiple equality diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index fa715badfc7..f467efd0e10 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -227,6 +227,7 @@ public: bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; } uint decimal_precision() const { return 1; } bool need_parentheses_in_default() { return true; } + bool predicate_selectivity_checker(void *arg) { return TRUE; } }; @@ -418,6 +419,7 @@ public: COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); /* Specifies which result type the function uses to compare its arguments. This method is used in equal field propagation. @@ -936,6 +938,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, table_map usable_tables, SARGABLE_PARAM **sargables); @@ -2473,6 +2476,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); Item *get_copy(THD *thd) { return get_item_copy<Item_func_in>(thd, this); } Item *build_clone(THD *thd) @@ -2570,6 +2574,7 @@ public: return FALSE; } bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); }; @@ -2813,6 +2818,7 @@ public: Item *get_copy(THD *thd) { return get_item_copy<Item_func_like>(thd, this); } + bool predicate_selectivity_checker(void *arg); }; @@ -3220,6 +3226,8 @@ public: uint elements_count() { return equal_items.elements; } friend class Item_equal_fields_iterator; bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); + bool is_statistics_available_for_range_predicates(); Item *multiple_equality_transformer(THD *thd, uchar *arg); friend class Item_equal_iterator<List_iterator_fast,Item>; friend class Item_equal_iterator<List_iterator,Item>; @@ -3371,6 +3379,7 @@ public: SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_and>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; inline bool is_cond_and(Item *item) @@ -3395,6 +3404,7 @@ public: Item *neg_transformer(THD *thd); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_or>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; class Item_func_dyncol_check :public Item_bool_func diff --git a/sql/item_func.h b/sql/item_func.h index 6a4a9fa5dae..d04cc8643cf 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -399,6 +399,12 @@ public: Item_func *get_item_func() { return this; } bool is_simplified_cond_processor(void *arg) { return const_item() && !val_int(); } + bool predicate_selectivity_checker(void *arg) + { + if (const_item() && !is_expensive()) + return false; + return true; + } }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index adc0572cb1c..d3f49097189 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3105,7 +3105,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, { Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index) && - is_eits_usable(field)) + field->is_eits_usable()) parts++; } @@ -3126,7 +3126,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index)) { - if (!is_eits_usable(field)) + if (!field->is_eits_usable()) continue; uint16 store_length; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bb9d82a7039..3553ee78c5b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8296,6 +8296,11 @@ choose_plan(JOIN *join, table_map join_tables) jtab_sort_func, (void*)join->emb_sjm_nest); Json_writer_object wrapper(thd); + + if (join->conds) + wrapper.add("cardinality_accurate", + join->conds->with_accurate_selectivity_estimation()); + Json_writer_array trace_plan(thd,"considered_execution_plans"); if (!join->emb_sjm_nest) @@ -29449,6 +29454,38 @@ void unpack_to_base_table_fields(TABLE *table) } + +/* + @brief + Checks if a predicate is a range predicate with a constant part + + @param + + @item the item referring to the field of the table + @value the item referring to the expression on the + rhs of a predicate + + @details + Range predicate is defined as the form of field op const + where op can be operators like </<=/=/>/>=/BETWEEN etc. + Also the statistics for the field should be available via + an index or statistical tables. + + @retval + TRUE : Success + FALSE : Otherwise +*/ + +bool is_range_predicate(Item *item, Item *value) +{ + Item *field= item->real_item(); + if (field->type() == Item::FIELD_ITEM && !field->const_item() && + (!value || !value->is_expensive())) + return true; + return false; +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 2aba63ddc79..4d5a6e0abf5 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2531,5 +2531,6 @@ void propagate_new_equalities(THD *thd, Item *cond, List<Item_equal> *new_equalities, COND_EQUAL *inherited, bool *is_simplifiable_cond); +bool is_range_predicate(Item *item, Item *value); #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 2636299e330..71d472feaba 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -31,7 +31,6 @@ #include "opt_range.h" #include "uniques.h" #include "sql_show.h" -#include "sql_partition.h" /* The system variable 'use_stat_tables' can take one of the @@ -3159,7 +3158,10 @@ static void dump_stats_from_share_to_table(TABLE *table) Field **field_ptr= table_share->field; Field **table_field_ptr= table->field; for ( ; *field_ptr; field_ptr++, table_field_ptr++) + { (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; + (*table_field_ptr)->stats_available= (*field_ptr)->stats_available; + } table->stats_is_read= true; } @@ -3978,37 +3980,3 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table) } return false; } - -/* - Check wheter we can use EITS statistics for a field or not - - TRUE : Use EITS for the columns - FALSE: Otherwise -*/ - -bool is_eits_usable(Field *field) -{ - Column_statistics* col_stats= field->read_stats; - - // check if column_statistics was allocated for this field - if (!col_stats) - return false; - - DBUG_ASSERT(field->table->stats_is_read); - - /* - (1): checks if we have EITS statistics for a particular column - (2): Don't use EITS for GEOMETRY columns - (3): Disabling reading EITS statistics for columns involved in the - partition list of a table. We assume the selecticivity for - such columns would be handled during partition pruning. - */ - - return !col_stats->no_stat_values_provided() && //(1) - field->type() != MYSQL_TYPE_GEOMETRY && //(2) -#ifdef WITH_PARTITION_STORAGE_ENGINE - (!field->table->part_info || - !field->table->part_info->field_in_partition_expr(field)) && //(3) -#endif - true; -} diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 20ecf06bfee..34471fd5270 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -138,7 +138,6 @@ double get_column_range_cardinality(Field *field, key_range *max_endp, uint range_flag); bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); -bool is_eits_usable(Field* field); class Histogram { diff --git a/sql/table.cc b/sql/table.cc index c48a6fed89a..02f4ece9c40 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5435,6 +5435,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) { (*f_ptr)->next_equal_field= NULL; (*f_ptr)->cond_selectivity= 1.0; + (*f_ptr)->stats_available= 0; } notnull_cond= 0; |