summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/include/join_cardinality.inc131
-rw-r--r--mysql-test/main/join_cardinality.result960
-rw-r--r--mysql-test/main/join_cardinality.test310
-rw-r--r--mysql-test/main/opt_trace.result18
-rw-r--r--mysql-test/main/opt_trace_index_merge.result1
-rw-r--r--mysql-test/main/opt_trace_index_merge_innodb.result1
-rw-r--r--sql/field.cc200
-rw-r--r--sql/field.h22
-rw-r--r--sql/item.cc213
-rw-r--r--sql/item.h40
-rw-r--r--sql/item_cmpfunc.cc113
-rw-r--r--sql/item_cmpfunc.h10
-rw-r--r--sql/item_func.h6
-rw-r--r--sql/opt_range.cc4
-rw-r--r--sql/sql_select.cc37
-rw-r--r--sql/sql_select.h1
-rw-r--r--sql/sql_statistics.cc38
-rw-r--r--sql/sql_statistics.h1
-rw-r--r--sql/table.cc1
19 files changed, 2067 insertions, 40 deletions
diff --git a/mysql-test/include/join_cardinality.inc b/mysql-test/include/join_cardinality.inc
new file mode 100644
index 00000000000..1ab6edb75cb
--- /dev/null
+++ b/mysql-test/include/join_cardinality.inc
@@ -0,0 +1,131 @@
+--echo #
+--echo # BASIC CASES
+--echo #
+
+EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.b > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.b < 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.b = 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo # Equi-join condition
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo # CASE where selectivity estimates are not accurate
+
+EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # The cardinality here would be not accurate because we don't have
+--echo # selectivity for the predicate t1.b=10
+--echo #
+
+EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # BASIC CASES FOR AND CONJUNCT
+--echo #
+
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # BASIC CASES FOR OR CONJUNCT
+--echo #
+
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo # The cardinality here would be not accurate because for the
+--echo # OR conjunct all predicates can't be resolved to one column
+
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # COMPLEX CASES WITH AND-OR CONJUNCTS
+--echo #
+
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1
+WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND
+((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR
+((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND
+((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155));
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5));
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # Test with subquery
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
new file mode 100644
index 00000000000..f1379c53e1e
--- /dev/null
+++ b/mysql-test/main/join_cardinality.result
@@ -0,0 +1,960 @@
+CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
+CREATE TABLE t2(a INT, b INT, key(b));
+INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
+#
+# RUNNING the join cardinality tests when statistics are
+# available only from indexes
+#
+#
+# BASIC CASES
+#
+EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL b NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range b b 5 NULL 5 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref b b 5 const 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 3 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 3 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# Equi-join condition
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where
+1 SIMPLE t1 ref a a 5 test.t2.b 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# CASE where selectivity estimates are not accurate
+EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# The cardinality here would be not accurate because we don't have
+# selectivity for the predicate t1.b=10
+#
+EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# BASIC CASES FOR AND CONJUNCT
+#
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range b,a b 5 NULL 3 Using index condition; Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range b b 5 NULL 5 Using index condition
+1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# BASIC CASES FOR OR CONJUNCT
+#
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 4 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# The cardinality here would be not accurate because for the
+# OR conjunct all predicates can't be resolved to one column
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 100
+1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL b,a NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# COMPLEX CASES WITH AND-OR CONJUNCTS
+#
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where
+1 SIMPLE t1 ref a a 5 test.t2.b 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where
+1 SIMPLE t1 ref a a 5 test.t2.b 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 100 Using where
+1 SIMPLE t1 ref a a 5 test.t2.b 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1
+WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND
+((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR
+((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND
+((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 6 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5));
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref_or_null a a 5 const 2 100.00 Using index condition
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# Test with subquery
+#
+EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ref a a 5 const 1 Using where
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 100
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
+2 DEPENDENT SUBQUERY t2 ref b b 5 test.t1.b 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false,
+ true
+]
+DROP TABLE t1,t2;
+CREATE TABLE t1(a INT, b INT, c INT);
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
+#
+# RUNNING the join cardinality tests when only statistics
+# are available from stat tables
+#
+#
+# BASIC CASES
+#
+EXPLAIN SELECT * FROM t1 WHERE t1.b <> 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.b+2 = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 and 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# Equi-join condition
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# CASE where selectivity estimates are not accurate
+EXPLAIN SELECT * FROM t1 WHERE abs(t1.b) = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# The cardinality here would be not accurate because we don't have
+# selectivity for the predicate t1.b=10
+#
+EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# BASIC CASES FOR AND CONJUNCT
+#
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 and b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 AND b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 AND t2.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# BASIC CASES FOR OR CONJUNCT
+#
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR a <= 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * FROM t1 WHERE a BETWEEN 1 AND 5 OR a <= 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+# The cardinality here would be not accurate because for the
+# OR conjunct all predicates can't be resolved to one column
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a > 5 OR t2.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1 WHERE a >= 5 OR b <= 6;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# COMPLEX CASES WITH AND-OR CONJUNCTS
+#
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t1.a < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.b < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1,t2 WHERE t1.a=t2.b and (t1.a > 5 or t2.a < 10);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100
+1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1
+WHERE ((t1.a > 20 and t1.a < 30) or (t1.a > 40 and t1.a < 50)) AND
+((t1.a > 25 and t1.a < 35) or (t1.a > 45 and t1.a < 55)) OR
+((t1.a > 120 and t1.a < 130) or (t1.a > 140 and t1.a < 150)) AND
+((t1.a > 125 and t1.a < 135) or (t1.a > 145 and t1.a < 155));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN EXTENDED SELECT * from t1 WHERE ((t1.a IS NULL) or (t1.a = 5));
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 1.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where `test`.`t1`.`a` is null or `test`.`t1`.`a` = 5
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# Test with subquery
+#
+EXPLAIN SELECT * from t1 WHERE t1.a = (select t2.a from t2 limit 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 100
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
+2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false,
+ true
+]
+DROP TABLE t1,t2;
+#
+# Combination with statistics from stat tables and
+# statistics from indexes
+#
+CREATE TABLE t1(a INT, b INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq from seq_1_to_100;
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 98.00 97.00 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ref a a 5 const 1 1.00 97.66 100.00 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 100 100.00 95.70 97.00 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+DROP TABLE t1;
+#
+# Tests with simple predicates
+#
+CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b));
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10;
+#
+# Tests with LIKE PREDICATES
+#
+EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# Tests with NULL predicates
+#
+EXPLAIN SELECT * from t1 WHERE t1.a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref a a 5 const 1 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+#
+# Tests with </<=/>/>=/BETWEEN
+#
+EXPLAIN SELECT * from t1 WHERE t1.a > 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE 5 < t1.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a,b a 5 NULL 1 Using index condition; Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a,b a 5 NULL 1 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# Tests with IN predicate
+#
+EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 3 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * from t1 WHERE
+t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,a,a 5,5,5 NULL 3 Using union(a,a,a); Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * from t1
+WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a,b b 5 NULL 3 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+#
+# Tests using different functional predicates
+#
+EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * from t1 WHERE abs(a) > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * from t1 WHERE a > 10+1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 1 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+DROP TABLE t1;
+#
+# Tests when updating stats tables manually
+#
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 SELECT seq, seq from seq_1_to_100;
+INSERT INTO
+mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0);
+#
+# estimates from stat tables not available, all the fields should be NOT NULL
+#
+EXPLAIN SELECT * from t1 WHERE a < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+DROP TABLE t1;
+#
+# Using multiple equalities at the top level
+#
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 SELECT seq, seq from seq_1_to_10;
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_10;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+EXPLAIN
+SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10
+1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN
+SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10
+1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+#
+# Tests with views (using Item_direct_view_ref)
+#
+CREATE VIEW v1 AS SELECT a, b FROM t1;
+EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+DROP TABLE t1,t2;
+DROP VIEW v1;
+#
+# TESTS to get ndv from rec_per_key
+#
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
+ALTER TABLE t1 ADD KEY k1(a);
+set @save_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='never';
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+CREATE TABLE t2 like t1;
+INSERT INTO t2 SELECT * FROM t1;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range k1 k1 5 NULL 2 Using index condition
+1 SIMPLE t2 ref k1 k1 5 test.t1.a 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+set use_stat_tables=@save_use_stat_tables;
+DROP TABLE t1,t2;
+#
+# Tests with multi-component keys
+#
+CREATE TABLE t1 (a INT, b INT, KEY(a,b));
+INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
+EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+ALTER TABLE t1 ADD KEY k1(a);
+EXPLAIN SELECT * FROM t1 where t1.a<3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a,k1 a 5 NULL 2 Using where; Using index
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+DROP TABLE t1;
+#
+# Tests when statistics from stat tables cannot be used because of the system variables
+# use_stat_tables and optimizer_use_condition_selectivity
+#
+CREATE TABLE t1(a INT, b INT, c INT);
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10;
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_10;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+SELECT * FROM t1;
+a b c
+1 1 1
+2 2 2
+3 3 3
+4 4 4
+5 5 5
+6 6 6
+7 7 7
+8 8 8
+9 9 9
+10 10 10
+SELECT * FROM t2;
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+SET @save_use_stat_tables= @@use_stat_tables;
+SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+SET use_stat_tables='never';
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10
+1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+SET use_stat_tables= @save_use_stat_tables;
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+set optimizer_use_condition_selectivity=1;
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+DROP TABLE t1,t2;
+SET optimizer_switch=@save_optimizer_switch;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
new file mode 100644
index 00000000000..ae67350edca
--- /dev/null
+++ b/mysql-test/main/join_cardinality.test
@@ -0,0 +1,310 @@
+--source include/have_sequence.inc
+
+CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
+
+CREATE TABLE t2(a INT, b INT, key(b));
+INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
+
+
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
+
+--echo #
+--echo # RUNNING the join cardinality tests when statistics are
+--echo # available only from indexes
+--echo #
+
+--source include/join_cardinality.inc
+
+DROP TABLE t1,t2;
+
+CREATE TABLE t1(a INT, b INT, c INT);
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
+
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
+
+--disable_result_log
+--disable_query_log
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(a,b) INDEXES();
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+--enable_query_log
+--enable_result_log
+
+--echo #
+--echo # RUNNING the join cardinality tests when only statistics
+--echo # are available from stat tables
+--echo #
+
+--source include/join_cardinality.inc
+
+DROP TABLE t1,t2;
+
+
+--echo #
+--echo # Combination with statistics from stat tables and
+--echo # statistics from indexes
+--echo #
+
+CREATE TABLE t1(a INT, b INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq from seq_1_to_100;
+
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (b) INDEXES ();
+
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+ANALYZE SELECT * from t1 WHERE t1.b > 3 AND t1.a=t1.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+ANALYZE SELECT * from t1 WHERE t1.b > 3 OR t1.a=t1.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Tests with simple predicates
+--echo #
+
+CREATE TABLE t1(a INT, b INT, c VARCHAR(10), KEY(a), KEY(b));
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10;
+
+--echo #
+--echo # Tests with LIKE PREDICATES
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE t1.a LIKE '%2%';
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+EXPLAIN SELECT * from t1 WHERE t1.a LIKE '2%';
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # Tests with NULL predicates
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE t1.a IS NULL;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE t1.a IS NOT NULL;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # Tests with </<=/>/>=/BETWEEN
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE t1.a > 5;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE 5 < t1.a;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE t1.a >= 10 and t1.b <= 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE t1.a BETWEEN 1 AND 5;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE 5 BETWEEN 1 AND t1.a;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE 1 BETWEEN t1.a AND t1.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # Tests with IN predicate
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE t1.a IN (1,2,3);
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE (t1.a,t1.b) IN ((1,1), (2,2), (3,3));
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE
+t1.a=t1.b AND (t1.a,t1.b) IN ((1,1), (2,2), (3,3));
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1
+WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3));
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+--echo #
+--echo # Tests using different functional predicates
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE a < 5 XOR a > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE abs(a) > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1 WHERE a > 10+1;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Tests when updating stats tables manually
+--echo #
+
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 SELECT seq, seq from seq_1_to_100;
+
+INSERT INTO
+mysql.column_stats(db_name, table_name, column_name, nulls_ratio) VALUES ('test', 't1', 'a', 0);
+
+--echo #
+--echo # estimates from stat tables not available, all the fields should be NOT NULL
+--echo #
+
+EXPLAIN SELECT * from t1 WHERE a < 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Using multiple equalities at the top level
+--echo #
+
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 SELECT seq, seq from seq_1_to_10;
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_10;
+
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+
+EXPLAIN
+SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN
+SELECT * FROM t1,t2 WHERE t1.a= t2.a AND t1.b= t2.b;
+
+--echo #
+--echo # Tests with views (using Item_direct_view_ref)
+--echo #
+
+CREATE VIEW v1 AS SELECT a, b FROM t1;
+
+EXPLAIN SELECT * FROM v1,t1 where v1.b = 10 AND v1.b=t1.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1,t2;
+DROP VIEW v1;
+
+
+--echo #
+--echo # TESTS to get ndv from rec_per_key
+--echo #
+
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
+ALTER TABLE t1 ADD KEY k1(a);
+
+set @save_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='never';
+analyze table t1;
+
+CREATE TABLE t2 like t1;
+INSERT INTO t2 SELECT * FROM t1;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+EXPLAIN SELECT * FROM t1, t2 where t1.a=t2.a and t2.a<3;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+set use_stat_tables=@save_use_stat_tables;
+
+DROP TABLE t1,t2;
+
+--echo #
+--echo # Tests with multi-component keys
+--echo #
+
+CREATE TABLE t1 (a INT, b INT, KEY(a,b));
+INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
+
+EXPLAIN SELECT * FROM t1 where t1.a<3 AND t1.b < 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+ALTER TABLE t1 ADD KEY k1(a);
+EXPLAIN SELECT * FROM t1 where t1.a<3;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Tests when statistics from stat tables cannot be used because of the system variables
+--echo # use_stat_tables and optimizer_use_condition_selectivity
+--echo #
+
+CREATE TABLE t1(a INT, b INT, c INT);
+INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_10;
+CREATE TABLE t2(a INT, b INT);
+INSERT INTO t2 SELECT seq, seq from seq_1_to_10;
+
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+
+SELECT * FROM t1;
+SELECT * FROM t2;
+
+SET @save_use_stat_tables= @@use_stat_tables;
+SET @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+SET use_stat_tables='never';
+
+EXPLAIN SELECT * FROM t1,t2 WHERE t1.a=t2.b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SET use_stat_tables= @save_use_stat_tables;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+set optimizer_use_condition_selectivity=1;
+
+EXPLAIN SELECT * FROM t1 WHERE t1.a > 10;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SET optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+
+DROP TABLE t1,t2;
+
+SET optimizer_switch=@save_optimizer_switch;
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 8f4f6ec97ce..c10018adab6 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -124,6 +124,7 @@ select * from v1 {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -270,6 +271,7 @@ select * from (select * from t1 where t1.a=1)q {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -421,6 +423,7 @@ select * from v2 {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -985,6 +988,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -1434,6 +1438,7 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -1628,6 +1633,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -1811,6 +1817,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -2076,6 +2083,7 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -2848,6 +2856,7 @@ explain extended select * from t1 where a in (select pk from t10) {
"execution_plan_for_potential_materialization": {
"steps": [
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -2878,6 +2887,7 @@ explain extended select * from t1 where a in (select pk from t10) {
}
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -3296,6 +3306,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -3924,6 +3935,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -4422,6 +4434,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_
"execution_plan_for_potential_materialization": {
"steps": [
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -4499,6 +4512,7 @@ explain select * from t1 where a in (select t_inner_1.a from t1 t_inner_1, t1 t_
}
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -4948,6 +4962,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
}
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -6521,6 +6536,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
"execution_plan_for_potential_materialization": {
"steps": [
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -6595,6 +6611,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
@@ -6672,6 +6689,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
}
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
diff --git a/mysql-test/main/opt_trace_index_merge.result b/mysql-test/main/opt_trace_index_merge.result
index 3b79a7a44e8..da2d0591002 100644
--- a/mysql-test/main/opt_trace_index_merge.result
+++ b/mysql-test/main/opt_trace_index_merge.result
@@ -200,6 +200,7 @@ explain select * from t1 where a=1 or b=1 {
]
},
{
+ "cardinality_accurate": false,
"considered_execution_plans": [
{
"plan_prefix": [],
diff --git a/mysql-test/main/opt_trace_index_merge_innodb.result b/mysql-test/main/opt_trace_index_merge_innodb.result
index 509569021a5..49c59c96058 100644
--- a/mysql-test/main/opt_trace_index_merge_innodb.result
+++ b/mysql-test/main/opt_trace_index_merge_innodb.result
@@ -198,6 +198,7 @@ explain select * from t1 where pk1 != 0 and key1 = 1 {
]
},
{
+ "cardinality_accurate": true,
"considered_execution_plans": [
{
"plan_prefix": [],
diff --git a/sql/field.cc b/sql/field.cc
index fe3aebce05d..91dc1deb360 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -36,6 +36,8 @@
#include "tztime.h" // struct Time_zone
#include "filesort.h" // change_double_for_sort
#include "log_event.h" // class Table_map_log_event
+#include "sql_statistics.h"
+#include "sql_partition.h"
#include <m_ctype.h>
// Maximum allowed exponent value for converting string to decimal
@@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
field_index= 0;
cond_selectivity= 1.0;
next_equal_field= NULL;
+ stats_available= 0;
}
@@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length)
}
+/*
+ @brief
+ Check if statistics for a column are available via keys
+
+ @details
+ If the column is the first component of a key, then statistics
+ for the column are available from the range optimizer.
+ Sets the bit in Field::stats_table
+ a) NDV is available
+ b) Statistics are available for the non-const argument of a
+ range predicate
+*/
+
+void Field::statistics_available_via_keys()
+{
+ uint key;
+ key_map::Iterator it(key_start);
+ while ((key= it++) != key_map::Iterator::BITMAP_END)
+ {
+ KEY *keyinfo= table->key_info + key;
+ if (keyinfo->usable_key_parts == 1 &&
+ field_index + 1 == keyinfo->key_part->fieldnr)
+ {
+ stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+ return;
+ }
+ }
+}
+
+
+/*
+ @brief
+ Check if statistics for a column are available via stat tables
+*/
+
+void Field::statistics_available_via_stat_tables()
+{
+ THD *thd= table->in_use;
+ if (!(check_eits_preferred(thd) &&
+ thd->variables.optimizer_use_condition_selectivity > 2))
+ return;
+ if (!(table->stats_is_read &&
+ read_stats && !read_stats->no_stat_values_provided()))
+ return;
+ stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+ if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+}
+
+
+/*
+ @brief
+ Check if statistics for a column are available via indexes or stat tables
+
+ @retval
+ TRUE : statistics available for the column
+ FALSE : OTHERWISE
+*/
+
+bool Field::is_statistics_available_for_range_predicates()
+{
+ if (!(stats_available & (1 << STATISTICS_CACHED)))
+ {
+ is_statistics_available();
+ stats_available|= (1 << STATISTICS_CACHED);
+ }
+ return (stats_available & (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE));
+}
+
+
+void Field::is_statistics_available()
+{
+ statistics_available_via_keys();
+ statistics_available_via_stat_tables();
+ is_ndv_available_via_keys();
+ is_ndv_available_via_stat_tables();
+}
+
+/*
+ @brief
+ Check if ndv for a column are available via indexes or stat tables
+
+ @retval
+ TRUE : ndv available for the column
+ FALSE : OTHERWISE
+*/
+
+bool Field::is_ndv_available()
+{
+ if (!(stats_available & (1 << STATISTICS_CACHED)))
+ {
+ is_statistics_available();
+ stats_available|= (1 << STATISTICS_CACHED);
+ }
+ return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE));
+}
+
+
+/*
+ @brief
+ Check if number of distinct values(ndv) for a column are available via keys
+
+ @retval
+ TRUE : ndv available from keys
+ FALSE : otherwise
+*/
+
+bool Field::is_ndv_available_via_keys()
+{
+ uint key;
+ key_map::Iterator it(key_start);
+ while ((key= it++) != key_map::Iterator::BITMAP_END)
+ {
+ KEY *keyinfo= table->key_info + key;
+ if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0))
+ {
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/*
+ @brief
+ Check if ndv for a column are available via statistical tables
+
+ @retval
+ TRUE : ndv available from statistical tables
+ FALSE : otherwise
+*/
+
+bool Field::is_ndv_available_via_stat_tables()
+{
+ if (!check_eits_preferred(table->in_use))
+ return false;
+ if (!(read_stats && !read_stats->no_stat_values_provided() &&
+ !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)))
+ return false;
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ return true;
+}
+
+
+/*
+ @brief
+ Checks if a field is the first component of a given key
+
+ @param
+ key given key
+
+ @retval
+ TRUE : field is the first component of the given key
+ FALSE : otherwise
+*/
+
+bool Field::is_first_component_of_key(KEY *key)
+{
+ DBUG_ASSERT(key->usable_key_parts >= 1);
+ return field_index + 1 == key->key_part->fieldnr;
+}
+
+
+/*
+ Check whether EITS statistics for a field are usable or not
+
+ TRUE : Use EITS for the columns
+ FALSE: Otherwise
+*/
+
+bool Field::is_eits_usable()
+{
+ // check if column_statistics was allocated for this field
+ if (!read_stats)
+ return false;
+
+ DBUG_ASSERT(table->stats_is_read);
+
+ /*
+ (1): checks if we have EITS statistics for a particular column
+ (2): Don't use EITS for GEOMETRY columns
+ (3): Disabling reading EITS statistics for columns involved in the
+ partition list of a table. We assume the selectivity for
+ such columns would be handled during partition pruning.
+ */
+
+ return !read_stats->no_stat_values_provided() && //(1)
+ type() != MYSQL_TYPE_GEOMETRY && //(2)
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+ (!table->part_info ||
+ !table->part_info->field_in_partition_expr(this)) && //(3)
+#endif
+ true;
+}
+
+
Virtual_column_info* Virtual_column_info::clone(THD *thd)
{
Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this);
diff --git a/sql/field.h b/sql/field.h
index dfc02149f9d..c60c5e3bece 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -869,6 +869,18 @@ public:
*/
Column_statistics_collected *collected_stats;
+ enum stats_availability
+ {
+ STATISTICS_CACHED=0,
+ STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE,
+ STATISTICS_FOR_NDV_AVAILABLE
+ };
+
+ /*
+ Caches the value of whether statistics are available for a field or not.
+ */
+ uint stats_available;
+
/*
This is additional data provided for any computed(virtual) field,
default function or check constraint.
@@ -1907,6 +1919,16 @@ public:
/* Mark field in read map. Updates also virtual fields */
void register_field_in_read_map();
+ bool is_first_component_of_key(KEY *key);
+ void statistics_available_via_keys();
+ void statistics_available_via_stat_tables();
+ void is_statistics_available();
+ bool is_statistics_available_for_range_predicates();
+ bool is_ndv_available();
+ bool is_ndv_available_via_stat_tables();
+ bool is_ndv_available_via_keys();
+ bool is_eits_usable();
+
virtual Compression_method *compression_method() const { return 0; }
virtual Virtual_tmp_table **virtual_tmp_table_addr()
diff --git a/sql/item.cc b/sql/item.cc
index 52274380cd1..f80db684bb3 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd,
}
+/*
+ @brief
+ Check if selectivity estimates are accurate for a conditional formula
+
+ @details
+ This function checks whether this item belongs to a certain class of
+ condition for which we can calculate an accurate selectivity estimate.
+
+ The definition of the class of condition is recursive.
+ 1. Simple formula
+ a. Formula in the form of range predicates:
+
+ The predicate would be of type:
+ col op const
+ where op can be
+
+ op:
+ | >
+ | >=
+ | <
+ | <=
+ | =
+ | <>
+ Also the other cases are with
+ [NOT] IN predicate,
+ [NOT] NULL predicate and
+ LIKE predicate.
+ The predicate should have only one non-constant argument and
+ this argument will be a reference to a column that is used either
+ as the first component of an index or statistics are available via
+ statistical tables.
+
+ b. Equalities:
+ For an equality to have accurate selectivity estimates,
+ the number of distinct values for each column in the equality
+ needs to be known.
+ Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a)
+ For this case we need to make sure we know number of distinct
+ values for t1.a and t2.a
+
+ The number of distinct values for a column can be known by
+ 1) from indexes via rec_per_key
+ 2) from statistical tables via avg_frequency.
+
+ 2. AND / OR formula over formulas defined in section 1 of the definition.
+
+ a) AND Formula
+ For AND formula the check for accurate selectivity estimates depends
+ whether or not the AND formula is at the top level.
+
+ i) Top level
+ For an AND formula at the top level, we need to check if
+ accurate estimates are available for all the predicates
+ inside an AND formula.
+ If this is true then accurate selectivity estimates are available
+ for the AND formula.
+
+ Eg: t1.a > 10 and t2.a < 5
+
+ if we have accurate selectivity estimates
+ for t1.a > 10 and t2.a < 5 via indexes or statistical tables,
+ then selectivity estimates for this AND formula are accurate
+
+ ii) Non-top level
+ For all the predicates inside an AND formula
+ accurate selectivity estimates are needed
+ and each predicate need to be resolved by one
+ column (table column). If this scenario is satisfied then
+ accurate selectivity estimates is available for the AND formula.
+ Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0)
+
+ b) OR Formula
+
+ For an OR predicate, we need to make sure that the
+ whole OR predicate can be resolved by one column
+ directly or indirectly (that is via multiple equalities).
+ If this is possible then for the resolved column we need to have
+ statistics either from the first component of an index or
+ via statistical tables.
+
+ Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0);
+
+ In the end for all fields we may have selectivity from an index or
+ statistical tables.
+
+ @notes
+ The implementation for this function use the 'walk' method to traverse
+ the tree of this item with predicate_selectivity_checker() as the
+ call-back parameter of the method.
+
+
+ @retval
+ TRUE selectivity estimates are accurate
+ FALSE OTHERWISE
+*/
+
+bool Item::with_accurate_selectivity_estimation()
+{
+ if (type() == Item::COND_ITEM &&
+ ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC)
+ {
+ List_iterator<Item> li(*((Item_cond*) this)->argument_list());
+ Item *item;
+ while ((item= li++))
+ {
+ SAME_FIELD arg= {NULL, false};
+ if (item->walk(&Item::predicate_selectivity_checker, 0, &arg))
+ return false;
+ }
+ return true;
+ }
+ SAME_FIELD arg= {NULL, false};
+ return !walk(&Item::predicate_selectivity_checker, 0, &arg);
+}
+
+
static
Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel)
{
@@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
}
+/*
+ @brief
+ Checks if a formula of a condition contains the same column
+
+ @details
+ In the function we try to check if a formula of a condition depends
+ (directly or indirectly through equalities inferred from the
+ conjuncted multiple equalities) only on one column.
+
+ Eg:
+ WHERE clause is:
+ t1.a=t2.b and (t1.a > 5 or t2.b < 1);
+
+ the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
+ equalities to conclude that it depends on one column.
+
+ This is used mostly for OR conjuncts where we need to make sure
+ that the entire OR conjunct contains only one column, so that we may
+ get accurate estimates.
+
+ @retval
+ TRUE : the formula does not depend on one column
+ FALSE : OTHERWISE
+*/
+
+bool Item_field::dep_on_one_column(void *arg)
+{
+ SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
+
+ /*
+ The same_field_arg is passed as a parameter because when we start walking
+ over the condition tree we don't know which column the predicate will be
+ dependent on. So as soon as we encounter a leaf of the condition tree
+ which is a field item, we set the SAME_FIELD::item to the found
+ field item and then compare the rest of the fields in the predicate with
+ the field item.
+ */
+
+ if (same_field_arg->item == NULL)
+ {
+ same_field_arg->item= this;
+ same_field_arg->is_stats_available=
+ field->is_statistics_available_for_range_predicates() ||
+ (item_equal &&
+ item_equal->is_statistics_available_for_range_predicates());
+ return !same_field_arg->is_stats_available;
+ }
+
+ /* Found the same field while traversing the condition tree */
+ DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM);
+ if (((Item_field*)same_field_arg->item->real_item())->field == field)
+ return false;
+
+ if (!same_field_arg->item->get_item_equal())
+ return true;
+
+ return !(same_field_arg->item->get_item_equal() == item_equal);
+}
+
+
+bool Item_direct_view_ref::dep_on_one_column(void *arg)
+{
+ SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
+ DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
+ Item_field *field_item= (Item_field*)real_item();
+
+ /*
+ The same_field_arg is passed as a parameter because when we start walking
+ over the condition tree we don't know which column the predicate will be
+ dependent on. So as soon as we encounter a leaf of the condition tree
+ which is a field item, we set the SAME_FIELD::item to the found
+ field item and then compare the rest of the fields in the predicate with
+ the field item.
+ */
+
+ if (same_field_arg->item == NULL)
+ {
+ same_field_arg->item= this;
+ same_field_arg->is_stats_available=
+ field_item->field->is_statistics_available_for_range_predicates() ||
+ (item_equal &&
+ item_equal->is_statistics_available_for_range_predicates());
+ return !same_field_arg->is_stats_available;
+ }
+
+ /* Found the same field while traversing the condition tree */
+ DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM);
+ if (((Item_field*)same_field_arg->item->real_item())->field == field_item->field)
+ return false;
+
+ if (!same_field_arg->item->get_item_equal())
+ return true;
+
+ return !(same_field_arg->item->get_item_equal() == item_equal);
+}
+
+
bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map)
{
table_map used= used_tables();
diff --git a/sql/item.h b/sql/item.h
index fb480b4c578..8a6b16ad82d 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -446,6 +446,26 @@ typedef struct replace_equal_field_arg
struct st_join_table *context_tab;
} REPLACE_EQUAL_FIELD_ARG;
+
+/*
+ Structure storing information for a field on which the entire predicate is
+ dependent on (directly or indirectly via equalities)
+*/
+typedef struct same_field
+{
+ /*
+ field item for the first encountered column while traversing
+ over the conditional predicate
+ */
+ Item *item;
+ /*
+ Set to true if the statistics for the field are available
+ directly (via keys or stat tables) or indirectly (via equalities)
+ */
+ bool is_stats_available;
+}SAME_FIELD;
+
+
class Settable_routine_parameter
{
public:
@@ -1968,7 +1988,25 @@ public:
virtual bool count_sargable_conds(void *arg) { return 0; }
virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; }
virtual bool exists2in_processor(void *arg) { return 0; }
+
virtual bool find_selective_predicates_list_processor(void *arg) { return 0; }
+
+ bool with_accurate_selectivity_estimation();
+
+ /*
+ @brief
+ Check if selectivity of a predicate is available via indexes or EITS
+
+ @param
+ arg Structure storing information whether the AND/OR conjunct
+ can be resolved via a single column.
+
+ @retval
+ FALSE : SUCCESS
+ TRUE : OTHERWISE
+ */
+ virtual bool predicate_selectivity_checker(void *arg) { return FALSE; }
+ virtual bool dep_on_one_column(void *arg) { return true; }
bool cleanup_is_expensive_cache_processor(void *arg)
{
is_expensive_cache= (int8)(-1);
@@ -3576,6 +3614,7 @@ public:
return field->table->pos_in_table_list->outer_join;
}
bool check_index_dependence(void *arg);
+ bool dep_on_one_column(void *arg);
friend class Item_default_value;
friend class Item_insert_value;
friend class st_select_lex_unit;
@@ -5946,6 +5985,7 @@ public:
Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg)
{ return this; }
Item *remove_item_direct_ref() { return this; }
+ bool dep_on_one_column(void *arg);
};
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 83eb605f463..8084af72f60 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2106,6 +2106,31 @@ bool Item_func_between::count_sargable_conds(void *arg)
return 0;
}
+bool Item_func_between::predicate_selectivity_checker(void *arg)
+{
+ if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
+ {
+ if (is_range_predicate(args[0], args[1]) &&
+ is_range_predicate(args[0], args[2]))
+ {
+ return args[0]->dep_on_one_column(arg);
+ }
+ return true;
+ }
+
+ for (uint i= 1 ; i < arg_count ; i++)
+ {
+ if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
+ {
+ if (!is_range_predicate(args[i], args[0]))
+ return true;
+ if (args[i]->dep_on_one_column(arg))
+ return true;
+ }
+ }
+ return false;
+}
+
void Item_func_between::fix_after_pullout(st_select_lex *new_parent,
Item **ref, bool merge)
@@ -4290,6 +4315,16 @@ bool Item_func_in::count_sargable_conds(void *arg)
}
+bool Item_func_in::predicate_selectivity_checker(void *arg)
+{
+ if (!all_items_are_consts(args + 1, arg_count - 1))
+ return true;
+ if (key_item()->dep_on_one_column(arg))
+ return true;
+ return false;
+}
+
+
bool Item_func_in::list_contains_null()
{
Item **arg,**arg_end;
@@ -5516,6 +5551,16 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg)
}
+bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
+{
+ if (is_range_predicate(args[0], NULL))
+ {
+ return args[0]->dep_on_one_column(arg);
+ }
+ return true;
+}
+
+
longlong Item_func_isnull::val_int()
{
DBUG_ASSERT(fixed == 1);
@@ -5597,6 +5642,17 @@ bool Item_bool_func2::count_sargable_conds(void *arg)
return 0;
}
+
+bool Item_bool_func2::predicate_selectivity_checker(void *arg)
+{
+ if (is_range_predicate(args[0], args[1]))
+ return args[0]->dep_on_one_column(arg);
+ if (is_range_predicate(args[1], args[0]))
+ return args[1]->dep_on_one_column(arg);
+ return true;
+}
+
+
void Item_func_like::print(String *str, enum_query_type query_type)
{
args[0]->print_parenthesised(str, query_type, precedence());
@@ -5696,8 +5752,18 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
bool sargable_pattern= with_sargable_pattern();
param->thd->mem_root= tmp_root;
return sargable_pattern ?
- Item_bool_func2::get_mm_tree(param, cond_ptr) :
- Item_func::get_mm_tree(param, cond_ptr);
+ Item_bool_func2::get_mm_tree(param, cond_ptr) :
+ Item_func::get_mm_tree(param, cond_ptr);
+}
+
+
+bool Item_func_like::predicate_selectivity_checker(void *arg)
+{
+ if (with_sargable_pattern())
+ {
+ return args[0]->dep_on_one_column(arg);
+ }
+ return true;
}
@@ -7139,6 +7205,49 @@ bool Item_equal::count_sargable_conds(void *arg)
}
+bool Item_equal::predicate_selectivity_checker(void *arg)
+{
+ /*
+ For equality conditions like tbl1.col = tbl2.col
+ we only want to know if the number of distinct values (ndv) is
+ available for all the fields in the multiple equality or not.
+ */
+ Item_equal_fields_iterator it(*this);
+ while (it++)
+ {
+ Field *field= it.get_curr_field();
+ if (!(field->is_ndv_available()))
+ return true;
+ }
+
+ it.rewind();
+ Item *item;
+ SAME_FIELD *same_field= (SAME_FIELD *) arg;
+ while ((item= it++))
+ {
+ if (same_field->item)
+ return item->dep_on_one_column(arg);
+ item->dep_on_one_column(arg);
+ break;
+ }
+ return false;
+}
+
+
+bool Item_equal::is_statistics_available_for_range_predicates()
+{
+ bool found= false;
+ Item_equal_fields_iterator it(*this);
+ while (it++)
+ {
+ Field *field= it.get_curr_field();
+ if (field->is_statistics_available_for_range_predicates())
+ found= true;
+ }
+ return found;
+}
+
+
/**
@brief
Evaluate multiple equality
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index fa715badfc7..f467efd0e10 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -227,6 +227,7 @@ public:
bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; }
uint decimal_precision() const { return 1; }
bool need_parentheses_in_default() { return true; }
+ bool predicate_selectivity_checker(void *arg) { return TRUE; }
};
@@ -418,6 +419,7 @@ public:
COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value,
bool top_level);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
/*
Specifies which result type the function uses to compare its arguments.
This method is used in equal field propagation.
@@ -936,6 +938,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
void add_key_fields(JOIN *join, KEY_FIELD **key_fields,
uint *and_level, table_map usable_tables,
SARGABLE_PARAM **sargables);
@@ -2473,6 +2476,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_in>(thd, this); }
Item *build_clone(THD *thd)
@@ -2570,6 +2574,7 @@ public:
return FALSE;
}
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
};
@@ -2813,6 +2818,7 @@ public:
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_like>(thd, this); }
+ bool predicate_selectivity_checker(void *arg);
};
@@ -3220,6 +3226,8 @@ public:
uint elements_count() { return equal_items.elements; }
friend class Item_equal_fields_iterator;
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
+ bool is_statistics_available_for_range_predicates();
Item *multiple_equality_transformer(THD *thd, uchar *arg);
friend class Item_equal_iterator<List_iterator_fast,Item>;
friend class Item_equal_iterator<List_iterator,Item>;
@@ -3371,6 +3379,7 @@ public:
SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_and>(thd, this); }
+ bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
inline bool is_cond_and(Item *item)
@@ -3395,6 +3404,7 @@ public:
Item *neg_transformer(THD *thd);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_or>(thd, this); }
+ bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
class Item_func_dyncol_check :public Item_bool_func
diff --git a/sql/item_func.h b/sql/item_func.h
index 6a4a9fa5dae..d04cc8643cf 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -399,6 +399,12 @@ public:
Item_func *get_item_func() { return this; }
bool is_simplified_cond_processor(void *arg)
{ return const_item() && !val_int(); }
+ bool predicate_selectivity_checker(void *arg)
+ {
+ if (const_item() && !is_expensive())
+ return false;
+ return true;
+ }
};
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index adc0572cb1c..d3f49097189 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3105,7 +3105,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index) &&
- is_eits_usable(field))
+ field->is_eits_usable())
parts++;
}
@@ -3126,7 +3126,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index))
{
- if (!is_eits_usable(field))
+ if (!field->is_eits_usable())
continue;
uint16 store_length;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index bb9d82a7039..3553ee78c5b 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8296,6 +8296,11 @@ choose_plan(JOIN *join, table_map join_tables)
jtab_sort_func, (void*)join->emb_sjm_nest);
Json_writer_object wrapper(thd);
+
+ if (join->conds)
+ wrapper.add("cardinality_accurate",
+ join->conds->with_accurate_selectivity_estimation());
+
Json_writer_array trace_plan(thd,"considered_execution_plans");
if (!join->emb_sjm_nest)
@@ -29449,6 +29454,38 @@ void unpack_to_base_table_fields(TABLE *table)
}
+
+/*
+ @brief
+ Checks if a predicate is a range predicate with a constant part
+
+ @param
+
+ @item the item referring to the field of the table
+ @value the item referring to the expression on the
+ rhs of a predicate
+
+ @details
+ Range predicate is defined as the form of field op const
+ where op can be operators like </<=/=/>/>=/BETWEEN etc.
+ Also the statistics for the field should be available via
+ an index or statistical tables.
+
+ @retval
+ TRUE : Success
+ FALSE : Otherwise
+*/
+
+bool is_range_predicate(Item *item, Item *value)
+{
+ Item *field= item->real_item();
+ if (field->type() == Item::FIELD_ITEM && !field->const_item() &&
+ (!value || !value->is_expensive()))
+ return true;
+ return false;
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 2aba63ddc79..4d5a6e0abf5 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -2531,5 +2531,6 @@ void propagate_new_equalities(THD *thd, Item *cond,
List<Item_equal> *new_equalities,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+bool is_range_predicate(Item *item, Item *value);
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 2636299e330..71d472feaba 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -31,7 +31,6 @@
#include "opt_range.h"
#include "uniques.h"
#include "sql_show.h"
-#include "sql_partition.h"
/*
The system variable 'use_stat_tables' can take one of the
@@ -3159,7 +3158,10 @@ static void dump_stats_from_share_to_table(TABLE *table)
Field **field_ptr= table_share->field;
Field **table_field_ptr= table->field;
for ( ; *field_ptr; field_ptr++, table_field_ptr++)
+ {
(*table_field_ptr)->read_stats= (*field_ptr)->read_stats;
+ (*table_field_ptr)->stats_available= (*field_ptr)->stats_available;
+ }
table->stats_is_read= true;
}
@@ -3978,37 +3980,3 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table)
}
return false;
}
-
-/*
- Check wheter we can use EITS statistics for a field or not
-
- TRUE : Use EITS for the columns
- FALSE: Otherwise
-*/
-
-bool is_eits_usable(Field *field)
-{
- Column_statistics* col_stats= field->read_stats;
-
- // check if column_statistics was allocated for this field
- if (!col_stats)
- return false;
-
- DBUG_ASSERT(field->table->stats_is_read);
-
- /*
- (1): checks if we have EITS statistics for a particular column
- (2): Don't use EITS for GEOMETRY columns
- (3): Disabling reading EITS statistics for columns involved in the
- partition list of a table. We assume the selecticivity for
- such columns would be handled during partition pruning.
- */
-
- return !col_stats->no_stat_values_provided() && //(1)
- field->type() != MYSQL_TYPE_GEOMETRY && //(2)
-#ifdef WITH_PARTITION_STORAGE_ENGINE
- (!field->table->part_info ||
- !field->table->part_info->field_in_partition_expr(field)) && //(3)
-#endif
- true;
-}
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 20ecf06bfee..34471fd5270 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -138,7 +138,6 @@ double get_column_range_cardinality(Field *field,
key_range *max_endp,
uint range_flag);
bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
-bool is_eits_usable(Field* field);
class Histogram
{
diff --git a/sql/table.cc b/sql/table.cc
index c48a6fed89a..02f4ece9c40 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -5435,6 +5435,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
{
(*f_ptr)->next_equal_field= NULL;
(*f_ptr)->cond_selectivity= 1.0;
+ (*f_ptr)->stats_available= 0;
}
notnull_cond= 0;