summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOleksandr Byelkin <sanja@mariadb.com>2023-05-04 11:50:34 +0200
committerOleksandr Byelkin <sanja@mariadb.com>2023-05-04 11:50:34 +0200
commit16e5bc4cbc35d1be32c38da681dedfa527eb3434 (patch)
treefc36dc9b1714343036767386f385739f1947635b
parent13a294a2c999b205cad4ef7af725fab6556cf78f (diff)
parentd7fae797f4d4b1b32a910df06567c2536a258c5a (diff)
downloadmariadb-git-16e5bc4cbc35d1be32c38da681dedfa527eb3434.tar.gz
Merge branch '10.9' into 10.10
-rw-r--r--mysql-test/main/derived_split_innodb.result537
-rw-r--r--mysql-test/main/derived_split_innodb.test212
-rw-r--r--mysql-test/main/explain_innodb.result18
-rw-r--r--mysql-test/main/explain_innodb.test19
-rw-r--r--mysql-test/main/opt_trace.result250
-rw-r--r--mysql-test/main/opt_trace.test77
-rw-r--r--mysql-test/main/selectivity.result69
-rw-r--r--mysql-test/main/selectivity.test84
-rw-r--r--mysql-test/main/selectivity_innodb.result69
-rw-r--r--mysql-test/main/selectivity_innodb_notembedded.result104
-rw-r--r--mysql-test/main/selectivity_innodb_notembedded.test16
-rw-r--r--mysql-test/main/selectivity_notembedded.result95
-rw-r--r--mysql-test/main/selectivity_notembedded.test121
-rw-r--r--sql/item_subselect.cc6
-rw-r--r--sql/opt_split.cc240
-rw-r--r--sql/sql_select.cc43
-rw-r--r--sql/sql_select.h31
17 files changed, 1636 insertions, 355 deletions
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 74876836a53..2ca0de3bd91 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -287,4 +287,541 @@ id select_type table type possible_keys key key_len ref rows Extra
2 DERIVED t4 ALL NULL NULL NULL NULL 40 Using filesort
drop table t3, t4;
# End of 10.3 tests
+#
+# MDEV-26301: Split optimization refills temporary table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+explain select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+# The important part in the below output is:
+# "lateral": 1,
+# "query_block": {
+# "select_id": 2,
+# "r_loops": 5, <-- must be 5, not 30.
+analyze format=json select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+ANALYZE
+{
+ "query_block": {
+ "select_id": 1,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "const_condition": "1",
+ "nested_loop": [
+ {
+ "table": {
+ "table_name": "t1",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 5,
+ "r_rows": 5,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ }
+ },
+ {
+ "table": {
+ "table_name": "t2",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 2,
+ "r_rows": 2,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ }
+ },
+ {
+ "table": {
+ "table_name": "t3",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 10,
+ "rows": 3,
+ "r_rows": 3,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ }
+ },
+ {
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 30,
+ "rows": 10,
+ "r_rows": 1,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))",
+ "materialized": {
+ "lateral": 1,
+ "query_block": {
+ "select_id": 2,
+ "r_loops": 5,
+ "r_total_time_ms": "REPLACED",
+ "outer_ref_condition": "t1.b is not null",
+ "nested_loop": [
+ {
+ "table": {
+ "table_name": "t10",
+ "access_type": "ref",
+ "possible_keys": ["grp_id"],
+ "key": "grp_id",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 100,
+ "r_rows": 100,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ }
+ },
+ {
+ "block-nl-join": {
+ "table": {
+ "table_name": "t11",
+ "access_type": "ALL",
+ "r_loops": 5,
+ "rows": 10,
+ "r_rows": 10,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "buffer_type": "flat",
+ "buffer_size": "1Kb",
+ "join_type": "BNL",
+ "attached_condition": "trigcond(t11.col1 = t10.col1)",
+ "r_loops": 500,
+ "r_filtered": 10,
+ "r_unpack_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "r_effective_rows": 10
+ }
+ }
+ ]
+ }
+ }
+ }
+ }
+ ]
+ }
+}
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where
+1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
+1 PRIMARY t2 ref a a 5 test.t1.b 2
+1 PRIMARY t3 ref a a 5 test.t1.b 3
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100 Using index condition
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a b a b a b grp_id count(*)
+1 1 1 1 1 1 1 100
+1 1 1 1 1 2 1 100
+1 1 1 1 1 3 1 100
+1 1 1 2 1 1 1 100
+1 1 1 2 1 2 1 100
+1 1 1 2 1 3 1 100
+2 2 2 1 2 1 2 100
+2 2 2 1 2 2 2 100
+2 2 2 1 2 3 2 100
+2 2 2 2 2 1 2 100
+2 2 2 2 2 2 2 100
+2 2 2 2 2 3 2 100
+3 3 3 1 3 1 3 100
+3 3 3 1 3 2 3 100
+3 3 3 1 3 3 3 100
+3 3 3 2 3 1 3 100
+3 3 3 2 3 2 3 100
+3 3 3 2 3 3 3 100
+4 4 4 1 4 1 4 100
+4 4 4 1 4 2 4 100
+4 4 4 1 4 3 4 100
+4 4 4 2 4 1 4 100
+4 4 4 2 4 2 4 100
+4 4 4 2 4 3 4 100
+5 5 5 1 5 1 5 100
+5 5 5 1 5 2 5 100
+5 5 5 1 5 3 5 100
+5 5 5 2 5 1 5 100
+5 5 5 2 5 2 5 100
+5 5 5 2 5 3 5 100
+set join_cache_level=4;
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 hash_ALL NULL #hash#$hj 5 test.t10.col1 10 Using where; Using join buffer (flat, BNLH join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a b a b a b grp_id count(*)
+1 1 1 1 1 1 1 100
+1 1 1 1 1 2 1 100
+1 1 1 1 1 3 1 100
+1 1 1 2 1 1 1 100
+1 1 1 2 1 2 1 100
+1 1 1 2 1 3 1 100
+2 2 2 1 2 1 2 100
+2 2 2 1 2 2 2 100
+2 2 2 1 2 3 2 100
+2 2 2 2 2 1 2 100
+2 2 2 2 2 2 2 100
+2 2 2 2 2 3 2 100
+3 3 3 1 3 1 3 100
+3 3 3 1 3 2 3 100
+3 3 3 1 3 3 3 100
+3 3 3 2 3 1 3 100
+3 3 3 2 3 2 3 100
+3 3 3 2 3 3 3 100
+4 4 4 1 4 1 4 100
+4 4 4 1 4 2 4 100
+4 4 4 1 4 3 4 100
+4 4 4 2 4 1 4 100
+4 4 4 2 4 2 4 100
+4 4 4 2 4 3 4 100
+5 5 5 1 5 1 5 100
+5 5 5 1 5 2 5 100
+5 5 5 1 5 3 5 100
+5 5 5 2 5 1 5 100
+5 5 5 2 5 2 5 100
+5 5 5 2 5 3 5 100
+set join_cache_level=default;
+drop index a on t2;
+drop index a on t3;
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ALL NULL NULL NULL NULL 50 Using where; Using join buffer (flat, BNL join)
+1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (incremental, BNL join)
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1000 Using where
+2 DERIVED t10 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
+2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+)
+left join
+(
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a b a b a b grp_id count(*)
+1 1 1 1 1 1 1 100
+1 1 1 2 1 1 1 100
+1 1 1 1 1 2 1 100
+1 1 1 2 1 2 1 100
+1 1 1 1 1 3 1 100
+1 1 1 2 1 3 1 100
+2 2 2 1 2 1 2 100
+2 2 2 2 2 1 2 100
+2 2 2 1 2 2 2 100
+2 2 2 2 2 2 2 100
+2 2 2 1 2 3 2 100
+2 2 2 2 2 3 2 100
+3 3 3 1 3 1 3 100
+3 3 3 2 3 1 3 100
+3 3 3 1 3 2 3 100
+3 3 3 2 3 2 3 100
+3 3 3 1 3 3 3 100
+3 3 3 2 3 3 3 100
+4 4 4 1 4 1 4 100
+4 4 4 2 4 1 4 100
+4 4 4 1 4 2 4 100
+4 4 4 2 4 2 4 100
+4 4 4 1 4 3 4 100
+4 4 4 2 4 3 4 100
+5 5 5 1 5 1 5 100
+5 5 5 2 5 1 5 100
+5 5 5 1 5 2 5 100
+5 5 5 2 5 2 5 100
+5 5 5 1 5 3 5 100
+5 5 5 2 5 3 5 100
+drop table t1,t2,t3;
+drop table t10, t11;
+# End of 10.4 tests
SET GLOBAL innodb_stats_persistent=@save_innodb_stats_persistent;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 1ebe27cd12c..2dd7988f223 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -233,4 +233,216 @@ drop table t3, t4;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: Split optimization refills temporary table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+analyze table t10,t11 persistent for all;
+
+let $q1=
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+
+eval
+explain $q1;
+
+--echo # The important part in the below output is:
+--echo # "lateral": 1,
+--echo # "query_block": {
+--echo # "select_id": 2,
+--echo # "r_loops": 5, <-- must be 5, not 30.
+--source include/analyze-format.inc
+
+eval
+analyze format=json $q1;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+# And also add a non-const table
+
+create table t5 (
+ pk int primary key
+ );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+ t21,
+ (
+ (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+analyze table t10,t11 persistent for all;
+
+let $q=
+select *
+from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join
+ t3
+ on t3.a=t1.b
+ )
+ left join
+ (
+ select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id
+ )dt
+ on dt.grp_id=t1.b;
+
+eval explain $q;
+eval $q;
+
+set join_cache_level=4;
+eval explain $q;
+eval $q;
+
+set join_cache_level=default;
+
+drop index a on t2;
+drop index a on t3;
+
+eval explain $q;
+eval $q;
+
+drop table t1,t2,t3;
+drop table t10, t11;
+
+--echo # End of 10.4 tests
+
SET GLOBAL innodb_stats_persistent=@save_innodb_stats_persistent;
diff --git a/mysql-test/main/explain_innodb.result b/mysql-test/main/explain_innodb.result
index b46665c279c..c44d7baadea 100644
--- a/mysql-test/main/explain_innodb.result
+++ b/mysql-test/main/explain_innodb.result
@@ -18,3 +18,21 @@ id select_type table type possible_keys key key_len ref rows Extra
2 DERIVED t1 range NULL id 53 NULL 2 Using index for group-by
SET GLOBAL slow_query_log = @sql_tmp;
drop table t1;
+#
+# MDEV-31181: Server crash in subselect_uniquesubquery_engine::print
+# upon EXPLAIN EXTENDED DELETE
+#
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (pk INT PRIMARY KEY);
+INSERT INTO t2 VALUES (1),(2);
+EXPLAIN EXTENDED DELETE FROM t1 WHERE a IN (SELECT pk FROM t2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 2 100.00 Using where
+2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index
+Warnings:
+Note 1003 /* select#1 */ delete from `test`.`t1` where <in_optimizer>(`test`.`t1`.`a`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`a`))))
+drop table t1, t2;
+#
+# End of 10.4 tests
+#
diff --git a/mysql-test/main/explain_innodb.test b/mysql-test/main/explain_innodb.test
index 2c29a6e26da..3dcad4c2d49 100644
--- a/mysql-test/main/explain_innodb.test
+++ b/mysql-test/main/explain_innodb.test
@@ -18,3 +18,22 @@ SELECT * FROM (SELECT id FROM t1 GROUP BY id) dt WHERE 1=0;
SET GLOBAL slow_query_log = @sql_tmp;
drop table t1;
+
+
+--echo #
+--echo # MDEV-31181: Server crash in subselect_uniquesubquery_engine::print
+--echo # upon EXPLAIN EXTENDED DELETE
+--echo #
+
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (pk INT PRIMARY KEY);
+INSERT INTO t2 VALUES (1),(2);
+
+EXPLAIN EXTENDED DELETE FROM t1 WHERE a IN (SELECT pk FROM t2);
+
+drop table t1, t2;
+
+--echo #
+--echo # End of 10.4 tests
+--echo #
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 70038a5f605..77e6103f783 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -481,6 +481,11 @@ select * from v2 {
]
},
{
+ "check_split_materialized": {
+ "not_applicable": "no candidate field can be accessed through ref"
+ }
+ },
+ {
"best_join_order": ["t1"]
},
{
@@ -828,6 +833,11 @@ explain select * from v1 {
]
},
{
+ "check_split_materialized": {
+ "not_applicable": "group list has no candidates"
+ }
+ },
+ {
"best_join_order": ["t1"]
},
{
@@ -10343,6 +10353,110 @@ SET optimizer_trace=DEFAULT;
DROP VIEW v;
DROP TABLE t;
#
+# MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+JS
+[
+ {
+ "split_candidates":
+ ["t10.grp_id"]
+ }
+]
+select
+json_detailed(
+json_remove(
+json_extract(trace, '$**.choose_best_splitting')
+, '$[0].split_plan_search[0]'
+ )
+) as JS
+from information_schema.optimizer_trace;
+JS
+[
+ {
+ "considered_keys":
+ [
+ {
+ "table_name": "t10",
+ "index": "grp_id",
+ "rec_per_key": 100,
+ "param_tables": 1
+ }
+ ],
+ "refills": 5,
+ "spl_pd_boundary": 2,
+ "split_plan_search":
+ [],
+ "lead_table": "t10",
+ "index": "grp_id",
+ "parts": 1,
+ "split_sel": 0.001,
+ "cost": 2535.968504,
+ "unsplit_cost": 253440.0075,
+ "records": 100,
+ "chosen": true
+ }
+]
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
+#
# End of 10.4 tests
#
set optimizer_trace='enabled=on';
@@ -10763,79 +10877,83 @@ from
information_schema.optimizer_trace;
json_detailed(json_extract(trace, '$**.choose_best_splitting'))
[
- [
- {
- "considered_execution_plans":
- [
- {
- "plan_prefix":
- [],
- "get_costs_for_tables":
- [
- {
- "best_access_path":
+ {
+ "considered_keys":
+ []
+ },
+ {
+ "considered_keys":
+ [
+ {
+ "table_name": "t2",
+ "index": "idx_a",
+ "rec_per_key": 1.8367,
+ "param_tables": 1
+ }
+ ],
+ "refills": 4,
+ "spl_pd_boundary": 2,
+ "split_plan_search":
+ [
+ {
+ "considered_execution_plans":
+ [
+ {
+ "plan_prefix":
+ [],
+ "get_costs_for_tables":
+ [
{
- "table": "t2",
- "considered_access_paths":
- [
+ "best_access_path":
+ {
+ "table": "t2",
+ "considered_access_paths":
+ [
+ {
+ "access_type": "ref",
+ "index": "idx_a",
+ "used_range_estimates": false,
+ "reason": "not available",
+ "rows": 1.8367,
+ "cost": 2.000585794,
+ "chosen": true
+ },
+ {
+ "type": "scan",
+ "chosen": false,
+ "cause": "cost"
+ }
+ ],
+ "chosen_access_method":
{
- "access_type": "ref",
- "index": "idx_a",
- "used_range_estimates": false,
- "reason": "not available",
- "rows": 1.8367,
+ "type": "ref",
+ "records": 1.8367,
"cost": 2.000585794,
- "chosen": true
- },
- {
- "type": "scan",
- "chosen": false,
- "cause": "cost"
+ "uses_join_buffering": false
}
- ],
- "chosen_access_method":
- {
- "type": "ref",
- "records": 1.8367,
- "cost": 2.000585794,
- "uses_join_buffering": false
}
}
- }
- ]
- },
- {
- "plan_prefix":
- [],
- "table": "t2",
- "rows_for_plan": 1.8367,
- "cost_for_plan": 2.367925794,
- "cost_for_sorting": 1.8367
- }
- ]
- },
- {
- "best_splitting":
- {
- "table": "t2",
- "key": "idx_a",
- "record_count": 4,
- "cost": 2.488945919,
- "unsplit_cost": 25.72361682
+ ]
+ },
+ {
+ "plan_prefix":
+ [],
+ "table": "t2",
+ "rows_for_plan": 1.8367,
+ "cost_for_plan": 2.367925794,
+ "cost_for_sorting": 1.8367
+ }
+ ]
}
- }
- ]
-]
-select
-json_detailed(json_extract(trace, '$**.lateral_derived'))
-from
-information_schema.optimizer_trace;
-json_detailed(json_extract(trace, '$**.lateral_derived'))
-[
- {
- "startup_cost": 9.955783677,
- "splitting_cost": 2.488945919,
- "records": 1
+ ],
+ "lead_table": "t2",
+ "index": "idx_a",
+ "parts": 1,
+ "split_sel": 0.020407778,
+ "cost": 2.488945919,
+ "unsplit_cost": 25.72361682,
+ "records": 1,
+ "chosen": true
}
]
drop table t1,t2;
diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test
index 0cacc4a60ea..3f1f1fd1204 100644
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -702,6 +702,76 @@ DROP VIEW v;
DROP TABLE t;
--echo #
+--echo # MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+analyze table t10,t11 persistent for all;
+
+set optimizer_trace=1;
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+
+select
+ json_detailed(
+ json_remove(
+ json_extract(trace, '$**.choose_best_splitting')
+ , '$[0].split_plan_search[0]'
+ )
+ ) as JS
+from information_schema.optimizer_trace;
+
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
+
+--echo #
--echo # End of 10.4 tests
--echo #
@@ -909,13 +979,6 @@ from
information_schema.optimizer_trace;
--enable_view_protocol
-# Same as above. just to show that splitting plan has some coverage in the
-# trace.
-select
- json_detailed(json_extract(trace, '$**.lateral_derived'))
-from
- information_schema.optimizer_trace;
-
drop table t1,t2;
--echo #
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index 59d76da8816..62466de1113 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -1943,75 +1943,6 @@ Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
DROP TABLE t1;
# End of 10.2 tests
-#
-# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
-#
-create table t0(a int);
-insert into t0 select 1 from seq_1_to_78;
-create table t1(a int);
-insert into t1 select 1 from seq_1_to_26;
-create table t10 (a int);
-insert into t10 select 0 from t0, seq_1_to_4;
-insert into t10 select 8693 from t1;
-insert into t10 select 8694 from t1;
-insert into t10 select 8695 from t1;
-insert into t10 select 34783 from t1;
-insert into t10 select 34784 from t1;
-insert into t10 select 34785 from t1;
-insert into t10 select 34785 from t0, seq_1_to_8;
-insert into t10 select 65214 from t1;
-insert into t10 select 65215 from t1;
-insert into t10 select 65216 from t1;
-insert into t10 select 65216 from t0, seq_1_to_52;
-insert into t10 select 65217 from t1;
-insert into t10 select 65218 from t1;
-insert into t10 select 65219 from t1;
-insert into t10 select 65219 from t0;
-insert into t10 select 73913 from t1;
-insert into t10 select 73914 from t1;
-insert into t10 select 73915 from t1;
-insert into t10 select 73915 from t0, seq_1_to_40;
-insert into t10 select 78257 from t1;
-insert into t10 select 78258 from t1;
-insert into t10 select 78259 from t1;
-insert into t10 select 91300 from t1;
-insert into t10 select 91301 from t1;
-insert into t10 select 91302 from t1;
-insert into t10 select 91302 from t0, seq_1_to_6;
-insert into t10 select 91303 from t1;
-insert into t10 select 91304 from t1;
-insert into t10 select 91305 from t1;
-insert into t10 select 91305 from t0, seq_1_to_8;
-insert into t10 select 99998 from t1;
-insert into t10 select 99999 from t1;
-insert into t10 select 100000 from t1;
-set use_stat_tables=preferably;
-analyze table t10 persistent for all;
-Table Op Msg_type Msg_text
-test.t10 analyze status Engine-independent statistics collected
-test.t10 analyze status OK
-flush tables;
-set @tmp=@@optimizer_trace;
-set optimizer_trace=1;
-explain select * from t10 where a in (91303);
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
-# Must have selectivity_from_histogram <= 1.0:
-select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
-from information_schema.optimizer_trace;
-json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
-[
- [
- {
- "column_name": "a",
- "ranges":
- ["91303 <= a <= 91303"],
- "selectivity_from_histogram": 0.035714283
- }
- ]
-]
-set optimizer_trace=@tmp;
-drop table t0,t1,t10;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test
index 6f9d4d63eee..df3850d74b7 100644
--- a/mysql-test/main/selectivity.test
+++ b/mysql-test/main/selectivity.test
@@ -1326,90 +1326,12 @@ DROP TABLE t1;
--echo # End of 10.2 tests
---echo #
---echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
---echo #
-create table t0(a int); # This holds how many rows we hold in a bucket.
-insert into t0 select 1 from seq_1_to_78;
-
-create table t1(a int); # one-third of a bucket
-insert into t1 select 1 from seq_1_to_26;
-
-create table t10 (a int);
-insert into t10 select 0 from t0, seq_1_to_4;
-
-insert into t10 select 8693 from t1;
-insert into t10 select 8694 from t1;
-insert into t10 select 8695 from t1;
-
-
-insert into t10 select 34783 from t1;
-insert into t10 select 34784 from t1;
-insert into t10 select 34785 from t1;
-
-
-insert into t10 select 34785 from t0, seq_1_to_8;
-
-insert into t10 select 65214 from t1;
-insert into t10 select 65215 from t1;
-insert into t10 select 65216 from t1;
-
-insert into t10 select 65216 from t0, seq_1_to_52;
-
-insert into t10 select 65217 from t1;
-insert into t10 select 65218 from t1;
-insert into t10 select 65219 from t1;
-
-insert into t10 select 65219 from t0;
-
-
-insert into t10 select 73913 from t1;
-insert into t10 select 73914 from t1;
-insert into t10 select 73915 from t1;
-
-insert into t10 select 73915 from t0, seq_1_to_40;
-
-insert into t10 select 78257 from t1;
-insert into t10 select 78258 from t1;
-insert into t10 select 78259 from t1;
-
-insert into t10 select 91300 from t1;
-insert into t10 select 91301 from t1;
-insert into t10 select 91302 from t1;
-
-insert into t10 select 91302 from t0, seq_1_to_6;
-
-insert into t10 select 91303 from t1; # Only 1/3rd of bucket matches the search tuple
-insert into t10 select 91304 from t1;
-insert into t10 select 91305 from t1;
-
-insert into t10 select 91305 from t0, seq_1_to_8;
-
-insert into t10 select 99998 from t1;
-insert into t10 select 99999 from t1;
-insert into t10 select 100000 from t1;
-
-set use_stat_tables=preferably;
-analyze table t10 persistent for all;
-flush tables;
-
-set @tmp=@@optimizer_trace;
-set optimizer_trace=1;
-explain select * from t10 where a in (91303);
-
---echo # Must have selectivity_from_histogram <= 1.0:
-select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
-from information_schema.optimizer_trace;
-
-set optimizer_trace=@tmp;
-drop table t0,t1,t10;
-
-set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
-set histogram_size=@save_histogram_size;
-set use_stat_tables= @save_use_stat_tables;
#
# Clean up
#
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
--source include/restore_charset.inc
set @@global.histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result
index ddb3cf3795b..a4366214643 100644
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@@ -1955,75 +1955,6 @@ Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
DROP TABLE t1;
# End of 10.2 tests
-#
-# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
-#
-create table t0(a int);
-insert into t0 select 1 from seq_1_to_78;
-create table t1(a int);
-insert into t1 select 1 from seq_1_to_26;
-create table t10 (a int);
-insert into t10 select 0 from t0, seq_1_to_4;
-insert into t10 select 8693 from t1;
-insert into t10 select 8694 from t1;
-insert into t10 select 8695 from t1;
-insert into t10 select 34783 from t1;
-insert into t10 select 34784 from t1;
-insert into t10 select 34785 from t1;
-insert into t10 select 34785 from t0, seq_1_to_8;
-insert into t10 select 65214 from t1;
-insert into t10 select 65215 from t1;
-insert into t10 select 65216 from t1;
-insert into t10 select 65216 from t0, seq_1_to_52;
-insert into t10 select 65217 from t1;
-insert into t10 select 65218 from t1;
-insert into t10 select 65219 from t1;
-insert into t10 select 65219 from t0;
-insert into t10 select 73913 from t1;
-insert into t10 select 73914 from t1;
-insert into t10 select 73915 from t1;
-insert into t10 select 73915 from t0, seq_1_to_40;
-insert into t10 select 78257 from t1;
-insert into t10 select 78258 from t1;
-insert into t10 select 78259 from t1;
-insert into t10 select 91300 from t1;
-insert into t10 select 91301 from t1;
-insert into t10 select 91302 from t1;
-insert into t10 select 91302 from t0, seq_1_to_6;
-insert into t10 select 91303 from t1;
-insert into t10 select 91304 from t1;
-insert into t10 select 91305 from t1;
-insert into t10 select 91305 from t0, seq_1_to_8;
-insert into t10 select 99998 from t1;
-insert into t10 select 99999 from t1;
-insert into t10 select 100000 from t1;
-set use_stat_tables=preferably;
-analyze table t10 persistent for all;
-Table Op Msg_type Msg_text
-test.t10 analyze status Engine-independent statistics collected
-test.t10 analyze status OK
-flush tables;
-set @tmp=@@optimizer_trace;
-set optimizer_trace=1;
-explain select * from t10 where a in (91303);
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
-# Must have selectivity_from_histogram <= 1.0:
-select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
-from information_schema.optimizer_trace;
-json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
-[
- [
- {
- "column_name": "a",
- "ranges":
- ["91303 <= a <= 91303"],
- "selectivity_from_histogram": 0.035714283
- }
- ]
-]
-set optimizer_trace=@tmp;
-drop table t0,t1,t10;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
diff --git a/mysql-test/main/selectivity_innodb_notembedded.result b/mysql-test/main/selectivity_innodb_notembedded.result
new file mode 100644
index 00000000000..8b06fe7556b
--- /dev/null
+++ b/mysql-test/main/selectivity_innodb_notembedded.result
@@ -0,0 +1,104 @@
+SET SESSION STORAGE_ENGINE='InnoDB';
+Warnings:
+Warning 1287 '@@storage_engine' is deprecated and will be removed in a future release. Please use '@@default_storage_engine' instead
+set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch;
+set optimizer_switch='extended_keys=on';
+drop table if exists t0,t1,t2,t3;
+select @@global.use_stat_tables;
+@@global.use_stat_tables
+COMPLEMENTARY
+select @@session.use_stat_tables;
+@@session.use_stat_tables
+COMPLEMENTARY
+set @save_use_stat_tables=@@use_stat_tables;
+set use_stat_tables='preferably';
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set @save_histogram_size=@@histogram_size;
+set @save_histogram_type=@@histogram_type;
+set join_cache_level=2;
+set @@global.histogram_size=0,@@local.histogram_size=0;
+set histogram_type='single_prec_hb';
+set optimizer_use_condition_selectivity=3;
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status OK
+flush tables;
+set @tmp=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t10 where a in (91303);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) as sel
+from information_schema.optimizer_trace;
+sel
+[
+ [
+ {
+ "column_name": "a",
+ "ranges":
+ ["91303 <= a <= 91303"],
+ "selectivity_from_histogram": 0.035714283
+ }
+ ]
+]
+set optimizer_trace=@tmp;
+drop table t0,t1,t10;
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
+#
+# End of 10.4 tests
+#
+#
+# Clean up
+#
+set @@global.histogram_size=@save_histogram_size;
+set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
+SET SESSION STORAGE_ENGINE=DEFAULT;
+Warnings:
+Warning 1287 '@@storage_engine' is deprecated and will be removed in a future release. Please use '@@default_storage_engine' instead
diff --git a/mysql-test/main/selectivity_innodb_notembedded.test b/mysql-test/main/selectivity_innodb_notembedded.test
new file mode 100644
index 00000000000..387f7dcb7de
--- /dev/null
+++ b/mysql-test/main/selectivity_innodb_notembedded.test
@@ -0,0 +1,16 @@
+--source include/have_innodb.inc
+# This test is slow on buildbot.
+--source include/big_test.inc
+--source include/default_optimizer_switch.inc
+--source include/not_embedded.inc
+
+SET SESSION STORAGE_ENGINE='InnoDB';
+
+set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch;
+set optimizer_switch='extended_keys=on';
+
+--source selectivity_notembedded.test
+
+set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
+
+SET SESSION STORAGE_ENGINE=DEFAULT;
diff --git a/mysql-test/main/selectivity_notembedded.result b/mysql-test/main/selectivity_notembedded.result
new file mode 100644
index 00000000000..d2e90a19a68
--- /dev/null
+++ b/mysql-test/main/selectivity_notembedded.result
@@ -0,0 +1,95 @@
+drop table if exists t0,t1,t2,t3;
+select @@global.use_stat_tables;
+@@global.use_stat_tables
+COMPLEMENTARY
+select @@session.use_stat_tables;
+@@session.use_stat_tables
+COMPLEMENTARY
+set @save_use_stat_tables=@@use_stat_tables;
+set use_stat_tables='preferably';
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set @save_histogram_size=@@histogram_size;
+set @save_histogram_type=@@histogram_type;
+set join_cache_level=2;
+set @@global.histogram_size=0,@@local.histogram_size=0;
+set histogram_type='single_prec_hb';
+set optimizer_use_condition_selectivity=3;
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status OK
+flush tables;
+set @tmp=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t10 where a in (91303);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) as sel
+from information_schema.optimizer_trace;
+sel
+[
+ [
+ {
+ "column_name": "a",
+ "ranges":
+ ["91303 <= a <= 91303"],
+ "selectivity_from_histogram": 0.035714283
+ }
+ ]
+]
+set optimizer_trace=@tmp;
+drop table t0,t1,t10;
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
+#
+# End of 10.4 tests
+#
+#
+# Clean up
+#
+set @@global.histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/selectivity_notembedded.test b/mysql-test/main/selectivity_notembedded.test
new file mode 100644
index 00000000000..6752bd3c7e1
--- /dev/null
+++ b/mysql-test/main/selectivity_notembedded.test
@@ -0,0 +1,121 @@
+--source include/no_valgrind_without_big.inc
+--source include/have_stat_tables.inc
+--source include/have_sequence.inc
+--source include/default_charset.inc
+--source include/not_embedded.inc
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+
+select @@global.use_stat_tables;
+select @@session.use_stat_tables;
+
+set @save_use_stat_tables=@@use_stat_tables;
+set use_stat_tables='preferably';
+
+--source include/default_optimizer_switch.inc
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set @save_histogram_size=@@histogram_size;
+set @save_histogram_type=@@histogram_type;
+set join_cache_level=2;
+set @@global.histogram_size=0,@@local.histogram_size=0;
+set histogram_type='single_prec_hb';
+
+# check that statistics on nulls is used
+
+set optimizer_use_condition_selectivity=3;
+
+--echo #
+--echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+--echo #
+create table t0(a int); # This holds how many rows we hold in a bucket.
+insert into t0 select 1 from seq_1_to_78;
+
+create table t1(a int); # one-third of a bucket
+insert into t1 select 1 from seq_1_to_26;
+
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+
+
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+
+
+insert into t10 select 34785 from t0, seq_1_to_8;
+
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+
+insert into t10 select 65216 from t0, seq_1_to_52;
+
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+
+insert into t10 select 65219 from t0;
+
+
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+
+insert into t10 select 73915 from t0, seq_1_to_40;
+
+
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+
+insert into t10 select 91302 from t0, seq_1_to_6;
+
+insert into t10 select 91303 from t1; # Only 1/3rd of bucket matches the search tuple
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+
+insert into t10 select 91305 from t0, seq_1_to_8;
+
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+flush tables;
+
+set @tmp=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t10 where a in (91303);
+
+--echo # Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) as sel
+from information_schema.optimizer_trace;
+
+set optimizer_trace=@tmp;
+drop table t0,t1,t10;
+
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
+
+
+--echo #
+--echo # End of 10.4 tests
+--echo #
+
+--echo #
+--echo # Clean up
+--echo #
+--source include/restore_charset.inc
+set @@global.histogram_size=@save_histogram_size;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index a61ce0ba4e4..6e4c5f48928 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -4624,6 +4624,12 @@ void subselect_uniquesubquery_engine::print(String *str,
{
str->append(STRING_WITH_LEN("<primary_index_lookup>("));
tab->ref.items[0]->print(str, query_type);
+ if (!tab->table)
+ {
+ // table is not opened so unknown
+ str->append(')');
+ return;
+ }
str->append(STRING_WITH_LEN(" in "));
if (tab->table->s->table_category == TABLE_CATEGORY_TEMPORARY)
{
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 86ed442814c..db18e7eebfb 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -65,7 +65,7 @@
If we have only one equi-join condition then we either push it as
for Q1R or we don't. In a general case we may have much more options.
Consider the query (Q3)
- SELECT
+ SELECT *
FROM t1,t2 (SELECT t3.a, t3.b, MIN(t3.c) as min
FROM t3 GROUP BY a,b) t
WHERE t.a = t1.a AND t.b = t2.b
@@ -102,6 +102,47 @@
If we just drop the index on t3(a,b) the chances that the splitting
will be used becomes much lower but they still exists providing that
the fanout of the partial join of t1 and t2 is small enough.
+
+ The lateral derived table LT formed as a result of SM optimization applied
+ to a materialized derived table DT must be joined after all parameters
+ of splitting has been evaluated, i.e. after all expressions used in the
+ equalities pushed into DT that make the employed splitting effective
+ could be evaluated. With the chosen join order all the parameters can be
+ evaluated after the last table LPT that contains any columns referenced in
+ the parameters has been joined and the table APT following LPT in the chosen
+ join order is accessed.
+ Usually the formed lateral derived table LT is accessed right after the table
+ LPT. As in such cases table LT must be refilled for each combination of
+ splitting parameters this table must be populated before each access to LT
+ and the estimate of the expected number of refills that could be suggested in
+ such cases is the number of rows in the partial join ending with table LPT.
+ However in other cases the chosen join order may contain tables between LPT
+ and LT.
+ Consider the query (Q4)
+ SELECT *
+ FROM t1 JOIN t2 ON t1.b = t2.b
+ LEFT JOIN (SELECT t3.a, t3.b, MIN(t3.c) as min
+ FROM t3 GROUP BY a,b) t
+ ON t.a = t1.a AND t.c > 0
+ [WHERE P(t1,t2)];
+ Let's assume that the join order t1,t2,t was chosen for this query and
+ SP optimization was applied to t with splitting over t3.a using the index
+ on column t3.a. Here the table t1 serves as LPT, t2 as APT while t with
+ pushed condition t.a = t1.a serves as LT. Note that here LT is accessed
+ after t2, not right after t1. Here the number of refills of the lateral
+ derived is not more that the number of key values of t1.a that might be
+ less than the cardinality of the partial join (t1,t2). That's why it makes
+ sense to signal that t3 has to be refilled just before t2 is accessed.
+ However if the cardinality of the partial join (t1,t2) happens to be less
+ than the cardinality of the partial join (t1) due to additional selective
+ condition P(t1,t2) then the flag informing about necessity of a new refill
+ can be set either when accessing t2 or right after it has been joined.
+ The current code sets such flag right after generating a record of the
+ partial join with minimal cardinality for all those partial joins that
+ end between APT and LT. It allows sometimes to push extra conditions
+ into the lateral derived without any increase of the number of refills.
+ However this flag can be set only after the last join table between
+ APT and LT using join buffer has been joined.
*/
/*
@@ -249,6 +290,7 @@ public:
double unsplit_card;
/* Lastly evaluated execution plan for 'join' with pushed equalities */
SplM_plan_info *last_plan;
+ double last_refills;
SplM_plan_info *find_plan(TABLE *table, uint key, uint parts);
};
@@ -346,6 +388,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list)
return false;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_split(thd, "check_split_materialized");
+
ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates(PSI_INSTRUMENT_MEM);
@@ -391,7 +436,10 @@ bool JOIN::check_for_splittable_materialized()
}
}
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
+ {
+ trace_split.add("not_applicable", "group list has no candidates");
return false;
+ }
/*
For each table from this join find the keys that can be used for ref access
@@ -450,7 +498,11 @@ bool JOIN::check_for_splittable_materialized()
}
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
+ {
+ trace_split.add("not_applicable",
+ "no candidate field can be accessed through ref");
return false;
+ }
/*
Create a structure of the type SplM_opt_info and fill it with
@@ -468,16 +520,22 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field;
- for (cand= cand_start; cand < cand_end; cand++)
+
{
- if (!cand->is_usable_for_ref_access)
- continue;
- spl_field->producing_item= cand->producing_item;
- spl_field->underlying_field= cand->underlying_field;
- spl_field->mat_field= cand->mat_field;
- spl_opt_info->tables_usable_for_splitting|=
- cand->underlying_field->table->map;
- spl_field++;
+ Json_writer_array trace_range(thd, "split_candidates");
+ for (cand= cand_start; cand < cand_end; cand++)
+ {
+ if (!cand->is_usable_for_ref_access)
+ continue;
+ trace_range.add(cand->producing_item);
+
+ spl_field->producing_item= cand->producing_item;
+ spl_field->underlying_field= cand->underlying_field;
+ spl_field->mat_field= cand->mat_field;
+ spl_opt_info->tables_usable_for_splitting|=
+ cand->underlying_field->table->map;
+ spl_field++;
+ }
}
/* Attach this info to the table T */
@@ -732,7 +790,7 @@ void JOIN::add_keyuses_for_splitting()
bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end));
if (ext_keyuses_for_splitting->push(keyuse_ext_end))
goto err;
-
+ // psergey-todo: trace anything here?
spl_opt_info->unsplit_card= join_record_count;
rec_len= table->s->rec_buff_length;
@@ -830,13 +888,13 @@ SplM_plan_info *SplM_opt_info::find_plan(TABLE *table, uint key, uint parts)
static
void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
TABLE *table, uint key,
- table_map remaining_tables,
+ table_map excluded_tables,
bool validity_val)
{
KEYUSE_EXT *keyuse_ext= key_keyuse_ext_start;
do
{
- if (!(keyuse_ext->needed_in_prefix & remaining_tables))
+ if (!(keyuse_ext->needed_in_prefix & excluded_tables))
{
/*
The enabling/disabling flags are set just in KEYUSE_EXT structures.
@@ -856,8 +914,11 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
Choose the best splitting to extend the evaluated partial join
@param
- record_count estimated cardinality of the extended partial join
+ idx index for joined table T in current partial join P
remaining_tables tables not joined yet
+ spl_pd_boundary OUT bitmap of the table from P extended by T that
+ starts the sub-sequence of tables S from which
+ no conditions are allowed to be pushed into T.
@details
This function is called during the search for the best execution
@@ -872,17 +933,19 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
splitting the function set it as the true plan of materialization
of the table T.
The function caches the found plans for materialization of table T
- together if the info what key was used for splitting. Next time when
+ together with the info what key was used for splitting. Next time when
the optimizer prefers to use the same key the plan is taken from
the cache of plans
@retval
Pointer to the info on the found plan that employs the pushed equalities
if the plan has been chosen, NULL - otherwise.
+ If the function returns NULL the value of spl_param_tables is set to 0.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
- table_map remaining_tables)
+SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
+ table_map remaining_tables,
+ table_map *spl_pd_boundary)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
DBUG_ASSERT(spl_opt_info != NULL);
@@ -897,7 +960,9 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
-
+ table_map best_param_tables;
+ Json_writer_object trace_obj(thd, "choose_best_splitting");
+ Json_writer_array trace_arr(thd, "considered_keys");
/*
Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys
@@ -915,6 +980,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ table_map needed_in_prefix= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -940,6 +1006,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
KEY *key_info= table->key_info + key;
double rec_per_key=
key_info->actual_rec_per_key(keyuse_ext->keypart);
+ needed_in_prefix|= keyuse_ext->needed_in_prefix;
if (rec_per_key < best_rec_per_key)
{
best_table= keyuse_ext->table;
@@ -947,6 +1014,14 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ best_param_tables= needed_in_prefix;
+ // trace table, key_name, parts, needed_tables.
+ Json_writer_object cur_index(thd);
+ cur_index.
+ add("table_name", best_table->alias.ptr()).
+ add("index", best_table->key_info[best_key].name).
+ add("rec_per_key", best_rec_per_key).
+ add("param_tables", best_param_tables);
}
keyuse_ext++;
}
@@ -954,14 +1029,41 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
while (keyuse_ext->table == table);
}
+ trace_arr.end();
+
spl_opt_info->last_plan= 0;
+ double refills= DBL_MAX;
+ table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables;
if (best_table)
{
+ *spl_pd_boundary= this->table->map;
+ if (!best_param_tables)
+ refills= 1;
+ else
+ {
+ table_map last_found= this->table->map;
+ for (POSITION *pos= &this->join->positions[idx - 1]; ; pos--)
+ {
+ if (pos->table->table->map & excluded_tables)
+ continue;
+ if (pos->partial_join_cardinality < refills)
+ {
+ *spl_pd_boundary= last_found;
+ refills= pos->partial_join_cardinality;
+ }
+ last_found= pos->table->table->map;
+ if ((last_found & best_param_tables) || pos->use_join_buffer)
+ break;
+ }
+ }
+
+ trace_obj.add("refills", refills).
+ add("spl_pd_boundary", *spl_pd_boundary);
+
/*
The key for splitting was chosen, look for the plan for this key
in the cache
*/
- Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
@@ -969,11 +1071,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache
*/
+ Json_writer_array wrapper(thd, "split_plan_search");
table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
- best_key, remaining_tables, true);
+ best_key, excluded_tables, true);
choose_plan(join, all_table_map & ~join->const_table_map);
+ wrapper.end();
/*
Check that the chosen plan is really a splitting plan.
If not or if there is not enough memory to save the plan in the cache
@@ -990,7 +1094,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_opt_info->plan_cache.push_back(spl_plan))
{
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
- best_key, remaining_tables, false);
+ best_key, excluded_tables, false);
+ trace_obj.add("split_plan_discarded", "constructed unapplicable query plan");
return 0;
}
@@ -1010,32 +1115,41 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_plan->cost= join->best_positions[join->table_count-1].read_time +
+ oper_cost;
- if (unlikely(thd->trace_started()))
- {
- Json_writer_object wrapper(thd);
- Json_writer_object find_trace(thd, "best_splitting");
- find_trace.add("table", best_table->alias.c_ptr());
- find_trace.add("key", best_table->key_info[best_key].name);
- find_trace.add("record_count", record_count);
- find_trace.add("cost", spl_plan->cost);
- find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
- }
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
sizeof(POSITION) * join->table_count);
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
- best_key, remaining_tables, false);
+ best_key, excluded_tables, false);
}
+ else
+ trace_obj.add("cached_plan_found", 1);
+
if (spl_plan)
{
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+ if (unlikely(thd->trace_started()))
+ {
+ trace_obj.
+ add("lead_table", spl_plan->table->alias.ptr()).
+ add("index", spl_plan->table->key_info[spl_plan->key].name).
+ add("parts", spl_plan->parts).
+ add("split_sel", spl_plan->split_sel).
+ add("cost", spl_plan->cost).
+ add("unsplit_cost", spl_opt_info->unsplit_cost).
+ add("records", (ha_rows) (records * spl_plan->split_sel));
+ }
+
+ if (refills * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
the plan without splitting
*/
spl_opt_info->last_plan= spl_plan;
+ spl_opt_info->last_refills= refills;
+ trace_obj.add("chosen", true);
}
+ else
+ trace_obj.add("chosen", false);
}
}
@@ -1044,16 +1158,14 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_plan= spl_opt_info->last_plan;
if (spl_plan)
{
- startup_cost= record_count * spl_plan->cost;
+ startup_cost= spl_opt_info->last_refills * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
-
- Json_writer_object trace(thd, "lateral_derived");
- trace.add("startup_cost", startup_cost);
- trace.add("splitting_cost", spl_plan->cost);
- trace.add("records", records);
}
else
+ {
startup_cost= spl_opt_info->unsplit_cost;
+ *spl_pd_boundary= 0;
+ }
return spl_plan;
}
@@ -1063,13 +1175,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
Inject equalities for splitting used by the materialization join
@param
- excluded_tables used to filter out the equalities that cannot
- be pushed.
+ excluded_tables used to filter out the equalities that are not
+ to be pushed.
@details
This function injects equalities pushed into a derived table T for which
the split optimization has been chosen by the optimizer. The function
- is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op().
+ is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt().
All equalities usable for splitting T whose right parts do not depend on
any of the 'excluded_tables' can be pushed into the where clause of the
derived table T.
@@ -1157,7 +1269,7 @@ bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
@param
spl_plan info on the splitting plan chosen for the splittable table T
- remaining_tables the table T is joined just before these tables
+ excluded_tables tables that cannot be used in equalities pushed into T
is_const_table the table T is a constant table
@details
@@ -1172,7 +1284,7 @@ bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
*/
bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
- table_map remaining_tables,
+ table_map excluded_tables,
bool is_const_table)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -1180,6 +1292,7 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
JOIN *md_join= spl_opt_info->join;
if (spl_plan && !is_const_table)
{
+ is_split_derived= true;
memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count);
@@ -1190,7 +1303,7 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
reset_validity_vars_for_keyuses(spl_plan->keyuse_ext_start,
spl_plan->table,
spl_plan->key,
- remaining_tables,
+ excluded_tables,
true);
}
else if (md_join->save_qep)
@@ -1226,8 +1339,21 @@ bool JOIN::fix_all_splittings_in_plan()
if (tab->table->is_splittable())
{
SplM_plan_info *spl_plan= cur_pos->spl_plan;
+ table_map excluded_tables= (all_tables & ~prev_tables) |
+ sjm_lookup_tables;
+ ;
+ if (spl_plan)
+ {
+ POSITION *pos= cur_pos;
+ table_map spl_pd_boundary= pos->spl_pd_boundary;
+ do
+ {
+ excluded_tables|= pos->table->table->map;
+ }
+ while (!((pos--)->table->table->map & spl_pd_boundary));
+ }
if (tab->fix_splitting(spl_plan,
- all_tables & ~prev_tables,
+ excluded_tables,
tablenr < const_tables ))
return true;
}
@@ -1266,13 +1392,21 @@ bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt()
continue;
SplM_opt_info *spl_opt_info= tab->table->spl_opt_info;
JOIN *join= spl_opt_info->join;
- /*
- Currently the equalities referencing columns of SJM tables with
- look-up access cannot be pushed into materialized derived.
- */
- if (join->inject_best_splitting_cond((all_tables & ~prev_tables) |
- sjm_lookup_tables))
- return true;
+ table_map excluded_tables= (all_tables & ~prev_tables) | sjm_lookup_tables;
+ table_map spl_pd_boundary= cur_pos->spl_pd_boundary;
+ for (POSITION *pos= cur_pos; ; pos--)
+ {
+ excluded_tables|= pos->table->table->map;
+ pos->table->no_forced_join_cache= true;
+ if (pos->table->table->map & spl_pd_boundary)
+ {
+ pos->table->split_derived_to_update|= tab->table->map;
+ break;
+ }
+ }
+
+ if (join->inject_best_splitting_cond(excluded_tables))
+ return true;
}
return false;
}
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index da7d0f0dfbe..09eb0fffa10 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7684,6 +7684,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].cond_selectivity= 1.0;
join->positions[idx].ref_depend_map= 0;
+ join->positions[idx].partial_join_cardinality= 1;
// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
join->positions[idx].sj_strategy= SJ_OPT_NONE;
@@ -7701,6 +7702,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
}
join->best_ref[idx]=table;
join->positions[idx].spl_plan= 0;
+ join->positions[idx].spl_pd_boundary= 0;
}
@@ -7987,6 +7989,7 @@ best_access_path(JOIN *join,
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
SplM_plan_info *spl_plan= 0;
+ table_map spl_pd_boundary= 0;
Range_rowid_filter_cost_info *filter= 0;
const char* cause= NULL;
enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN;
@@ -8005,9 +8008,11 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
- Json_writer_array trace_paths(thd, "considered_access_paths");
+ spl_plan= s->choose_best_splitting(idx,
+ remaining_tables,
+ &spl_pd_boundary);
+ Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
{ /* Use key if possible */
KEYUSE *keyuse;
@@ -8892,8 +8897,9 @@ best_access_path(JOIN *join,
best_filter= filter;
/* range/index_merge/ALL/index access method are "independent", so: */
best_ref_depends_map= 0;
- best_uses_jbuf= MY_TEST(!disable_jbuf && !((s->table->map &
- join->outer_join)));
+ best_uses_jbuf= MY_TEST(!disable_jbuf &&
+ (join->allowed_outer_join_with_cache ||
+ !(s->table->map & join->outer_join)));
spl_plan= 0;
best_type= type;
}
@@ -8916,6 +8922,7 @@ best_access_path(JOIN *join,
pos->loosescan_picker.loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
pos->spl_plan= spl_plan;
+ pos->spl_pd_boundary= !spl_plan ? 0 : spl_pd_boundary;
pos->range_rowid_filter_info= best_filter;
pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
key_dependent & remaining_tables);
@@ -9449,6 +9456,9 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
remaining_tables);
position->cond_selectivity= pushdown_cond_selectivity;
+ double partial_join_cardinality= record_count *
+ pushdown_cond_selectivity;
+ join->positions[idx].partial_join_cardinality= partial_join_cardinality;
++idx;
}
@@ -10784,8 +10794,10 @@ best_extension_by_limited_search(JOIN *join,
}
}
- if ((search_depth > 1) &&
- ((remaining_tables & ~real_table_bit) & allowed_tables))
+ join->positions[idx].partial_join_cardinality= partial_join_cardinality;
+
+ if ((search_depth > 1) && (remaining_tables & ~real_table_bit) &
+ allowed_tables)
{
/* Recursively expand the current partial plan */
Json_writer_array trace_rest(thd, "rest_of_plan");
@@ -13850,6 +13862,9 @@ uint check_join_cache_usage(JOIN_TAB *tab,
join->return_tab= 0;
+ if (tab->no_forced_join_cache)
+ return 0;
+
/*
Don't use join cache if @@join_cache_level==0 or this table is the first
one join suborder (either at top level or inside a bush)
@@ -14818,7 +14833,8 @@ bool JOIN_TAB::preread_init()
DBUG_RETURN(TRUE);
if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) ||
- derived->is_nonrecursive_derived_with_rec_ref())
+ derived->is_nonrecursive_derived_with_rec_ref() ||
+ is_split_derived)
preread_init_done= TRUE;
if (select && select->quick)
select->quick->replace_handler(table->file);
@@ -18396,6 +18412,9 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
reopt_remaining_tables &
~real_table_bit);
}
+ double partial_join_cardinality= rec_count *
+ pushdown_cond_selectivity;
+ join->positions[i].partial_join_cardinality= partial_join_cardinality;
(*outer_rec_count) *= pushdown_cond_selectivity;
if (!rs->emb_sj_nest)
*outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read);
@@ -22010,6 +22029,16 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
DBUG_ENTER("sub_select");
+ if (join_tab->split_derived_to_update && !end_of_records)
+ {
+ table_map tab_map= join_tab->split_derived_to_update;
+ for (uint i= 0; tab_map; i++, tab_map>>= 1)
+ {
+ if (tab_map & 1)
+ join->map2table[i]->preread_init_done= false;
+ }
+ }
+
if (join_tab->last_inner)
{
JOIN_TAB *last_inner_tab= join_tab->last_inner;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index dfa7df5ce0c..b17f51cf1d8 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -405,6 +405,8 @@ typedef struct st_join_table {
*/
bool idx_cond_fact_out;
bool use_join_cache;
+ /* TRUE <=> it is prohibited to join this table using join buffer */
+ bool no_forced_join_cache;
uint used_join_cache_level;
ulong join_buffer_size_limit;
JOIN_CACHE *cache;
@@ -531,6 +533,16 @@ typedef struct st_join_table {
bool preread_init_done;
+ /* true <=> split optimization has been applied to this materialized table */
+ bool is_split_derived;
+
+ /*
+ Bitmap of split materialized derived tables that can be filled just before
+ this join table is to be joined. All parameters of the split derived tables
+ belong to tables preceding this join table.
+ */
+ table_map split_derived_to_update;
+
/*
Cost info to the range filter used when joining this join table
(Defined when the best join order has been already chosen)
@@ -693,9 +705,10 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
- table_map remaining_tables);
- bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
+ SplM_plan_info *choose_best_splitting(uint idx,
+ table_map remaining_tables,
+ table_map *spl_pd_boundary);
+ bool fix_splitting(SplM_plan_info *spl_plan, table_map excluded_tables,
bool is_const_table);
} JOIN_TAB;
@@ -963,9 +976,21 @@ public:
*/
KEYUSE *key;
+ /* Cardinality of current partial join ending with this position */
+ double partial_join_cardinality;
+
/* Info on splitting plan used at this position */
SplM_plan_info *spl_plan;
+ /*
+ If spl_plan is NULL the value of spl_pd_boundary is 0. Otherwise
+ spl_pd_boundary contains the bitmap of the table from the current
+ partial join ending at this position that starts the sub-sequence of
+ tables S from which no conditions are allowed to be used in the plan
+ spl_plan for the split table joined at this position.
+ */
+ table_map spl_pd_boundary;
+
/* Cost info for the range filter used at this position */
Range_rowid_filter_cost_info *range_rowid_filter_info;