summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <sergey@mariadb.com>2023-05-03 13:49:32 +0300
committerOleksandr Byelkin <sanja@mariadb.com>2023-05-03 14:11:25 +0200
commited3e6f66a265952afded33fb134f6f8bcc31aa89 (patch)
treed2b9fc116cfe6e38803fde02e575efccad1529be
parentce7ffe61d836fe9f0cfc1087f058bc40d66e5cfb (diff)
downloadmariadb-git-ed3e6f66a265952afded33fb134f6f8bcc31aa89.tar.gz
MDEV-26301: Split optimization refills: Optimizer Trace coverage
Add Optimizer Trace printouts.
-rw-r--r--mysql-test/main/opt_trace.result114
-rw-r--r--mysql-test/main/opt_trace.test70
-rw-r--r--sql/opt_split.cc72
-rw-r--r--sql/sql_select.cc2
4 files changed, 245 insertions, 13 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index a7a8fb88e6d..7856f9248ba 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -450,6 +450,11 @@ select * from v2 {
]
},
{
+ "check_split_materialized": {
+ "not_applicable": "no candidate field can be accessed through ref"
+ }
+ },
+ {
"best_join_order": ["t1"]
},
{
@@ -773,6 +778,11 @@ explain select * from v1 {
]
},
{
+ "check_split_materialized": {
+ "not_applicable": "group list has no candidates"
+ }
+ },
+ {
"best_join_order": ["t1"]
},
{
@@ -8861,5 +8871,109 @@ SET optimizer_trace=DEFAULT;
DROP VIEW v;
DROP TABLE t;
#
+# MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+JS
+[
+ {
+ "split_candidates":
+ ["t10.grp_id"]
+ }
+]
+select
+json_detailed(
+json_remove(
+json_extract(trace, '$**.choose_best_splitting')
+, '$[0].split_plan_search[0]'
+ )
+) as JS
+from information_schema.optimizer_trace;
+JS
+[
+ {
+ "considered_keys":
+ [
+ {
+ "table_name": "t10",
+ "index": "grp_id",
+ "rec_per_key": 100,
+ "param_tables": 1
+ }
+ ],
+ "refills": 5,
+ "spl_pd_boundary": 2,
+ "split_plan_search":
+ [],
+ "lead_table": "t10",
+ "index": "grp_id",
+ "parts": 1,
+ "split_sel": 0.001,
+ "cost": 2536,
+ "records": 100,
+ "refills": 5,
+ "chosen": true
+ }
+]
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
+#
# End of 10.4 tests
#
diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test
index e0be5360069..2f1047df7c5 100644
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -697,5 +697,75 @@ DROP VIEW v;
DROP TABLE t;
--echo #
+--echo # MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+analyze table t10,t11 persistent for all;
+
+set optimizer_trace=1;
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+
+select
+ json_detailed(
+ json_remove(
+ json_extract(trace, '$**.choose_best_splitting')
+ , '$[0].split_plan_search[0]'
+ )
+ ) as JS
+from information_schema.optimizer_trace;
+
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
+
+--echo #
--echo # End of 10.4 tests
--echo #
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 3a873e220b6..bb3aec9ee8d 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -228,6 +228,7 @@
#include "mariadb.h"
#include "sql_select.h"
+#include "opt_trace.h"
/* Info on a splitting field */
struct SplM_field_info
@@ -387,6 +388,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list)
return false;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_split(thd, "check_split_materialized");
+
ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates;
@@ -432,7 +436,10 @@ bool JOIN::check_for_splittable_materialized()
}
}
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
+ {
+ trace_split.add("not_applicable", "group list has no candidates");
return false;
+ }
/*
For each table from this join find the keys that can be used for ref access
@@ -491,7 +498,11 @@ bool JOIN::check_for_splittable_materialized()
}
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
+ {
+ trace_split.add("not_applicable",
+ "no candidate field can be accessed through ref");
return false;
+ }
/*
Create a structure of the type SplM_opt_info and fill it with
@@ -509,16 +520,22 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field;
- for (cand= cand_start; cand < cand_end; cand++)
+
{
- if (!cand->is_usable_for_ref_access)
- continue;
- spl_field->producing_item= cand->producing_item;
- spl_field->underlying_field= cand->underlying_field;
- spl_field->mat_field= cand->mat_field;
- spl_opt_info->tables_usable_for_splitting|=
- cand->underlying_field->table->map;
- spl_field++;
+ Json_writer_array trace_range(thd, "split_candidates");
+ for (cand= cand_start; cand < cand_end; cand++)
+ {
+ if (!cand->is_usable_for_ref_access)
+ continue;
+ trace_range.add(cand->producing_item);
+
+ spl_field->producing_item= cand->producing_item;
+ spl_field->underlying_field= cand->underlying_field;
+ spl_field->mat_field= cand->mat_field;
+ spl_opt_info->tables_usable_for_splitting|=
+ cand->underlying_field->table->map;
+ spl_field++;
+ }
}
/* Attach this info to the table T */
@@ -773,7 +790,7 @@ void JOIN::add_keyuses_for_splitting()
bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end));
if (ext_keyuses_for_splitting->push(keyuse_ext_end))
goto err;
-
+ // psergey-todo: trace anything here?
spl_opt_info->unsplit_card= join_record_count;
rec_len= table->s->rec_buff_length;
@@ -946,7 +963,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
uint best_key= 0;
uint best_key_parts= 0;
table_map best_param_tables;
-
+ Json_writer_object trace_obj(thd, "choose_best_splitting");
+ Json_writer_array trace_arr(thd, "considered_keys");
/*
Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys
@@ -999,6 +1017,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
best_param_tables= needed_in_prefix;
+ // trace table, key_name, parts, needed_tables.
+ Json_writer_object cur_index(thd);
+ cur_index.
+ add("table_name", best_table->alias.ptr()).
+ add("index", best_table->key_info[best_key].name).
+ add("rec_per_key", best_rec_per_key).
+ add("param_tables", best_param_tables);
}
keyuse_ext++;
}
@@ -1006,6 +1031,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
}
while (keyuse_ext->table == table);
}
+ trace_arr.end();
+
spl_opt_info->last_plan= 0;
double refills= DBL_MAX;
table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables;
@@ -1031,6 +1058,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
break;
}
}
+
+ trace_obj.add("refills", refills).
+ add("spl_pd_boundary", *spl_pd_boundary);
+
/*
The key for splitting was chosen, look for the plan for this key
in the cache
@@ -1042,11 +1073,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache
*/
+ Json_writer_array wrapper(thd, "split_plan_search");
table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, true);
choose_plan(join, all_table_map & ~join->const_table_map);
+ wrapper.end();
/*
Check that the chosen plan is really a splitting plan.
If not or if there is not enough memory to save the plan in the cache
@@ -1064,6 +1097,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
{
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, false);
+ trace_obj.add("split_plan_discarded", "constructed unapplicable query plan");
return 0;
}
@@ -1089,9 +1123,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, false);
}
+ else
+ trace_obj.add("cached_plan_found", 1);
if (spl_plan)
{
+ trace_obj.
+ add("lead_table", spl_plan->table->alias.ptr()).
+ add("index", spl_plan->table->key_info[spl_plan->key].name).
+ add("parts", spl_plan->parts).
+ add("split_sel", spl_plan->split_sel).
+ add("cost", spl_plan->cost).
+ add("records", (ha_rows) (records * spl_plan->split_sel)).
+ add("refills", refills);
+
if (refills * spl_plan->cost < spl_opt_info->unsplit_cost)
{
/*
@@ -1100,7 +1145,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
*/
spl_opt_info->last_plan= spl_plan;
spl_opt_info->last_refills= refills;
+ trace_obj.add("chosen", true);
}
+ else
+ trace_obj.add("chosen", false);
}
}
@@ -1132,7 +1180,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
@details
This function injects equalities pushed into a derived table T for which
the split optimization has been chosen by the optimizer. The function
- is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op().
+ is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt().
All equalities usable for splitting T whose right parts do not depend on
any of the 'excluded_tables' can be pushed into the where clause of the
derived table T.
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index bc5fd2be8b5..5e2ce06add7 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7444,7 +7444,6 @@ best_access_path(JOIN *join,
DBUG_ENTER("best_access_path");
Json_writer_object trace_wrapper(thd, "best_access_path");
- Json_writer_array trace_paths(thd, "considered_access_paths");
bitmap_clear_all(eq_join_set);
@@ -7455,6 +7454,7 @@ best_access_path(JOIN *join,
remaining_tables,
&spl_pd_boundary);
+ Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
{ /* Use key if possible */
KEYUSE *keyuse;