From ed3e6f66a265952afded33fb134f6f8bcc31aa89 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 3 May 2023 13:49:32 +0300 Subject: MDEV-26301: Split optimization refills: Optimizer Trace coverage Add Optimizer Trace printouts. --- mysql-test/main/opt_trace.result | 114 +++++++++++++++++++++++++++++++++++++++ mysql-test/main/opt_trace.test | 70 ++++++++++++++++++++++++ sql/opt_split.cc | 72 ++++++++++++++++++++----- sql/sql_select.cc | 2 +- 4 files changed, 245 insertions(+), 13 deletions(-) diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index a7a8fb88e6d..7856f9248ba 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -449,6 +449,11 @@ select * from v2 { } ] }, + { + "check_split_materialized": { + "not_applicable": "no candidate field can be accessed through ref" + } + }, { "best_join_order": ["t1"] }, @@ -772,6 +777,11 @@ explain select * from v1 { } ] }, + { + "check_split_materialized": { + "not_applicable": "group list has no candidates" + } + }, { "best_join_order": ["t1"] }, @@ -8861,5 +8871,109 @@ SET optimizer_trace=DEFAULT; DROP VIEW v; DROP TABLE t; # +# MDEV-26301: Split optimization improvements: Optimizer Trace coverage +# +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; +analyze table t1,t2,t3 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status Table is already up to date +create table t10 ( +grp_id int, +col1 int, +key(grp_id) +); +insert into t10 +select +A.seq, +B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t11 ( +col1 int, +col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; +analyze table t10,t11 persistent for all; +Table Op Msg_type Msg_text +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status Table is already up to date +test.t11 analyze status Engine-independent statistics collected +test.t11 analyze status OK +set optimizer_trace=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS +from information_schema.optimizer_trace; +JS +[ + { + "split_candidates": + ["t10.grp_id"] + } +] +select +json_detailed( +json_remove( +json_extract(trace, '$**.choose_best_splitting') +, '$[0].split_plan_search[0]' + ) +) as JS +from information_schema.optimizer_trace; +JS +[ + { + "considered_keys": + [ + { + "table_name": "t10", + "index": "grp_id", + "rec_per_key": 100, + "param_tables": 1 + } + ], + "refills": 5, + "spl_pd_boundary": 2, + "split_plan_search": + [], + "lead_table": "t10", + "index": "grp_id", + "parts": 1, + "split_sel": 0.001, + "cost": 2536, + "records": 100, + "refills": 5, + "chosen": true + } +] +drop table t1,t2,t3,t10,t11; +set optimizer_trace=DEFAULT; +# # End of 10.4 tests # diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test index e0be5360069..2f1047df7c5 100644 --- a/mysql-test/main/opt_trace.test +++ b/mysql-test/main/opt_trace.test @@ -696,6 +696,76 @@ SET optimizer_trace=DEFAULT; DROP VIEW v; DROP TABLE t; +--echo # +--echo # MDEV-26301: Split optimization improvements: Optimizer Trace coverage +--echo # + +# 5 values +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; + +# 5 value groups of size 2 each +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; + +# 5 value groups of size 3 each +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; + +analyze table t1,t2,t3 persistent for all; + +create table t10 ( + grp_id int, + col1 int, + key(grp_id) +); + +# 100 groups of 100 values each +insert into t10 +select + A.seq, + B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +# and X10 multiplier +create table t11 ( + col1 int, + col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; + +analyze table t10,t11 persistent for all; + +set optimizer_trace=1; +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; + +select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS +from information_schema.optimizer_trace; + +select + json_detailed( + json_remove( + json_extract(trace, '$**.choose_best_splitting') + , '$[0].split_plan_search[0]' + ) + ) as JS +from information_schema.optimizer_trace; + +drop table t1,t2,t3,t10,t11; +set optimizer_trace=DEFAULT; + --echo # --echo # End of 10.4 tests --echo # diff --git a/sql/opt_split.cc b/sql/opt_split.cc index 3a873e220b6..bb3aec9ee8d 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -228,6 +228,7 @@ #include "mariadb.h" #include "sql_select.h" +#include "opt_trace.h" /* Info on a splitting field */ struct SplM_field_info @@ -387,6 +388,9 @@ bool JOIN::check_for_splittable_materialized() if (!partition_list) return false; + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_split(thd, "check_split_materialized"); + ORDER *ord; Dynamic_array candidates; @@ -432,7 +436,10 @@ bool JOIN::check_for_splittable_materialized() } } if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2) + { + trace_split.add("not_applicable", "group list has no candidates"); return false; + } /* For each table from this join find the keys that can be used for ref access @@ -491,7 +498,11 @@ bool JOIN::check_for_splittable_materialized() } if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9) + { + trace_split.add("not_applicable", + "no candidate field can be accessed through ref"); return false; + } /* Create a structure of the type SplM_opt_info and fill it with @@ -509,16 +520,22 @@ bool JOIN::check_for_splittable_materialized() spl_opt_info->tables_usable_for_splitting= 0; spl_opt_info->spl_field_cnt= spl_field_cnt; spl_opt_info->spl_fields= spl_field; - for (cand= cand_start; cand < cand_end; cand++) + { - if (!cand->is_usable_for_ref_access) - continue; - spl_field->producing_item= cand->producing_item; - spl_field->underlying_field= cand->underlying_field; - spl_field->mat_field= cand->mat_field; - spl_opt_info->tables_usable_for_splitting|= - cand->underlying_field->table->map; - spl_field++; + Json_writer_array trace_range(thd, "split_candidates"); + for (cand= cand_start; cand < cand_end; cand++) + { + if (!cand->is_usable_for_ref_access) + continue; + trace_range.add(cand->producing_item); + + spl_field->producing_item= cand->producing_item; + spl_field->underlying_field= cand->underlying_field; + spl_field->mat_field= cand->mat_field; + spl_opt_info->tables_usable_for_splitting|= + cand->underlying_field->table->map; + spl_field++; + } } /* Attach this info to the table T */ @@ -773,7 +790,7 @@ void JOIN::add_keyuses_for_splitting() bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end)); if (ext_keyuses_for_splitting->push(keyuse_ext_end)) goto err; - + // psergey-todo: trace anything here? spl_opt_info->unsplit_card= join_record_count; rec_len= table->s->rec_buff_length; @@ -946,7 +963,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, uint best_key= 0; uint best_key_parts= 0; table_map best_param_tables; - + Json_writer_object trace_obj(thd, "choose_best_splitting"); + Json_writer_array trace_arr(thd, "considered_keys"); /* Check whether there are keys that can be used to join T employing splitting and if so, select the best out of such keys @@ -999,6 +1017,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, best_rec_per_key= rec_per_key; best_key_keyuse_ext_start= key_keyuse_ext_start; best_param_tables= needed_in_prefix; + // trace table, key_name, parts, needed_tables. + Json_writer_object cur_index(thd); + cur_index. + add("table_name", best_table->alias.ptr()). + add("index", best_table->key_info[best_key].name). + add("rec_per_key", best_rec_per_key). + add("param_tables", best_param_tables); } keyuse_ext++; } @@ -1006,6 +1031,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, } while (keyuse_ext->table == table); } + trace_arr.end(); + spl_opt_info->last_plan= 0; double refills= DBL_MAX; table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables; @@ -1031,6 +1058,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, break; } } + + trace_obj.add("refills", refills). + add("spl_pd_boundary", *spl_pd_boundary); + /* The key for splitting was chosen, look for the plan for this key in the cache @@ -1042,11 +1073,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, The plan for the chosen key has not been found in the cache. Build a new plan and save info on it in the cache */ + Json_writer_array wrapper(thd, "split_plan_search"); table_map all_table_map= (((table_map) 1) << join->table_count) - 1; reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, best_key, excluded_tables, true); choose_plan(join, all_table_map & ~join->const_table_map); + wrapper.end(); /* Check that the chosen plan is really a splitting plan. If not or if there is not enough memory to save the plan in the cache @@ -1064,6 +1097,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, { reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, best_key, excluded_tables, false); + trace_obj.add("split_plan_discarded", "constructed unapplicable query plan"); return 0; } @@ -1089,9 +1123,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, best_key, excluded_tables, false); } + else + trace_obj.add("cached_plan_found", 1); if (spl_plan) { + trace_obj. + add("lead_table", spl_plan->table->alias.ptr()). + add("index", spl_plan->table->key_info[spl_plan->key].name). + add("parts", spl_plan->parts). + add("split_sel", spl_plan->split_sel). + add("cost", spl_plan->cost). + add("records", (ha_rows) (records * spl_plan->split_sel)). + add("refills", refills); + if (refills * spl_plan->cost < spl_opt_info->unsplit_cost) { /* @@ -1100,7 +1145,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, */ spl_opt_info->last_plan= spl_plan; spl_opt_info->last_refills= refills; + trace_obj.add("chosen", true); } + else + trace_obj.add("chosen", false); } } @@ -1132,7 +1180,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, @details This function injects equalities pushed into a derived table T for which the split optimization has been chosen by the optimizer. The function - is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op(). + is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt(). All equalities usable for splitting T whose right parts do not depend on any of the 'excluded_tables' can be pushed into the where clause of the derived table T. diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bc5fd2be8b5..5e2ce06add7 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7444,7 +7444,6 @@ best_access_path(JOIN *join, DBUG_ENTER("best_access_path"); Json_writer_object trace_wrapper(thd, "best_access_path"); - Json_writer_array trace_paths(thd, "considered_access_paths"); bitmap_clear_all(eq_join_set); @@ -7455,6 +7454,7 @@ best_access_path(JOIN *join, remaining_tables, &spl_pd_boundary); + Json_writer_array trace_paths(thd, "considered_access_paths"); if (s->keyuse) { /* Use key if possible */ KEYUSE *keyuse; -- cgit v1.2.1