diff options
author | Sergei Petrunia <sergey@mariadb.com> | 2022-07-19 14:13:17 +0300 |
---|---|---|
committer | Sergei Petrunia <sergey@mariadb.com> | 2022-07-19 15:19:54 +0300 |
commit | ac3b3807cc9b21e23e65b1a70c7dcda1f45a3c57 (patch) | |
tree | 7ee8b4cb3c71d3bb5c0f318cc62c9ce4e735bf41 | |
parent | ec4e5900d0ae36fe2bc639d8ce3174b951cfb689 (diff) | |
download | mariadb-git-bb-10.10-mdev28929-v4.tar.gz |
MDEV-28929: Plan selection takes forever with MDEV-28852 ... (branches: bb-10.10-spetrunia2, bb-10.10-mdev28929-v4)
Part #2: Extend heuristic pruning to use multiple tables as the
"Model tables".
Before the patch, heuristic pruning used only one "Model table":
the table which had the best cost AND record_count became the "Model table".
After that, if a table's cost and record_count were both worse than
those of the Model table, the table would be pruned away.
This didn't work well when the first table (the optimizer sorts them
by record_count) had low record_count but relatively high cost: nothing
could be pruned afterwards.
The patch adds two additional "Model tables": one with the least
cost and the other with the least record_count.
(In both cases, a table can be pruned away if BOTH its cost and
record_count are worse than those of a Model table.)
The new pruning is active when @@optimizer_prune_level is at least 1 and
the search depth (the size of the join prefix to enumerate) is greater than
or equal to @@optimizer_extra_pruning_depth.
One can see the new pruning in the Optimizer Trace as
- "pruned_by_heuristic":"min_record_count", or
- "pruned_by_heuristic":"min_read_time".
Old heuristic pruning shows as "pruned_by_heuristic":1.
-rw-r--r-- | mysql-test/main/mysqld--help.result | 5 | ||||
-rw-r--r-- | mysql-test/main/opt_trace.result | 162 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 88 | ||||
-rw-r--r-- | sql/sql_select.h | 7 | ||||
-rw-r--r-- | sql/sys_vars.cc | 7 |
6 files changed, 92 insertions, 178 deletions
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 9b4084600be..58c371064ba 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -699,6 +699,10 @@ The following specify which files/extra groups are read (specified before remain max_connections*5 or max_connections + table_cache*2 (whichever is larger) number of file descriptors (Automatically configured unless set explicitly) + --optimizer-extra-pruning-depth=# + If the optimizer needs to enumerate join prefix of this + size or larger, then it will try agressively prune away + the search space. --optimizer-max-sel-arg-weight=# The maximum weight of the SEL_ARG graph. Set to 0 for no limit @@ -1662,6 +1666,7 @@ old-alter-table DEFAULT old-mode UTF8_IS_UTF8MB3 old-passwords FALSE old-style-user-limits FALSE +optimizer-extra-pruning-depth 8 optimizer-max-sel-arg-weight 32000 optimizer-prune-level 2 optimizer-search-depth 62 diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index b7d450d22d8..da6c1f320e9 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -6035,86 +6035,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "rows_for_plan": 729, "cost_for_plan": 176.0410156, "semijoin_strategy_choice": [], - "rest_of_plan": [ - { - "plan_prefix": [ - "t_outer_1", - "t_outer_2", - "t_inner_1", - "t_inner_3" - ], - "get_costs_for_tables": [ - { - "best_access_path": { - "table": "t_inner_4", - "considered_access_paths": [ - { - "access_type": "scan", - "resulting_rows": 3, - "cost": 2.005126953, - "chosen": true - } - ], - "chosen_access_method": { - "type": "scan", - "records": 3, - "cost": 2.005126953, - "uses_join_buffering": true - } - } - }, - { - "best_access_path": { - "table": "t_inner_2", - "considered_access_paths": [ - { - "access_type": "scan", - "resulting_rows": 9, - "cost": 2.015380859, - "chosen": true - } - ], - "chosen_access_method": { 
- "type": "scan", - "records": 9, - "cost": 2.015380859, - "uses_join_buffering": true - } - } - } - ] - }, - { - "plan_prefix": [ - "t_outer_1", - "t_outer_2", - "t_inner_1", - "t_inner_3" - ], - "table": "t_inner_2", - "rows_for_plan": 6561, - "cost_for_plan": 1490.256396, - "semijoin_strategy_choice": [], - "pruned_by_cost": true, - "current_cost": 1490.256396, - "best_cost": 568.8615234 - }, - { - "plan_prefix": [ - "t_outer_1", - "t_outer_2", - "t_inner_1", - "t_inner_3" - ], - "table": "t_inner_4", - "rows_for_plan": 2187, - "cost_for_plan": 615.4461426, - "semijoin_strategy_choice": [], - "pruned_by_cost": true, - "current_cost": 615.4461426, - "best_cost": 568.8615234 - } - ] + "pruned_by_heuristic": "min_read_time" } ] }, @@ -6596,86 +6517,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "rows_for_plan": 729, "cost_for_plan": 172.4410156, "semijoin_strategy_choice": [], - "rest_of_plan": [ - { - "plan_prefix": [ - "t_outer_1", - "t_inner_1", - "t_outer_2", - "t_inner_3" - ], - "get_costs_for_tables": [ - { - "best_access_path": { - "table": "t_inner_4", - "considered_access_paths": [ - { - "access_type": "scan", - "resulting_rows": 3, - "cost": 2.005126953, - "chosen": true - } - ], - "chosen_access_method": { - "type": "scan", - "records": 3, - "cost": 2.005126953, - "uses_join_buffering": true - } - } - }, - { - "best_access_path": { - "table": "t_inner_2", - "considered_access_paths": [ - { - "access_type": "scan", - "resulting_rows": 9, - "cost": 2.015380859, - "chosen": true - } - ], - "chosen_access_method": { - "type": "scan", - "records": 9, - "cost": 2.015380859, - "uses_join_buffering": true - } - } - } - ] - }, - { - "plan_prefix": [ - "t_outer_1", - "t_inner_1", - "t_outer_2", - "t_inner_3" - ], - "table": "t_inner_2", - "rows_for_plan": 6561, - "cost_for_plan": 1486.656396, - "semijoin_strategy_choice": [], - "pruned_by_cost": true, - "current_cost": 1486.656396, - "best_cost": 565.2615234 - }, - { - "plan_prefix": [ 
- "t_outer_1", - "t_inner_1", - "t_outer_2", - "t_inner_3" - ], - "table": "t_inner_4", - "rows_for_plan": 2187, - "cost_for_plan": 611.8461426, - "semijoin_strategy_choice": [], - "pruned_by_cost": true, - "current_cost": 611.8461426, - "best_cost": 565.2615234 - } - ] + "pruned_by_heuristic": "min_read_time" } ] }, diff --git a/sql/sql_class.h b/sql/sql_class.h index e468e628e61..f95b95dc9a1 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -720,6 +720,7 @@ typedef struct system_variables ulong net_retry_count; ulong net_wait_timeout; ulong net_write_timeout; + ulong optimizer_extra_pruning_depth; ulong optimizer_prune_level; ulong optimizer_search_depth; ulong optimizer_selectivity_sampling_limit; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 1fe24e2eb1c..3db50da3009 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -109,8 +109,7 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select, const key_map *keys,ha_rows limit); static void optimize_straight_join(JOIN *join, table_map join_tables); static bool greedy_search(JOIN *join, table_map remaining_tables, - uint depth, uint prune_level, - uint use_cond_selectivity); + uint depth, uint use_cond_selectivity); enum enum_best_search { SEARCH_ABORT= -2, @@ -124,7 +123,6 @@ best_extension_by_limited_search(JOIN *join, table_map remaining_tables, uint idx, double record_count, double read_time, uint depth, - uint prune_level, uint use_cond_selectivity, table_map *processed_eq_ref_tables); static uint determine_search_depth(JOIN* join); @@ -8778,7 +8776,6 @@ bool choose_plan(JOIN *join, table_map join_tables) { uint search_depth= join->thd->variables.optimizer_search_depth; - uint prune_level= join->thd->variables.optimizer_prune_level; uint use_cond_selectivity= join->thd->variables.optimizer_use_condition_selectivity; bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN); @@ -8786,6 +8783,9 @@ choose_plan(JOIN *join, table_map join_tables) 
DBUG_ENTER("choose_plan"); join->cur_embedding_map= 0; + join->extra_heuristic_pruning= false; + join->prune_level= join->thd->variables.optimizer_prune_level; + reset_nj_counters(join, join->join_list); qsort2_cmp jtab_sort_func; @@ -8842,8 +8842,14 @@ choose_plan(JOIN *join, table_map join_tables) if (search_depth == 0) /* Automatically determine a reasonable value for 'search_depth' */ search_depth= determine_search_depth(join); - if (greedy_search(join, join_tables, search_depth, prune_level, - use_cond_selectivity)) + + if (join->prune_level >= 1 && + search_depth >= thd->variables.optimizer_extra_pruning_depth) + { + join->extra_heuristic_pruning= true; + } + + if (greedy_search(join, join_tables, search_depth, use_cond_selectivity)) DBUG_RETURN(TRUE); } @@ -9231,8 +9237,6 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) for the query @param remaining_tables set of tables not included into the partial plan yet @param search_depth controlls the exhaustiveness of the search - @param prune_level the pruning heuristics that should be applied during - search @param use_cond_selectivity specifies how the selectivity of the conditions pushed to a table should be taken into account @@ -9246,7 +9250,6 @@ static bool greedy_search(JOIN *join, table_map remaining_tables, uint search_depth, - uint prune_level, uint use_cond_selectivity) { double record_count= 1.0; @@ -9280,7 +9283,6 @@ greedy_search(JOIN *join, if ((int) best_extension_by_limited_search(join, remaining_tables, idx, record_count, read_time, search_depth, - prune_level, use_cond_selectivity, &eq_ref_tables) < (int) SEARCH_OK) @@ -10159,8 +10161,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, When 'best_extension_by_limited_search' is called for the first time, 'join->best_read' must be set to the largest possible value (e.g. DBL_MAX). 
The actual implementation provides a way to optionally use pruning - heuristic (controlled by the parameter 'prune_level') to reduce the search - space by skipping some partial plans. + heuristic to reduce the search space by skipping some partial plans. @note The parameter 'search_depth' provides control over the recursion @@ -10179,8 +10180,6 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, @param search_depth maximum depth of the recursion and thus size of the found optimal plan (0 < search_depth <= join->tables+1). - @param prune_level pruning heuristics that should be applied during - optimization (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS) @param use_cond_selectivity specifies how the selectivity of the conditions pushed to a table should be taken into account @@ -10203,7 +10202,6 @@ best_extension_by_limited_search(JOIN *join, double record_count, double read_time, uint search_depth, - uint prune_level, uint use_cond_selectivity, table_map *processed_eq_ref_tables) { @@ -10270,7 +10268,7 @@ best_extension_by_limited_search(JOIN *join, Json_writer_array arr(thd, "get_costs_for_tables"); - if (idx > join->const_tables && prune_level >= 2 && + if (idx > join->const_tables && join->prune_level >= 2 && join->positions[idx-1].type == JT_EQ_REF && (join->eq_ref_tables & allowed_current_tables)) { @@ -10312,6 +10310,12 @@ best_extension_by_limited_search(JOIN *join, join->sort_positions + join->sort_space); accepted_tables= 0; + double min_rec_count= DBL_MAX; + double min_rec_count_read_time= DBL_MAX; + + double min_cost= DBL_MAX; + double min_cost_record_count= DBL_MAX; + for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++) { s= *pos->join_tab; @@ -10375,8 +10379,31 @@ best_extension_by_limited_search(JOIN *join, Prune some less promising partial plans. This heuristic may miss the optimal QEPs, thus it results in a non-exhaustive search. 
*/ - if (prune_level >= 1) + if (join->prune_level >= 1) { + // Collect the members with min_cost and min_read_time. + bool min_rec_hit= false; + bool min_cost_hit= false; + + if (join->extra_heuristic_pruning && + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) + { + if (current_record_count < min_rec_count) + { + min_rec_count= current_record_count; + min_rec_count_read_time= current_read_time; + min_rec_hit= true; + } + + if (current_read_time < min_cost) + { + min_cost_record_count= current_record_count; + min_cost= current_read_time; + min_cost_hit= true; + } + } + if (best_record_count > current_record_count || best_read_time > current_read_time || (idx == join->const_tables && // 's' is the first table in the QEP @@ -10401,6 +10428,13 @@ best_extension_by_limited_search(JOIN *join, } else { + /* + Typically, we get here if: + best_record_count < current_record_count && + best_read_time < current_read_time + That is, both record_count and read_time are worse than the best_ + ones. This plan doesn't look promising, prune it away. 
+ */ DBUG_EXECUTE("opt", print_plan(join, idx+1, current_record_count, read_time, @@ -10411,6 +10445,25 @@ best_extension_by_limited_search(JOIN *join, restore_prev_sj_state(remaining_tables, s, idx); continue; } + + const char* prune_reason= NULL; + if (!min_rec_hit && + current_record_count >= min_rec_count && + current_read_time >= min_rec_count_read_time) + prune_reason= "min_record_count"; + + if (!min_cost_hit && + current_record_count >= min_cost_record_count && + current_read_time >= min_cost) + prune_reason= "min_read_time"; + + if (prune_reason) + { + trace_one_table.add("pruned_by_heuristic", prune_reason); + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } } double pushdown_cond_selectivity= 1.0; @@ -10448,7 +10501,6 @@ best_extension_by_limited_search(JOIN *join, partial_join_cardinality, current_read_time, search_depth - 1, - prune_level, use_cond_selectivity, &found_eq_ref_tables); swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab); diff --git a/sql/sql_select.h b/sql/sql_select.h index 46a359c746f..e7c8b3527f6 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1329,6 +1329,13 @@ public: */ table_map cur_sj_inner_tables; + /* A copy of thd->variables.optimizer_prune_level */ + uint prune_level; + /* + If true, do extra heuristic pruning (enabled based on + optimizer_extra_pruning_depth) + */ + bool extra_heuristic_pruning; #ifndef DBUG_OFF void dbug_verify_sj_inner_tables(uint n_positions) const; #endif diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 43b6f83d568..fc9b4262394 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2781,6 +2781,13 @@ static Sys_var_ulong Sys_optimizer_search_depth( SESSION_VAR(optimizer_search_depth), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, MAX_TABLES+1), DEFAULT(MAX_TABLES+1), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_extra_pruning_depth( + "optimizer_extra_pruning_depth", + "If the optimizer needs to enumerate join prefix of this size or " + 
"larger, then it will try agressively prune away the search space.", + SESSION_VAR(optimizer_extra_pruning_depth), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, MAX_TABLES+1), DEFAULT(8), BLOCK_SIZE(1)); + /* this is used in the sigsegv handler */ export const char *optimizer_switch_names[]= { |