summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <sergey@mariadb.com>2022-07-19 14:13:17 +0300
committerSergei Petrunia <sergey@mariadb.com>2022-07-19 15:19:54 +0300
commitac3b3807cc9b21e23e65b1a70c7dcda1f45a3c57 (patch)
tree7ee8b4cb3c71d3bb5c0f318cc62c9ce4e735bf41
parentec4e5900d0ae36fe2bc639d8ce3174b951cfb689 (diff)
downloadmariadb-git-bb-10.10-mdev28929-v4.tar.gz
MDEV-28929: Plan selection takes forever with MDEV-28852 ...bb-10.10-spetrunia2bb-10.10-mdev28929-v4
Part #2: Extend heuristic pruning to use multiple tables as the "Model tables". Before the patch, heuristic pruning uses only one "Model table": The table which had the best cost AND record became the "Model table". After that, if a table's cost and record were both worse than those of the Model Table, the table would be pruned away. This didn't work well when the first table (the optimizer sorts them by record_count) had low record_count but relatively high cost: nothing could be pruned afterwards. The patch adds the two additional "Model tables": one with the least cost and the other with the least record_count. (In both cases, a table can be pruned away if BOTH its cost and record_count are worse than those of a Model table) The new pruning is active when the number of tables to consider for the prefix is higher than @@optimizer_extra_pruning_depth. One can see the new pruning in the Optimizer Trace as - "pruned_by_heuristic":"min_record_count", or - "pruned_by_heuristic":"min_read_time". Old heuristic pruning shows as "pruned_by_heuristic":1.
-rw-r--r--mysql-test/main/mysqld--help.result5
-rw-r--r--mysql-test/main/opt_trace.result162
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_select.cc88
-rw-r--r--sql/sql_select.h7
-rw-r--r--sql/sys_vars.cc7
6 files changed, 92 insertions, 178 deletions
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 9b4084600be..58c371064ba 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -699,6 +699,10 @@ The following specify which files/extra groups are read (specified before remain
max_connections*5 or max_connections + table_cache*2
(whichever is larger) number of file descriptors
(Automatically configured unless set explicitly)
+ --optimizer-extra-pruning-depth=#
+ If the optimizer needs to enumerate join prefix of this
+ size or larger, then it will try agressively prune away
+ the search space.
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
@@ -1662,6 +1666,7 @@ old-alter-table DEFAULT
old-mode UTF8_IS_UTF8MB3
old-passwords FALSE
old-style-user-limits FALSE
+optimizer-extra-pruning-depth 8
optimizer-max-sel-arg-weight 32000
optimizer-prune-level 2
optimizer-search-depth 62
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index b7d450d22d8..da6c1f320e9 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -6035,86 +6035,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
"rows_for_plan": 729,
"cost_for_plan": 176.0410156,
"semijoin_strategy_choice": [],
- "rest_of_plan": [
- {
- "plan_prefix": [
- "t_outer_1",
- "t_outer_2",
- "t_inner_1",
- "t_inner_3"
- ],
- "get_costs_for_tables": [
- {
- "best_access_path": {
- "table": "t_inner_4",
- "considered_access_paths": [
- {
- "access_type": "scan",
- "resulting_rows": 3,
- "cost": 2.005126953,
- "chosen": true
- }
- ],
- "chosen_access_method": {
- "type": "scan",
- "records": 3,
- "cost": 2.005126953,
- "uses_join_buffering": true
- }
- }
- },
- {
- "best_access_path": {
- "table": "t_inner_2",
- "considered_access_paths": [
- {
- "access_type": "scan",
- "resulting_rows": 9,
- "cost": 2.015380859,
- "chosen": true
- }
- ],
- "chosen_access_method": {
- "type": "scan",
- "records": 9,
- "cost": 2.015380859,
- "uses_join_buffering": true
- }
- }
- }
- ]
- },
- {
- "plan_prefix": [
- "t_outer_1",
- "t_outer_2",
- "t_inner_1",
- "t_inner_3"
- ],
- "table": "t_inner_2",
- "rows_for_plan": 6561,
- "cost_for_plan": 1490.256396,
- "semijoin_strategy_choice": [],
- "pruned_by_cost": true,
- "current_cost": 1490.256396,
- "best_cost": 568.8615234
- },
- {
- "plan_prefix": [
- "t_outer_1",
- "t_outer_2",
- "t_inner_1",
- "t_inner_3"
- ],
- "table": "t_inner_4",
- "rows_for_plan": 2187,
- "cost_for_plan": 615.4461426,
- "semijoin_strategy_choice": [],
- "pruned_by_cost": true,
- "current_cost": 615.4461426,
- "best_cost": 568.8615234
- }
- ]
+ "pruned_by_heuristic": "min_read_time"
}
]
},
@@ -6596,86 +6517,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
"rows_for_plan": 729,
"cost_for_plan": 172.4410156,
"semijoin_strategy_choice": [],
- "rest_of_plan": [
- {
- "plan_prefix": [
- "t_outer_1",
- "t_inner_1",
- "t_outer_2",
- "t_inner_3"
- ],
- "get_costs_for_tables": [
- {
- "best_access_path": {
- "table": "t_inner_4",
- "considered_access_paths": [
- {
- "access_type": "scan",
- "resulting_rows": 3,
- "cost": 2.005126953,
- "chosen": true
- }
- ],
- "chosen_access_method": {
- "type": "scan",
- "records": 3,
- "cost": 2.005126953,
- "uses_join_buffering": true
- }
- }
- },
- {
- "best_access_path": {
- "table": "t_inner_2",
- "considered_access_paths": [
- {
- "access_type": "scan",
- "resulting_rows": 9,
- "cost": 2.015380859,
- "chosen": true
- }
- ],
- "chosen_access_method": {
- "type": "scan",
- "records": 9,
- "cost": 2.015380859,
- "uses_join_buffering": true
- }
- }
- }
- ]
- },
- {
- "plan_prefix": [
- "t_outer_1",
- "t_inner_1",
- "t_outer_2",
- "t_inner_3"
- ],
- "table": "t_inner_2",
- "rows_for_plan": 6561,
- "cost_for_plan": 1486.656396,
- "semijoin_strategy_choice": [],
- "pruned_by_cost": true,
- "current_cost": 1486.656396,
- "best_cost": 565.2615234
- },
- {
- "plan_prefix": [
- "t_outer_1",
- "t_inner_1",
- "t_outer_2",
- "t_inner_3"
- ],
- "table": "t_inner_4",
- "rows_for_plan": 2187,
- "cost_for_plan": 611.8461426,
- "semijoin_strategy_choice": [],
- "pruned_by_cost": true,
- "current_cost": 611.8461426,
- "best_cost": 565.2615234
- }
- ]
+ "pruned_by_heuristic": "min_read_time"
}
]
},
diff --git a/sql/sql_class.h b/sql/sql_class.h
index e468e628e61..f95b95dc9a1 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -720,6 +720,7 @@ typedef struct system_variables
ulong net_retry_count;
ulong net_wait_timeout;
ulong net_write_timeout;
+ ulong optimizer_extra_pruning_depth;
ulong optimizer_prune_level;
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 1fe24e2eb1c..3db50da3009 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -109,8 +109,7 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
const key_map *keys,ha_rows limit);
static void optimize_straight_join(JOIN *join, table_map join_tables);
static bool greedy_search(JOIN *join, table_map remaining_tables,
- uint depth, uint prune_level,
- uint use_cond_selectivity);
+ uint depth, uint use_cond_selectivity);
enum enum_best_search {
SEARCH_ABORT= -2,
@@ -124,7 +123,6 @@ best_extension_by_limited_search(JOIN *join,
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
- uint prune_level,
uint use_cond_selectivity,
table_map *processed_eq_ref_tables);
static uint determine_search_depth(JOIN* join);
@@ -8778,7 +8776,6 @@ bool
choose_plan(JOIN *join, table_map join_tables)
{
uint search_depth= join->thd->variables.optimizer_search_depth;
- uint prune_level= join->thd->variables.optimizer_prune_level;
uint use_cond_selectivity=
join->thd->variables.optimizer_use_condition_selectivity;
bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
@@ -8786,6 +8783,9 @@ choose_plan(JOIN *join, table_map join_tables)
DBUG_ENTER("choose_plan");
join->cur_embedding_map= 0;
+ join->extra_heuristic_pruning= false;
+ join->prune_level= join->thd->variables.optimizer_prune_level;
+
reset_nj_counters(join, join->join_list);
qsort2_cmp jtab_sort_func;
@@ -8842,8 +8842,14 @@ choose_plan(JOIN *join, table_map join_tables)
if (search_depth == 0)
/* Automatically determine a reasonable value for 'search_depth' */
search_depth= determine_search_depth(join);
- if (greedy_search(join, join_tables, search_depth, prune_level,
- use_cond_selectivity))
+
+ if (join->prune_level >= 1 &&
+ search_depth >= thd->variables.optimizer_extra_pruning_depth)
+ {
+ join->extra_heuristic_pruning= true;
+ }
+
+ if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
DBUG_RETURN(TRUE);
}
@@ -9231,8 +9237,6 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
for the query
@param remaining_tables set of tables not included into the partial plan yet
@param search_depth controlls the exhaustiveness of the search
- @param prune_level the pruning heuristics that should be applied during
- search
@param use_cond_selectivity specifies how the selectivity of the conditions
pushed to a table should be taken into account
@@ -9246,7 +9250,6 @@ static bool
greedy_search(JOIN *join,
table_map remaining_tables,
uint search_depth,
- uint prune_level,
uint use_cond_selectivity)
{
double record_count= 1.0;
@@ -9280,7 +9283,6 @@ greedy_search(JOIN *join,
if ((int) best_extension_by_limited_search(join, remaining_tables, idx,
record_count,
read_time, search_depth,
- prune_level,
use_cond_selectivity,
&eq_ref_tables) <
(int) SEARCH_OK)
@@ -10159,8 +10161,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
When 'best_extension_by_limited_search' is called for the first time,
'join->best_read' must be set to the largest possible value (e.g. DBL_MAX).
The actual implementation provides a way to optionally use pruning
- heuristic (controlled by the parameter 'prune_level') to reduce the search
- space by skipping some partial plans.
+ heuristic to reduce the search space by skipping some partial plans.
@note
The parameter 'search_depth' provides control over the recursion
@@ -10179,8 +10180,6 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
@param search_depth maximum depth of the recursion and thus size of the
found optimal plan
(0 < search_depth <= join->tables+1).
- @param prune_level pruning heuristics that should be applied during
- optimization
(values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
@param use_cond_selectivity specifies how the selectivity of the conditions
pushed to a table should be taken into account
@@ -10203,7 +10202,6 @@ best_extension_by_limited_search(JOIN *join,
double record_count,
double read_time,
uint search_depth,
- uint prune_level,
uint use_cond_selectivity,
table_map *processed_eq_ref_tables)
{
@@ -10270,7 +10268,7 @@ best_extension_by_limited_search(JOIN *join,
Json_writer_array arr(thd, "get_costs_for_tables");
- if (idx > join->const_tables && prune_level >= 2 &&
+ if (idx > join->const_tables && join->prune_level >= 2 &&
join->positions[idx-1].type == JT_EQ_REF &&
(join->eq_ref_tables & allowed_current_tables))
{
@@ -10312,6 +10310,12 @@ best_extension_by_limited_search(JOIN *join,
join->sort_positions + join->sort_space);
accepted_tables= 0;
+ double min_rec_count= DBL_MAX;
+ double min_rec_count_read_time= DBL_MAX;
+
+ double min_cost= DBL_MAX;
+ double min_cost_record_count= DBL_MAX;
+
for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++)
{
s= *pos->join_tab;
@@ -10375,8 +10379,31 @@ best_extension_by_limited_search(JOIN *join,
Prune some less promising partial plans. This heuristic may miss
the optimal QEPs, thus it results in a non-exhaustive search.
*/
- if (prune_level >= 1)
+ if (join->prune_level >= 1)
{
+ // Collect the members with min_cost and min_read_time.
+ bool min_rec_hit= false;
+ bool min_cost_hit= false;
+
+ if (join->extra_heuristic_pruning &&
+ (!(position->key_dependent & allowed_tables) ||
+ position->records_read < 2.0))
+ {
+ if (current_record_count < min_rec_count)
+ {
+ min_rec_count= current_record_count;
+ min_rec_count_read_time= current_read_time;
+ min_rec_hit= true;
+ }
+
+ if (current_read_time < min_cost)
+ {
+ min_cost_record_count= current_record_count;
+ min_cost= current_read_time;
+ min_cost_hit= true;
+ }
+ }
+
if (best_record_count > current_record_count ||
best_read_time > current_read_time ||
(idx == join->const_tables && // 's' is the first table in the QEP
@@ -10401,6 +10428,13 @@ best_extension_by_limited_search(JOIN *join,
}
else
{
+ /*
+ Typically, we get here if:
+ best_record_count < current_record_count &&
+ best_read_time < current_read_time
+ That is, both record_count and read_time are worse than the best_
+ ones. This plan doesn't look promising, prune it away.
+ */
DBUG_EXECUTE("opt", print_plan(join, idx+1,
current_record_count,
read_time,
@@ -10411,6 +10445,25 @@ best_extension_by_limited_search(JOIN *join,
restore_prev_sj_state(remaining_tables, s, idx);
continue;
}
+
+ const char* prune_reason= NULL;
+ if (!min_rec_hit &&
+ current_record_count >= min_rec_count &&
+ current_read_time >= min_rec_count_read_time)
+ prune_reason= "min_record_count";
+
+ if (!min_cost_hit &&
+ current_record_count >= min_cost_record_count &&
+ current_read_time >= min_cost)
+ prune_reason= "min_read_time";
+
+ if (prune_reason)
+ {
+ trace_one_table.add("pruned_by_heuristic", prune_reason);
+ restore_prev_nj_state(s);
+ restore_prev_sj_state(remaining_tables, s, idx);
+ continue;
+ }
}
double pushdown_cond_selectivity= 1.0;
@@ -10448,7 +10501,6 @@ best_extension_by_limited_search(JOIN *join,
partial_join_cardinality,
current_read_time,
search_depth - 1,
- prune_level,
use_cond_selectivity,
&found_eq_ref_tables);
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 46a359c746f..e7c8b3527f6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1329,6 +1329,13 @@ public:
*/
table_map cur_sj_inner_tables;
+ /* A copy of thd->variables.optimizer_prune_level */
+ uint prune_level;
+ /*
+ If true, do extra heuristic pruning (enabled based on
+ optimizer_extra_pruning_depth)
+ */
+ bool extra_heuristic_pruning;
#ifndef DBUG_OFF
void dbug_verify_sj_inner_tables(uint n_positions) const;
#endif
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 43b6f83d568..fc9b4262394 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2781,6 +2781,13 @@ static Sys_var_ulong Sys_optimizer_search_depth(
SESSION_VAR(optimizer_search_depth), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, MAX_TABLES+1), DEFAULT(MAX_TABLES+1), BLOCK_SIZE(1));
+static Sys_var_ulong Sys_optimizer_extra_pruning_depth(
+ "optimizer_extra_pruning_depth",
+ "If the optimizer needs to enumerate join prefix of this size or "
+ "larger, then it will try agressively prune away the search space.",
+ SESSION_VAR(optimizer_extra_pruning_depth), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, MAX_TABLES+1), DEFAULT(8), BLOCK_SIZE(1));
+
/* this is used in the sigsegv handler */
export const char *optimizer_switch_names[]=
{