diff options
author | Monty <monty@mariadb.org> | 2023-03-13 18:38:02 +0200 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2023-03-13 18:38:02 +0200 |
commit | 46b01311f8c14e497cd222f2cb99d7754aadf445 (patch) | |
tree | 92882a2110889c90e256b7f11f47dcebb7119541 | |
parent | 43c8f317019a0cd2c341e369c7b2c5e010aec311 (diff) | |
download | mariadb-git-bb-10.6.9-hash-join-improvements.tar.gz |
fixup! 2cc16653817dd80df1c57f7d8499352fc1eeaa5bbb-10.6.9-hash-join-improvements
-rw-r--r-- | sql/sql_select.cc | 110 |
1 files changed, 77 insertions, 33 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index d03216cc98d..a4d3c9bc4e2 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7569,20 +7569,28 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) Estimate how many records we will get if we read just this table and apply a part of WHERE that can be checked for it. + @param s Current JOIN_TAB + @param use_cond_selectivity Value of optimizer_use_condition_selectivity. + If > 1 then use table->cond_selecitivity. + @param force_estiamte Set to 1 if we should not call + use_found_constraint. To be deleted in 11.0 + @return 0.0 No matching rows + @return >= 1.0 Number of expected matching rows + @detail Estimate how many records we will get if we - read the given table with its "independent" access method (either quick select or full table/index scan), - apply the part of WHERE that refers only to this table. - @seealso + @see also table_cond_selectivity() produces selectivity of condition that is checked after joining rows from this table to rows from preceding tables. */ -inline -double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, - uint use_cond_selectivity) +static double apply_selectivity_for_table(JOIN_TAB *s, + uint use_cond_selectivity, + bool *force_estimate) { ha_rows records; double dbl_records; @@ -7593,34 +7601,47 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, double sel= table->cond_selectivity; double table_records= rows2double(s->records); dbl_records= table_records * sel; + *force_estimate= 1; // Don't call use_found_constraint() return dbl_records; } records = s->found_records; /* - If there is a filtering condition on the table (i.e. ref analyzer found - at least one "table.keyXpartY= exprZ", where exprZ refers only to tables - preceding this table in the join order we're now considering), then - assume that 25% of the rows will be filtered out by this condition. - - This heuristic is supposed to force tables used in exprZ to be before - this table in join order. + If applicable, get a more accurate estimate. */ - if (with_found_constraint) - records-= records/4; - - /* - If applicable, get a more accurate estimate. Don't use the two - heuristics at once. - */ + DBUG_ASSERT(s->table->opt_range_condition_rows <= s->found_records); if (s->table->opt_range_condition_rows != s->found_records) + { + *force_estimate= 1; // Don't call use_found_constraint() records= s->table->opt_range_condition_rows; + } dbl_records= (double)records; return dbl_records; } +/* + Take into account that the table's WHERE clause has conditions on earlier + tables that can reduce the number of accepted rows. + + @param records Number of original rows (after selectivity) + + If there is a filtering condition on the table (i.e. ref analyzer found + at least one "table.keyXpartY= exprZ", where exprZ refers only to tables + preceding this table in the join order we're now considering), then + assume that 25% of the rows will be filtered out by this condition. + + This heuristic is supposed to force tables used in exprZ to be before + this table in join order. +*/ + +inline double use_found_constraint(double records) +{ + records-= records/4; + return records; +} + /* Calculate the cost of reading a set of rows trough an index @@ -7683,7 +7704,8 @@ double adjust_quick_cost(double quick_cost, ha_rows records) */ double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables, - double rnd_records, KEYUSE *hj_start_key) + double rnd_records, KEYUSE *hj_start_key, + bool *stats_found) { THD *thd= join->thd; /* @@ -7752,6 +7774,7 @@ double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables, add("avg_frequency", freq); if (freq < min_freq) min_freq= freq; + *stats_found= 1; } } } @@ -8380,17 +8403,13 @@ best_access_path(JOIN *join, Json_writer_object trace_access_hash(thd); trace_access_hash.add("type", "hash"); trace_access_hash.add("index", "hj-key"); - double join_sel= 0.1; + double join_sel; + bool stats_found= 0, force_estimate= 0; /* Estimate the cost of the hash join access to the table */ - double rnd_records= matching_candidates_in_table(s, found_constraint, - use_cond_selectivity); - - tmp= s->quick ? s->quick->read_time : s->scan_time(); - double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; - tmp= COST_ADD(tmp, cmp_time); + double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity, + &force_estimate); DBUG_ASSERT(hj_start_key); - fanout= rnd_records; if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) { /* @@ -8398,9 +8417,30 @@ best_access_path(JOIN *join, Use "fanout" for an estimate of # matching records. */ fanout= hash_join_fanout(join, s, remaining_tables, rnd_records, - hj_start_key); + hj_start_key, &stats_found); join_sel= 1.0; // Don't do the "10% heuristic" } + if (!stats_found) + { + /* + No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics + found. + + Take into account if there is non constant constraints used with + earlier tables in the where expression. + If yes, this will set fanout to rnd_records/4. + We estimate that there will be HASH_FANOUT (10%) + hash matches / row. + */ + if (found_constraint && !force_estimate) + rnd_records= use_found_constraint(rnd_records); + fanout= rnd_records; + join_sel= 0.1; + } + + tmp= s->quick ? s->quick->read_time : s->scan_time(); + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); /* We read the table as many times as join buffer becomes full. */ @@ -8489,9 +8529,13 @@ best_access_path(JOIN *join, !(s->table->force_index && best_key && !s->quick) && // (4) !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join - double rnd_records= matching_candidates_in_table(s, found_constraint, - use_cond_selectivity); - + bool force_estimate= 0; + double rnd_records= apply_selectivity_for_table(s, + use_cond_selectivity, + &force_estimate); + rnd_records= ((found_constraint && !force_estimate) ? + use_found_constraint(rnd_records) : + rnd_records); /* Range optimizer never proposes a RANGE if it isn't better than FULL: so if RANGE is present, it's always preferred to FULL. @@ -9700,7 +9744,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, with previous tables. For quick selects and full table scans, selectivity of COND(this_table) - is accounted for in matching_candidates_in_table(). Here, we only count + is accounted for in apply_selectivity_for_table(). Here, we only count selectivity of COND(this_table, previous_tables). For other access methods, we need to calculate selectivity of the whole @@ -9902,7 +9946,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, /* The table is accessed with full table scan, or quick select. Selectivity of COND(table) is already accounted for in - matching_candidates_in_table(). + apply_selectivity_for_table(). */ sel= 1; } |