diff options
author | Monty <monty@mariadb.org> | 2022-10-31 18:02:36 +0200 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2022-12-20 23:11:23 +0200 |
commit | b5e6fa18810bf43803ea1e50e6535a45eddbdf1c (patch) | |
tree | 91335c8a0e6cace2a1934f9abbad92c7a12691ec | |
parent | 55ea9670fbcfedbf85fbc78a49c7a15a860f8d72 (diff) | |
download | mariadb-git-b5e6fa18810bf43803ea1e50e6535a45eddbdf1c.tar.gz |
Fixes for 'Filtering'
- table_after_join_selectivity() should use records_init (new bug)
- get_examined_rows() changed to double to get similar results
as in MariaDB 10.11
- Fixed bug where table_after_join_selectivity() did not correct
selectivity in the case where a RANGE is used instead of a REF.
This can happen if the range can use more key_parts than the REF.
WHERE key_part1=10 and key_part2 < 10
Other things:
- Use JT_RANGE instead of JT_ALL for RANGE access in all parts of the code.
Before we used JT_ALL for RANGE.
- Force RANGE be used in best_access_path() if the range used more key
parts than ref. In the original code, this was done much later in
make_join_select)(). However we need to know in
table_after_join_selectivity() if we have used RANGE or not.
- Added more information about filtering to optimizer_trace.
-rw-r--r-- | mysql-test/main/selectivity.result | 12 | ||||
-rw-r--r-- | mysql-test/main/selectivity.test | 2 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 140 | ||||
-rw-r--r-- | sql/sql_select.h | 2 | ||||
-rw-r--r-- | sql/sql_update.cc | 1 |
6 files changed, 106 insertions, 52 deletions
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index 2505dbcbcc0..9ab15a08839 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -1808,6 +1808,18 @@ b a a b 7 7 8 8 8 8 9 9 9 9 10 10 +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended select t1.b,t2.a,t3.a,t3.b from t1,t2,t3 +where t1.c = t2.a AND t1.d = t3.a and t1.a = 50 and t1.b <= 100; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 range a a 10 NULL 9 100.00 Using index condition; Using where +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.c 1 100.00 Using index +1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t1.d 1 100.00 +Warnings: +Note 1003 select `test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t3` where `test`.`t2`.`a` = `test`.`t1`.`c` and `test`.`t3`.`a` = `test`.`t1`.`d` and `test`.`t1`.`a` = 50 and `test`.`t1`.`b` <= 100 set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; # diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test index 2662e2250b0..f2992ac1073 100644 --- a/mysql-test/main/selectivity.test +++ b/mysql-test/main/selectivity.test @@ -1234,6 +1234,8 @@ eval $query; set optimizer_use_condition_selectivity=2; eval explain extended $query; eval $query; +analyze table t1; +eval explain extended $query; set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 7c9343d77b5..54df6cb227b 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4131,6 +4131,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) */ if (join->best_positions[idx].key) { + DBUG_ASSERT(join->best_positions[idx].type != JT_RANGE); delete join->best_positions[idx].table->quick; join->best_positions[idx].table->quick= NULL; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 4ef9e8192c4..4c06b79201b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -104,6 +104,11 @@ #define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A)) +inline double safe_filtered(double a, double b) +{ + return b != 0 ? a/b*100.0 : 0.0; +} + const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", "ref_or_null","unique_subquery","index_subquery", @@ -3221,6 +3226,7 @@ int JOIN::optimize_stage2() */ if ((order || group_list) && tab->type != JT_ALL && + tab->type != JT_RANGE && tab->type != JT_NEXT && tab->type != JT_FT && tab->type != JT_REF_OR_NULL && @@ -8052,6 +8058,7 @@ struct best_plan enum join_type type; uint forced_index; uint max_key_part; + table_map found_ref; bool uses_jbuf; }; @@ -8099,6 +8106,7 @@ best_access_path(JOIN *join, best.max_key_part= 0; best.type= JT_UNKNOWN; best.forced_index= MAX_KEY; + best.found_ref= 0; best.ref_depends_map= 0; best.uses_jbuf= FALSE; best.spl_plan= 0; @@ -8692,6 +8700,7 @@ best_access_path(JOIN *join, records_after_filter : records_best_filter); best.key= start_key; + best.found_ref= found_ref; best.max_key_part= max_key_part; best.ref_depends_map= found_ref; best.filter= filter; @@ -8868,6 +8877,7 @@ best_access_path(JOIN *join, const char *scan_type= ""; enum join_type type; uint forced_index= MAX_KEY; + bool force_plan= 0; /* Range optimizer never proposes a RANGE if it isn't better @@ -8941,6 +8951,17 @@ best_access_path(JOIN *join, table->opt_range[key_no].selectivity= filter->selectivity; } } + if (best.key && key_no == best.key->key && + !best.found_ref && + best.max_key_part < table->opt_range[best.key->key].key_parts && + table->opt_range[best.key->key].ranges == 1) + { + /* + Force to use range as it is using the 'best key' and using more + key parts (and thus will read less rows) + */ + force_plan= 1; + } type= JT_RANGE; } else @@ -9094,7 +9115,7 @@ best_access_path(JOIN *join, } } - if (cur_cost + COST_EPS < best.cost) + if (cur_cost + COST_EPS < best.cost || force_plan) { /* If the table has a range (s->quick is set) make_join_select() @@ -9689,7 +9710,17 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) double ratio= current_record_count / original_record_count; /* QQQ This is just to stop an assert later */ if (ratio < 1) + { position->records_out*= ratio; + } + if (unlikely(trace_one_table.trace_started())) + { + trace_one_table. + add("sj_rows_out", position->records_out). + add("sj_rows_for_plan", current_record_count). + add("sj_filtered", safe_filtered(position->records_out, + position->records_init)); + } } remaining_tables&= ~(s->table->map); @@ -9703,8 +9734,10 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) pushdown_cond_selectivity != 1.0) { trace_one_table. + add("rows_out", records_out). add("pushdown_cond_selectivity", pushdown_cond_selectivity). - add("rows_out", records_out); + add("filtered", safe_filtered(position->records_out, + position->records_init)); } position->cond_selectivity= pushdown_cond_selectivity; position->records_out= records_out; @@ -10340,8 +10373,9 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, { key_part_map quick_key_map= (key_part_map(1) << table->opt_range[key].key_parts) - 1; - if (table->opt_range[key].rows && - !(quick_key_map & ~table->const_key_parts[key])) + if ((table->opt_range[key].rows && + !(quick_key_map & ~table->const_key_parts[key])) || + s->type == JT_RANGE) { /* Ok, there is an equality for each of the key parts used by the @@ -10502,7 +10536,7 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, all range or ref access, which should not be smaller than what we calculated above. */ - records_out= pos->records_after_filter * sel; + records_out= pos->records_init * sel; set_if_smaller(records_out, pos->records_out); } @@ -10962,8 +10996,11 @@ best_extension_by_limited_search(JOIN *join, position->records_out*= ratio; if (unlikely(trace_one_table.trace_started())) { - trace_one_table.add("sj_rows_out", position->records_out); - trace_one_table.add("sj_rows_for_plan", current_record_count); + trace_one_table. + add("sj_rows_out", position->records_out). + add("sj_rows_for_plan", current_record_count). + add("sj_filtered", safe_filtered(position->records_out, + position->records_init)); } } /* Expand only partial plans with lower cost than the best QEP so far */ @@ -11089,6 +11126,13 @@ best_extension_by_limited_search(JOIN *join, remaining_tables & ~real_table_bit, &position->records_out); + if (unlikely(trace_one_table.trace_started()) && + pushdown_cond_selectivity != 1.0) + trace_one_table. + add("pushdown_cond_selectivity", pushdown_cond_selectivity). + add("filtered", safe_filtered(position->records_out, + position->records_init)). + add("rows_out", position->records_out); } join->positions[idx].cond_selectivity= pushdown_cond_selectivity; @@ -12990,15 +13034,13 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X */ JOIN_TAB *first_inner_tab= tab->first_inner; + COND *tmp; if (!tab->bush_children) current_map= tab->table->map; else current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; - bool use_quick_range=0; - COND *tmp; - /* Tables that are within SJ-Materialization nests cannot have their conditions referring to preceding non-const tables. @@ -13016,7 +13058,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) used_tables|=current_map; - if (tab->type == JT_REF && tab->quick && + if ((tab->type == JT_REF || tab->type == JT_RANGE) && tab->quick && (((uint) tab->ref.key == tab->quick->index && tab->ref.key_length < tab->quick->max_used_key_length) || (!is_hash_join_key_no(tab->ref.key) && @@ -13030,8 +13072,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) add("ref_to_range", true). add("cause", "range uses longer key"); } - tab->type=JT_ALL; - use_quick_range=1; + tab->type= JT_RANGE; tab->use_quick=1; tab->ref.key= -1; tab->ref.key_parts=0; // Don't use ref key. @@ -13096,7 +13137,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (cond && !tmp && tab->quick) { // Outer join - if (tab->type != JT_ALL && !is_hj) + if ((tab->type != JT_ALL && tab->type != JT_RANGE) && !is_hj) { /* Don't use the quick method @@ -13220,8 +13261,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->table->reginfo.impossible_range) DBUG_RETURN(1); } - else if ((tab->type == JT_ALL || tab->type == JT_NEXT) && - ! use_quick_range) + else if ((tab->type == JT_ALL || tab->type == JT_NEXT)) { if (!tab->const_keys.is_clear_all() && tab->table->reginfo.impossible_range) @@ -14330,6 +14370,7 @@ uint check_join_cache_usage(JOIN_TAB *tab, switch (tab->type) { case JT_NEXT: case JT_ALL: + case JT_RANGE: if (cache_level == 1) prev_cache= 0; if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache))) @@ -14412,7 +14453,7 @@ uint check_join_cache_usage(JOIN_TAB *tab, } no_join_cache: - if (tab->type != JT_ALL && tab->is_ref_for_hash_join()) + if (tab->type != JT_ALL && tab->type != JT_RANGE && tab->is_ref_for_hash_join()) { tab->type= JT_ALL; tab->ref.key_parts= 0; @@ -14494,6 +14535,7 @@ restart: case JT_REF_OR_NULL: case JT_NEXT: case JT_ALL: + case JT_RANGE: tab->used_join_cache_level= check_join_cache_usage(tab, options, no_jbuf_after, idx, @@ -14768,7 +14810,10 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } break; case JT_ALL: + case JT_RANGE: case JT_HASH: + { + bool have_quick_select= tab->select && tab->select->quick; /* If previous table use cache If the incoming data set is already sorted don't use cache. @@ -14806,7 +14851,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } else { - if (tab->select && tab->select->quick) + if (have_quick_select) { if (statistics) join->thd->inc_status_select_full_range_join(); @@ -14823,11 +14868,10 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } if (!table->no_keyread) { - if (!(tab->select && tab->select->quick && - tab->select->quick->index != MAX_KEY && //not index_merge - table->covering_keys.is_set(tab->select->quick->index)) && - (!table->covering_keys.is_clear_all() && - !(tab->select && tab->select->quick))) + if (!(have_quick_select && + tab->select->quick->index != MAX_KEY && //not index_merge + table->covering_keys.is_set(tab->select->quick->index)) && + (!table->covering_keys.is_clear_all() && ! have_quick_select)) { // Only read index tree if (tab->loosescan_match_tab) tab->index= tab->loosescan_key; @@ -14838,12 +14882,13 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT; } } - if (tab->select && tab->select->quick && + if (have_quick_select && tab->select->quick->index != MAX_KEY && !tab->table->covering_keys.is_set(tab->select->quick->index)) push_index_cond(tab, tab->select->quick->index); } break; + } case JT_FT: break; /* purecov: begin deadcode */ @@ -14945,8 +14990,7 @@ bool error_if_full_join(JOIN *join) for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab; tab= next_top_level_tab(join, tab)) { - if ((tab->type == JT_ALL || tab->type == JT_NEXT) && - (!tab->select || !tab->select->quick)) + if ((tab->type == JT_ALL || tab->type == JT_NEXT)) { my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, ER_THD(join->thd, @@ -15160,15 +15204,18 @@ void JOIN_TAB::estimate_scan_time() @todo: why not use JOIN_TAB::found_records or JOIN_TAB::records_read */ -ha_rows JOIN_TAB::get_examined_rows() +double JOIN_TAB::get_examined_rows() { double examined_rows; SQL_SELECT *sel= filesort? filesort->select : this->select; if (sel && sel->quick && use_quick != 2) - examined_rows= (double)sel->quick->records; - else if (type == JT_NEXT || type == JT_ALL || - type == JT_HASH || type ==JT_HASH_NEXT) + { + examined_rows= (double) sel->quick->records; + DBUG_ASSERT(examined_rows == sel->quick->records); + } + else if (type == JT_NEXT || type == JT_ALL || type == JT_RANGE || + type == JT_HASH || type == JT_HASH_NEXT) { if (limit) { @@ -15196,8 +15243,8 @@ ha_rows JOIN_TAB::get_examined_rows() examined_rows= records_init; if (examined_rows >= (double) HA_ROWS_MAX) - return HA_ROWS_MAX; - return (ha_rows) examined_rows; + return (double) HA_ROWS_MAX; + return examined_rows; } @@ -25445,7 +25492,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, goto use_filesort; } DBUG_ASSERT(tab->select->quick); - tab->type= JT_ALL; + tab->type= JT_RANGE; tab->ref.key= -1; tab->ref.key_parts= 0; tab->use_quick= 1; @@ -25508,7 +25555,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if (best_key < 0 || ((select_limit >= table_records) && - (tab->type == JT_ALL && + ((tab->type == JT_ALL || tab->type == JT_RANGE) && tab->join->table_count > tab->join->const_tables + 1) && !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX))) goto use_filesort; @@ -25646,7 +25693,7 @@ check_reverse_order: method is actually used. */ DBUG_ASSERT(tab->select->quick); - tab->type=JT_ALL; + tab->type= JT_RANGE; tab->use_quick=1; tab->ref.key= -1; tab->ref.key_parts=0; // Don't use ref key. @@ -28577,7 +28624,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, /* "type" column */ enum join_type tab_type= type; - if ((type == JT_ALL || type == JT_HASH) && + if ((type == JT_ALL || type == JT_RANGE || type == JT_HASH) && tab_select && tab_select->quick && use_quick != 2) { cur_quick= tab_select->quick; @@ -28586,9 +28633,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) || (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) - tab_type= type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE; + tab_type= type == JT_HASH ? JT_HASH_INDEX_MERGE : JT_INDEX_MERGE; else - tab_type= type == JT_ALL ? JT_RANGE : JT_HASH_RANGE; + tab_type= type == JT_HASH ? JT_HASH_RANGE : JT_RANGE; } eta->type= tab_type; @@ -28726,7 +28773,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, } else { - ha_rows examined_rows= get_examined_rows(); + double examined_rows= get_examined_rows(); eta->rows_set= true; eta->rows= double_to_rows(examined_rows); @@ -28735,19 +28782,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, float f= 0.0; if (examined_rows) { -#ifdef OLD_CODE // QQQ - double pushdown_cond_selectivity= cond_selectivity; - if (pushdown_cond_selectivity != 1.0) - f= (float) (100.0 * pushdown_cond_selectivity); - else if (range_rowid_filter_info) - f= (float) (100.0 * range_rowid_filter_info->selectivity); - else - f= (float) (100.0 * records_read / examined_rows); -#else - f= (float) (100.0 * records_out / examined_rows); -#endif + f= (float) (100.0 * records_out / examined_rows); + set_if_smaller(f, 100.0); } - set_if_smaller(f, 100.0); eta->filtered_set= true; eta->filtered= f; } @@ -31411,6 +31448,7 @@ void JOIN::init_join_cache_and_keyread() break; case JT_HASH: case JT_ALL: + case JT_RANGE: SQL_SELECT *select; select= tab->select ? tab->select : (tab->filesort ? tab->filesort->select : NULL); diff --git a/sql/sql_select.h b/sql/sql_select.h index 73673d905b8..1e6f61f3239 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -675,7 +675,7 @@ typedef struct st_join_table { return (is_hash_join_key_no(key) ? hj_key : table->key_info+key); } void estimate_scan_time(); - ha_rows get_examined_rows(); + double get_examined_rows(); bool preread_init(); bool pfs_batch_update(JOIN *join); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 45a7da787ad..5327e1f1095 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -2271,6 +2271,7 @@ static bool safe_update_on_fly(THD *thd, JOIN_TAB *join_tab, case JT_REF: case JT_REF_OR_NULL: return !is_key_used(table, join_tab->ref.key, table->write_set); + case JT_RANGE: case JT_ALL: if (bitmap_is_overlapping(&table->tmp_set, table->write_set)) return FALSE; |