diff options
author | Igor Babaev <igor@askmonty.org> | 2019-02-03 14:56:12 -0800 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2019-02-03 14:56:12 -0800 |
commit | 658128af43b4d7c6db445164f8ed25ed4d1e3109 (patch) | |
tree | 7a71580cca55759b8bb2730e117436478948d77f /sql/sql_select.cc | |
parent | 5f46670bd09babbee75a24ac82eb4ade0706da66 (diff) | |
download | mariadb-git-658128af43b4d7c6db445164f8ed25ed4d1e3109.tar.gz |
MDEV-16188 Use in-memory PK filters built from range index scans
This patch contains a full implementation of the optimization
that allows to use in-memory rowid / primary filters built for range
conditions over indexes. In many cases usage of such filters reduce
the number of disk seeks spent for fetching table rows.
In this implementation the choice of what possible filter to be applied
(if any) is made purely on cost-based considerations.
This implementation re-achitectured the partial implementation of
the feature pushed by Galina Shalygina in the commit
8d5a11122c32f4d9eb87536886c6e893377bdd07.
Besides this patch contains a better implementation of the generic
handler function handler::multi_range_read_info_const() that
takes into account gaps between ranges when calculating the cost of
range index scans. It also contains some corrections of the
implementation of the handler function records_in_range() for MyISAM.
This patch supports the feature for InnoDB and MyISAM.
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r-- | sql/sql_select.cc | 280 |
1 files changed, 222 insertions, 58 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8f55497d8d0..b5b77c2c43b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -101,6 +101,9 @@ static int sort_keyuse(KEYUSE *a,KEYUSE *b); static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables); static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, bool allow_full_scan, table_map used_tables); +static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit); void best_access_path(JOIN *join, JOIN_TAB *s, table_map remaining_tables, uint idx, bool disable_jbuf, double record_count, @@ -1462,6 +1465,116 @@ int JOIN::optimize() } +/** + @brief + Create range filters objects needed in execution for all join tables + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function creates a Rowid_filter object + for this range filter. In order to do this the function first constructs + a quick select to scan the range for this range filter. Then it creates + a container for the range filter and finally constructs a Range_rowid_filter + object a pointer to which is set in the field JOIN_TAB::rowid_filter of + the joined table. + + @retval false always +*/ + +bool JOIN::make_range_rowid_filters() +{ + DBUG_ENTER("make_range_rowid_filters"); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->range_rowid_filter_info) + continue; + int err; + SQL_SELECT *sel= NULL; + Rowid_filter_container *filter_container= NULL; + Item **sargable_cond= get_sargable_cond(this, tab->table); + sel= make_select(tab->table, const_table_map, const_table_map, + *sargable_cond, (SORT_INFO*) 0, 1, &err); + if (!sel) + continue; + + key_map filter_map; + filter_map.clear_all(); + filter_map.set_bit(tab->range_rowid_filter_info->key_no); + filter_map.merge(tab->table->with_impossible_ranges); + bool force_index_save= tab->table->force_index; + tab->table->force_index= true; + (void) sel->test_quick_select(thd, filter_map, (table_map) 0, + (ha_rows) HA_POS_ERROR, + true, false, true); + tab->table->force_index= force_index_save; + if (thd->is_error()) + goto no_filter; + DBUG_ASSERT(sel->quick); + filter_container= + tab->range_rowid_filter_info->create_container(); + if (filter_container) + { + tab->rowid_filter= + new (thd->mem_root) Range_rowid_filter(tab->table, + tab->range_rowid_filter_info, + filter_container, sel); + if (tab->rowid_filter) + continue; + } + no_filter: + if (sel->quick) + delete sel->quick; + delete sel; + } + + DBUG_RETURN(0); +} + + +/** + @brief + Allocate memory the rowid containers of the used the range filters + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function allocate memory for the + rowid container employed by the filter. On success it lets the table engine + know that what rowid filter will be used when accessing the table rows. + + @retval false always +*/ + +bool +JOIN::init_range_rowid_filters() +{ + DBUG_ENTER("init_range_rowid_filters"); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->rowid_filter) + continue; + if (tab->rowid_filter->get_container()->alloc()) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + continue; + } + tab->table->file->rowid_filter_push(tab->rowid_filter); + tab->is_rowid_filter_built= false; + } + DBUG_RETURN(0); +} + + int JOIN::init_join_caches() { JOIN_TAB *tab; @@ -1980,6 +2093,9 @@ int JOIN::optimize_stage2() if (get_best_combination()) DBUG_RETURN(1); + if (make_range_rowid_filters()) + DBUG_RETURN(1); + if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE)) DBUG_RETURN(1); @@ -2644,6 +2760,9 @@ int JOIN::optimize_stage2() if (init_join_caches()) DBUG_RETURN(1); + if (init_range_rowid_filters()) + DBUG_RETURN(1); + error= 0; if (select_options & SELECT_DESCRIBE) @@ -5035,9 +5154,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, select->quick=0; impossible_range= records == 0 && s->table->reginfo.impossible_range; if (join->thd->lex->sql_command == SQLCOM_SELECT && - join->table_count > 1 && optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER)) - s->table->select_usable_range_filters(join->thd); + s->table->init_cost_info_for_usable_range_rowid_filters(join->thd); } if (!impossible_range) { @@ -6843,14 +6961,14 @@ best_access_path(JOIN *join, double best_time= DBL_MAX; double records= DBL_MAX; table_map best_ref_depends_map= 0; - Range_filter_cost_info *best_filter= 0; + Range_rowid_filter_cost_info *best_filter= 0; double tmp; ha_rows rec; bool best_uses_jbuf= FALSE; MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; SplM_plan_info *spl_plan= 0; - Range_filter_cost_info *filter= 0; + Range_rowid_filter_cost_info *filter= 0; disable_jbuf= disable_jbuf || idx == join->const_tables; @@ -7243,11 +7361,18 @@ best_access_path(JOIN *join, loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ - filter= table->best_filter_for_current_join_order(start_key->key, - records, - record_count); - if (filter && (filter->get_filter_gain(record_count*records) < tmp)) - tmp= tmp - filter->get_filter_gain(record_count*records); + if (records < DBL_MAX) + { + double rows= record_count * records; + filter= + table->best_range_rowid_filter_for_partial_join(start_key->key, rows); + if (filter) + { + tmp-= filter->get_adjusted_gain(rows, s->worst_seeks) - + filter->get_cmp_gain(rows); + DBUG_ASSERT(tmp >= 0); + } + } if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE) { @@ -7352,6 +7477,7 @@ best_access_path(JOIN *join, Here we estimate its cost. */ + filter= 0; if (s->quick) { /* @@ -7367,6 +7493,19 @@ best_access_path(JOIN *join, (s->quick->read_time + (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE); + if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + double rows= record_count * s->found_records; + uint key_no= s->quick->index; + filter= s->table->best_range_rowid_filter_for_partial_join(key_no, + rows); + if (filter) + { + tmp-= filter->get_gain(rows); + DBUG_ASSERT(tmp >= 0); + } + } + loose_scan_opt.check_range_access(join, idx, s->quick); } else @@ -7412,21 +7551,23 @@ best_access_path(JOIN *join, else tmp+= s->startup_cost; - filter= s->table->best_filter_for_current_join_order(MAX_KEY, - rnd_records, - record_count); - if (filter && (filter->get_filter_gain(record_count*rnd_records) < tmp)) - tmp= tmp - filter->get_filter_gain(record_count*rnd_records); - /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus tmp give us total cost of using TABLE SCAN */ + + double filter_cmp_gain= 0; + if (filter) + { + filter_cmp_gain= filter->get_cmp_gain(record_count * s->found_records); + } + if (best == DBL_MAX || (tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records < (best_key->is_for_hash_join() ? best_time : - best + record_count/(double) TIME_FOR_COMPARE*records))) + best + record_count/(double) TIME_FOR_COMPARE*records - + filter_cmp_gain))) { /* If the table has a range (s->quick is set) make_join_select() @@ -7435,7 +7576,9 @@ best_access_path(JOIN *join, best= tmp; records= best_records; best_key= 0; - best_filter= filter; + best_filter= 0; + if (s->quick && s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + best_filter= filter; /* range/index_merge/ALL/index access method are "independent", so: */ best_ref_depends_map= 0; best_uses_jbuf= MY_TEST(!disable_jbuf && !((s->table->map & @@ -7453,7 +7596,7 @@ best_access_path(JOIN *join, pos->loosescan_picker.loosescan_key= MAX_KEY; pos->use_join_buffer= best_uses_jbuf; pos->spl_plan= spl_plan; - pos->filter= best_filter; + pos->range_rowid_filter_info= best_filter; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -9719,7 +9862,7 @@ bool JOIN::get_best_combination() is_hash_join_key_no(j->ref.key)) hash_join= TRUE; - j->filter= best_positions[tablenr].filter; + j->range_rowid_filter_info= best_positions[tablenr].range_rowid_filter_info; loop_end: /* @@ -10782,6 +10925,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) sel->quick=tab->quick; // Use value from get_quick_... sel->quick_keys.clear_all(); sel->needed_reg.clear_all(); + if (is_hj && tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } } else { @@ -12455,6 +12603,23 @@ bool error_if_full_join(JOIN *join) } +void JOIN_TAB::build_range_rowid_filter_if_needed() +{ + if (rowid_filter && !is_rowid_filter_built) + { + if (!rowid_filter->build()) + { + is_rowid_filter_built= true; + } + else + { + delete rowid_filter; + rowid_filter= 0; + } + } +} + + /** cleanup JOIN_TAB. @@ -12478,6 +12643,11 @@ void JOIN_TAB::cleanup() select= 0; delete quick; quick= 0; + if (rowid_filter) + { + delete rowid_filter; + rowid_filter= 0; + } if (cache) { cache->free(); @@ -12589,9 +12759,7 @@ ha_rows JOIN_TAB::get_examined_rows() double examined_rows; SQL_SELECT *sel= filesort? filesort->select : this->select; - if (filter) - examined_rows= records_read; - else if (sel && sel->quick && use_quick != 2) + if (sel && sel->quick && use_quick != 2) examined_rows= (double)sel->quick->records; else if (type == JT_NEXT || type == JT_ALL || type == JT_HASH || type ==JT_HASH_NEXT) @@ -19361,6 +19529,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) if (!join_tab->preread_init_done && join_tab->preread_init()) DBUG_RETURN(NESTED_LOOP_ERROR); + join_tab->build_range_rowid_filter_if_needed(); + join->return_tab= join_tab; if (join_tab->last_inner) @@ -20305,6 +20475,8 @@ int join_init_read_record(JOIN_TAB *tab) if (tab->filesort && tab->sort_table()) // Sort table. return 1; + tab->build_range_rowid_filter_if_needed(); + DBUG_EXECUTE_IF("kill_join_init_read_record", tab->join->thd->set_killed(KILL_QUERY);); if (tab->select && tab->select->quick && tab->select->quick->reset()) @@ -20319,6 +20491,8 @@ int join_init_read_record(JOIN_TAB *tab) tab->join->thd->reset_killed();); if (!tab->preread_init_done && tab->preread_init()) return 1; + + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, tab->select, tab->filesort_result, 1,1, FALSE)) return 1; @@ -22352,6 +22526,12 @@ check_reverse_order: tab->use_quick=1; tab->ref.key= -1; tab->ref.key_parts=0; // Don't use ref key. + tab->range_rowid_filter_info= 0; + if (tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } tab->read_first_record= join_init_read_record; if (tab->is_using_loose_index_scan()) tab->join->tmp_table_param.precomputed_group_by= TRUE; @@ -24977,11 +25157,13 @@ int print_explain_message_line(select_result_sink *result, item_list.push_back(item_null, mem_root); /* `rows` */ + StringBuffer<64> rows_str; if (rows) { - item_list.push_back(new (mem_root) Item_int(thd, *rows, - MY_INT64_NUM_DECIMAL_DIGITS), - mem_root); + rows_str.append_ulonglong((ulonglong)(*rows)); + item_list.push_back(new (mem_root) + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); } else item_list.push_back(item_null, mem_root); @@ -25046,31 +25228,6 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, } -/** - This method saves the data that should be printed in EXPLAIN - if any filter was used for this table. -*/ - -bool JOIN_TAB::save_filter_explain_data(Explain_table_access *eta) -{ - if (!filter) - return 0; - KEY *pk_key= get_keyinfo_by_key_no(filter->key_no); - StringBuffer<64> buff_for_pk; - const char *tmp_buff; - buff_for_pk.append("filter:"); - tmp_buff= pk_key->name.str; - buff_for_pk.append(tmp_buff, strlen(tmp_buff), system_charset_info); - if (!(eta->ref_list.append_str(join->thd->mem_root, - buff_for_pk.c_ptr_safe()))) - return 1; - eta->key.set_filter_key_length(pk_key->key_length); - (filter->selectivity*100 >= 1) ? eta->filter_perc= round(filter->selectivity*100) : - eta->filter_perc= 1; - return 0; -} - - bool JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tables, bool distinct_arg, JOIN_TAB *first_top_tab) @@ -25106,7 +25263,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, filesort))) return 1; } - + // psergey-todo: data for filtering! tracker= &eta->tracker; jbuf_tracker= &eta->jbuf_tracker; @@ -25207,6 +25364,20 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, // psergey-todo: ^ check for error return code /* Build "key", "key_len", and "ref" */ + + if (rowid_filter) + { + Range_rowid_filter *range_filter= (Range_rowid_filter *) rowid_filter; + QUICK_SELECT_I *quick= range_filter->get_select()->quick; + + Explain_rowid_filter *erf= new (thd->mem_root) Explain_rowid_filter; + erf->quick= quick->get_explain(thd->mem_root); + erf->selectivity= range_rowid_filter_info->selectivity; + erf->rows= quick->records; + eta->rowid_filter= erf; + //psergey-todo: also do setup for ANALYZE here. + } + if (tab_type == JT_NEXT) { key_info= table->key_info+index; @@ -25326,13 +25497,6 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, eta->filtered= f; } - if ((tab_select && tab_select->quick && tab_type != JT_CONST) || - (key_info && ref.key_parts && tab_type != JT_FT)) - { - if (save_filter_explain_data(eta)) - return 1; - } - /* Build "Extra" field and save it */ key_read= table->file->keyread_enabled(); if ((tab_type == JT_NEXT || tab_type == JT_CONST) && |