diff options
author | Galina Shalygina <galina.shalygina@mariadb.com> | 2018-08-16 00:24:52 +0300 |
---|---|---|
committer | Galina Shalygina <galina.shalygina@mariadb.com> | 2018-09-28 23:50:22 +0300 |
commit | 8d5a11122c32f4d9eb87536886c6e893377bdd07 (patch) | |
tree | ab8cc222d336acd0006a544abb362affc149f671 | |
parent | befc09f00263d5375b2bb2ea0fac70b6cb0cb7fd (diff) | |
download | mariadb-git-8d5a11122c32f4d9eb87536886c6e893377bdd07.tar.gz |
MDEV-16188: Use in-memory PK filters built from range index scans
First phase: make optimizer choose to use filter and show it in EXPLAIN.
-rw-r--r-- | mysql-test/main/rowid_filter.test | 142 | ||||
-rw-r--r-- | sql/CMakeLists.txt | 1 | ||||
-rw-r--r-- | sql/handler.cc | 23 | ||||
-rw-r--r-- | sql/handler.h | 2 | ||||
-rw-r--r-- | sql/opt_range.cc | 3 | ||||
-rw-r--r-- | sql/rowid_filter.cc | 218 | ||||
-rw-r--r-- | sql/rowid_filter.h | 183 | ||||
-rw-r--r-- | sql/sql_class.cc | 6 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_explain.cc | 32 | ||||
-rw-r--r-- | sql/sql_explain.h | 15 | ||||
-rw-r--r-- | sql/sql_priv.h | 5 | ||||
-rw-r--r-- | sql/sql_select.cc | 68 | ||||
-rw-r--r-- | sql/sql_select.h | 9 | ||||
-rw-r--r-- | sql/sys_vars.cc | 8 | ||||
-rw-r--r-- | sql/table.cc | 2 | ||||
-rw-r--r-- | sql/table.h | 22 |
17 files changed, 722 insertions, 18 deletions
diff --git a/mysql-test/main/rowid_filter.test b/mysql-test/main/rowid_filter.test new file mode 100644 index 00000000000..4f41c408d94 --- /dev/null +++ b/mysql-test/main/rowid_filter.test @@ -0,0 +1,142 @@ +--disable_warnings +DROP DATABASE IF EXISTS dbt3_s001; +--enable_warnings + +CREATE DATABASE dbt3_s001; + +use dbt3_s001; + +--disable_query_log +--disable_result_log +--disable_warnings +--source include/dbt3_s001.inc +--enable_warnings +--enable_result_log +--enable_query_log + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate} -> i_l_receiptdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND + l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25'; + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate} -> i_l_receiptdate +--echo # orders : {i_o_orderdate} -> i_o_orderdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND + l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25' AND + o_orderdate > '1997-01-15'; + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate, +--echo # i_l_commitdate} -> i_l_receiptdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND + l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25' AND + l_commitdate BETWEEN '1997-01-05' AND '1997-01-25'; + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate, +--echo # i_l_commitdate} -> i_l_commitdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND + l_receiptdate BETWEEN '1997-01-01' AND '1997-01-25' AND + l_commitdate BETWEEN '1997-01-15' AND '1997-01-25'; + +CREATE INDEX i_l_extendedprice ON lineitem(l_extendedprice); + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate, i_l_commitdate, +--echo # i_l_extendedprice} -> i_l_extendedprice +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1996-11-01' AND '1997-01-21' AND + l_receiptdate BETWEEN '1996-11-21' AND '1997-01-25' AND + l_commitdate BETWEEN '1996-11-25' AND '1997-01-20' AND + l_extendedprice BETWEEN 26000 AND 27000; + +--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_shipdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-11' AND '1997-01-21' AND + l_extendedprice BETWEEN 26000 AND 27000; + +--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_extendedprice +--echo # intersection point in the I quadrant +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE (l_shipdate BETWEEN '1997-01-11' AND '1997-01-26' OR + l_shipdate BETWEEN '1995-02-01' AND '1995-02-14' OR + l_shipdate BETWEEN '1994-12-12' AND '1994-12-28' + ) AND l_extendedprice BETWEEN 26000 AND 27000; + +--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_shipdate +--echo # parallel lines +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE (l_shipdate BETWEEN '1997-01-11' AND '1997-01-26' OR + l_shipdate BETWEEN '1995-02-01' AND '1995-02-16' OR + l_shipdate BETWEEN '1994-12-12' AND '1994-12-27' + ) AND l_extendedprice BETWEEN 26000 AND 27000; + + +CREATE INDEX i_l_discount ON lineitem(l_discount); +CREATE INDEX i_l_tax ON lineitem(l_tax); + +--echo # lineitem : {i_l_receiptdate, i_l_shipdate, i_l_commitdate, +--echo # i_l_extendedprice, i_l_discount, i_l_tax} +--echo # -> {i_l_extendedprice} +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1996-11-01' AND '1997-01-21' AND + l_receiptdate BETWEEN '1996-11-21' AND '1997-01-25' AND + l_commitdate BETWEEN '1996-11-25' AND '1997-01-20' AND + l_extendedprice BETWEEN 26000 AND 27000 AND + l_discount BETWEEN 0 AND 0.01 AND + l_tax BETWEEN 0.03 AND 0.04; + +DROP INDEX i_l_extendedprice on lineitem; +DROP INDEX i_l_discount on lineitem; +DROP INDEX i_l_tax on lineitem; + +SET max_rowid_filter_size= 1024; + +--echo # lineitem : {i_l_shipdate, i_l_receiptdate, i_l_commitdate} +--echo # -> i_l_shipdate +--echo # i_l_commitdate isn't in-memory -> isn't used +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1996-12-28' AND '1997-01-20' AND + l_receiptdate BETWEEN '1996-12-21' AND '1997-01-25' AND + l_commitdate BETWEEN '1996-12-01' AND '1997-01-25'; + +SET max_rowid_filter_size= DEFAULT; + +--echo # lineitem : {i_l_shipdate, i_l_commitdate} -> i_l_commitdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND + l_commitdate BETWEEN '1997-01-10' AND '1997-01-12'; + +--echo # lineitem : {i_l_shipdate, i_l_commitdate} -> i_l_commitdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND + l_commitdate BETWEEN '1993-01-10' AND '1997-01-12'; + +--echo # lineitem : {i_l_shipdate, i_l_commitdate, i_l_receiptdate} +--echo # -> i_l_receiptdate +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND + l_commitdate BETWEEN '1993-01-10' AND '1997-01-12' AND + l_receiptdate BETWEEN '1997-01-10' AND '1997-01-12'; + +--echo # lineitem : {i_l_shipdate, i_l_receiptdate} -> i_l_receiptdate +--echo # indexes with high selectivity +EXPLAIN SELECT * +FROM orders JOIN lineitem ON o_orderkey=l_orderkey +WHERE l_shipdate BETWEEN '1997-01-09' AND '1997-01-10' AND + l_receiptdate BETWEEN '1997-01-09' AND '1997-01-10'; + +DROP DATABASE dbt3_s001; diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 0037ff23153..13a116a71d4 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -134,6 +134,7 @@ SET (SQL_SOURCE sql_sequence.cc sql_sequence.h ha_sequence.h sql_tvc.cc sql_tvc.h opt_split.cc + rowid_filter.cc rowid_filter.h ${WSREP_SOURCES} table_cache.cc encryption.cc temporary_tables.cc proxy_protocol.cc diff --git a/sql/handler.cc b/sql/handler.cc index b3481c7e429..d48a4660d52 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2601,6 +2601,22 @@ LEX_CSTRING *handler::engine_name() } +/** + The method returns the cost of the random I/O accesses when + index is used. +*/ + +double handler::get_io_cost(uint index, ha_rows rows, uint *length) +{ + uint len= table->key_info[index].key_length + ref_length; + if (index == table->s->primary_key && table->file->primary_key_is_clustered()) + len= table->s->stored_rec_length; + double keys_per_block= (stats.block_size/2.0/len+1); + *length= len; + return (rows + keys_per_block-1)/ keys_per_block; +} + + double handler::keyread_time(uint index, uint ranges, ha_rows rows) { /* @@ -2612,11 +2628,8 @@ double handler::keyread_time(uint index, uint ranges, ha_rows rows) engines that support that (e.g. InnoDB) may want to overwrite this method. The model counts in the time to read index entries from cache. */ - size_t len= table->key_info[index].key_length + ref_length; - if (index == table->s->primary_key && table->file->primary_key_is_clustered()) - len= table->s->stored_rec_length; - double keys_per_block= (stats.block_size/2.0/len+1); - return (rows + keys_per_block-1)/ keys_per_block + + uint len; + return get_io_cost(index, rows, &len) + len*rows/(stats.block_size+1)/TIME_FOR_COMPARE ; } diff --git a/sql/handler.h b/sql/handler.h index ce8711bd5ab..bfc8bd774b3 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -3221,6 +3221,8 @@ public: */ virtual double keyread_time(uint index, uint ranges, ha_rows rows); + double get_io_cost(uint index, ha_rows rows, uint *length); + virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } /* diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 6f28ab59c15..9134ad5ab62 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -10515,6 +10515,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, handler *file= param->table->file; ha_rows rows= HA_POS_ERROR; uint keynr= param->real_keynr[idx]; + uint length; DBUG_ENTER("check_quick_select"); /* Handle cases when we don't have a valid non-empty list of range */ @@ -10573,6 +10574,8 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, MY_MIN(param->table->quick_condition_rows, rows); param->table->quick_rows[keynr]= rows; param->table->quick_costs[keynr]= cost->total_cost(); + param->table->quick_key_io[keynr]= + file->get_io_cost(keynr, param->table->quick_rows[keynr], &length); } } /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */ diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc new file mode 100644 index 00000000000..bcca9a0f528 --- /dev/null +++ b/sql/rowid_filter.cc @@ -0,0 +1,218 @@ +#include "rowid_filter.h" + + +/** + Sets information about filter with key_numb index. + It sets a cardinality of filter, calculates its selectivity + and gets slope and interscept values. +*/ + +void Range_filter_cost_info::init(TABLE *tab, uint key_numb) +{ + table= tab; + key_no= key_numb; + cardinality= table->quick_rows[key_no]; + b= filter_io_cost() + filter_write_cost() + filter_sort_cost(); + selectivity= cardinality/((double) table->stat_records()); + a= (1 + COST_COND_EVAL)*(1 - selectivity) - lookup_cost(); + intersect_x_axis_abcissa= b/a; +} + + +/** + @brief + Sort available filters by their building cost in the increasing order + + @details + The method starts sorting available filters from the first filter that + is not defined as the best filter. If there are two filters that are + defined as the best filters there is no need to sort other filters. + Best filters are already sorted by their building cost and have the + smallest bulding cost in comparison with other filters by definition. + + As the sorting method bubble sort is used. +*/ + +void TABLE::sort_range_filter_cost_info_array() +{ + if (best_filter_count == 2) + return; + + for (uint i= best_filter_count; i < range_filter_cost_info_elements-1; i++) + { + for (uint j= i+1; j < range_filter_cost_info_elements; j++) + { + if (range_filter_cost_info[i].intersect_x_axis_abcissa > + range_filter_cost_info[j].intersect_x_axis_abcissa) + swap_variables(Range_filter_cost_info, + range_filter_cost_info[i], + range_filter_cost_info[j]); + } + } +} + + +/** + @brief + The method searches for the filters that can reduce the join cost the most + + @details + The method looks through the available filters trying to choose the best + filter and eliminate as many filters as possible. + + Filters are considered as a linear functions. The best filter is the linear + function that intersects all other linear functions not in the I quadrant + and has the biggest a (slope) value. This filter will reduce the partial + join cost the most. If it is possible the second best filter is also + chosen. The second best filter can be used if the ref access is made on + the index of the first best filter. + + So there is no need to store all other filters except filters that + intersect in the I quadrant. It is impossible to say on this step which + filter is better and will give the biggest gain. + + The number of filters that can be used is stored in the + range_filter_cost_info_elements variable. +*/ + +void TABLE::prune_range_filters() +{ + key_map pruned_filter_map; + pruned_filter_map.clear_all(); + Range_filter_cost_info *max_slope_filters[2] = {0, 0}; + + for (uint i= 0; i < range_filter_cost_info_elements; i++) + { + Range_filter_cost_info *filter= &range_filter_cost_info[i]; + if (filter->a < 0) + { + range_filter_cost_info_elements--; + swap_variables(Range_filter_cost_info, range_filter_cost_info[i], + range_filter_cost_info[range_filter_cost_info_elements]); + continue; + } + for (uint j= i+1; j < range_filter_cost_info_elements; j++) + { + Range_filter_cost_info *cand_filter= &range_filter_cost_info[j]; + + double intersect_x= filter->get_intersect_x(cand_filter); + double intersect_y= filter->get_intersect_y(intersect_x); + + if (intersect_x > 0 && intersect_y > 0) + { + pruned_filter_map.set_bit(cand_filter->key_no); + pruned_filter_map.set_bit(filter->key_no); + } + } + if (!pruned_filter_map.is_set(filter->key_no)) + { + if (!max_slope_filters[0]) + max_slope_filters[0]= filter; + else + { + if (!max_slope_filters[1] || + max_slope_filters[1]->a < filter->a) + max_slope_filters[1]= filter; + if (max_slope_filters[0]->a < max_slope_filters[1]->a) + swap_variables(Range_filter_cost_info*, max_slope_filters[0], + max_slope_filters[1]); + } + } + } + + for (uint i= 0; i<2; i++) + { + if (max_slope_filters[i]) + { + swap_variables(Range_filter_cost_info, + range_filter_cost_info[i], + *max_slope_filters[i]); + if (i == 0 && + max_slope_filters[1] == &range_filter_cost_info[0]) + max_slope_filters[1]= max_slope_filters[0]; + + best_filter_count++; + max_slope_filters[i]= &range_filter_cost_info[i]; + } + } + sort_range_filter_cost_info_array(); +} + + +void TABLE::select_usable_range_filters(THD *thd) +{ + uint key_no; + key_map usable_range_filter_keys; + usable_range_filter_keys.clear_all(); + key_map::Iterator it(quick_keys); + while ((key_no= it++) != key_map::Iterator::BITMAP_END) + { + if (quick_rows[key_no] > + thd->variables.max_rowid_filter_size/file->ref_length) + continue; + usable_range_filter_keys.set_bit(key_no); + } + + if (usable_range_filter_keys.is_clear_all()) + return; + + range_filter_cost_info_elements= usable_range_filter_keys.bits_set(); + range_filter_cost_info= + new (thd->mem_root) Range_filter_cost_info [range_filter_cost_info_elements]; + Range_filter_cost_info *curr_filter_cost_info= range_filter_cost_info; + + key_map::Iterator li(usable_range_filter_keys); + while ((key_no= li++) != key_map::Iterator::BITMAP_END) + { + curr_filter_cost_info->init(this, key_no); + curr_filter_cost_info++; + } + prune_range_filters(); +} + + +Range_filter_cost_info +*TABLE::best_filter_for_current_join_order(uint ref_key_no, + double record_count, + double records) +{ + if (!this || range_filter_cost_info_elements == 0) + return 0; + + double card= record_count*records; + Range_filter_cost_info *best_filter= &range_filter_cost_info[0]; + + if (card < best_filter->intersect_x_axis_abcissa) + return 0; + if (best_filter_count != 0) + { + if (best_filter->key_no == ref_key_no) + { + if (best_filter_count == 2) + { + best_filter= &range_filter_cost_info[1]; + if (card < best_filter->intersect_x_axis_abcissa) + return 0; + return best_filter; + } + } + else + return best_filter; + } + + double best_filter_improvement= 0.0; + best_filter= 0; + + for (uint i= best_filter_count; i < range_filter_cost_info_elements; i++) + { + Range_filter_cost_info *filter= &range_filter_cost_info[i]; + if (card < filter->intersect_x_axis_abcissa) + break; + if (best_filter_improvement < filter->get_filter_gain(card)) + { + best_filter_improvement= filter->get_filter_gain(card); + best_filter= filter; + } + } + return best_filter; +} diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h new file mode 100644 index 00000000000..0d8520f25c5 --- /dev/null +++ b/sql/rowid_filter.h @@ -0,0 +1,183 @@ +#ifndef ROWID_FILTER_INCLUDED +#define ROWID_FILTER_INCLUDED + +/** + It makes sense to apply filters for a certain join order when the following + inequality holds: + + #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) + + I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi)) + + c3*#T (1), + + where #T - the fanout of the partial join + Fi - a filter for the index with the number i in + the key_map of available indexes for this table + sel(Fi) - the selectivity of the index with the number + i + c4*#T, + c4*#T*sel(Fi) - a cost to apply available predicates + c4 - a constant to apply available predicates + I/O(Fi) - a cost of the I/O accesses to Fi + #(Fi) - a number of estimated records that range + access would use + c1*#(Fi) - a cost to write in Fi + c1 - a constant to write one element in Fi + c2*#(Fi)*log(#(Fi)) - a cost to sort in Fi + c2 - a sorting constant + c3*(#T) - a cost to look-up into a current partial join + c3 - a constant to look-up into Fi + + Let's set a new variable FBCi (filter building cost for the filter with + index i): + + FBCi = I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi)) + + It can be seen that FBCi doesn't depend on #T. + + So using this variable (1) can be rewritten: + + #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) + + FBCi + + c3*#T + + To get a possible cost improvement when a filter is used right part + of the (1) inequality should be deducted from the left part. + Denote it as G(#T): + + G(#T)= #T + c4*#T - (#T*sel(Fi) + c4*#T*sel(Fi) + FBCi + c3*#T) (2) + + On the prepare stage when filters are created #T value isn't known. + + To find out what filter is the best among available one for the table + (what filter gives the biggest gain) a knowledge about linear functions + can be used. Consider filter gain as a linear function: + + Gi(#T)= ai*#T + bi (3) + + where ai= 1+c4-c3-sel(Fi)*(1+c4), + bi= -FBCi + + Filter gain can be interpreted as an ordinate, #T as abscissa. + + So the aim is to find the linear function that has the biggest ordinate value + for each positive abscissa (because #T can't be negative) comparing with + the other available functions. + + Consider two filters Fi, Fj or linear functions with a positive slope. + To find out which linear function is better let's find their intersection + point coordinates. + + Gi(#T0)= Gj(#T0) (using (2))=> + #T0= (bj - bi)/(ai - aj) (using (3)) + => + #T0= (BCFj-BCFi)/((sel(Fj)-sel(Fi))*(1+c4)) + + If put #T0 value into the (3) formula G(#T0) can be easily found. + + It can be seen that if two linear functions intersect in II, III or IV + quadrants the linear function with a bigger slope value will always + be better. + + If two functions intersect in the I quadrant for #T1 < #T0 a function + with a smaller slope value will give a better gain and when #T1 > #T0 + function with a bigger slope will give better gain. + + for each #T1 > #T0 if (ai > aj) => (Gi(#T1) >= Gj(#T1)) + #T1 <= #T0 if (ai > aj) => (Gi(#T1) <= Gj(#T1)) + + So both linear functions should be saved. + + Interesting cases: + + 1. For Fi,Fj filters ai=aj. + + In this case intercepts bi and bj should be compared. + The filter with the biggest intercept will give a better result. + + 2. Only one filter remains after the calculations and for some join order + it is equal to the index that is used to access table. Therefore, this + filter can't be used. + + In this case the gain is computed for every filter that can be constructed + for this table. + + After information about filters is computed for each partial join order + it is checked if the filter can be applied to the current table. + If it gives a cost improvement it is saved as the best plan for this + partial join. +*/ + +#include "mariadb.h" +#include "sql_class.h" +#include "table.h" + +/* Cost to write into filter */ +#define COST_WRITE 0.01 +/* Weight factor for filter sorting */ +#define CNST_SORT 0.01 +/* Cost to evaluate condition */ +#define COST_COND_EVAL 0.2 + +class QUICK_RANGE_SELECT; + +class Range_filter_cost_info : public Sql_alloc +{ +public: + TABLE *table; + uint key_no; + double cardinality; + double b; // intercept of the linear function + double a; // slope of the linear function + double selectivity; + double intersect_x_axis_abcissa; + + /** + Filter cost functions + */ + /* Cost to lookup into filter */ + inline double lookup_cost() + { + return log(cardinality)*0.01; + } + + /* IO accesses cost to access filter */ + inline double filter_io_cost() + { return table->quick_key_io[key_no]; } + + /* Cost to write elements in filter */ + inline double filter_write_cost() + { return COST_WRITE*cardinality; } + + /* Cost to sort elements in filter */ + inline double filter_sort_cost() + { + return CNST_SORT*cardinality*log(cardinality); + } + /* End of filter cost functions */ + + Range_filter_cost_info() : table(0), key_no(0) {} + + void init(TABLE *tab, uint key_numb); + + inline double get_intersect_x(Range_filter_cost_info *filter) + { + if (a == filter->a) + return DBL_MAX; + return (b - filter->b)/(a - filter->a); + } + inline double get_intersect_y(double intersect_x) + { + if (intersect_x == DBL_MAX) + return DBL_MAX; + return intersect_x*a - b; + } + + /** + Get a gain that a usage of filter in some partial join order + with the cardinaly card gives + */ + inline double get_filter_gain(double card) + { return card*a - b; } +}; + +#endif /* ROWID_FILTER_INCLUDED */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0bbfdba88c7..d6b2cd6fd76 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -2668,12 +2668,12 @@ void THD::make_explain_field_list(List<Item> &field_list, uint8 explain_flags, mem_root); item->maybe_null=1; field_list.push_back(item=new (mem_root) - Item_empty_string(this, "ref", + Item_empty_string(this, "ref|filter", NAME_CHAR_LEN*MAX_REF_PARTS, cs), mem_root); item->maybe_null=1; - field_list.push_back(item= new (mem_root) - Item_return_int(this, "rows", 10, MYSQL_TYPE_LONGLONG), + field_list.push_back(item=new (mem_root) + Item_empty_string(this, "rows", NAME_CHAR_LEN, cs), mem_root); if (is_analyze) { diff --git a/sql/sql_class.h b/sql/sql_class.h index 25e2d736cdc..aaa54447c0b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -727,6 +727,7 @@ typedef struct system_variables uint column_compression_threshold; uint column_compression_zlib_level; uint in_subquery_conversion_threshold; + ulonglong max_rowid_filter_size; vers_asof_timestamp_t vers_asof_timestamp; ulong vers_alter_history; diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 1c45b05ccc5..23bc1e906a0 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -1146,6 +1146,15 @@ void Explain_table_access::fill_key_len_str(String *key_len_str) const length= longlong10_to_str(hash_next_key.get_key_len(), buf, 10) - buf; key_len_str->append(buf, length); } + + if (key.get_filter_key_length() != (uint)-1) + { + char buf[64]; + size_t length; + key_len_str->append(','); + length= longlong10_to_str(key.get_filter_key_length(), buf, 10) - buf; + key_len_str->append(buf, length); + } } @@ -1274,12 +1283,20 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai push_string_list(thd, &item_list, ref_list, &ref_list_buf); /* `rows` */ + StringBuffer<64> rows_str; if (rows_set) { + rows_str.append_ulonglong((ulonglong)rows); + + if (is_filter_set()) + { + rows_str.append(" ("); + rows_str.append_ulonglong(filter_perc); + rows_str.append("%)"); + } item_list.push_back(new (mem_root) - Item_int(thd, (longlong) (ulonglong) rows, - MY_INT64_NUM_DECIMAL_DIGITS), - mem_root); + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); } else item_list.push_back(item_null, mem_root); @@ -1359,6 +1376,15 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai extra_buf.append(STRING_WITH_LEN("Using filesort")); } + if (is_filter_set()) + { + if (first) + first= false; + else + extra_buf.append(STRING_WITH_LEN("; ")); + extra_buf.append(STRING_WITH_LEN("Using filter")); + } + item_list.push_back(new (mem_root) Item_string_sys(thd, extra_buf.ptr(), extra_buf.length()), diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 38250cc40ce..71f90477977 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -583,6 +583,7 @@ class Explain_index_use : public Sql_alloc { char *key_name; uint key_len; + uint key_len_for_filter; public: String_list key_parts_list; @@ -595,12 +596,16 @@ public: { key_name= NULL; key_len= (uint)-1; + key_len_for_filter= (uint)-1; } bool set(MEM_ROOT *root, KEY *key_name, uint key_len_arg); bool set_pseudo_key(MEM_ROOT *root, const char *key_name); + void set_filter_key_length(uint key_length_arg) + { key_len_for_filter= key_length_arg; } inline const char *get_key_name() const { return key_name; } inline uint get_key_len() const { return key_len; } + inline uint get_filter_key_length() const { return key_len_for_filter; } }; @@ -689,7 +694,8 @@ public: cache_cond(NULL), pushed_index_cond(NULL), sjm_nest(NULL), - pre_join_sort(NULL) + pre_join_sort(NULL), + filter_perc(UINT_MAX) {} ~Explain_table_access() { delete sjm_nest; } @@ -796,6 +802,13 @@ public: Exec_time_tracker op_tracker; Table_access_tracker jbuf_tracker; + /** + How many rows are left after the filter was applied + to the initial rows count in percentages. + */ + double filter_perc; + inline bool is_filter_set() const { return (filter_perc != UINT_MAX); } + int print_explain(select_result_sink *output, uint8 explain_flags, bool is_analyze, uint select_id, const char *select_type, diff --git a/sql/sql_priv.h b/sql/sql_priv.h index fa12b645041..8a439498df1 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -233,6 +233,8 @@ #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED (1ULL << 30) #define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED (1ULL << 31) #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY (1ULL << 32) +#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33) + #define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ @@ -260,7 +262,8 @@ OPTIMIZER_SWITCH_ORDERBY_EQ_PROP | \ OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED | \ OPTIMIZER_SWITCH_SPLIT_MATERIALIZED | \ - OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY) + OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY |\ + OPTIMIZER_SWITCH_USE_ROWID_FILTER) /* Replication uses 8 bytes to store SQL_MODE in the binary log. The day you diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bfd1c7580fc..616fea0f401 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -64,6 +64,7 @@ #include "sys_vars_shared.h" #include "sp_head.h" #include "sp_rcontext.h" +#include "rowid_filter.h" /* A key part number that means we're using a fulltext scan. @@ -4950,6 +4951,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->needed_reg=select->needed_reg; select->quick=0; impossible_range= records == 0 && s->table->reginfo.impossible_range; + if (join->thd->lex->sql_command == SQLCOM_SELECT && + join->table_count > 1 && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER)) + s->table->select_usable_range_filters(join->thd); } if (!impossible_range) { @@ -6744,12 +6749,14 @@ best_access_path(JOIN *join, double best_time= DBL_MAX; double records= DBL_MAX; table_map best_ref_depends_map= 0; + Range_filter_cost_info *best_filter= 0; double tmp; ha_rows rec; bool best_uses_jbuf= FALSE; MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; SplM_plan_info *spl_plan= 0; + Range_filter_cost_info *filter= 0; disable_jbuf= disable_jbuf || idx == join->const_tables; @@ -6781,6 +6788,7 @@ best_access_path(JOIN *join, key_part_map found_part= 0; table_map found_ref= 0; uint key= keyuse->key; + filter= 0; bool ft_key= (keyuse->keypart == FT_KEYPART); /* Bitmap of keyparts where the ref access is over 'keypart=const': */ key_part_map const_part= 0; @@ -7141,6 +7149,12 @@ best_access_path(JOIN *join, loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ + filter= table->best_filter_for_current_join_order(start_key->key, + records, + record_count); + if (filter && (filter->get_filter_gain(record_count*records) < tmp)) + tmp= tmp - filter->get_filter_gain(record_count*records); + if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE) { best_time= tmp + records/(double) TIME_FOR_COMPARE; @@ -7149,6 +7163,7 @@ best_access_path(JOIN *join, best_key= start_key; best_max_key_part= max_key_part; best_ref_depends_map= found_ref; + best_filter= filter; } } /* for each key */ records= best_records; @@ -7190,6 +7205,7 @@ best_access_path(JOIN *join, best_key= hj_start_key; best_ref_depends_map= 0; best_uses_jbuf= TRUE; + best_filter= 0; } /* @@ -7294,8 +7310,15 @@ best_access_path(JOIN *join, tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; } } - + double best_records= rnd_records; tmp += s->startup_cost; + + filter= s->table->best_filter_for_current_join_order(MAX_KEY, + rnd_records, + record_count); + if (filter && (filter->get_filter_gain(record_count*rnd_records) < tmp)) + tmp= tmp - filter->get_filter_gain(record_count*rnd_records); + /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -7311,8 +7334,9 @@ best_access_path(JOIN *join, will ensure that this will be used */ best= tmp; - records= rnd_records; + records= best_records; best_key= 0; + best_filter= filter; /* range/index_merge/ALL/index access method are "independent", so: */ best_ref_depends_map= 0; best_uses_jbuf= MY_TEST(!disable_jbuf && !((s->table->map & @@ -7329,6 +7353,7 @@ best_access_path(JOIN *join, pos->loosescan_picker.loosescan_key= MAX_KEY; pos->use_join_buffer= best_uses_jbuf; pos->spl_plan= spl_plan; + pos->filter= best_filter; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -9429,6 +9454,7 @@ bool JOIN::inject_cond_into_where(Item *injected_cond) static Item * const null_ptr= NULL; + /* Set up join struct according to the picked join order in @@ -9588,6 +9614,8 @@ bool JOIN::get_best_combination() is_hash_join_key_no(j->ref.key)) hash_join= TRUE; + j->filter= best_positions[tablenr].filter; + loop_end: /* Save records_read in JOIN_TAB so that select_describe()/etc don't have @@ -12448,7 +12476,9 @@ ha_rows JOIN_TAB::get_examined_rows() double examined_rows; SQL_SELECT *sel= filesort? filesort->select : this->select; - if (sel && sel->quick && use_quick != 2) + if (filter) + examined_rows= records_read; + else if (sel && sel->quick && use_quick != 2) examined_rows= (double)sel->quick->records; else if (type == JT_NEXT || type == JT_ALL || type == JT_HASH || type ==JT_HASH_NEXT) @@ -24883,6 +24913,31 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, } +/** + This method saves the data that should be printed in EXPLAIN + if any filter was used for this table. +*/ + +bool JOIN_TAB::save_filter_explain_data(Explain_table_access *eta) +{ + if (!filter) + return 0; + KEY *pk_key= get_keyinfo_by_key_no(filter->key_no); + StringBuffer<64> buff_for_pk; + const char *tmp_buff; + buff_for_pk.append("filter:"); + tmp_buff= pk_key->name.str; + buff_for_pk.append(tmp_buff, strlen(tmp_buff), system_charset_info); + if (!(eta->ref_list.append_str(join->thd->mem_root, + buff_for_pk.c_ptr_safe()))) + return 1; + eta->key.set_filter_key_length(pk_key->key_length); + (filter->selectivity*100 >= 1) ? eta->filter_perc= round(filter->selectivity*100) : + eta->filter_perc= 1; + return 0; +} + + bool JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tables, bool distinct_arg, JOIN_TAB *first_top_tab) @@ -25138,6 +25193,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, eta->filtered= f; } + if ((tab_select && tab_select->quick && tab_type != JT_CONST) || + (key_info && ref.key_parts && tab_type != JT_FT)) + { + if (save_filter_explain_data(eta)) + return 1; + } + /* Build "Extra" field and save it */ key_read= table->file->keyread_enabled(); if ((tab_type == JT_NEXT || tab_type == JT_CONST) && diff --git a/sql/sql_select.h b/sql/sql_select.h index 4140a0293f8..de3dff46189 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -506,6 +506,9 @@ typedef struct st_join_table { uint n_sj_tables; bool preread_init_done; + /* Copy of POSITION::filter, set by get_best_combination() */ + Range_filter_cost_info *filter; + Dynamic_array<char*> rowid_filter_pk; void cleanup(); inline bool is_using_loose_index_scan() @@ -656,6 +659,7 @@ typedef struct st_join_table { SplM_plan_info *choose_best_splitting(double record_count, table_map remaining_tables); bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables); + bool save_filter_explain_data(Explain_table_access *eta); } JOIN_TAB; @@ -881,6 +885,9 @@ public: }; +class Range_filter_cost_info; + + /** Information about a position of table within a join order. Used in join optimization. @@ -963,6 +970,8 @@ typedef struct st_position /* Info on splitting plan used at this position */ SplM_plan_info *spl_plan; + /* The index for which filter can be built */ + Range_filter_cost_info *filter; } POSITION; typedef Bounds_checked_array<Item_null_result*> Item_null_array; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 75a962939ae..ba9b2c814ce 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2493,6 +2493,7 @@ export const char *optimizer_switch_names[]= "condition_pushdown_for_derived", "split_materialized", "condition_pushdown_for_subquery", + "rowid_filter", "default", NullS }; @@ -6113,3 +6114,10 @@ static Sys_var_enum Sys_secure_timestamp( "historical behavior, anyone can modify session timestamp", READ_ONLY GLOBAL_VAR(opt_secure_timestamp), CMD_LINE(REQUIRED_ARG), secure_timestamp_levels, DEFAULT(SECTIME_NO)); + +static Sys_var_ulonglong Sys_max_rowid_filter_size( + "max_rowid_filter_size", + "The maximum number of rows that fit in memory", + SESSION_VAR(max_rowid_filter_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(128*1024), + BLOCK_SIZE(1)); diff --git a/sql/table.cc b/sql/table.cc index e98836cd93c..2031d27b549 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4633,6 +4633,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; + best_filter_count= 0; + range_filter_cost_info_elements= 0; #ifdef HAVE_REPLICATION /* used in RBR Triggers */ master_had_triggers= 0; diff --git a/sql/table.h b/sql/table.h index b75fa9074a4..14bc928bcb5 100644 --- a/sql/table.h +++ b/sql/table.h @@ -55,6 +55,7 @@ class Virtual_column_info; class Table_triggers_list; class TMP_TABLE_PARAM; class SEQUENCE; +class Range_filter_cost_info; /* Used to identify NESTED_JOIN structures within a join (applicable only to @@ -1199,8 +1200,10 @@ public: */ key_part_map const_key_parts[MAX_KEY]; - uint quick_key_parts[MAX_KEY]; - uint quick_n_ranges[MAX_KEY]; + uint quick_key_parts[MAX_KEY]; + uint quick_n_ranges[MAX_KEY]; + /* For each key I/O access cost is stored */ + double quick_key_io[MAX_KEY]; /* Estimate of number of records that satisfy SARGable part of the table @@ -1491,6 +1494,21 @@ public: void deny_splitting(); void add_splitting_info_for_key_field(struct KEY_FIELD *key_field); + + /** + Range filter info + */ + /* Minimum possible #T value to apply filter*/ + uint best_filter_count; + uint range_filter_cost_info_elements; + Range_filter_cost_info *range_filter_cost_info; + Range_filter_cost_info + *best_filter_for_current_join_order(uint ref_key_no, + double record_count, + double records); + void sort_range_filter_cost_info_array(); + void prune_range_filters(); + void select_usable_range_filters(THD *thd); /** System Versioning support */ |