author     Galina Shalygina <galina.shalygina@mariadb.com>  2018-08-16 00:24:52 +0300
committer  Galina Shalygina <galina.shalygina@mariadb.com>  2018-09-28 23:50:22 +0300
commit     8d5a11122c32f4d9eb87536886c6e893377bdd07 (patch)
tree       ab8cc222d336acd0006a544abb362affc149f671
parent     befc09f00263d5375b2bb2ea0fac70b6cb0cb7fd (diff)
download   mariadb-git-8d5a11122c32f4d9eb87536886c6e893377bdd07.tar.gz
MDEV-16188: Use in-memory PK filters built from range index scans
First phase: make optimizer choose to use filter and show it in EXPLAIN.
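
The idea behind such a filter: rowids (or primary keys) produced by a cheap range scan over a selective index are collected into a sorted in-memory container, and the main access method for the table probes that container to skip rows that cannot match. A minimal standalone sketch of the idea follows; the class and names are purely illustrative and are not the data structures introduced by this patch.

  // Purely illustrative stand-in for a rowid filter (not the classes added
  // by this patch): rowids found by a cheap range scan over a selective
  // index are kept in a sorted in-memory container; the main access method
  // probes it and skips rows whose rowid is absent.
  #include <algorithm>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  class Sample_rowid_filter
  {
    std::vector<std::uint64_t> rowids;
  public:
    void add(std::uint64_t id)   { rowids.push_back(id); }  // filled by the range scan
    void done_filling()          { std::sort(rowids.begin(), rowids.end()); }
    bool check(std::uint64_t id) const                      // probed per candidate row
    { return std::binary_search(rowids.begin(), rowids.end(), id); }
  };

  int main()
  {
    Sample_rowid_filter filter;
    filter.add(42); filter.add(7); filter.add(1001);
    filter.done_filling();
    std::printf("rowid 42 -> %s\n", filter.check(42) ? "keep" : "skip");
    return 0;
  }
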
-rw-r--r--  mysql-test/main/rowid_filter.test  142
-rw-r--r--  sql/CMakeLists.txt                   1
-rw-r--r--  sql/handler.cc                      23
-rw-r--r--  sql/handler.h                        2
-rw-r--r--  sql/opt_range.cc                     3
-rw-r--r--  sql/rowid_filter.cc                218
-rw-r--r--  sql/rowid_filter.h                 183
-rw-r--r--  sql/sql_class.cc                     6
-rw-r--r--  sql/sql_class.h                      1
-rw-r--r--  sql/sql_explain.cc                  32
-rw-r--r--  sql/sql_explain.h                   15
-rw-r--r--  sql/sql_priv.h                       5
-rw-r--r--  sql/sql_select.cc                   68
-rw-r--r--  sql/sql_select.h                     9
-rw-r--r--  sql/sys_vars.cc                      8
-rw-r--r--  sql/table.cc                         2
-rw-r--r--  sql/table.h                         22
17 files changed, 722 insertions, 18 deletions
diff --git a/mysql-test/main/rowid_filter.test b/mysql-test/main/rowid_filter.test
new file mode 100644
index 00000000000..4f41c408d94
--- /dev/null
+++ b/mysql-test/main/rowid_filter.test
@@ -0,0 +1,142 @@
+--disable_warnings
+DROP DATABASE IF EXISTS dbt3_s001;
+--enable_warnings
+
+CREATE DATABASE dbt3_s001;
+
+use dbt3_s001;
+
+--disable_query_log
+--disable_result_log
+--disable_warnings
+--source include/dbt3_s001.inc
+--enable_warnings
+--enable_result_log
+--enable_query_log
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate} -> i_l_receiptdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND
+ l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25';
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate} -> i_l_receiptdate
+--echo # orders : {i_o_orderdate} -> i_o_orderdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND
+ l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25' AND
+ o_orderdate > '1997-01-15';
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate,
+--echo # i_l_commitdate} -> i_l_receiptdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND
+ l_receiptdate BETWEEN '1997-01-10' AND '1997-01-25' AND
+ l_commitdate BETWEEN '1997-01-05' AND '1997-01-25';
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate,
+--echo # i_l_commitdate} -> i_l_commitdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-02-01' AND
+ l_receiptdate BETWEEN '1997-01-01' AND '1997-01-25' AND
+ l_commitdate BETWEEN '1997-01-15' AND '1997-01-25';
+
+CREATE INDEX i_l_extendedprice ON lineitem(l_extendedprice);
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate, i_l_commitdate,
+--echo # i_l_extendedprice} -> i_l_extendedprice
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1996-11-01' AND '1997-01-21' AND
+ l_receiptdate BETWEEN '1996-11-21' AND '1997-01-25' AND
+ l_commitdate BETWEEN '1996-11-25' AND '1997-01-20' AND
+ l_extendedprice BETWEEN 26000 AND 27000;
+
+--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_shipdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-11' AND '1997-01-21' AND
+ l_extendedprice BETWEEN 26000 AND 27000;
+
+--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_extendedprice
+--echo # intersection point in the I quadrant
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE (l_shipdate BETWEEN '1997-01-11' AND '1997-01-26' OR
+ l_shipdate BETWEEN '1995-02-01' AND '1995-02-14' OR
+ l_shipdate BETWEEN '1994-12-12' AND '1994-12-28'
+ ) AND l_extendedprice BETWEEN 26000 AND 27000;
+
+--echo # lineitem : {i_l_shipdate, i_l_extendedprice} -> i_l_shipdate
+--echo # parallel lines
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE (l_shipdate BETWEEN '1997-01-11' AND '1997-01-26' OR
+ l_shipdate BETWEEN '1995-02-01' AND '1995-02-16' OR
+ l_shipdate BETWEEN '1994-12-12' AND '1994-12-27'
+ ) AND l_extendedprice BETWEEN 26000 AND 27000;
+
+
+CREATE INDEX i_l_discount ON lineitem(l_discount);
+CREATE INDEX i_l_tax ON lineitem(l_tax);
+
+--echo # lineitem : {i_l_receiptdate, i_l_shipdate, i_l_commitdate,
+--echo # i_l_extendedprice, i_l_discount, i_l_tax}
+--echo # -> {i_l_extendedprice}
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1996-11-01' AND '1997-01-21' AND
+ l_receiptdate BETWEEN '1996-11-21' AND '1997-01-25' AND
+ l_commitdate BETWEEN '1996-11-25' AND '1997-01-20' AND
+ l_extendedprice BETWEEN 26000 AND 27000 AND
+ l_discount BETWEEN 0 AND 0.01 AND
+ l_tax BETWEEN 0.03 AND 0.04;
+
+DROP INDEX i_l_extendedprice on lineitem;
+DROP INDEX i_l_discount on lineitem;
+DROP INDEX i_l_tax on lineitem;
+
+SET max_rowid_filter_size= 1024;
+
+--echo # lineitem : {i_l_shipdate, i_l_receiptdate, i_l_commitdate}
+--echo # -> i_l_shipdate
+--echo # i_l_commitdate isn't in-memory -> isn't used
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1996-12-28' AND '1997-01-20' AND
+ l_receiptdate BETWEEN '1996-12-21' AND '1997-01-25' AND
+ l_commitdate BETWEEN '1996-12-01' AND '1997-01-25';
+
+SET max_rowid_filter_size= DEFAULT;
+
+--echo # lineitem : {i_l_shipdate, i_l_commitdate} -> i_l_commitdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND
+ l_commitdate BETWEEN '1997-01-10' AND '1997-01-12';
+
+--echo # lineitem : {i_l_shipdate, i_l_commitdate} -> i_l_commitdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND
+ l_commitdate BETWEEN '1993-01-10' AND '1997-01-12';
+
+--echo # lineitem : {i_l_shipdate, i_l_commitdate, i_l_receiptdate}
+--echo # -> i_l_receiptdate
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1993-01-01' AND '1997-01-30' AND
+ l_commitdate BETWEEN '1993-01-10' AND '1997-01-12' AND
+ l_receiptdate BETWEEN '1997-01-10' AND '1997-01-12';
+
+--echo # lineitem : {i_l_shipdate, i_l_receiptdate} -> i_l_receiptdate
+--echo # indexes with high selectivity
+EXPLAIN SELECT *
+FROM orders JOIN lineitem ON o_orderkey=l_orderkey
+WHERE l_shipdate BETWEEN '1997-01-09' AND '1997-01-10' AND
+ l_receiptdate BETWEEN '1997-01-09' AND '1997-01-10';
+
+DROP DATABASE dbt3_s001;
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 0037ff23153..13a116a71d4 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -134,6 +134,7 @@ SET (SQL_SOURCE
sql_sequence.cc sql_sequence.h ha_sequence.h
sql_tvc.cc sql_tvc.h
opt_split.cc
+ rowid_filter.cc rowid_filter.h
${WSREP_SOURCES}
table_cache.cc encryption.cc temporary_tables.cc
proxy_protocol.cc
diff --git a/sql/handler.cc b/sql/handler.cc
index b3481c7e429..d48a4660d52 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -2601,6 +2601,22 @@ LEX_CSTRING *handler::engine_name()
}
+/**
+  The method returns the cost of the random I/O accesses needed to read
+  the index entries when the index is used; *length is set to the entry length.
+*/
+
+double handler::get_io_cost(uint index, ha_rows rows, uint *length)
+{
+ uint len= table->key_info[index].key_length + ref_length;
+ if (index == table->s->primary_key && table->file->primary_key_is_clustered())
+ len= table->s->stored_rec_length;
+ double keys_per_block= (stats.block_size/2.0/len+1);
+ *length= len;
+ return (rows + keys_per_block-1)/ keys_per_block;
+}
+
+
double handler::keyread_time(uint index, uint ranges, ha_rows rows)
{
/*
@@ -2612,11 +2628,8 @@ double handler::keyread_time(uint index, uint ranges, ha_rows rows)
engines that support that (e.g. InnoDB) may want to overwrite this method.
The model counts in the time to read index entries from cache.
*/
- size_t len= table->key_info[index].key_length + ref_length;
- if (index == table->s->primary_key && table->file->primary_key_is_clustered())
- len= table->s->stored_rec_length;
- double keys_per_block= (stats.block_size/2.0/len+1);
- return (rows + keys_per_block-1)/ keys_per_block +
+ uint len;
+ return get_io_cost(index, rows, &len) +
len*rows/(stats.block_size+1)/TIME_FOR_COMPARE ;
}
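
The arithmetic factored out into get_io_cost() can be restated standalone; the figures below (block size, tuple length, row count) are assumed for illustration only:

  // Standalone restatement of the get_io_cost() arithmetic with assumed
  // figures (block size, tuple length and row count are made up).
  #include <cstdio>

  int main()
  {
    double block_size= 16384;   // stats.block_size
    double len= 14;             // key_length + ref_length of the index
    double rows= 1000;          // index entries the range scan will read

    double keys_per_block= block_size / 2.0 / len + 1;            // ~586 entries
    double io_cost= (rows + keys_per_block - 1) / keys_per_block; // ~2.7 block reads
    std::printf("keys_per_block=%.1f io_cost=%.2f\n", keys_per_block, io_cost);
    return 0;
  }
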
diff --git a/sql/handler.h b/sql/handler.h
index ce8711bd5ab..bfc8bd774b3 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -3221,6 +3221,8 @@ public:
*/
virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+ double get_io_cost(uint index, ha_rows rows, uint *length);
+
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
/*
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 6f28ab59c15..9134ad5ab62 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -10515,6 +10515,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
handler *file= param->table->file;
ha_rows rows= HA_POS_ERROR;
uint keynr= param->real_keynr[idx];
+ uint length;
DBUG_ENTER("check_quick_select");
/* Handle cases when we don't have a valid non-empty list of range */
@@ -10573,6 +10574,8 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
MY_MIN(param->table->quick_condition_rows, rows);
param->table->quick_rows[keynr]= rows;
param->table->quick_costs[keynr]= cost->total_cost();
+ param->table->quick_key_io[keynr]=
+ file->get_io_cost(keynr, param->table->quick_rows[keynr], &length);
}
}
/* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
new file mode 100644
index 00000000000..bcca9a0f528
--- /dev/null
+++ b/sql/rowid_filter.cc
@@ -0,0 +1,218 @@
+#include "rowid_filter.h"
+
+
+/**
+  Sets information about the filter built for the index with number key_numb.
+  It sets the cardinality of the filter, calculates its selectivity
+  and computes the slope and intercept values of its gain function.
+*/
+
+void Range_filter_cost_info::init(TABLE *tab, uint key_numb)
+{
+ table= tab;
+ key_no= key_numb;
+ cardinality= table->quick_rows[key_no];
+ b= filter_io_cost() + filter_write_cost() + filter_sort_cost();
+ selectivity= cardinality/((double) table->stat_records());
+ a= (1 + COST_COND_EVAL)*(1 - selectivity) - lookup_cost();
+ intersect_x_axis_abcissa= b/a;
+}
+
+
+/**
+ @brief
+    Sort the available filters by their building cost in increasing order
+
+  @details
+    The method starts sorting the available filters from the first filter
+    that is not marked as a best filter. If two filters are already marked
+    as the best filters there is no need to sort the others: the best
+    filters are already sorted by their building cost and by definition
+    have the smallest building cost among all the filters.
+
+    Bubble sort is used as the sorting method.
+*/
+
+void TABLE::sort_range_filter_cost_info_array()
+{
+ if (best_filter_count == 2)
+ return;
+
+ for (uint i= best_filter_count; i < range_filter_cost_info_elements-1; i++)
+ {
+ for (uint j= i+1; j < range_filter_cost_info_elements; j++)
+ {
+ if (range_filter_cost_info[i].intersect_x_axis_abcissa >
+ range_filter_cost_info[j].intersect_x_axis_abcissa)
+ swap_variables(Range_filter_cost_info,
+ range_filter_cost_info[i],
+ range_filter_cost_info[j]);
+ }
+ }
+}
+
+
+/**
+ @brief
+ The method searches for the filters that can reduce the join cost the most
+
+ @details
+ The method looks through the available filters trying to choose the best
+ filter and eliminate as many filters as possible.
+
+    Filters are considered as linear functions. The best filter is the linear
+    function that intersects all the other linear functions outside the
+    I quadrant and has the biggest slope value a. This filter will reduce the
+    partial join cost the most. If possible, the second best filter is also
+    chosen. The second best filter can be used if the ref access is made on
+    the index of the first best filter.
+
+    So there is no need to store any other filters except those that
+    intersect the best ones in the I quadrant. At this step it is impossible
+    to say which of such filters is better and will give the biggest gain.
+
+ The number of filters that can be used is stored in the
+ range_filter_cost_info_elements variable.
+*/
+
+void TABLE::prune_range_filters()
+{
+ key_map pruned_filter_map;
+ pruned_filter_map.clear_all();
+ Range_filter_cost_info *max_slope_filters[2] = {0, 0};
+
+ for (uint i= 0; i < range_filter_cost_info_elements; i++)
+ {
+ Range_filter_cost_info *filter= &range_filter_cost_info[i];
+ if (filter->a < 0)
+ {
+ range_filter_cost_info_elements--;
+ swap_variables(Range_filter_cost_info, range_filter_cost_info[i],
+ range_filter_cost_info[range_filter_cost_info_elements]);
+ continue;
+ }
+ for (uint j= i+1; j < range_filter_cost_info_elements; j++)
+ {
+ Range_filter_cost_info *cand_filter= &range_filter_cost_info[j];
+
+ double intersect_x= filter->get_intersect_x(cand_filter);
+ double intersect_y= filter->get_intersect_y(intersect_x);
+
+ if (intersect_x > 0 && intersect_y > 0)
+ {
+ pruned_filter_map.set_bit(cand_filter->key_no);
+ pruned_filter_map.set_bit(filter->key_no);
+ }
+ }
+ if (!pruned_filter_map.is_set(filter->key_no))
+ {
+ if (!max_slope_filters[0])
+ max_slope_filters[0]= filter;
+ else
+ {
+ if (!max_slope_filters[1] ||
+ max_slope_filters[1]->a < filter->a)
+ max_slope_filters[1]= filter;
+ if (max_slope_filters[0]->a < max_slope_filters[1]->a)
+ swap_variables(Range_filter_cost_info*, max_slope_filters[0],
+ max_slope_filters[1]);
+ }
+ }
+ }
+
+ for (uint i= 0; i<2; i++)
+ {
+ if (max_slope_filters[i])
+ {
+ swap_variables(Range_filter_cost_info,
+ range_filter_cost_info[i],
+ *max_slope_filters[i]);
+ if (i == 0 &&
+ max_slope_filters[1] == &range_filter_cost_info[0])
+ max_slope_filters[1]= max_slope_filters[0];
+
+ best_filter_count++;
+ max_slope_filters[i]= &range_filter_cost_info[i];
+ }
+ }
+ sort_range_filter_cost_info_array();
+}
+
+
+void TABLE::select_usable_range_filters(THD *thd)
+{
+ uint key_no;
+ key_map usable_range_filter_keys;
+ usable_range_filter_keys.clear_all();
+ key_map::Iterator it(quick_keys);
+ while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+ {
+ if (quick_rows[key_no] >
+ thd->variables.max_rowid_filter_size/file->ref_length)
+ continue;
+ usable_range_filter_keys.set_bit(key_no);
+ }
+
+ if (usable_range_filter_keys.is_clear_all())
+ return;
+
+ range_filter_cost_info_elements= usable_range_filter_keys.bits_set();
+ range_filter_cost_info=
+ new (thd->mem_root) Range_filter_cost_info [range_filter_cost_info_elements];
+ Range_filter_cost_info *curr_filter_cost_info= range_filter_cost_info;
+
+ key_map::Iterator li(usable_range_filter_keys);
+ while ((key_no= li++) != key_map::Iterator::BITMAP_END)
+ {
+ curr_filter_cost_info->init(this, key_no);
+ curr_filter_cost_info++;
+ }
+ prune_range_filters();
+}
+
+
+Range_filter_cost_info
+*TABLE::best_filter_for_current_join_order(uint ref_key_no,
+ double record_count,
+ double records)
+{
+ if (!this || range_filter_cost_info_elements == 0)
+ return 0;
+
+ double card= record_count*records;
+ Range_filter_cost_info *best_filter= &range_filter_cost_info[0];
+
+ if (card < best_filter->intersect_x_axis_abcissa)
+ return 0;
+ if (best_filter_count != 0)
+ {
+ if (best_filter->key_no == ref_key_no)
+ {
+ if (best_filter_count == 2)
+ {
+ best_filter= &range_filter_cost_info[1];
+ if (card < best_filter->intersect_x_axis_abcissa)
+ return 0;
+ return best_filter;
+ }
+ }
+ else
+ return best_filter;
+ }
+
+ double best_filter_improvement= 0.0;
+ best_filter= 0;
+
+ for (uint i= best_filter_count; i < range_filter_cost_info_elements; i++)
+ {
+ Range_filter_cost_info *filter= &range_filter_cost_info[i];
+ if (card < filter->intersect_x_axis_abcissa)
+ break;
+ if (best_filter_improvement < filter->get_filter_gain(card))
+ {
+ best_filter_improvement= filter->get_filter_gain(card);
+ best_filter= filter;
+ }
+ }
+ return best_filter;
+}
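
How prune_range_filters() compares two filters can be illustrated with assumed slope and building-cost values. The toy functions below mirror get_intersect_x(), get_intersect_y() and get_filter_gain() of Range_filter_cost_info, where the gain is a*#T - b and b is kept as a positive building cost:

  // Toy comparison of two filters as the linear gain functions used by
  // prune_range_filters(); slopes and building costs are assumed numbers.
  #include <cfloat>
  #include <cstdio>

  struct Toy_filter { double a; double b; };   // slope, building cost

  static double intersect_x(const Toy_filter &f, const Toy_filter &g)
  {
    return (f.a == g.a) ? DBL_MAX : (f.b - g.b) / (f.a - g.a);
  }

  int main()
  {
    Toy_filter fi= { 0.9, 50.0 };   // very selective but expensive to build
    Toy_filter fj= { 0.4, 10.0 };   // less selective, cheap to build
    double x= intersect_x(fi, fj);  // 80: the gain lines cross at #T = 80
    double y= fi.a * x - fi.b;      // 22: the gain at the intersection point
    // An intersection with x > 0 and y > 0 lies in the I quadrant, so neither
    // filter dominates the other and both have to be kept; otherwise the
    // filter with the bigger slope always wins.
    std::printf("intersection: #T=%.1f gain=%.1f -> keep %s\n",
                x, y, (x > 0 && y > 0) ? "both" : (fi.a > fj.a ? "Fi" : "Fj"));
    return 0;
  }

With these made-up numbers the two gain lines cross at #T = 80 with a positive gain, i.e. in the I quadrant, so neither filter dominates and both have to be kept.
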
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
new file mode 100644
index 00000000000..0d8520f25c5
--- /dev/null
+++ b/sql/rowid_filter.h
@@ -0,0 +1,183 @@
+#ifndef ROWID_FILTER_INCLUDED
+#define ROWID_FILTER_INCLUDED
+
+/**
+ It makes sense to apply filters for a certain join order when the following
+ inequality holds:
+
+ #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) +
+ I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi)) +
+ c3*#T (1),
+
+ where #T - the fanout of the partial join
+ Fi - a filter for the index with the number i in
+ the key_map of available indexes for this table
+ sel(Fi) - the selectivity of the index with the number
+ i
+ c4*#T,
+ c4*#T*sel(Fi) - a cost to apply available predicates
+ c4 - a constant to apply available predicates
+ I/O(Fi) - a cost of the I/O accesses to Fi
+ #(Fi) - a number of estimated records that range
+ access would use
+ c1*#(Fi) - a cost to write in Fi
+ c1 - a constant to write one element in Fi
+ c2*#(Fi)*log(#(Fi)) - a cost to sort in Fi
+ c2 - a sorting constant
+        c3*#T                 - a cost of the look-ups into Fi, one per row of the partial join
+ c3 - a constant to look-up into Fi
+
+ Let's set a new variable FBCi (filter building cost for the filter with
+ index i):
+
+ FBCi = I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi))
+
+ It can be seen that FBCi doesn't depend on #T.
+
+ So using this variable (1) can be rewritten:
+
+ #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) +
+ FBCi +
+ c3*#T
+
+  To get the possible cost improvement when a filter is used, the right part
+  of inequality (1) should be subtracted from the left part.
+  Denote the result as G(#T):
+
+ G(#T)= #T + c4*#T - (#T*sel(Fi) + c4*#T*sel(Fi) + FBCi + c3*#T) (2)
+
+  At the prepare stage, when the filters are created, the value of #T isn't known yet.
+
+  To find out which of the filters available for the table is the best one
+  (which filter gives the biggest gain), properties of linear functions
+  can be used. Consider the filter gain as a linear function:
+
+ Gi(#T)= ai*#T + bi (3)
+
+ where ai= 1+c4-c3-sel(Fi)*(1+c4),
+ bi= -FBCi
+
+  The filter gain can be interpreted as the ordinate and #T as the abscissa.
+
+  So the aim is to find the linear function that has the biggest ordinate
+  value for every positive abscissa (#T can't be negative) compared with
+  the other available functions.
+
+  Consider two filters Fi, Fj, i.e. two linear functions with positive slopes.
+  To find out which of them is better, let's find the coordinates of their
+  intersection point:
+
+    Gi(#T0)= Gj(#T0)                     (using (3)) =>
+    #T0= (bj - bi)/(ai - aj)
+    =>
+    #T0= (FBCi - FBCj)/((sel(Fj)-sel(Fi))*(1+c4))
+
+  Substituting #T0 into formula (3), G(#T0) can easily be found.
+
+  It can be seen that if two linear functions intersect in the II, III or IV
+  quadrant, the linear function with the bigger slope value will always
+  be better.
+
+  If two functions intersect in the I quadrant, then for #T1 < #T0 the
+  function with the smaller slope gives the better gain and for #T1 > #T0
+  the function with the bigger slope gives the better gain:
+
+  for each #T1 > #T0   if (ai > aj) => (Gi(#T1) >= Gj(#T1))
+           #T1 <= #T0  if (ai > aj) => (Gi(#T1) <= Gj(#T1))
+
+ So both linear functions should be saved.
+
+ Interesting cases:
+
+  1. The filters Fi and Fj have equal slopes: ai = aj.
+
+     In this case the intercepts bi and bj should be compared.
+     The filter with the bigger intercept gives the better result.
+
+  2. Only one filter remains after the calculations and for some join order
+     it is built on the very index that is used to access the table.
+     Therefore this filter can't be used.
+
+     In this case the gain is computed for every filter that can be
+     constructed for this table.
+
+  After the information about the filters has been computed, for each partial
+  join order it is checked whether a filter can be applied to the current
+  table. If it gives a cost improvement, it is saved in the best plan
+  for this partial join.
+*/
+
+#include "mariadb.h"
+#include "sql_class.h"
+#include "table.h"
+
+/* Cost to write into filter */
+#define COST_WRITE 0.01
+/* Weight factor for filter sorting */
+#define CNST_SORT 0.01
+/* Cost to evaluate condition */
+#define COST_COND_EVAL 0.2
+
+class QUICK_RANGE_SELECT;
+
+class Range_filter_cost_info : public Sql_alloc
+{
+public:
+ TABLE *table;
+ uint key_no;
+ double cardinality;
+ double b; // intercept of the linear function
+ double a; // slope of the linear function
+ double selectivity;
+ double intersect_x_axis_abcissa;
+
+ /**
+ Filter cost functions
+ */
+ /* Cost to lookup into filter */
+ inline double lookup_cost()
+ {
+ return log(cardinality)*0.01;
+ }
+
+  /* Cost of the I/O accesses of the range scan that fills the filter */
+ inline double filter_io_cost()
+ { return table->quick_key_io[key_no]; }
+
+ /* Cost to write elements in filter */
+ inline double filter_write_cost()
+ { return COST_WRITE*cardinality; }
+
+ /* Cost to sort elements in filter */
+ inline double filter_sort_cost()
+ {
+ return CNST_SORT*cardinality*log(cardinality);
+ }
+ /* End of filter cost functions */
+
+ Range_filter_cost_info() : table(0), key_no(0) {}
+
+ void init(TABLE *tab, uint key_numb);
+
+ inline double get_intersect_x(Range_filter_cost_info *filter)
+ {
+ if (a == filter->a)
+ return DBL_MAX;
+ return (b - filter->b)/(a - filter->a);
+ }
+ inline double get_intersect_y(double intersect_x)
+ {
+ if (intersect_x == DBL_MAX)
+ return DBL_MAX;
+ return intersect_x*a - b;
+ }
+
+ /**
+    Get the gain that using the filter gives for a partial join order
+    with the cardinality card
+ */
+ inline double get_filter_gain(double card)
+ { return card*a - b; }
+};
+
+#endif /* ROWID_FILTER_INCLUDED */
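
A walk-through, with made-up figures, of what Range_filter_cost_info::init() computes from the constants defined above: the slope a, the building cost b and the break-even fanout b/a stored in intersect_x_axis_abcissa:

  // Assumed-number walk-through of Range_filter_cost_info::init().
  #include <cmath>
  #include <cstdio>

  int main()
  {
    double cardinality= 1000;      // rows the range scan would return
    double total_rows = 100000;    // stat_records() of the table
    double io_cost    = 3;         // quick_key_io[] figure for the index

    double selectivity= cardinality / total_rows;                  // 0.01
    double b= io_cost + 0.01*cardinality                           // COST_WRITE
                      + 0.01*cardinality*std::log(cardinality);    // CNST_SORT
    double lookup= std::log(cardinality)*0.01;                     // lookup_cost()
    double a= (1 + 0.2)*(1 - selectivity) - lookup;                // COST_COND_EVAL
    std::printf("a=%.3f b=%.1f break-even #T=%.1f\n", a, b, b/a);
    return 0;
  }

A join order benefits from this filter only once its fanout #T exceeds the printed break-even value (about 73 with these numbers).
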
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 0bbfdba88c7..d6b2cd6fd76 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -2668,12 +2668,12 @@ void THD::make_explain_field_list(List<Item> &field_list, uint8 explain_flags,
mem_root);
item->maybe_null=1;
field_list.push_back(item=new (mem_root)
- Item_empty_string(this, "ref",
+ Item_empty_string(this, "ref|filter",
NAME_CHAR_LEN*MAX_REF_PARTS, cs),
mem_root);
item->maybe_null=1;
- field_list.push_back(item= new (mem_root)
- Item_return_int(this, "rows", 10, MYSQL_TYPE_LONGLONG),
+ field_list.push_back(item=new (mem_root)
+ Item_empty_string(this, "rows", NAME_CHAR_LEN, cs),
mem_root);
if (is_analyze)
{
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 25e2d736cdc..aaa54447c0b 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -727,6 +727,7 @@ typedef struct system_variables
uint column_compression_threshold;
uint column_compression_zlib_level;
uint in_subquery_conversion_threshold;
+ ulonglong max_rowid_filter_size;
vers_asof_timestamp_t vers_asof_timestamp;
ulong vers_alter_history;
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index 1c45b05ccc5..23bc1e906a0 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -1146,6 +1146,15 @@ void Explain_table_access::fill_key_len_str(String *key_len_str) const
length= longlong10_to_str(hash_next_key.get_key_len(), buf, 10) - buf;
key_len_str->append(buf, length);
}
+
+ if (key.get_filter_key_length() != (uint)-1)
+ {
+ char buf[64];
+ size_t length;
+ key_len_str->append(',');
+ length= longlong10_to_str(key.get_filter_key_length(), buf, 10) - buf;
+ key_len_str->append(buf, length);
+ }
}
@@ -1274,12 +1283,20 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai
push_string_list(thd, &item_list, ref_list, &ref_list_buf);
/* `rows` */
+ StringBuffer<64> rows_str;
if (rows_set)
{
+ rows_str.append_ulonglong((ulonglong)rows);
+
+ if (is_filter_set())
+ {
+ rows_str.append(" (");
+ rows_str.append_ulonglong(filter_perc);
+ rows_str.append("%)");
+ }
item_list.push_back(new (mem_root)
- Item_int(thd, (longlong) (ulonglong) rows,
- MY_INT64_NUM_DECIMAL_DIGITS),
- mem_root);
+ Item_string_sys(thd, rows_str.ptr(),
+ rows_str.length()), mem_root);
}
else
item_list.push_back(item_null, mem_root);
@@ -1359,6 +1376,15 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai
extra_buf.append(STRING_WITH_LEN("Using filesort"));
}
+ if (is_filter_set())
+ {
+ if (first)
+ first= false;
+ else
+ extra_buf.append(STRING_WITH_LEN("; "));
+ extra_buf.append(STRING_WITH_LEN("Using filter"));
+ }
+
item_list.push_back(new (mem_root)
Item_string_sys(thd, extra_buf.ptr(),
extra_buf.length()),
diff --git a/sql/sql_explain.h b/sql/sql_explain.h
index 38250cc40ce..71f90477977 100644
--- a/sql/sql_explain.h
+++ b/sql/sql_explain.h
@@ -583,6 +583,7 @@ class Explain_index_use : public Sql_alloc
{
char *key_name;
uint key_len;
+ uint key_len_for_filter;
public:
String_list key_parts_list;
@@ -595,12 +596,16 @@ public:
{
key_name= NULL;
key_len= (uint)-1;
+ key_len_for_filter= (uint)-1;
}
bool set(MEM_ROOT *root, KEY *key_name, uint key_len_arg);
bool set_pseudo_key(MEM_ROOT *root, const char *key_name);
+ void set_filter_key_length(uint key_length_arg)
+ { key_len_for_filter= key_length_arg; }
inline const char *get_key_name() const { return key_name; }
inline uint get_key_len() const { return key_len; }
+ inline uint get_filter_key_length() const { return key_len_for_filter; }
};
@@ -689,7 +694,8 @@ public:
cache_cond(NULL),
pushed_index_cond(NULL),
sjm_nest(NULL),
- pre_join_sort(NULL)
+ pre_join_sort(NULL),
+ filter_perc(UINT_MAX)
{}
~Explain_table_access() { delete sjm_nest; }
@@ -796,6 +802,13 @@ public:
Exec_time_tracker op_tracker;
Table_access_tracker jbuf_tracker;
+ /**
+    The percentage of rows that remain after the filter has been applied
+    to the initial row count.
+ */
+ double filter_perc;
+ inline bool is_filter_set() const { return (filter_perc != UINT_MAX); }
+
int print_explain(select_result_sink *output, uint8 explain_flags,
bool is_analyze,
uint select_id, const char *select_type,
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index fa12b645041..8a439498df1 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -233,6 +233,8 @@
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED (1ULL << 30)
#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED (1ULL << 31)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY (1ULL << 32)
+#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
+
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
@@ -260,7 +262,8 @@
OPTIMIZER_SWITCH_ORDERBY_EQ_PROP | \
OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED | \
OPTIMIZER_SWITCH_SPLIT_MATERIALIZED | \
- OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY)
+ OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY |\
+ OPTIMIZER_SWITCH_USE_ROWID_FILTER)
/*
Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index bfd1c7580fc..616fea0f401 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -64,6 +64,7 @@
#include "sys_vars_shared.h"
#include "sp_head.h"
#include "sp_rcontext.h"
+#include "rowid_filter.h"
/*
A key part number that means we're using a fulltext scan.
@@ -4950,6 +4951,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->needed_reg=select->needed_reg;
select->quick=0;
impossible_range= records == 0 && s->table->reginfo.impossible_range;
+ if (join->thd->lex->sql_command == SQLCOM_SELECT &&
+ join->table_count > 1 &&
+ optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER))
+ s->table->select_usable_range_filters(join->thd);
}
if (!impossible_range)
{
@@ -6744,12 +6749,14 @@ best_access_path(JOIN *join,
double best_time= DBL_MAX;
double records= DBL_MAX;
table_map best_ref_depends_map= 0;
+ Range_filter_cost_info *best_filter= 0;
double tmp;
ha_rows rec;
bool best_uses_jbuf= FALSE;
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
SplM_plan_info *spl_plan= 0;
+ Range_filter_cost_info *filter= 0;
disable_jbuf= disable_jbuf || idx == join->const_tables;
@@ -6781,6 +6788,7 @@ best_access_path(JOIN *join,
key_part_map found_part= 0;
table_map found_ref= 0;
uint key= keyuse->key;
+ filter= 0;
bool ft_key= (keyuse->keypart == FT_KEYPART);
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
key_part_map const_part= 0;
@@ -7141,6 +7149,12 @@ best_access_path(JOIN *join,
loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
} /* not ft_key */
+ filter= table->best_filter_for_current_join_order(start_key->key,
+ records,
+ record_count);
+ if (filter && (filter->get_filter_gain(record_count*records) < tmp))
+ tmp= tmp - filter->get_filter_gain(record_count*records);
+
if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
{
best_time= tmp + records/(double) TIME_FOR_COMPARE;
@@ -7149,6 +7163,7 @@ best_access_path(JOIN *join,
best_key= start_key;
best_max_key_part= max_key_part;
best_ref_depends_map= found_ref;
+ best_filter= filter;
}
} /* for each key */
records= best_records;
@@ -7190,6 +7205,7 @@ best_access_path(JOIN *join,
best_key= hj_start_key;
best_ref_depends_map= 0;
best_uses_jbuf= TRUE;
+ best_filter= 0;
}
/*
@@ -7294,8 +7310,15 @@ best_access_path(JOIN *join,
tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
}
}
-
+ double best_records= rnd_records;
tmp += s->startup_cost;
+
+ filter= s->table->best_filter_for_current_join_order(MAX_KEY,
+ rnd_records,
+ record_count);
+ if (filter && (filter->get_filter_gain(record_count*rnd_records) < tmp))
+ tmp= tmp - filter->get_filter_gain(record_count*rnd_records);
+
/*
We estimate the cost of evaluating WHERE clause for found records
as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
@@ -7311,8 +7334,9 @@ best_access_path(JOIN *join,
will ensure that this will be used
*/
best= tmp;
- records= rnd_records;
+ records= best_records;
best_key= 0;
+ best_filter= filter;
/* range/index_merge/ALL/index access method are "independent", so: */
best_ref_depends_map= 0;
best_uses_jbuf= MY_TEST(!disable_jbuf && !((s->table->map &
@@ -7329,6 +7353,7 @@ best_access_path(JOIN *join,
pos->loosescan_picker.loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
pos->spl_plan= spl_plan;
+ pos->filter= best_filter;
loose_scan_opt.save_to_position(s, loose_scan_pos);
@@ -9429,6 +9454,7 @@ bool JOIN::inject_cond_into_where(Item *injected_cond)
static Item * const null_ptr= NULL;
+
/*
Set up join struct according to the picked join order in
@@ -9588,6 +9614,8 @@ bool JOIN::get_best_combination()
is_hash_join_key_no(j->ref.key))
hash_join= TRUE;
+ j->filter= best_positions[tablenr].filter;
+
loop_end:
/*
Save records_read in JOIN_TAB so that select_describe()/etc don't have
@@ -12448,7 +12476,9 @@ ha_rows JOIN_TAB::get_examined_rows()
double examined_rows;
SQL_SELECT *sel= filesort? filesort->select : this->select;
- if (sel && sel->quick && use_quick != 2)
+ if (filter)
+ examined_rows= records_read;
+ else if (sel && sel->quick && use_quick != 2)
examined_rows= (double)sel->quick->records;
else if (type == JT_NEXT || type == JT_ALL ||
type == JT_HASH || type ==JT_HASH_NEXT)
@@ -24883,6 +24913,31 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
}
+/**
+  The method saves the data that should be printed in EXPLAIN
+  when a rowid filter is used for this table.
+*/
+
+bool JOIN_TAB::save_filter_explain_data(Explain_table_access *eta)
+{
+ if (!filter)
+ return 0;
+ KEY *pk_key= get_keyinfo_by_key_no(filter->key_no);
+ StringBuffer<64> buff_for_pk;
+ const char *tmp_buff;
+ buff_for_pk.append("filter:");
+ tmp_buff= pk_key->name.str;
+ buff_for_pk.append(tmp_buff, strlen(tmp_buff), system_charset_info);
+ if (!(eta->ref_list.append_str(join->thd->mem_root,
+ buff_for_pk.c_ptr_safe())))
+ return 1;
+ eta->key.set_filter_key_length(pk_key->key_length);
+ (filter->selectivity*100 >= 1) ? eta->filter_perc= round(filter->selectivity*100) :
+ eta->filter_perc= 1;
+ return 0;
+}
+
+
bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
table_map prefix_tables,
bool distinct_arg, JOIN_TAB *first_top_tab)
@@ -25138,6 +25193,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
eta->filtered= f;
}
+ if ((tab_select && tab_select->quick && tab_type != JT_CONST) ||
+ (key_info && ref.key_parts && tab_type != JT_FT))
+ {
+ if (save_filter_explain_data(eta))
+ return 1;
+ }
+
/* Build "Extra" field and save it */
key_read= table->file->keyread_enabled();
if ((tab_type == JT_NEXT || tab_type == JT_CONST) &&
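
The effect of the best_access_path() changes in this file can be shown with assumed numbers: when a filter is available for the table and its gain at the current cardinality is smaller than the access cost, the gain is subtracted from that cost (the pruning step guarantees the gain is non-negative at this point):

  // Assumed-number illustration of the cost adjustment in best_access_path().
  #include <cstdio>

  int main()
  {
    double record_count= 50;    // rows of the partial join built so far
    double records     = 200;   // rows fetched from this table per partial row
    double access_cost = 9000;  // cost of the chosen access method ("tmp")

    double a= 0.8, b= 120;                      // filter slope / building cost
    double gain= record_count*records*a - b;    // get_filter_gain(card) = 7880
    if (gain < access_cost)
      access_cost-= gain;
    std::printf("cost with filter: %.1f (gain %.1f)\n", access_cost, gain);
    return 0;
  }
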
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4140a0293f8..de3dff46189 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -506,6 +506,9 @@ typedef struct st_join_table {
uint n_sj_tables;
bool preread_init_done;
+ /* Copy of POSITION::filter, set by get_best_combination() */
+ Range_filter_cost_info *filter;
+ Dynamic_array<char*> rowid_filter_pk;
void cleanup();
inline bool is_using_loose_index_scan()
@@ -656,6 +659,7 @@ typedef struct st_join_table {
SplM_plan_info *choose_best_splitting(double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables);
+ bool save_filter_explain_data(Explain_table_access *eta);
} JOIN_TAB;
@@ -881,6 +885,9 @@ public:
};
+class Range_filter_cost_info;
+
+
/**
Information about a position of table within a join order. Used in join
optimization.
@@ -963,6 +970,8 @@ typedef struct st_position
/* Info on splitting plan used at this position */
SplM_plan_info *spl_plan;
+ /* The index for which filter can be built */
+ Range_filter_cost_info *filter;
} POSITION;
typedef Bounds_checked_array<Item_null_result*> Item_null_array;
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 75a962939ae..ba9b2c814ce 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2493,6 +2493,7 @@ export const char *optimizer_switch_names[]=
"condition_pushdown_for_derived",
"split_materialized",
"condition_pushdown_for_subquery",
+ "rowid_filter",
"default",
NullS
};
@@ -6113,3 +6114,10 @@ static Sys_var_enum Sys_secure_timestamp(
"historical behavior, anyone can modify session timestamp",
READ_ONLY GLOBAL_VAR(opt_secure_timestamp), CMD_LINE(REQUIRED_ARG),
secure_timestamp_levels, DEFAULT(SECTIME_NO));
+
+static Sys_var_ulonglong Sys_max_rowid_filter_size(
+ "max_rowid_filter_size",
+       "The maximum size in bytes of an in-memory rowid filter container",
+ SESSION_VAR(max_rowid_filter_size), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(128*1024),
+ BLOCK_SIZE(1));
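
select_usable_range_filters() treats the new variable as a byte budget: a range scan qualifies for a filter only if its estimated rows fit into max_rowid_filter_size bytes worth of rowids. An illustration of that check with assumed figures:

  // Assumed-figures illustration of the capacity check in
  // select_usable_range_filters().
  #include <cstdio>

  int main()
  {
    unsigned long long max_rowid_filter_size= 128*1024;  // default of the new variable
    unsigned ref_length= 8;                              // assumed rowid length in bytes
    unsigned long long quick_rows= 20000;                // estimated rows of the range scan

    unsigned long long limit= max_rowid_filter_size / ref_length;  // 16384 rows
    std::printf("filter %s (limit %llu rows)\n",
                quick_rows <= limit ? "usable" : "skipped", limit);
    return 0;
  }
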
diff --git a/sql/table.cc b/sql/table.cc
index e98836cd93c..2031d27b549 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -4633,6 +4633,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
created= TRUE;
cond_selectivity= 1.0;
cond_selectivity_sampling_explain= NULL;
+ best_filter_count= 0;
+ range_filter_cost_info_elements= 0;
#ifdef HAVE_REPLICATION
/* used in RBR Triggers */
master_had_triggers= 0;
diff --git a/sql/table.h b/sql/table.h
index b75fa9074a4..14bc928bcb5 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -55,6 +55,7 @@ class Virtual_column_info;
class Table_triggers_list;
class TMP_TABLE_PARAM;
class SEQUENCE;
+class Range_filter_cost_info;
/*
Used to identify NESTED_JOIN structures within a join (applicable only to
@@ -1199,8 +1200,10 @@ public:
*/
key_part_map const_key_parts[MAX_KEY];
- uint quick_key_parts[MAX_KEY];
- uint quick_n_ranges[MAX_KEY];
+ uint quick_key_parts[MAX_KEY];
+ uint quick_n_ranges[MAX_KEY];
+  /* I/O cost of the range access, stored for each key */
+ double quick_key_io[MAX_KEY];
/*
Estimate of number of records that satisfy SARGable part of the table
@@ -1491,6 +1494,21 @@ public:
void deny_splitting();
void add_splitting_info_for_key_field(struct KEY_FIELD *key_field);
+
+ /**
+ Range filter info
+ */
+  /* Number of filters put at the head of range_filter_cost_info as best (at most 2) */
+ uint best_filter_count;
+ uint range_filter_cost_info_elements;
+ Range_filter_cost_info *range_filter_cost_info;
+ Range_filter_cost_info
+ *best_filter_for_current_join_order(uint ref_key_no,
+ double record_count,
+ double records);
+ void sort_range_filter_cost_info_array();
+ void prune_range_filters();
+ void select_usable_range_filters(THD *thd);
/**
System Versioning support
*/