1 files changed, 395 insertions, 127 deletions
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index 0d8520f25c5..3cc2c31555c 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -1,183 +1,451 @@
+/*
+   Copyright (c) 2018, 2019 MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
+
 #ifndef ROWID_FILTER_INCLUDED
 #define ROWID_FILTER_INCLUDED
 
-/**
- It makes sense to apply filters for a certain join order when the following
- inequality holds:
 
- #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) +
-              I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi)) +
-              c3*#T (1),
+#include "mariadb.h"
+#include "sql_array.h"
+
+/*
+
+  What rowid / primary filters are
+  --------------------------------
+
+  Consider a join query Q of the form
+    SELECT * FROM T1, ... , Tk WHERE P.
+
+  For any of the table reference Ti(Q) from the from clause of Q different
+  rowid / primary key filters (pk-filters for short) can be built.
+  A pk-filter F built for Ti(Q) is a set of rowids / primary keys of Ti
+  F= {pk1,...,pkN} such that for any row r=r1||...||rk from the result set of Q
+  ri's rowid / primary key pk(ri) is contained in F.
+
+  When pk-filters are useful
+  --------------------------
+
+  If building a pk-filter F for Ti(Q )is not too costly and its cardinality #F
+  is much less than the cardinality of T - #T then using the pk-filter when
+  executing Q might be quite beneficial.
+
+  Let r be a random row from Ti. Let s(F) be the probability that pk(r)
+  belongs to F. Let BC(F) be the cost of building F.
+
+  Suppose that the optimizer has chosen for Q a plan with this join order
+  T1 => ... Tk and that the table Ti is accessed by a ref access using index I.
+  Let K = {k1,...,kM} be the set of all rowid/primary keys values used to access
+  rows of Ti when looking for matches in this table.to join Ti by index I.
+
+  Let's assume that two set sets K and F are uncorrelated.  With this assumption
+  if before accessing data from Ti by the rowid / primary key k we first
+  check whether k is in F then we can expect saving on M*(1-s(S)) accesses of
+  data rows from Ti. If we can guarantee that test whether k is in F is
+  relatively cheap then we can gain a lot assuming that BC(F) is much less
+  then the cost of fetching M*(1-s(S)) records from Ti and following
+  evaluation of conditions pushed into Ti.
+
+  Making pk-filter test cheap
+  ---------------------------
+
+  If the search structure to test whether an element is in F can be fully
+  placed in RAM then this test is expected to be be much cheaper than a random
+  access of a record from Ti. We'll consider two search structures for
+  pk-filters: ordered array and bloom filter. Ordered array is easy to
+  implement, but it's space consuming. If a filter contains primary keys
+  then at least space for each primary key from the filter must be allocated
+  in the search structure. On a the opposite a bloom filter requires a
+  fixed number of bits and this number does not depend on the cardinality
+  of the pk-filter (10 bits per element will serve pk-filter of any size).
 
- where #T         - the fanout of the partial join
-       Fi         - a filter for the index with the number i in 
-                    the key_map of available indexes for this table
-       sel(Fi)    - the selectivity of the index with the number 
-                    i
-       c4*#T,
-       c4*#T*sel(Fi) - a cost to apply available predicates
-       c4         - a constant to apply available predicates
-       I/O(Fi)    - a cost of the I/O accesses to Fi
-       #(Fi)      - a number of estimated records that range
-                    access would use
-       c1*#(Fi)   - a cost to write in Fi
-       c1         - a constant to write one element in Fi
-       c2*#(Fi)*log(#(Fi)) - a cost to sort in Fi
-       c2         - a sorting constant
-       c3*(#T)    - a cost to look-up into a current partial join
-       c3         - a constant to look-up into Fi
+*/
 
-  Let's set a new variable FBCi (filter building cost for the filter with
-  index i):
+/*
+
+  How and when the optimizer builds and uses range rowid filters
+  --------------------------------------------------------------
+
+  1. In make_join_statistics()
+       for each join table s
+         after the call of get_quick_record_count()
+           the TABLE::method init_cost_info_for_usable_range_rowid_filters()
+           is called
+           The method build an array of Range_rowid_filter_cost_info elements
+           containing the cost info on possible range filters for s->table.
+           The array is optimized for further usage.
+
+  2. For each partial join order when the optimizer considers joining
+     table s to this partial join
+       In the function best_access_path()
+       a. When evaluating a ref access r by index idx to join s
+          the optimizer estimates the effect of usage of each possible
+          range filter f and chooses one with the best gain. The gain
+          is taken into account when the cost of thr ref access r is
+          calculated. If it turns out that this is the best ref access
+          to join s then the info about the chosen filter together
+          with the info on r is remembered in the corresponding element
+          of the array of POSITION structures.
+          [We evaluate every pair (ref access, range_filter) rather then
+           every pair (best ref access, range filter) because if the index
+           ref_idx used for ref access r correlates with the index rf_idx
+           used  by the filter f then the pair (r,f) is not evaluated
+           at all as we don't know how to estimate the effect of correlation
+           between ref_idx and rf_idx.]
+       b. When evaluating the best range access to join table s the
+          optimizer estimates the effect of usage of each possible
+          range filter f and chooses one with the best gain.
+          [Here we should have evaluated every pair (range access,
+           range filter) as well, but it's not done yet.]
+
+  3. When the cheapest execution plan has been chosen and after the
+     call of JOIN::get_best_combination()
+       The method JOIN::make_range_rowid_filters() is called
+       For each range rowid filter used in the chosen execution plan
+       the method creates a quick select object to be able to perform
+       index range scan to fill the filter at the execution stage.
+       The method also creates Range_rowid_filter objects that are
+       used at the execution stage.
+
+  4. Just before the execution stage
+       The method JOIN::init_range_rowid_filters() is called.
+       For each join table s that is to be accessed with usage of a range
+       filter the method allocates containers for the range filter and
+       it lets the engine know that the filter will be used when
+       accessing s.
+
+  5. At the execution stage
+       In the function sub_select() just before the first access of a join
+       table s employing a range filter
+         The method JOIN_TAB::build_range_rowid_filter_if_needed() is called
+         The method fills the filter using the quick select created by
+         JOIN::make_range_rowid_filters().
+
+  6. The accessed key tuples are checked against the filter within the engine
+     using the info pushed into it.
 
-  FBCi = I/O(Fi) + c1*#(Fi) + c2*#(Fi)*log(#(Fi))
+*/
 
-  It can be seen that FBCi doesn't depend on #T.
+class TABLE;
+class SQL_SELECT;
+class Rowid_filter_container;
+class Range_rowid_filter_cost_info;
 
-  So using this variable (1) can be rewritten:
+/* Cost to write rowid into array */
+#define ARRAY_WRITE_COST      0.005
+/* Factor used to calculate cost of sorting rowids in array */
+#define ARRAY_SORT_C          0.01
+/* Cost to evaluate condition */
+#define COST_COND_EVAL  0.2
 
-  #T + c4*#T > #T*sel(Fi) + c4*#T*sel(Fi) +
-               FBCi +
-               c3*#T
+typedef enum
+{
+  SORTED_ARRAY_CONTAINER,
+  BLOOM_FILTER_CONTAINER
+} Rowid_filter_container_type;
 
-  To get a possible cost improvement when a filter is used right part
-  of the (1) inequality should be deducted from the left part.
-  Denote it as G(#T):
+/**
+  @class Rowid_filter_container
 
-  G(#T)= #T + c4*#T - (#T*sel(Fi) + c4*#T*sel(Fi) + FBCi + c3*#T) (2)
+  The interface for different types of containers to store info on the set
+  of rowids / primary keys that defines a pk-filter.
 
-  On the prepare stage when filters are created #T value isn't known.
+  There will be two implementations of this abstract class.
+  - sorted array
+  - bloom filter
+*/
 
-  To find out what filter is the best among available one for the table
-  (what filter gives the biggest gain) a knowledge about linear functions
-  can be used. Consider filter gain as a linear function:
+class Rowid_filter_container : public Sql_alloc
+{
+public:
 
-  Gi(#T)= ai*#T + bi (3)
+  virtual Rowid_filter_container_type get_type() = 0;
 
-  where ai= 1+c4-c3-sel(Fi)*(1+c4),
-        bi= -FBCi
+  /* Allocate memory for the container */
+  virtual bool alloc() = 0;
 
-  Filter gain can be interpreted as an ordinate, #T as abscissa.
+  /*
+    @brief Add info on a rowid / primary to the container
+    @param ctxt   The context info (opaque)
+    @param elem   The rowid / primary key to be added to the container
+    @retval       true if elem is successfully added
+  */
+  virtual bool add(void *ctxt, char *elem) = 0;
 
-  So the aim is to find the linear function that has the biggest ordinate value
-  for each positive abscissa (because #T can't be negative) comparing with
-  the other available functions.
+  /*
+    @brief Check whether a rowid / primary key is in container
+    @param ctxt   The context info (opaque)
+    @param elem   The rowid / primary key to be checked against the container
+    @retval       False if elem is definitely not in the container
+  */
+  virtual bool check(void *ctxt, char *elem) = 0;
 
-  Consider two filters Fi, Fj or linear functions with a positive slope.
-  To find out which linear function is better let's find their intersection
-  point coordinates.
+  virtual ~Rowid_filter_container() {}
+};
 
-  Gi(#T0)= Gj(#T0) (using (2))=>
-  #T0= (bj - bi)/(ai - aj) (using (3))
-  =>
-  #T0= (BCFj-BCFi)/((sel(Fj)-sel(Fi))*(1+c4))
 
-  If put #T0 value into the (3) formula G(#T0) can be easily found.
+/**
+  @class Rowid_filter
 
-  It can be seen that if two linear functions intersect in II, III or IV
-  quadrants the linear function with a bigger slope value will always
-  be better.
+  The interface for different types of pk-filters
 
-  If two functions intersect in the I quadrant for #T1 < #T0 a function
-  with a smaller slope value will give a better gain and when #T1 > #T0
-  function with a bigger slope will give better gain.
+  Currently we support only range pk filters.
+*/
 
-  for each #T1 > #T0  if (ai > aj) => (Gi(#T1) >= Gj(#T1))
-           #T1 <= #T0 if (ai > aj) => (Gi(#T1) <= Gj(#T1))
+class Rowid_filter : public Sql_alloc
+{
+protected:
 
-  So both linear functions should be saved.
+  /* The container to store info the set of elements in the filter */
+  Rowid_filter_container *container;
 
-  Interesting cases:
+public:
+  Rowid_filter(Rowid_filter_container *container_arg)
+    : container(container_arg) {}
 
-  1. For Fi,Fj filters ai=aj. 
+  /*
+    Build the filter :
+    fill it with info on the set of elements placed there
+  */
+  virtual bool build() = 0;
 
-    In this case intercepts bi and bj should be compared.
-    The filter with the biggest intercept will give a better result.
+  /*
+    Check whether an element is in the filter.
+    Returns false is the elements is definitely not in the filter.
+  */
+  virtual bool check(char *elem) = 0;
 
-  2. Only one filter remains after the calculations and for some join order
-     it is equal to the index that is used to access table. Therefore, this
-     filter can't be used.
-     
-     In this case the gain is computed for every filter that can be constructed
-     for this table.
+  virtual ~Rowid_filter() {}
 
-  After information about filters is computed for each partial join order
-  it is checked if the filter can be applied to the current table.
-  If it gives a cost improvement it is saved as the best plan for this
-  partial join.
-*/
+  Rowid_filter_container *get_container() { return container; }
+};
 
-#include "mariadb.h"
-#include "sql_class.h"
-#include "table.h"
 
-/* Cost to write into filter */
-#define COST_WRITE      0.01
-/* Weight factor for filter sorting */
-#define CNST_SORT       0.01
-/* Cost to evaluate condition */
-#define COST_COND_EVAL  0.2
+/**
+  @class Rowid_filter_container
 
-class QUICK_RANGE_SELECT;
+  The implementation of the Rowid_interface used for pk-filters
+  that are filled when performing range index scans.
+*/
 
-class Range_filter_cost_info : public Sql_alloc
+class Range_rowid_filter: public Rowid_filter
 {
-public:
+  /* The table for which the rowid filter is built */
   TABLE *table;
-  uint key_no;
-  double cardinality;
-  double b;                         // intercept of the linear function
-  double a;                         // slope of the linear function
-  double selectivity;
-  double intersect_x_axis_abcissa;
+  /* The select to perform the range scan to fill the filter */
+  SQL_SELECT *select;
+  /* The cost info on the filter (used for EXPLAIN/ANALYZE) */
+  Range_rowid_filter_cost_info *cost_info;
+
+public:
+  Range_rowid_filter(TABLE *tab,
+                     Range_rowid_filter_cost_info *cost_arg,
+                     Rowid_filter_container *container_arg,
+                     SQL_SELECT *sel)
+    : Rowid_filter(container_arg), table(tab), select(sel), cost_info(cost_arg)
+  {}
+
+  ~Range_rowid_filter();
+
+  bool build() { return fill(); }
 
-  /**
-    Filter cost functions
+  bool check(char *elem) { return container->check(table, elem); }
+
+  bool fill();
+
+  SQL_SELECT *get_select() { return select; }
+};
+
+
+/**
+  @class Refpos_container_sorted_array
+
+  The wrapper class over Dynamic_array<char> to facilitate operations over
+  array of elements of the type char[N] where N is the same for all elements
+*/
+
+class Refpos_container_sorted_array : public Sql_alloc
+{
+  /* 
+    Maximum number of elements in the array
+    (Now is used only at the initialization of the dynamic array)
   */
-  /* Cost to lookup into filter */
-  inline double lookup_cost()
+  uint max_elements;
+  /* Number of bytes allocated for an element */
+  uint elem_size;
+  /* The dynamic array over which the wrapper is built */
+  Dynamic_array<char> *array;
+
+public:
+
+ Refpos_container_sorted_array(uint max_elems, uint elem_sz)
+    :  max_elements(max_elems), elem_size(elem_sz), array(0) {}
+
+  ~Refpos_container_sorted_array()
   {
-    return log(cardinality)*0.01;
+    delete array;
+    array= 0;
   }
 
-  /* IO accesses cost to access filter */
-  inline double filter_io_cost()
-  { return table->quick_key_io[key_no]; }
+  bool alloc()
+  {
+    array= new Dynamic_array<char> (elem_size * max_elements,
+                                    elem_size * max_elements/sizeof(char) + 1);
+    return array == NULL;
+  }
 
-  /* Cost to write elements in filter */
-  inline double filter_write_cost()
-  { return COST_WRITE*cardinality; }
+  bool add(char *elem)
+  {
+    for (uint i= 0; i < elem_size; i++)
+    {
+      if (array->append(elem[i]))
+	return true;
+    }
+    return false;
+  }
 
-  /* Cost to sort elements in filter */
-  inline double filter_sort_cost()
-  { 
-    return CNST_SORT*cardinality*log(cardinality);
+  char *get_pos(uint n)
+  {
+    return array->get_pos(n * elem_size);
   }
-  /* End of filter cost functions */
 
-  Range_filter_cost_info() : table(0), key_no(0) {}
+  uint elements() { return array->elements() / elem_size; }
+
+  void sort (int (*cmp) (void *ctxt, const void *el1, const void *el2),
+                         void *cmp_arg)
+  {
+    my_qsort2(array->front(), array->elements()/elem_size,
+              elem_size, (qsort2_cmp) cmp, cmp_arg);
+  }
+};
+
+
+/**
+  @class Rowid_filter_sorted_array
+
+  The implementation of the Rowid_filter_container interface as
+  a sorted array container of rowids / primary keys
+*/
+
+class Rowid_filter_sorted_array: public Rowid_filter_container
+{
+  /* The dynamic array to store rowids / primary keys */
+  Refpos_container_sorted_array refpos_container;
+  /* Initially false, becomes true after the first call of (check() */
+  bool is_checked;
+
+public:
+  Rowid_filter_sorted_array(uint elems, uint elem_size)
+    : refpos_container(elems, elem_size), is_checked(false) {}
+
+  Rowid_filter_container_type get_type()
+  { return SORTED_ARRAY_CONTAINER; }
+
+  bool alloc() { return refpos_container.alloc(); }
+
+  bool add(void *ctxt, char *elem) { return refpos_container.add(elem); }
+
+  bool check(void *ctxt, char *elem);
+};
+
+/**
+  @class Range_rowid_filter_cost_info
+
+  An objects of this class is created for each potentially usable
+  range filter. It contains the info that allows to figure out
+  whether usage of the range filter promises some gain.
+*/
+
+class Range_rowid_filter_cost_info : public Sql_alloc
+{
+  /* The table for which the range filter is to be built (if needed) */
+  TABLE *table;
+  /* Estimated number of elements in the filter */
+  double est_elements;
+  /* The cost of building the range filter */
+  double b;
+  /*
+     a*N-b yields the gain of the filter
+     for N key tuples of the index key_no
+  */
+  double a;
+  /* The value of N where the gain is  0 */
+  double cross_x;
+  /* Used for pruning of the potential range filters */
+  key_map abs_independent;
+
+public:
+  /* The type of the container of the range filter */
+  Rowid_filter_container_type container_type;
+  /* The index whose range scan would be used to build the range filter */
+  uint key_no;
+  /* The selectivity of the range filter */
+  double selectivity;
+
+  Range_rowid_filter_cost_info() : table(0), key_no(0) {}
 
-  void init(TABLE *tab, uint key_numb);
+  void init(Rowid_filter_container_type cont_type,
+            TABLE *tab, uint key_no);
 
-  inline double get_intersect_x(Range_filter_cost_info *filter)
+  double build_cost(Rowid_filter_container_type container_type);
+
+  inline double lookup_cost(Rowid_filter_container_type cont_type);
+
+  inline double
+  avg_access_and_eval_gain_per_row(Rowid_filter_container_type cont_type);
+
+  /* Get the gain that usage of filter promises for 'rows' key tuples */
+  inline double get_gain(double rows)
   {
-    if (a == filter->a)
-      return DBL_MAX;
-    return (b - filter->b)/(a - filter->a);
+    return rows * a - b;
   }
-  inline double get_intersect_y(double intersect_x)
+
+  /*
+    The gain promised by usage of the filter at the assumption that
+    when the table is accessed without the filter at most worst_seeks
+    pages are fetched from disk to access the data rows of the table
+  */
+  inline double get_adjusted_gain(double rows, double worst_seeks)
   {
-    if (intersect_x == DBL_MAX)
-      return DBL_MAX;
-    return intersect_x*a - b;
+    return get_gain(rows) -
+           (1 - selectivity) * (rows - MY_MIN(rows, worst_seeks));
   }
 
-  /**
-    Get a gain that a usage of filter in some partial join order
-    with the cardinaly card gives
+  /*
+    The gain promised by usage of the filter
+    due to less condition evaluations
   */
-  inline double get_filter_gain(double card)
-  {  return card*a - b;  }
+  inline double get_cmp_gain(double rows)
+  {
+    return rows * (1 - selectivity) / TIME_FOR_COMPARE;
+  }
+
+  Rowid_filter_container *create_container();
+
+  double get_a() { return a; }
+
+  friend
+  void TABLE::prune_range_rowid_filters();
+
+  friend
+  void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd);
+
+  friend
+  Range_rowid_filter_cost_info *
+  TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
+                                                  double records);
 };
 
 #endif /* ROWID_FILTER_INCLUDED */