MDEV-28846 Poor performance when rowid filter contains no elements

When a range rowid filter was used with an index ref access, the cost of accessing the index entries for the records rejected by the filter was not taken into account. For a ref access by an index with a large average number of records per key, this led to poor execution plans if the selectivity of the used filter was high. The patch resolves this problem. It also introduces a minor optimization that skips look-ups into a filter that turns out to be empty. With this patch, the output of the ANALYZE statement reports the number of look-ups into the rowid filters used. The patch also back-ports from 10.5 the code that properly sets the field TABLE::file::table for opened temporary tables. The test cases that were supposed to use rowid filters have been adjusted so that they use similar execution plans after this fix. Approved by Oleksandr Byelkin <sanja@mariadb.com>
author: Igor Babaev <igor@askmonty.org> 2022-10-17 16:44:10 -0700
committer: Igor Babaev <igor@askmonty.org> 2022-10-25 11:43:32 -0700
commit: 58cd0bd59ef011be54f162237f2ff017c3148e7b (patch)
tree: 7c6b5282b00ff9d9a509d85ff7d0ff996fa0d740 /sql
parent: f1bbc1cd19d0d81fee5433efcb570a8845172241 (diff)
download: mariadb-git-58cd0bd59ef011be54f162237f2ff017c3148e7b.tar.gz
7 files changed, 73 insertions, 7 deletions
diff --git a/sql/handler.h b/sql/handler.h
index cd999f30bc0..aa68c30480e 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -3156,6 +3156,11 @@ public:
     DBUG_ASSERT(m_lock_type == F_UNLCK);
     DBUG_ASSERT(inited == NONE);
   }
+  /* To check if table has been properly opened */
+  bool is_open()
+  {
+    return ref != 0;
+  }
   virtual handler *clone(const char *name, MEM_ROOT *mem_root);
   /** This is called after create to allow us to set up cached variables */
   void init()
@@ -4804,6 +4809,7 @@ public:
     ha_share= arg_ha_share;
     return false;
   }
+  void set_table(TABLE* table_arg) { table= table_arg; }
   int get_lock_type() const { return m_lock_type; }
 public:
   /* XXX to be removed, see ha_partition::partition_ht() */
diff --git a/sql/item_func.cc b/sql/item_func.cc
index f4596803c2d..9c29280970b 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -6019,7 +6019,7 @@ bool Item_func_match::init_search(THD *thd, bool no_order)
 {
   DBUG_ENTER("Item_func_match::init_search");
 
-  if (!table->file->get_table()) // the handler isn't opened yet
+  if (!table->file->is_open())
     DBUG_RETURN(0);
 
   /* Check if init_search() has been called before */
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index 467b6884ca6..b76b8b1e635 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -192,6 +192,9 @@ public:
   */
   virtual bool check(void *ctxt, char *elem) = 0;
 
+  /* True if the container does not contain any element */
+  virtual bool is_empty() = 0;
+
   virtual ~Rowid_filter_container() {}
 };
 
@@ -231,6 +234,8 @@ public:
 
   virtual ~Rowid_filter() {}
 
+  bool is_empty() { return container->is_empty(); }
+
   Rowid_filter_container *get_container() { return container; }
 
   void set_tracker(Rowid_filter_tracker *track_arg) { tracker= track_arg; }
@@ -268,6 +273,8 @@ public:
 
   bool check(char *elem)
   {
+    if (container->is_empty())
+      return false;
     bool was_checked= container->check(table, elem);
     tracker->increment_checked_elements_count(was_checked);
     return was_checked;
@@ -339,6 +346,8 @@ public:
     my_qsort2(array->front(), array->elements()/elem_size,
               elem_size, (qsort2_cmp) cmp, cmp_arg);
   }
+
+  bool is_empty() { return elements() == 0; }
 };
 
 
@@ -368,6 +377,8 @@ public:
   bool add(void *ctxt, char *elem) { return refpos_container.add(elem); }
 
   bool check(void *ctxt, char *elem);
+
+  bool is_empty() { return refpos_container.is_empty(); }
 };
 
 /**
diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h
index eec52822ae5..40876d178e0 100644
--- a/sql/sql_analyze_stmt.h
+++ b/sql/sql_analyze_stmt.h
@@ -355,11 +355,14 @@ public:
 
   uint get_container_elements() { return container_elements; }
 
+  uint get_container_lookups() { return n_checks; }
+
   double get_r_selectivity_pct()
   {
-    return (double)n_positive_checks/(double)n_checks;
+    return n_checks ? (double)n_positive_checks/(double)n_checks : 0;
   }
 
   size_t get_container_buff_size() { return container_buff_size; }
+
 };
 
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index 1681da63ac1..70e300997f9 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -1676,6 +1676,7 @@ void Explain_rowid_filter::print_explain_json(Explain_query *query,
   if (is_analyze)
   {
     writer->add_member("r_rows").add_double(tracker->get_container_elements());
+    writer->add_member("r_lookups").add_ll(tracker->get_container_lookups());
     writer->add_member("r_selectivity_pct").
       add_double(tracker->get_r_selectivity_pct() * 100.0);
     writer->add_member("r_buffer_size").
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 76fd6385041..ca3de361865 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -4171,7 +4171,7 @@ void select_insert::abort_result_set()
     table will be assigned with view table structure, but that table will
     not be opened really (it is dummy to check fields types & Co).
    */
-  if (table && table->file->get_table())
+  if (table && table->file->is_open())
   {
     bool changed, transactional_table;
     /*
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index a91b4571b21..5ec88e5259c 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7400,6 +7400,7 @@ best_access_path(JOIN      *join,
   table_map best_ref_depends_map= 0;
   Range_rowid_filter_cost_info *best_filter= 0;
   double tmp;
+  double keyread_tmp= 0;
   ha_rows rec;
   bool best_uses_jbuf= FALSE;
   MY_BITMAP *eq_join_set= &s->table->eq_join_set;
@@ -7666,11 +7667,16 @@ best_access_path(JOIN      *join,
             tmp= records;
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
+              keyread_tmp=
+                tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
+            {
+              keyread_tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
               tmp= table->file->read_time(key, 1,
                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
+            }
             tmp= COST_MULT(tmp, record_count);
+            keyread_tmp= COST_MULT(keyread_tmp, record_count);
           }
         }
         else
@@ -7847,11 +7853,16 @@ best_access_path(JOIN      *join,
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
+              keyread_tmp=
+                tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
+	    {
+              keyread_tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
               tmp= table->file->read_time(key, 1,
                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
+            }
             tmp= COST_MULT(tmp, record_count);
+            keyread_tmp= COST_MULT(keyread_tmp, record_count);
           }
           else
           {
@@ -7870,7 +7881,35 @@ best_access_path(JOIN      *join,
 	  (found_part & 1))   // start_key->key can be used for index access
       {
         double rows= record_count * records;
-        double access_cost_factor= MY_MIN(tmp / rows, 1.0);
+
+        /*
+          If we use filter F with selectivity s then the cost of fetching data
+          by key using this filter will be
+             cost_of_fetching_1_row * rows * s +
+             cost_of_fetching_1_key_tuple * rows * (1 - s) +
+             cost_of_1_lookup_into_filter * rows
+          Without using any filter the cost would be just
+             cost_of_fetching_1_row * rows
+
+          So the gain in access cost per row will be
+             cost_of_fetching_1_row * (1 - s) -
+             cost_of_fetching_1_key_tuple * (1 - s) -
+             cost_of_1_lookup_into_filter
+             =
+             (cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s)
+             - cost_of_1_lookup_into_filter
+
+          Here we have:
+             cost_of_fetching_1_row = tmp/rows
+             cost_of_fetching_1_key_tuple = keyread_tmp/rows
+
+          Note that access_cost_factor may be greater than 1.0. In this case
+          we still can expect a gain of using rowid filter due to smaller number
+          of checks for conditions pushed to the joined table.
+	*/
+        double rows_access_cost= MY_MIN(rows, s->worst_seeks);
+        double access_cost_factor= MY_MIN((rows_access_cost - keyread_tmp) /
+                                           rows, 1.0);
         filter=
           table->best_range_rowid_filter_for_partial_join(start_key->key, rows,
                                                           access_cost_factor);
@@ -8029,8 +8068,11 @@ best_access_path(JOIN      *join,
       if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
       {
         double rows= record_count * s->found_records;
-        double access_cost_factor= MY_MIN(tmp / rows, 1.0);
         uint key_no= s->quick->index;
+
+        /* See the comment concerning using rowid filter with ref access */
+        keyread_tmp= s->table->quick_index_only_costs[key_no];
+        double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
         filter=
         s->table->best_range_rowid_filter_for_partial_join(key_no, rows,
                                                            access_cost_factor);
@@ -18810,6 +18852,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
     delete table->file;
     goto err;
   }
+  table->file->set_table(table);
 
   if (!using_unique_constraint)
     reclength+= group_null_items;	// null flag is stored separately
@@ -20651,6 +20694,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
     DBUG_RETURN(NESTED_LOOP_ERROR);
 
   join_tab->build_range_rowid_filter_if_needed();
+  if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty())
+    rc= NESTED_LOOP_NO_MORE_ROWS;
 
   join->return_tab= join_tab;
author	Igor Babaev <igor@askmonty.org>	2022-10-17 16:44:10 -0700
committer	Igor Babaev <igor@askmonty.org>	2022-10-25 11:43:32 -0700
commit	58cd0bd59ef011be54f162237f2ff017c3148e7b (patch)
tree	7c6b5282b00ff9d9a509d85ff7d0ff996fa0d740 /sql
parent	f1bbc1cd19d0d81fee5433efcb570a8845172241 (diff)
download	mariadb-git-58cd0bd59ef011be54f162237f2ff017c3148e7b.tar.gz