author    Sergei Petrunia <psergey@askmonty.org>  2014-08-27 20:08:32 +0400
committer Sergei Petrunia <psergey@askmonty.org>  2014-08-27 20:08:32 +0400
commit    f1a1683309899e484c0c0d38933530dcd5012c75 (patch)
tree      81c4ad21d30783910ab21f84e2a53dfb39ab081a
parent    be00e279c6061134a33a8099fd69d4304735d02e (diff)
download  mariadb-git-f1a1683309899e484c0c0d38933530dcd5012c75.tar.gz
MDEV-6384: It seems like OPTIMIZER take into account the order of indexes in the table
When the ORDER BY ... LIMIT code checks whether it should switch from index IDX1 to index IDX2, it must not ignore the fact that IDX2 may allow range or ref(const) access. Instead, it should compare their costs: the range optimizer's cost is now saved, and there is code to re-calculate what best_access_path() computed for ref(const). (In the current cost model these two can unfortunately be very different numbers.)
-rw-r--r--  mysql-test/r/innodb_ext_key.result |   2
-rw-r--r--  sql/opt_range.cc                   |   1
-rw-r--r--  sql/sql_select.cc                  | 118
3 files changed, 119 insertions, 2 deletions
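
The idea of the fix, sketched outside the server: for the candidate index, take the cheaper of the saved range (quick) estimate and the re-computed ref(const) estimate, then scale that cost down by the LIMIT. The snippet below is a simplified illustration with hypothetical types and made-up numbers, not MariaDB's handler interface; the real logic is get_range_limit_read_cost() in the sql_select.cc hunk further down.

    // Simplified sketch of the new cost comparison (hypothetical types and
    // numbers, not MariaDB code).
    #include <cstdio>

    struct AccessEstimate { double rows; double cost; bool valid; };

    // Cheapest way to read `rows_limit` rows through one index, if a range
    // (quick) estimate exists; ref(const) may override it when it is cheaper.
    static bool range_limit_read_cost(const AccessEstimate &quick,
                                      const AccessEstimate &ref_const,
                                      double rows_limit, double *read_cost)
    {
      if (!quick.valid)
        return false;               // caller falls back to a full index scan
      double best_rows= quick.rows;
      double best_cost= quick.cost;
      if (ref_const.valid && ref_const.cost < best_cost)
      {
        best_cost= ref_const.cost;
        best_rows= ref_const.rows;
      }
      if (best_rows > rows_limit)               // we only need a fraction of
        best_cost *= rows_limit / best_rows;    // the rows; scale the cost
      *read_cost= best_cost;
      return true;
    }

    int main()
    {
      // Range scan: 300 rows at cost 30; ref(const): 300 rows at cost 12; LIMIT 100.
      AccessEstimate quick= {300.0, 30.0, true}, ref_const= {300.0, 12.0, true};
      double cost;
      if (range_limit_read_cost(quick, ref_const, 100.0, &cost))
        std::printf("limited read cost: %.2f\n", cost);  // 12 * 100/300 = 4.00
      return 0;
    }

With these numbers the ref(const) estimate (cost 12) wins over the range estimate (cost 30), and needing only 100 of the 300 rows scales it down to 4.00.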
diff --git a/mysql-test/r/innodb_ext_key.result b/mysql-test/r/innodb_ext_key.result
index 9140f306f77..cae402a9f12 100644
--- a/mysql-test/r/innodb_ext_key.result
+++ b/mysql-test/r/innodb_ext_key.result
@@ -1002,7 +1002,7 @@ insert into t2 (b) values (null), (null), (null);
set optimizer_switch='extended_keys=on';
explain select a from t1 where b is null order by a desc limit 2;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ref b b 9 const 2 Using where; Using filesort
+1 SIMPLE t1 index b PRIMARY 8 NULL 3 Using where
select a from t1 where b is null order by a desc limit 2;
a
3
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index b795411130d..99c731d1541 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -10690,6 +10690,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
param->table->quick_condition_rows=
MY_MIN(param->table->quick_condition_rows, rows);
param->table->quick_rows[keynr]= rows;
+ param->table->quick_costs[keynr]= cost->total_cost();
}
}
/* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
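
The one-line opt_range.cc change above saves the range optimizer's total cost next to the row estimate it already kept, so both can be reused later by the ORDER BY ... LIMIT code. A minimal sketch of that bookkeeping, with hypothetical types standing in for the real fields on TABLE:

    // Minimal sketch, hypothetical types: per-key estimates saved by the range
    // optimizer (quick_rows existed before; quick_costs is added by this patch).
    #include <cstdint>
    #include <cstdio>

    constexpr unsigned MAX_KEYS= 64;   // placeholder for the server's key limit

    struct TableEstimates
    {
      uint64_t quick_rows[MAX_KEYS];   // rows a range (quick) scan would return
      double   quick_costs[MAX_KEYS];  // total cost of that range scan
    };

    // Called once the range optimizer has costed a scan on index `keynr`.
    static void save_quick_estimate(TableEstimates *t, unsigned keynr,
                                    uint64_t rows, double total_cost)
    {
      t->quick_rows[keynr]=  rows;
      t->quick_costs[keynr]= total_cost;
    }

    int main()
    {
      TableEstimates t= {};
      save_quick_estimate(&t, 3, 300, 30.0);   // index #3: 300 rows, cost 30
      std::printf("key 3: %llu rows, cost %.1f\n",
                  (unsigned long long) t.quick_rows[3], t.quick_costs[3]);
      return 0;
    }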
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 52f05e46e40..79d74d02708 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -20224,7 +20224,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
!(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
goto use_filesort;
- if (select &&
+ if (select && // psergey: why doesn't this use a quick?
table->quick_keys.is_set(best_key) && best_key != ref_key)
{
key_map map;
@@ -24693,6 +24693,109 @@ void JOIN::cache_const_exprs()
}
}
+
+/*
+ Get a cost of reading rows_limit rows through index keynr.
+
+ @detail
+ - If there is a quick select, we try to use it.
+ - If there is a ref(const) access, we try to use it, too.
+ - quick and ref(const) use different cost formulas, so if both are possible
+ we should make a cost-based choice.
+
+ @return
+ true There was a possible quick or ref access, its cost is in the OUT
+ parameter.
+ false No quick or ref(const) possible (and so, the caller will attempt
+ to use a full index scan on this index).
+*/
+
+static bool get_range_limit_read_cost(const JOIN_TAB *tab,
+ const TABLE *table,
+ uint keynr,
+ ha_rows rows_limit,
+ double *read_time)
+{
+ bool res= false;
+ /*
+ We need to adjust the estimates if we had a quick select (or ref(const)) on
+ index keynr.
+ */
+ if (table->quick_keys.is_set(keynr))
+ {
+ /*
+ Start from quick select's rows and cost. These are always cheaper than
+ full index scan/cost.
+ */
+ double best_rows= table->quick_rows[keynr];
+ double best_cost= table->quick_costs[keynr];
+
+ /*
+ Check if ref(const) access was possible on this index.
+ */
+ if (tab)
+ {
+ key_part_map const_parts= 0;
+ key_part_map map= 1;
+ uint kp;
+ /* Find how many key parts would be used by ref(const) */
+ for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++)
+ {
+ if (!(table->const_key_parts[keynr] & map))
+ break;
+ const_parts |= map;
+ }
+
+ if (kp > 0)
+ {
+ ha_rows ref_rows;
+ /*
+ Two possible cases:
+ 1. ref(const) uses the same #key parts as range access.
+ 2. ref(const) uses fewer key parts, because there is a
+ range_cond(key_part+1).
+ */
+ if (kp == table->quick_key_parts[keynr])
+ ref_rows= table->quick_rows[keynr];
+ else
+ ref_rows= table->key_info[keynr].actual_rec_per_key(kp-1);
+
+ if (ref_rows > 0)
+ {
+ double tmp= ref_rows;
+ /* Reuse the cost formula from best_access_path: */
+ set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key);
+ if (table->covering_keys.is_set(keynr))
+ tmp= table->file->keyread_time(keynr, 1, (ha_rows) tmp);
+ else
+ tmp= table->file->read_time(keynr, 1,
+ (ha_rows) MY_MIN(tmp,tab->worst_seeks));
+ if (tmp < best_cost)
+ {
+ best_cost= tmp;
+ best_rows= ref_rows;
+ }
+ }
+ }
+ }
+
+ if (best_rows > rows_limit)
+ {
+ /*
+ LIMIT clause specifies that we will need to read fewer records than
+ quick select will return. Assume that quick select's cost is
+ proportional to the number of records we need to return (e.g. if we
+ only need 1/3rd of records, it will cost us 1/3rd of quick select's
+ read time)
+ */
+ best_cost *= rows_limit / best_rows;
+ }
+ *read_time= best_cost;
+ res= true;
+ }
+ return res;
+}
+
/**
Find a cheaper access key than a given @a key
@@ -24786,6 +24889,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
}
else
read_time= table->file->scan_time();
+
+ /*
+ TODO: add cost of sorting here.
+ */
+ read_time += COST_EPS;
/*
Calculate the selectivity of the ref_key for REF_ACCESS. For
@@ -24945,6 +25053,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*/
index_scan_time= select_limit/rec_per_key *
MY_MIN(rec_per_key, table->file->scan_time());
+ double range_scan_time;
+ if (get_range_limit_read_cost(tab, table, nr, select_limit,
+ &range_scan_time))
+ {
+ if (range_scan_time < index_scan_time)
+ index_scan_time= range_scan_time;
+ }
+
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
{