author     Sergei Petrunia <psergey@askmonty.org>  2018-12-24 16:46:17 +0300
committer  Sergei Petrunia <psergey@askmonty.org>  2018-12-24 16:46:17 +0300
commit     588c5aea9cf5eda0bb4dc29437c467ddf39eac6e (patch)
tree       a8fb3836b8d06412425d92989ae29a96e34623c2
parent     fb230256ae3473e4b4191abfea645bd0faa58a82 (diff)
download   mariadb-git-588c5aea9cf5eda0bb4dc29437c467ddf39eac6e.tar.gz
MDEV-18073: get_range_limit_read_cost() doesn't adjust LIMIT for the range access

The computation of which "fraction" of the range/ref access cost we will need to incur was incorrect. Adjusted the computation.
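In outline, the fix scales the quick-select cost by only the fraction of rows_limit that the range access itself must produce. Below is a minimal standalone sketch of that computation (the function name, structure and numbers are invented for illustration; this is not the MariaDB source):

    #include <cstdio>

    /*
      Hedged sketch of the corrected scaling in get_range_limit_read_cost().
      The standalone function and all numbers are illustrative only.
    */
    static double scaled_quick_cost(double best_cost,     /* cost of reading the whole range */
                                    double best_rows,     /* rows the range access returns */
                                    double table_records, /* total rows in the table */
                                    double rows_limit)    /* rows a full index scan would read */
    {
      /* The range access has selectivity best_rows / table_records, so the
         quick select only needs to produce that fraction of rows_limit. */
      double rows_limit_for_quick= rows_limit * (best_rows / table_records);
      if (best_rows > rows_limit_for_quick)
        best_cost *= rows_limit_for_quick / best_rows;
      return best_cost;
    }

    int main()
    {
      /* 1000-row table, range matches 100 rows, a full index scan would read
         40 rows: the quick select must produce only 4, so we pay 4% of its cost. */
      std::printf("%g\n", scaled_quick_cost(50.0, 100.0, 1000.0, 40.0)); /* prints 2 */
    }

The buggy version scaled by rows_limit / best_rows instead, overcharging the range plan whenever the full WHERE clause is more selective than the range condition alone.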
-rw-r--r--  sql/sql_select.cc | 43
1 file changed, 37 insertions(+), 6 deletions(-)
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 2682c06f0cf..d106a49bda8 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -25730,16 +25730,22 @@ void JOIN::cache_const_exprs()
 
 /*
-  Get a cost of reading rows_limit rows through index keynr.
+  Get the cost of using index keynr to read #LIMIT matching rows
 
   @detail
    - If there is a quick select, we try to use it.
    - if there is a ref(const) access, we try to use it, too.
    - quick and ref(const) use different cost formulas, so if both are possible
      we should make a cost-based choice.
-
+
+  rows_limit is the number of rows we would need to read when using a full
+  index scan. This is generally higher than the N from "LIMIT N" clause,
+  because there's a WHERE condition (a part of which is used to construct a
+  range access we are considering using here)
+
   @param  tab         JOIN_TAB with table access (is NULL for single-table
                       UPDATE/DELETE)
+  @param  rows_limit  See explanation above
   @param  read_time   OUT  Cost of reading using quick or ref(const) access.
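As a quick worked illustration of the rows_limit parameter documented above (numbers invented for this sketch): with LIMIT 10 and a WHERE clause of overall selectivity 0.25, a full index scan is expected to read

    rows_limit = 10 / 0.25 = 40 rows

before it yields the 10 matching rows the query asks for.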
@@ -25752,6 +25758,7 @@ void JOIN::cache_const_exprs()
 
 static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                                       const TABLE *table,
+                                      ha_rows table_records,
                                       uint keynr,
                                       ha_rows rows_limit,
                                       double *read_time)
@@ -25818,8 +25825,32 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
       }
     }
   }
+
+  /*
+    Consider an example:
+
+      SELECT *
+      FROM t1
+      WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
+      ORDER BY key1 LIMIT 10
+
+    If we were using a full index scan on key1, we would need to read this
+    many rows to get 10 matches:
+
+      10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')
+
+    This is the number we get in rows_limit.
+    But we intend to use range access on key1. The rows returned by quick
+    select will satisfy the range part of the condition,
+    "key1 BETWEEN 10 and 20". We will still need to filter them with
+    the remainder condition, (col2='foo').
+
+    The selectivity of the range access is (best_rows/table_records). We need
+    to discount it from the rows_limit:
+  */
+  double rows_limit_for_quick= rows_limit * (best_rows / table_records);
 
-  if (best_rows > rows_limit)
+  if (best_rows > rows_limit_for_quick)
   {
     /*
       LIMIT clause specifies that we will need to read fewer records than
@@ -25828,7 +25859,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
       only need 1/3rd of records, it will cost us 1/3rd of quick select's
       read time)
     */
-    best_cost *= rows_limit / best_rows;
+    best_cost *= rows_limit_for_quick / best_rows;
   }
   *read_time= best_cost;
   res= true;
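Continuing the example from the comment in the hunk above, with invented numbers: if t1 has table_records = 1000, the range on key1 matches best_rows = 100 rows, and rows_limit = 40, then

    rows_limit_for_quick = 40 * (100 / 1000) = 4
    best_rows (100) > rows_limit_for_quick (4)
    best_cost *= 4 / 100

so the range plan is charged only the 4% of the quick select's rows we expect to read before the LIMIT is satisfied.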
@@ -26121,8 +26152,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
         index_scan_time= select_limit/rec_per_key *
                          MY_MIN(rec_per_key, table->file->scan_time());
         double range_scan_time;
-        if (get_range_limit_read_cost(tab, table, nr, select_limit,
-                                      &range_scan_time))
+        if (get_range_limit_read_cost(tab, table, table_records, nr,
+                                      select_limit, &range_scan_time))
         {
           if (range_scan_time < index_scan_time)
             index_scan_time= range_scan_time;