From 63a88e137365c4158e22c1c8aba7847b5a0ffe86 Mon Sep 17 00:00:00 2001
From: Gleb Shchepa <gshchepa@mysql.com>
Date: Fri, 5 Mar 2010 23:45:55 +0400
Subject: Bug #39653: find_shortest_key in sql_select.cc does not            
 consider clustered primary keys

Choosing a shortest index for the covering index scan,
the optimizer ignored the fact, that the clustered primary
key read involves whole table data.

The find_shortest_key function has been modified to
take into account that fact that a clustered PK has a
longest key of possible covering indices.


mysql-test/r/innodb_mysql.result:
  Test case for bug #39653.
mysql-test/t/innodb_mysql.test:
  Test case for bug #39653.
sql/sql_select.cc:
  Bug #39653: find_shortest_key in sql_select.cc does not
              consider clustered primary keys

  The find_shortest_key function has been modified to
  take into account that fact that a clustered PK has a
  longest key of possible covering indices.
---
 mysql-test/r/innodb_mysql.result | 22 ++++++++++++++++++++++
 mysql-test/t/innodb_mysql.test   | 18 ++++++++++++++++++
 sql/sql_select.cc                | 27 +++++++++++++++++++++++++--
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/mysql-test/r/innodb_mysql.result b/mysql-test/r/innodb_mysql.result
index c4279018cb8..586cd5477a7 100644
--- a/mysql-test/r/innodb_mysql.result
+++ b/mysql-test/r/innodb_mysql.result
@@ -2295,4 +2295,26 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ref	f1	f1	4	test.t1.f1	1	Using index
 drop table t1,t2;
 #
+#
+# Bug #39653: find_shortest_key in sql_select.cc does not consider
+#             clustered primary keys
+#
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT, c INT, d INT, e INT, f INT,
+KEY (b,c)) ENGINE=INNODB;
+INSERT INTO t1 VALUES (1,1,1,1,1,1), (2,2,2,2,2,2), (3,3,3,3,3,3),
+(4,4,4,4,4,4), (5,5,5,5,5,5), (6,6,6,6,6,6),
+(7,7,7,7,7,7), (8,8,8,8,8,8), (9,9,9,9,9,9),
+(11,11,11,11,11,11);
+EXPLAIN SELECT COUNT(*) FROM t1;
+id	1
+select_type	SIMPLE
+table	t1
+type	index
+possible_keys	NULL
+key	b
+key_len	10
+ref	NULL
+rows	10
+Extra	Using index
+DROP TABLE t1;
 End of 5.1 tests
diff --git a/mysql-test/t/innodb_mysql.test b/mysql-test/t/innodb_mysql.test
index a3c11b8b8d6..75fff9656e2 100644
--- a/mysql-test/t/innodb_mysql.test
+++ b/mysql-test/t/innodb_mysql.test
@@ -558,4 +558,22 @@ drop table t1,t2;
 --echo #
 
 
+--echo #
+--echo # Bug #39653: find_shortest_key in sql_select.cc does not consider
+--echo #             clustered primary keys
+--echo #
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT, c INT, d INT, e INT, f INT,
+                 KEY (b,c)) ENGINE=INNODB;
+
+INSERT INTO t1 VALUES (1,1,1,1,1,1), (2,2,2,2,2,2), (3,3,3,3,3,3),
+                      (4,4,4,4,4,4), (5,5,5,5,5,5), (6,6,6,6,6,6),
+		      (7,7,7,7,7,7), (8,8,8,8,8,8), (9,9,9,9,9,9),
+		      (11,11,11,11,11,11);
+
+--query_vertical EXPLAIN SELECT COUNT(*) FROM t1
+
+DROP TABLE t1;
+
+
 --echo End of 5.1 tests
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 773aa7da139..bc68d3b03e0 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -12912,12 +12912,35 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
 
 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
 {
-  uint min_length= (uint) ~0;
   uint best= MAX_KEY;
+  uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
+                             table->s->primary_key != MAX_KEY &&
+                             usable_keys->is_set(table->s->primary_key)) ?
+                            table->s->primary_key : MAX_KEY;
   if (!usable_keys->is_clear_all())
   {
+    uint min_length= (uint) ~0;
     for (uint nr=0; nr < table->s->keys ; nr++)
     {
+      /*
+       As far as 
+       1) clustered primary key entry data set is a set of all record
+          fields (key fields and not key fields) and
+       2) secondary index entry data is a union of its key fields and
+          primary key fields (at least InnoDB and its derivatives don't
+          duplicate primary key fields there, even if the primary and
+          the secondary keys have a common subset of key fields),
+       then secondary index entry data is always a subset of primary key
+       entry, and the PK is always longer.
+       Unfortunately, key_info[nr].key_length doesn't show the length
+       of key/pointer pair but a sum of key field lengths only, thus
+       we can't estimate index IO volume comparing only this key_length
+       value of seconday keys and clustered PK.
+       So, try secondary keys first, and choose PK only if there are no
+       usable secondary covering keys:
+      */
+      if (nr == usable_clustered_pk)
+        continue;
       if (usable_keys->is_set(nr))
       {
         if (table->key_info[nr].key_length < min_length)
@@ -12928,7 +12951,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys)
       }
     }
   }
-  return best;
+  return best != MAX_KEY ? best : usable_clustered_pk;
 }
 
 /**
-- 
cgit v1.2.1