BUG#14940: Slow join order is chosen: [2nd commit with post-review fixes]

- Re-worked the prev_record_reads() function to return the lower bound of number of different table access scans that will be performed.
author: sergefp@mysql.com <> 2006-09-29 15:58:47 +0400
committer: sergefp@mysql.com <> 2006-09-29 15:58:47 +0400
commit: 47b357522a2e9329b2c0376db0199508fe3654d8 (patch)
tree: 5ddf809bcc56ad6e7f36bca0a68ae71a0e62064b
parent: 16a5d97a5d335d10f9c96bea00801fcc28c545e9 (diff)
download: mariadb-git-47b357522a2e9329b2c0376db0199508fe3654d8.tar.gz
4 files changed, 161 insertions, 13 deletions
diff --git a/mysql-test/r/join.result b/mysql-test/r/join.result
index 75daa0fd46d..897ec4f7a6a 100644
--- a/mysql-test/r/join.result
+++ b/mysql-test/r/join.result
@@ -776,3 +776,31 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	a,b	NULL	NULL	NULL	1000	Using where
 1	SIMPLE	t3	ref	b	b	5	test.t2.b	1	Using where
 drop table t1, t2, t3;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, primary key(a));
+insert into t2 select @v:=A.a+10*B.a, @v  from t1 A, t1 B;
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	
+show status like '%cost%';
+Variable_name	Value
+Last_query_cost	4.016090
+select 'The cost of accessing t1 (dont care if it changes' '^';
+The cost of accessing t1 (dont care if it changes
+The cost of accessing t1 (dont care if it changes^
+select 'vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv' Z;
+Z
+vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv
+explain select * from t1, t2 A, t2 B where A.a = t1.a and B.a=A.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	SIMPLE	A	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	
+1	SIMPLE	B	eq_ref	PRIMARY	PRIMARY	4	test.A.b	1	
+show status like '%cost%';
+Variable_name	Value
+Last_query_cost	24.016090
+select '^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an error' Z;
+Z
+^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an error
+drop table t1, t2;
diff --git a/mysql-test/t/join.test b/mysql-test/t/join.test
index 98bfb33b1e6..d0005f3b8f7 100644
--- a/mysql-test/t/join.test
+++ b/mysql-test/t/join.test
@@ -609,3 +609,24 @@ explain select * from t2,t3 where t2.a < 200 and t2.b=t3.b;
 
 drop table t1, t2, t3;
 
+# BUG#14940 {Wrong query plan is chosen because of odd results of
+# prev_record_reads() function }
+create table t1 (a int); 
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t2 (a int, b int, primary key(a));
+insert into t2 select @v:=A.a+10*B.a, @v  from t1 A, t1 B;
+
+explain select * from t1;
+show status like '%cost%';
+select 'The cost of accessing t1 (dont care if it changes' '^';
+
+select 'vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv' Z;
+
+explain select * from t1, t2 A, t2 B where A.a = t1.a and B.a=A.b;
+show status like '%cost%';
+select '^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an error' Z;
+
+
+
+drop table t1, t2;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7eda55c6a3f..a199b0a43bb 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -70,7 +70,7 @@ static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
 static void find_best(JOIN *join,table_map rest_tables,uint index,
 		      double record_count,double read_time);
 static uint cache_record_length(JOIN *join,uint index);
-static double prev_record_reads(JOIN *join,table_map found_ref);
+static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
 static bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
@@ -3373,6 +3373,7 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   join->positions[idx].table= table;
   join->positions[idx].key=key;
   join->positions[idx].records_read=1.0;	/* This is a const table */
+  join->positions[idx].ref_depend_map= 0;
 
   /* Move the const table as down as possible in best_ref */
   JOIN_TAB **pos=join->best_ref+idx+1;
@@ -3430,6 +3431,7 @@ best_access_path(JOIN      *join,
   double best=              DBL_MAX;
   double best_time=         DBL_MAX;
   double records=           DBL_MAX;
+  table_map best_ref_depends_map;
   double tmp;
   ha_rows rec;
 
@@ -3458,13 +3460,20 @@ best_access_path(JOIN      *join,
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
-      do
-      { /* for each keypart */
+
+      do /* For each keypart */
+      {
         uint keypart= keyuse->keypart;
         table_map best_part_found_ref= 0;
         double best_prev_record_reads= DBL_MAX;
-        do
+        
+        do /* For each way to access the keypart */
         {
+
+          /*
+            if 1. expression doesn't refer to forward tables
+               2. we won't get two ref-or-null's
+          */
           if (!(remaining_tables & keyuse->used_tables) &&
               !(ref_or_null_part && (keyuse->optimize &
                                      KEY_OPTIMIZE_REF_OR_NULL)))
@@ -3472,8 +3481,9 @@ best_access_path(JOIN      *join,
             found_part|= keyuse->keypart_map;
             if (!(keyuse->used_tables & ~join->const_table_map))
               const_part|= keyuse->keypart_map;
-            double tmp= prev_record_reads(join, (found_ref |
-                                                 keyuse->used_tables));
+
+            double tmp= prev_record_reads(join, idx, (found_ref |
+                                                      keyuse->used_tables));
             if (tmp < best_prev_record_reads)
             {
               best_part_found_ref= keyuse->used_tables;
@@ -3512,7 +3522,7 @@ best_access_path(JOIN      *join,
           Really, there should be records=0.0 (yes!)
           but 1.0 would be probably safer
         */
-        tmp= prev_record_reads(join, found_ref);
+        tmp= prev_record_reads(join, idx, found_ref);
         records= 1.0;
       }
       else
@@ -3527,7 +3537,7 @@ best_access_path(JOIN      *join,
           max_key_part= (uint) ~0;
           if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
           {
-            tmp = prev_record_reads(join, found_ref);
+            tmp = prev_record_reads(join, idx, found_ref);
             records=1.0;
           }
           else
@@ -3757,6 +3767,7 @@ best_access_path(JOIN      *join,
         best_records= records;
         best_key= start_key;
         best_max_key_part= max_key_part;
+        best_ref_depends_map= found_ref;
       }
     }
     records= best_records;
@@ -3885,6 +3896,8 @@ best_access_path(JOIN      *join,
       best= tmp;
       records= rows2double(rnd_records);
       best_key= 0;
+      /* range/index_merge/ALL/index access method are "independent", so: */
+      best_ref_depends_map= 0;
     }
   }
 
@@ -3893,6 +3906,7 @@ best_access_path(JOIN      *join,
   join->positions[idx].read_time=    best;
   join->positions[idx].key=          best_key;
   join->positions[idx].table=        s;
+  join->positions[idx].ref_depend_map= best_ref_depends_map;
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -4660,17 +4674,86 @@ cache_record_length(JOIN *join,uint idx)
 }
 
 
+/*
+  Get the number of different row combinations for subset of partial join
+
+  SYNOPSIS
+    prev_record_reads()
+      join       The join structure
+      idx        Number of tables in the partial join order (i.e. the
+                 partial join order is in join->positions[0..idx-1])
+
+      found_ref  Bitmap of tables for which we need to find # of distinct
+                 row combinations.
+
+  DESCRIPTION
+    Given a partial join order (in join->positions[0..idx-1]) and a subset of
+    tables within that join order (specified in found_ref), find out how many
+    distinct row combinations of subset tables will be in the result of the
+    partial join order.
+     
+    This is used as follows: Suppose we have a table accessed with a ref-based
+    method. The ref access depends on current rows of tables in found_ref.
+    We want to count # of different ref accesses. We assume two ref accesses
+    will be different if at least one of access parameters is different.
+    Example: consider a query
+
+    SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field
+
+    and a join order:
+      t1,  ref access on t1.key=c1
+      t2,  ref access on t2.key=c2       
+      t3,  ref access on t3.key=t1.field 
+    
+    For t1: n_ref_scans = 1, n_distinct_ref_scans = 1
+    For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1
+    For t3: n_ref_scans = records_read(t1)*records_read(t2)
+            n_distinct_ref_scans = #records_read(t1)
+    
+    The reason for having this function (at least the latest version of it)
+    is that we need to account for buffering in join execution. 
+    
+    An edge-case example: if we have a non-first table in join accessed via
+    ref(const) or ref(param) where there is a small number of different
+    values of param, then the access will likely hit the disk cache and will
+    not require any disk seeks.
+    
+    The proper solution would be to assume an LRU disk cache of some size,
+    calculate probability of cache hits, etc. For now we just count
+    identical ref accesses as one.
+
+  RETURN 
+    Expected number of row combinations
+*/
+
 static double
-prev_record_reads(JOIN *join,table_map found_ref)
+prev_record_reads(JOIN *join, uint idx, table_map found_ref)
 {
   double found=1.0;
-  found_ref&= ~OUTER_REF_TABLE_BIT;
-  for (POSITION *pos=join->positions ; found_ref ; pos++)
+  POSITION *pos_end= join->positions - 1;
+  for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--)
   {
     if (pos->table->table->map & found_ref)
     {
-      found_ref&= ~pos->table->table->map;
-      found*=pos->records_read;
+      found_ref|= pos->ref_depend_map;
+      /* 
+        For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table 
+        with no matching row we will get position[t2].records_read==0. 
+        Actually the size of output is one null-complemented row, therefore 
+        we will use value of 1 whenever we get records_read==0.
+
+        Note
+        - the above case can't occur if inner part of outer join has more 
+          than one table: table with no matches will not be marked as const.
+
+        - Ideally we should add 1 to records_read for every possible null-
+          complemented row. We're not doing it because: 1. it will require
+          non-trivial code and add overhead. 2. The value of records_read
+          is an inprecise estimate and adding 1 (or, in the worst case,
+          #max_nested_outer_joins=64-1) will not make it any more precise.
+      */
+      if (pos->records_read)
+        found*= pos->records_read;
     }
   }
   return found;
@@ -10242,6 +10325,7 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
       tab->info="const row not found";
       /* Mark for EXPLAIN that the row was not found */
       pos->records_read=0.0;
+      pos->ref_depend_map= 0;
       if (!table->maybe_null || error > 0)
 	DBUG_RETURN(error);
     }
@@ -10267,6 +10351,7 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
       tab->info="unique row not found";
       /* Mark for EXPLAIN that the row was not found */
       pos->records_read=0.0;
+      pos->ref_depend_map= 0;
       if (!table->maybe_null || error > 0)
 	DBUG_RETURN(error);
     }
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 3b0c312757d..eb6d2d5d34f 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -176,7 +176,13 @@ enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
 */
 typedef struct st_position
 {
+  /*
+    The "fanout": number of output rows that will be produced (after
+    pushed down selection condition is applied) per each row combination of
+    previous tables.
+  */
   double records_read;
+
   /* 
     Cost accessing the table in course of the entire complete join execution,
     i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times 
@@ -184,7 +190,15 @@ typedef struct st_position
   */
   double read_time;
   JOIN_TAB *table;
+
+  /*
+    NULL  -  'index' or 'range' or 'index_merge' or 'ALL' access is used.
+    Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
+  */
   KEYUSE *key;
+
+  /* If ref-based access is used: bitmap of tables this table depends on  */
+  table_map ref_depend_map;
 } POSITION;
author	sergefp@mysql.com <>	2006-09-29 15:58:47 +0400
committer	sergefp@mysql.com <>	2006-09-29 15:58:47 +0400
commit	47b357522a2e9329b2c0376db0199508fe3654d8 (patch)
tree	5ddf809bcc56ad6e7f36bca0a68ae71a0e62064b
parent	16a5d97a5d335d10f9c96bea00801fcc28c545e9 (diff)
download	mariadb-git-47b357522a2e9329b2c0376db0199508fe3654d8.tar.gz