Code cleanup, added comments.

author: Sergey Petrunya <psergey@askmonty.org> 2011-05-10 11:31:02 +0100
committer: Sergey Petrunya <psergey@askmonty.org> 2011-05-10 11:31:02 +0100
commit: f34b421839c78ccc56db4ecc7bbb97929f309801 (patch)
tree: 2beb645a7956ba6e55dcb5f27cfbfb9921518d8d
parent: 8d29fddb972e3ab052c82b47a3030d3048e5d224 (diff)
download: mariadb-git-f34b421839c78ccc56db4ecc7bbb97929f309801.tar.gz
3 files changed, 46 insertions, 8 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 1807745f722..a827bc7d055 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -4085,8 +4085,6 @@ void subselect_hash_sj_engine::cleanup()
   result->cleanup(); /* Resets the temp table as well. */
 }
 
-JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
-JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
 
 /*
   Get fanout produced by tables specified in the table_map
@@ -4215,6 +4213,24 @@ void check_out_index_stats(JOIN *join)
 #endif
 
 
+/*
+  Get an estimate of how many records will be produced after the GROUP BY
+  operation.
+
+  @param join           Join we're operating on 
+  @param join_op_rows   How many records will be produced by the join
+                        operations (this is what join optimizer produces)
+  
+  @seealso
+     optimize_semijoin_nests(); grep there for "Adjust output cardinality
+     estimates". The code there is very similar but has not been merged with
+     this function, because the two operate on different data structures and
+     abstracting them out would take too much effort.
+
+  @return
+     Number of records we expect to get after the GROUP BY operation
+*/
+
 double get_post_group_estimate(JOIN* join, double join_op_rows)
 {
   table_map tables_in_group_list= table_map(0);
@@ -4240,7 +4256,9 @@ double get_post_group_estimate(JOIN* join, double join_op_rows)
   double out_rows;
   
   out_rows= get_fanout_with_deps(join, tables_in_group_list);
-  
+
+#if 0
+  /* The following will be needed when making use of index stats: */
   /* 
     Also generate max. number of records for each of the tables mentioned 
     in the group-list. We'll use that a baseline number that we'll try to 
@@ -4259,10 +4277,24 @@ double get_post_group_estimate(JOIN* join, double join_op_rows)
     Try to bring down estimates using index statistics.
   */
   //check_out_index_stats(join);
+#endif
+
   return out_rows;
 }
 
 
+/*
+  Optimize the underlying subselect's join
+
+  @param out_rows   OUT   How many records we expect to get in the 
+                          materialized table
+  @param cost       OUT   Cost to materialize the subquery
+
+  @return 
+    0  OK
+    1  Fatal error
+*/
+
 int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
 {
   int res;
@@ -4271,7 +4303,8 @@ int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
   JOIN *join= materialize_join;
 
   thd->lex->current_select= join->select_lex;
-  res= join->optimize();
+  if ((res= join->optimize()))
+    DBUG_RETURN(res);
 
   /* Calculate #rows and cost of join execution */
   get_partial_join_cost(join, join->table_count - join->const_tables, 
@@ -4291,15 +4324,15 @@ int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
   if (!join->group_list && !join->group_optimized_away &&
       join->tmp_table_param.sum_func_count)
   {
-    DBUG_PRINT("info",("Materialized join will have only 1 row (has "
-                       "aggregates but not GROUP BY"));
+    DBUG_PRINT("info",("Materialized join will have only 1 row (it has "
+                       "aggregates but no GROUP BY"));
     *out_rows= 1;
   }
   
   /* Now with grouping */
   if (join->group_list)
   {
-    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate"));
+    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it"));
     double output_rows= get_post_group_estimate(materialize_join, *out_rows);
     DBUG_PRINT("info",("Got value of %g", output_rows));
     *out_rows= output_rows;
@@ -4308,6 +4341,7 @@ int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
   DBUG_RETURN(res);
 }
 
+
 /**
   Execute a subquery IN predicate via materialization.
 
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 24727d2f656..3c6d97a0843 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -1671,6 +1671,8 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
                 "oe IN (SELECT t.key ...)" it is trivial. 
               - Functional dependencies between the tables in the semi-join
                 nest (the payoff is probably less here?)
+          
+          See also get_post_group_estimate().
         */
         {
           for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
@@ -3537,7 +3539,6 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl)
     FALSE  OK 
     TRUE   Out of memory error
 */
-JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls);
 
 int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
                                     uint no_jbuf_after)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4fbb8121208..d652204e54a 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1076,6 +1076,9 @@ JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls);
 JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, 
                           enum enum_with_bush_roots include_bush_roots);
 
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
+
 typedef struct st_select_check {
   uint const_ref,reg_ref;
 } SELECT_CHECK;
author	Sergey Petrunya <psergey@askmonty.org>	2011-05-10 11:31:02 +0100
committer	Sergey Petrunya <psergey@askmonty.org>	2011-05-10 11:31:02 +0100
commit	f34b421839c78ccc56db4ecc7bbb97929f309801 (patch)
tree	2beb645a7956ba6e55dcb5f27cfbfb9921518d8d
parent	8d29fddb972e3ab052c82b47a3030d3048e5d224 (diff)
download	mariadb-git-f34b421839c78ccc56db4ecc7bbb97929f309801.tar.gz