summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <timour@askmonty.org>2011-06-21 15:50:07 +0300
committerunknown <timour@askmonty.org>2011-06-21 15:50:07 +0300
commita02682abcc53199e0110ec9f24f2063fa21bd6b5 (patch)
treee7b69db65008324eceb278eccaeff7ca09d9a064 /sql
parent0cf912c23f5c5bec885e0a35e2511b5b83327433 (diff)
downloadmariadb-git-a02682abcc53199e0110ec9f24f2063fa21bd6b5.tar.gz
MWL#89
- Added regression test with queries over the WORLD database. - Discovered and fixed several bugs in the related cost calculation functionality both in the semijoin and non-semijon subquery code. - Added DBUG printing of the cost variables used to decide between IN-EXISTS and MATERIALIZATION.
Diffstat (limited to 'sql')
-rw-r--r--sql/item_subselect.cc13
-rw-r--r--sql/item_subselect.h25
-rw-r--r--sql/opt_subselect.cc78
-rw-r--r--sql/sql_select.cc2
4 files changed, 64 insertions, 54 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 6ae6590c973..5da8f686835 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -38,11 +38,14 @@ Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0),
used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
inside_first_fix_fields(0), done_first_fix_fields(FALSE),
- substitution(0), expr_cache(0), engine(0), forced_const(FALSE), eliminated(FALSE),
+ expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
engine_changed(0), changed(0), is_correlated(FALSE)
{
DBUG_ENTER("Item_subselect::Item_subselect");
DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+#ifndef DBUG_OFF
+ exec_counter= 0;
+#endif
with_subselect= 1;
reset();
/*
@@ -130,6 +133,10 @@ void Item_subselect::cleanup()
value_assigned= 0;
expr_cache= 0;
forced_const= FALSE;
+ DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
+#ifndef DBUG_OFF
+ exec_counter= 0;
+#endif
DBUG_VOID_RETURN;
}
@@ -548,7 +555,9 @@ bool Item_subselect::exec()
DBUG_EXECUTE_IF("subselect_exec_fail", return 1;);
res= engine->exec();
-
+#ifndef DBUG_OFF
+ ++exec_counter;
+#endif
if (engine_changed)
{
engine_changed= 0;
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 8358cebf07d..82030bffa91 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -52,6 +52,17 @@ protected:
bool inside_first_fix_fields;
bool done_first_fix_fields;
+ Item *expr_cache;
+ /*
+ Set to TRUE if at optimization or execution time we determine that this
+ item's value is a constant. We need this member because it is not possible
+ to substitute 'this' with a constant item.
+ */
+ bool forced_const;
+#ifndef DBUG_OFF
+ /* Count the number of times this subquery predicate has been executed. */
+ uint exec_counter;
+#endif
public:
/*
Used inside Item_subselect::fix_fields() according to this scenario:
@@ -66,19 +77,13 @@ public:
substitution= NULL;
< Item_subselect::fix_fields
*/
+ /* TODO make this protected member again. */
Item *substitution;
- /* unit of subquery */
- st_select_lex_unit *unit;
- Item *expr_cache;
/* engine that perform execution of subselect (single select or union) */
+ /* TODO make this protected member again. */
subselect_engine *engine;
- /*
- Set to TRUE if at optimization or execution time we determine that this
- item's value is a constant. We need this member because it is not possible
- to substitute 'this' with a constant item.
- */
- bool forced_const;
-
+ /* unit of subquery */
+ st_select_lex_unit *unit;
/* A reference from inside subquery predicate to somewhere outside of it */
class Ref_to_outside : public Sql_alloc
{
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 96ac9df8a6f..0d325a4b0e1 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4324,8 +4324,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
{
JOIN *outer_join;
JOIN *inner_join= this;
- /* Number of (partial) rows of the outer JOIN filtered by the IN predicate. */
- double outer_record_count;
/* Number of unique value combinations filtered by the IN predicate. */
double outer_lookup_keys;
/* Cost and row count of the unmodified subquery. */
@@ -4345,38 +4343,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
if (outer_join)
{
- uint outer_partial_plan_len;
+ /*
+ The index of the last JOIN_TAB in the outer JOIN where in_subs is
+ attached (pushed to).
+ */
+ uint max_outer_join_tab_idx;
/*
Make_cond_for_table is called for predicates only in the WHERE/ON
clauses. In all other cases, predicates are not pushed to any
- JOIN_TAB, and their joi_tab_idx remains MAX_TABLES. Such predicates
+ JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates
are evaluated for each complete row of the outer join.
*/
- outer_partial_plan_len= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
- outer_join->table_count :
- in_subs->get_join_tab_idx() + 1;
- outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
+ DBUG_ASSERT(outer_join->table_count > 0);
+ max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
+ outer_join->table_count - 1:
+ in_subs->get_join_tab_idx();
+ /*
+ TODO:
+ Currently outer_lookup_keys is computed as the number of rows in
+ the partial join including the JOIN_TAB where the IN predicate is
+ pushed to. In the general case this is a gross overestimate because
+ due to caching we are interested only in the number of unique keys.
+ The search key may be formed by columns from much fewer than all
+ tables in the partial join. Example:
+ select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
+ If the join order: t1, t2, the number of unique lookup keys is ~ to
+ the number of unique values t2.c2 in the partial join t1 join t2.
+ */
+ outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx,
table_map(-1),
&dummy,
- &outer_record_count);
-
- if (outer_join->table_count > outer_join->const_tables)
- {
- outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
- in_subs->used_tables(),
- &dummy,
- &outer_lookup_keys);
- /*
- outer_lookup_keys= prev_record_reads(outer_join->best_positions,
- outer_partial_plan_len,
- in_subs->used_tables());
- */
- }
- else
- {
- /* If all tables are constant, positions is undefined. */
- outer_lookup_keys= 1;
- }
+ &outer_lookup_keys);
}
else
{
@@ -4384,17 +4381,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
TODO: outer_join can be NULL for DELETE statements.
How to compute its cost?
*/
- outer_record_count= 1;
- outer_lookup_keys=1;
+ outer_lookup_keys= 1;
}
- /*
- There cannot be more lookup keys than the total number of records.
- TODO: this a temporary solution until we find a better way to compute
- get_partial_join_cost() and prev_record_reads() in a consitent manner,
- where it is guaranteed that (outer_lookup_keys <= outer_record_count).
- */
- if (outer_lookup_keys > outer_record_count)
- outer_lookup_keys= outer_record_count;
/*
B. Estimate the cost and number of records of the subquery both
@@ -4442,7 +4430,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
write_cost * inner_record_count_1;
materialize_strategy_cost= materialization_cost +
- outer_record_count * lookup_cost;
+ outer_lookup_keys * lookup_cost;
/* C.2 Compute the cost of the IN=>EXISTS strategy. */
in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
@@ -4452,6 +4440,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
+
+ DBUG_PRINT("info",
+ ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
+ materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+ DBUG_PRINT("info",
+ ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
+ in_exists_strategy_cost, inner_read_time_2));
+ DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
}
/*
@@ -4507,9 +4503,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
const_tables != table_count)
{
/*
- The subquery was not reoptimized either because the user allowed only the
- IN-EXISTS strategy, or because materialization was not possible based on
- semantic analysis. Clenup the original plan and reoptimize.
+ The subquery was not reoptimized either because the user allowed only
+ the IN-EXISTS strategy, or because materialization was not possible
+ based on semantic analysis. Cleanup the original plan and reoptimize.
*/
for (uint i= 0; i < table_count; i++)
{
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 38eaca686b6..3f824179bac 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5969,7 +5969,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx,
}
for (tab= first_depth_first_tab(this), i= const_tables;
- tab;
+ (i <= end_tab_idx && tab);
tab= next_depth_first_tab(this, tab), i++)
{
/*