diff options
author | unknown <timour@askmonty.org> | 2011-06-21 15:50:07 +0300 |
---|---|---|
committer | unknown <timour@askmonty.org> | 2011-06-21 15:50:07 +0300 |
commit | a02682abcc53199e0110ec9f24f2063fa21bd6b5 (patch) | |
tree | e7b69db65008324eceb278eccaeff7ca09d9a064 /sql | |
parent | 0cf912c23f5c5bec885e0a35e2511b5b83327433 (diff) | |
download | mariadb-git-a02682abcc53199e0110ec9f24f2063fa21bd6b5.tar.gz |
MWL#89
- Added regression test with queries over the WORLD database.
- Discovered and fixed several bugs in the related cost calculation
functionality both in the semijoin and non-semijon subquery code.
- Added DBUG printing of the cost variables used to decide between
IN-EXISTS and MATERIALIZATION.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_subselect.cc | 13 | ||||
-rw-r--r-- | sql/item_subselect.h | 25 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 78 | ||||
-rw-r--r-- | sql/sql_select.cc | 2 |
4 files changed, 64 insertions, 54 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 6ae6590c973..5da8f686835 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -38,11 +38,14 @@ Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1), inside_first_fix_fields(0), done_first_fix_fields(FALSE), - substitution(0), expr_cache(0), engine(0), forced_const(FALSE), eliminated(FALSE), + expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE), engine_changed(0), changed(0), is_correlated(FALSE) { DBUG_ENTER("Item_subselect::Item_subselect"); DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this)); +#ifndef DBUG_OFF + exec_counter= 0; +#endif with_subselect= 1; reset(); /* @@ -130,6 +133,10 @@ void Item_subselect::cleanup() value_assigned= 0; expr_cache= 0; forced_const= FALSE; + DBUG_PRINT("info", ("exec_counter: %d", exec_counter)); +#ifndef DBUG_OFF + exec_counter= 0; +#endif DBUG_VOID_RETURN; } @@ -548,7 +555,9 @@ bool Item_subselect::exec() DBUG_EXECUTE_IF("subselect_exec_fail", return 1;); res= engine->exec(); - +#ifndef DBUG_OFF + ++exec_counter; +#endif if (engine_changed) { engine_changed= 0; diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 8358cebf07d..82030bffa91 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -52,6 +52,17 @@ protected: bool inside_first_fix_fields; bool done_first_fix_fields; + Item *expr_cache; + /* + Set to TRUE if at optimization or execution time we determine that this + item's value is a constant. We need this member because it is not possible + to substitute 'this' with a constant item. + */ + bool forced_const; +#ifndef DBUG_OFF + /* Count the number of times this subquery predicate has been executed. */ + uint exec_counter; +#endif public: /* Used inside Item_subselect::fix_fields() according to this scenario: @@ -66,19 +77,13 @@ public: substitution= NULL; < Item_subselect::fix_fields */ + /* TODO make this protected member again. */ Item *substitution; - /* unit of subquery */ - st_select_lex_unit *unit; - Item *expr_cache; /* engine that perform execution of subselect (single select or union) */ + /* TODO make this protected member again. */ subselect_engine *engine; - /* - Set to TRUE if at optimization or execution time we determine that this - item's value is a constant. We need this member because it is not possible - to substitute 'this' with a constant item. - */ - bool forced_const; - + /* unit of subquery */ + st_select_lex_unit *unit; /* A reference from inside subquery predicate to somewhere outside of it */ class Ref_to_outside : public Sql_alloc { diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 96ac9df8a6f..0d325a4b0e1 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4324,8 +4324,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables) { JOIN *outer_join; JOIN *inner_join= this; - /* Number of (partial) rows of the outer JOIN filtered by the IN predicate. */ - double outer_record_count; /* Number of unique value combinations filtered by the IN predicate. */ double outer_lookup_keys; /* Cost and row count of the unmodified subquery. */ @@ -4345,38 +4343,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables) outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; if (outer_join) { - uint outer_partial_plan_len; + /* + The index of the last JOIN_TAB in the outer JOIN where in_subs is + attached (pushed to). + */ + uint max_outer_join_tab_idx; /* Make_cond_for_table is called for predicates only in the WHERE/ON clauses. In all other cases, predicates are not pushed to any - JOIN_TAB, and their joi_tab_idx remains MAX_TABLES. Such predicates + JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates are evaluated for each complete row of the outer join. */ - outer_partial_plan_len= (in_subs->get_join_tab_idx() == MAX_TABLES) ? - outer_join->table_count : - in_subs->get_join_tab_idx() + 1; - outer_join->get_partial_cost_and_fanout(outer_partial_plan_len, + DBUG_ASSERT(outer_join->table_count > 0); + max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ? + outer_join->table_count - 1: + in_subs->get_join_tab_idx(); + /* + TODO: + Currently outer_lookup_keys is computed as the number of rows in + the partial join including the JOIN_TAB where the IN predicate is + pushed to. In the general case this is a gross overestimate because + due to caching we are interested only in the number of unique keys. + The search key may be formed by columns from much fewer than all + tables in the partial join. Example: + select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...); + If the join order: t1, t2, the number of unique lookup keys is ~ to + the number of unique values t2.c2 in the partial join t1 join t2. + */ + outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx, table_map(-1), &dummy, - &outer_record_count); - - if (outer_join->table_count > outer_join->const_tables) - { - outer_join->get_partial_cost_and_fanout(outer_partial_plan_len, - in_subs->used_tables(), - &dummy, - &outer_lookup_keys); - /* - outer_lookup_keys= prev_record_reads(outer_join->best_positions, - outer_partial_plan_len, - in_subs->used_tables()); - */ - } - else - { - /* If all tables are constant, positions is undefined. */ - outer_lookup_keys= 1; - } + &outer_lookup_keys); } else { @@ -4384,17 +4381,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables) TODO: outer_join can be NULL for DELETE statements. How to compute its cost? */ - outer_record_count= 1; - outer_lookup_keys=1; + outer_lookup_keys= 1; } - /* - There cannot be more lookup keys than the total number of records. - TODO: this a temporary solution until we find a better way to compute - get_partial_join_cost() and prev_record_reads() in a consitent manner, - where it is guaranteed that (outer_lookup_keys <= outer_record_count). - */ - if (outer_lookup_keys > outer_record_count) - outer_lookup_keys= outer_record_count; /* B. Estimate the cost and number of records of the subquery both @@ -4442,7 +4430,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) write_cost * inner_record_count_1; materialize_strategy_cost= materialization_cost + - outer_record_count * lookup_cost; + outer_lookup_keys * lookup_cost; /* C.2 Compute the cost of the IN=>EXISTS strategy. */ in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2; @@ -4452,6 +4440,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) in_subs->in_strategy&= ~SUBS_MATERIALIZATION; else in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS; + + DBUG_PRINT("info", + ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f", + materialize_strategy_cost, materialization_cost, write_cost, lookup_cost)); + DBUG_PRINT("info", + ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f", + in_exists_strategy_cost, inner_read_time_2)); + DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys)); } /* @@ -4507,9 +4503,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables) const_tables != table_count) { /* - The subquery was not reoptimized either because the user allowed only the - IN-EXISTS strategy, or because materialization was not possible based on - semantic analysis. Clenup the original plan and reoptimize. + The subquery was not reoptimized either because the user allowed only + the IN-EXISTS strategy, or because materialization was not possible + based on semantic analysis. Cleanup the original plan and reoptimize. */ for (uint i= 0; i < table_count; i++) { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 38eaca686b6..3f824179bac 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5969,7 +5969,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx, } for (tab= first_depth_first_tab(this), i= const_tables; - tab; + (i <= end_tab_idx && tab); tab= next_depth_first_tab(this, tab), i++) { /* |