diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2011-11-23 04:25:52 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2011-11-23 04:25:52 +0400 |
commit | 694ce95557bc174dae2bc32279024102848cf5ee (patch) | |
tree | a8bfdfa04a772a3eb4b44fee12a28624102bde71 /sql | |
parent | 7f746fbe74e08d79217bdf7c7cba628e3b6bef85 (diff) | |
download | mariadb-git-694ce95557bc174dae2bc32279024102848cf5ee.tar.gz |
Semi-join optimizations code cleanup:
- Break down POSITION/advance_sj_state() into four classes
representing potential semi-join strategies.
- Treat all strategies uniformly (before, DuplicateWeedout
was special as it was the catch-all strategy. Now, we're
still relying on it to be the catch-all, but are able to
function, e.g. with firstmatch=on, duplicate_weedout=off).
- Update test results (checked)
Diffstat (limited to 'sql')
-rw-r--r-- | sql/opt_subselect.cc | 784 | ||||
-rw-r--r-- | sql/opt_subselect.h | 7 | ||||
-rw-r--r-- | sql/sql_select.cc | 23 | ||||
-rw-r--r-- | sql/sql_select.h | 327 |
4 files changed, 716 insertions, 425 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index fdda502e706..c7d07c21746 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2168,229 +2168,178 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) See setup_semijoin_dups_elimination() for a description of what kinds of join prefixes each strategy can handle. */ +bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables) +{ + for (int i= (int)idx; i >= 0; i--) + { + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= prefix[i].table->emb_sj_nest)) + { + if (inner_tables & emb_sj_nest->sj_inner_tables) + return !test(inner_tables == emb_sj_nest->sj_inner_tables); + } + } + return FALSE; +} -void advance_sj_state(JOIN *join, table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, - double *current_record_count, double *current_read_time, + +void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, double *current_read_time, POSITION *loose_scan_pos) { - TABLE_LIST *emb_sj_nest; POSITION *pos= join->positions + idx; - remaining_tables &= ~new_join_tab->table->map; - bool disable_jbuf= join->thd->variables.join_cache_level == 0; - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->sj_strategy= SJ_OPT_NONE; - - pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + const JOIN_TAB *new_join_tab= pos->table; + Semi_join_strategy_picker *pickers[]= + { + &pos->firstmatch_picker, + &pos->loosescan_picker, + &pos->sjmat_picker, + &pos->dups_weedout_picker, + NULL, + }; - /* We're performing optimization inside SJ-Materialization nest */ if (join->emb_sjm_nest) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; + /* + We're performing optimization inside SJ-Materialization nest: + - there are no other semi-joins inside semi-join nests + - 
attempts to build semi-join strategies here will confuse + the optimizer, so bail out. + */ return; } - /* Initialize the state or copy it from prev. tables */ - if (idx == join->const_tables) + /* + Update join->cur_sj_inner_tables (Used by FirstMatch in this function and + LooseScan detector in best_access_path) + */ + remaining_tables &= ~new_join_tab->table->map; + pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= new_join_tab->emb_sj_nest)) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; - LINT_INIT(pos->sjm_scan_last_inner); + /// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; + + /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ + /// if (!(remaining_tables & + /// emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + /// join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - else + + Semi_join_strategy_picker **strategy; + if (idx == join->const_tables) { - // FirstMatch - pos->first_firstmatch_table= - (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ? - MAX_TABLES : pos[-1].first_firstmatch_table; - pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl; - pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables; - - // LooseScan - pos->first_loosescan_table= - (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ? - MAX_TABLES : pos[-1].first_loosescan_table; - pos->loosescan_need_tables= pos[-1].loosescan_need_tables; - - // SJ-Materialization Scan - pos->sjm_scan_need_tables= - (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ? 
- 0 : pos[-1].sjm_scan_need_tables; - pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner; - - // Duplicate Weedout - pos->dupsweedout_tables= pos[-1].dupsweedout_tables; - pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table; + /* First table, initialize pickers */ + for (strategy= pickers; *strategy != NULL; strategy++) + (*strategy)->set_empty(); + pos->inner_tables_handled_with_other_sjs= 0; } - - table_map handled_by_fm_or_ls= 0; - /* FirstMatch Strategy */ - if (new_join_tab->emb_sj_nest && - optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && - !join->outer_join) + else { - const table_map outer_corr_tables= - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on; - const table_map sj_inner_tables= - new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; - - /* - Enter condition: - 1. The next join tab belongs to semi-join nest - (verified for the encompassing code block above). - 2. We're not in a duplicate producer range yet - 3. All outer tables that - - the subquery is correlated with, or - - referred to from the outer_expr - are in the join prefix - 4. All inner tables are still part of remaining_tables. 
- */ - if (!join->cur_sj_inner_tables && // (2) - !(remaining_tables & outer_corr_tables) && // (3) - (sj_inner_tables == // (4) - ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) + for (strategy= pickers; *strategy != NULL; strategy++) { - /* Start tracking potential FirstMatch range */ - pos->first_firstmatch_table= idx; - pos->firstmatch_need_tables= sj_inner_tables; - pos->first_firstmatch_rtbl= remaining_tables; + (*strategy)->set_from_prev(pos - 1); } + pos->inner_tables_handled_with_other_sjs= + pos[-1].inner_tables_handled_with_other_sjs; + } + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + + { + pos->sj_strategy= SJ_OPT_NONE; - if (pos->in_firstmatch_prefix()) + for (strategy= pickers; *strategy != NULL; strategy++) { - if (outer_corr_tables & pos->first_firstmatch_rtbl) - { - /* - Trying to add an sj-inner table whose sj-nest has an outer correlated - table that was not in the prefix. This means FirstMatch can't be used. - */ - pos->invalidate_firstmatch_prefix(); - } - else - { - /* Record that we need all of this semi-join's inner tables, too */ - pos->firstmatch_need_tables|= sj_inner_tables; - } - - if (pos->in_firstmatch_prefix() && - !(pos->firstmatch_need_tables & remaining_tables)) + table_map handled_fanout; + sj_strategy_enum sj_strategy; + double rec_count= *current_record_count; + double read_time= *current_read_time; + if ((*strategy)->check_qep(join, idx, remaining_tables, + new_join_tab, + &rec_count, + &read_time, + &handled_fanout, + &sj_strategy, + loose_scan_pos)) { /* - Got a complete FirstMatch range. - Calculate correct costs and fanout - */ - optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx, - remaining_tables, FALSE, idx, - current_record_count, - current_read_time); - /* - We don't yet know what are the other strategies, so pick the - FirstMatch. 
- - We ought to save the alternate POSITIONs produced by - optimize_wo_join_buffering but the problem is that providing save - space uses too much space. Instead, we will re-calculate the - alternate POSITIONs after we've picked the best QEP. + It's possible to use the strategy. Use it, if + - it removes semi-join fanout that was not removed before + - using it is cheaper than using something else, + and {if some other strategy has removed fanout + that this strategy is trying to remove, then it + did remove the fanout only for one semi-join} + This is to avoid a situation when + 1. strategy X removes fanout for semijoin X,Y + 2. using strategy Z is cheaper, but it only removes + fanout from semijoin X. + 3. We have no clue what to do about fanount of semi-join Y. */ - pos->sj_strategy= SJ_OPT_FIRST_MATCH; - handled_by_fm_or_ls= pos->firstmatch_need_tables; + if ((join->cur_dups_producing_tables & handled_fanout) || + (read_time < *current_read_time && + !(handled_fanout & pos->inner_tables_handled_with_other_sjs))) + { + /* Mark strategy as used */ + (*strategy)->mark_used(); + pos->sj_strategy= sj_strategy; + *current_read_time= read_time; + *current_record_count= rec_count; + join->cur_dups_producing_tables &= ~handled_fanout; + //TODO: update bitmap of semi-joins that were handled together with + // others. + if (is_multiple_semi_joins(join->positions, idx, handled_fanout)) + pos->inner_tables_handled_with_other_sjs |= handled_fanout; + } + else + { + /* We decided not to apply the strategy. */ + (*strategy)->set_empty(); + } } } } - /* LooseScan Strategy */ - { - POSITION *first=join->positions+pos->first_loosescan_table; - /* - LooseScan strategy can't handle interleaving between tables from the - semi-join that LooseScan is handling and any other tables. 
- - If we were considering LooseScan for the join prefix (1) - and the table we're adding creates an interleaving (2) - then - stop considering loose scan - */ - if ((pos->first_loosescan_table != MAX_TABLES) && // (1) - (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) - new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) - { - pos->first_loosescan_table= MAX_TABLES; - } - - /* - If we got an option to use LooseScan for the current table, start - considering using LooseScan strategy - */ - if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) - { - pos->first_loosescan_table= idx; - pos->loosescan_need_tables= - new_join_tab->emb_sj_nest->sj_inner_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on | - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - } - - if ((pos->first_loosescan_table != MAX_TABLES) && - !(remaining_tables & pos->loosescan_need_tables) && - (pos->table->table->map & pos->loosescan_need_tables)) - { - /* - Ok we have LooseScan plan and also have all LooseScan sj-nest's - inner tables and outer correlated tables into the prefix. - */ - - first=join->positions + pos->first_loosescan_table; - uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); - /* Got a complete LooseScan range. Calculate its cost */ - /* - The same problem as with FirstMatch - we need to save POSITIONs - somewhere but reserving space for all cases would require too - much space. We will re-calculate POSITION structures later on. - */ - optimize_wo_join_buffering(join, pos->first_loosescan_table, idx, - remaining_tables, - TRUE, //first_alt - disable_jbuf ? 
join->table_count : - pos->first_loosescan_table + n_tables, - current_record_count, - current_read_time); - /* - We don't yet have any other strategies that could handle this - semi-join nest (the other options are Duplicate Elimination or - Materialization, which need at least the same set of tables in - the join prefix to be considered) so unconditionally pick the - LooseScan. - */ - pos->sj_strategy= SJ_OPT_LOOSE_SCAN; - handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables; - } - } - - /* - Update join->cur_sj_inner_tables (Used by FirstMatch in this function and - LooseScan detector in best_access_path) - */ if ((emb_sj_nest= new_join_tab->emb_sj_nest)) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; - join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - join->cur_dups_producing_tables &= ~handled_by_fm_or_ls; - /* 4. 
SJ-Materialization and SJ-Materialization-scan strategy handler */ + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; +} + + +void Sj_materialization_picker::set_from_prev(struct st_position *prev) +{ + if (prev->sjmat_picker.is_used) + set_empty(); + else + { + sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; + sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner; + } + is_used= FALSE; +} + + +bool Sj_materialization_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ bool sjm_scan; SJ_MATERIALIZATION_INFO *mat_info; if ((mat_info= at_sjmat_pos(join, remaining_tables, @@ -2417,11 +2366,11 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, The simple way to model this is to remove SJM-SCAN(...) fanout once we reach the point #2. */ - pos->sjm_scan_need_tables= + sjm_scan_need_tables= new_join_tab->emb_sj_nest->sj_inner_tables | new_join_tab->emb_sj_nest->nested_join->sj_depends_on | new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - pos->sjm_scan_last_inner= idx; + sjm_scan_last_inner= idx; } else { @@ -2444,34 +2393,31 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, mat_read_time += mat_info->materialization_cost.total_cost() + prefix_rec_count * mat_info->lookup_cost.total_cost(); - if (mat_read_time < *current_read_time || join->cur_dups_producing_tables) - { - /* - NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION - elements to join->positions as that makes it hard to return things - back when making one step back in join optimization. That's done - after the QEP has been chosen. 
- */ - pos->sj_strategy= SJ_OPT_MATERIALIZE; - *current_read_time= mat_read_time; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= - ~new_join_tab->emb_sj_nest->sj_inner_tables; - } + /* + NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION + elements to join->positions as that makes it hard to return things + back when making one step back in join optimization. That's done + after the QEP has been chosen. + */ + *read_time= mat_read_time; + *record_count= prefix_rec_count; + *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables; + *strategy= SJ_OPT_MATERIALIZE; + return TRUE; } } /* 4.A SJM-Scan second phase check */ - if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */ - !(pos->sjm_scan_need_tables & remaining_tables)) + if (sjm_scan_need_tables && /* Have SJM-Scan prefix */ + !(sjm_scan_need_tables & remaining_tables)) { TABLE_LIST *mat_nest= - join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest; + join->positions[sjm_scan_last_inner].table->emb_sj_nest; SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; double prefix_cost; double prefix_rec_count; - int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables; + int first_tab= sjm_scan_last_inner + 1 - mat_info->tables; /* Get the prefix cost */ if (first_tab == (int)join->const_tables) { @@ -2496,6 +2442,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, POSITION curpos, dummy; /* Need to re-run best-access-path as we prefix_rec_count has changed */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); for (i= first_tab + mat_info->tables; i <= idx; i++) { best_access_path(join, join->positions[i].table, rem_tables, i, @@ -2504,142 +2451,333 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, prefix_cost += curpos.read_time; } + *strategy= SJ_OPT_MATERIALIZE_SCAN; + *read_time= prefix_cost; + *record_count= prefix_rec_count; + *handled_fanout= mat_nest->sj_inner_tables; + return TRUE; + } + 
return FALSE; +} + + +void LooseScan_picker::set_from_prev(struct st_position *prev) +{ + if (prev->loosescan_picker.is_used) + set_empty(); + else + { + first_loosescan_table= prev->loosescan_picker.first_loosescan_table; + loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables; + } + is_used= FALSE; +} + + +bool LooseScan_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) +{ + POSITION *first= join->positions + first_loosescan_table; + /* + LooseScan strategy can't handle interleaving between tables from the + semi-join that LooseScan is handling and any other tables. + + If we were considering LooseScan for the join prefix (1) + and the table we're adding creates an interleaving (2) + then + stop considering loose scan + */ + if ((first_loosescan_table != MAX_TABLES) && // (1) + (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) + new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) + { + first_loosescan_table= MAX_TABLES; + } + + /* + If we got an option to use LooseScan for the current table, start + considering using LooseScan strategy + */ + if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) + { + first_loosescan_table= idx; + loosescan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + } + + if ((first_loosescan_table != MAX_TABLES) && + !(remaining_tables & loosescan_need_tables) && + (new_join_tab->table->map & loosescan_need_tables)) + { + /* + Ok we have LooseScan plan and also have all LooseScan sj-nest's + inner tables and outer correlated tables into the prefix. 
+ */ + + first= join->positions + first_loosescan_table; + uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); + /* Got a complete LooseScan range. Calculate its cost */ /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - In the second case, we pick this strategy unconditionally because - comparing cost without semi-join duplicate removal with cost with - duplicate removal is not an apples-to-apples comparison. + The same problem as with FirstMatch - we need to save POSITIONs + somewhere but reserving space for all cases would require too + much space. We will re-calculate POSITION structures later on. */ - if (prefix_cost < *current_read_time || join->cur_dups_producing_tables) - { - pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN; - *current_read_time= prefix_cost; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables; + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); + optimize_wo_join_buffering(join, first_loosescan_table, idx, + remaining_tables, + TRUE, //first_alt + disable_jbuf ? join->table_count : + first_loosescan_table + n_tables, + record_count, + read_time); + /* + We don't yet have any other strategies that could handle this + semi-join nest (the other options are Duplicate Elimination or + Materialization, which need at least the same set of tables in + the join prefix to be considered) so unconditionally pick the + LooseScan. 
+ */ + *strategy= SJ_OPT_LOOSE_SCAN; + *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; + return TRUE; + } + return FALSE; +} - } +void Firstmatch_picker::set_from_prev(struct st_position *prev) +{ + if (prev->firstmatch_picker.is_used) + invalidate_firstmatch_prefix(); + else + { + first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table; + first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl; + firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables; } + is_used= FALSE; +} - /* 5. Duplicate Weedout strategy handler */ +bool Firstmatch_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + if (new_join_tab->emb_sj_nest && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && + !join->outer_join) { + const table_map outer_corr_tables= + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on; + const table_map sj_inner_tables= + new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; + /* - Duplicate weedout can be applied after all ON-correlated and - correlated + Enter condition: + 1. The next join tab belongs to semi-join nest + (verified for the encompassing code block above). + 2. We're not in a duplicate producer range yet + 3. All outer tables that + - the subquery is correlated with, or + - referred to from the outer_expr + are in the join prefix + 4. All inner tables are still part of remaining_tables. 
*/ - TABLE_LIST *nest; - if ((nest= new_join_tab->emb_sj_nest)) - { - if (!pos->dupsweedout_tables) - pos->first_dupsweedout_table= idx; - - pos->dupsweedout_tables |= nest->sj_inner_tables | - nest->nested_join->sj_depends_on | - nest->nested_join->sj_corr_tables; - } - - if (pos->dupsweedout_tables) + if (!join->cur_sj_inner_tables && // (2) + !(remaining_tables & outer_corr_tables) && // (3) + (sj_inner_tables == // (4) + ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) { - /* we're in the process of constructing a DuplicateWeedout range */ - TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; - /* and we've entered an inner side of an outer join*/ - if (emb && emb->on_expr) - pos->dupsweedout_tables |= emb->nested_join->used_tables; + /* Start tracking potential FirstMatch range */ + first_firstmatch_table= idx; + firstmatch_need_tables= sj_inner_tables; + first_firstmatch_rtbl= remaining_tables; } - if (pos->dupsweedout_tables && - !(remaining_tables & - ~new_join_tab->table->map & pos->dupsweedout_tables)) + if (in_firstmatch_prefix()) { - /* - Ok, reached a state where we could put a dups weedout point. - Walk back and calculate - - the join cost (this is needed as the accumulated cost may assume - some other duplicate elimination method) - - extra fanout that will be removed by duplicate elimination - - duplicate elimination cost - There are two cases: - 1. We have other strategy/ies to remove all of the duplicates. - 2. We don't. - - We need to calculate the cost in case #2 also because we need to make - choice between this join order and others. 
- */ - uint first_tab= pos->first_dupsweedout_table; - double dups_cost; - double prefix_rec_count; - double sj_inner_fanout= 1.0; - double sj_outer_fanout= 1.0; - uint temptable_rec_size; - if (first_tab == join->const_tables) + if (outer_corr_tables & first_firstmatch_rtbl) { - prefix_rec_count= 1.0; - temptable_rec_size= 0; - dups_cost= 0.0; + /* + Trying to add an sj-inner table whose sj-nest has an outer correlated + table that was not in the prefix. This means FirstMatch can't be used. + */ + invalidate_firstmatch_prefix(); } else { - dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; - temptable_rec_size= 8; /* This is not true but we'll make it so */ + /* Record that we need all of this semi-join's inner tables, too */ + firstmatch_need_tables|= sj_inner_tables; } - - table_map dups_removed_fanout= 0; - for (uint j= pos->first_dupsweedout_table; j <= idx; j++) + + if (in_firstmatch_prefix() && + !(firstmatch_need_tables & remaining_tables)) { - POSITION *p= join->positions + j; - dups_cost += p->read_time; - if (p->table->emb_sj_nest) - { - sj_inner_fanout *= p->records_read; - dups_removed_fanout |= p->table->table->map; - } - else - { - sj_outer_fanout *= p->records_read; - temptable_rec_size += p->table->table->file->ref_length; - } + /* + Got a complete FirstMatch range. + Calculate correct costs and fanout + */ + optimize_wo_join_buffering(join, first_firstmatch_table, idx, + remaining_tables, FALSE, idx, + record_count, + read_time); + /* + We ought to save the alternate POSITIONs produced by + optimize_wo_join_buffering but the problem is that providing save + space uses too much space. Instead, we will re-calculate the + alternate POSITIONs after we've picked the best QEP. 
+ */ + *handled_fanout= firstmatch_need_tables; + /* *record_count and *read_time were set by the above call */ + *strategy= SJ_OPT_FIRST_MATCH; + return TRUE; } + } + } + return FALSE; +} - /* - Add the cost of temptable use. The table will have sj_outer_fanout - records, and we will make - - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. - */ - double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - double one_write_cost= get_tmp_table_write_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); +void Duplicate_weedout_picker::set_from_prev(POSITION *prev) +{ + if (prev->dups_weedout_picker.is_used) + set_empty(); + else + { + dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables; + first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table; + } + is_used= FALSE; +} - double write_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout * one_write_cost; - double full_lookup_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout* sj_inner_fanout * - one_lookup_cost; - dups_cost += write_cost + full_lookup_cost; + +bool Duplicate_weedout_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos + ) +{ + TABLE_LIST *nest; + if ((nest= new_join_tab->emb_sj_nest)) + { + if (!dupsweedout_tables) + first_dupsweedout_table= idx; + + dupsweedout_tables |= nest->sj_inner_tables | + nest->nested_join->sj_depends_on | + nest->nested_join->sj_corr_tables; + } + + if (dupsweedout_tables) + { + /* we're in the process of constructing a DuplicateWeedout range */ + TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; + /* and we've entered an inner side of an outer join*/ + if (emb && emb->on_expr) + dupsweedout_tables |= emb->nested_join->used_tables; + 
} + + /* If this is the last table that we need for DuplicateWeedout range */ + if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map & + dupsweedout_tables)) + { + /* + Ok, reached a state where we could put a dups weedout point. + Walk back and calculate + - the join cost (this is needed as the accumulated cost may assume + some other duplicate elimination method) + - extra fanout that will be removed by duplicate elimination + - duplicate elimination cost + There are two cases: + 1. We have other strategy/ies to remove all of the duplicates. + 2. We don't. - /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - The second part is necessary because this strategy is the last one - to consider (it needs "the most" tables in the prefix) and we can't - leave duplicate-producing tables not handled by any strategy. - */ - if (dups_cost < *current_read_time || join->cur_dups_producing_tables) + We need to calculate the cost in case #2 also because we need to make + choice between this join order and others. 
+ */ + uint first_tab= first_dupsweedout_table; + double dups_cost; + double prefix_rec_count; + double sj_inner_fanout= 1.0; + double sj_outer_fanout= 1.0; + uint temptable_rec_size; + if (first_tab == join->const_tables) + { + prefix_rec_count= 1.0; + temptable_rec_size= 0; + dups_cost= 0.0; + } + else + { + dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + temptable_rec_size= 8; /* This is not true but we'll make it so */ + } + + table_map dups_removed_fanout= 0; + for (uint j= first_dupsweedout_table; j <= idx; j++) + { + POSITION *p= join->positions + j; + dups_cost += p->read_time; + if (p->table->emb_sj_nest) + { + sj_inner_fanout *= p->records_read; + dups_removed_fanout |= p->table->table->map; + } + else { - pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT; - *current_read_time= dups_cost; - *current_record_count= prefix_rec_count * sj_outer_fanout; - join->cur_dups_producing_tables &= ~dups_removed_fanout; + sj_outer_fanout *= p->records_read; + temptable_rec_size += p->table->table->file->ref_length; } } + + /* + Add the cost of temptable use. The table will have sj_outer_fanout + records, and we will make + - sj_outer_fanout table writes + - sj_inner_fanout*sj_outer_fanout lookups. 
+ + */ + double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + double one_write_cost= get_tmp_table_write_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + + double write_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout * one_write_cost; + double full_lookup_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout* sj_inner_fanout * + one_lookup_cost; + dups_cost += write_cost + full_lookup_cost; + + *read_time= dups_cost; + *record_count= prefix_rec_count * sj_outer_fanout; + *handled_fanout= dups_removed_fanout; + *strategy= SJ_OPT_DUPS_WEEDOUT; + return TRUE; } + return FALSE; } @@ -2836,11 +2974,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) { - POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner; + POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner; SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; sjm->is_used= TRUE; sjm->is_sj_scan= TRUE; - first= pos->sjm_scan_last_inner - sjm->tables + 1; + first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1; memcpy(join->best_positions + first, sjm->positions, sizeof(POSITION) * sjm->tables); join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; @@ -2878,7 +3016,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) { - first= pos->first_firstmatch_table; + first= pos->firstmatch_picker.first_firstmatch_table; join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; join->best_positions[first].n_sj_tables= tablenr - first + 1; POSITION dummy; // For loose scan paths @@ -2911,7 +3049,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) { - first= pos->first_loosescan_table; + first= pos->loosescan_picker.first_loosescan_table; 
POSITION *first_pos= join->best_positions + first; POSITION loose_scan_pos; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: @@ -2950,7 +3088,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) Duplicate Weedout starting at pos->first_dupsweedout_table, ending at this table. */ - first= pos->first_dupsweedout_table; + first= pos->dups_weedout_picker.first_dupsweedout_table; join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; join->best_positions[first].n_sj_tables= tablenr - first + 1; } @@ -3893,8 +4031,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, /* Calculate key length */ keylen= 0; - keyno= pos->loosescan_key; - for (uint kp=0; kp < pos->loosescan_parts; kp++) + keyno= pos->loosescan_picker.loosescan_key; + for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) keylen += tab->table->key_info[keyno].key_part[kp].store_length; tab->loosescan_key_len= keylen; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 571fcbaa935..7d560588995 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -263,8 +263,8 @@ public: { pos->records_read= best_loose_scan_records; pos->key= best_loose_scan_start_key; - pos->loosescan_key= best_loose_scan_key; - pos->loosescan_parts= best_max_loose_keypart + 1; + pos->loosescan_picker.loosescan_key= best_loose_scan_key; + pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; pos->table= tab; // todo need ref_depend_map ? 
@@ -277,8 +277,7 @@ public: }; -void advance_sj_state(JOIN *join, const table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, +void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx, double *current_record_count, double *current_read_time, POSITION *loose_scan_pos); void restore_prev_sj_state(const table_map remaining_tables, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b9088a7ce60..ce20dfea32e 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const static bool find_best(JOIN *join,table_map rest_tables,uint index, double record_count,double read_time); static uint cache_record_length(JOIN *join,uint index); -static bool get_best_combination(JOIN *join); +bool get_best_combination(JOIN *join); static store_key *get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, KEY_PART_INFO *key_part, uchar *key_buff, @@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].records_read=1.0; /* This is a const table */ join->positions[idx].ref_depend_map= 0; - join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ +// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ join->positions[idx].sj_strategy= SJ_OPT_NONE; join->positions[idx].use_join_buffer= FALSE; @@ -5533,7 +5533,7 @@ best_access_path(JOIN *join, pos->key= best_key; pos->table= s; pos->ref_depend_map= best_ref_depends_map; - pos->loosescan_key= MAX_KEY; + pos->loosescan_picker.loosescan_key= MAX_KEY; pos->use_join_buffer= best_uses_jbuf; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables) /* compute the cost of the new plan extended with 's' */ record_count*= join->positions[idx].records_read; read_time+= join->positions[idx].read_time; - advance_sj_state(join, join_tables, s, idx, &record_count, 
&read_time, + advance_sj_state(join, join_tables, idx, &record_count, &read_time, &loose_scan_pos); join_tables&= ~(s->table->map); @@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join, current_record_count= record_count * position->records_read; current_read_time= read_time + position->read_time; - advance_sj_state(join, remaining_tables, s, idx, &current_record_count, + advance_sj_state(join, remaining_tables, idx, &current_record_count, &current_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ @@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count, */ double current_record_count=record_count*records; double current_read_time=read_time+best; - advance_sj_state(join, rest_tables, s, idx, &current_record_count, + advance_sj_state(join, rest_tables, idx, &current_record_count, &current_read_time, &loose_scan_pos); if (best_record_count > current_record_count || @@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL; TRUE Out of memory */ -static bool +bool get_best_combination(JOIN *join) { uint tablenr; @@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join) *j= *join->best_positions[tablenr].table; -#if 0 -/* SJ-Materialization is represented with join tab ranges */ - if (j->sj_strategy == SJ_OPT_MATERIALIZE || - j->sj_strategy == SJ_OPT_MATERIALIZE) - j->sj_strategy= SJ_OPT_NONE; -#endif - j->bush_root_tab= sjm_nest_root; form=join->table[tablenr]=j->table; @@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join) (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { j->type=JT_ALL; - j->index= join->best_positions[tablenr].loosescan_key; + j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key; if (tablenr != join->const_tables) join->full_join=1; } diff --git a/sql/sql_select.h b/sql/sql_select.h index 5476ef9b46c..21c5dc7d65e 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -158,6 +158,17 @@ enum enum_nested_loop_state }; +/* Possible sj_strategy values */ 
+enum sj_strategy_enum +{ + SJ_OPT_NONE=0, + SJ_OPT_DUPS_WEEDOUT=1, + SJ_OPT_LOOSE_SCAN =2, + SJ_OPT_FIRST_MATCH =3, + SJ_OPT_MATERIALIZE =4, + SJ_OPT_MATERIALIZE_SCAN=5 +}; + /* Values for JOIN_TAB::packed_info */ #define TAB_INFO_HAVE_VALUE 1 #define TAB_INFO_USING_INDEX 2 @@ -374,7 +385,7 @@ typedef struct st_join_table { POSITION::sj_strategy field. This field is set up by the fix_semijoin_strategies_for_picked_join_order. */ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; uint n_sj_tables; @@ -496,6 +507,214 @@ enum_nested_loop_state end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); +/* psergey */ + + +struct st_position; + +class Semi_join_strategy_picker +{ +public: + /* Called when starting to build a new join prefix */ + virtual void set_empty() = 0; + + /* + Update internal state after another table has been added to the join + prefix + */ + virtual void set_from_prev(struct st_position *prev) = 0; + + virtual bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) = 0; + + virtual void mark_used() = 0; + + virtual ~Semi_join_strategy_picker() {} +}; + + +/* + Duplicate Weedout strategy optimization state +*/ + +class Duplicate_weedout_picker : public Semi_join_strategy_picker +{ + /* The first table that the strategy will need to handle */ + uint first_dupsweedout_table; + + /* + Tables that we will need to have in the prefix to do the weedout step + (all inner and all outer that the involved semi-joins are correlated with) + */ + table_map dupsweedout_tables; + + bool is_used; +public: + void set_empty() + { + dupsweedout_tables= 0; + first_dupsweedout_table= MAX_TABLES; + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + 
const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *stratey, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Firstmatch_picker : public Semi_join_strategy_picker +{ + /* + Index of the first inner table that we intend to handle with this + strategy + */ + uint first_firstmatch_table; + /* + Tables that were not in the join prefix when we've started considering + FirstMatch strategy. + */ + table_map first_firstmatch_rtbl; + /* + Tables that need to be in the prefix before we can calculate the cost + of using FirstMatch strategy. + */ + table_map firstmatch_need_tables; + + bool is_used; + + bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } + void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } +public: + void set_empty() + { + invalidate_firstmatch_prefix(); + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class LooseScan_picker : public Semi_join_strategy_picker +{ + /* The first (i.e. driving) table we're doing loose scan for */ + uint first_loosescan_table; + /* + Tables that need to be in the prefix before we can calculate the cost + of using LooseScan strategy. + */ + table_map loosescan_need_tables; + + /* + keyno - Planning to do LooseScan on this key. 
If keyuse is NULL then + this is a full index scan, otherwise this is a ref+loosescan + scan (and keyno matches the KEUSE's) + MAX_KEY - Not doing a LooseScan + */ + uint loosescan_key; // final (one for strategy instance ) + uint loosescan_parts; /* Number of keyparts to be kept distinct */ + + bool is_used; +public: + void set_empty() + { + first_loosescan_table= MAX_TABLES; + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend class Loose_scan_opt; + friend void best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + uint idx, + bool disable_jbuf, + double record_count, + struct st_position *pos, + struct st_position *loose_scan_pos); + friend bool get_best_combination(JOIN *join); + friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after); + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Sj_materialization_picker : public Semi_join_strategy_picker +{ + bool is_used; + + /* The last inner table (valid once we're after it) */ + uint sjm_scan_last_inner; + /* + Tables that we need to have in the prefix to calculate the correct cost. + Basically, we need all inner tables and outer tables mentioned in the + semi-join's ON expression so we can correctly account for fanout. 
+ */ + table_map sjm_scan_need_tables; + +public: + void set_empty() + { + sjm_scan_need_tables= 0; + LINT_INIT(sjm_scan_last_inner); + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + /** Information about a position of table within a join order. Used in join @@ -503,6 +722,9 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), */ typedef struct st_position { + /* The table that's put into join order */ + JOIN_TAB *table; + /* The "fanout": number of output rows that will be produced (after pushed down selection condition is applied) per each row combination of @@ -516,7 +738,10 @@ typedef struct st_position number the access method will be invoked. */ double read_time; - JOIN_TAB *table; + + /* Cumulative cost and record count for the join prefix */ + COST_VECT prefix_cost; + double prefix_record_count; /* NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used. @@ -526,14 +751,13 @@ typedef struct st_position /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map; - - bool use_join_buffer; - - - /* These form a stack of partial join order costs and output sizes */ - COST_VECT prefix_cost; - double prefix_record_count; - + + /* + TRUE <=> join buffering will be used. At the moment this is based on + *very* imprecise guesses made in best_access_path(). + */ + bool use_join_buffer; + /* Current optimization state: Semi-join strategy to be used for this and preceding join tables. @@ -546,7 +770,8 @@ typedef struct st_position this applies to. The values of covered_preceding_positions->sj_strategy must be ignored. 
*/ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; + /* Valid only after fix_semijoin_strategies_for_picked_join_order() call: if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that @@ -554,67 +779,15 @@ typedef struct st_position */ uint n_sj_tables; -/* LooseScan strategy members */ - - /* The first (i.e. driving) table we're doing loose scan for */ - uint first_loosescan_table; - /* - Tables that need to be in the prefix before we can calculate the cost - of using LooseScan strategy. - */ - table_map loosescan_need_tables; - - /* - keyno - Planning to do LooseScan on this key. If keyuse is NULL then - this is a full index scan, otherwise this is a ref+loosescan - scan (and keyno matches the KEUSE's) - MAX_KEY - Not doing a LooseScan - */ - uint loosescan_key; // final (one for strategy instance ) - uint loosescan_parts; /* Number of keyparts to be kept distinct */ - -/* FirstMatch strategy */ - /* - Index of the first inner table that we intend to handle with this - strategy - */ - uint first_firstmatch_table; - /* - Tables that were not in the join prefix when we've started considering - FirstMatch strategy. - */ - table_map first_firstmatch_rtbl; - /* - Tables that need to be in the prefix before we can calculate the cost - of using FirstMatch strategy. 
- */ - table_map firstmatch_need_tables; - - bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } - void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } - -/* Duplicate Weedout strategy */ - /* The first table that the strategy will need to handle */ - uint first_dupsweedout_table; - /* - Tables that we will need to have in the prefix to do the weedout step - (all inner and all outer that the involved semi-joins are correlated with) - */ - table_map dupsweedout_tables; - -/* SJ-Materialization-Scan strategy */ - /* The last inner table (valid once we're after it) */ - uint sjm_scan_last_inner; - /* - Tables that we need to have in the prefix to calculate the correct cost. - Basically, we need all inner tables and outer tables mentioned in the - semi-join's ON expression so we can correctly account for fanout. - */ - table_map sjm_scan_need_tables; - table_map prefix_dups_producing_tables; -} POSITION; + table_map inner_tables_handled_with_other_sjs; + + Duplicate_weedout_picker dups_weedout_picker; + Firstmatch_picker firstmatch_picker; + LooseScan_picker loosescan_picker; + Sj_materialization_picker sjmat_picker; +} POSITION; typedef struct st_rollup { @@ -626,18 +799,6 @@ typedef struct st_rollup } ROLLUP; -#define SJ_OPT_NONE 0 -#define SJ_OPT_DUPS_WEEDOUT 1 -#define SJ_OPT_LOOSE_SCAN 2 -#define SJ_OPT_FIRST_MATCH 3 -#define SJ_OPT_MATERIALIZE 4 -#define SJ_OPT_MATERIALIZE_SCAN 5 - -inline bool sj_is_materialize_strategy(uint strategy) -{ - return strategy >= SJ_OPT_MATERIALIZE; -} - class JOIN_TAB_RANGE: public Sql_alloc { public: @@ -808,7 +969,7 @@ public: they produce. */ table_map cur_dups_producing_tables; - + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ /* |