From 7f746fbe74e08d79217bdf7c7cba628e3b6bef85 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Sat, 12 Nov 2011 20:50:11 +0200 Subject: BUG#887468: Second assertion `keypart_map' failed in maria_rkey with semijoin - in advance_sj_state: Do not try to construct LooseScan strategy if we're already behind the last LooseScan table. --- sql/opt_subselect.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'sql') diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index aa0a8152bbb..fdda502e706 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2339,7 +2339,8 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, } if ((pos->first_loosescan_table != MAX_TABLES) && - !(remaining_tables & pos->loosescan_need_tables)) + !(remaining_tables & pos->loosescan_need_tables) && + (pos->table->table->map & pos->loosescan_need_tables)) { /* Ok we have LooseScan plan and also have all LooseScan sj-nest's -- cgit v1.2.1 From 694ce95557bc174dae2bc32279024102848cf5ee Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Wed, 23 Nov 2011 04:25:52 +0400 Subject: Semi-join optimizations code cleanup: - Break down POSITION/advance_sj_state() into four classes representing potential semi-join strategies. - Treat all strategies uniformly (before, DuplicateWeedout was special as it was the catch-all strategy. Now, we're still relying on it to be the catch-all, but are able to function,e.g. with firstmatch=on,duplicate_weedout=off. - Update test results (checked) --- sql/opt_subselect.cc | 784 ++++++++++++++++++++++++++++++--------------------- sql/opt_subselect.h | 7 +- sql/sql_select.cc | 23 +- sql/sql_select.h | 327 +++++++++++++++------ 4 files changed, 716 insertions(+), 425 deletions(-) (limited to 'sql') diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index fdda502e706..c7d07c21746 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2168,229 +2168,178 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) See setup_semijoin_dups_elimination() for a description of what kinds of join prefixes each strategy can handle. */ +bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables) +{ + for (int i= (int)idx; i >= 0; i--) + { + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= prefix[i].table->emb_sj_nest)) + { + if (inner_tables & emb_sj_nest->sj_inner_tables) + return !test(inner_tables == emb_sj_nest->sj_inner_tables); + } + } + return FALSE; +} -void advance_sj_state(JOIN *join, table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, - double *current_record_count, double *current_read_time, + +void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, double *current_read_time, POSITION *loose_scan_pos) { - TABLE_LIST *emb_sj_nest; POSITION *pos= join->positions + idx; - remaining_tables &= ~new_join_tab->table->map; - bool disable_jbuf= join->thd->variables.join_cache_level == 0; - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->sj_strategy= SJ_OPT_NONE; - - pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + const JOIN_TAB *new_join_tab= pos->table; + Semi_join_strategy_picker *pickers[]= + { + &pos->firstmatch_picker, + &pos->loosescan_picker, + &pos->sjmat_picker, + &pos->dups_weedout_picker, + NULL, + }; - /* We're performing optimization inside SJ-Materialization nest */ if (join->emb_sjm_nest) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; + /* + We're performing optimization inside SJ-Materialization nest: + - there are no other semi-joins inside semi-join nests + - attempts to build semi-join strategies here will confuse + the optimizer, so bail out. + */ return; } - /* Initialize the state or copy it from prev. tables */ - if (idx == join->const_tables) + /* + Update join->cur_sj_inner_tables (Used by FirstMatch in this function and + LooseScan detector in best_access_path) + */ + remaining_tables &= ~new_join_tab->table->map; + pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= new_join_tab->emb_sj_nest)) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; - LINT_INIT(pos->sjm_scan_last_inner); + /// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; + + /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ + /// if (!(remaining_tables & + /// emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + /// join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - else + + Semi_join_strategy_picker **strategy; + if (idx == join->const_tables) { - // FirstMatch - pos->first_firstmatch_table= - (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ? - MAX_TABLES : pos[-1].first_firstmatch_table; - pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl; - pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables; - - // LooseScan - pos->first_loosescan_table= - (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ? - MAX_TABLES : pos[-1].first_loosescan_table; - pos->loosescan_need_tables= pos[-1].loosescan_need_tables; - - // SJ-Materialization Scan - pos->sjm_scan_need_tables= - (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ? - 0 : pos[-1].sjm_scan_need_tables; - pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner; - - // Duplicate Weedout - pos->dupsweedout_tables= pos[-1].dupsweedout_tables; - pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table; + /* First table, initialize pickers */ + for (strategy= pickers; *strategy != NULL; strategy++) + (*strategy)->set_empty(); + pos->inner_tables_handled_with_other_sjs= 0; } - - table_map handled_by_fm_or_ls= 0; - /* FirstMatch Strategy */ - if (new_join_tab->emb_sj_nest && - optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && - !join->outer_join) + else { - const table_map outer_corr_tables= - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on; - const table_map sj_inner_tables= - new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; - - /* - Enter condition: - 1. The next join tab belongs to semi-join nest - (verified for the encompassing code block above). - 2. We're not in a duplicate producer range yet - 3. All outer tables that - - the subquery is correlated with, or - - referred to from the outer_expr - are in the join prefix - 4. All inner tables are still part of remaining_tables. - */ - if (!join->cur_sj_inner_tables && // (2) - !(remaining_tables & outer_corr_tables) && // (3) - (sj_inner_tables == // (4) - ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) + for (strategy= pickers; *strategy != NULL; strategy++) { - /* Start tracking potential FirstMatch range */ - pos->first_firstmatch_table= idx; - pos->firstmatch_need_tables= sj_inner_tables; - pos->first_firstmatch_rtbl= remaining_tables; + (*strategy)->set_from_prev(pos - 1); } + pos->inner_tables_handled_with_other_sjs= + pos[-1].inner_tables_handled_with_other_sjs; + } + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + + { + pos->sj_strategy= SJ_OPT_NONE; - if (pos->in_firstmatch_prefix()) + for (strategy= pickers; *strategy != NULL; strategy++) { - if (outer_corr_tables & pos->first_firstmatch_rtbl) - { - /* - Trying to add an sj-inner table whose sj-nest has an outer correlated - table that was not in the prefix. This means FirstMatch can't be used. - */ - pos->invalidate_firstmatch_prefix(); - } - else - { - /* Record that we need all of this semi-join's inner tables, too */ - pos->firstmatch_need_tables|= sj_inner_tables; - } - - if (pos->in_firstmatch_prefix() && - !(pos->firstmatch_need_tables & remaining_tables)) + table_map handled_fanout; + sj_strategy_enum sj_strategy; + double rec_count= *current_record_count; + double read_time= *current_read_time; + if ((*strategy)->check_qep(join, idx, remaining_tables, + new_join_tab, + &rec_count, + &read_time, + &handled_fanout, + &sj_strategy, + loose_scan_pos)) { /* - Got a complete FirstMatch range. - Calculate correct costs and fanout - */ - optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx, - remaining_tables, FALSE, idx, - current_record_count, - current_read_time); - /* - We don't yet know what are the other strategies, so pick the - FirstMatch. - - We ought to save the alternate POSITIONs produced by - optimize_wo_join_buffering but the problem is that providing save - space uses too much space. Instead, we will re-calculate the - alternate POSITIONs after we've picked the best QEP. + It's possible to use the strategy. Use it, if + - it removes semi-join fanout that was not removed before + - using it is cheaper than using something else, + and {if some other strategy has removed fanout + that this strategy is trying to remove, then it + did remove the fanout only for one semi-join} + This is to avoid a situation when + 1. strategy X removes fanout for semijoin X,Y + 2. using strategy Z is cheaper, but it only removes + fanout from semijoin X. + 3. We have no clue what to do about fanount of semi-join Y. */ - pos->sj_strategy= SJ_OPT_FIRST_MATCH; - handled_by_fm_or_ls= pos->firstmatch_need_tables; + if ((join->cur_dups_producing_tables & handled_fanout) || + (read_time < *current_read_time && + !(handled_fanout & pos->inner_tables_handled_with_other_sjs))) + { + /* Mark strategy as used */ + (*strategy)->mark_used(); + pos->sj_strategy= sj_strategy; + *current_read_time= read_time; + *current_record_count= rec_count; + join->cur_dups_producing_tables &= ~handled_fanout; + //TODO: update bitmap of semi-joins that were handled together with + // others. + if (is_multiple_semi_joins(join->positions, idx, handled_fanout)) + pos->inner_tables_handled_with_other_sjs |= handled_fanout; + } + else + { + /* We decided not to apply the strategy. */ + (*strategy)->set_empty(); + } } } } - /* LooseScan Strategy */ - { - POSITION *first=join->positions+pos->first_loosescan_table; - /* - LooseScan strategy can't handle interleaving between tables from the - semi-join that LooseScan is handling and any other tables. - - If we were considering LooseScan for the join prefix (1) - and the table we're adding creates an interleaving (2) - then - stop considering loose scan - */ - if ((pos->first_loosescan_table != MAX_TABLES) && // (1) - (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) - new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) - { - pos->first_loosescan_table= MAX_TABLES; - } - - /* - If we got an option to use LooseScan for the current table, start - considering using LooseScan strategy - */ - if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) - { - pos->first_loosescan_table= idx; - pos->loosescan_need_tables= - new_join_tab->emb_sj_nest->sj_inner_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on | - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - } - - if ((pos->first_loosescan_table != MAX_TABLES) && - !(remaining_tables & pos->loosescan_need_tables) && - (pos->table->table->map & pos->loosescan_need_tables)) - { - /* - Ok we have LooseScan plan and also have all LooseScan sj-nest's - inner tables and outer correlated tables into the prefix. - */ - - first=join->positions + pos->first_loosescan_table; - uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); - /* Got a complete LooseScan range. Calculate its cost */ - /* - The same problem as with FirstMatch - we need to save POSITIONs - somewhere but reserving space for all cases would require too - much space. We will re-calculate POSITION structures later on. - */ - optimize_wo_join_buffering(join, pos->first_loosescan_table, idx, - remaining_tables, - TRUE, //first_alt - disable_jbuf ? join->table_count : - pos->first_loosescan_table + n_tables, - current_record_count, - current_read_time); - /* - We don't yet have any other strategies that could handle this - semi-join nest (the other options are Duplicate Elimination or - Materialization, which need at least the same set of tables in - the join prefix to be considered) so unconditionally pick the - LooseScan. - */ - pos->sj_strategy= SJ_OPT_LOOSE_SCAN; - handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables; - } - } - - /* - Update join->cur_sj_inner_tables (Used by FirstMatch in this function and - LooseScan detector in best_access_path) - */ if ((emb_sj_nest= new_join_tab->emb_sj_nest)) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; - join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - join->cur_dups_producing_tables &= ~handled_by_fm_or_ls; - /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */ + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; +} + + +void Sj_materialization_picker::set_from_prev(struct st_position *prev) +{ + if (prev->sjmat_picker.is_used) + set_empty(); + else + { + sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; + sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner; + } + is_used= FALSE; +} + + +bool Sj_materialization_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ bool sjm_scan; SJ_MATERIALIZATION_INFO *mat_info; if ((mat_info= at_sjmat_pos(join, remaining_tables, @@ -2417,11 +2366,11 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, The simple way to model this is to remove SJM-SCAN(...) fanout once we reach the point #2. */ - pos->sjm_scan_need_tables= + sjm_scan_need_tables= new_join_tab->emb_sj_nest->sj_inner_tables | new_join_tab->emb_sj_nest->nested_join->sj_depends_on | new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - pos->sjm_scan_last_inner= idx; + sjm_scan_last_inner= idx; } else { @@ -2444,34 +2393,31 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, mat_read_time += mat_info->materialization_cost.total_cost() + prefix_rec_count * mat_info->lookup_cost.total_cost(); - if (mat_read_time < *current_read_time || join->cur_dups_producing_tables) - { - /* - NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION - elements to join->positions as that makes it hard to return things - back when making one step back in join optimization. That's done - after the QEP has been chosen. - */ - pos->sj_strategy= SJ_OPT_MATERIALIZE; - *current_read_time= mat_read_time; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= - ~new_join_tab->emb_sj_nest->sj_inner_tables; - } + /* + NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION + elements to join->positions as that makes it hard to return things + back when making one step back in join optimization. That's done + after the QEP has been chosen. + */ + *read_time= mat_read_time; + *record_count= prefix_rec_count; + *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables; + *strategy= SJ_OPT_MATERIALIZE; + return TRUE; } } /* 4.A SJM-Scan second phase check */ - if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */ - !(pos->sjm_scan_need_tables & remaining_tables)) + if (sjm_scan_need_tables && /* Have SJM-Scan prefix */ + !(sjm_scan_need_tables & remaining_tables)) { TABLE_LIST *mat_nest= - join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest; + join->positions[sjm_scan_last_inner].table->emb_sj_nest; SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; double prefix_cost; double prefix_rec_count; - int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables; + int first_tab= sjm_scan_last_inner + 1 - mat_info->tables; /* Get the prefix cost */ if (first_tab == (int)join->const_tables) { @@ -2496,6 +2442,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, POSITION curpos, dummy; /* Need to re-run best-access-path as we prefix_rec_count has changed */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); for (i= first_tab + mat_info->tables; i <= idx; i++) { best_access_path(join, join->positions[i].table, rem_tables, i, @@ -2504,142 +2451,333 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, prefix_cost += curpos.read_time; } + *strategy= SJ_OPT_MATERIALIZE_SCAN; + *read_time= prefix_cost; + *record_count= prefix_rec_count; + *handled_fanout= mat_nest->sj_inner_tables; + return TRUE; + } + return FALSE; +} + + +void LooseScan_picker::set_from_prev(struct st_position *prev) +{ + if (prev->loosescan_picker.is_used) + set_empty(); + else + { + first_loosescan_table= prev->loosescan_picker.first_loosescan_table; + loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables; + } + is_used= FALSE; +} + + +bool LooseScan_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) +{ + POSITION *first= join->positions + first_loosescan_table; + /* + LooseScan strategy can't handle interleaving between tables from the + semi-join that LooseScan is handling and any other tables. + + If we were considering LooseScan for the join prefix (1) + and the table we're adding creates an interleaving (2) + then + stop considering loose scan + */ + if ((first_loosescan_table != MAX_TABLES) && // (1) + (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) + new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) + { + first_loosescan_table= MAX_TABLES; + } + + /* + If we got an option to use LooseScan for the current table, start + considering using LooseScan strategy + */ + if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) + { + first_loosescan_table= idx; + loosescan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + } + + if ((first_loosescan_table != MAX_TABLES) && + !(remaining_tables & loosescan_need_tables) && + (new_join_tab->table->map & loosescan_need_tables)) + { + /* + Ok we have LooseScan plan and also have all LooseScan sj-nest's + inner tables and outer correlated tables into the prefix. + */ + + first= join->positions + first_loosescan_table; + uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); + /* Got a complete LooseScan range. Calculate its cost */ /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - In the second case, we pick this strategy unconditionally because - comparing cost without semi-join duplicate removal with cost with - duplicate removal is not an apples-to-apples comparison. + The same problem as with FirstMatch - we need to save POSITIONs + somewhere but reserving space for all cases would require too + much space. We will re-calculate POSITION structures later on. */ - if (prefix_cost < *current_read_time || join->cur_dups_producing_tables) - { - pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN; - *current_read_time= prefix_cost; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables; + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); + optimize_wo_join_buffering(join, first_loosescan_table, idx, + remaining_tables, + TRUE, //first_alt + disable_jbuf ? join->table_count : + first_loosescan_table + n_tables, + record_count, + read_time); + /* + We don't yet have any other strategies that could handle this + semi-join nest (the other options are Duplicate Elimination or + Materialization, which need at least the same set of tables in + the join prefix to be considered) so unconditionally pick the + LooseScan. + */ + *strategy= SJ_OPT_LOOSE_SCAN; + *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; + return TRUE; + } + return FALSE; +} - } +void Firstmatch_picker::set_from_prev(struct st_position *prev) +{ + if (prev->firstmatch_picker.is_used) + invalidate_firstmatch_prefix(); + else + { + first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table; + first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl; + firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables; } + is_used= FALSE; +} - /* 5. Duplicate Weedout strategy handler */ +bool Firstmatch_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + if (new_join_tab->emb_sj_nest && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && + !join->outer_join) { + const table_map outer_corr_tables= + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on; + const table_map sj_inner_tables= + new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; + /* - Duplicate weedout can be applied after all ON-correlated and - correlated + Enter condition: + 1. The next join tab belongs to semi-join nest + (verified for the encompassing code block above). + 2. We're not in a duplicate producer range yet + 3. All outer tables that + - the subquery is correlated with, or + - referred to from the outer_expr + are in the join prefix + 4. All inner tables are still part of remaining_tables. */ - TABLE_LIST *nest; - if ((nest= new_join_tab->emb_sj_nest)) - { - if (!pos->dupsweedout_tables) - pos->first_dupsweedout_table= idx; - - pos->dupsweedout_tables |= nest->sj_inner_tables | - nest->nested_join->sj_depends_on | - nest->nested_join->sj_corr_tables; - } - - if (pos->dupsweedout_tables) + if (!join->cur_sj_inner_tables && // (2) + !(remaining_tables & outer_corr_tables) && // (3) + (sj_inner_tables == // (4) + ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) { - /* we're in the process of constructing a DuplicateWeedout range */ - TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; - /* and we've entered an inner side of an outer join*/ - if (emb && emb->on_expr) - pos->dupsweedout_tables |= emb->nested_join->used_tables; + /* Start tracking potential FirstMatch range */ + first_firstmatch_table= idx; + firstmatch_need_tables= sj_inner_tables; + first_firstmatch_rtbl= remaining_tables; } - if (pos->dupsweedout_tables && - !(remaining_tables & - ~new_join_tab->table->map & pos->dupsweedout_tables)) + if (in_firstmatch_prefix()) { - /* - Ok, reached a state where we could put a dups weedout point. - Walk back and calculate - - the join cost (this is needed as the accumulated cost may assume - some other duplicate elimination method) - - extra fanout that will be removed by duplicate elimination - - duplicate elimination cost - There are two cases: - 1. We have other strategy/ies to remove all of the duplicates. - 2. We don't. - - We need to calculate the cost in case #2 also because we need to make - choice between this join order and others. - */ - uint first_tab= pos->first_dupsweedout_table; - double dups_cost; - double prefix_rec_count; - double sj_inner_fanout= 1.0; - double sj_outer_fanout= 1.0; - uint temptable_rec_size; - if (first_tab == join->const_tables) + if (outer_corr_tables & first_firstmatch_rtbl) { - prefix_rec_count= 1.0; - temptable_rec_size= 0; - dups_cost= 0.0; + /* + Trying to add an sj-inner table whose sj-nest has an outer correlated + table that was not in the prefix. This means FirstMatch can't be used. + */ + invalidate_firstmatch_prefix(); } else { - dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; - temptable_rec_size= 8; /* This is not true but we'll make it so */ + /* Record that we need all of this semi-join's inner tables, too */ + firstmatch_need_tables|= sj_inner_tables; } - - table_map dups_removed_fanout= 0; - for (uint j= pos->first_dupsweedout_table; j <= idx; j++) + + if (in_firstmatch_prefix() && + !(firstmatch_need_tables & remaining_tables)) { - POSITION *p= join->positions + j; - dups_cost += p->read_time; - if (p->table->emb_sj_nest) - { - sj_inner_fanout *= p->records_read; - dups_removed_fanout |= p->table->table->map; - } - else - { - sj_outer_fanout *= p->records_read; - temptable_rec_size += p->table->table->file->ref_length; - } + /* + Got a complete FirstMatch range. + Calculate correct costs and fanout + */ + optimize_wo_join_buffering(join, first_firstmatch_table, idx, + remaining_tables, FALSE, idx, + record_count, + read_time); + /* + We ought to save the alternate POSITIONs produced by + optimize_wo_join_buffering but the problem is that providing save + space uses too much space. Instead, we will re-calculate the + alternate POSITIONs after we've picked the best QEP. + */ + *handled_fanout= firstmatch_need_tables; + /* *record_count and *read_time were set by the above call */ + *strategy= SJ_OPT_FIRST_MATCH; + return TRUE; } + } + } + return FALSE; +} - /* - Add the cost of temptable use. The table will have sj_outer_fanout - records, and we will make - - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. - */ - double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - double one_write_cost= get_tmp_table_write_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); +void Duplicate_weedout_picker::set_from_prev(POSITION *prev) +{ + if (prev->dups_weedout_picker.is_used) + set_empty(); + else + { + dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables; + first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table; + } + is_used= FALSE; +} - double write_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout * one_write_cost; - double full_lookup_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout* sj_inner_fanout * - one_lookup_cost; - dups_cost += write_cost + full_lookup_cost; + +bool Duplicate_weedout_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos + ) +{ + TABLE_LIST *nest; + if ((nest= new_join_tab->emb_sj_nest)) + { + if (!dupsweedout_tables) + first_dupsweedout_table= idx; + + dupsweedout_tables |= nest->sj_inner_tables | + nest->nested_join->sj_depends_on | + nest->nested_join->sj_corr_tables; + } + + if (dupsweedout_tables) + { + /* we're in the process of constructing a DuplicateWeedout range */ + TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; + /* and we've entered an inner side of an outer join*/ + if (emb && emb->on_expr) + dupsweedout_tables |= emb->nested_join->used_tables; + } + + /* If this is the last table that we need for DuplicateWeedout range */ + if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map & + dupsweedout_tables)) + { + /* + Ok, reached a state where we could put a dups weedout point. + Walk back and calculate + - the join cost (this is needed as the accumulated cost may assume + some other duplicate elimination method) + - extra fanout that will be removed by duplicate elimination + - duplicate elimination cost + There are two cases: + 1. We have other strategy/ies to remove all of the duplicates. + 2. We don't. - /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - The second part is necessary because this strategy is the last one - to consider (it needs "the most" tables in the prefix) and we can't - leave duplicate-producing tables not handled by any strategy. - */ - if (dups_cost < *current_read_time || join->cur_dups_producing_tables) + We need to calculate the cost in case #2 also because we need to make + choice between this join order and others. + */ + uint first_tab= first_dupsweedout_table; + double dups_cost; + double prefix_rec_count; + double sj_inner_fanout= 1.0; + double sj_outer_fanout= 1.0; + uint temptable_rec_size; + if (first_tab == join->const_tables) + { + prefix_rec_count= 1.0; + temptable_rec_size= 0; + dups_cost= 0.0; + } + else + { + dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + temptable_rec_size= 8; /* This is not true but we'll make it so */ + } + + table_map dups_removed_fanout= 0; + for (uint j= first_dupsweedout_table; j <= idx; j++) + { + POSITION *p= join->positions + j; + dups_cost += p->read_time; + if (p->table->emb_sj_nest) + { + sj_inner_fanout *= p->records_read; + dups_removed_fanout |= p->table->table->map; + } + else { - pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT; - *current_read_time= dups_cost; - *current_record_count= prefix_rec_count * sj_outer_fanout; - join->cur_dups_producing_tables &= ~dups_removed_fanout; + sj_outer_fanout *= p->records_read; + temptable_rec_size += p->table->table->file->ref_length; } } + + /* + Add the cost of temptable use. The table will have sj_outer_fanout + records, and we will make + - sj_outer_fanout table writes + - sj_inner_fanout*sj_outer_fanout lookups. + + */ + double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + double one_write_cost= get_tmp_table_write_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + + double write_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout * one_write_cost; + double full_lookup_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout* sj_inner_fanout * + one_lookup_cost; + dups_cost += write_cost + full_lookup_cost; + + *read_time= dups_cost; + *record_count= prefix_rec_count * sj_outer_fanout; + *handled_fanout= dups_removed_fanout; + *strategy= SJ_OPT_DUPS_WEEDOUT; + return TRUE; } + return FALSE; } @@ -2836,11 +2974,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) { - POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner; + POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner; SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; sjm->is_used= TRUE; sjm->is_sj_scan= TRUE; - first= pos->sjm_scan_last_inner - sjm->tables + 1; + first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1; memcpy(join->best_positions + first, sjm->positions, sizeof(POSITION) * sjm->tables); join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; @@ -2878,7 +3016,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) { - first= pos->first_firstmatch_table; + first= pos->firstmatch_picker.first_firstmatch_table; join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; join->best_positions[first].n_sj_tables= tablenr - first + 1; POSITION dummy; // For loose scan paths @@ -2911,7 +3049,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) { - first= pos->first_loosescan_table; + first= pos->loosescan_picker.first_loosescan_table; POSITION *first_pos= join->best_positions + first; POSITION loose_scan_pos; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: @@ -2950,7 +3088,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) Duplicate Weedout starting at pos->first_dupsweedout_table, ending at this table. */ - first= pos->first_dupsweedout_table; + first= pos->dups_weedout_picker.first_dupsweedout_table; join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; join->best_positions[first].n_sj_tables= tablenr - first + 1; } @@ -3893,8 +4031,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, /* Calculate key length */ keylen= 0; - keyno= pos->loosescan_key; - for (uint kp=0; kp < pos->loosescan_parts; kp++) + keyno= pos->loosescan_picker.loosescan_key; + for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) keylen += tab->table->key_info[keyno].key_part[kp].store_length; tab->loosescan_key_len= keylen; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 571fcbaa935..7d560588995 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -263,8 +263,8 @@ public: { pos->records_read= best_loose_scan_records; pos->key= best_loose_scan_start_key; - pos->loosescan_key= best_loose_scan_key; - pos->loosescan_parts= best_max_loose_keypart + 1; + pos->loosescan_picker.loosescan_key= best_loose_scan_key; + pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; pos->table= tab; // todo need ref_depend_map ? @@ -277,8 +277,7 @@ public: }; -void advance_sj_state(JOIN *join, const table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, +void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx, double *current_record_count, double *current_read_time, POSITION *loose_scan_pos); void restore_prev_sj_state(const table_map remaining_tables, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b9088a7ce60..ce20dfea32e 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const static bool find_best(JOIN *join,table_map rest_tables,uint index, double record_count,double read_time); static uint cache_record_length(JOIN *join,uint index); -static bool get_best_combination(JOIN *join); +bool get_best_combination(JOIN *join); static store_key *get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, KEY_PART_INFO *key_part, uchar *key_buff, @@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].records_read=1.0; /* This is a const table */ join->positions[idx].ref_depend_map= 0; - join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ +// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ join->positions[idx].sj_strategy= SJ_OPT_NONE; join->positions[idx].use_join_buffer= FALSE; @@ -5533,7 +5533,7 @@ best_access_path(JOIN *join, pos->key= best_key; pos->table= s; pos->ref_depend_map= best_ref_depends_map; - pos->loosescan_key= MAX_KEY; + pos->loosescan_picker.loosescan_key= MAX_KEY; pos->use_join_buffer= best_uses_jbuf; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables) /* compute the cost of the new plan extended with 's' */ record_count*= join->positions[idx].records_read; read_time+= join->positions[idx].read_time; - advance_sj_state(join, join_tables, s, idx, &record_count, &read_time, + advance_sj_state(join, join_tables, idx, &record_count, &read_time, &loose_scan_pos); join_tables&= ~(s->table->map); @@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join, current_record_count= record_count * position->records_read; current_read_time= read_time + position->read_time; - advance_sj_state(join, remaining_tables, s, idx, ¤t_record_count, + advance_sj_state(join, remaining_tables, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ @@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count, */ double current_record_count=record_count*records; double current_read_time=read_time+best; - advance_sj_state(join, rest_tables, s, idx, ¤t_record_count, + advance_sj_state(join, rest_tables, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); if (best_record_count > current_record_count || @@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL; TRUE Out of memory */ -static bool +bool get_best_combination(JOIN *join) { uint tablenr; @@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join) *j= *join->best_positions[tablenr].table; -#if 0 -/* SJ-Materialization is represented with join tab ranges */ - if (j->sj_strategy == SJ_OPT_MATERIALIZE || - j->sj_strategy == SJ_OPT_MATERIALIZE) - j->sj_strategy= SJ_OPT_NONE; -#endif - j->bush_root_tab= sjm_nest_root; form=join->table[tablenr]=j->table; @@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join) (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { j->type=JT_ALL; - j->index= join->best_positions[tablenr].loosescan_key; + j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key; if (tablenr != join->const_tables) join->full_join=1; } diff --git a/sql/sql_select.h b/sql/sql_select.h index 5476ef9b46c..21c5dc7d65e 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -158,6 +158,17 @@ enum enum_nested_loop_state }; +/* Possible sj_strategy values */ +enum sj_strategy_enum +{ + SJ_OPT_NONE=0, + SJ_OPT_DUPS_WEEDOUT=1, + SJ_OPT_LOOSE_SCAN =2, + SJ_OPT_FIRST_MATCH =3, + SJ_OPT_MATERIALIZE =4, + SJ_OPT_MATERIALIZE_SCAN=5 +}; + /* Values for JOIN_TAB::packed_info */ #define TAB_INFO_HAVE_VALUE 1 #define TAB_INFO_USING_INDEX 2 @@ -374,7 +385,7 @@ typedef struct st_join_table { POSITION::sj_strategy field. This field is set up by the fix_semijoin_strategies_for_picked_join_order. */ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; uint n_sj_tables; @@ -496,6 +507,214 @@ enum_nested_loop_state end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); +/* psergey */ + + +struct st_position; + +class Semi_join_strategy_picker +{ +public: + /* Called when starting to build a new join prefix */ + virtual void set_empty() = 0; + + /* + Update internal state after another table has been added to the join + prefix + */ + virtual void set_from_prev(struct st_position *prev) = 0; + + virtual bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) = 0; + + virtual void mark_used() = 0; + + virtual ~Semi_join_strategy_picker() {} +}; + + +/* + Duplicate Weedout strategy optimization state +*/ + +class Duplicate_weedout_picker : public Semi_join_strategy_picker +{ + /* The first table that the strategy will need to handle */ + uint first_dupsweedout_table; + + /* + Tables that we will need to have in the prefix to do the weedout step + (all inner and all outer that the involved semi-joins are correlated with) + */ + table_map dupsweedout_tables; + + bool is_used; +public: + void set_empty() + { + dupsweedout_tables= 0; + first_dupsweedout_table= MAX_TABLES; + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *stratey, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Firstmatch_picker : public Semi_join_strategy_picker +{ + /* + Index of the first inner table that we intend to handle with this + strategy + */ + uint first_firstmatch_table; + /* + Tables that were not in the join prefix when we've started considering + FirstMatch strategy. + */ + table_map first_firstmatch_rtbl; + /* + Tables that need to be in the prefix before we can calculate the cost + of using FirstMatch strategy. + */ + table_map firstmatch_need_tables; + + bool is_used; + + bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } + void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } +public: + void set_empty() + { + invalidate_firstmatch_prefix(); + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class LooseScan_picker : public Semi_join_strategy_picker +{ + /* The first (i.e. driving) table we're doing loose scan for */ + uint first_loosescan_table; + /* + Tables that need to be in the prefix before we can calculate the cost + of using LooseScan strategy. + */ + table_map loosescan_need_tables; + + /* + keyno - Planning to do LooseScan on this key. If keyuse is NULL then + this is a full index scan, otherwise this is a ref+loosescan + scan (and keyno matches the KEUSE's) + MAX_KEY - Not doing a LooseScan + */ + uint loosescan_key; // final (one for strategy instance ) + uint loosescan_parts; /* Number of keyparts to be kept distinct */ + + bool is_used; +public: + void set_empty() + { + first_loosescan_table= MAX_TABLES; + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend class Loose_scan_opt; + friend void best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + uint idx, + bool disable_jbuf, + double record_count, + struct st_position *pos, + struct st_position *loose_scan_pos); + friend bool get_best_combination(JOIN *join); + friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after); + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Sj_materialization_picker : public Semi_join_strategy_picker +{ + bool is_used; + + /* The last inner table (valid once we're after it) */ + uint sjm_scan_last_inner; + /* + Tables that we need to have in the prefix to calculate the correct cost. + Basically, we need all inner tables and outer tables mentioned in the + semi-join's ON expression so we can correctly account for fanout. + */ + table_map sjm_scan_need_tables; + +public: + void set_empty() + { + sjm_scan_need_tables= 0; + LINT_INIT(sjm_scan_last_inner); + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + /** Information about a position of table within a join order. Used in join @@ -503,6 +722,9 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), */ typedef struct st_position { + /* The table that's put into join order */ + JOIN_TAB *table; + /* The "fanout": number of output rows that will be produced (after pushed down selection condition is applied) per each row combination of @@ -516,7 +738,10 @@ typedef struct st_position number the access method will be invoked. */ double read_time; - JOIN_TAB *table; + + /* Cumulative cost and record count for the join prefix */ + COST_VECT prefix_cost; + double prefix_record_count; /* NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used. @@ -526,14 +751,13 @@ typedef struct st_position /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map; - - bool use_join_buffer; - - - /* These form a stack of partial join order costs and output sizes */ - COST_VECT prefix_cost; - double prefix_record_count; - + + /* + TRUE <=> join buffering will be used. At the moment this is based on + *very* imprecise guesses made in best_access_path(). + */ + bool use_join_buffer; + /* Current optimization state: Semi-join strategy to be used for this and preceding join tables. @@ -546,7 +770,8 @@ typedef struct st_position this applies to. The values of covered_preceding_positions->sj_strategy must be ignored. */ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; + /* Valid only after fix_semijoin_strategies_for_picked_join_order() call: if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that @@ -554,67 +779,15 @@ typedef struct st_position */ uint n_sj_tables; -/* LooseScan strategy members */ - - /* The first (i.e. driving) table we're doing loose scan for */ - uint first_loosescan_table; - /* - Tables that need to be in the prefix before we can calculate the cost - of using LooseScan strategy. - */ - table_map loosescan_need_tables; - - /* - keyno - Planning to do LooseScan on this key. If keyuse is NULL then - this is a full index scan, otherwise this is a ref+loosescan - scan (and keyno matches the KEUSE's) - MAX_KEY - Not doing a LooseScan - */ - uint loosescan_key; // final (one for strategy instance ) - uint loosescan_parts; /* Number of keyparts to be kept distinct */ - -/* FirstMatch strategy */ - /* - Index of the first inner table that we intend to handle with this - strategy - */ - uint first_firstmatch_table; - /* - Tables that were not in the join prefix when we've started considering - FirstMatch strategy. - */ - table_map first_firstmatch_rtbl; - /* - Tables that need to be in the prefix before we can calculate the cost - of using FirstMatch strategy. - */ - table_map firstmatch_need_tables; - - bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } - void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } - -/* Duplicate Weedout strategy */ - /* The first table that the strategy will need to handle */ - uint first_dupsweedout_table; - /* - Tables that we will need to have in the prefix to do the weedout step - (all inner and all outer that the involved semi-joins are correlated with) - */ - table_map dupsweedout_tables; - -/* SJ-Materialization-Scan strategy */ - /* The last inner table (valid once we're after it) */ - uint sjm_scan_last_inner; - /* - Tables that we need to have in the prefix to calculate the correct cost. - Basically, we need all inner tables and outer tables mentioned in the - semi-join's ON expression so we can correctly account for fanout. - */ - table_map sjm_scan_need_tables; - table_map prefix_dups_producing_tables; -} POSITION; + table_map inner_tables_handled_with_other_sjs; + + Duplicate_weedout_picker dups_weedout_picker; + Firstmatch_picker firstmatch_picker; + LooseScan_picker loosescan_picker; + Sj_materialization_picker sjmat_picker; +} POSITION; typedef struct st_rollup { @@ -626,18 +799,6 @@ typedef struct st_rollup } ROLLUP; -#define SJ_OPT_NONE 0 -#define SJ_OPT_DUPS_WEEDOUT 1 -#define SJ_OPT_LOOSE_SCAN 2 -#define SJ_OPT_FIRST_MATCH 3 -#define SJ_OPT_MATERIALIZE 4 -#define SJ_OPT_MATERIALIZE_SCAN 5 - -inline bool sj_is_materialize_strategy(uint strategy) -{ - return strategy >= SJ_OPT_MATERIALIZE; -} - class JOIN_TAB_RANGE: public Sql_alloc { public: @@ -808,7 +969,7 @@ public: they produce. */ table_map cur_dups_producing_tables; - + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ /* -- cgit v1.2.1 From f84dbf4b205f311f379a8fd0c6dc1f0fd893e073 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Fri, 25 Nov 2011 05:56:58 +0400 Subject: Semi-join optimizations code cleanup part 2: - Make EXPLAIN display "Start temporary" at the start of the fanout (it used to display at the first table whose rowid gets into temp. table which is not that useful for the user) - Updated test results (all checked) --- sql/opt_subselect.cc | 153 ++++++++++++++++++++++++++++----------------------- sql/opt_subselect.h | 3 +- sql/sql_select.cc | 10 ++-- sql/sql_select.h | 2 + 4 files changed, 93 insertions(+), 75 deletions(-) (limited to 'sql') diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index c7d07c21746..79eb0cb9551 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2168,7 +2168,8 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) See setup_semijoin_dups_elimination() for a description of what kinds of join prefixes each strategy can handle. */ -bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables) + +bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map inner_tables) { for (int i= (int)idx; i >= 0; i--) { @@ -2176,7 +2177,8 @@ bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables) if ((emb_sj_nest= prefix[i].table->emb_sj_nest)) { if (inner_tables & emb_sj_nest->sj_inner_tables) - return !test(inner_tables == emb_sj_nest->sj_inner_tables); + return !test(inner_tables == (emb_sj_nest->sj_inner_tables & + ~join->const_table_map)); } } return FALSE; @@ -2206,6 +2208,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - attempts to build semi-join strategies here will confuse the optimizer, so bail out. */ + pos->sj_strategy= SJ_OPT_NONE; return; } @@ -2290,7 +2293,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, join->cur_dups_producing_tables &= ~handled_fanout; //TODO: update bitmap of semi-joins that were handled together with // others. - if (is_multiple_semi_joins(join->positions, idx, handled_fanout)) + if (is_multiple_semi_joins(join, join->positions, idx, handled_fanout)) pos->inner_tables_handled_with_other_sjs |= handled_fanout; } else @@ -3906,6 +3909,80 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) } +int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint n_tables) +{ + THD *thd= join->thd; + DBUG_ENTER("init_dups_weedout"); + SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; + SJ_TMP_TABLE::TAB *last_tab= sjtabs; + uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) + uint jt_null_bits= 0; // # null bits in tuple bytes + /* + Walk through the range and remember + - tables that need their rowids to be put into temptable + - the last outer table + */ + for (JOIN_TAB *j=join->join_tab + first_table; + j < join->join_tab + first_table + n_tables; j++) + { + if (sj_table_is_included(join, j)) + { + last_tab->join_tab= j; + last_tab->rowid_offset= jt_rowid_offset; + jt_rowid_offset += j->table->file->ref_length; + if (j->table->maybe_null) + { + last_tab->null_byte= jt_null_bits / 8; + last_tab->null_bit= jt_null_bits++; + } + last_tab++; + j->table->prepare_for_position(); + j->keep_current_rowid= TRUE; + } + } + + SJ_TMP_TABLE *sjtbl; + if (jt_rowid_offset) /* Temptable has at least one rowid */ + { + size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || + !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + memcpy(sjtbl->tabs, sjtabs, tabs_size); + sjtbl->is_degenerate= FALSE; + sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); + sjtbl->rowid_len= jt_rowid_offset; + sjtbl->null_bits= jt_null_bits; + sjtbl->null_bytes= (jt_null_bits + 7)/8; + sjtbl->tmp_table= + create_duplicate_weedout_tmp_table(thd, + sjtbl->rowid_len + + sjtbl->null_bytes, + sjtbl); + join->sj_tmp_tables.push_back(sjtbl->tmp_table); + } + else + { + /* + This is a special case where the entire subquery predicate does + not depend on anything at all, ie this is + WHERE const IN (uncorrelated select) + */ + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjtbl->tmp_table= NULL; + sjtbl->is_degenerate= TRUE; + sjtbl->have_degenerate_row= FALSE; + } + + sjtbl->next_flush_table= join->join_tab[first_table].flush_weedout_table; + join->join_tab[first_table].flush_weedout_table= sjtbl; + join->join_tab[first_fanout_table].first_weedout_table= sjtbl; + join->join_tab[first_table + n_tables - 1].check_weed_out_table= sjtbl; + DBUG_RETURN(0); +} + + /* Setup the strategies to eliminate semi-join duplicates. @@ -4006,9 +4083,9 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint no_jbuf_after) { uint i; - THD *thd= join->thd; DBUG_ENTER("setup_semijoin_dups_elimination"); - + + POSITION *pos= join->best_positions + join->const_tables; for (i= join->const_tables ; i < join->top_join_tab_count; ) { @@ -4049,6 +4126,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, forwards, but do not destroy other duplicate elimination methods. */ uint first_table= i; + uint join_cache_level= join->thd->variables.join_cache_level; for (uint j= i; j < i + pos->n_sj_tables; j++) { @@ -4072,70 +4150,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, } } - SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; - SJ_TMP_TABLE::TAB *last_tab= sjtabs; - uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) - uint jt_null_bits= 0; // # null bits in tuple bytes - /* - Walk through the range and remember - - tables that need their rowids to be put into temptable - - the last outer table - */ - for (JOIN_TAB *j=join->join_tab + first_table; - j < join->join_tab + i + pos->n_sj_tables; j++) - { - if (sj_table_is_included(join, j)) - { - last_tab->join_tab= j; - last_tab->rowid_offset= jt_rowid_offset; - jt_rowid_offset += j->table->file->ref_length; - if (j->table->maybe_null) - { - last_tab->null_byte= jt_null_bits / 8; - last_tab->null_bit= jt_null_bits++; - } - last_tab++; - j->table->prepare_for_position(); - j->keep_current_rowid= TRUE; - } - } - - SJ_TMP_TABLE *sjtbl; - if (jt_rowid_offset) /* Temptable has at least one rowid */ - { - size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || - !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - memcpy(sjtbl->tabs, sjtabs, tabs_size); - sjtbl->is_degenerate= FALSE; - sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); - sjtbl->rowid_len= jt_rowid_offset; - sjtbl->null_bits= jt_null_bits; - sjtbl->null_bytes= (jt_null_bits + 7)/8; - sjtbl->tmp_table= - create_duplicate_weedout_tmp_table(thd, - sjtbl->rowid_len + - sjtbl->null_bytes, - sjtbl); - join->sj_tmp_tables.push_back(sjtbl->tmp_table); - } - else - { - /* - This is a special case where the entire subquery predicate does - not depend on anything at all, ie this is - WHERE const IN (uncorrelated select) - */ - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - sjtbl->tmp_table= NULL; - sjtbl->is_degenerate= TRUE; - sjtbl->have_degenerate_row= FALSE; - } - join->join_tab[first_table].flush_weedout_table= sjtbl; - join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl; - + init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table); i+= pos->n_sj_tables; pos+= pos->n_sj_tables; break; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 7d560588995..0e7fa22cfe7 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -358,8 +358,7 @@ public: ENGINE_COLUMNDEF *start_recinfo; ENGINE_COLUMNDEF *recinfo; - /* Pointer to next table (next->start_idx > this->end_idx) */ - SJ_TMP_TABLE *next; + SJ_TMP_TABLE *next_flush_table; }; int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ce20dfea32e..f56cb920a3d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -15087,10 +15087,12 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) int error; enum_nested_loop_state rc= NESTED_LOOP_OK; READ_RECORD *info= &join_tab->read_record; - - if (join_tab->flush_weedout_table) + + for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table; + flush_dups_table; + flush_dups_table= flush_dups_table->next_flush_table) { - do_sj_reset(join_tab->flush_weedout_table); + do_sj_reset(flush_dups_table); } if (!join_tab->preread_init_done && join_tab->preread_init()) @@ -21013,7 +21015,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, extra.append(STRING_WITH_LEN("; LooseScan")); } - if (tab->flush_weedout_table) + if (tab->first_weedout_table) extra.append(STRING_WITH_LEN("; Start temporary")); if (tab->check_weed_out_table) extra.append(STRING_WITH_LEN("; End temporary")); diff --git a/sql/sql_select.h b/sql/sql_select.h index 21c5dc7d65e..2b307c20dbe 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -341,6 +341,8 @@ typedef struct st_join_table { /* Variables for semi-join duplicate elimination */ SJ_TMP_TABLE *flush_weedout_table; SJ_TMP_TABLE *check_weed_out_table; + /* for EXPLAIN only: */ + SJ_TMP_TABLE *first_weedout_table; /* If set, means we should stop join enumeration after we've got the first -- cgit v1.2.1 From 69e7b1887696a88472d25951341d5bb1df134c0b Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Fri, 25 Nov 2011 14:57:27 +0400 Subject: Remove garbage comments --- sql/opt_subselect.cc | 8 -------- 1 file changed, 8 deletions(-) (limited to 'sql') diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 45b8b2facf1..d9f02d381e4 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2217,16 +2217,8 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; TABLE_LIST *emb_sj_nest; if ((emb_sj_nest= new_join_tab->emb_sj_nest)) - { - /// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; - /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ - /// if (!(remaining_tables & - /// emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) - /// join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; - } - Semi_join_strategy_picker **strategy; if (idx == join->const_tables) { -- cgit v1.2.1 From 8325848b6ec4189b8339355689060a19d226a92f Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Fri, 25 Nov 2011 23:54:36 +0400 Subject: Subquery code cleanups: - Make functions that operate on SJ_TMP_TABLE be member functions - Make Loose_scan_opt data members private --- sql/opt_subselect.cc | 86 +++++++++++++++++++++++---------------------------- sql/opt_subselect.h | 10 +++--- sql/sql_join_cache.cc | 2 +- sql/sql_select.cc | 6 ++-- 4 files changed, 48 insertions(+), 56 deletions(-) (limited to 'sql') diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index d9f02d381e4..f8e495de9bf 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -3472,12 +3472,8 @@ static bool is_cond_sj_in_equality(Item *item) SYNOPSIS - create_duplicate_weedout_tmp_table() + create_sj_weedout_tmp_table() thd Thread handle - uniq_tuple_length_arg Length of the table's column - sjtbl Update sjtbl->[start_]recinfo values which - will be needed if we'll need to convert the - created temptable from HEAP to MyISAM/Maria. DESCRIPTION Create a temporary table to weed out duplicate rowid combinations. The @@ -3502,9 +3498,8 @@ static bool is_cond_sj_in_equality(Item *item) NULL on error */ -TABLE *create_duplicate_weedout_tmp_table(THD *thd, - uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl) +bool +SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) { MEM_ROOT *mem_root_save, own_root; TABLE *table; @@ -3517,15 +3512,17 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, uchar *group_buff; uchar *bitmaps; uint *blob_field; - ENGINE_COLUMNDEF *recinfo, *start_recinfo; bool using_unique_constraint=FALSE; bool use_packed_rows= FALSE; Field *field, *key_field; uint null_pack_length, null_count; uchar *null_flags; uchar *pos; - DBUG_ENTER("create_duplicate_weedout_tmp_table"); - DBUG_ASSERT(!sjtbl->is_degenerate); + DBUG_ENTER("create_sj_weedout_tmp_table"); + DBUG_ASSERT(!is_degenerate); + + tmp_table= NULL; + uint uniq_tuple_length_arg= rowid_len + null_bytes; /* STEP 1: Get temporary table name */ @@ -3567,7 +3564,7 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, { if (temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); + DBUG_RETURN(TRUE); } strmov(tmpname,path); @@ -3771,20 +3768,19 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0)) goto err; } - sjtbl->start_recinfo= start_recinfo; - sjtbl->recinfo= recinfo; if (open_tmp_table(table)) goto err; thd->mem_root= mem_root_save; - DBUG_RETURN(table); + tmp_table= table; + DBUG_RETURN(FALSE); err: thd->mem_root= mem_root_save; free_tmp_table(thd,table); /* purecov: inspected */ if (temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); /* purecov: inspected */ + DBUG_RETURN(TRUE); /* purecov: inspected */ } @@ -3792,25 +3788,25 @@ err: SemiJoinDuplicateElimination: Reset the temporary table */ -int do_sj_reset(SJ_TMP_TABLE *sj_tbl) +int SJ_TMP_TABLE::sj_weedout_delete_rows() { - DBUG_ENTER("do_sj_reset"); - if (sj_tbl->tmp_table) + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_delete_rows"); + if (tmp_table) { - int rc= sj_tbl->tmp_table->file->ha_delete_all_rows(); + int rc= tmp_table->file->ha_delete_all_rows(); DBUG_RETURN(rc); } - sj_tbl->have_degenerate_row= FALSE; + have_degenerate_row= FALSE; DBUG_RETURN(0); } + /* SemiJoinDuplicateElimination: Weed out duplicate row combinations SYNPOSIS - do_sj_dups_weedout() + sj_weedout_check_row() thd Thread handle - sjtbl Duplicate weedout table DESCRIPTION Try storing current record combination of outer tables (i.e. their @@ -3823,47 +3819,47 @@ int do_sj_reset(SJ_TMP_TABLE *sj_tbl) 0 The row combination is not a duplicate (continue) */ -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) +int SJ_TMP_TABLE::sj_weedout_check_row(THD *thd) { int error; - SJ_TMP_TABLE::TAB *tab= sjtbl->tabs; - SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end; + SJ_TMP_TABLE::TAB *tab= tabs; + SJ_TMP_TABLE::TAB *tab_end= tabs_end; uchar *ptr; uchar *nulls_ptr; - DBUG_ENTER("do_sj_dups_weedout"); + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_check_row"); - if (sjtbl->is_degenerate) + if (is_degenerate) { - if (sjtbl->have_degenerate_row) + if (have_degenerate_row) DBUG_RETURN(1); - sjtbl->have_degenerate_row= TRUE; + have_degenerate_row= TRUE; DBUG_RETURN(0); } - ptr= sjtbl->tmp_table->record[0] + 1; + ptr= tmp_table->record[0] + 1; /* Put the the rowids tuple into table->record[0]: */ // 1. Store the length - if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1) + if (((Field_varstring*)(tmp_table->field[0]))->length_bytes == 1) { - *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes); + *ptr= (uchar)(rowid_len + null_bytes); ptr++; } else { - int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes); + int2store(ptr, rowid_len + null_bytes); ptr += 2; } nulls_ptr= ptr; // 2. Zero the null bytes - if (sjtbl->null_bytes) + if (null_bytes) { - bzero(ptr, sjtbl->null_bytes); - ptr += sjtbl->null_bytes; + bzero(ptr, null_bytes); + ptr += null_bytes; } // 3. Put the rowids @@ -3883,15 +3879,14 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) } } - error= sjtbl->tmp_table->file->ha_write_tmp_row(sjtbl->tmp_table->record[0]); + error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]); if (error) { /* create_internal_tmp_table_from_heap will generate error if needed */ - if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) + if (!tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) DBUG_RETURN(1); /* Duplicate */ - if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table, - sjtbl->start_recinfo, - &sjtbl->recinfo, error, 1)) + if (create_internal_tmp_table_from_heap(thd, tmp_table, start_recinfo, + &recinfo, error, 1)) DBUG_RETURN(-1); } DBUG_RETURN(0); @@ -3943,11 +3938,8 @@ int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint sjtbl->rowid_len= jt_rowid_offset; sjtbl->null_bits= jt_null_bits; sjtbl->null_bytes= (jt_null_bits + 7)/8; - sjtbl->tmp_table= - create_duplicate_weedout_tmp_table(thd, - sjtbl->rowid_len + - sjtbl->null_bytes, - sjtbl); + if (sjtbl->create_sj_weedout_tmp_table(thd)) + DBUG_RETURN(TRUE); join->sj_tmp_tables.push_back(sjtbl->tmp_table); } else diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index b5886ae06a2..94c6153f0c1 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -40,7 +40,6 @@ ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, class Loose_scan_opt { -public: /* All methods must check this before doing anything else */ bool try_loosescan; @@ -71,6 +70,7 @@ public: uint best_max_loose_keypart; +public: Loose_scan_opt(): try_loosescan(FALSE), bound_sj_equalities(0), @@ -288,10 +288,6 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join); bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab); bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab); -TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl); -int do_sj_reset(SJ_TMP_TABLE *sj_tbl); -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl); /* Temporary table used by semi-join DuplicateElimination strategy @@ -359,6 +355,10 @@ public: ENGINE_COLUMNDEF *recinfo; SJ_TMP_TABLE *next_flush_table; + + int sj_weedout_delete_rows(); + int sj_weedout_check_row(THD *thd); + bool create_sj_weedout_tmp_table(THD *thd); }; int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index f53bf738b86..af4b157ba90 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -2367,7 +2367,7 @@ enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr) int res= 0; if (!join_tab->check_weed_out_table || - !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table))) + !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd))) { set_curr_rec_link(rec_ptr); rc= (join_tab->next_select)(join, join_tab+1, 0); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 11fadf56656..d8c17012196 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -15105,7 +15105,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) flush_dups_table; flush_dups_table= flush_dups_table->next_flush_table) { - do_sj_reset(flush_dups_table); + flush_dups_table->sj_weedout_delete_rows(); } if (!join_tab->preread_init_done && join_tab->preread_init()) @@ -15321,7 +15321,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, if (join_tab->check_weed_out_table && found) { - int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table); + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); if (res == -1) DBUG_RETURN(NESTED_LOOP_ERROR); else if (res == 1) @@ -15445,7 +15445,7 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab) */ if (join_tab->check_weed_out_table) { - int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table); + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); if (res == -1) return NESTED_LOOP_ERROR; else if (res == 1) -- cgit v1.2.1