diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2011-11-26 12:27:52 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2011-11-26 12:27:52 +0400 |
commit | d84ea521c539e4f63511f1787df4b4dcb12886d1 (patch) | |
tree | 6fcae09f28bb19e94ccce2124640f884db2b53bc /sql | |
parent | fa366521cf80fd2ed968a0a4ac9d68ca4d7c5b20 (diff) | |
parent | 8325848b6ec4189b8339355689060a19d226a92f (diff) | |
download | mariadb-git-d84ea521c539e4f63511f1787df4b4dcb12886d1.tar.gz |
Merge
Diffstat (limited to 'sql')
-rw-r--r-- | sql/opt_subselect.cc | 998 | ||||
-rw-r--r-- | sql/opt_subselect.h | 20 | ||||
-rw-r--r-- | sql/sql_join_cache.cc | 2 | ||||
-rw-r--r-- | sql/sql_select.cc | 37 | ||||
-rw-r--r-- | sql/sql_select.h | 329 |
5 files changed, 840 insertions, 546 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index c1b2eab5279..f065de31ac5 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2166,227 +2166,172 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) join prefixes each strategy can handle. */ -void advance_sj_state(JOIN *join, table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, - double *current_record_count, double *current_read_time, +bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map inner_tables) +{ + for (int i= (int)idx; i >= 0; i--) + { + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= prefix[i].table->emb_sj_nest)) + { + if (inner_tables & emb_sj_nest->sj_inner_tables) + return !test(inner_tables == (emb_sj_nest->sj_inner_tables & + ~join->const_table_map)); + } + } + return FALSE; +} + + +void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, double *current_read_time, POSITION *loose_scan_pos) { - TABLE_LIST *emb_sj_nest; POSITION *pos= join->positions + idx; - remaining_tables &= ~new_join_tab->table->map; - bool disable_jbuf= join->thd->variables.join_cache_level == 0; - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->sj_strategy= SJ_OPT_NONE; - - pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + const JOIN_TAB *new_join_tab= pos->table; + Semi_join_strategy_picker *pickers[]= + { + &pos->firstmatch_picker, + &pos->loosescan_picker, + &pos->sjmat_picker, + &pos->dups_weedout_picker, + NULL, + }; - /* We're performing optimization inside SJ-Materialization nest */ if (join->emb_sjm_nest) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; + /* + We're performing optimization inside SJ-Materialization nest: + - there are no other semi-joins inside semi-join nests + - attempts to build semi-join strategies here will confuse + the optimizer, so bail out. + */ + pos->sj_strategy= SJ_OPT_NONE; return; } - /* Initialize the state or copy it from prev. tables */ + /* + Update join->cur_sj_inner_tables (Used by FirstMatch in this function and + LooseScan detector in best_access_path) + */ + remaining_tables &= ~new_join_tab->table->map; + pos->prefix_dups_producing_tables= join->cur_dups_producing_tables; + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= new_join_tab->emb_sj_nest)) + join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; + + Semi_join_strategy_picker **strategy; if (idx == join->const_tables) { - pos->invalidate_firstmatch_prefix(); - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; - LINT_INIT(pos->sjm_scan_last_inner); + /* First table, initialize pickers */ + for (strategy= pickers; *strategy != NULL; strategy++) + (*strategy)->set_empty(); + pos->inner_tables_handled_with_other_sjs= 0; } else { - // FirstMatch - pos->first_firstmatch_table= - (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ? - MAX_TABLES : pos[-1].first_firstmatch_table; - pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl; - pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables; - - // LooseScan - pos->first_loosescan_table= - (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ? - MAX_TABLES : pos[-1].first_loosescan_table; - pos->loosescan_need_tables= pos[-1].loosescan_need_tables; - - // SJ-Materialization Scan - pos->sjm_scan_need_tables= - (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ? - 0 : pos[-1].sjm_scan_need_tables; - pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner; - - // Duplicate Weedout - pos->dupsweedout_tables= pos[-1].dupsweedout_tables; - pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table; - } - - table_map handled_by_fm_or_ls= 0; - /* FirstMatch Strategy */ - if (new_join_tab->emb_sj_nest && - optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && - !join->outer_join) - { - const table_map outer_corr_tables= - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on; - const table_map sj_inner_tables= - new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; - - /* - Enter condition: - 1. The next join tab belongs to semi-join nest - (verified for the encompassing code block above). - 2. We're not in a duplicate producer range yet - 3. All outer tables that - - the subquery is correlated with, or - - referred to from the outer_expr - are in the join prefix - 4. All inner tables are still part of remaining_tables. - */ - if (!join->cur_sj_inner_tables && // (2) - !(remaining_tables & outer_corr_tables) && // (3) - (sj_inner_tables == // (4) - ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) + for (strategy= pickers; *strategy != NULL; strategy++) { - /* Start tracking potential FirstMatch range */ - pos->first_firstmatch_table= idx; - pos->firstmatch_need_tables= sj_inner_tables; - pos->first_firstmatch_rtbl= remaining_tables; + (*strategy)->set_from_prev(pos - 1); } + pos->inner_tables_handled_with_other_sjs= + pos[-1].inner_tables_handled_with_other_sjs; + } + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + + { + pos->sj_strategy= SJ_OPT_NONE; - if (pos->in_firstmatch_prefix()) + for (strategy= pickers; *strategy != NULL; strategy++) { - if (outer_corr_tables & pos->first_firstmatch_rtbl) + table_map handled_fanout; + sj_strategy_enum sj_strategy; + double rec_count= *current_record_count; + double read_time= *current_read_time; + if ((*strategy)->check_qep(join, idx, remaining_tables, + new_join_tab, + &rec_count, + &read_time, + &handled_fanout, + &sj_strategy, + loose_scan_pos)) { /* - Trying to add an sj-inner table whose sj-nest has an outer correlated - table that was not in the prefix. This means FirstMatch can't be used. - */ - pos->invalidate_firstmatch_prefix(); - } - else - { - /* Record that we need all of this semi-join's inner tables, too */ - pos->firstmatch_need_tables|= sj_inner_tables; - } - - if (pos->in_firstmatch_prefix() && - !(pos->firstmatch_need_tables & remaining_tables)) - { - /* - Got a complete FirstMatch range. - Calculate correct costs and fanout - */ - optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx, - remaining_tables, FALSE, idx, - current_record_count, - current_read_time); - /* - We don't yet know what are the other strategies, so pick the - FirstMatch. - - We ought to save the alternate POSITIONs produced by - optimize_wo_join_buffering but the problem is that providing save - space uses too much space. Instead, we will re-calculate the - alternate POSITIONs after we've picked the best QEP. + It's possible to use the strategy. Use it, if + - it removes semi-join fanout that was not removed before + - using it is cheaper than using something else, + and {if some other strategy has removed fanout + that this strategy is trying to remove, then it + did remove the fanout only for one semi-join} + This is to avoid a situation when + 1. strategy X removes fanout for semijoin X,Y + 2. using strategy Z is cheaper, but it only removes + fanout from semijoin X. + 3. We have no clue what to do about fanount of semi-join Y. */ - pos->sj_strategy= SJ_OPT_FIRST_MATCH; - handled_by_fm_or_ls= pos->firstmatch_need_tables; + if ((join->cur_dups_producing_tables & handled_fanout) || + (read_time < *current_read_time && + !(handled_fanout & pos->inner_tables_handled_with_other_sjs))) + { + /* Mark strategy as used */ + (*strategy)->mark_used(); + pos->sj_strategy= sj_strategy; + *current_read_time= read_time; + *current_record_count= rec_count; + join->cur_dups_producing_tables &= ~handled_fanout; + //TODO: update bitmap of semi-joins that were handled together with + // others. + if (is_multiple_semi_joins(join, join->positions, idx, handled_fanout)) + pos->inner_tables_handled_with_other_sjs |= handled_fanout; + } + else + { + /* We decided not to apply the strategy. */ + (*strategy)->set_empty(); + } } } } - /* LooseScan Strategy */ - { - POSITION *first=join->positions+pos->first_loosescan_table; - /* - LooseScan strategy can't handle interleaving between tables from the - semi-join that LooseScan is handling and any other tables. - - If we were considering LooseScan for the join prefix (1) - and the table we're adding creates an interleaving (2) - then - stop considering loose scan - */ - if ((pos->first_loosescan_table != MAX_TABLES) && // (1) - (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) - new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) - { - pos->first_loosescan_table= MAX_TABLES; - } - - /* - If we got an option to use LooseScan for the current table, start - considering using LooseScan strategy - */ - if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) - { - pos->first_loosescan_table= idx; - pos->loosescan_need_tables= - new_join_tab->emb_sj_nest->sj_inner_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on | - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - } - - if ((pos->first_loosescan_table != MAX_TABLES) && - !(remaining_tables & pos->loosescan_need_tables)) - { - /* - Ok we have LooseScan plan and also have all LooseScan sj-nest's - inner tables and outer correlated tables into the prefix. - */ - - first=join->positions + pos->first_loosescan_table; - uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); - /* Got a complete LooseScan range. Calculate its cost */ - /* - The same problem as with FirstMatch - we need to save POSITIONs - somewhere but reserving space for all cases would require too - much space. We will re-calculate POSITION structures later on. - */ - optimize_wo_join_buffering(join, pos->first_loosescan_table, idx, - remaining_tables, - TRUE, //first_alt - disable_jbuf ? join->table_count : - pos->first_loosescan_table + n_tables, - current_record_count, - current_read_time); - /* - We don't yet have any other strategies that could handle this - semi-join nest (the other options are Duplicate Elimination or - Materialization, which need at least the same set of tables in - the join prefix to be considered) so unconditionally pick the - LooseScan. - */ - pos->sj_strategy= SJ_OPT_LOOSE_SCAN; - handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables; - } - } - - /* - Update join->cur_sj_inner_tables (Used by FirstMatch in this function and - LooseScan detector in best_access_path) - */ if ((emb_sj_nest= new_join_tab->emb_sj_nest)) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; - join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - join->cur_dups_producing_tables &= ~handled_by_fm_or_ls; - /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */ + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; +} + + +void Sj_materialization_picker::set_from_prev(struct st_position *prev) +{ + if (prev->sjmat_picker.is_used) + set_empty(); + else + { + sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; + sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner; + } + is_used= FALSE; +} + + +bool Sj_materialization_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ bool sjm_scan; SJ_MATERIALIZATION_INFO *mat_info; if ((mat_info= at_sjmat_pos(join, remaining_tables, @@ -2413,11 +2358,11 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, The simple way to model this is to remove SJM-SCAN(...) fanout once we reach the point #2. */ - pos->sjm_scan_need_tables= + sjm_scan_need_tables= new_join_tab->emb_sj_nest->sj_inner_tables | new_join_tab->emb_sj_nest->nested_join->sj_depends_on | new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - pos->sjm_scan_last_inner= idx; + sjm_scan_last_inner= idx; } else { @@ -2440,34 +2385,31 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, mat_read_time += mat_info->materialization_cost.total_cost() + prefix_rec_count * mat_info->lookup_cost.total_cost(); - if (mat_read_time < *current_read_time || join->cur_dups_producing_tables) - { - /* - NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION - elements to join->positions as that makes it hard to return things - back when making one step back in join optimization. That's done - after the QEP has been chosen. - */ - pos->sj_strategy= SJ_OPT_MATERIALIZE; - *current_read_time= mat_read_time; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= - ~new_join_tab->emb_sj_nest->sj_inner_tables; - } + /* + NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION + elements to join->positions as that makes it hard to return things + back when making one step back in join optimization. That's done + after the QEP has been chosen. + */ + *read_time= mat_read_time; + *record_count= prefix_rec_count; + *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables; + *strategy= SJ_OPT_MATERIALIZE; + return TRUE; } } /* 4.A SJM-Scan second phase check */ - if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */ - !(pos->sjm_scan_need_tables & remaining_tables)) + if (sjm_scan_need_tables && /* Have SJM-Scan prefix */ + !(sjm_scan_need_tables & remaining_tables)) { TABLE_LIST *mat_nest= - join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest; + join->positions[sjm_scan_last_inner].table->emb_sj_nest; SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; double prefix_cost; double prefix_rec_count; - int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables; + int first_tab= sjm_scan_last_inner + 1 - mat_info->tables; /* Get the prefix cost */ if (first_tab == (int)join->const_tables) { @@ -2492,6 +2434,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, POSITION curpos, dummy; /* Need to re-run best-access-path as we prefix_rec_count has changed */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); for (i= first_tab + mat_info->tables; i <= idx; i++) { best_access_path(join, join->positions[i].table, rem_tables, i, @@ -2500,142 +2443,333 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, prefix_cost += curpos.read_time; } + *strategy= SJ_OPT_MATERIALIZE_SCAN; + *read_time= prefix_cost; + *record_count= prefix_rec_count; + *handled_fanout= mat_nest->sj_inner_tables; + return TRUE; + } + return FALSE; +} + + +void LooseScan_picker::set_from_prev(struct st_position *prev) +{ + if (prev->loosescan_picker.is_used) + set_empty(); + else + { + first_loosescan_table= prev->loosescan_picker.first_loosescan_table; + loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables; + } + is_used= FALSE; +} + + +bool LooseScan_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) +{ + POSITION *first= join->positions + first_loosescan_table; + /* + LooseScan strategy can't handle interleaving between tables from the + semi-join that LooseScan is handling and any other tables. + + If we were considering LooseScan for the join prefix (1) + and the table we're adding creates an interleaving (2) + then + stop considering loose scan + */ + if ((first_loosescan_table != MAX_TABLES) && // (1) + (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) + new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) + { + first_loosescan_table= MAX_TABLES; + } + + /* + If we got an option to use LooseScan for the current table, start + considering using LooseScan strategy + */ + if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join) + { + first_loosescan_table= idx; + loosescan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + } + + if ((first_loosescan_table != MAX_TABLES) && + !(remaining_tables & loosescan_need_tables) && + (new_join_tab->table->map & loosescan_need_tables)) + { + /* + Ok we have LooseScan plan and also have all LooseScan sj-nest's + inner tables and outer correlated tables into the prefix. + */ + + first= join->positions + first_loosescan_table; + uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); + /* Got a complete LooseScan range. Calculate its cost */ /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - In the second case, we pick this strategy unconditionally because - comparing cost without semi-join duplicate removal with cost with - duplicate removal is not an apples-to-apples comparison. + The same problem as with FirstMatch - we need to save POSITIONs + somewhere but reserving space for all cases would require too + much space. We will re-calculate POSITION structures later on. */ - if (prefix_cost < *current_read_time || join->cur_dups_producing_tables) - { - pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN; - *current_read_time= prefix_cost; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables; + bool disable_jbuf= (join->thd->variables.join_cache_level == 0); + optimize_wo_join_buffering(join, first_loosescan_table, idx, + remaining_tables, + TRUE, //first_alt + disable_jbuf ? join->table_count : + first_loosescan_table + n_tables, + record_count, + read_time); + /* + We don't yet have any other strategies that could handle this + semi-join nest (the other options are Duplicate Elimination or + Materialization, which need at least the same set of tables in + the join prefix to be considered) so unconditionally pick the + LooseScan. + */ + *strategy= SJ_OPT_LOOSE_SCAN; + *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; + return TRUE; + } + return FALSE; +} - } +void Firstmatch_picker::set_from_prev(struct st_position *prev) +{ + if (prev->firstmatch_picker.is_used) + invalidate_firstmatch_prefix(); + else + { + first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table; + first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl; + firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables; } + is_used= FALSE; +} - /* 5. Duplicate Weedout strategy handler */ +bool Firstmatch_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos) +{ + if (new_join_tab->emb_sj_nest && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) && + !join->outer_join) { + const table_map outer_corr_tables= + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on; + const table_map sj_inner_tables= + new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map; + /* - Duplicate weedout can be applied after all ON-correlated and - correlated + Enter condition: + 1. The next join tab belongs to semi-join nest + (verified for the encompassing code block above). + 2. We're not in a duplicate producer range yet + 3. All outer tables that + - the subquery is correlated with, or + - referred to from the outer_expr + are in the join prefix + 4. All inner tables are still part of remaining_tables. */ - TABLE_LIST *nest; - if ((nest= new_join_tab->emb_sj_nest)) - { - if (!pos->dupsweedout_tables) - pos->first_dupsweedout_table= idx; - - pos->dupsweedout_tables |= nest->sj_inner_tables | - nest->nested_join->sj_depends_on | - nest->nested_join->sj_corr_tables; - } - - if (pos->dupsweedout_tables) + if (!join->cur_sj_inner_tables && // (2) + !(remaining_tables & outer_corr_tables) && // (3) + (sj_inner_tables == // (4) + ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) { - /* we're in the process of constructing a DuplicateWeedout range */ - TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; - /* and we've entered an inner side of an outer join*/ - if (emb && emb->on_expr) - pos->dupsweedout_tables |= emb->nested_join->used_tables; + /* Start tracking potential FirstMatch range */ + first_firstmatch_table= idx; + firstmatch_need_tables= sj_inner_tables; + first_firstmatch_rtbl= remaining_tables; } - if (pos->dupsweedout_tables && - !(remaining_tables & - ~new_join_tab->table->map & pos->dupsweedout_tables)) + if (in_firstmatch_prefix()) { - /* - Ok, reached a state where we could put a dups weedout point. - Walk back and calculate - - the join cost (this is needed as the accumulated cost may assume - some other duplicate elimination method) - - extra fanout that will be removed by duplicate elimination - - duplicate elimination cost - There are two cases: - 1. We have other strategy/ies to remove all of the duplicates. - 2. We don't. - - We need to calculate the cost in case #2 also because we need to make - choice between this join order and others. - */ - uint first_tab= pos->first_dupsweedout_table; - double dups_cost; - double prefix_rec_count; - double sj_inner_fanout= 1.0; - double sj_outer_fanout= 1.0; - uint temptable_rec_size; - if (first_tab == join->const_tables) + if (outer_corr_tables & first_firstmatch_rtbl) { - prefix_rec_count= 1.0; - temptable_rec_size= 0; - dups_cost= 0.0; + /* + Trying to add an sj-inner table whose sj-nest has an outer correlated + table that was not in the prefix. This means FirstMatch can't be used. + */ + invalidate_firstmatch_prefix(); } else { - dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; - temptable_rec_size= 8; /* This is not true but we'll make it so */ + /* Record that we need all of this semi-join's inner tables, too */ + firstmatch_need_tables|= sj_inner_tables; } - - table_map dups_removed_fanout= 0; - for (uint j= pos->first_dupsweedout_table; j <= idx; j++) + + if (in_firstmatch_prefix() && + !(firstmatch_need_tables & remaining_tables)) { - POSITION *p= join->positions + j; - dups_cost += p->read_time; - if (p->table->emb_sj_nest) - { - sj_inner_fanout *= p->records_read; - dups_removed_fanout |= p->table->table->map; - } - else - { - sj_outer_fanout *= p->records_read; - temptable_rec_size += p->table->table->file->ref_length; - } + /* + Got a complete FirstMatch range. + Calculate correct costs and fanout + */ + optimize_wo_join_buffering(join, first_firstmatch_table, idx, + remaining_tables, FALSE, idx, + record_count, + read_time); + /* + We ought to save the alternate POSITIONs produced by + optimize_wo_join_buffering but the problem is that providing save + space uses too much space. Instead, we will re-calculate the + alternate POSITIONs after we've picked the best QEP. + */ + *handled_fanout= firstmatch_need_tables; + /* *record_count and *read_time were set by the above call */ + *strategy= SJ_OPT_FIRST_MATCH; + return TRUE; } + } + } + return FALSE; +} - /* - Add the cost of temptable use. The table will have sj_outer_fanout - records, and we will make - - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. - */ - double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - double one_write_cost= get_tmp_table_write_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); +void Duplicate_weedout_picker::set_from_prev(POSITION *prev) +{ + if (prev->dups_weedout_picker.is_used) + set_empty(); + else + { + dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables; + first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table; + } + is_used= FALSE; +} - double write_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout * one_write_cost; - double full_lookup_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout* sj_inner_fanout * - one_lookup_cost; - dups_cost += write_cost + full_lookup_cost; + +bool Duplicate_weedout_picker::check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + POSITION *loose_scan_pos + ) +{ + TABLE_LIST *nest; + if ((nest= new_join_tab->emb_sj_nest)) + { + if (!dupsweedout_tables) + first_dupsweedout_table= idx; + + dupsweedout_tables |= nest->sj_inner_tables | + nest->nested_join->sj_depends_on | + nest->nested_join->sj_corr_tables; + } + + if (dupsweedout_tables) + { + /* we're in the process of constructing a DuplicateWeedout range */ + TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding; + /* and we've entered an inner side of an outer join*/ + if (emb && emb->on_expr) + dupsweedout_tables |= emb->nested_join->used_tables; + } + + /* If this is the last table that we need for DuplicateWeedout range */ + if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map & + dupsweedout_tables)) + { + /* + Ok, reached a state where we could put a dups weedout point. + Walk back and calculate + - the join cost (this is needed as the accumulated cost may assume + some other duplicate elimination method) + - extra fanout that will be removed by duplicate elimination + - duplicate elimination cost + There are two cases: + 1. We have other strategy/ies to remove all of the duplicates. + 2. We don't. - /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - The second part is necessary because this strategy is the last one - to consider (it needs "the most" tables in the prefix) and we can't - leave duplicate-producing tables not handled by any strategy. - */ - if (dups_cost < *current_read_time || join->cur_dups_producing_tables) + We need to calculate the cost in case #2 also because we need to make + choice between this join order and others. + */ + uint first_tab= first_dupsweedout_table; + double dups_cost; + double prefix_rec_count; + double sj_inner_fanout= 1.0; + double sj_outer_fanout= 1.0; + uint temptable_rec_size; + if (first_tab == join->const_tables) + { + prefix_rec_count= 1.0; + temptable_rec_size= 0; + dups_cost= 0.0; + } + else + { + dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + temptable_rec_size= 8; /* This is not true but we'll make it so */ + } + + table_map dups_removed_fanout= 0; + for (uint j= first_dupsweedout_table; j <= idx; j++) + { + POSITION *p= join->positions + j; + dups_cost += p->read_time; + if (p->table->emb_sj_nest) + { + sj_inner_fanout *= p->records_read; + dups_removed_fanout |= p->table->table->map; + } + else { - pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT; - *current_read_time= dups_cost; - *current_record_count= prefix_rec_count * sj_outer_fanout; - join->cur_dups_producing_tables &= ~dups_removed_fanout; + sj_outer_fanout *= p->records_read; + temptable_rec_size += p->table->table->file->ref_length; } } + + /* + Add the cost of temptable use. The table will have sj_outer_fanout + records, and we will make + - sj_outer_fanout table writes + - sj_inner_fanout*sj_outer_fanout lookups. + + */ + double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + double one_write_cost= get_tmp_table_write_cost(join->thd, + sj_outer_fanout, + temptable_rec_size); + + double write_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout * one_write_cost; + double full_lookup_cost= join->positions[first_tab].prefix_record_count* + sj_outer_fanout* sj_inner_fanout * + one_lookup_cost; + dups_cost += write_cost + full_lookup_cost; + + *read_time= dups_cost; + *record_count= prefix_rec_count * sj_outer_fanout; + *handled_fanout= dups_removed_fanout; + *strategy= SJ_OPT_DUPS_WEEDOUT; + return TRUE; } + return FALSE; } @@ -2832,11 +2966,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) { - POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner; + POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner; SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; sjm->is_used= TRUE; sjm->is_sj_scan= TRUE; - first= pos->sjm_scan_last_inner - sjm->tables + 1; + first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1; memcpy(join->best_positions + first, sjm->positions, sizeof(POSITION) * sjm->tables); join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; @@ -2874,7 +3008,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) { - first= pos->first_firstmatch_table; + first= pos->firstmatch_picker.first_firstmatch_table; join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; join->best_positions[first].n_sj_tables= tablenr - first + 1; POSITION dummy; // For loose scan paths @@ -2907,7 +3041,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) { - first= pos->first_loosescan_table; + first= pos->loosescan_picker.first_loosescan_table; POSITION *first_pos= join->best_positions + first; POSITION loose_scan_pos; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: @@ -2946,7 +3080,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) Duplicate Weedout starting at pos->first_dupsweedout_table, ending at this table. */ - first= pos->first_dupsweedout_table; + first= pos->dups_weedout_picker.first_dupsweedout_table; join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; join->best_positions[first].n_sj_tables= tablenr - first + 1; } @@ -3337,12 +3471,8 @@ static bool is_cond_sj_in_equality(Item *item) SYNOPSIS - create_duplicate_weedout_tmp_table() + create_sj_weedout_tmp_table() thd Thread handle - uniq_tuple_length_arg Length of the table's column - sjtbl Update sjtbl->[start_]recinfo values which - will be needed if we'll need to convert the - created temptable from HEAP to MyISAM/Maria. DESCRIPTION Create a temporary table to weed out duplicate rowid combinations. The @@ -3367,9 +3497,8 @@ static bool is_cond_sj_in_equality(Item *item) NULL on error */ -TABLE *create_duplicate_weedout_tmp_table(THD *thd, - uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl) +bool +SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) { MEM_ROOT *mem_root_save, own_root; TABLE *table; @@ -3382,15 +3511,17 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, uchar *group_buff; uchar *bitmaps; uint *blob_field; - ENGINE_COLUMNDEF *recinfo, *start_recinfo; bool using_unique_constraint=FALSE; bool use_packed_rows= FALSE; Field *field, *key_field; uint null_pack_length, null_count; uchar *null_flags; uchar *pos; - DBUG_ENTER("create_duplicate_weedout_tmp_table"); - DBUG_ASSERT(!sjtbl->is_degenerate); + DBUG_ENTER("create_sj_weedout_tmp_table"); + DBUG_ASSERT(!is_degenerate); + + tmp_table= NULL; + uint uniq_tuple_length_arg= rowid_len + null_bytes; /* STEP 1: Get temporary table name */ @@ -3432,7 +3563,7 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, { if (temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); + DBUG_RETURN(TRUE); } strmov(tmpname,path); @@ -3636,20 +3767,19 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0)) goto err; } - sjtbl->start_recinfo= start_recinfo; - sjtbl->recinfo= recinfo; if (open_tmp_table(table)) goto err; thd->mem_root= mem_root_save; - DBUG_RETURN(table); + tmp_table= table; + DBUG_RETURN(FALSE); err: thd->mem_root= mem_root_save; free_tmp_table(thd,table); /* purecov: inspected */ if (temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); /* purecov: inspected */ + DBUG_RETURN(TRUE); /* purecov: inspected */ } @@ -3657,25 +3787,25 @@ err: SemiJoinDuplicateElimination: Reset the temporary table */ -int do_sj_reset(SJ_TMP_TABLE *sj_tbl) +int SJ_TMP_TABLE::sj_weedout_delete_rows() { - DBUG_ENTER("do_sj_reset"); - if (sj_tbl->tmp_table) + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_delete_rows"); + if (tmp_table) { - int rc= sj_tbl->tmp_table->file->ha_delete_all_rows(); + int rc= tmp_table->file->ha_delete_all_rows(); DBUG_RETURN(rc); } - sj_tbl->have_degenerate_row= FALSE; + have_degenerate_row= FALSE; DBUG_RETURN(0); } + /* SemiJoinDuplicateElimination: Weed out duplicate row combinations SYNPOSIS - do_sj_dups_weedout() + sj_weedout_check_row() thd Thread handle - sjtbl Duplicate weedout table DESCRIPTION Try storing current record combination of outer tables (i.e. their @@ -3688,47 +3818,47 @@ int do_sj_reset(SJ_TMP_TABLE *sj_tbl) 0 The row combination is not a duplicate (continue) */ -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) +int SJ_TMP_TABLE::sj_weedout_check_row(THD *thd) { int error; - SJ_TMP_TABLE::TAB *tab= sjtbl->tabs; - SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end; + SJ_TMP_TABLE::TAB *tab= tabs; + SJ_TMP_TABLE::TAB *tab_end= tabs_end; uchar *ptr; uchar *nulls_ptr; - DBUG_ENTER("do_sj_dups_weedout"); + DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_check_row"); - if (sjtbl->is_degenerate) + if (is_degenerate) { - if (sjtbl->have_degenerate_row) + if (have_degenerate_row) DBUG_RETURN(1); - sjtbl->have_degenerate_row= TRUE; + have_degenerate_row= TRUE; DBUG_RETURN(0); } - ptr= sjtbl->tmp_table->record[0] + 1; + ptr= tmp_table->record[0] + 1; /* Put the the rowids tuple into table->record[0]: */ // 1. Store the length - if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1) + if (((Field_varstring*)(tmp_table->field[0]))->length_bytes == 1) { - *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes); + *ptr= (uchar)(rowid_len + null_bytes); ptr++; } else { - int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes); + int2store(ptr, rowid_len + null_bytes); ptr += 2; } nulls_ptr= ptr; // 2. Zero the null bytes - if (sjtbl->null_bytes) + if (null_bytes) { - bzero(ptr, sjtbl->null_bytes); - ptr += sjtbl->null_bytes; + bzero(ptr, null_bytes); + ptr += null_bytes; } // 3. Put the rowids @@ -3748,21 +3878,91 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) } } - error= sjtbl->tmp_table->file->ha_write_tmp_row(sjtbl->tmp_table->record[0]); + error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]); if (error) { /* create_internal_tmp_table_from_heap will generate error if needed */ - if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) + if (!tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) DBUG_RETURN(1); /* Duplicate */ - if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table, - sjtbl->start_recinfo, - &sjtbl->recinfo, error, 1)) + if (create_internal_tmp_table_from_heap(thd, tmp_table, start_recinfo, + &recinfo, error, 1)) DBUG_RETURN(-1); } DBUG_RETURN(0); } +int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint n_tables) +{ + THD *thd= join->thd; + DBUG_ENTER("init_dups_weedout"); + SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; + SJ_TMP_TABLE::TAB *last_tab= sjtabs; + uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) + uint jt_null_bits= 0; // # null bits in tuple bytes + /* + Walk through the range and remember + - tables that need their rowids to be put into temptable + - the last outer table + */ + for (JOIN_TAB *j=join->join_tab + first_table; + j < join->join_tab + first_table + n_tables; j++) + { + if (sj_table_is_included(join, j)) + { + last_tab->join_tab= j; + last_tab->rowid_offset= jt_rowid_offset; + jt_rowid_offset += j->table->file->ref_length; + if (j->table->maybe_null) + { + last_tab->null_byte= jt_null_bits / 8; + last_tab->null_bit= jt_null_bits++; + } + last_tab++; + j->table->prepare_for_position(); + j->keep_current_rowid= TRUE; + } + } + + SJ_TMP_TABLE *sjtbl; + if (jt_rowid_offset) /* Temptable has at least one rowid */ + { + size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || + !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + memcpy(sjtbl->tabs, sjtabs, tabs_size); + sjtbl->is_degenerate= FALSE; + sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); + sjtbl->rowid_len= jt_rowid_offset; + sjtbl->null_bits= jt_null_bits; + sjtbl->null_bytes= (jt_null_bits + 7)/8; + if (sjtbl->create_sj_weedout_tmp_table(thd)) + DBUG_RETURN(TRUE); + join->sj_tmp_tables.push_back(sjtbl->tmp_table); + } + else + { + /* + This is a special case where the entire subquery predicate does + not depend on anything at all, ie this is + WHERE const IN (uncorrelated select) + */ + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjtbl->tmp_table= NULL; + sjtbl->is_degenerate= TRUE; + sjtbl->have_degenerate_row= FALSE; + } + + sjtbl->next_flush_table= join->join_tab[first_table].flush_weedout_table; + join->join_tab[first_table].flush_weedout_table= sjtbl; + join->join_tab[first_fanout_table].first_weedout_table= sjtbl; + join->join_tab[first_table + n_tables - 1].check_weed_out_table= sjtbl; + DBUG_RETURN(0); +} + + /* Setup the strategies to eliminate semi-join duplicates. @@ -3863,9 +4063,9 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint no_jbuf_after) { uint i; - THD *thd= join->thd; DBUG_ENTER("setup_semijoin_dups_elimination"); - + + POSITION *pos= join->best_positions + join->const_tables; for (i= join->const_tables ; i < join->top_join_tab_count; ) { @@ -3888,8 +4088,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, /* Calculate key length */ keylen= 0; - keyno= pos->loosescan_key; - for (uint kp=0; kp < pos->loosescan_parts; kp++) + keyno= pos->loosescan_picker.loosescan_key; + for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++) keylen += tab->table->key_info[keyno].key_part[kp].store_length; tab->loosescan_key_len= keylen; @@ -3906,6 +4106,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, forwards, but do not destroy other duplicate elimination methods. */ uint first_table= i; + uint join_cache_level= join->thd->variables.join_cache_level; for (uint j= i; j < i + pos->n_sj_tables; j++) { @@ -3929,70 +4130,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, } } - SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; - SJ_TMP_TABLE::TAB *last_tab= sjtabs; - uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) - uint jt_null_bits= 0; // # null bits in tuple bytes - /* - Walk through the range and remember - - tables that need their rowids to be put into temptable - - the last outer table - */ - for (JOIN_TAB *j=join->join_tab + first_table; - j < join->join_tab + i + pos->n_sj_tables; j++) - { - if (sj_table_is_included(join, j)) - { - last_tab->join_tab= j; - last_tab->rowid_offset= jt_rowid_offset; - jt_rowid_offset += j->table->file->ref_length; - if (j->table->maybe_null) - { - last_tab->null_byte= jt_null_bits / 8; - last_tab->null_bit= jt_null_bits++; - } - last_tab++; - j->table->prepare_for_position(); - j->keep_current_rowid= TRUE; - } - } - - SJ_TMP_TABLE *sjtbl; - if (jt_rowid_offset) /* Temptable has at least one rowid */ - { - size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || - !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - memcpy(sjtbl->tabs, sjtabs, tabs_size); - sjtbl->is_degenerate= FALSE; - sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); - sjtbl->rowid_len= jt_rowid_offset; - sjtbl->null_bits= jt_null_bits; - sjtbl->null_bytes= (jt_null_bits + 7)/8; - sjtbl->tmp_table= - create_duplicate_weedout_tmp_table(thd, - sjtbl->rowid_len + - sjtbl->null_bytes, - sjtbl); - join->sj_tmp_tables.push_back(sjtbl->tmp_table); - } - else - { - /* - This is a special case where the entire subquery predicate does - not depend on anything at all, ie this is - WHERE const IN (uncorrelated select) - */ - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - sjtbl->tmp_table= NULL; - sjtbl->is_degenerate= TRUE; - sjtbl->have_degenerate_row= FALSE; - } - join->join_tab[first_table].flush_weedout_table= sjtbl; - join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl; - + init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table); i+= pos->n_sj_tables; pos+= pos->n_sj_tables; break; diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 823b09a1f73..94c6153f0c1 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -40,7 +40,6 @@ ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, class Loose_scan_opt { -public: /* All methods must check this before doing anything else */ bool try_loosescan; @@ -71,6 +70,7 @@ public: uint best_max_loose_keypart; +public: Loose_scan_opt(): try_loosescan(FALSE), bound_sj_equalities(0), @@ -263,8 +263,8 @@ public: { pos->records_read= best_loose_scan_records; pos->key= best_loose_scan_start_key; - pos->loosescan_key= best_loose_scan_key; - pos->loosescan_parts= best_max_loose_keypart + 1; + pos->loosescan_picker.loosescan_key= best_loose_scan_key; + pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; pos->table= tab; // todo need ref_depend_map ? @@ -277,8 +277,7 @@ public: }; -void advance_sj_state(JOIN *join, const table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, +void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx, double *current_record_count, double *current_read_time, POSITION *loose_scan_pos); void restore_prev_sj_state(const table_map remaining_tables, @@ -289,10 +288,6 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join); bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab); bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab); -TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl); -int do_sj_reset(SJ_TMP_TABLE *sj_tbl); -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl); /* Temporary table used by semi-join DuplicateElimination strategy @@ -359,8 +354,11 @@ public: ENGINE_COLUMNDEF *start_recinfo; ENGINE_COLUMNDEF *recinfo; - /* Pointer to next table (next->start_idx > this->end_idx) */ - SJ_TMP_TABLE *next; + SJ_TMP_TABLE *next_flush_table; + + int sj_weedout_delete_rows(); + int sj_weedout_check_row(THD *thd); + bool create_sj_weedout_tmp_table(THD *thd); }; int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index f53bf738b86..af4b157ba90 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -2367,7 +2367,7 @@ enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr) int res= 0; if (!join_tab->check_weed_out_table || - !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table))) + !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd))) { set_curr_rec_link(rec_ptr); rc= (join_tab->next_select)(join, join_tab+1, 0); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 6b2c89de00e..d8c17012196 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const static bool find_best(JOIN *join,table_map rest_tables,uint index, double record_count,double read_time); static uint cache_record_length(JOIN *join,uint index); -static bool get_best_combination(JOIN *join); +bool get_best_combination(JOIN *join); static store_key *get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, KEY_PART_INFO *key_part, uchar *key_buff, @@ -4887,7 +4887,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].records_read=1.0; /* This is a const table */ join->positions[idx].ref_depend_map= 0; - join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ +// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ join->positions[idx].sj_strategy= SJ_OPT_NONE; join->positions[idx].use_join_buffer= FALSE; @@ -5537,7 +5537,7 @@ best_access_path(JOIN *join, pos->key= best_key; pos->table= s; pos->ref_depend_map= best_ref_depends_map; - pos->loosescan_key= MAX_KEY; + pos->loosescan_picker.loosescan_key= MAX_KEY; pos->use_join_buffer= best_uses_jbuf; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -5844,7 +5844,7 @@ optimize_straight_join(JOIN *join, table_map join_tables) /* compute the cost of the new plan extended with 's' */ record_count*= join->positions[idx].records_read; read_time+= join->positions[idx].read_time; - advance_sj_state(join, join_tables, s, idx, &record_count, &read_time, + advance_sj_state(join, join_tables, idx, &record_count, &read_time, &loose_scan_pos); join_tables&= ~(s->table->map); @@ -6360,7 +6360,7 @@ best_extension_by_limited_search(JOIN *join, current_record_count= record_count * position->records_read; current_read_time= read_time + position->read_time; - advance_sj_state(join, remaining_tables, s, idx, ¤t_record_count, + advance_sj_state(join, remaining_tables, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ @@ -6517,7 +6517,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count, */ double current_record_count=record_count*records; double current_read_time=read_time+best; - advance_sj_state(join, rest_tables, s, idx, ¤t_record_count, + advance_sj_state(join, rest_tables, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); if (best_record_count > current_record_count || @@ -7017,7 +7017,7 @@ static Item * const null_ptr= NULL; TRUE Out of memory */ -static bool +bool get_best_combination(JOIN *join) { uint tablenr; @@ -7095,13 +7095,6 @@ get_best_combination(JOIN *join) *j= *join->best_positions[tablenr].table; -#if 0 -/* SJ-Materialization is represented with join tab ranges */ - if (j->sj_strategy == SJ_OPT_MATERIALIZE || - j->sj_strategy == SJ_OPT_MATERIALIZE) - j->sj_strategy= SJ_OPT_NONE; -#endif - j->bush_root_tab= sjm_nest_root; form=join->table[tablenr]=j->table; @@ -7124,7 +7117,7 @@ get_best_combination(JOIN *join) (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { j->type=JT_ALL; - j->index= join->best_positions[tablenr].loosescan_key; + j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key; if (tablenr != join->const_tables) join->full_join=1; } @@ -15107,10 +15100,12 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) int error; enum_nested_loop_state rc= NESTED_LOOP_OK; READ_RECORD *info= &join_tab->read_record; - - if (join_tab->flush_weedout_table) + + for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table; + flush_dups_table; + flush_dups_table= flush_dups_table->next_flush_table) { - do_sj_reset(join_tab->flush_weedout_table); + flush_dups_table->sj_weedout_delete_rows(); } if (!join_tab->preread_init_done && join_tab->preread_init()) @@ -15326,7 +15321,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, if (join_tab->check_weed_out_table && found) { - int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table); + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); if (res == -1) DBUG_RETURN(NESTED_LOOP_ERROR); else if (res == 1) @@ -15450,7 +15445,7 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab) */ if (join_tab->check_weed_out_table) { - int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table); + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); if (res == -1) return NESTED_LOOP_ERROR; else if (res == 1) @@ -21033,7 +21028,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, extra.append(STRING_WITH_LEN("; LooseScan")); } - if (tab->flush_weedout_table) + if (tab->first_weedout_table) extra.append(STRING_WITH_LEN("; Start temporary")); if (tab->check_weed_out_table) extra.append(STRING_WITH_LEN("; End temporary")); diff --git a/sql/sql_select.h b/sql/sql_select.h index 1d18b80d72e..4333b825c28 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -158,6 +158,17 @@ enum enum_nested_loop_state }; +/* Possible sj_strategy values */ +enum sj_strategy_enum +{ + SJ_OPT_NONE=0, + SJ_OPT_DUPS_WEEDOUT=1, + SJ_OPT_LOOSE_SCAN =2, + SJ_OPT_FIRST_MATCH =3, + SJ_OPT_MATERIALIZE =4, + SJ_OPT_MATERIALIZE_SCAN=5 +}; + /* Values for JOIN_TAB::packed_info */ #define TAB_INFO_HAVE_VALUE 1 #define TAB_INFO_USING_INDEX 2 @@ -330,6 +341,8 @@ typedef struct st_join_table { /* Variables for semi-join duplicate elimination */ SJ_TMP_TABLE *flush_weedout_table; SJ_TMP_TABLE *check_weed_out_table; + /* for EXPLAIN only: */ + SJ_TMP_TABLE *first_weedout_table; /* If set, means we should stop join enumeration after we've got the first @@ -374,7 +387,7 @@ typedef struct st_join_table { POSITION::sj_strategy field. This field is set up by the fix_semijoin_strategies_for_picked_join_order. */ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; uint n_sj_tables; @@ -500,6 +513,214 @@ enum_nested_loop_state end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); +/* psergey */ + + +struct st_position; + +class Semi_join_strategy_picker +{ +public: + /* Called when starting to build a new join prefix */ + virtual void set_empty() = 0; + + /* + Update internal state after another table has been added to the join + prefix + */ + virtual void set_from_prev(struct st_position *prev) = 0; + + virtual bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos) = 0; + + virtual void mark_used() = 0; + + virtual ~Semi_join_strategy_picker() {} +}; + + +/* + Duplicate Weedout strategy optimization state +*/ + +class Duplicate_weedout_picker : public Semi_join_strategy_picker +{ + /* The first table that the strategy will need to handle */ + uint first_dupsweedout_table; + + /* + Tables that we will need to have in the prefix to do the weedout step + (all inner and all outer that the involved semi-joins are correlated with) + */ + table_map dupsweedout_tables; + + bool is_used; +public: + void set_empty() + { + dupsweedout_tables= 0; + first_dupsweedout_table= MAX_TABLES; + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *stratey, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Firstmatch_picker : public Semi_join_strategy_picker +{ + /* + Index of the first inner table that we intend to handle with this + strategy + */ + uint first_firstmatch_table; + /* + Tables that were not in the join prefix when we've started considering + FirstMatch strategy. + */ + table_map first_firstmatch_rtbl; + /* + Tables that need to be in the prefix before we can calculate the cost + of using FirstMatch strategy. + */ + table_map firstmatch_need_tables; + + bool is_used; + + bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } + void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } +public: + void set_empty() + { + invalidate_firstmatch_prefix(); + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + + void mark_used() { is_used= TRUE; } + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class LooseScan_picker : public Semi_join_strategy_picker +{ + /* The first (i.e. driving) table we're doing loose scan for */ + uint first_loosescan_table; + /* + Tables that need to be in the prefix before we can calculate the cost + of using LooseScan strategy. + */ + table_map loosescan_need_tables; + + /* + keyno - Planning to do LooseScan on this key. If keyuse is NULL then + this is a full index scan, otherwise this is a ref+loosescan + scan (and keyno matches the KEUSE's) + MAX_KEY - Not doing a LooseScan + */ + uint loosescan_key; // final (one for strategy instance ) + uint loosescan_parts; /* Number of keyparts to be kept distinct */ + + bool is_used; +public: + void set_empty() + { + first_loosescan_table= MAX_TABLES; + is_used= FALSE; + } + + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend class Loose_scan_opt; + friend void best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + uint idx, + bool disable_jbuf, + double record_count, + struct st_position *pos, + struct st_position *loose_scan_pos); + friend bool get_best_combination(JOIN *join); + friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after); + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + + +class Sj_materialization_picker : public Semi_join_strategy_picker +{ + bool is_used; + + /* The last inner table (valid once we're after it) */ + uint sjm_scan_last_inner; + /* + Tables that we need to have in the prefix to calculate the correct cost. + Basically, we need all inner tables and outer tables mentioned in the + semi-join's ON expression so we can correctly account for fanout. + */ + table_map sjm_scan_need_tables; + +public: + void set_empty() + { + sjm_scan_need_tables= 0; + LINT_INIT(sjm_scan_last_inner); + is_used= FALSE; + } + void set_from_prev(struct st_position *prev); + bool check_qep(JOIN *join, + uint idx, + table_map remaining_tables, + const JOIN_TAB *new_join_tab, + double *record_count, + double *read_time, + table_map *handled_fanout, + sj_strategy_enum *strategy, + struct st_position *loose_scan_pos); + void mark_used() { is_used= TRUE; } + + friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +}; + /** Information about a position of table within a join order. Used in join @@ -507,6 +728,9 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), */ typedef struct st_position { + /* The table that's put into join order */ + JOIN_TAB *table; + /* The "fanout": number of output rows that will be produced (after pushed down selection condition is applied) per each row combination of @@ -520,7 +744,10 @@ typedef struct st_position number the access method will be invoked. */ double read_time; - JOIN_TAB *table; + + /* Cumulative cost and record count for the join prefix */ + COST_VECT prefix_cost; + double prefix_record_count; /* NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used. @@ -530,14 +757,13 @@ typedef struct st_position /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map; - - bool use_join_buffer; - - - /* These form a stack of partial join order costs and output sizes */ - COST_VECT prefix_cost; - double prefix_record_count; - + + /* + TRUE <=> join buffering will be used. At the moment this is based on + *very* imprecise guesses made in best_access_path(). + */ + bool use_join_buffer; + /* Current optimization state: Semi-join strategy to be used for this and preceding join tables. @@ -550,7 +776,8 @@ typedef struct st_position this applies to. The values of covered_preceding_positions->sj_strategy must be ignored. */ - uint sj_strategy; + enum sj_strategy_enum sj_strategy; + /* Valid only after fix_semijoin_strategies_for_picked_join_order() call: if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that @@ -558,67 +785,15 @@ typedef struct st_position */ uint n_sj_tables; -/* LooseScan strategy members */ - - /* The first (i.e. driving) table we're doing loose scan for */ - uint first_loosescan_table; - /* - Tables that need to be in the prefix before we can calculate the cost - of using LooseScan strategy. - */ - table_map loosescan_need_tables; - - /* - keyno - Planning to do LooseScan on this key. If keyuse is NULL then - this is a full index scan, otherwise this is a ref+loosescan - scan (and keyno matches the KEUSE's) - MAX_KEY - Not doing a LooseScan - */ - uint loosescan_key; // final (one for strategy instance ) - uint loosescan_parts; /* Number of keyparts to be kept distinct */ - -/* FirstMatch strategy */ - /* - Index of the first inner table that we intend to handle with this - strategy - */ - uint first_firstmatch_table; - /* - Tables that were not in the join prefix when we've started considering - FirstMatch strategy. - */ - table_map first_firstmatch_rtbl; - /* - Tables that need to be in the prefix before we can calculate the cost - of using FirstMatch strategy. - */ - table_map firstmatch_need_tables; - - bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } - void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } - -/* Duplicate Weedout strategy */ - /* The first table that the strategy will need to handle */ - uint first_dupsweedout_table; - /* - Tables that we will need to have in the prefix to do the weedout step - (all inner and all outer that the involved semi-joins are correlated with) - */ - table_map dupsweedout_tables; - -/* SJ-Materialization-Scan strategy */ - /* The last inner table (valid once we're after it) */ - uint sjm_scan_last_inner; - /* - Tables that we need to have in the prefix to calculate the correct cost. - Basically, we need all inner tables and outer tables mentioned in the - semi-join's ON expression so we can correctly account for fanout. - */ - table_map sjm_scan_need_tables; - table_map prefix_dups_producing_tables; -} POSITION; + table_map inner_tables_handled_with_other_sjs; + + Duplicate_weedout_picker dups_weedout_picker; + Firstmatch_picker firstmatch_picker; + LooseScan_picker loosescan_picker; + Sj_materialization_picker sjmat_picker; +} POSITION; typedef struct st_rollup { @@ -630,18 +805,6 @@ typedef struct st_rollup } ROLLUP; -#define SJ_OPT_NONE 0 -#define SJ_OPT_DUPS_WEEDOUT 1 -#define SJ_OPT_LOOSE_SCAN 2 -#define SJ_OPT_FIRST_MATCH 3 -#define SJ_OPT_MATERIALIZE 4 -#define SJ_OPT_MATERIALIZE_SCAN 5 - -inline bool sj_is_materialize_strategy(uint strategy) -{ - return strategy >= SJ_OPT_MATERIALIZE; -} - class JOIN_TAB_RANGE: public Sql_alloc { public: @@ -812,7 +975,7 @@ public: they produce. */ table_map cur_dups_producing_tables; - + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ /* |