summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorSergey Petrunya <psergey@askmonty.org>2011-11-23 04:25:52 +0400
committerSergey Petrunya <psergey@askmonty.org>2011-11-23 04:25:52 +0400
commit694ce95557bc174dae2bc32279024102848cf5ee (patch)
treea8bfdfa04a772a3eb4b44fee12a28624102bde71 /sql
parent7f746fbe74e08d79217bdf7c7cba628e3b6bef85 (diff)
downloadmariadb-git-694ce95557bc174dae2bc32279024102848cf5ee.tar.gz
Semi-join optimizations code cleanup:
- Break down POSITION/advance_sj_state() into four classes representing potential semi-join strategies. - Treat all strategies uniformly (before, DuplicateWeedout was special as it was the catch-all strategy. Now, we're still relying on it to be the catch-all, but are able to function, e.g. with firstmatch=on,duplicate_weedout=off. - Update test results (checked)
Diffstat (limited to 'sql')
-rw-r--r--sql/opt_subselect.cc784
-rw-r--r--sql/opt_subselect.h7
-rw-r--r--sql/sql_select.cc23
-rw-r--r--sql/sql_select.h327
4 files changed, 716 insertions, 425 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index fdda502e706..c7d07c21746 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2168,229 +2168,178 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
See setup_semijoin_dups_elimination() for a description of what kinds of
join prefixes each strategy can handle.
*/
+bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables)
+{
+ for (int i= (int)idx; i >= 0; i--)
+ {
+ TABLE_LIST *emb_sj_nest;
+ if ((emb_sj_nest= prefix[i].table->emb_sj_nest))
+ {
+ if (inner_tables & emb_sj_nest->sj_inner_tables)
+ return !test(inner_tables == emb_sj_nest->sj_inner_tables);
+ }
+ }
+ return FALSE;
+}
-void advance_sj_state(JOIN *join, table_map remaining_tables,
- const JOIN_TAB *new_join_tab, uint idx,
- double *current_record_count, double *current_read_time,
+
+void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
+ double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos)
{
- TABLE_LIST *emb_sj_nest;
POSITION *pos= join->positions + idx;
- remaining_tables &= ~new_join_tab->table->map;
- bool disable_jbuf= join->thd->variables.join_cache_level == 0;
-
- pos->prefix_cost.convert_from_cost(*current_read_time);
- pos->prefix_record_count= *current_record_count;
- pos->sj_strategy= SJ_OPT_NONE;
-
- pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+ const JOIN_TAB *new_join_tab= pos->table;
+ Semi_join_strategy_picker *pickers[]=
+ {
+ &pos->firstmatch_picker,
+ &pos->loosescan_picker,
+ &pos->sjmat_picker,
+ &pos->dups_weedout_picker,
+ NULL,
+ };
- /* We're performing optimization inside SJ-Materialization nest */
if (join->emb_sjm_nest)
{
- pos->invalidate_firstmatch_prefix();
- pos->first_loosescan_table= MAX_TABLES;
- pos->dupsweedout_tables= 0;
- pos->sjm_scan_need_tables= 0;
+ /*
+ We're performing optimization inside SJ-Materialization nest:
+ - there are no other semi-joins inside semi-join nests
+ - attempts to build semi-join strategies here will confuse
+ the optimizer, so bail out.
+ */
return;
}
- /* Initialize the state or copy it from prev. tables */
- if (idx == join->const_tables)
+ /*
+ Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
+ LooseScan detector in best_access_path)
+ */
+ remaining_tables &= ~new_join_tab->table->map;
+ pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+ TABLE_LIST *emb_sj_nest;
+ if ((emb_sj_nest= new_join_tab->emb_sj_nest))
{
- pos->invalidate_firstmatch_prefix();
- pos->first_loosescan_table= MAX_TABLES;
- pos->dupsweedout_tables= 0;
- pos->sjm_scan_need_tables= 0;
- LINT_INIT(pos->sjm_scan_last_inner);
+ /// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+ join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+
+ /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+ /// if (!(remaining_tables &
+ /// emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+ /// join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
}
- else
+
+ Semi_join_strategy_picker **strategy;
+ if (idx == join->const_tables)
{
- // FirstMatch
- pos->first_firstmatch_table=
- (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
- MAX_TABLES : pos[-1].first_firstmatch_table;
- pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
- pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
-
- // LooseScan
- pos->first_loosescan_table=
- (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
- MAX_TABLES : pos[-1].first_loosescan_table;
- pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
-
- // SJ-Materialization Scan
- pos->sjm_scan_need_tables=
- (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
- 0 : pos[-1].sjm_scan_need_tables;
- pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
-
- // Duplicate Weedout
- pos->dupsweedout_tables= pos[-1].dupsweedout_tables;
- pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
+ /* First table, initialize pickers */
+ for (strategy= pickers; *strategy != NULL; strategy++)
+ (*strategy)->set_empty();
+ pos->inner_tables_handled_with_other_sjs= 0;
}
-
- table_map handled_by_fm_or_ls= 0;
- /* FirstMatch Strategy */
- if (new_join_tab->emb_sj_nest &&
- optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
- !join->outer_join)
+ else
{
- const table_map outer_corr_tables=
- new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
- new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
- const table_map sj_inner_tables=
- new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
-
- /*
- Enter condition:
- 1. The next join tab belongs to semi-join nest
- (verified for the encompassing code block above).
- 2. We're not in a duplicate producer range yet
- 3. All outer tables that
- - the subquery is correlated with, or
- - referred to from the outer_expr
- are in the join prefix
- 4. All inner tables are still part of remaining_tables.
- */
- if (!join->cur_sj_inner_tables && // (2)
- !(remaining_tables & outer_corr_tables) && // (3)
- (sj_inner_tables == // (4)
- ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
+ for (strategy= pickers; *strategy != NULL; strategy++)
{
- /* Start tracking potential FirstMatch range */
- pos->first_firstmatch_table= idx;
- pos->firstmatch_need_tables= sj_inner_tables;
- pos->first_firstmatch_rtbl= remaining_tables;
+ (*strategy)->set_from_prev(pos - 1);
}
+ pos->inner_tables_handled_with_other_sjs=
+ pos[-1].inner_tables_handled_with_other_sjs;
+ }
+
+ pos->prefix_cost.convert_from_cost(*current_read_time);
+ pos->prefix_record_count= *current_record_count;
+
+ {
+ pos->sj_strategy= SJ_OPT_NONE;
- if (pos->in_firstmatch_prefix())
+ for (strategy= pickers; *strategy != NULL; strategy++)
{
- if (outer_corr_tables & pos->first_firstmatch_rtbl)
- {
- /*
- Trying to add an sj-inner table whose sj-nest has an outer correlated
- table that was not in the prefix. This means FirstMatch can't be used.
- */
- pos->invalidate_firstmatch_prefix();
- }
- else
- {
- /* Record that we need all of this semi-join's inner tables, too */
- pos->firstmatch_need_tables|= sj_inner_tables;
- }
-
- if (pos->in_firstmatch_prefix() &&
- !(pos->firstmatch_need_tables & remaining_tables))
+ table_map handled_fanout;
+ sj_strategy_enum sj_strategy;
+ double rec_count= *current_record_count;
+ double read_time= *current_read_time;
+ if ((*strategy)->check_qep(join, idx, remaining_tables,
+ new_join_tab,
+ &rec_count,
+ &read_time,
+ &handled_fanout,
+ &sj_strategy,
+ loose_scan_pos))
{
/*
- Got a complete FirstMatch range.
- Calculate correct costs and fanout
- */
- optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx,
- remaining_tables, FALSE, idx,
- current_record_count,
- current_read_time);
- /*
- We don't yet know what are the other strategies, so pick the
- FirstMatch.
-
- We ought to save the alternate POSITIONs produced by
- optimize_wo_join_buffering but the problem is that providing save
- space uses too much space. Instead, we will re-calculate the
- alternate POSITIONs after we've picked the best QEP.
+ It's possible to use the strategy. Use it, if
+ - it removes semi-join fanout that was not removed before
+ - using it is cheaper than using something else,
+ and {if some other strategy has removed fanout
+ that this strategy is trying to remove, then it
+ did remove the fanout only for one semi-join}
+ This is to avoid a situation when
+ 1. strategy X removes fanout for semijoin X,Y
+ 2. using strategy Z is cheaper, but it only removes
+ fanout from semijoin X.
+           3. We have no clue what to do about fanout of semi-join Y.
*/
- pos->sj_strategy= SJ_OPT_FIRST_MATCH;
- handled_by_fm_or_ls= pos->firstmatch_need_tables;
+ if ((join->cur_dups_producing_tables & handled_fanout) ||
+ (read_time < *current_read_time &&
+ !(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
+ {
+ /* Mark strategy as used */
+ (*strategy)->mark_used();
+ pos->sj_strategy= sj_strategy;
+ *current_read_time= read_time;
+ *current_record_count= rec_count;
+ join->cur_dups_producing_tables &= ~handled_fanout;
+ //TODO: update bitmap of semi-joins that were handled together with
+ // others.
+ if (is_multiple_semi_joins(join->positions, idx, handled_fanout))
+ pos->inner_tables_handled_with_other_sjs |= handled_fanout;
+ }
+ else
+ {
+ /* We decided not to apply the strategy. */
+ (*strategy)->set_empty();
+ }
}
}
}
- /* LooseScan Strategy */
- {
- POSITION *first=join->positions+pos->first_loosescan_table;
- /*
- LooseScan strategy can't handle interleaving between tables from the
- semi-join that LooseScan is handling and any other tables.
-
- If we were considering LooseScan for the join prefix (1)
- and the table we're adding creates an interleaving (2)
- then
- stop considering loose scan
- */
- if ((pos->first_loosescan_table != MAX_TABLES) && // (1)
- (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
- new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
- {
- pos->first_loosescan_table= MAX_TABLES;
- }
-
- /*
- If we got an option to use LooseScan for the current table, start
- considering using LooseScan strategy
- */
- if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
- {
- pos->first_loosescan_table= idx;
- pos->loosescan_need_tables=
- new_join_tab->emb_sj_nest->sj_inner_tables |
- new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
- new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
- }
-
- if ((pos->first_loosescan_table != MAX_TABLES) &&
- !(remaining_tables & pos->loosescan_need_tables) &&
- (pos->table->table->map & pos->loosescan_need_tables))
- {
- /*
- Ok we have LooseScan plan and also have all LooseScan sj-nest's
- inner tables and outer correlated tables into the prefix.
- */
-
- first=join->positions + pos->first_loosescan_table;
- uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
- /* Got a complete LooseScan range. Calculate its cost */
- /*
- The same problem as with FirstMatch - we need to save POSITIONs
- somewhere but reserving space for all cases would require too
- much space. We will re-calculate POSITION structures later on.
- */
- optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
- remaining_tables,
- TRUE, //first_alt
- disable_jbuf ? join->table_count :
- pos->first_loosescan_table + n_tables,
- current_record_count,
- current_read_time);
- /*
- We don't yet have any other strategies that could handle this
- semi-join nest (the other options are Duplicate Elimination or
- Materialization, which need at least the same set of tables in
- the join prefix to be considered) so unconditionally pick the
- LooseScan.
- */
- pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
- handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
- }
- }
-
- /*
- Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
- LooseScan detector in best_access_path)
- */
if ((emb_sj_nest= new_join_tab->emb_sj_nest))
{
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
- join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
if (!(remaining_tables &
emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
}
- join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
- /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
+ pos->prefix_cost.convert_from_cost(*current_read_time);
+ pos->prefix_record_count= *current_record_count;
+}
+
+
+void Sj_materialization_picker::set_from_prev(struct st_position *prev)
+{
+ if (prev->sjmat_picker.is_used)
+ set_empty();
+ else
+ {
+ sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables;
+ sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner;
+ }
+ is_used= FALSE;
+}
+
+
+bool Sj_materialization_picker::check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ POSITION *loose_scan_pos)
+{
bool sjm_scan;
SJ_MATERIALIZATION_INFO *mat_info;
if ((mat_info= at_sjmat_pos(join, remaining_tables,
@@ -2417,11 +2366,11 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
The simple way to model this is to remove SJM-SCAN(...) fanout once
we reach the point #2.
*/
- pos->sjm_scan_need_tables=
+ sjm_scan_need_tables=
new_join_tab->emb_sj_nest->sj_inner_tables |
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
- pos->sjm_scan_last_inner= idx;
+ sjm_scan_last_inner= idx;
}
else
{
@@ -2444,34 +2393,31 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
mat_read_time += mat_info->materialization_cost.total_cost() +
prefix_rec_count * mat_info->lookup_cost.total_cost();
- if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
- {
- /*
- NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
- elements to join->positions as that makes it hard to return things
- back when making one step back in join optimization. That's done
- after the QEP has been chosen.
- */
- pos->sj_strategy= SJ_OPT_MATERIALIZE;
- *current_read_time= mat_read_time;
- *current_record_count= prefix_rec_count;
- join->cur_dups_producing_tables&=
- ~new_join_tab->emb_sj_nest->sj_inner_tables;
- }
+ /*
+ NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+ elements to join->positions as that makes it hard to return things
+ back when making one step back in join optimization. That's done
+ after the QEP has been chosen.
+ */
+ *read_time= mat_read_time;
+ *record_count= prefix_rec_count;
+ *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables;
+ *strategy= SJ_OPT_MATERIALIZE;
+ return TRUE;
}
}
/* 4.A SJM-Scan second phase check */
- if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
- !(pos->sjm_scan_need_tables & remaining_tables))
+ if (sjm_scan_need_tables && /* Have SJM-Scan prefix */
+ !(sjm_scan_need_tables & remaining_tables))
{
TABLE_LIST *mat_nest=
- join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
+ join->positions[sjm_scan_last_inner].table->emb_sj_nest;
SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
double prefix_cost;
double prefix_rec_count;
- int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
+ int first_tab= sjm_scan_last_inner + 1 - mat_info->tables;
/* Get the prefix cost */
if (first_tab == (int)join->const_tables)
{
@@ -2496,6 +2442,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
POSITION curpos, dummy;
/* Need to re-run best-access-path as we prefix_rec_count has changed */
+ bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
for (i= first_tab + mat_info->tables; i <= idx; i++)
{
best_access_path(join, join->positions[i].table, rem_tables, i,
@@ -2504,142 +2451,333 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
prefix_cost += curpos.read_time;
}
+ *strategy= SJ_OPT_MATERIALIZE_SCAN;
+ *read_time= prefix_cost;
+ *record_count= prefix_rec_count;
+ *handled_fanout= mat_nest->sj_inner_tables;
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+void LooseScan_picker::set_from_prev(struct st_position *prev)
+{
+ if (prev->loosescan_picker.is_used)
+ set_empty();
+ else
+ {
+ first_loosescan_table= prev->loosescan_picker.first_loosescan_table;
+ loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables;
+ }
+ is_used= FALSE;
+}
+
+
+bool LooseScan_picker::check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ struct st_position *loose_scan_pos)
+{
+ POSITION *first= join->positions + first_loosescan_table;
+ /*
+ LooseScan strategy can't handle interleaving between tables from the
+ semi-join that LooseScan is handling and any other tables.
+
+ If we were considering LooseScan for the join prefix (1)
+ and the table we're adding creates an interleaving (2)
+ then
+ stop considering loose scan
+ */
+ if ((first_loosescan_table != MAX_TABLES) && // (1)
+ (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+ new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
+ {
+ first_loosescan_table= MAX_TABLES;
+ }
+
+ /*
+ If we got an option to use LooseScan for the current table, start
+ considering using LooseScan strategy
+ */
+ if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
+ {
+ first_loosescan_table= idx;
+ loosescan_need_tables=
+ new_join_tab->emb_sj_nest->sj_inner_tables |
+ new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+ new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+ }
+
+ if ((first_loosescan_table != MAX_TABLES) &&
+ !(remaining_tables & loosescan_need_tables) &&
+ (new_join_tab->table->map & loosescan_need_tables))
+ {
+ /*
+ Ok we have LooseScan plan and also have all LooseScan sj-nest's
+ inner tables and outer correlated tables into the prefix.
+ */
+
+ first= join->positions + first_loosescan_table;
+ uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
+ /* Got a complete LooseScan range. Calculate its cost */
/*
- Use the strategy if
- * it is cheaper then what we've had, or
- * we haven't picked any other semi-join strategy yet
- In the second case, we pick this strategy unconditionally because
- comparing cost without semi-join duplicate removal with cost with
- duplicate removal is not an apples-to-apples comparison.
+ The same problem as with FirstMatch - we need to save POSITIONs
+ somewhere but reserving space for all cases would require too
+ much space. We will re-calculate POSITION structures later on.
*/
- if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
- {
- pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
- *current_read_time= prefix_cost;
- *current_record_count= prefix_rec_count;
- join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables;
+ bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
+ optimize_wo_join_buffering(join, first_loosescan_table, idx,
+ remaining_tables,
+ TRUE, //first_alt
+ disable_jbuf ? join->table_count :
+ first_loosescan_table + n_tables,
+ record_count,
+ read_time);
+ /*
+ We don't yet have any other strategies that could handle this
+ semi-join nest (the other options are Duplicate Elimination or
+ Materialization, which need at least the same set of tables in
+ the join prefix to be considered) so unconditionally pick the
+ LooseScan.
+ */
+ *strategy= SJ_OPT_LOOSE_SCAN;
+ *handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
+ return TRUE;
+ }
+ return FALSE;
+}
- }
+void Firstmatch_picker::set_from_prev(struct st_position *prev)
+{
+ if (prev->firstmatch_picker.is_used)
+ invalidate_firstmatch_prefix();
+ else
+ {
+ first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table;
+ first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl;
+ firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables;
}
+ is_used= FALSE;
+}
- /* 5. Duplicate Weedout strategy handler */
+bool Firstmatch_picker::check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ POSITION *loose_scan_pos)
+{
+ if (new_join_tab->emb_sj_nest &&
+ optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
+ !join->outer_join)
{
+ const table_map outer_corr_tables=
+ new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+ new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+ const table_map sj_inner_tables=
+ new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
+
/*
- Duplicate weedout can be applied after all ON-correlated and
- correlated
+ Enter condition:
+ 1. The next join tab belongs to semi-join nest
+ (verified for the encompassing code block above).
+ 2. We're not in a duplicate producer range yet
+ 3. All outer tables that
+ - the subquery is correlated with, or
+ - referred to from the outer_expr
+ are in the join prefix
+ 4. All inner tables are still part of remaining_tables.
*/
- TABLE_LIST *nest;
- if ((nest= new_join_tab->emb_sj_nest))
- {
- if (!pos->dupsweedout_tables)
- pos->first_dupsweedout_table= idx;
-
- pos->dupsweedout_tables |= nest->sj_inner_tables |
- nest->nested_join->sj_depends_on |
- nest->nested_join->sj_corr_tables;
- }
-
- if (pos->dupsweedout_tables)
+ if (!join->cur_sj_inner_tables && // (2)
+ !(remaining_tables & outer_corr_tables) && // (3)
+ (sj_inner_tables == // (4)
+ ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
{
- /* we're in the process of constructing a DuplicateWeedout range */
- TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
- /* and we've entered an inner side of an outer join*/
- if (emb && emb->on_expr)
- pos->dupsweedout_tables |= emb->nested_join->used_tables;
+ /* Start tracking potential FirstMatch range */
+ first_firstmatch_table= idx;
+ firstmatch_need_tables= sj_inner_tables;
+ first_firstmatch_rtbl= remaining_tables;
}
- if (pos->dupsweedout_tables &&
- !(remaining_tables &
- ~new_join_tab->table->map & pos->dupsweedout_tables))
+ if (in_firstmatch_prefix())
{
- /*
- Ok, reached a state where we could put a dups weedout point.
- Walk back and calculate
- - the join cost (this is needed as the accumulated cost may assume
- some other duplicate elimination method)
- - extra fanout that will be removed by duplicate elimination
- - duplicate elimination cost
- There are two cases:
- 1. We have other strategy/ies to remove all of the duplicates.
- 2. We don't.
-
- We need to calculate the cost in case #2 also because we need to make
- choice between this join order and others.
- */
- uint first_tab= pos->first_dupsweedout_table;
- double dups_cost;
- double prefix_rec_count;
- double sj_inner_fanout= 1.0;
- double sj_outer_fanout= 1.0;
- uint temptable_rec_size;
- if (first_tab == join->const_tables)
+ if (outer_corr_tables & first_firstmatch_rtbl)
{
- prefix_rec_count= 1.0;
- temptable_rec_size= 0;
- dups_cost= 0.0;
+ /*
+ Trying to add an sj-inner table whose sj-nest has an outer correlated
+ table that was not in the prefix. This means FirstMatch can't be used.
+ */
+ invalidate_firstmatch_prefix();
}
else
{
- dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
- prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
- temptable_rec_size= 8; /* This is not true but we'll make it so */
+ /* Record that we need all of this semi-join's inner tables, too */
+ firstmatch_need_tables|= sj_inner_tables;
}
-
- table_map dups_removed_fanout= 0;
- for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
+
+ if (in_firstmatch_prefix() &&
+ !(firstmatch_need_tables & remaining_tables))
{
- POSITION *p= join->positions + j;
- dups_cost += p->read_time;
- if (p->table->emb_sj_nest)
- {
- sj_inner_fanout *= p->records_read;
- dups_removed_fanout |= p->table->table->map;
- }
- else
- {
- sj_outer_fanout *= p->records_read;
- temptable_rec_size += p->table->table->file->ref_length;
- }
+ /*
+ Got a complete FirstMatch range.
+ Calculate correct costs and fanout
+ */
+ optimize_wo_join_buffering(join, first_firstmatch_table, idx,
+ remaining_tables, FALSE, idx,
+ record_count,
+ read_time);
+ /*
+ We ought to save the alternate POSITIONs produced by
+ optimize_wo_join_buffering but the problem is that providing save
+ space uses too much space. Instead, we will re-calculate the
+ alternate POSITIONs after we've picked the best QEP.
+ */
+ *handled_fanout= firstmatch_need_tables;
+ /* *record_count and *read_time were set by the above call */
+ *strategy= SJ_OPT_FIRST_MATCH;
+ return TRUE;
}
+ }
+ }
+ return FALSE;
+}
- /*
- Add the cost of temptable use. The table will have sj_outer_fanout
- records, and we will make
- - sj_outer_fanout table writes
- - sj_inner_fanout*sj_outer_fanout lookups.
- */
- double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
- double one_write_cost= get_tmp_table_write_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
+void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
+{
+ if (prev->dups_weedout_picker.is_used)
+ set_empty();
+ else
+ {
+ dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables;
+ first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table;
+ }
+ is_used= FALSE;
+}
- double write_cost= join->positions[first_tab].prefix_record_count*
- sj_outer_fanout * one_write_cost;
- double full_lookup_cost= join->positions[first_tab].prefix_record_count*
- sj_outer_fanout* sj_inner_fanout *
- one_lookup_cost;
- dups_cost += write_cost + full_lookup_cost;
+
+bool Duplicate_weedout_picker::check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ POSITION *loose_scan_pos
+ )
+{
+ TABLE_LIST *nest;
+ if ((nest= new_join_tab->emb_sj_nest))
+ {
+ if (!dupsweedout_tables)
+ first_dupsweedout_table= idx;
+
+ dupsweedout_tables |= nest->sj_inner_tables |
+ nest->nested_join->sj_depends_on |
+ nest->nested_join->sj_corr_tables;
+ }
+
+ if (dupsweedout_tables)
+ {
+ /* we're in the process of constructing a DuplicateWeedout range */
+ TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
+ /* and we've entered an inner side of an outer join*/
+ if (emb && emb->on_expr)
+ dupsweedout_tables |= emb->nested_join->used_tables;
+ }
+
+ /* If this is the last table that we need for DuplicateWeedout range */
+ if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map &
+ dupsweedout_tables))
+ {
+ /*
+ Ok, reached a state where we could put a dups weedout point.
+ Walk back and calculate
+ - the join cost (this is needed as the accumulated cost may assume
+ some other duplicate elimination method)
+ - extra fanout that will be removed by duplicate elimination
+ - duplicate elimination cost
+ There are two cases:
+ 1. We have other strategy/ies to remove all of the duplicates.
+ 2. We don't.
- /*
- Use the strategy if
- * it is cheaper then what we've had, or
- * we haven't picked any other semi-join strategy yet
- The second part is necessary because this strategy is the last one
- to consider (it needs "the most" tables in the prefix) and we can't
- leave duplicate-producing tables not handled by any strategy.
- */
- if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
+ We need to calculate the cost in case #2 also because we need to make
+ choice between this join order and others.
+ */
+ uint first_tab= first_dupsweedout_table;
+ double dups_cost;
+ double prefix_rec_count;
+ double sj_inner_fanout= 1.0;
+ double sj_outer_fanout= 1.0;
+ uint temptable_rec_size;
+ if (first_tab == join->const_tables)
+ {
+ prefix_rec_count= 1.0;
+ temptable_rec_size= 0;
+ dups_cost= 0.0;
+ }
+ else
+ {
+ dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+ prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+ temptable_rec_size= 8; /* This is not true but we'll make it so */
+ }
+
+ table_map dups_removed_fanout= 0;
+ for (uint j= first_dupsweedout_table; j <= idx; j++)
+ {
+ POSITION *p= join->positions + j;
+ dups_cost += p->read_time;
+ if (p->table->emb_sj_nest)
+ {
+ sj_inner_fanout *= p->records_read;
+ dups_removed_fanout |= p->table->table->map;
+ }
+ else
{
- pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
- *current_read_time= dups_cost;
- *current_record_count= prefix_rec_count * sj_outer_fanout;
- join->cur_dups_producing_tables &= ~dups_removed_fanout;
+ sj_outer_fanout *= p->records_read;
+ temptable_rec_size += p->table->table->file->ref_length;
}
}
+
+ /*
+ Add the cost of temptable use. The table will have sj_outer_fanout
+ records, and we will make
+ - sj_outer_fanout table writes
+ - sj_inner_fanout*sj_outer_fanout lookups.
+
+ */
+ double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
+ sj_outer_fanout,
+ temptable_rec_size);
+ double one_write_cost= get_tmp_table_write_cost(join->thd,
+ sj_outer_fanout,
+ temptable_rec_size);
+
+ double write_cost= join->positions[first_tab].prefix_record_count*
+ sj_outer_fanout * one_write_cost;
+ double full_lookup_cost= join->positions[first_tab].prefix_record_count*
+ sj_outer_fanout* sj_inner_fanout *
+ one_lookup_cost;
+ dups_cost += write_cost + full_lookup_cost;
+
+ *read_time= dups_cost;
+ *record_count= prefix_rec_count * sj_outer_fanout;
+ *handled_fanout= dups_removed_fanout;
+ *strategy= SJ_OPT_DUPS_WEEDOUT;
+ return TRUE;
}
+ return FALSE;
}
@@ -2836,11 +2974,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
}
else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
{
- POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
+ POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner;
SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
sjm->is_used= TRUE;
sjm->is_sj_scan= TRUE;
- first= pos->sjm_scan_last_inner - sjm->tables + 1;
+ first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1;
memcpy(join->best_positions + first,
sjm->positions, sizeof(POSITION) * sjm->tables);
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
@@ -2878,7 +3016,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
{
- first= pos->first_firstmatch_table;
+ first= pos->firstmatch_picker.first_firstmatch_table;
join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
join->best_positions[first].n_sj_tables= tablenr - first + 1;
POSITION dummy; // For loose scan paths
@@ -2911,7 +3049,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
{
- first= pos->first_loosescan_table;
+ first= pos->loosescan_picker.first_loosescan_table;
POSITION *first_pos= join->best_positions + first;
POSITION loose_scan_pos; // For loose scan paths
double record_count= (first== join->const_tables)? 1.0:
@@ -2950,7 +3088,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
this table.
*/
- first= pos->first_dupsweedout_table;
+ first= pos->dups_weedout_picker.first_dupsweedout_table;
join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
join->best_positions[first].n_sj_tables= tablenr - first + 1;
}
@@ -3893,8 +4031,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
/* Calculate key length */
keylen= 0;
- keyno= pos->loosescan_key;
- for (uint kp=0; kp < pos->loosescan_parts; kp++)
+ keyno= pos->loosescan_picker.loosescan_key;
+ for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
tab->loosescan_key_len= keylen;
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index 571fcbaa935..7d560588995 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -263,8 +263,8 @@ public:
{
pos->records_read= best_loose_scan_records;
pos->key= best_loose_scan_start_key;
- pos->loosescan_key= best_loose_scan_key;
- pos->loosescan_parts= best_max_loose_keypart + 1;
+ pos->loosescan_picker.loosescan_key= best_loose_scan_key;
+ pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
pos->use_join_buffer= FALSE;
pos->table= tab;
// todo need ref_depend_map ?
@@ -277,8 +277,7 @@ public:
};
-void advance_sj_state(JOIN *join, const table_map remaining_tables,
- const JOIN_TAB *new_join_tab, uint idx,
+void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
void restore_prev_sj_state(const table_map remaining_tables,
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b9088a7ce60..ce20dfea32e 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const
static bool find_best(JOIN *join,table_map rest_tables,uint index,
double record_count,double read_time);
static uint cache_record_length(JOIN *join,uint index);
-static bool get_best_combination(JOIN *join);
+bool get_best_combination(JOIN *join);
static store_key *get_store_key(THD *thd,
KEYUSE *keyuse, table_map used_tables,
KEY_PART_INFO *key_part, uchar *key_buff,
@@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].ref_depend_map= 0;
- join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
join->positions[idx].sj_strategy= SJ_OPT_NONE;
join->positions[idx].use_join_buffer= FALSE;
@@ -5533,7 +5533,7 @@ best_access_path(JOIN *join,
pos->key= best_key;
pos->table= s;
pos->ref_depend_map= best_ref_depends_map;
- pos->loosescan_key= MAX_KEY;
+ pos->loosescan_picker.loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
loose_scan_opt.save_to_position(s, loose_scan_pos);
@@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
/* compute the cost of the new plan extended with 's' */
record_count*= join->positions[idx].records_read;
read_time+= join->positions[idx].read_time;
- advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
+ advance_sj_state(join, join_tables, idx, &record_count, &read_time,
&loose_scan_pos);
join_tables&= ~(s->table->map);
@@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join,
current_record_count= record_count * position->records_read;
current_read_time= read_time + position->read_time;
- advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
+ advance_sj_state(join, remaining_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
/* Expand only partial plans with lower cost than the best QEP so far */
@@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
*/
double current_record_count=record_count*records;
double current_read_time=read_time+best;
- advance_sj_state(join, rest_tables, s, idx, &current_record_count,
+ advance_sj_state(join, rest_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
if (best_record_count > current_record_count ||
@@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL;
TRUE Out of memory
*/
-static bool
+bool
get_best_combination(JOIN *join)
{
uint tablenr;
@@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join)
*j= *join->best_positions[tablenr].table;
-#if 0
-/* SJ-Materialization is represented with join tab ranges */
- if (j->sj_strategy == SJ_OPT_MATERIALIZE ||
- j->sj_strategy == SJ_OPT_MATERIALIZE)
- j->sj_strategy= SJ_OPT_NONE;
-#endif
-
j->bush_root_tab= sjm_nest_root;
form=join->table[tablenr]=j->table;
@@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join)
(join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
{
j->type=JT_ALL;
- j->index= join->best_positions[tablenr].loosescan_key;
+ j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
if (tablenr != join->const_tables)
join->full_join=1;
}
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 5476ef9b46c..21c5dc7d65e 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -158,6 +158,17 @@ enum enum_nested_loop_state
};
+/* Possible sj_strategy values */
+enum sj_strategy_enum
+{
+ SJ_OPT_NONE=0,
+ SJ_OPT_DUPS_WEEDOUT=1,
+ SJ_OPT_LOOSE_SCAN =2,
+ SJ_OPT_FIRST_MATCH =3,
+ SJ_OPT_MATERIALIZE =4,
+ SJ_OPT_MATERIALIZE_SCAN=5
+};
+
/* Values for JOIN_TAB::packed_info */
#define TAB_INFO_HAVE_VALUE 1
#define TAB_INFO_USING_INDEX 2
@@ -374,7 +385,7 @@ typedef struct st_join_table {
POSITION::sj_strategy field. This field is set up by the
fix_semijoin_strategies_for_picked_join_order.
*/
- uint sj_strategy;
+ enum sj_strategy_enum sj_strategy;
uint n_sj_tables;
@@ -496,6 +507,214 @@ enum_nested_loop_state
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
+/* Semi-join strategy pickers: one class per candidate strategy */
+
+
+struct st_position;
+
+class Semi_join_strategy_picker
+{
+public:
+ /* Called when starting to build a new join prefix */
+ virtual void set_empty() = 0;
+
+ /*
+ Update internal state after another table has been added to the join
+ prefix
+ */
+ virtual void set_from_prev(struct st_position *prev) = 0;
+
+ virtual bool check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ struct st_position *loose_scan_pos) = 0;
+
+ virtual void mark_used() = 0;
+
+ virtual ~Semi_join_strategy_picker() {}
+};
+
+
+/*
+ Duplicate Weedout strategy optimization state
+*/
+
+class Duplicate_weedout_picker : public Semi_join_strategy_picker
+{
+ /* The first table that the strategy will need to handle */
+ uint first_dupsweedout_table;
+
+ /*
+ Tables that we will need to have in the prefix to do the weedout step
+ (all inner and all outer that the involved semi-joins are correlated with)
+ */
+ table_map dupsweedout_tables;
+
+ bool is_used;
+public:
+ void set_empty()
+ {
+ dupsweedout_tables= 0;
+ first_dupsweedout_table= MAX_TABLES;
+ is_used= FALSE;
+ }
+ void set_from_prev(struct st_position *prev);
+
+ bool check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *stratey,
+ struct st_position *loose_scan_pos);
+
+ void mark_used() { is_used= TRUE; }
+ friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Firstmatch_picker : public Semi_join_strategy_picker
+{
+ /*
+ Index of the first inner table that we intend to handle with this
+ strategy
+ */
+ uint first_firstmatch_table;
+ /*
+ Tables that were not in the join prefix when we've started considering
+ FirstMatch strategy.
+ */
+ table_map first_firstmatch_rtbl;
+ /*
+ Tables that need to be in the prefix before we can calculate the cost
+ of using FirstMatch strategy.
+ */
+ table_map firstmatch_need_tables;
+
+ bool is_used;
+
+ bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
+ void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
+public:
+ void set_empty()
+ {
+ invalidate_firstmatch_prefix();
+ is_used= FALSE;
+ }
+
+ void set_from_prev(struct st_position *prev);
+ bool check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ struct st_position *loose_scan_pos);
+
+ void mark_used() { is_used= TRUE; }
+ friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class LooseScan_picker : public Semi_join_strategy_picker
+{
+ /* The first (i.e. driving) table we're doing loose scan for */
+ uint first_loosescan_table;
+ /*
+ Tables that need to be in the prefix before we can calculate the cost
+ of using LooseScan strategy.
+ */
+ table_map loosescan_need_tables;
+
+ /*
+ keyno - Planning to do LooseScan on this key. If keyuse is NULL then
+ this is a full index scan, otherwise this is a ref+loosescan
+ scan (and keyno matches the KEYUSE's)
+ MAX_KEY - Not doing a LooseScan
+ */
+ uint loosescan_key; // final (one for strategy instance )
+ uint loosescan_parts; /* Number of keyparts to be kept distinct */
+
+ bool is_used;
+public:
+ void set_empty()
+ {
+ first_loosescan_table= MAX_TABLES;
+ is_used= FALSE;
+ }
+
+ void set_from_prev(struct st_position *prev);
+ bool check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ struct st_position *loose_scan_pos);
+ void mark_used() { is_used= TRUE; }
+
+ friend class Loose_scan_opt;
+ friend void best_access_path(JOIN *join,
+ JOIN_TAB *s,
+ table_map remaining_tables,
+ uint idx,
+ bool disable_jbuf,
+ double record_count,
+ struct st_position *pos,
+ struct st_position *loose_scan_pos);
+ friend bool get_best_combination(JOIN *join);
+ friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
+ uint no_jbuf_after);
+ friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Sj_materialization_picker : public Semi_join_strategy_picker
+{
+ bool is_used;
+
+ /* The last inner table (valid once we're after it) */
+ uint sjm_scan_last_inner;
+ /*
+ Tables that we need to have in the prefix to calculate the correct cost.
+ Basically, we need all inner tables and outer tables mentioned in the
+ semi-join's ON expression so we can correctly account for fanout.
+ */
+ table_map sjm_scan_need_tables;
+
+public:
+ void set_empty()
+ {
+ sjm_scan_need_tables= 0;
+ LINT_INIT(sjm_scan_last_inner);
+ is_used= FALSE;
+ }
+ void set_from_prev(struct st_position *prev);
+ bool check_qep(JOIN *join,
+ uint idx,
+ table_map remaining_tables,
+ const JOIN_TAB *new_join_tab,
+ double *record_count,
+ double *read_time,
+ table_map *handled_fanout,
+ sj_strategy_enum *strategy,
+ struct st_position *loose_scan_pos);
+ void mark_used() { is_used= TRUE; }
+
+ friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
/**
Information about a position of table within a join order. Used in join
@@ -503,6 +722,9 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
*/
typedef struct st_position
{
+ /* The table that's put into join order */
+ JOIN_TAB *table;
+
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
@@ -516,7 +738,10 @@ typedef struct st_position
number the access method will be invoked.
*/
double read_time;
- JOIN_TAB *table;
+
+ /* Cumulative cost and record count for the join prefix */
+ COST_VECT prefix_cost;
+ double prefix_record_count;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
@@ -526,14 +751,13 @@ typedef struct st_position
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
-
- bool use_join_buffer;
-
-
- /* These form a stack of partial join order costs and output sizes */
- COST_VECT prefix_cost;
- double prefix_record_count;
-
+
+ /*
+ TRUE <=> join buffering will be used. At the moment this is based on
+ *very* imprecise guesses made in best_access_path().
+ */
+ bool use_join_buffer;
+
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
@@ -546,7 +770,8 @@ typedef struct st_position
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
*/
- uint sj_strategy;
+ enum sj_strategy_enum sj_strategy;
+
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
@@ -554,67 +779,15 @@ typedef struct st_position
*/
uint n_sj_tables;
-/* LooseScan strategy members */
-
- /* The first (i.e. driving) table we're doing loose scan for */
- uint first_loosescan_table;
- /*
- Tables that need to be in the prefix before we can calculate the cost
- of using LooseScan strategy.
- */
- table_map loosescan_need_tables;
-
- /*
- keyno - Planning to do LooseScan on this key. If keyuse is NULL then
- this is a full index scan, otherwise this is a ref+loosescan
- scan (and keyno matches the KEUSE's)
- MAX_KEY - Not doing a LooseScan
- */
- uint loosescan_key; // final (one for strategy instance )
- uint loosescan_parts; /* Number of keyparts to be kept distinct */
-
-/* FirstMatch strategy */
- /*
- Index of the first inner table that we intend to handle with this
- strategy
- */
- uint first_firstmatch_table;
- /*
- Tables that were not in the join prefix when we've started considering
- FirstMatch strategy.
- */
- table_map first_firstmatch_rtbl;
- /*
- Tables that need to be in the prefix before we can calculate the cost
- of using FirstMatch strategy.
- */
- table_map firstmatch_need_tables;
-
- bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
- void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
-
-/* Duplicate Weedout strategy */
- /* The first table that the strategy will need to handle */
- uint first_dupsweedout_table;
- /*
- Tables that we will need to have in the prefix to do the weedout step
- (all inner and all outer that the involved semi-joins are correlated with)
- */
- table_map dupsweedout_tables;
-
-/* SJ-Materialization-Scan strategy */
- /* The last inner table (valid once we're after it) */
- uint sjm_scan_last_inner;
- /*
- Tables that we need to have in the prefix to calculate the correct cost.
- Basically, we need all inner tables and outer tables mentioned in the
- semi-join's ON expression so we can correctly account for fanout.
- */
- table_map sjm_scan_need_tables;
-
table_map prefix_dups_producing_tables;
-} POSITION;
+ table_map inner_tables_handled_with_other_sjs;
+
+ Duplicate_weedout_picker dups_weedout_picker;
+ Firstmatch_picker firstmatch_picker;
+ LooseScan_picker loosescan_picker;
+ Sj_materialization_picker sjmat_picker;
+} POSITION;
typedef struct st_rollup
{
@@ -626,18 +799,6 @@ typedef struct st_rollup
} ROLLUP;
-#define SJ_OPT_NONE 0
-#define SJ_OPT_DUPS_WEEDOUT 1
-#define SJ_OPT_LOOSE_SCAN 2
-#define SJ_OPT_FIRST_MATCH 3
-#define SJ_OPT_MATERIALIZE 4
-#define SJ_OPT_MATERIALIZE_SCAN 5
-
-inline bool sj_is_materialize_strategy(uint strategy)
-{
- return strategy >= SJ_OPT_MATERIALIZE;
-}
-
class JOIN_TAB_RANGE: public Sql_alloc
{
public:
@@ -808,7 +969,7 @@ public:
they produce.
*/
table_map cur_dups_producing_tables;
-
+
/* We also maintain a stack of join optimization states in * join->positions[] */
/******* Join optimization state members end *******/
/*