summaryrefslogtreecommitdiff
path: root/sql/opt_subselect.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/opt_subselect.cc')
-rw-r--r--sql/opt_subselect.cc553
1 files changed, 348 insertions, 205 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index e91b2ad14c3..34cc25f9058 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -35,6 +35,7 @@
#include "sql_test.h"
#include <my_bit.h>
#include "opt_trace.h"
+#include "optimizer_defaults.h"
/*
This file contains optimizations for semi-join subqueries.
@@ -448,7 +449,8 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
static bool convert_subq_to_jtbm(JOIN *parent_join,
Item_in_subselect *subq_pred, bool *remove);
static TABLE_LIST *alloc_join_nest(THD *thd);
-static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements);
+static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements,
+ bool *blobs_used);
bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
static SJ_MATERIALIZATION_INFO *
at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
@@ -664,17 +666,6 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
DBUG_RETURN(-1);
}
}
- /* Check if any table is not supporting comparable rowids */
- {
- List_iterator_fast<TABLE_LIST> li(select_lex->outer_select()->leaf_tables);
- TABLE_LIST *tbl;
- while ((tbl = li++))
- {
- TABLE *table= tbl->table;
- if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
- join->not_usable_rowid_map|= table->map;
- }
- }
DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
/*
@@ -696,9 +687,10 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
11. It is first optimisation (the subquery could be moved from ON
clause during first optimisation and then be considered for SJ
on the second when it is too late)
- 12. All tables supports comparable rowids.
- This is needed for DuplicateWeedout strategy to work (which
- is the catch-all semi-join strategy so it must be applicable).
+
+ There are also other requirements which cannot be checked at this phase,
+ yet. They are checked later in convert_join_subqueries_to_semijoins(),
+ look for calls to block_conversion_to_sj().
*/
if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
in_subs && // 1
@@ -713,8 +705,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
!((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10
& SELECT_STRAIGHT_JOIN) && // 10
- select_lex->first_cond_optimization && // 11
- join->not_usable_rowid_map == 0) // 12
+ select_lex->first_cond_optimization) // 11
{
DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
@@ -910,8 +901,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs)
outer,
converted_from_in_predicate))
{
- trace_transform.add("possible", false);
- trace_transform.add("cause", "types mismatch");
+ if (unlikely(trace_transform.trace_started()))
+ trace_transform.
+ add("possible", false).
+ add("cause", "types mismatch");
DBUG_RETURN(FALSE);
}
}
@@ -933,8 +926,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs)
{
in_subs->types_allow_materialization= TRUE;
in_subs->sjm_scan_allowed= all_are_fields;
- trace_transform.add("sjm_scan_allowed", all_are_fields)
- .add("possible", true);
+ if (unlikely(trace_transform.trace_started()))
+ trace_transform.
+ add("sjm_scan_allowed", all_are_fields).
+ add("possible", true);
DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
DBUG_RETURN(TRUE);
}
@@ -1224,7 +1219,36 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
}
}
- if (join->select_options & SELECT_STRAIGHT_JOIN)
+ /*
+ Compute join->not_usable_rowid_map.
+ The idea is:
+ - DuplicateWeedout strategy requires that one is able to get the rowid
+ (call h->position()) for tables in the parent select. Obtained Rowid
+ values must be stable across table scans.
+ = Rowids are typically available. The only known exception is federatedx
+ tables.
+ - The optimizer requires that DuplicateWeedout strategy is always
+ applicable. It is the only strategy that is applicable for any join
+ order. The optimizer is not prepared for the situation where it has
+ constructed a join order and then it turns out that there's no semi-join
+ strategy that can be used for it.
+
+ Because of the above, we will not use semi-joins if the parent select has
+ tables which do not support rowids.
+ */
+ {
+ List_iterator_fast<TABLE_LIST> li(join->select_lex->leaf_tables);
+ TABLE_LIST *tbl;
+ while ((tbl = li++))
+ {
+ TABLE *table= tbl->table;
+ if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
+ join->not_usable_rowid_map|= table->map;
+ }
+ }
+
+ if (join->select_options & SELECT_STRAIGHT_JOIN ||
+ join->not_usable_rowid_map != 0)
{
/* Block conversion to semijoins for all candidates */
li.rewind();
@@ -1294,8 +1318,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
in_subq->get_select_lex()->select_number,
"IN (SELECT)", "semijoin");
- trace_transform.add("converted_to_semi_join", false)
- .add("cause", "subquery attached to the ON clause");
+ if (unlikely(trace_transform.trace_started()))
+ trace_transform.
+ add("converted_to_semi_join", false).
+ add("cause", "subquery attached to the ON clause");
break;
}
@@ -1307,9 +1333,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
if (join->table_count +
in_subq->unit->first_select()->join->table_count >= MAX_TABLES)
{
- trace_transform.add("converted_to_semi_join", false);
- trace_transform.add("cause",
- "table in parent join now exceeds MAX_TABLES");
+ if (unlikely(trace_transform.trace_started()))
+ trace_transform.
+ add("converted_to_semi_join", false).
+ add("cause", "table in parent join now exceeds MAX_TABLES");
break;
}
if (convert_subq_to_sj(join, in_subq))
@@ -1441,6 +1468,7 @@ void get_delayed_table_estimates(TABLE *table,
Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
Table_function_json_table *table_function=
table->pos_in_table_list->table_function;
+ handler *file= table->file;
if (table_function)
{
@@ -1460,9 +1488,11 @@ void get_delayed_table_estimates(TABLE *table,
/* Calculate cost of scanning the temptable */
double data_size= COST_MULT(item->jtbm_record_count,
hash_sj_engine->tmp_table->s->reclength);
- /* Do like in handler::scan_time() */
- *scan_time= ((data_size/table->file->stats.block_size+2) *
- table->file->avg_io_cost());
+
+ /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
+ *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST *
+ table->file->DISK_READ_RATIO) +
+ *out_rows * file->ROW_COPY_COST);
}
@@ -2490,8 +2520,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
!sj_nest->sj_subq_pred->is_correlated &&
sj_nest->sj_subq_pred->types_allow_materialization)
{
- join->emb_sjm_nest= sj_nest;
- if (choose_plan(join, all_table_map &~join->const_table_map))
+ if (choose_plan(join, all_table_map &~join->const_table_map, sj_nest))
DBUG_RETURN(TRUE); /* purecov: inspected */
/*
The best plan to run the subquery is now in join->best_positions,
@@ -2507,24 +2536,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
sjm->is_used= FALSE;
double subjoin_out_rows, subjoin_read_time;
- /*
- join->get_partial_cost_and_fanout(n_tables + join->const_tables,
- table_map(-1),
- &subjoin_read_time,
- &subjoin_out_rows);
- */
- join->get_prefix_cost_and_fanout(n_tables,
+ join->get_prefix_cost_and_fanout(n_tables,
&subjoin_read_time,
&subjoin_out_rows);
- sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+ sjm->materialization_cost=subjoin_read_time;
sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows;
- // Don't use the following list because it has "stale" items. use
- // ref_pointer_array instead:
- //
- //List<Item> &right_expr_list=
- // sj_nest->sj_subq_pred->unit->first_select()->item_list;
/*
Adjust output cardinality estimates. If the subquery has form
@@ -2557,8 +2575,12 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
int tableno;
double rows= 1.0;
while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
- rows= COST_MULT(rows,
- join->map2table[tableno]->table->opt_range_condition_rows);
+ {
+ ha_rows tbl_rows=join->map2table[tableno]->
+ table->opt_range_condition_rows;
+
+ rows= COST_MULT(rows, rows2double(tbl_rows));
+ }
sjm->rows= MY_MIN(sjm->rows, rows);
}
memcpy((uchar*) sjm->positions,
@@ -2568,33 +2590,43 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
/*
Calculate temporary table parameters and usage costs
*/
+ bool blobs_used;
uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
- subq_select->item_list.elements);
- double lookup_cost= get_tmp_table_lookup_cost(join->thd,
- subjoin_out_rows, rowlen);
- double write_cost= get_tmp_table_write_cost(join->thd,
- subjoin_out_rows, rowlen);
+ subq_select->item_list.elements,
+ &blobs_used);
+ TMPTABLE_COSTS cost= get_tmp_table_costs(join->thd,
+ subjoin_out_rows, rowlen,
+ blobs_used, 1);
+ double scan_cost, total_cost;
+ double row_copy_cost= ROW_COPY_COST_THD(thd);
/*
Let materialization cost include the cost to write the data into the
- temporary table:
+      temporary table. Note that sjm->materialization_cost already includes
+ row copy and compare costs of finding the original row.
*/
- sjm->materialization_cost.add_io(subjoin_out_rows, write_cost);
-
+ sjm->materialization_cost+=subjoin_out_rows * cost.write + cost.create;
+
/*
Set the cost to do a full scan of the temptable (will need this to
- consider doing sjm-scan):
- */
- sjm->scan_cost.reset();
- sjm->scan_cost.add_io(sjm->rows, lookup_cost);
-
- sjm->lookup_cost.convert_from_cost(lookup_cost);
+ consider doing sjm-scan). See ha_scan_time() for the basics of
+ the calculations.
+ We don't need to check the where clause for each row, so no
+ WHERE_COST is needed.
+ */
+ scan_cost= (rowlen * (double) sjm->rows) / cost.block_size;
+ total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost +
+ TABLE_SCAN_SETUP_COST_THD(thd) +
+ row_copy_cost * sjm->rows);
+ sjm->scan_cost=total_cost;
+
+ /* When reading a row, we have also to check the where clause */
+ sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd);
sj_nest->sj_mat_info= sjm;
DBUG_EXECUTE("opt", print_sjm(sjm););
}
}
}
- join->emb_sjm_nest= NULL;
DBUG_RETURN(FALSE);
}
@@ -2616,11 +2648,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
Length of the temptable record, in bytes
*/
-static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
+static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements,
+ bool *blobs_used)
{
uint len= 0;
Item *item;
- //List_iterator<Item> it(items);
+
+ *blobs_used= 0;
for (uint i= 0; i < elements ; i++)
{
item = p_items[i];
@@ -2643,6 +2677,8 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
len += 8;
else
len += item->max_length;
+ if (item->max_length > MAX_FIELD_VARCHARLENGTH)
+ *blobs_used= 1;
break;
case DECIMAL_RESULT:
len += 10;
@@ -2658,46 +2694,62 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
/**
- The cost of a lookup into a unique hash/btree index on a temporary table
- with 'row_count' rows each of size 'row_size'.
+ The cost of a create, write and read into a unique hash/btree index on
+ a temporary table with 'row_count' rows each of size 'row_size'.
@param thd current query context
@param row_count number of rows in the temp table
@param row_size average size in bytes of the rows
- @return the cost of one lookup
-*/
+ @return The cost of using the temporary table
-double
-get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size)
-{
- if (row_count > thd->variables.max_heap_table_size / (double) row_size)
- return (double) DISK_TEMPTABLE_LOOKUP_COST;
- else
- return (double) HEAP_TEMPTABLE_LOOKUP_COST;
-}
-
-/**
- The cost of writing a row into a temporary table with 'row_count' unique
- rows each of size 'row_size'.
-
- @param thd current query context
- @param row_count number of rows in the temp table
- @param row_size average size in bytes of the rows
-
- @return the cost of writing one row
+ TODO:
+ This is an optimistic estimate. We are not taking into account:
+ - That we first write into a memory and then overflow to disk.
+ - If binary trees would be used for heap tables.
+  - The additional cost of writing a row to memory/disk and possible
+ index reorganization.
*/
-double
-get_tmp_table_write_cost(THD *thd, double row_count, uint row_size)
+TMPTABLE_COSTS
+get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used,
+ bool add_copy_cost)
{
- double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size);
- /*
- TODO:
- This is an optimistic estimate. Add additional costs resulting from
- actually writing the row to memory/disk and possible index reorganization.
- */
- return lookup_cost;
+ TMPTABLE_COSTS cost;
+ /* From heap_prepare_hp_create_info(), assuming one hash key used */
+ row_size+= sizeof(char*)*2;
+ row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*));
+
+ if (row_count > thd->variables.max_heap_table_size / (double) row_size ||
+ blobs_used)
+ {
+ double row_copy_cost= (add_copy_cost ?
+ tmp_table_optimizer_costs.row_copy_cost :
+ 0);
+ /* Disk based table */
+ cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost *
+ tmp_table_optimizer_costs.disk_read_ratio) +
+ row_copy_cost);
+ cost.write= cost.lookup;
+ cost.create= DISK_TEMPTABLE_CREATE_COST;
+ cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE;
+ cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost;
+ cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio;
+ }
+ else
+ {
+ /* Values are as they are in heap.h */
+ double row_copy_cost= (add_copy_cost ?
+ heap_optimizer_costs.row_copy_cost :
+ 0);
+ cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost;
+ cost.write= cost.lookup;
+ cost.create= HEAP_TEMPTABLE_CREATE_COST;
+ cost.block_size= 1;
+ cost.avg_io_cost= 0;
+ cost.cache_hit_ratio= 0;
+ }
+ return cost;
}
@@ -2942,9 +2994,16 @@ void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
2. using strategy Z is cheaper, but it only removes
fanout from semijoin X.
3. We have no clue what to do about fanount of semi-join Y.
+
+ For the first iteration read_time will always be bigger than
+ *current_read_time (as the 'strategy' is an addition to the
+      chosen plan). If a strategy was picked
+      (dups_producing_tables & handled_fanout is true), then
+      *current_read_time is updated and the cost for the next
+ strategy can be smaller than *current_read_time.
*/
if ((dups_producing_tables & handled_fanout) ||
- (read_time < *current_read_time &&
+ (read_time + COST_EPS < *current_read_time &&
!(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
{
DBUG_ASSERT(pos->sj_strategy != sj_strategy);
@@ -3142,9 +3201,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
mat_read_time=
COST_ADD(prefix_cost,
- COST_ADD(mat_info->materialization_cost.total_cost(),
+ COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count,
- mat_info->lookup_cost.total_cost())));
+ mat_info->lookup_cost)));
/*
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
@@ -3158,8 +3217,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
*strategy= SJ_OPT_MATERIALIZE;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("rows", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3193,9 +3253,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
/* Add materialization cost */
prefix_cost=
COST_ADD(prefix_cost,
- COST_ADD(mat_info->materialization_cost.total_cost(),
+ COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count,
- mat_info->scan_cost.total_cost())));
+ mat_info->scan_cost)));
prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows);
uint i;
@@ -3212,10 +3272,8 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
best_access_path(join, join->positions[i].table, rem_tables,
join->positions, i,
disable_jbuf, prefix_rec_count, &curpos, &dummy);
- prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read);
+ prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out);
prefix_cost= COST_ADD(prefix_cost, curpos.read_time);
- prefix_cost= COST_ADD(prefix_cost,
- prefix_rec_count / TIME_FOR_COMPARE);
//TODO: take into account join condition selectivity here
}
@@ -3240,8 +3298,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
*handled_fanout= mat_nest->sj_inner_tables;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("rows", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3340,8 +3399,9 @@ bool LooseScan_picker::check_qep(JOIN *join,
*handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("rows", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3432,13 +3492,13 @@ bool Firstmatch_picker::check_qep(JOIN *join,
optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE))
{
/*
- An important special case: only one inner table, and @@optimizer_switch
- allows join buffering.
+ An important special case: only one inner table, and
+ @@optimizer_switch allows join buffering.
- read_time is the same (i.e. FirstMatch doesn't add any cost
- remove fanout added by the last table
*/
if (*record_count)
- *record_count /= join->positions[idx].records_read;
+ *record_count /= join->positions[idx].records_out;
}
else
{
@@ -3458,8 +3518,9 @@ bool Firstmatch_picker::check_qep(JOIN *join,
*strategy= SJ_OPT_FIRST_MATCH;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("rows", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3471,6 +3532,56 @@ bool Firstmatch_picker::check_qep(JOIN *join,
}
+/*
+ Duplicate_weedout strategy is described at
+ https://mariadb.com/kb/en/duplicateweedout-strategy/
+
+ The idea is that if one has a subquery of type:
+
+ select *
+ from Country
+ where
+ Country.code IN (select City.Country
+ from City
+ where
+ ...)
+
+ Before semi join optimization it was executed this way:
+ - Scan rows in Country
+ - For each accepted row, execute the sub query with
+ 'Country.code = City.Country' added to the WHERE clause and with
+ LIMIT 1
+
+ With semi join optimization it can be converted to the following semi join.
+
+ select * from Country semi-join City
+ where Country.code = City.Country and ...
+
+ This is executed as:
+
+ - Scan rows in Country
+ - Scan rows in City with 'Country.code = City.Country' added to the
+ subquery WHERE clause. Stop scanning after the first match.
+
+ or
+
+ - Create temporary table to store City.Country (with a unique key)
+ - Scan rows in City (according to plan for City) and put them into the
+ temporary table
+ - Scan the temporary table
+ - Do index lookup in Country table with City.Country
+
+With Duplicate_weedout we would try to instead do:
+
+ - Create temporary table to hold unique rowid's for the Country
+ - Scan rows in City (according to plan for City)
+ - Scan rows in Country (according to plan for Country)
+ - Write Country.id rowid to temporary table. If there was no
+ conflicting row in the temporary table, accept the row combination.
+ - Delete temporary table
+*/
+
+
void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
{
if (prev->dups_weedout_picker.is_used)
@@ -3535,46 +3646,42 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
*/
uint first_tab= first_dupsweedout_table;
double dups_cost;
- double prefix_rec_count;
+ double first_weedout_table_rec_count;
double sj_inner_fanout= 1.0;
double sj_outer_fanout= 1.0;
uint temptable_rec_size;
- Json_writer_object trace(join->thd);
- trace.add("strategy", "DuplicateWeedout");
if (first_tab == join->const_tables)
{
- prefix_rec_count= 1.0;
+ first_weedout_table_rec_count= 1.0;
temptable_rec_size= 0;
dups_cost= 0.0;
}
else
{
dups_cost= join->positions[first_tab - 1].prefix_cost;
- prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+ first_weedout_table_rec_count=
+ join->positions[first_tab - 1].prefix_record_count;
temptable_rec_size= 8; /* This is not true but we'll make it so */
}
table_map dups_removed_fanout= 0;
- double current_fanout= prefix_rec_count;
for (uint j= first_dupsweedout_table; j <= idx; j++)
{
POSITION *p= join->positions + j;
- current_fanout= COST_MULT(current_fanout, p->records_read);
- dups_cost= COST_ADD(dups_cost,
- COST_ADD(p->read_time,
- current_fanout / TIME_FOR_COMPARE));
+ dups_cost= COST_ADD(dups_cost, p->read_time);
+
if (p->table->emb_sj_nest)
{
- sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read);
+ sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_out);
dups_removed_fanout |= p->table->table->map;
}
else
{
+ sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_out);
/* Ensure that table supports comparable rowids */
DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID));
- sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read);
temptable_rec_size += p->table->table->file->ref_length;
}
}
@@ -3583,32 +3690,38 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
Add the cost of temptable use. The table will have sj_outer_fanout
records, and we will make
- sj_outer_fanout table writes
- - sj_inner_fanout*sj_outer_fanout lookups.
+ - sj_inner_fanout*sj_outer_fanout lookups.
+      There is no row copy cost (as we are only copying rowid) and no
+      compare cost (as we are only checking if the row exists by
+      checking if we got a write error).
*/
- double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
- double one_write_cost= get_tmp_table_write_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
-
- double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count,
- sj_outer_fanout * one_write_cost);
- double full_lookup_cost=
- COST_MULT(join->positions[first_tab].prefix_record_count,
- COST_MULT(sj_outer_fanout,
- sj_inner_fanout * one_lookup_cost));
- dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost));
+ TMPTABLE_COSTS one_cost= get_tmp_table_costs(join->thd,
+ sj_outer_fanout,
+ temptable_rec_size,
+ 0, 0);
+ double write_cost= (one_cost.create +
+ first_weedout_table_rec_count * sj_outer_fanout * one_cost.write);
+ double full_lookup_cost= (first_weedout_table_rec_count* sj_outer_fanout *
+ sj_inner_fanout * one_cost.lookup);
+ *read_time= dups_cost + write_cost + full_lookup_cost;
- *read_time= dups_cost;
- *record_count= prefix_rec_count * sj_outer_fanout;
+ *record_count= first_weedout_table_rec_count * sj_outer_fanout;
*handled_fanout= dups_removed_fanout;
*strategy= SJ_OPT_DUPS_WEEDOUT;
- if (unlikely(trace.trace_started()))
+ if (unlikely(join->thd->trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ Json_writer_object trace(join->thd);
+ trace.
+ add("strategy", "DuplicateWeedout").
+ add("prefix_row_count", first_weedout_table_rec_count).
+ add("tmp_table_rows", sj_outer_fanout).
+ add("sj_inner_fanout", sj_inner_fanout).
+ add("rows", *record_count).
+ add("dups_cost", dups_cost).
+ add("write_cost", write_cost).
+ add("full_lookup_cost", full_lookup_cost).
+ add("total_cost", *read_time);
}
return TRUE;
}
@@ -3657,33 +3770,37 @@ void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const
*/
void restore_prev_sj_state(const table_map remaining_tables,
- const JOIN_TAB *tab, uint idx)
+ const JOIN_TAB *tab, uint idx)
{
TABLE_LIST *emb_sj_nest;
- if (tab->emb_sj_nest)
+ if ((emb_sj_nest= tab->emb_sj_nest))
{
- table_map subq_tables= tab->emb_sj_nest->sj_inner_tables;
+ table_map subq_tables= emb_sj_nest->sj_inner_tables;
tab->join->sjm_lookup_tables &= ~subq_tables;
- }
- if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest))
- {
- table_map subq_tables= emb_sj_nest->sj_inner_tables &
- ~tab->join->const_table_map;
- /* If we're removing the last SJ-inner table, remove the sj-nest */
- if ((remaining_tables & subq_tables) == subq_tables)
- {
- // All non-const tables of the SJ nest are in the remaining_tables.
- // we are not in the nest anymore.
- tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
- }
- else
+ if (!tab->join->emb_sjm_nest)
{
- // Semi-join nest has:
- // - a table being removed (not in the prefix)
- // - some tables in the prefix.
- tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+ table_map subq_tables= (emb_sj_nest->sj_inner_tables &
+ ~tab->join->const_table_map);
+ /* If we're removing the last SJ-inner table, remove the sj-nest */
+ if ((remaining_tables & subq_tables) == subq_tables)
+ {
+ /*
+ All non-const tables of the SJ nest are in the remaining_tables.
+ we are not in the nest anymore.
+ */
+ tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+ }
+ else
+ {
+ /*
+ Semi-join nest has:
+ - a table being removed (not in the prefix)
+ - some tables in the prefix.
+ */
+ tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+ }
}
}
@@ -3795,6 +3912,8 @@ at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
{
+ DBUG_ASSERT(start >= join->const_tables);
+
for (uint j= start; j < end ;j++)
{
double prefix_count;
@@ -3802,7 +3921,7 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
prefix_count= 1.0;
else
prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count,
- join->best_positions[j-1].records_read);
+ join->best_positions[j-1].records_out);
join->best_positions[j].prefix_record_count= prefix_count;
}
@@ -3954,7 +4073,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
join->best_positions, i,
FALSE, prefix_rec_count,
join->best_positions + i, &dummy);
- prefix_rec_count *= join->best_positions[i].records_read;
+ prefix_rec_count *= join->best_positions[i].records_out;
rem_tables &= ~join->best_positions[i].table->table->map;
}
}
@@ -3966,7 +4085,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
join->best_positions[first].n_sj_tables= tablenr - first + 1;
POSITION dummy; // For loose scan paths
double record_count= (first== join->const_tables)? 1.0:
- join->best_positions[tablenr - 1].prefix_record_count;
+ join->best_positions[first - 1].prefix_record_count;
table_map rem_tables= remaining_tables;
uint idx;
@@ -3996,7 +4115,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
TRUE /* no jbuf */,
record_count, join->best_positions + idx, &dummy);
}
- record_count *= join->best_positions[idx].records_read;
+ record_count *= join->best_positions[idx].records_out;
rem_tables &= ~join->best_positions[idx].table->table->map;
}
}
@@ -4007,7 +4126,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
POSITION *first_pos= join->best_positions + first;
POSITION loose_scan_pos; // For loose scan paths
double record_count= (first== join->const_tables)? 1.0:
- join->best_positions[tablenr - 1].prefix_record_count;
+ join->best_positions[first - 1].prefix_record_count;
table_map rem_tables= remaining_tables;
uint idx;
@@ -4048,13 +4167,14 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
*/
if (join->best_positions[idx].key)
{
+ DBUG_ASSERT(join->best_positions[idx].type != JT_RANGE);
delete join->best_positions[idx].table->quick;
join->best_positions[idx].table->quick= NULL;
}
}
}
rem_tables &= ~join->best_positions[idx].table->table->map;
- record_count *= join->best_positions[idx].records_read;
+ record_count *= join->best_positions[idx].records_out;
}
first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
@@ -4778,7 +4898,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
DBUG_PRINT("info",("Creating group key in temporary table"));
share->keys=1;
share->uniques= MY_TEST(using_unique_constraint);
- table->key_info=keyinfo;
+ table->key_info= share->key_info= keyinfo;
keyinfo->key_part=key_part_info;
keyinfo->flags=HA_NOSAME;
keyinfo->usable_key_parts= keyinfo->user_defined_key_parts= 1;
@@ -5272,7 +5392,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
Got a table that's not within any semi-join nest. This is a case
like this:
- SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2)
+ SELECT * FROM ot1, nt1 WHERE
+ ot1.col IN (SELECT expr FROM it1, it2)
with a join order of
@@ -5749,10 +5870,10 @@ enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab)
((subselect_hash_sj_engine*)in_subs->engine);
if (!hash_sj_engine->is_materialized)
{
- hash_sj_engine->materialize_join->exec();
+ int error= hash_sj_engine->materialize_join->exec();
hash_sj_engine->is_materialized= TRUE;
- if (unlikely(hash_sj_engine->materialize_join->error) ||
+ if (unlikely(error) ||
unlikely(tab->join->thd->is_fatal_error))
DBUG_RETURN(NESTED_LOOP_ERROR);
}
@@ -6532,18 +6653,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
IN/ALL/ANY optimizations are not applicable for so called fake select
(this select exists only to filter results of union if it is needed).
*/
- if (select_lex == select_lex->master_unit()->fake_select_lex)
- return 0;
-
- if (is_in_subquery())
- {
- in_subs= unit->item->get_IN_subquery();
- if (in_subs->create_in_to_exists_cond(this))
- return true;
- }
- else
+ if (select_lex == select_lex->master_unit()->fake_select_lex ||
+ likely(!is_in_subquery()))
return false;
+ in_subs= unit->item->get_IN_subquery();
+ if (in_subs->create_in_to_exists_cond(this))
+ return true;
+
/* A strategy must be chosen earlier. */
DBUG_ASSERT(in_subs->has_strategy());
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
@@ -6574,6 +6691,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
/* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost;
double dummy;
+ const char *strategy;
/*
A. Estimate the number of rows of the outer table that will be filtered
@@ -6635,7 +6753,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
/* Get the cost of the modified IN-EXISTS plan. */
inner_read_time_2= inner_join->best_read;
-
}
else
{
@@ -6647,39 +6764,58 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
C. Compute execution costs.
*/
/* C.1 Compute the cost of the materialization strategy. */
- //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
- uint rowlen= get_tmp_table_rec_length(ref_ptrs,
- select_lex->item_list.elements);
- /* The cost of writing one row into the temporary table. */
- double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1,
- rowlen);
- /* The cost of a lookup into the unique index of the materialized table. */
- double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1,
- rowlen);
+ bool blobs_used;
+ uint rowlen= get_tmp_table_rec_length(ref_ptrs,
+ select_lex->item_list.elements,
+ &blobs_used);
+ /* The cost of using the temp table */
+ TMPTABLE_COSTS cost= get_tmp_table_costs(thd, inner_record_count_1,
+ rowlen, blobs_used, 1);
/*
The cost of executing the subquery and storing its result in an indexed
temporary table.
*/
- double materialization_cost= COST_ADD(inner_read_time_1,
- COST_MULT(write_cost,
- inner_record_count_1));
+ double materialization_cost=
+ COST_ADD(cost.create,
+ COST_ADD(inner_read_time_1,
+ COST_MULT((cost.write + WHERE_COST_THD(thd)),
+ inner_record_count_1)));
- materialize_strategy_cost= COST_ADD(materialization_cost,
- COST_MULT(outer_lookup_keys,
- lookup_cost));
+ materialize_strategy_cost=
+ COST_ADD(materialization_cost,
+ COST_MULT(outer_lookup_keys, cost.lookup));
/* C.2 Compute the cost of the IN=>EXISTS strategy. */
- in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2);
+ in_exists_strategy_cost=
+ COST_MULT(outer_lookup_keys, inner_read_time_2);
/* C.3 Compare the costs and choose the cheaper strategy. */
if (materialize_strategy_cost >= in_exists_strategy_cost)
+ {
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+ strategy= "in_to_exists";
+ }
else
+ {
in_subs->set_strategy(SUBS_MATERIALIZATION);
+ strategy= "materialization";
+ }
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_subquery(thd, "subquery_plan");
+ trace_subquery.
+ add("rows", inner_record_count_1).
+ add("materialization_cost", materialize_strategy_cost).
+ add("in_exist_cost", in_exists_strategy_cost).
+ add("choosen", strategy);
+ }
DBUG_PRINT("info",
- ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
- materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+ ("mat_strategy_cost: %.2f mat_cost: %.2f write_cost: %.2f "
+ "lookup_cost: %.2f",
+ materialize_strategy_cost, materialization_cost, cost.write,
+ cost.lookup));
DBUG_PRINT("info",
("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
in_exists_strategy_cost, inner_read_time_2));
@@ -6705,6 +6841,13 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
implementation, fall back to IN-TO-EXISTS.
*/
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_subquery(thd, "subquery_plan_revert");
+ trace_subquery.add("choosen", "in_to_exists");
+ }
}
if (in_subs->test_strategy(SUBS_MATERIALIZATION))