summaryrefslogtreecommitdiff
path: root/sql/opt_subselect.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/opt_subselect.cc')
-rw-r--r--sql/opt_subselect.cc301
1 files changed, 194 insertions, 107 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 52d6a784e15..6f2de3796c4 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -448,7 +448,8 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
static bool convert_subq_to_jtbm(JOIN *parent_join,
Item_in_subselect *subq_pred, bool *remove);
static TABLE_LIST *alloc_join_nest(THD *thd);
-static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements);
+static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements,
+ bool *blobs_used);
bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
static SJ_MATERIALIZATION_INFO *
at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
@@ -1467,9 +1468,9 @@ void get_delayed_table_estimates(TABLE *table,
/* Calculate cost of scanning the temptable */
double data_size= COST_MULT(item->jtbm_record_count,
hash_sj_engine->tmp_table->s->reclength);
- /* Do like in handler::scan_time() */
+ /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
*scan_time= ((data_size/table->file->stats.block_size+2) *
- table->file->avg_io_cost());
+ table->file->avg_io_cost()) + *out_rows * ROW_COPY_COST;
}
@@ -2564,27 +2565,28 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
/*
Calculate temporary table parameters and usage costs
*/
+ bool blobs_used;
uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
- subq_select->item_list.elements);
- double lookup_cost= get_tmp_table_lookup_cost(join->thd,
- subjoin_out_rows, rowlen);
- double write_cost= get_tmp_table_write_cost(join->thd,
- subjoin_out_rows, rowlen);
-
+ subq_select->item_list.elements,
+ &blobs_used);
+ TMPTABLE_COSTS cost= get_tmp_table_costs(join->thd,
+ subjoin_out_rows, rowlen,
+ blobs_used);
/*
Let materialization cost include the cost to write the data into the
temporary table:
*/
- sjm->materialization_cost.add_io(subjoin_out_rows, write_cost);
-
+ sjm->materialization_cost.add_io(subjoin_out_rows, cost.write);
+ sjm->materialization_cost.copy_cost+= cost.create;
+
/*
Set the cost to do a full scan of the temptable (will need this to
consider doing sjm-scan):
*/
sjm->scan_cost.reset();
- sjm->scan_cost.add_io(sjm->rows, lookup_cost);
+ sjm->scan_cost.add_io(sjm->rows, cost.lookup);
- sjm->lookup_cost.convert_from_cost(lookup_cost);
+ sjm->lookup_cost.convert_from_cost(cost.lookup);
sj_nest->sj_mat_info= sjm;
DBUG_EXECUTE("opt", print_sjm(sjm););
}
@@ -2612,11 +2614,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
Length of the temptable record, in bytes
*/
-static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
+static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements,
+ bool *blobs_used)
{
uint len= 0;
Item *item;
- //List_iterator<Item> it(items);
+
+ *blobs_used= 0;
for (uint i= 0; i < elements ; i++)
{
item = p_items[i];
@@ -2639,6 +2643,8 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
len += 8;
else
len += item->max_length;
+ if (item->max_length > MAX_FIELD_VARCHARLENGTH)
+ *blobs_used= 1;
break;
case DECIMAL_RESULT:
len += 10;
@@ -2654,46 +2660,48 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
/**
- The cost of a lookup into a unique hash/btree index on a temporary table
- with 'row_count' rows each of size 'row_size'.
+ The cost of a create, write and read into a unique hash/btree index on
+ a temporary table with 'row_count' rows each of size 'row_size'.
@param thd current query context
@param row_count number of rows in the temp table
@param row_size average size in bytes of the rows
- @return the cost of one lookup
+ @return The cost of using the temporary table
+
+ TODO:
+ This is an optimistic estimate. We are not taking into account:
+ - That we first write into a memory and then overflow to disk.
+ - If binary trees would be used for heap tables.
+ - The addition cost of writing a row to memory/disk and possible
+ index reorganization.
*/
-double
-get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size)
+TMPTABLE_COSTS
+get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used)
{
- if (row_count > thd->variables.max_heap_table_size / (double) row_size)
- return (double) DISK_TEMPTABLE_LOOKUP_COST;
- else
- return (double) HEAP_TEMPTABLE_LOOKUP_COST;
-}
+ TMPTABLE_COSTS cost;
-/**
- The cost of writing a row into a temporary table with 'row_count' unique
- rows each of size 'row_size'.
+ /* From heap_prepare_hp_create_info(), assuming one hash key used */
+ row_size+= sizeof(char*)*2;
+ row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*));
- @param thd current query context
- @param row_count number of rows in the temp table
- @param row_size average size in bytes of the rows
-
- @return the cost of writing one row
-*/
-
-double
-get_tmp_table_write_cost(THD *thd, double row_count, uint row_size)
-{
- double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size);
- /*
- TODO:
- This is an optimistic estimate. Add additional costs resulting from
- actually writing the row to memory/disk and possible index reorganization.
- */
- return lookup_cost;
+ if (row_count > thd->variables.max_heap_table_size / (double) row_size ||
+ blobs_used)
+ {
+ /* Disk based table */
+ cost.lookup= (DISK_TEMPTABLE_LOOKUP_COST *
+ cache_hit_cost(thd->variables.optimizer_cache_hit_ratio));
+ cost.write= cost.lookup;
+ cost.create= DISK_TEMPTABLE_CREATE_COST;
+ }
+ else
+ {
+ cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST;
+ cost.write= cost.lookup;
+ cost.create= HEAP_TEMPTABLE_CREATE_COST;
+ }
+ return cost;
}
@@ -2938,9 +2946,16 @@ void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
2. using strategy Z is cheaper, but it only removes
fanout from semijoin X.
3. We have no clue what to do about fanount of semi-join Y.
+
+ For the first iteration read_time will always be bigger than
+ *current_read_time (as the 'strategy' is an addition to the
+ chosen plan) . If a strategy was picked
+ (dusp_producing_tables & handled_fanout is true), then
+ *current_read_time is updated and the cost for the next
+ strategy can be smaller than *current_read_time.
*/
if ((dups_producing_tables & handled_fanout) ||
- (read_time < *current_read_time &&
+ (read_time + COST_EPS < *current_read_time &&
!(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
{
DBUG_ASSERT(pos->sj_strategy != sj_strategy);
@@ -3211,8 +3226,6 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
disable_jbuf, prefix_rec_count, &curpos, &dummy);
prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read);
prefix_cost= COST_ADD(prefix_cost, curpos.read_time);
- prefix_cost= COST_ADD(prefix_cost,
- prefix_rec_count / TIME_FOR_COMPARE);
//TODO: take into account join condition selectivity here
}
@@ -3237,8 +3250,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
*handled_fanout= mat_nest->sj_inner_tables;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("records", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3339,7 +3353,7 @@ bool LooseScan_picker::check_qep(JOIN *join,
{
trace.
add("records", *record_count).
- add("read_time", *read_time);
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3456,8 +3470,9 @@ bool Firstmatch_picker::check_qep(JOIN *join,
*strategy= SJ_OPT_FIRST_MATCH;
if (unlikely(trace.trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ trace.
+ add("records", *record_count).
+ add("cost", *read_time);
}
return TRUE;
}
@@ -3469,6 +3484,56 @@ bool Firstmatch_picker::check_qep(JOIN *join,
}
+/*
+ Duplicate_weedout strategy is described at
+ https://mariadb.com/kb/en/duplicateweedout-strategy/
+
+ The idea is that if one has a subquery of type:
+
+ select *
+ from Country
+ where
+ Country.code IN (select City.Country
+ from City
+ where
+ ...)
+
+ Before semi join optimization it was executed this way:
+ - Scan rows in Country
+ - For each accepted row, execute the sub query with
+ 'Country.code = City.Country' added to the WHERE clause and with
+ LIMIT 1
+
+ With semi join optimization it can be converted to the following semi join.
+
+ select * from Country semi-join City
+ where Country.code = City.Country and ...
+
+ This is executed as:
+
+ - Scan rows in Country
+ - Scan rows in City with 'Country.code = City.Country' added to the
+ subquery WHERE clause. Stop scanning after the first match.
+
+ or
+
+ - Create temporary table to store City.Country (with a unique key)
+ - Scan rows in City (according to plan for City) and put them into the
+ temporary table
+ - Scan the temporary table
+ - Do index lookup in Country table with City.Country
+
+With Duplicate_weedout we would try to instead do:
+
+ - Create temporary table to hold unique rowid's for the Country
+ - Scan rows in City (according to plan for City)
+ - Scan rows in Country (according to plan for Country)
+ - Write Country.id rowid to temporary table. If there was no
+ conflicting row in the temporary table, accept the row combination.
+ - Delete temporary table
+*/
+
+
void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
{
if (prev->dups_weedout_picker.is_used)
@@ -3537,8 +3602,6 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
double sj_inner_fanout= 1.0;
double sj_outer_fanout= 1.0;
uint temptable_rec_size;
- Json_writer_object trace(join->thd);
- trace.add("strategy", "DuplicateWeedout");
if (first_tab == join->const_tables)
{
@@ -3554,14 +3617,11 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
}
table_map dups_removed_fanout= 0;
- double current_fanout= prefix_rec_count;
for (uint j= first_dupsweedout_table; j <= idx; j++)
{
POSITION *p= join->positions + j;
- current_fanout= COST_MULT(current_fanout, p->records_read);
- dups_cost= COST_ADD(dups_cost,
- COST_ADD(p->read_time,
- current_fanout / TIME_FOR_COMPARE));
+ dups_cost= COST_ADD(dups_cost, p->read_time);
+
if (p->table->emb_sj_nest)
{
sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read);
@@ -3583,29 +3643,33 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
- sj_outer_fanout table writes
- sj_inner_fanout*sj_outer_fanout lookups.
*/
- double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
- double one_write_cost= get_tmp_table_write_cost(join->thd,
- sj_outer_fanout,
- temptable_rec_size);
-
- double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count,
- sj_outer_fanout * one_write_cost);
+ TMPTABLE_COSTS one_cost= get_tmp_table_costs(join->thd,
+ sj_outer_fanout,
+ temptable_rec_size,
+ 0);
+ double write_cost=
+ COST_ADD(one_cost.create,
+ COST_MULT(join->positions[first_tab].prefix_record_count,
+ sj_outer_fanout * one_cost.write));
double full_lookup_cost=
- COST_MULT(join->positions[first_tab].prefix_record_count,
- COST_MULT(sj_outer_fanout,
- sj_inner_fanout * one_lookup_cost));
- dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost));
+ COST_MULT(join->positions[first_tab].prefix_record_count,
+ COST_MULT(sj_outer_fanout,
+ sj_inner_fanout * one_cost.lookup));
+ *read_time= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost));
- *read_time= dups_cost;
*record_count= prefix_rec_count * sj_outer_fanout;
*handled_fanout= dups_removed_fanout;
*strategy= SJ_OPT_DUPS_WEEDOUT;
- if (unlikely(trace.trace_started()))
+ if (unlikely(join->thd->trace_started()))
{
- trace.add("records", *record_count);
- trace.add("read_time", *read_time);
+ Json_writer_object trace(join->thd);
+ trace.
+ add("strategy", "DuplicateWeedout").
+ add("records", *record_count).
+ add("dups_cost", dups_cost).
+ add("write_cost", write_cost).
+ add("full_lookup_cost", full_lookup_cost).
+ add("total_cost", *read_time);
}
return TRUE;
}
@@ -3796,6 +3860,8 @@ at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
{
+ DBUG_ASSERT(start >= join->const_tables);
+
for (uint j= start; j < end ;j++)
{
double prefix_count;
@@ -3967,7 +4033,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
join->best_positions[first].n_sj_tables= tablenr - first + 1;
POSITION dummy; // For loose scan paths
double record_count= (first== join->const_tables)? 1.0:
- join->best_positions[tablenr - 1].prefix_record_count;
+ join->best_positions[first - 1].prefix_record_count;
table_map rem_tables= remaining_tables;
uint idx;
@@ -4008,7 +4074,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
POSITION *first_pos= join->best_positions + first;
POSITION loose_scan_pos; // For loose scan paths
double record_count= (first== join->const_tables)? 1.0:
- join->best_positions[tablenr - 1].prefix_record_count;
+ join->best_positions[first - 1].prefix_record_count;
table_map rem_tables= remaining_tables;
uint idx;
@@ -6532,18 +6598,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
IN/ALL/ANY optimizations are not applicable for so called fake select
(this select exists only to filter results of union if it is needed).
*/
- if (select_lex == select_lex->master_unit()->fake_select_lex)
- return 0;
-
- if (is_in_subquery())
- {
- in_subs= unit->item->get_IN_subquery();
- if (in_subs->create_in_to_exists_cond(this))
- return true;
- }
- else
+ if (select_lex == select_lex->master_unit()->fake_select_lex ||
+ likely(!is_in_subquery()))
return false;
+ in_subs= unit->item->get_IN_subquery();
+ if (in_subs->create_in_to_exists_cond(this))
+ return true;
+
/* A strategy must be chosen earlier. */
DBUG_ASSERT(in_subs->has_strategy());
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
@@ -6574,6 +6636,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
/* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost;
double dummy;
+ const char *strategy;
/*
A. Estimate the number of rows of the outer table that will be filtered
@@ -6646,39 +6709,57 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
C. Compute execution costs.
*/
/* C.1 Compute the cost of the materialization strategy. */
- //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
- uint rowlen= get_tmp_table_rec_length(ref_ptrs,
- select_lex->item_list.elements);
- /* The cost of writing one row into the temporary table. */
- double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1,
- rowlen);
- /* The cost of a lookup into the unique index of the materialized table. */
- double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1,
- rowlen);
+ bool blobs_used;
+ uint rowlen= get_tmp_table_rec_length(ref_ptrs,
+ select_lex->item_list.elements,
+ &blobs_used);
+ /* The cost of using the temp table */
+ TMPTABLE_COSTS cost= get_tmp_table_costs(thd, inner_record_count_1,
+ rowlen, blobs_used);
/*
The cost of executing the subquery and storing its result in an indexed
temporary table.
*/
- double materialization_cost= COST_ADD(inner_read_time_1,
- COST_MULT(write_cost,
- inner_record_count_1));
+ double materialization_cost=
+ COST_ADD(cost.create,
+ COST_ADD(inner_read_time_1,
+ COST_MULT(cost.write, inner_record_count_1)));
- materialize_strategy_cost= COST_ADD(materialization_cost,
- COST_MULT(outer_lookup_keys,
- lookup_cost));
+ materialize_strategy_cost=
+ COST_ADD(materialization_cost,
+ COST_MULT(outer_lookup_keys, cost.lookup));
/* C.2 Compute the cost of the IN=>EXISTS strategy. */
- in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2);
+ in_exists_strategy_cost=
+ COST_MULT(outer_lookup_keys, inner_read_time_2);
/* C.3 Compare the costs and choose the cheaper strategy. */
if (materialize_strategy_cost >= in_exists_strategy_cost)
+ {
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+ strategy= "in_to_exists";
+ }
else
+ {
in_subs->set_strategy(SUBS_MATERIALIZATION);
+ strategy= "materialization";
+ }
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object wrapper(thd);
+ Json_writer_object trace_subquery(thd, "subquery_plan");
+ trace_subquery.
+ add("records", inner_record_count_1).
+ add("materialization_cost", materialize_strategy_cost).
+ add("in_exist_cost", in_exists_strategy_cost).
+ add("choosen", strategy);
+ }
DBUG_PRINT("info",
- ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
- materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+ ("mat_strategy_cost: %.2f mat_cost: %.2f write_cost: %.2f "
+ "lookup_cost: %.2f",
+ materialize_strategy_cost, materialization_cost, cost.write,
+ cost.lookup));
DBUG_PRINT("info",
("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
in_exists_strategy_cost, inner_read_time_2));
@@ -6704,6 +6785,12 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
implementation, fall back to IN-TO-EXISTS.
*/
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object trace_subquery(thd, "subquery_plan_revert");
+ trace_subquery.add("choosen", "in_to_exists");
+ }
}
if (in_subs->test_strategy(SUBS_MATERIALIZATION))