diff options
Diffstat (limited to 'sql/opt_subselect.cc')
-rw-r--r-- | sql/opt_subselect.cc | 553 |
1 files changed, 348 insertions, 205 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index e91b2ad14c3..34cc25f9058 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -35,6 +35,7 @@ #include "sql_test.h" #include <my_bit.h> #include "opt_trace.h" +#include "optimizer_defaults.h" /* This file contains optimizations for semi-join subqueries. @@ -448,7 +449,8 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); static bool convert_subq_to_jtbm(JOIN *parent_join, Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); -static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements); +static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements, + bool *blobs_used); bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables); static SJ_MATERIALIZATION_INFO * at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, @@ -664,17 +666,6 @@ int check_and_do_in_subquery_rewrites(JOIN *join) DBUG_RETURN(-1); } } - /* Check if any table is not supporting comparable rowids */ - { - List_iterator_fast<TABLE_LIST> li(select_lex->outer_select()->leaf_tables); - TABLE_LIST *tbl; - while ((tbl = li++)) - { - TABLE *table= tbl->table; - if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) - join->not_usable_rowid_map|= table->map; - } - } DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); /* @@ -696,9 +687,10 @@ int check_and_do_in_subquery_rewrites(JOIN *join) 11. It is first optimisation (the subquery could be moved from ON clause during first optimisation and then be considered for SJ on the second when it is too late) - 12. All tables supports comparable rowids. - This is needed for DuplicateWeedout strategy to work (which - is the catch-all semi-join strategy so it must be applicable). + + There are also other requirements which cannot be checked at this phase, + yet. They are checked later in convert_join_subqueries_to_semijoins(), + look for calls to block_conversion_to_sj(). */ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && in_subs && // 1 @@ -713,8 +705,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) !((join->select_options | // 10 select_lex->outer_select()->join->select_options) // 10 & SELECT_STRAIGHT_JOIN) && // 10 - select_lex->first_cond_optimization && // 11 - join->not_usable_rowid_map == 0) // 12 + select_lex->first_cond_optimization) // 11 { DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); @@ -910,8 +901,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs) outer, converted_from_in_predicate)) { - trace_transform.add("possible", false); - trace_transform.add("cause", "types mismatch"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("possible", false). + add("cause", "types mismatch"); DBUG_RETURN(FALSE); } } @@ -933,8 +926,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs) { in_subs->types_allow_materialization= TRUE; in_subs->sjm_scan_allowed= all_are_fields; - trace_transform.add("sjm_scan_allowed", all_are_fields) - .add("possible", true); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("sjm_scan_allowed", all_are_fields). + add("possible", true); DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed")); DBUG_RETURN(TRUE); } @@ -1224,7 +1219,36 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) } } - if (join->select_options & SELECT_STRAIGHT_JOIN) + /* + Compute join->not_usable_rowid_map. + The idea is: + - DuplicateWeedout strategy requires that one is able to get the rowid + (call h->position()) for tables in the parent select. Obtained Rowid + values must be stable across table scans. + = Rowids are typically available. The only known exception is federatedx + tables. + - The optimizer requires that DuplicateWeedout strategy is always + applicable. It is the only strategy that is applicable for any join + order. The optimizer is not prepared for the situation where it has + constructed a join order and then it turns out that there's no semi-join + strategy that can be used for it. + + Because of the above, we will not use semi-joins if the parent select has + tables which do not support rowids. + */ + { + List_iterator_fast<TABLE_LIST> li(join->select_lex->leaf_tables); + TABLE_LIST *tbl; + while ((tbl = li++)) + { + TABLE *table= tbl->table; + if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + join->not_usable_rowid_map|= table->map; + } + } + + if (join->select_options & SELECT_STRAIGHT_JOIN || + join->not_usable_rowid_map != 0) { /* Block conversion to semijoins for all candidates */ li.rewind(); @@ -1294,8 +1318,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, in_subq->get_select_lex()->select_number, "IN (SELECT)", "semijoin"); - trace_transform.add("converted_to_semi_join", false) - .add("cause", "subquery attached to the ON clause"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("converted_to_semi_join", false). + add("cause", "subquery attached to the ON clause"); break; } @@ -1307,9 +1333,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) if (join->table_count + in_subq->unit->first_select()->join->table_count >= MAX_TABLES) { - trace_transform.add("converted_to_semi_join", false); - trace_transform.add("cause", - "table in parent join now exceeds MAX_TABLES"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("converted_to_semi_join", false). + add("cause", "table in parent join now exceeds MAX_TABLES"); break; } if (convert_subq_to_sj(join, in_subq)) @@ -1441,6 +1468,7 @@ void get_delayed_table_estimates(TABLE *table, Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; Table_function_json_table *table_function= table->pos_in_table_list->table_function; + handler *file= table->file; if (table_function) { @@ -1460,9 +1488,11 @@ void get_delayed_table_estimates(TABLE *table, /* Calculate cost of scanning the temptable */ double data_size= COST_MULT(item->jtbm_record_count, hash_sj_engine->tmp_table->s->reclength); - /* Do like in handler::scan_time() */ - *scan_time= ((data_size/table->file->stats.block_size+2) * - table->file->avg_io_cost()); + + /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */ + *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST * + table->file->DISK_READ_RATIO) + + *out_rows * file->ROW_COPY_COST); } @@ -2490,8 +2520,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) !sj_nest->sj_subq_pred->is_correlated && sj_nest->sj_subq_pred->types_allow_materialization) { - join->emb_sjm_nest= sj_nest; - if (choose_plan(join, all_table_map &~join->const_table_map)) + if (choose_plan(join, all_table_map &~join->const_table_map, sj_nest)) DBUG_RETURN(TRUE); /* purecov: inspected */ /* The best plan to run the subquery is now in join->best_positions, @@ -2507,24 +2536,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) sjm->is_used= FALSE; double subjoin_out_rows, subjoin_read_time; - /* - join->get_partial_cost_and_fanout(n_tables + join->const_tables, - table_map(-1), - &subjoin_read_time, - &subjoin_out_rows); - */ - join->get_prefix_cost_and_fanout(n_tables, + join->get_prefix_cost_and_fanout(n_tables, &subjoin_read_time, &subjoin_out_rows); - sjm->materialization_cost.convert_from_cost(subjoin_read_time); + sjm->materialization_cost=subjoin_read_time; sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows; - // Don't use the following list because it has "stale" items. use - // ref_pointer_array instead: - // - //List<Item> &right_expr_list= - // sj_nest->sj_subq_pred->unit->first_select()->item_list; /* Adjust output cardinality estimates. If the subquery has form @@ -2557,8 +2575,12 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) int tableno; double rows= 1.0; while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) - rows= COST_MULT(rows, - join->map2table[tableno]->table->opt_range_condition_rows); + { + ha_rows tbl_rows=join->map2table[tableno]-> + table->opt_range_condition_rows; + + rows= COST_MULT(rows, rows2double(tbl_rows)); + } sjm->rows= MY_MIN(sjm->rows, rows); } memcpy((uchar*) sjm->positions, @@ -2568,33 +2590,43 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) /* Calculate temporary table parameters and usage costs */ + bool blobs_used; uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array, - subq_select->item_list.elements); - double lookup_cost= get_tmp_table_lookup_cost(join->thd, - subjoin_out_rows, rowlen); - double write_cost= get_tmp_table_write_cost(join->thd, - subjoin_out_rows, rowlen); + subq_select->item_list.elements, + &blobs_used); + TMPTABLE_COSTS cost= get_tmp_table_costs(join->thd, + subjoin_out_rows, rowlen, + blobs_used, 1); + double scan_cost, total_cost; + double row_copy_cost= ROW_COPY_COST_THD(thd); /* Let materialization cost include the cost to write the data into the - temporary table: + temporary table. Note that smj->materialization_cost already includes + row copy and compare costs of finding the original row. */ - sjm->materialization_cost.add_io(subjoin_out_rows, write_cost); - + sjm->materialization_cost+=subjoin_out_rows * cost.write + cost.create; + /* Set the cost to do a full scan of the temptable (will need this to - consider doing sjm-scan): - */ - sjm->scan_cost.reset(); - sjm->scan_cost.add_io(sjm->rows, lookup_cost); - - sjm->lookup_cost.convert_from_cost(lookup_cost); + consider doing sjm-scan). See ha_scan_time() for the basics of + the calculations. + We don't need to check the where clause for each row, so no + WHERE_COST is needed. + */ + scan_cost= (rowlen * (double) sjm->rows) / cost.block_size; + total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost + + TABLE_SCAN_SETUP_COST_THD(thd) + + row_copy_cost * sjm->rows); + sjm->scan_cost=total_cost; + + /* When reading a row, we have also to check the where clause */ + sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd); sj_nest->sj_mat_info= sjm; DBUG_EXECUTE("opt", print_sjm(sjm);); } } } - join->emb_sjm_nest= NULL; DBUG_RETURN(FALSE); } @@ -2616,11 +2648,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) Length of the temptable record, in bytes */ -static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) +static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements, + bool *blobs_used) { uint len= 0; Item *item; - //List_iterator<Item> it(items); + + *blobs_used= 0; for (uint i= 0; i < elements ; i++) { item = p_items[i]; @@ -2643,6 +2677,8 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) len += 8; else len += item->max_length; + if (item->max_length > MAX_FIELD_VARCHARLENGTH) + *blobs_used= 1; break; case DECIMAL_RESULT: len += 10; @@ -2658,46 +2694,62 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) /** - The cost of a lookup into a unique hash/btree index on a temporary table - with 'row_count' rows each of size 'row_size'. + The cost of a create, write and read into a unique hash/btree index on + a temporary table with 'row_count' rows each of size 'row_size'. @param thd current query context @param row_count number of rows in the temp table @param row_size average size in bytes of the rows - @return the cost of one lookup -*/ + @return The cost of using the temporary table -double -get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size) -{ - if (row_count > thd->variables.max_heap_table_size / (double) row_size) - return (double) DISK_TEMPTABLE_LOOKUP_COST; - else - return (double) HEAP_TEMPTABLE_LOOKUP_COST; -} - -/** - The cost of writing a row into a temporary table with 'row_count' unique - rows each of size 'row_size'. - - @param thd current query context - @param row_count number of rows in the temp table - @param row_size average size in bytes of the rows - - @return the cost of writing one row + TODO: + This is an optimistic estimate. We are not taking into account: + - That we first write into a memory and then overflow to disk. + - If binary trees would be used for heap tables. + - The addition cost of writing a row to memory/disk and possible + index reorganization. */ -double -get_tmp_table_write_cost(THD *thd, double row_count, uint row_size) +TMPTABLE_COSTS +get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used, + bool add_copy_cost) { - double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size); - /* - TODO: - This is an optimistic estimate. Add additional costs resulting from - actually writing the row to memory/disk and possible index reorganization. - */ - return lookup_cost; + TMPTABLE_COSTS cost; + /* From heap_prepare_hp_create_info(), assuming one hash key used */ + row_size+= sizeof(char*)*2; + row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*)); + + if (row_count > thd->variables.max_heap_table_size / (double) row_size || + blobs_used) + { + double row_copy_cost= (add_copy_cost ? + tmp_table_optimizer_costs.row_copy_cost : + 0); + /* Disk based table */ + cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost * + tmp_table_optimizer_costs.disk_read_ratio) + + row_copy_cost); + cost.write= cost.lookup; + cost.create= DISK_TEMPTABLE_CREATE_COST; + cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE; + cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost; + cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio; + } + else + { + /* Values are as they are in heap.h */ + double row_copy_cost= (add_copy_cost ? + heap_optimizer_costs.row_copy_cost : + 0); + cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost; + cost.write= cost.lookup; + cost.create= HEAP_TEMPTABLE_CREATE_COST; + cost.block_size= 1; + cost.avg_io_cost= 0; + cost.cache_hit_ratio= 0; + } + return cost; } @@ -2942,9 +2994,16 @@ void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, 2. using strategy Z is cheaper, but it only removes fanout from semijoin X. 3. We have no clue what to do about fanount of semi-join Y. + + For the first iteration read_time will always be bigger than + *current_read_time (as the 'strategy' is an addition to the + chosen plan) . If a strategy was picked + (dusp_producing_tables & handled_fanout is true), then + *current_read_time is updated and the cost for the next + strategy can be smaller than *current_read_time. */ if ((dups_producing_tables & handled_fanout) || - (read_time < *current_read_time && + (read_time + COST_EPS < *current_read_time && !(handled_fanout & pos->inner_tables_handled_with_other_sjs))) { DBUG_ASSERT(pos->sj_strategy != sj_strategy); @@ -3142,9 +3201,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, mat_read_time= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->lookup_cost.total_cost()))); + mat_info->lookup_cost))); /* NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION @@ -3158,8 +3217,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, *strategy= SJ_OPT_MATERIALIZE; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3193,9 +3253,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, /* Add materialization cost */ prefix_cost= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->scan_cost.total_cost()))); + mat_info->scan_cost))); prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); uint i; @@ -3212,10 +3272,8 @@ bool Sj_materialization_picker::check_qep(JOIN *join, best_access_path(join, join->positions[i].table, rem_tables, join->positions, i, disable_jbuf, prefix_rec_count, &curpos, &dummy); - prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read); + prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out); prefix_cost= COST_ADD(prefix_cost, curpos.read_time); - prefix_cost= COST_ADD(prefix_cost, - prefix_rec_count / TIME_FOR_COMPARE); //TODO: take into account join condition selectivity here } @@ -3240,8 +3298,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, *handled_fanout= mat_nest->sj_inner_tables; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3340,8 +3399,9 @@ bool LooseScan_picker::check_qep(JOIN *join, *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3432,13 +3492,13 @@ bool Firstmatch_picker::check_qep(JOIN *join, optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE)) { /* - An important special case: only one inner table, and @@optimizer_switch - allows join buffering. + An important special case: only one inner table, and + @@optimizer_switch allows join buffering. - read_time is the same (i.e. FirstMatch doesn't add any cost - remove fanout added by the last table */ if (*record_count) - *record_count /= join->positions[idx].records_read; + *record_count /= join->positions[idx].records_out; } else { @@ -3458,8 +3518,9 @@ bool Firstmatch_picker::check_qep(JOIN *join, *strategy= SJ_OPT_FIRST_MATCH; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3471,6 +3532,56 @@ bool Firstmatch_picker::check_qep(JOIN *join, } +/* + Duplicate_weedout strategy is described at + https://mariadb.com/kb/en/duplicateweedout-strategy/ + + The idea is that if one has a subquery of type: + + select * + from Country + where + Country.code IN (select City.Country + from City + where + ...) + + Before semi join optimization it was executed this way: + - Scan rows in Country + - For each accepted row, execute the sub query with + 'Country.code = City.Country' added to the WHERE clause and with + LIMIT 1 + + With semi join optimization it can be converted to the following semi join. + + select * from Country semi-join City + where Country.code = City.Country and ... + + This is executed as: + + - Scan rows in Country + - Scan rows in City with 'Country.code = City.Country' added to the + subquery WHERE clause. Stop scanning after the first match. + + or + + - Create temporary table to store City.Country (with a unique key) + - Scan rows in City (according to plan for City) and put them into the + temporary table + - Scan the temporary table + - Do index lookup in Country table with City.Country + +With Duplicate_weedout we would try to instead do: + + - Create temporary table to hold unique rowid's for the Country + - Scan rows in City (according to plan for City) + - Scan rows in Country (according to plan for Country) + - Write Country.id rowid to temporary table. If there was no + conflicting row in the temporary table, accept the row combination. + - Delete temporary table +*/ + + void Duplicate_weedout_picker::set_from_prev(POSITION *prev) { if (prev->dups_weedout_picker.is_used) @@ -3535,46 +3646,42 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, */ uint first_tab= first_dupsweedout_table; double dups_cost; - double prefix_rec_count; + double first_weedout_table_rec_count; double sj_inner_fanout= 1.0; double sj_outer_fanout= 1.0; uint temptable_rec_size; - Json_writer_object trace(join->thd); - trace.add("strategy", "DuplicateWeedout"); if (first_tab == join->const_tables) { - prefix_rec_count= 1.0; + first_weedout_table_rec_count= 1.0; temptable_rec_size= 0; dups_cost= 0.0; } else { dups_cost= join->positions[first_tab - 1].prefix_cost; - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + first_weedout_table_rec_count= + join->positions[first_tab - 1].prefix_record_count; temptable_rec_size= 8; /* This is not true but we'll make it so */ } table_map dups_removed_fanout= 0; - double current_fanout= prefix_rec_count; for (uint j= first_dupsweedout_table; j <= idx; j++) { POSITION *p= join->positions + j; - current_fanout= COST_MULT(current_fanout, p->records_read); - dups_cost= COST_ADD(dups_cost, - COST_ADD(p->read_time, - current_fanout / TIME_FOR_COMPARE)); + dups_cost= COST_ADD(dups_cost, p->read_time); + if (p->table->emb_sj_nest) { - sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read); + sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_out); dups_removed_fanout |= p->table->table->map; } else { + sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_out); /* Ensure that table supports comparable rowids */ DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)); - sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read); temptable_rec_size += p->table->table->file->ref_length; } } @@ -3583,32 +3690,38 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, Add the cost of temptable use. The table will have sj_outer_fanout records, and we will make - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. + - sj_inner_fanout*sj_outer_fanout lookups. + There is no row copy cost (as we are only copying rowid) and no + compare cost (as we are only checking if the row exists by + checking if we got a write error. */ - double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - double one_write_cost= get_tmp_table_write_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - - double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count, - sj_outer_fanout * one_write_cost); - double full_lookup_cost= - COST_MULT(join->positions[first_tab].prefix_record_count, - COST_MULT(sj_outer_fanout, - sj_inner_fanout * one_lookup_cost)); - dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost)); + TMPTABLE_COSTS one_cost= get_tmp_table_costs(join->thd, + sj_outer_fanout, + temptable_rec_size, + 0, 0); + double write_cost= (one_cost.create + + first_weedout_table_rec_count * sj_outer_fanout * one_cost.write); + double full_lookup_cost= (first_weedout_table_rec_count* sj_outer_fanout * + sj_inner_fanout * one_cost.lookup); + *read_time= dups_cost + write_cost + full_lookup_cost; - *read_time= dups_cost; - *record_count= prefix_rec_count * sj_outer_fanout; + *record_count= first_weedout_table_rec_count * sj_outer_fanout; *handled_fanout= dups_removed_fanout; *strategy= SJ_OPT_DUPS_WEEDOUT; - if (unlikely(trace.trace_started())) + if (unlikely(join->thd->trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + Json_writer_object trace(join->thd); + trace. + add("strategy", "DuplicateWeedout"). + add("prefix_row_count", first_weedout_table_rec_count). + add("tmp_table_rows", sj_outer_fanout). + add("sj_inner_fanout", sj_inner_fanout). + add("rows", *record_count). + add("dups_cost", dups_cost). + add("write_cost", write_cost). + add("full_lookup_cost", full_lookup_cost). + add("total_cost", *read_time); } return TRUE; } @@ -3657,33 +3770,37 @@ void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const */ void restore_prev_sj_state(const table_map remaining_tables, - const JOIN_TAB *tab, uint idx) + const JOIN_TAB *tab, uint idx) { TABLE_LIST *emb_sj_nest; - if (tab->emb_sj_nest) + if ((emb_sj_nest= tab->emb_sj_nest)) { - table_map subq_tables= tab->emb_sj_nest->sj_inner_tables; + table_map subq_tables= emb_sj_nest->sj_inner_tables; tab->join->sjm_lookup_tables &= ~subq_tables; - } - if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest)) - { - table_map subq_tables= emb_sj_nest->sj_inner_tables & - ~tab->join->const_table_map; - /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & subq_tables) == subq_tables) - { - // All non-const tables of the SJ nest are in the remaining_tables. - // we are not in the nest anymore. - tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; - } - else + if (!tab->join->emb_sjm_nest) { - // Semi-join nest has: - // - a table being removed (not in the prefix) - // - some tables in the prefix. - tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + table_map subq_tables= (emb_sj_nest->sj_inner_tables & + ~tab->join->const_table_map); + /* If we're removing the last SJ-inner table, remove the sj-nest */ + if ((remaining_tables & subq_tables) == subq_tables) + { + /* + All non-const tables of the SJ nest are in the remaining_tables. + we are not in the nest anymore. + */ + tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; + } + else + { + /* + Semi-join nest has: + - a table being removed (not in the prefix) + - some tables in the prefix. + */ + tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + } } } @@ -3795,6 +3912,8 @@ at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) { + DBUG_ASSERT(start >= join->const_tables); + for (uint j= start; j < end ;j++) { double prefix_count; @@ -3802,7 +3921,7 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) prefix_count= 1.0; else prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count, - join->best_positions[j-1].records_read); + join->best_positions[j-1].records_out); join->best_positions[j].prefix_record_count= prefix_count; } @@ -3954,7 +4073,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions, i, FALSE, prefix_rec_count, join->best_positions + i, &dummy); - prefix_rec_count *= join->best_positions[i].records_read; + prefix_rec_count *= join->best_positions[i].records_out; rem_tables &= ~join->best_positions[i].table->table->map; } } @@ -3966,7 +4085,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions[first].n_sj_tables= tablenr - first + 1; POSITION dummy; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: - join->best_positions[tablenr - 1].prefix_record_count; + join->best_positions[first - 1].prefix_record_count; table_map rem_tables= remaining_tables; uint idx; @@ -3996,7 +4115,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) TRUE /* no jbuf */, record_count, join->best_positions + idx, &dummy); } - record_count *= join->best_positions[idx].records_read; + record_count *= join->best_positions[idx].records_out; rem_tables &= ~join->best_positions[idx].table->table->map; } } @@ -4007,7 +4126,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) POSITION *first_pos= join->best_positions + first; POSITION loose_scan_pos; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: - join->best_positions[tablenr - 1].prefix_record_count; + join->best_positions[first - 1].prefix_record_count; table_map rem_tables= remaining_tables; uint idx; @@ -4048,13 +4167,14 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) */ if (join->best_positions[idx].key) { + DBUG_ASSERT(join->best_positions[idx].type != JT_RANGE); delete join->best_positions[idx].table->quick; join->best_positions[idx].table->quick= NULL; } } } rem_tables &= ~join->best_positions[idx].table->table->map; - record_count *= join->best_positions[idx].records_read; + record_count *= join->best_positions[idx].records_out; } first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); @@ -4778,7 +4898,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) DBUG_PRINT("info",("Creating group key in temporary table")); share->keys=1; share->uniques= MY_TEST(using_unique_constraint); - table->key_info=keyinfo; + table->key_info= share->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts= keyinfo->user_defined_key_parts= 1; @@ -5272,7 +5392,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, Got a table that's not within any semi-join nest. This is a case like this: - SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2) + SELECT * FROM ot1, nt1 WHERE + ot1.col IN (SELECT expr FROM it1, it2) with a join order of @@ -5749,10 +5870,10 @@ enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) ((subselect_hash_sj_engine*)in_subs->engine); if (!hash_sj_engine->is_materialized) { - hash_sj_engine->materialize_join->exec(); + int error= hash_sj_engine->materialize_join->exec(); hash_sj_engine->is_materialized= TRUE; - if (unlikely(hash_sj_engine->materialize_join->error) || + if (unlikely(error) || unlikely(tab->join->thd->is_fatal_error)) DBUG_RETURN(NESTED_LOOP_ERROR); } @@ -6532,18 +6653,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) IN/ALL/ANY optimizations are not applicable for so called fake select (this select exists only to filter results of union if it is needed). */ - if (select_lex == select_lex->master_unit()->fake_select_lex) - return 0; - - if (is_in_subquery()) - { - in_subs= unit->item->get_IN_subquery(); - if (in_subs->create_in_to_exists_cond(this)) - return true; - } - else + if (select_lex == select_lex->master_unit()->fake_select_lex || + likely(!is_in_subquery())) return false; + in_subs= unit->item->get_IN_subquery(); + if (in_subs->create_in_to_exists_cond(this)) + return true; + /* A strategy must be chosen earlier. */ DBUG_ASSERT(in_subs->has_strategy()); DBUG_ASSERT(in_to_exists_where || in_to_exists_having); @@ -6574,6 +6691,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) /* The cost of the IN->EXISTS strategy. */ double in_exists_strategy_cost; double dummy; + const char *strategy; /* A. Estimate the number of rows of the outer table that will be filtered @@ -6635,7 +6753,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables) /* Get the cost of the modified IN-EXISTS plan. */ inner_read_time_2= inner_join->best_read; - } else { @@ -6647,39 +6764,58 @@ bool JOIN::choose_subquery_plan(table_map join_tables) C. Compute execution costs. */ /* C.1 Compute the cost of the materialization strategy. */ - //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list); - uint rowlen= get_tmp_table_rec_length(ref_ptrs, - select_lex->item_list.elements); - /* The cost of writing one row into the temporary table. */ - double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1, - rowlen); - /* The cost of a lookup into the unique index of the materialized table. */ - double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1, - rowlen); + bool blobs_used; + uint rowlen= get_tmp_table_rec_length(ref_ptrs, + select_lex->item_list.elements, + &blobs_used); + /* The cost of using the temp table */ + TMPTABLE_COSTS cost= get_tmp_table_costs(thd, inner_record_count_1, + rowlen, blobs_used, 1); /* The cost of executing the subquery and storing its result in an indexed temporary table. */ - double materialization_cost= COST_ADD(inner_read_time_1, - COST_MULT(write_cost, - inner_record_count_1)); + double materialization_cost= + COST_ADD(cost.create, + COST_ADD(inner_read_time_1, + COST_MULT((cost.write + WHERE_COST_THD(thd)), + inner_record_count_1))); - materialize_strategy_cost= COST_ADD(materialization_cost, - COST_MULT(outer_lookup_keys, - lookup_cost)); + materialize_strategy_cost= + COST_ADD(materialization_cost, + COST_MULT(outer_lookup_keys, cost.lookup)); /* C.2 Compute the cost of the IN=>EXISTS strategy. */ - in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2); + in_exists_strategy_cost= + COST_MULT(outer_lookup_keys, inner_read_time_2); /* C.3 Compare the costs and choose the cheaper strategy. */ if (materialize_strategy_cost >= in_exists_strategy_cost) + { in_subs->set_strategy(SUBS_IN_TO_EXISTS); + strategy= "in_to_exists"; + } else + { in_subs->set_strategy(SUBS_MATERIALIZATION); + strategy= "materialization"; + } + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_subquery(thd, "subquery_plan"); + trace_subquery. + add("rows", inner_record_count_1). + add("materialization_cost", materialize_strategy_cost). + add("in_exist_cost", in_exists_strategy_cost). + add("choosen", strategy); + } DBUG_PRINT("info", - ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f", - materialize_strategy_cost, materialization_cost, write_cost, lookup_cost)); + ("mat_strategy_cost: %.2f mat_cost: %.2f write_cost: %.2f " + "lookup_cost: %.2f", + materialize_strategy_cost, materialization_cost, cost.write, + cost.lookup)); DBUG_PRINT("info", ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f", in_exists_strategy_cost, inner_read_time_2)); @@ -6705,6 +6841,13 @@ bool JOIN::choose_subquery_plan(table_map join_tables) implementation, fall back to IN-TO-EXISTS. */ in_subs->set_strategy(SUBS_IN_TO_EXISTS); + + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_subquery(thd, "subquery_plan_revert"); + trace_subquery.add("choosen", "in_to_exists"); + } } if (in_subs->test_strategy(SUBS_MATERIALIZATION)) |