diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2010-05-23 23:13:18 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2010-05-23 23:13:18 +0400 |
commit | 3f595889d35c81540eb14ef3c53105cb6c4db833 (patch) | |
tree | b3c1f3047009d3d94a41c169093b843883ee4470 /sql | |
parent | ed8aa9868e074e6489f3b71f585606b6eecc97c6 (diff) | |
download | mariadb-git-3f595889d35c81540eb14ef3c53105cb6c4db833.tar.gz |
Subqueries: Inside-out execution for non-semijoin materialized subqueries that are AND-parts of the WHERE
- Code cleanup
- Query plan change is due to s/ha_rows JOIN_TAB::read_time/double JOIN_TAB::read_time/
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_subselect.cc | 4 | ||||
-rw-r--r-- | sql/item_subselect.h | 4 | ||||
-rw-r--r-- | sql/opt_range.cc | 3 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 71 | ||||
-rw-r--r-- | sql/opt_subselect.h | 6 | ||||
-rw-r--r-- | sql/sql_select.cc | 70 | ||||
-rw-r--r-- | sql/sql_select.h | 5 | ||||
-rw-r--r-- | sql/table.cc | 6 | ||||
-rw-r--r-- | sql/table.h | 7 |
9 files changed, 115 insertions, 61 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 1e782120867..508c2af057d 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -190,10 +190,6 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) changed= 1; inside_first_fix_fields= FALSE; - - // all transformation is done (used by prepared statements) - changed= 1; - /* Substitute the current item with an Item_in_optimizer that was created by Item_in_subselect::select_in_like_transformer and diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 3c3d6cdbf00..3e8ec9b174d 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -393,12 +393,12 @@ public: join->sj_subselects. jtbm-todo: option 1: let sj_subselects list pairs. */ - bool convert_to_semi_join; + bool is_flattenable_semijoin; /* Cost to populate the temporary table (set on if-needed basis). */ - double startup_cost; + //double startup_cost; bool *get_cond_guard(int i) { diff --git a/sql/opt_range.cc b/sql/opt_range.cc index c6aea650f5d..7c1bbc82ea4 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2285,7 +2285,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, quick=0; needed_reg.clear_all(); quick_keys.clear_all(); - if (keys_to_use.is_clear_all() || head->pos_in_table_list->jtbm_subselect) + DBUG_ASSERT(!head->is_filled_at_execution()); + if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) DBUG_RETURN(0); records= head->file->stats.records; if (!records) diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index d80fa48a169..51bd17e015c 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -172,7 +172,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (void)subquery_types_allow_materialization(in_subs); in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; - in_subs->convert_to_semi_join= TRUE; //JTBM + in_subs->is_flattenable_semijoin= TRUE; /* Register the subquery for further processing in flatten_subqueries() */ select_lex-> @@ -240,7 +240,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1) { in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; - in_subs->convert_to_semi_join= FALSE; + in_subs->is_flattenable_semijoin= FALSE; select_lex->outer_select()-> join->sj_subselects.append(thd->mem_root, in_subs); } @@ -358,7 +358,19 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) } -static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +/* + Finalize IN->EXISTS conversion in case we couldn't use materialization. + + DESCRIPTION Invoke the IN->EXISTS converter + Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE. + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static +bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) { DBUG_ENTER("make_in_exists_conversion"); JOIN *child_join= item->unit->first_select()->join; @@ -381,20 +393,15 @@ static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *i Item *substitute= item->substitution; bool do_fix_fields= !item->substitution->fixed; - Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? - &join->conds : &(item->emb_on_expr_nest->on_expr); - - Item *replace_me= item; /* - JTBM: the subquery was already mapped with Item_in_optimizer, so we - should search for that, not for original Item_in_subselect. - TODO: what about delaying that rewrite until here? + The Item_subselect has already been wrapped with Item_in_optimizer, so we + should search for item->optimizer, not 'item'. */ - if (!item->convert_to_semi_join) - { //psergey-jtbm-fix: this branch is always taken?? - replace_me= item->optimizer; - } + Item *replace_me= item->optimizer; + DBUG_ASSERT(replace_me==substitute); + Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &(item->emb_on_expr_nest->on_expr); if (replace_where_subcondition(join, tree, replace_me, substitute, do_fix_fields)) DBUG_RETURN(TRUE); @@ -528,7 +535,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) in_subq++) { bool remove_item= TRUE; - if ((*in_subq)->convert_to_semi_join) + if ((*in_subq)->is_flattenable_semijoin) { if (convert_subq_to_sj(join, *in_subq)) DBUG_RETURN(TRUE); @@ -548,7 +555,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) should search for that, not for original Item_in_subselect. TODO: what about delaying that rewrite until here? */ - if (!(*in_subq)->convert_to_semi_join) + if (!(*in_subq)->is_flattenable_semijoin) { replace_me= (*in_subq)->optimizer; } @@ -593,7 +600,7 @@ skip_conversion: should search for that, not for original Item_in_subselect. TODO: what about delaying that rewrite until here? */ - if (!(*in_subq)->convert_to_semi_join) + if (!(*in_subq)->is_flattenable_semijoin) { replace_me= (*in_subq)->optimizer; } @@ -622,9 +629,33 @@ skip_conversion: } -void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows, - ha_rows *scan_time) +/* + Get #output_rows and scan_time estimates for a "delayed" table. + + SYNOPSIS + get_delayed_table_estimates() + table IN Table to get estimates for + out_rows OUT E(#rows in the table) + scan_time OUT E(scan_time). + startup_cost OUT cost to populate the table. + + DESCRIPTION + Get #output_rows and scan_time estimates for a "delayed" table. By + "delayed" here we mean that the table is filled at the start of query + execution. This means that the optimizer can't use table statistics to + get #rows estimate for it, it has to call this function instead. + + This function is expected to make different actions depending on the nature + of the table. At the moment there is only one kind of delayed tables, + non-flattenable semi-joins. +*/ + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost) { + Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; item->optimize(); DBUG_ASSERT(item->engine->engine_type() == @@ -644,7 +675,7 @@ void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows, read_time += join->best_positions[i].read_time; } *out_rows= rows; - item->startup_cost= read_time; + *startup_cost= read_time; /* Calculate cost of scanning the temptable */ double data_size= rows * hash_sj_engine->tmp_table->s->reclength; /* Do like in handler::read_time */ diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 75e4a0df202..2f6c9d87367 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -367,8 +367,10 @@ int clear_sj_tmp_tables(JOIN *join); int rewrite_to_index_subquery_engine(JOIN *join); -void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows, - ha_rows *scan_time); +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost); bool do_jtbm_materialization_if_needed(JOIN_TAB *tab); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 597c9ed5372..a81b08511b8 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2581,10 +2581,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { /* s is the only inner table of an outer join */ #ifdef WITH_PARTITION_STORAGE_ENGINE - if (!table->pos_in_table_list->jtbm_subselect && + if (!table->is_filled_at_execution() && (!table->file->stats.records || table->no_partitions_used) && !embedding) #else - if (!table->pos_in_table_list->jtbm_subselect && + if (!table->is_filled_at_execution() && !table->file->stats.records && !embedding) #endif { // Empty table @@ -2619,7 +2619,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, #else const bool no_partitions_used= FALSE; #endif - if (!table->pos_in_table_list->jtbm_subselect && + if (!table->is_filled_at_execution() && (table->s->system || table->file->stats.records <= 1 || no_partitions_used) && !s->dependent && @@ -2719,7 +2719,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { table=s->table; - if (table->pos_in_table_list->jtbm_subselect) + if (table->is_filled_at_execution()) continue; /* @@ -2874,6 +2874,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, for (s=stat ; s < stat_end ; s++) { + s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { /* Only one matching row */ @@ -2882,18 +2883,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, } /* Approximate found rows and time to read them */ - if (s->table->pos_in_table_list->jtbm_subselect) + if (s->table->is_filled_at_execution()) { - get_temptable_params(s->table->pos_in_table_list->jtbm_subselect, - &s->records, - &s->read_time); - s->found_records= s->records; - table->quick_condition_rows=s->records; + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + table->quick_condition_rows=s->records; } else { - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + s->found_records= s->records= s->table->file->stats.records; + s->read_time= s->table->file->scan_time(); } @@ -2922,7 +2922,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, (!s->table->pos_in_table_list->embedding || // (2) (s->table->pos_in_table_list->embedding && // (3) s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) - !s->table->pos_in_table_list->jtbm_subselect) + !s->table->is_filled_at_execution()) { ha_rows records; SQL_SELECT *select; @@ -2960,7 +2960,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, if (records != HA_POS_ERROR) { s->found_records=records; - s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0); + s->read_time= s->quick ? s->quick->read_time : 0.0; } delete select; } @@ -4288,7 +4288,6 @@ best_access_path(JOIN *join, ha_rows rec; bool best_uses_jbuf= FALSE; Item_in_subselect* jtbm_subselect= s->table->pos_in_table_list->jtbm_subselect; - bool jtbm_ref_used= FALSE; Loose_scan_opt loose_scan_opt; DBUG_ENTER("best_access_path"); @@ -4641,8 +4640,8 @@ best_access_path(JOIN *join, else tmp= best_time; // Do nothing } - if (jtbm_subselect) - tmp += jtbm_subselect->startup_cost; + + tmp += s->startup_cost; loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ if (tmp < best_time - records/(double) TIME_FOR_COMPARE) @@ -4653,8 +4652,6 @@ best_access_path(JOIN *join, best_key= start_key; best_max_key_part= max_key_part; best_ref_depends_map= found_ref; - if (jtbm_subselect) - jtbm_ref_used= TRUE; } } /* for each key */ records= best_records; @@ -4687,6 +4684,11 @@ best_access_path(JOIN *join, Since we have a 'ref' access path, and FORCE INDEX instructs us to choose it over ALL/index, there is no need to consider a full table scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. */ if ((records >= s->found_records || best > s->read_time) && // (1) !(s->quick && best_key && s->quick->index == best_key->key && // (2) @@ -4694,7 +4696,7 @@ best_access_path(JOIN *join, !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) !(s->table->force_index && best_key && !s->quick) && // (4) - !jtbm_ref_used) + !(best_key && jtbm_subselect)) // (5) { // Check full join ha_rows rnd_records= s->found_records; /* @@ -4741,12 +4743,19 @@ best_access_path(JOIN *join, } else { +#if 0 /* Estimate cost of reading table. */ - if (jtbm_subselect) + if (jtbm_subselect) //psergey-jtbm-todo: why the difference? tmp= s->read_time; else tmp= s->table->file->scan_time(); - + //psergey-debug: + if (!jtbm_subselect && fabs(s->read_time - s->table->file->scan_time()) > 1.0) + { + fprintf(stderr, "Q:%s\n", thd->query()); + } +#endif + tmp= s->read_time; if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache { /* @@ -4775,8 +4784,7 @@ best_access_path(JOIN *join, } } - if (jtbm_subselect) - tmp += jtbm_subselect->startup_cost; + tmp += s->startup_cost; /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -4799,7 +4807,7 @@ best_access_path(JOIN *join, join->outer_join))); } } - + /* Update the cost information for the current partial plan */ pos->records_read= records; pos->read_time= best; @@ -6722,12 +6730,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) the index if we are using limit and this is the first table */ - if ((cond && - (!tab->keys.is_subset(tab->const_keys) && i > 0)) || - (!tab->const_keys.is_clear_all() && i == join->const_tables && - join->unit->select_limit_cnt < - join->best_positions[i].records_read && - !(join->select_options & OPTION_FOUND_ROWS))) + if (!tab->table->is_filled_at_execution() && + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->select_limit_cnt < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) { /* Join with outer join condition */ COND *orig_cond=sel->cond; diff --git a/sql/sql_select.h b/sql/sql_select.h index 733ea310ad3..ab62f5f7d46 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -210,8 +210,11 @@ typedef struct st_join_table { method (but not 'index' for some reason), i.e. this matches method which E(#records) is in found_records. */ - ha_rows read_time; + double read_time; + /* Startup cost for execution */ + double startup_cost; + table_map dependent,key_dependent; uint use_quick,index; uint status; ///< Save status for cache diff --git a/sql/table.cc b/sql/table.cc index 7c63138acc7..27aabad1cd5 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5114,6 +5114,12 @@ bool st_table::is_children_attached(void) (parent && parent->children_attached)); } + +bool st_table::is_filled_at_execution() +{ + return test(pos_in_table_list->jtbm_subselect); +} + /* Cleanup this table for re-execution. diff --git a/sql/table.h b/sql/table.h index b564d762547..c9fc12708f6 100644 --- a/sql/table.h +++ b/sql/table.h @@ -914,6 +914,13 @@ struct st_table { inline bool needs_reopen_or_name_lock() { return s->version != refresh_version; } bool is_children_attached(void); + + /* + If TRUE, the table is filled at execution phase (and so, the optimizer + should not do things like range analysis or constant table detection on + it). + */ + bool is_filled_at_execution(); }; enum enum_schema_table_state |