diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2011-04-02 14:09:00 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2011-04-02 14:09:00 +0400 |
commit | 8fb724281e00757c4a81c57b081602b3cb4e6726 (patch) | |
tree | 428e55fc35e017a80ef28cba76f3fbbf85fda97d /sql | |
parent | d5adc29d1c39027c827074f936d3f28e71f87800 (diff) | |
parent | b86abed53de628c650a1c47a0287aaa32228a051 (diff) | |
download | mariadb-git-8fb724281e00757c4a81c57b081602b3cb4e6726.tar.gz |
Merge MWL#90 with main 5.3 tree
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_cmpfunc.cc | 55 | ||||
-rw-r--r-- | sql/item_subselect.cc | 72 | ||||
-rw-r--r-- | sql/item_subselect.h | 134 | ||||
-rw-r--r-- | sql/opt_range.cc | 12 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 679 | ||||
-rw-r--r-- | sql/opt_subselect.h | 10 | ||||
-rw-r--r-- | sql/sql_base.cc | 14 | ||||
-rw-r--r-- | sql/sql_cursor.cc | 8 | ||||
-rw-r--r-- | sql/sql_delete.cc | 7 | ||||
-rw-r--r-- | sql/sql_join_cache.cc | 219 | ||||
-rw-r--r-- | sql/sql_join_cache.h | 13 | ||||
-rw-r--r-- | sql/sql_select.cc | 1365 | ||||
-rw-r--r-- | sql/sql_select.h | 95 | ||||
-rw-r--r-- | sql/sql_show.cc | 5 | ||||
-rw-r--r-- | sql/sql_test.cc | 89 | ||||
-rw-r--r-- | sql/sql_union.cc | 3 | ||||
-rw-r--r-- | sql/table.cc | 13 | ||||
-rw-r--r-- | sql/table.h | 18 |
18 files changed, 1933 insertions, 878 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index af0138402a9..555117e4c07 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5905,28 +5905,12 @@ Item_field* Item_equal::get_first(Item_field *field) { /* It's a field from an materialized semi-join. We can substitute it only - for a field from the same semi-join. + for a field from the same semi-join. Find the first of such items. */ - JOIN_TAB *first= field_tab; - JOIN *join= field_tab->join; - int tab_idx= field_tab - field_tab->join->join_tab; - DBUG_ASSERT(join->join_tab[tab_idx].table->map & - emb_nest->sj_inner_tables); - - /* Find the first table of this semi-join nest */ - for (int i= tab_idx-1; i >= (int)join->const_tables; i--) - { - if (join->join_tab[i].table->map & emb_nest->sj_inner_tables) - first= join->join_tab + i; - else - // Found first tab that doesn't belong to current SJ. - break; - } - /* Find an item to substitute for. */ while ((item= it++)) { - if (item->field->table->reginfo.join_tab >= first) + if (item->field->table->pos_in_table_list->embedding == emb_nest) { /* If we found given field then return NULL to avoid unnecessary @@ -5938,32 +5922,27 @@ Item_field* Item_equal::get_first(Item_field *field) } else { -#if 0 /* The field is not in SJ-Materialization nest. We must return the first - field that's not embedded in a SJ-Materialization nest. - Example: suppose we have a join order: + field in the join order. The field may be inside a semi-join nest, i.e + a join order may look like this: SJ-Mat(it1 it2) ot1 ot2 - and equality ot2.col = ot1.col = it2.col - If we're looking for best substitute for 'ot2.col', we should pick ot1.col - and not it2.col, because when we run a join between ot1 and ot2 - execution of SJ-Mat(...) has already finished and we can't rely on the - value of it*.*. - psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that - is. + where we're looking what to substitute ot2.col for. 
In this case we must + still return it1.col, here's a proof why: + + First let's note that either it1.col or it2.col participates in + subquery's IN-equality. It can't be otherwise, because materialization is + only applicable to uncorrelated subqueries, so the only way we could + infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-eqality has + it1.col or it2.col on its inner side. it1.col is first such item in the + join order, so it's not possible for SJ-Mat to be + SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part + of this strategy will unpack value of it1.col=it2.col into it1.col + (that's the first equal item inside the subquery), and we'll be able to + get it from there. qed. */ - while ((item= it++)) - { - TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding; - if (!emb_nest || !emb_nest->sj_mat_info || - !emb_nest->sj_mat_info->is_used) - { - return item; - } - } -#endif return fields.head(); } // Shouldn't get here. diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 854b328f862..3aaa06feb7d 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -33,11 +33,11 @@ Item_subselect::Item_subselect(): - Item_result_field(), value_assigned(0), thd(0), substitution(0), - expr_cache(0), engine(0), old_engine(0), used_tables_cache(0), - have_to_be_excluded(0), const_item_cache(1), inside_first_fix_fields(0), - done_first_fix_fields(FALSE), eliminated(FALSE), engine_changed(0), - changed(0), is_correlated(FALSE) + Item_result_field(), value_assigned(0), thd(0), old_engine(0), + used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1), + inside_first_fix_fields(0), done_first_fix_fields(FALSE), + substitution(0), expr_cache(0), engine(0), eliminated(FALSE), + engine_changed(0), changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); @@ -195,11 +195,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) { // all transformation is done (used by prepared statements) 
changed= 1; - inside_first_fix_fields= FALSE; - - - // all transformation is done (used by prepared statements) - changed= 1; + inside_first_fix_fields= FALSE; /* Substitute the current item with an Item_in_optimizer that was @@ -224,13 +220,13 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) if (!(*ref)->fixed) res= (*ref)->fix_fields(thd, ref); goto end; -//psergey-merge: done_first_fix_fields= FALSE; + } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); -//psergey-merge: done_first_fix_fields= FALSE; + goto end; } fix_length_and_dec(); @@ -248,6 +244,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) end: done_first_fix_fields= FALSE; + inside_first_fix_fields= FALSE; thd->where= save_where; return res; } @@ -478,6 +475,12 @@ bool Item_subselect::exec() return (res); } +int Item_subselect::optimize() +{ + int res; + res= engine->optimize(); + return res; +} /** Check if an expression cache is needed for this subquery @@ -784,9 +787,6 @@ Item_singlerow_subselect::select_transformer(JOIN *join) void Item_singlerow_subselect::store(uint i, Item *item) { row[i]->store(item); - //psergey-merge: can do without that: row[i]->cache_value(); - //psergey-backport-timours: ^ really, without that ^ - //psergey-try-merge-again: row[i]->cache_value(); } @@ -1009,7 +1009,7 @@ Item_in_subselect::Item_in_subselect(Item * left_exp, st_select_lex *select_lex): Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL), - exec_method(NOT_TRANSFORMED), upper_item(0) + exec_method(NOT_TRANSFORMED), is_flattenable_semijoin(FALSE), upper_item(0) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); left_expr= left_exp; @@ -2230,7 +2230,7 @@ void Item_in_subselect::update_used_tables() @retval FALSE an execution method was chosen successfully */ -bool Item_in_subselect::setup_engine() +bool Item_in_subselect::setup_engine(bool 
dont_switch_arena) { subselect_hash_sj_engine *new_engine= NULL; bool res= FALSE; @@ -2245,14 +2245,15 @@ bool Item_in_subselect::setup_engine() old_engine= (subselect_single_select_engine*) engine; - if (arena->is_conventional()) + if (arena->is_conventional() || dont_switch_arena) arena= 0; else thd->set_n_backup_active_arena(arena, &backup); if (!(new_engine= new subselect_hash_sj_engine(thd, this, old_engine)) || - new_engine->init_permanent(unit->get_unit_column_types())) + new_engine->init_permanent(unit->get_unit_column_types(), + old_engine->get_identifier())) { Item_subselect::trans_res trans_res; /* @@ -2323,7 +2324,7 @@ bool Item_in_subselect::init_left_expr_cache() An IN predicate might be evaluated in a query for which all tables have been optimzied away. */ - if (!outer_join || !outer_join->tables || !outer_join->tables_list) + if (!outer_join || !outer_join->table_count || !outer_join->tables_list) return TRUE; if (!(left_expr_cache= new List<Cached_item>)) @@ -2708,9 +2709,9 @@ int subselect_single_select_engine::exec() pushed down into the subquery. Those optimizations are ref[_or_null] acceses. Change them to be full table scans. */ - for (uint i=join->const_tables ; i < join->tables ; i++) + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { - JOIN_TAB *tab=join->join_tab+i; if (tab && tab->keyuse) { for (uint i= 0; i < tab->ref.key_parts; i++) @@ -3794,6 +3795,8 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) reexecution. @param tmp_columns the items that produce the data for the temp table + @param subquery_id subquery's identifier (to make "<subquery%d>" name for + EXPLAIN) @details - Create a temporary table to store the result of the IN subquery. 
The @@ -3809,7 +3812,8 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) @retval FALSE otherwise */ -bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) +bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns, + uint subquery_id) { /* Options to create_tmp_table. */ ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; @@ -3844,12 +3848,19 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) DBUG_RETURN(TRUE); } */ + char buf[32]; + uint len= my_snprintf(buf, sizeof(buf), "<subquery%d>", subquery_id); + char *name; + if (!(name= (char*)thd->alloc(len + 1))) + DBUG_RETURN(TRUE); + memcpy(name, buf, len+1); + if (!(result= new select_materialize_with_stats)) DBUG_RETURN(TRUE); if (((select_union*) result)->create_result_table( thd, tmp_columns, TRUE, tmp_create_options, - "materialized subselect", TRUE)) + name, TRUE)) DBUG_RETURN(TRUE); tmp_table= ((select_union*) result)->table; @@ -3930,7 +3941,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) DBUG_RETURN(TRUE); - tmp_table_ref->init_one_table("", "materialized subselect", TL_READ); + tmp_table_ref->init_one_table("", tmp_table->alias.c_ptr(), TL_READ); tmp_table_ref->table= tmp_table; context= new Name_resolution_context; @@ -4075,6 +4086,17 @@ void subselect_hash_sj_engine::cleanup() } +int subselect_hash_sj_engine::optimize() +{ + int res; + SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= materialize_join->select_lex; + res= materialize_join->optimize(); + thd->lex->current_select= save_select; + + return res; +} + /** Execute a subquery IN predicate via materialization. 
diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 3b43d75f43f..559e8747068 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -36,6 +36,22 @@ class Item_subselect :public Item_result_field protected: /* thread handler, will be assigned in fix_fields only */ THD *thd; + /* old engine if engine was changed */ + subselect_engine *old_engine; + /* cache of used external tables */ + table_map used_tables_cache; + /* allowed number of columns (1 for single value subqueries) */ + uint max_columns; + /* where subquery is placed */ + enum_parsing_place parsing_place; + /* work with 'substitution' */ + bool have_to_be_excluded; + /* cache of constant state */ + bool const_item_cache; + + bool inside_first_fix_fields; + bool done_first_fix_fields; +public: /* Used inside Item_subselect::fix_fields() according to this scenario: > Item_subselect::fix_fields @@ -46,32 +62,15 @@ protected: < child_join->prepare < engine->prepare *ref= substitution; + substitution= NULL; < Item_subselect::fix_fields */ Item *substitution; -public: /* unit of subquery */ st_select_lex_unit *unit; -protected: Item *expr_cache; /* engine that perform execution of subselect (single select or union) */ subselect_engine *engine; - /* old engine if engine was changed */ - subselect_engine *old_engine; - /* cache of used external tables */ - table_map used_tables_cache; - /* allowed number of columns (1 for single value subqueries) */ - uint max_columns; - /* where subquery is placed */ - enum_parsing_place parsing_place; - /* work with 'substitution' */ - bool have_to_be_excluded; - /* cache of constant state */ - bool const_item_cache; - - bool inside_first_fix_fields; - bool done_first_fix_fields; -public: /* A reference from inside subquery predicate to somewhere outside of it */ class Ref_to_outside : public Sql_alloc { @@ -148,6 +147,7 @@ public: bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); void fix_after_pullout(st_select_lex *new_parent, Item 
**ref); void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); + virtual int optimize(); virtual bool exec(); virtual void fix_length_and_dec(); table_map used_tables() const; @@ -312,6 +312,8 @@ public: }; +TABLE_LIST * const NO_JOIN_NEST=(TABLE_LIST*)0x1; + /** Representation of IN subquery predicates of the form "left_expr IN (SELECT ...)". @@ -350,10 +352,10 @@ protected: all JOIN in UNION */ Item *expr; - Item_in_optimizer *optimizer; bool was_null; bool abort_on_null; public: + Item_in_optimizer *optimizer; /* Used to trigger on/off conditions that were pushed down to subselect */ bool *pushed_cond_guards; @@ -362,7 +364,7 @@ public: /* Used by subquery optimizations to keep track about in which clause this subquery predicate is located: - (TABLE_LIST*) 1 - the predicate is an AND-part of the WHERE + NO_JOIN_NEST - the predicate is an AND-part of the WHERE join nest pointer - the predicate is an AND-part of ON expression of a join nest NULL - for all other locations @@ -374,7 +376,7 @@ public: - pointer to join nest if the subquery predicate is in the ON expression - (TABLE_LIST*)1 if the predicate is in the WHERE. */ - TABLE_LIST *expr_join_nest; + //TABLE_LIST *expr_join_nest; /* Types of left_expr and subquery's select list allow to perform subquery materialization. Currently, we set this to FALSE when it as well could @@ -396,6 +398,11 @@ public: }; enum_exec_method exec_method; + /* + TRUE<=>this is a flattenable semi-join, false overwise. + */ + bool is_flattenable_semijoin; + bool *get_cond_guard(int i) { return pushed_cond_guards ? 
pushed_cond_guards + i : NULL; @@ -412,7 +419,7 @@ public: Item_in_subselect(Item * left_expr, st_select_lex *select_lex); Item_in_subselect() :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), - is_constant(FALSE), optimizer(0), abort_on_null(0), + is_constant(FALSE), abort_on_null(0), optimizer(0), pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0) {} void cleanup(); @@ -445,7 +452,7 @@ public: bool fix_fields(THD *thd, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void update_used_tables(); - bool setup_engine(); + bool setup_engine(bool dont_switch_arena); bool init_left_expr_cache(); /* Inform 'this' that it was computed, and contains a valid result. */ void set_first_execution() { if (first_execution) first_execution= FALSE; } @@ -521,6 +528,7 @@ public: THD * get_thd() { return thd; } virtual int prepare()= 0; virtual void fix_length_and_dec(Item_cache** row)= 0; + virtual int optimize() { DBUG_ASSERT(0); return 0; } /* Execute the engine @@ -751,7 +759,7 @@ inline bool Item_subselect::is_uncacheable() const class subselect_hash_sj_engine : public subselect_engine { -protected: +public: /* The table into which the subquery is materialized. */ TABLE *tmp_table; /* TRUE if the subquery was materialized into a temp table. */ @@ -763,64 +771,34 @@ protected: of subselect_single_select_engine::[prepare | cols]. */ subselect_single_select_engine *materialize_engine; - /* The engine used to compute the IN predicate. */ - subselect_engine *lookup_engine; /* QEP to execute the subquery and materialize its result into a temporary table. Created during the first call to exec(). */ JOIN *materialize_join; - - /* Keyparts of the only non-NULL composite index in a rowid merge. */ - MY_BITMAP non_null_key_parts; - /* Keyparts of the single column indexes with NULL, one keypart per index. 
*/ - MY_BITMAP partial_match_key_parts; - uint count_partial_match_columns; - uint count_null_only_columns; /* A conjunction of all the equality condtions between all pairs of expressions that are arguments of an IN predicate. We need these to post-filter some IN results because index lookups sometimes match values that are actually not equal to the search key in SQL terms. - */ + */ Item_cond_and *semi_join_conds; - /* Possible execution strategies that can be used to compute hash semi-join.*/ - enum exec_strategy { - UNDEFINED, - COMPLETE_MATCH, /* Use regular index lookups. */ - PARTIAL_MATCH, /* Use some partial matching strategy. */ - PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */ - PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ - IMPOSSIBLE /* Subquery materialization is not applicable. */ - }; - /* The chosen execution strategy. Computed after materialization. */ - exec_strategy strategy; -protected: - exec_strategy get_strategy_using_schema(); - exec_strategy get_strategy_using_data(); - ulonglong rowid_merge_buff_size(bool has_non_null_key, - bool has_covering_null_row, - MY_BITMAP *partial_match_key_parts); - void choose_partial_match_strategy(bool has_non_null_key, - bool has_covering_null_row, - MY_BITMAP *partial_match_key_parts); - bool make_semi_join_conds(); - subselect_uniquesubquery_engine* make_unique_engine(); -public: subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate, subselect_single_select_engine *old_engine) - :subselect_engine(thd, in_predicate, NULL), tmp_table(NULL), - is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), - materialize_join(NULL), count_partial_match_columns(0), - count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED) + : subselect_engine(thd, in_predicate, NULL), + tmp_table(NULL), is_materialized(FALSE), materialize_engine(old_engine), + materialize_join(NULL), semi_join_conds(NULL), lookup_engine(NULL), + 
count_partial_match_columns(0), count_null_only_columns(0), + strategy(UNDEFINED) {} ~subselect_hash_sj_engine(); - bool init_permanent(List<Item> *tmp_columns); + bool init_permanent(List<Item> *tmp_columns, uint subquery_id); bool init_runtime(); void cleanup(); int prepare() { return 0; } /* Override virtual function in base class. */ + int optimize(); int exec(); virtual void print(String *str, enum_query_type query_type); uint cols() @@ -840,6 +818,38 @@ public: //=>base class bool change_result(Item_subselect *si, select_result_interceptor *result); bool no_tables();//=>base class + +protected: + /* The engine used to compute the IN predicate. */ + subselect_engine *lookup_engine; + /* Keyparts of the only non-NULL composite index in a rowid merge. */ + MY_BITMAP non_null_key_parts; + /* Keyparts of the single column indexes with NULL, one keypart per index. */ + MY_BITMAP partial_match_key_parts; + uint count_partial_match_columns; + uint count_null_only_columns; + /* Possible execution strategies that can be used to compute hash semi-join.*/ + enum exec_strategy { + UNDEFINED, + COMPLETE_MATCH, /* Use regular index lookups. */ + PARTIAL_MATCH, /* Use some partial matching strategy. */ + PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */ + PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ + IMPOSSIBLE /* Subquery materialization is not applicable. */ + }; + /* The chosen execution strategy. Computed after materialization. 
*/ + exec_strategy strategy; + exec_strategy get_strategy_using_schema(); + exec_strategy get_strategy_using_data(); + ulonglong rowid_merge_buff_size(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + void choose_partial_match_strategy(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + bool make_semi_join_conds(); + subselect_uniquesubquery_engine* make_unique_engine(); + }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 91ac52f6246..ad12f39ba6b 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -341,8 +341,9 @@ public: SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value, uint8 min_flag, uint8 max_flag, uint8 maybe_flag); SEL_ARG(enum Type type_arg) - :min_flag(0),elements(1),use_count(1),left(0),right(0),next_key_part(0), - color(BLACK), type(type_arg) + :min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/, + elements(1),use_count(1),left(0),right(0), + next_key_part(0), color(BLACK), type(type_arg) {} inline bool is_same(SEL_ARG *arg) { @@ -2909,7 +2910,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, quick=0; needed_reg.clear_all(); quick_keys.clear_all(); - if (keys_to_use.is_clear_all()) + DBUG_ASSERT(!head->is_filled_at_execution()); + if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) DBUG_RETURN(0); records= head->file->stats.records; if (!records) @@ -4317,7 +4319,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records) return 1; */ JOIN *join= param->thd->lex->select_lex.join; - if (!join || join->tables == 1) + if (!join || join->table_count == 1) { /* No join, assume reading is done in one 'sweep' */ result= busy_blocks*(DISK_SEEK_BASE_COST + @@ -11268,7 +11270,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) /* Perform few 'cheap' tests whether this access method is applicable. */ if (!join) DBUG_RETURN(NULL); /* This is not a select statement. 
*/ - if ((join->tables != 1) || /* The query must reference one table. */ + if ((join->table_count != 1) || /* The query must reference one table. */ ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */ (!join->select_distinct)) || (join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */ diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 319704a6e8f..395e26caadc 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2,7 +2,7 @@ @file @brief - Subquery optimization code here. + Semi-join subquery optimizations code */ @@ -16,7 +16,162 @@ #include <my_bit.h> -// Our own: +/* + This file contains optimizations for semi-join subqueries. + + Contents + -------- + 1. What is a semi-join subquery + 2. General idea about semi-join execution + 2.1 Correlated vs uncorrelated semi-joins + 2.2 Mergeable vs non-mergeable semi-joins + 3. Code-level view of semi-join processing + 3.1 Conversion + 3.1.1 Merged semi-join TABLE_LIST object + 3.1.2 Non-merged semi-join data structure + 3.2 Semi-joins and query optimization + 3.2.1 Non-merged semi-joins and join optimization + 3.2.2 Merged semi-joins and join optimization + 3.3 Semi-joins and query execution + + 1. What is a semi-join subquery + ------------------------------- + We use this definition of semi-join: + + outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that + exist inner_tbl.row, for which + cond(outer_tbl.row,inner_tbl.row) + is satisfied} + + That is, semi-join operation is similar to inner join operation, with + exception that we don't care how many matches a row from outer_tbl has in + inner_tbl. + + In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of + the WHERE/ON clause. + + 2. 
General idea about semi-join execution + ----------------------------------------- + We can execute semi-join in a way similar to inner join, with exception that + we need to somehow ensure that we do not generate record combinations that + differ only in rows of inner tables. + There is a number of different ways to achieve this property, implemented by + a number of semi-join execution strategies. + Some strategies can handle any semi-joins, others can be applied only to + semi-joins that have certain properties that are described below: + + 2.1 Correlated vs uncorrelated semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Uncorrelated semi-joins are special in the respect that they allow to + - execute the subquery (possible as it's uncorrelated) + - somehow make sure that generated set does not have duplicates + - perform an inner join with outer tables. + + or, rephrasing in SQL form: + + SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond) + -> + SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond) + + 2.2 Mergeable vs non-mergeable semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Semi-join operation has some degree of commutability with inner join + operation: we can join subquery's tables with outside table(s) and eliminate + duplicate record combinations after that: + + ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) -> + | + +-------------------------------+ + v + ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*) + + In order for this to work, subquery's top-level operation must be a join, without + grouping or ordering with limit (grouping or ordering with limit are not + commutative with duplicate removal). In other words, the conversion is + possible when the subquery doesn't have GROUP BY clause, any aggregate + functions*, or ORDER BY ... LIMIT clause. 
+ + Definitions: + - Subquery whose top-level operation is a join is called *mergeable semi-join* + - All other kinds of semi-join subqueries are considered non-mergeable. + + *- this requirement is actually too strong, but its exceptions are too + complicated to be considered here. + + 3. Code-level view of semi-join processing + ------------------------------------------ + + 3.1 Conversion and pre-optimization data structures + --------------------------------------------------- + * When doing JOIN::prepare for the subquery, we detect that it can be + converted into a semi-join and register it in parent_join->sj_subselects + + * At the start of parent_join->optimize(), the predicate is converted into + a semi-join node. A semi-join node is a TABLE_LIST object that is linked + somewhere in parent_join->join_list (either it is just present there, or + it is a descendant of some of its members). + + There are two kinds of semi-joins: + - Merged semi-joins + - Non-merged semi-joins + + 3.1.1 Merged semi-join TABLE_LIST object + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Merged semi-join object is a TABLE_LIST that contains a sub-join of + subquery tables and the semi-join ON expression (in this respect it is + very similar to nested outer join representation) + Merged semi-join represents this SQL: + + ... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr + + Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set. + + 3.1.2 Non-merged semi-join data structure + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery + that produces rows. It is similar to a base table and represents this SQL: + + ... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr + + Subquery items that were converted into semi-joins are removed from the WHERE + clause. (They do remain in PS-saved WHERE clause, and they replace themselves + with Item_int(1) on subsequent re-executions). 
+ + 3.2 Semi-joins and join optimization + ------------------------------------ + + 3.2.1 Non-merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For join optimization purposes, non-merged semi-join nests are similar to + base tables - they've got one JOIN_TAB, which can be accessed with one of + two methods: + - full table scan (representing SJ-Materialization-Scan strategy) + - eq_ref-like table lookup (representing SJ-Materialization-Lookup) + + Unlike regular base tables, non-merged semi-joins have: + - non-zero JOIN_TAB::startup_cost, and + - join_tab->table->is_filled_at_execution()==TRUE, which means one + cannot do const table detection or range analysis or other table data- + dependent inferences + // instead, get_delayed_table_estimates() runs optimization on the nest so that + // we get an idea about temptable size + + 3.2.2 Merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + - optimize_semijoin_nests() does pre-optimization + - during join optimization, the join has one JOIN_TAB (or is it POSITION?) + array, and suffix-based detection is used, see advance_sj_state() + - after join optimization is done, get_best_combination() switches + the data-structure to prefix-based, multiple JOIN_TAB ranges format. + + 3.3 Semi-joins and query execution + ---------------------------------- + * Join executor has hooks for all semi-join strategies. + TODO elaborate. 
+ +*/ + + static bool subquery_types_allow_materialization(Item_in_subselect *in_subs); static bool replace_where_subcondition(JOIN *join, Item **expr, @@ -25,6 +180,8 @@ static bool replace_where_subcondition(JOIN *join, Item **expr, static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, Item_in_subselect* const *el2); static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); static void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); @@ -46,21 +203,29 @@ static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab); static Item *remove_additional_cond(Item* conds); static void remove_subq_pushed_predicates(JOIN *join, Item **where); +enum_nested_loop_state +end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + /* Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them + SYNOPSIS + check_and_do_in_subquery_rewrites() + join Subquery's join + DESCRIPTION Check if we need to do - - subquery->semi-join rewrite + - subquery -> mergeable semi-join rewrite - if the subquery can be handled with materialization - 'substitution' rewrite for table-less subqueries like "(select 1)" - - and mark appropriately + - IN->EXISTS rewrite + and, depending on the rewrite, either do it, or record it to be done at a + later phase. 
RETURN - 0 - OK - -1 - Some sort of query error + 0 - OK + Other - Some sort of query error */ int check_and_do_in_subquery_rewrites(JOIN *join) @@ -166,11 +331,11 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (void)subquery_types_allow_materialization(in_subs); in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->is_flattenable_semijoin= TRUE; /* Register the subquery for further processing in flatten_subqueries() */ select_lex-> outer_select()->join->sj_subselects.append(thd->mem_root, in_subs); - in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest; } else { @@ -220,10 +385,24 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (in_subs->is_top_level_item() || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 + !in_subs->is_correlated) // 5 { + if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) in_subs->exec_method= Item_in_subselect::MATERIALIZATION; + + /* + If the subquery is an AND-part of WHERE register for being processed + with jtbm strategy + */ + if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && + thd->thd_marker.emb_on_expr_nest == NO_JOIN_NEST && + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN)) + { + in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->is_flattenable_semijoin= FALSE; + select_lex->outer_select()-> + join->sj_subselects.append(thd->mem_root, in_subs); + } } Item_subselect::trans_res trans_res; @@ -339,6 +518,69 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) /* + Finalize IN->EXISTS conversion in case we couldn't use materialization. + + DESCRIPTION Invoke the IN->EXISTS converter + Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE. 
+ + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static +bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +{ + DBUG_ENTER("make_in_exists_conversion"); + JOIN *child_join= item->unit->first_select()->join; + Item_subselect::trans_res res; + item->changed= 0; + item->fixed= 0; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= item->unit->first_select(); + + res= item->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res == Item_subselect::RES_ERROR) + DBUG_RETURN(TRUE); + + item->changed= 1; + item->fixed= 1; + + Item *substitute= item->substitution; + bool do_fix_fields= !item->substitution->fixed; + /* + The Item_subselect has already been wrapped with Item_in_optimizer, so we + should search for item->optimizer, not 'item'. + */ + Item *replace_me= item->optimizer; + DBUG_ASSERT(replace_me==substitute); + + Item **tree= (item->emb_on_expr_nest == NO_JOIN_NEST)? + &join->conds : &(item->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + item->substitution= NULL; + + if (!thd->stmt_arena->is_conventional()) + { + tree= (item->emb_on_expr_nest == (TABLE_LIST*)NO_JOIN_NEST)? 
+ &join->select_lex->prep_where : + &(item->emb_on_expr_nest->prep_on_expr); + + if (replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* Convert semi-join subquery predicates into semi-join join nests SYNOPSIS @@ -404,7 +646,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) { st_select_lex *child_select= (*in_subq)->get_select_lex(); JOIN *child_join= child_select->join; - child_join->outer_tables = child_join->tables; + child_join->outer_tables = child_join->table_count; /* child_select->where contains only the WHERE predicate of the @@ -445,25 +687,45 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES /* Replace all subqueries to be flattened with Item_int(1) */ arena= thd->activate_stmt_arena_if_needed(&backup); - for (in_subq= join->sj_subselects.front(); - in_subq != in_subq_end && - join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; - in_subq++) - { - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? 
- &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1), - FALSE)) - DBUG_RETURN(TRUE); /* purecov: inspected */ - } for (in_subq= join->sj_subselects.front(); - in_subq != in_subq_end && - join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; + in_subq != in_subq_end; in_subq++) { - if (convert_subq_to_sj(join, *in_subq)) - DBUG_RETURN(TRUE); + bool remove_item= TRUE; + if ((*in_subq)->is_flattenable_semijoin) + { + if (join->table_count + + (*in_subq)->unit->first_select()->join->table_count >= MAX_TABLES) + break; + if (convert_subq_to_sj(join, *in_subq)) + DBUG_RETURN(TRUE); + } + else + { + if (join->table_count + 1 >= MAX_TABLES) + break; + if (convert_subq_to_jtbm(join, *in_subq, &remove_item)) + DBUG_RETURN(TRUE); + } + if (remove_item) + { + Item **tree= ((*in_subq)->emb_on_expr_nest == NO_JOIN_NEST)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + } + if (replace_where_subcondition(join, tree, replace_me, new Item_int(1), + FALSE)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } } skip_conversion: /* @@ -492,20 +754,32 @@ skip_conversion: Item *substitute= (*in_subq)->substitution; bool do_fix_fields= !(*in_subq)->substitution->fixed; - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + Item **tree= ((*in_subq)->emb_on_expr_nest == NO_JOIN_NEST)? 
&join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + } + + if (replace_where_subcondition(join, tree, replace_me, substitute, do_fix_fields)) DBUG_RETURN(TRUE); (*in_subq)->substitution= NULL; if (!thd->stmt_arena->is_conventional()) { - tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + tree= ((*in_subq)->emb_on_expr_nest == NO_JOIN_NEST)? &join->select_lex->prep_where : &((*in_subq)->emb_on_expr_nest->prep_on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + if (replace_where_subcondition(join, tree, replace_me, substitute, FALSE)) DBUG_RETURN(TRUE); } @@ -517,6 +791,59 @@ skip_conversion: DBUG_RETURN(FALSE); } + +/* + Get #output_rows and scan_time estimates for a "delayed" table. + + SYNOPSIS + get_delayed_table_estimates() + table IN Table to get estimates for + out_rows OUT E(#rows in the table) + scan_time OUT E(scan_time). + startup_cost OUT cost to populate the table. + + DESCRIPTION + Get #output_rows and scan_time estimates for a "delayed" table. By + "delayed" here we mean that the table is filled at the start of query + execution. This means that the optimizer can't use table statistics to + get #rows estimate for it, it has to call this function instead. + + This function is expected to make different actions depending on the nature + of the table. At the moment there is only one kind of delayed tables, + non-flattenable semi-joins. 
+*/ + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; + item->optimize(); + + DBUG_ASSERT(item->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)item->engine); + JOIN *join= hash_sj_engine->materialize_join; + + double rows; + double read_time; + + /* Calculate #rows and cost of join execution */ + get_partial_join_cost(join, join->table_count - join->const_tables, + &read_time, &rows); + + *out_rows= (ha_rows)rows; + *startup_cost= read_time; + /* Calculate cost of scanning the temptable */ + double data_size= rows * hash_sj_engine->tmp_table->s->reclength; + /* Do like in handler::read_time */ + *scan_time= data_size/IO_SIZE + 2; +} + + /** @brief Replaces an expression destructively inside the expression tree of the WHERE clase. @@ -534,6 +861,7 @@ skip_conversion: @return <code>true</code> if there was an error, <code>false</code> if successful. */ + static bool replace_where_subcondition(JOIN *join, Item **expr, Item *old_cond, Item *new_cond, bool do_fix_fields) @@ -615,9 +943,9 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) 1. Find out where to put the predicate into. Note: for "t1 LEFT JOIN t2" this will be t2, a leaf. */ - if ((void*)subq_pred->expr_join_nest != (void*)1) + if ((void*)subq_pred->emb_on_expr_nest != (void*)NO_JOIN_NEST) { - if (subq_pred->expr_join_nest->nested_join) + if (subq_pred->emb_on_expr_nest->nested_join) { /* We're dealing with @@ -626,10 +954,10 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) The sj-nest will be inserted into the brackets nest. 
*/ - emb_tbl_nest= subq_pred->expr_join_nest; + emb_tbl_nest= subq_pred->emb_on_expr_nest; emb_join_list= &emb_tbl_nest->nested_join->join_list; } - else if (!subq_pred->expr_join_nest->outer_join) + else if (!subq_pred->emb_on_expr_nest->outer_join) { /* We're dealing with @@ -639,13 +967,13 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) The sj-nest will be tblX's "sibling", i.e. another child of its parent. This is ok because tblX is joined as an inner join. */ - emb_tbl_nest= subq_pred->expr_join_nest->embedding; + emb_tbl_nest= subq_pred->emb_on_expr_nest->embedding; if (emb_tbl_nest) emb_join_list= &emb_tbl_nest->nested_join->join_list; } - else if (!subq_pred->expr_join_nest->nested_join) + else if (!subq_pred->emb_on_expr_nest->nested_join) { - TABLE_LIST *outer_tbl= subq_pred->expr_join_nest; + TABLE_LIST *outer_tbl= subq_pred->emb_on_expr_nest; TABLE_LIST *wrap_nest; /* We're dealing with @@ -769,12 +1097,11 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) /* 3. Remove the original subquery predicate from the WHERE/ON */ // The subqueries were replaced for Item_int(1) earlier - subq_pred->exec_method= - Item_in_subselect::SEMI_JOIN; // for subsequent executions + subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions /*TODO: also reset the 'with_subselect' there. */ - /* n. Adjust the parent_join->tables counter */ - uint table_no= parent_join->tables; + /* n. Adjust the parent_join->table_count counter */ + uint table_no= parent_join->table_count; /* n. 
Walk through child's tables and adjust table->map */ for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++) { @@ -787,7 +1114,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) emb= emb->embedding) emb->select_lex= parent_join->select_lex; } - parent_join->tables += subq_lex->join->tables; + parent_join->table_count += subq_lex->join->table_count; /* Put the subquery's WHERE into semi-join's sj_on_expr @@ -887,6 +1214,133 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) DBUG_RETURN(FALSE); } + +const int SUBQERY_TEMPTABLE_NAME_MAX_LEN= 20; + +static void create_subquery_temptable_name(char *to, uint number) +{ + DBUG_ASSERT(number < 10000); + to= strmov(to, "<subquery"); + to= int10_to_str((int) number, to, 10); + to[0]= '>'; + to[1]= 0; +} + + +/* + Convert subquery predicate into non-mergeable semi-join nest. + + TODO: + why does this do IN-EXISTS conversion? Can't we unify it with mergeable + semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless + fatal errors) + + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, + bool *remove_item) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list; + TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins + TABLE_LIST *tl; + DBUG_ENTER("convert_subq_to_jtbm"); + + if (subq_pred->setup_engine(TRUE)) + DBUG_RETURN(TRUE); + + if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE) + { + *remove_item= FALSE; + bool res; + res= make_in_exists_conversion(parent_join->thd, parent_join, subq_pred); + DBUG_RETURN(res); + } + *remove_item= TRUE; + + TABLE_LIST *jtbm; + char *tbl_alias; + if (!(tbl_alias= (char*)parent_join->thd->calloc(SUBQERY_TEMPTABLE_NAME_MAX_LEN)) || + !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join 
nest! + { + DBUG_RETURN(TRUE); + } + + jtbm->join_list= emb_join_list; + jtbm->embedding= emb_tbl_nest; + jtbm->jtbm_subselect= subq_pred; + jtbm->nested_join= NULL; + + /* Nests do not participate in those 'chains', so: */ + /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/ + emb_join_list->push_back(jtbm); + + /* + Inject the jtbm table into TABLE_LIST::next_leaf list, so that + make_join_statistics() and co. can find it. + */ + for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) + {} + tl->next_leaf= jtbm; + + /* + Same as above for TABLE_LIST::next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) + {} + tl->next_local= jtbm; + + /* A theory: no need to re-connect the next_global chain */ + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + jtbm->table= hash_sj_engine->tmp_table; + + jtbm->table->tablenr= parent_join->table_count; + jtbm->table->map= table_map(1) << (parent_join->table_count); + + parent_join->table_count++; + DBUG_ASSERT(parent_join->table_count < MAX_TABLES); + + Item *conds= hash_sj_engine->semi_join_conds; + conds->fix_after_pullout(parent_lex, &conds); + + DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY);); + + create_subquery_temptable_name(tbl_alias, hash_sj_engine->materialize_join-> + select_lex->select_number); + jtbm->alias= tbl_alias; + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + DBUG_ASSERT(0); + /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); + */ + } + else + { + /* Inject into the WHERE */ + parent_join->conds= and_items(parent_join->conds, conds); + parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds); + 
parent_join->select_lex->where= parent_join->conds; + } + + /* Don't unlink the child subselect, as the subquery will be used. */ + + DBUG_RETURN(FALSE); +} + + static TABLE_LIST *alloc_join_nest(THD *thd) { TABLE_LIST *tbl; @@ -1245,6 +1699,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) DBUG_RETURN(FALSE); } + /* Get estimated record length for semi-join materialization temptable @@ -1301,7 +1756,7 @@ static uint get_tmp_table_rec_length(List<Item> &items) return len; } -//psergey-todo: is the below a kind of table elimination?? + /* Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate @@ -1318,6 +1773,8 @@ static uint get_tmp_table_rec_length(List<Item> &items) Check again if it is feasible to factor common parts with constant table search + Also check if it's feasible to factor common parts with table elimination + RETURN TRUE - There exists an eq_ref(outer-tables) candidate FALSE - Otherwise @@ -1368,6 +1825,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) return FALSE; } + /* Do semi-join optimization step after we've added a new tab to join prefix @@ -2024,7 +2482,7 @@ at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, void fix_semijoin_strategies_for_picked_join_order(JOIN *join) { - uint table_count=join->tables; + uint table_count=join->table_count; uint tablenr; table_map remaining_tables= 0; table_map handled_tabs= 0; @@ -2188,6 +2646,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } } + /* Setup semi-join materialization strategy for one semi-join nest @@ -2209,10 +2668,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) TRUE Error */ -bool setup_sj_materialization(JOIN_TAB *tab) +bool setup_sj_materialization(JOIN_TAB *sjm_tab) { uint i; DBUG_ENTER("setup_sj_materialization"); + JOIN_TAB *tab= sjm_tab->bush_children->start; TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding; SJ_MATERIALIZATION_INFO *sjm= 
emb_sj_nest->sj_mat_info; THD *thd= tab->join->thd; @@ -2240,10 +2700,13 @@ bool setup_sj_materialization(JOIN_TAB *tab) DBUG_RETURN(TRUE); /* purecov: inspected */ sjm->table->file->extra(HA_EXTRA_WRITE_CACHE); sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + tab->join->sj_tmp_tables.push_back(sjm->table); tab->join->sjm_info_list.push_back(sjm); sjm->materialized= FALSE; + sjm_tab->table= sjm->table; + if (!sjm->is_sj_scan) { KEY *tmp_key; /* The only index on the temporary table. */ @@ -2256,8 +2719,7 @@ bool setup_sj_materialization(JOIN_TAB *tab) temptable. */ TABLE_REF *tab_ref; - if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ + tab_ref= &sjm_tab->ref; tab_ref->key= 0; /* The only temp table index. */ tab_ref->key_length= tmp_key->key_length; if (!(tab_ref->key_buff= @@ -2290,12 +2752,22 @@ bool setup_sj_materialization(JOIN_TAB *tab) use that information instead. */ cur_ref_buff + null_count, - null_count ? tab_ref->key_buff : 0, + null_count ? cur_ref_buff : 0, cur_key_part->length, tab_ref->items[i], FALSE); cur_ref_buff+= cur_key_part->store_length; } *ref_key= NULL; /* End marker. */ + + /* + We don't ever have guarded conditions for SJM tables, but code at SQL + layer depends on cond_guards array being alloced. 
+ */ + if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts))) + { + DBUG_RETURN(TRUE); + } + tab_ref->key_err= 1; tab_ref->key_parts= tmp_key_parts; sjm->tab_ref= tab_ref; @@ -2315,6 +2787,8 @@ bool setup_sj_materialization(JOIN_TAB *tab) if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm, emb_sj_nest->sj_subq_pred))) DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm_tab->type= JT_EQ_REF; + sjm_tab->select_cond= sjm->in_equality; } else { @@ -2367,9 +2841,11 @@ bool setup_sj_materialization(JOIN_TAB *tab) then substitute_for_best_equal_field() will change the conditions according to the join order: - it1 - it2 it1.col=it2.col - ot cond(it1.col) + table | attached condition + ------+-------------------- + it1 | + it2 | it1.col=it2.col + ot | cond(it1.col) although we've originally had "SELECT it2.col", conditions attached to subsequent outer tables will refer to it1.col, so SJM-Scan will @@ -2398,8 +2874,18 @@ bool setup_sj_materialization(JOIN_TAB *tab) /* The write_set for source tables must be set up to allow the copying */ bitmap_set_bit(copy_to->table->write_set, copy_to->field_index); } + sjm_tab->type= JT_ALL; + + /* Initialize full scan */ + sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_record.copy_field= sjm->copy_field; + sjm_tab->read_record.copy_field_end= sjm->copy_field + + sjm->sjm_table_cols.elements; + sjm_tab->read_record.read_record= rr_sequential_and_unpack; } + sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; + DBUG_RETURN(FALSE); } @@ -3020,6 +3506,7 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) FALSE OK TRUE Out of memory error */ +JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls); int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint no_jbuf_after) @@ -3027,17 +3514,19 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint i; THD *thd= join->thd; DBUG_ENTER("setup_semijoin_dups_elimination"); - 
- for (i= join->const_tables ; i < join->tables; ) + + POSITION *pos= join->best_positions + join->const_tables; + for (i= join->const_tables ; i < join->top_join_tab_count; ) { JOIN_TAB *tab=join->join_tab + i; - POSITION *pos= join->best_positions + i; + //POSITION *pos= join->best_positions + i; uint keylen, keyno; switch (pos->sj_strategy) { case SJ_OPT_MATERIALIZE: case SJ_OPT_MATERIALIZE_SCAN: /* Do nothing */ - i+= pos->n_sj_tables; + i+= 1;// It used to be pos->n_sj_tables, but now they are embedded in a nest + pos += pos->n_sj_tables; break; case SJ_OPT_LOOSE_SCAN: { @@ -3054,6 +3543,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, if (pos->n_sj_tables > 1) tab[pos->n_sj_tables - 1].do_firstmatch= tab; i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; break; } case SJ_OPT_DUPS_WEEDOUT: @@ -3151,6 +3641,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl; i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; break; } case SJ_OPT_FIRST_MATCH: @@ -3173,10 +3664,12 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, } j[-1].do_firstmatch= jump_to; i+= pos->n_sj_tables; + pos+= pos->n_sj_tables; break; } case SJ_OPT_NONE: i++; + pos++; break; } } @@ -3363,7 +3856,7 @@ int rewrite_to_index_subquery_engine(JOIN *join) if (!join->group_list && !join->order && join->unit->item && join->unit->item->substype() == Item_subselect::IN_SUBS && - join->tables == 1 && join->conds && + join->table_count == 1 && join->conds && !join->unit->is_union()) { if (!join->having) @@ -3500,3 +3993,77 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) } +/* + Join tab execution startup function. + + SYNOPSIS + join_tab_execution_startup() + tab Join tab to perform startup actions for + + DESCRIPTION + Join tab execution startup function. 
This is different from + tab->read_first_record in the regard that this has actions that are to be + done once per join execution. + + Currently there are only two possible startup functions, so we have them + both here inside if (...) branches. In future we could switch to function + pointers. + + RETURN + NESTED_LOOP_OK - OK + NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution +*/ + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) +{ + Item_in_subselect *in_subs; + DBUG_ENTER("join_tab_execution_startup"); + + if (tab->table->pos_in_table_list && + (in_subs= tab->table->pos_in_table_list->jtbm_subselect)) + { + /* It's a non-merged SJM nest */ + DBUG_ASSERT(in_subs->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)in_subs->engine); + if (!hash_sj_engine->is_materialized) + { + hash_sj_engine->materialize_join->exec(); + hash_sj_engine->is_materialized= TRUE; + + if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + else if (tab->bush_children) + { + /* It's a merged SJM nest */ + enum_nested_loop_state rc; + SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info; + + if (!sjm->materialized) + { + JOIN *join= tab->join; + JOIN_TAB *join_tab= tab->bush_children->start; + JOIN_TAB *save_return_tab= join->return_tab; + /* + Now run the join for the inner tables. The first call is to run the + join, the second one is to signal EOF (this is essential for some + join strategies, e.g. 
it will make join buffering flush the records) + */ + if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 || + (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0) + { + join->return_tab= save_return_tab; + DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ + } + join->return_tab= save_return_tab; + sjm->materialized= TRUE; + } + } + + DBUG_RETURN(NESTED_LOOP_OK); +} + diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 47d85d5c38d..4d89609e1a8 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -1,4 +1,6 @@ -/* */ +/* + Semi-join subquery optimization code definitions +*/ #ifdef USE_PRAGMA_INTERFACE #pragma interface /* gcc class implementation */ @@ -366,4 +368,10 @@ int clear_sj_tmp_tables(JOIN *join); int rewrite_to_index_subquery_engine(JOIN *join); +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost); + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index fa495a776e1..eb8a817b8d2 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -7797,6 +7797,18 @@ bool setup_tables(THD *thd, Name_resolution_context *context, if (res) DBUG_RETURN(1); } + if (table_list->jtbm_subselect) + { + Item *item= table_list->jtbm_subselect; + if (item->fix_fields(thd, &item)) + { + my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); /* psergey-todo: WHY ER_TOO_MANY_TABLES ???*/ + DBUG_RETURN(1); + } + DBUG_ASSERT(item == table_list->jtbm_subselect); + if (table_list->jtbm_subselect->setup_engine(FALSE)) + DBUG_RETURN(1); + } } /* Precompute and store the row types of NATURAL/USING joins. 
*/ @@ -8195,7 +8207,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, goto err_no_arena; } - thd->thd_marker.emb_on_expr_nest= (TABLE_LIST*)1; + thd->thd_marker.emb_on_expr_nest= NO_JOIN_NEST; if (*conds) { thd->where="where clause"; diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc index 308c49fc15c..2f52726eea5 100644 --- a/sql/sql_cursor.cc +++ b/sql/sql_cursor.cc @@ -381,14 +381,14 @@ Sensitive_cursor::open(JOIN *join_arg) /* Prepare JOIN for reading rows. */ join->tmp_table= 0; - join->join_tab[join->tables-1].next_select= setup_end_select_func(join); + join->join_tab[join->top_join_tab_count - 1].next_select= setup_end_select_func(join); join->send_records= 0; join->fetch_limit= join->unit->offset_limit_cnt; /* Disable JOIN CACHE as it is not working with cursors yet */ - for (JOIN_TAB *tab= join_tab; - tab != join->join_tab + join->tables - 1; - tab++) + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab != join->join_tab + join->top_join_tab_count - 1; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { if (tab->next_select == sub_select_cache) tab->next_select= sub_select; diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index e2cb17090a1..49ffa3913f8 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -677,9 +677,10 @@ multi_delete::initialize_tables(JOIN *join) walk= delete_tables; - for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables; - tab < end; - tab++) + + for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { if (tab->table->map & tables_to_delete_from) { diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index e952cf3e2ef..a62c1c30ee2 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -33,6 +33,7 @@ #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) +static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save); 
/***************************************************************************** * Join cache module @@ -139,52 +140,6 @@ uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, } /* - Get the next table whose records are stored in the join buffer of this cache - - SYNOPSIS - get_next_table() - tab the table for which the next table is to be returned - - DESCRIPTION - For a given table whose records are stored in this cache the function - returns the next such table if there is any. - The function takes into account that the tables whose records are - are stored in the same cache now can interleave with tables from - materialized semijoin subqueries. - - TODO - This function should be modified/simplified after the new code for - materialized semijoins is merged. - - RETURN - The next join table whose records are stored in the buffer of this cache - if such table exists, 0 - otherwise -*/ - -JOIN_TAB *JOIN_CACHE::get_next_table(JOIN_TAB *tab) -{ - - if (++tab == join_tab) - return NULL; - if (join_tab->first_sjm_sibling) - return tab; - uint i= tab-join->join_tab; - /* - Temporary measure before MWL#90 refactorings are there: if 'tab' is at upper - level (i.e. it's not inside an SJM nest), still include into the join buffer - the tables from within SJM nest. We might need the subquery's select list - columns, because SJ-Materialization-Scan upacks data to those. - - while (sj_is_materialize_strategy(join->best_positions[i].sj_strategy) && - i < join->tables) - i+= join->best_positions[i].n_sj_tables; - - */ - return join->join_tab+i < join_tab ? join->join_tab+i : NULL; -} - - -/* Determine different counters of fields associated with a record in the cache SYNOPSIS @@ -203,12 +158,55 @@ JOIN_TAB *JOIN_CACHE::get_next_table(JOIN_TAB *tab) void JOIN_CACHE::calc_record_fields() { - JOIN_TAB *tab = prev_cache ? prev_cache->join_tab : - (join_tab->first_sjm_sibling ? 
- join_tab->first_sjm_sibling : - join->join_tab+join->const_tables); - tables= join_tab-tab; + JOIN_TAB *tab; + + if (prev_cache) + tab= prev_cache->join_tab; + else + { + if (join_tab->bush_root_tab) + { + /* + --ot1--SJM1--------------ot2--... + | + | + +-it1--...--itN + ^____________ this->join_tab is somewhere here, + inside an sjm nest. + + The join buffer should store the values of it1.*, it2.*, .. + It should not store values of ot1.*. + */ + tab= join_tab->bush_root_tab->bush_children->start; + } + else + { + /* + -ot1--ot2--SJM1--SJM2--------------ot3--...--otN + | | ^ + | +-it21--...--it2N | + | \-- we're somewhere here, + +-it11--...--it1N at the top level + + The join buffer should store the values of + + ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ... + + that is, we should start from the first non-const top-level table. + + We will need to store columns of SJ-inner tables (it_X_Y.*), but we're + not interested in storing the columns of materialization tables + themselves. Beause of that, if the first non-const top-level table is a + materialized table, we move to its bush_children: + */ + tab= join->join_tab + join->const_tables; + if (tab->bush_children) + tab= tab->bush_children->start; + } + } + DBUG_ASSERT(!tab->bush_children); + start_tab= tab; fields= 0; blobs= 0; flag_fields= 0; @@ -216,7 +214,11 @@ void JOIN_CACHE::calc_record_fields() data_field_ptr_count= 0; referenced_fields= 0; - for ( ; tab ; tab= get_next_table(tab)) + /* + The following loop will get inside SJM nests, because data may be unpacked + to sjm-inner tables. + */ + for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { tab->calc_used_field_length(FALSE); flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields); @@ -245,7 +247,8 @@ void JOIN_CACHE::calc_record_fields() that occur in the ref expressions and marks these fields in the bitmap tab->table->tmp_set. 
The function counts the number of them stored in this cache and the total number of them stored in the previous caches - and saves the results of the counting in 'local_key_arg_fields' and 'external_key_arg_fields' respectively. + and saves the results of the counting in 'local_key_arg_fields' and + 'external_key_arg_fields' respectively. NOTES The function does not do anything if no key is used to join the records @@ -269,8 +272,8 @@ void JOIN_CACHE::collect_info_on_key_args() cache= this; do { - for (tab= cache->join_tab-cache->tables; tab ; - tab= cache->get_next_table(tab)) + for (tab= cache->start_tab; tab != cache->join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { uint key_args; bitmap_clear_all(&tab->table->tmp_set); @@ -386,7 +389,8 @@ void JOIN_CACHE::create_flag_fields() ©); /* Create fields for all null bitmaps and null row flags that are needed */ - for (tab= join_tab-tables; tab; tab= get_next_table(tab)) + for (tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { TABLE *table= tab->table; @@ -473,8 +477,8 @@ void JOIN_CACHE::create_key_arg_fields() while (ext_key_arg_cnt) { cache= cache->prev_cache; - for (tab= cache->join_tab-cache->tables; tab; - tab= cache->get_next_table(tab)) + for (tab= cache->start_tab; tab != cache->join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { CACHE_FIELD *copy_end; MY_BITMAP *key_read_set= &tab->table->tmp_set; @@ -524,7 +528,8 @@ void JOIN_CACHE::create_key_arg_fields() /* Now create local fields that are used to build ref for this key access */ copy= field_descr+flag_fields; - for (tab= join_tab-tables; tab; tab= get_next_table(tab)) + for (tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set, &data_field_count, ©, @@ -573,14 +578,15 @@ void JOIN_CACHE::create_key_arg_fields() none */ -void JOIN_CACHE:: create_remaining_fields() +void 
JOIN_CACHE::create_remaining_fields() { JOIN_TAB *tab; bool all_read_fields= !is_key_access(); CACHE_FIELD *copy= field_descr+flag_fields+data_field_count; CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count; - for (tab= join_tab-tables; tab; tab= get_next_table(tab)) + for (tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { MY_BITMAP *rem_field_set; TABLE *table= tab->table; @@ -737,8 +743,11 @@ ulong JOIN_CACHE::get_min_join_buffer_size() if (!min_buff_size) { size_t len= 0; - for (JOIN_TAB *tab= join_tab-tables; tab < join_tab; tab++) + for (JOIN_TAB *tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { len+= tab->get_max_used_fieldlength(); + } len+= get_record_max_affix_length() + get_max_key_addon_space_per_record(); size_t min_sz= len*min_records; size_t add_sz= 0; @@ -790,8 +799,11 @@ ulong JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size) size_t max_sz; size_t min_sz= get_min_join_buffer_size(); size_t len= 0; - for (JOIN_TAB *tab= join_tab-tables; tab < join_tab; tab++) + for (JOIN_TAB *tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { len+= tab->get_used_fieldlength(); + } len+= get_record_max_affix_length(); avg_record_length= len; len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr; @@ -865,7 +877,9 @@ int JOIN_CACHE::alloc_buffer() set_if_bigger(max_records, 10); min_buff_size= get_min_join_buffer_size(); buff_size= get_max_join_buffer_size(optimize_buff_size); - for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++) + + for (tab= start_tab; tab!= join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { cache= tab->cache; if (cache) @@ -2154,10 +2168,19 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last) join_tab->select->quick= 0; } - /* Prepare to retrieve all records of the joined table */ - if ((error= join_tab_scan->open())) - goto finish; /* 
psergey-note: if this returns error, we will assert in net_send_statement() */ + if ((rc= join_tab_execution_startup(join_tab)) < 0) + goto finish2; + /* Prepare to retrieve all records of the joined table */ + if ((error= join_tab_scan->open())) + { + /* + TODO: if we get here, we will assert in net_send_statement(). Add test + coverage and fix. + */ + goto finish; + } + while (!(error= join_tab_scan->next())) { if (join->thd->killed) @@ -2199,6 +2222,7 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last) finish: if (error) rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; +finish2: join_tab_scan->close(); return rc; } @@ -3191,12 +3215,7 @@ uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) int JOIN_TAB_SCAN::open() { - JOIN_TAB *bound= join_tab-cache->tables; - for (JOIN_TAB *tab= join_tab-1; tab != bound && !tab->cache; tab--) - { - tab->status= tab->table->status; - tab->table->status= 0; - } + save_or_restore_used_tabs(join_tab, FALSE); is_first_record= TRUE; return join_init_read_record(join_tab); } @@ -3251,6 +3270,49 @@ int JOIN_TAB_SCAN::next() } +/* + Walk back in join order from join_tab until we encounter a join tab with + tab->cache!=NULL, and save/restore tab->table->status along the way. + + @param save TRUE save + FALSE restore +*/ + +static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) +{ + JOIN_TAB *first= join_tab->bush_root_tab? 
+ join_tab->bush_root_tab->bush_children->start : + join_tab->join->join_tab + join_tab->join->const_tables; + + for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) + { + if (tab->bush_children) + { + for (JOIN_TAB *child= tab->bush_children->start; + child != tab->bush_children->end; + child++) + { + if (save) + child->table->status= child->status; + else + { + tab->status= tab->table->status; + tab->table->status= 0; + } + } + } + + if (save) + tab->table->status= tab->status; + else + { + tab->status= tab->table->status; + tab->table->status= 0; + } + } +} + + /* Perform finalizing actions for a scan over the table records @@ -3267,9 +3329,7 @@ int JOIN_TAB_SCAN::next() void JOIN_TAB_SCAN::close() { - JOIN_TAB *bound= join_tab-cache->tables; - for (JOIN_TAB *tab= join_tab-1; tab != bound && !tab->cache; tab--) - tab->table->status= tab->status; + save_or_restore_used_tabs(join_tab, TRUE); } @@ -3669,12 +3729,7 @@ int JOIN_TAB_SCAN_MRR::open() /* Dynamic range access is never used with BKA */ DBUG_ASSERT(join_tab->use_quick != 2); - JOIN_TAB *bound= join_tab-cache->tables; - for (JOIN_TAB *tab= join_tab-1; tab != bound && !tab->cache; tab--) - { - tab->status= tab->table->status; - tab->table->status= 0; - } + save_or_restore_used_tabs(join_tab, FALSE); init_mrr_buff(); diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index 5498192122f..ad8641cf0cd 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -121,11 +121,14 @@ protected: */ JOIN *join; - /* - Cardinality of the range of join tables whose fields can be put into the - cache. A table from the range not necessarily contributes to the cache. + /* + JOIN_TAB of the first table that can have it's fields in the join cache. + That is, tables in the [start_tab, tab) range can have their fields in the + join cache. + If a join tab in the range represents an SJM-nest, then all tables from the + nest can have their fields in the join cache, too. 
*/ - uint tables; + JOIN_TAB *start_tab; /* The total number of flag and data fields that can appear in a record @@ -647,8 +650,6 @@ public: buff= 0; } - JOIN_TAB *get_next_table(JOIN_TAB *tab); - friend class JOIN_CACHE_HASHED; friend class JOIN_CACHE_BNL; friend class JOIN_CACHE_BKA; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 012ddea421a..4ebdb9ed562 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -100,7 +100,7 @@ static void revise_cache_usage(JOIN_TAB *join_tab); static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after); static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); static void update_depend_map(JOIN *join); -static void update_depend_map(JOIN *join, ORDER *order); +static void update_depend_map_for_order(JOIN *join, ORDER *order); static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond, bool change_list, bool *simple_order); static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables, @@ -238,8 +238,6 @@ static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg, double *record_count_arg); static uint make_join_orderinfo(JOIN *join); -static int -join_read_record_no_init(JOIN_TAB *tab); Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); @@ -550,7 +548,7 @@ JOIN::prepare(Item ***rref_pointer_array, table_ptr; table_ptr= table_ptr->next_leaf) { - tables++; /* Count the number of tables in the join. */ + table_count++; /* Count the number of tables in the join. */ /* If the query uses implicit grouping where the select list contains both aggregate functions and non-aggregate fields, any non-aggregated field @@ -880,7 +878,7 @@ JOIN::optimize() "Impossible HAVING" : "Impossible WHERE")); zero_result_cause= having_value == Item::COND_FALSE ? 
"Impossible HAVING" : "Impossible WHERE"; - tables= 0; + table_count= 0; error= 0; goto setup_subq_exit; } @@ -930,7 +928,7 @@ JOIN::optimize() { DBUG_PRINT("info",("No matching min/max row")); zero_result_cause= "No matching min/max row"; - tables= 0; + table_count= 0; error=0; goto setup_subq_exit; } @@ -944,14 +942,14 @@ JOIN::optimize() { DBUG_PRINT("info",("No matching min/max row")); zero_result_cause= "No matching min/max row"; - tables= 0; + table_count= 0; error=0; goto setup_subq_exit; } DBUG_PRINT("info",("Select tables optimized away")); zero_result_cause= "Select tables optimized away"; tables_list= 0; // All tables resolved - const_tables= tables; + const_tables= table_count; /* Extract all table-independent conditions and replace the WHERE clause with them. All other conditions were computed by opt_sum_query @@ -1007,7 +1005,7 @@ JOIN::optimize() else { /* Remove distinct if only const tables */ - select_distinct= select_distinct && (const_tables != tables); + select_distinct= select_distinct && (const_tables != table_count); } thd_proc_info(thd, "preparing"); @@ -1074,8 +1072,9 @@ JOIN::optimize() /* Perform the optimization on fields evaluation mentioned above for all on expressions. - */ - for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++) + */ + for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { if (*tab->on_expr_ref) { @@ -1084,26 +1083,24 @@ JOIN::optimize() map2table); (*tab->on_expr_ref)->update_used_tables(); } - - } /* Perform the optimization on fields evaliation mentioned above for all used ref items. 
*/ - for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables; tab++) + for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { uint key_copy_index=0; for (uint i=0; i < tab->ref.key_parts; i++) { - Item **ref_item_ptr= tab->ref.items+i; Item *ref_item= *ref_item_ptr; if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE)) continue; COND_EQUAL *equals= tab->first_inner ? tab->first_inner->cond_equal : - cond_equal; + cond_equal; ref_item= substitute_for_best_equal_field(ref_item, equals, map2table); ref_item->update_used_tables(); if (*ref_item_ptr != ref_item) @@ -1112,14 +1109,14 @@ JOIN::optimize() Item *item= ref_item->real_item(); store_key *key_copy= tab->ref.key_copy[key_copy_index]; if (key_copy->type() == store_key::FIELD_STORE_KEY) - { + { store_key_field *field_copy= ((store_key_field *)key_copy); field_copy->change_source_field((Item_field *) item); } } key_copy_index++; } - } + } if (conds && const_table_map != found_const_table_map && (select_options & SELECT_DESCRIBE)) @@ -1166,7 +1163,7 @@ JOIN::optimize() The FROM clause must contain a single non-constant table. */ - if (tables - const_tables == 1 && (group_list || select_distinct) && + if (table_count - const_tables == 1 && (group_list || select_distinct) && !tmp_table_param.sum_func_count && (!join_tab[const_tables].select || !join_tab[const_tables].select->quick || @@ -1217,7 +1214,7 @@ JOIN::optimize() if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE) select_distinct=0; } - else if (select_distinct && tables - const_tables == 1 && + else if (select_distinct && table_count - const_tables == 1 && rollup.state == ROLLUP::STATE_NONE) { /* @@ -1346,7 +1343,7 @@ JOIN::optimize() When the WITH ROLLUP modifier is present, we cannot skip temporary table creation for the DISTINCT clause just because there are only const tables. 
*/ - need_tmp= ((const_tables != tables && + need_tmp= ((const_tables != table_count && ((select_distinct || !simple_order || !simple_group) || (group_list && order) || test(select_options & OPTION_BUFFER_RESULT))) || @@ -1360,7 +1357,7 @@ JOIN::optimize() Yet the current implementation of FORCE INDEX hints does not allow us to do it in a clean manner. */ - no_jbuf_after= 1 ? tables : make_join_orderinfo(this); + no_jbuf_after= 1 ? table_count : make_join_orderinfo(this); select_opts_for_readinfo= (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) | @@ -1392,13 +1389,16 @@ JOIN::optimize() */ if (need_tmp || select_distinct || group_list || order) { - for (uint i = const_tables; i < tables; i++) - join_tab[i].table->prepare_for_position(); + for (uint i= 0; i < table_count; i++) + { + if (!(table[i]->map & const_table_map)) + table[i]->prepare_for_position(); + } } DBUG_EXECUTE("info",TEST_join(this);); - if (const_tables != tables) + if (const_tables != table_count) { /* Because filesort always does a full table scan or a quick range scan @@ -1573,7 +1573,7 @@ JOIN::optimize() if (exec_tmp_table1->distinct) { table_map used_tables= thd->used_tables; - JOIN_TAB *last_join_tab= join_tab+tables-1; + JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; do { if (used_tables & last_join_tab->table->map) @@ -1644,9 +1644,8 @@ bool JOIN::setup_subquery_caches() if (conds) conds= conds->transform(&Item::expr_cache_insert_transformer, (uchar*) thd); - for (JOIN_TAB *tab= join_tab + const_tables; - tab < join_tab + tables ; - tab++) + for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { if (tab->select_cond) tab->select_cond= @@ -1800,12 +1799,17 @@ JOIN::reinit() set_items_ref_array(items0); if (join_tab_save) - memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * tables); + memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count); /* need to reset ref access state (see 
join_read_key) */ if (join_tab) - for (uint i= 0; i < tables; i++) - join_tab[i].ref.key_err= TRUE; + { + for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + tab->ref.key_err= TRUE; + } + } if (tmp_join) restore_tmp(); @@ -1862,7 +1866,7 @@ JOIN::save_join_tab() if (!join_tab_save && select_lex->master_unit()->uncacheable) { if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab, - sizeof(JOIN_TAB) * tables))) + sizeof(JOIN_TAB) * table_count))) return 1; } return 0; @@ -1902,7 +1906,7 @@ JOIN::exec() } (void) result->prepare2(); // Currently, this cannot fail. - if (!tables_list && (tables || !select_lex->with_sum_func)) + if (!tables_list && (table_count || !select_lex->with_sum_func)) { // Only test of functions if (select_options & SELECT_DESCRIBE) select_describe(this, FALSE, FALSE, FALSE, @@ -1955,7 +1959,7 @@ JOIN::exec() FOUND_ROWS() may be called. Never reset the examined row count here. It must be accumulated from all join iterations of all join parts. */ - if (tables) + if (table_count) thd->limit_found_rows= 0; if (zero_result_cause) @@ -1992,7 +1996,7 @@ JOIN::exec() } if (order && (order != group_list || !(select_options & SELECT_BIG_RESULT)) && - (const_tables == tables || + (const_tables == table_count || ((simple_order || skip_sort_order) && test_if_skip_sort_order(&join_tab[const_tables], order, select_limit, 0, @@ -2003,7 +2007,7 @@ JOIN::exec() select_describe(this, need_tmp, order != 0 && !skip_sort_order, select_distinct, - !tables ? "No tables used" : NullS); + !table_count ? 
"No tables used" : NullS); DBUG_VOID_RETURN; } @@ -2037,7 +2041,7 @@ JOIN::exec() thd_proc_info(thd, copy_to_tmp_table); DBUG_PRINT("info", ("%s", thd->proc_info)); if (!curr_join->sort_and_group && - curr_join->const_tables != curr_join->tables) + curr_join->const_tables != curr_join->table_count) { JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables; first_tab->sorted= test(first_tab->loosescan_match_tab); @@ -2208,7 +2212,7 @@ JOIN::exec() DBUG_VOID_RETURN; curr_join->group_list= 0; if (!curr_join->sort_and_group && - curr_join->const_tables != curr_join->tables) + curr_join->const_tables != curr_join->table_count) { JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables; first_tab->sorted= test(first_tab->loosescan_match_tab); @@ -2221,7 +2225,7 @@ JOIN::exec() DBUG_VOID_RETURN; } end_read_record(&curr_join->join_tab->read_record); - curr_join->const_tables= curr_join->tables; // Mark free for cleanup() + curr_join->const_tables= curr_join->table_count; // Mark free for cleanup() curr_join->join_tab[0].table= 0; // Table is freed // No sum funcs anymore @@ -2393,7 +2397,7 @@ JOIN::exec() WHERE clause for any tables after the sorted one. */ JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1]; - JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables]; + JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count]; for (; curr_table < end_table ; curr_table++) { /* @@ -2431,7 +2435,7 @@ JOIN::exec() curr_join->group_list ? 
TRUE : FALSE)) DBUG_VOID_RETURN; sortorder= curr_join->sortorder; - if (curr_join->const_tables != curr_join->tables && + if (curr_join->const_tables != curr_join->table_count && !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache) { /* @@ -2453,7 +2457,7 @@ JOIN::exec() curr_join->fields= curr_fields_list; curr_join->procedure= procedure; - if (is_top_level_join() && thd->cursor && tables != const_tables) + if (is_top_level_join() && thd->cursor && table_count != const_tables) { /* We are here if this is JOIN::exec for the last select of the main unit @@ -2520,9 +2524,11 @@ JOIN::destroy() { if (join_tab != tmp_join->join_tab) { - JOIN_TAB *tab, *end; - for (tab= join_tab, end= tab+tables ; tab != end ; tab++) + for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { tab->cleanup(); + } } tmp_join->tmp_join= 0; /* @@ -2763,7 +2769,7 @@ bool JOIN::setup_subquery_materialization() { Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate; if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && - in_subs->setup_engine()) + in_subs->setup_engine(FALSE)) return TRUE; } } @@ -2848,8 +2854,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, JOIN_TAB *stat_vector[MAX_TABLES+1]; DBUG_ENTER("make_join_statistics"); - table_count=join->tables; - stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count); + LINT_INIT(table); /* inited in all loops */ + table_count=join->table_count; + + stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count)); stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES); table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2)); if (!stat || !stat_ref || !table_vector) @@ -2886,10 +2894,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); all_table_map|= table->map; s->join=join; - 
s->info=0; // For describe s->dependent= tables->dep_tables; - s->key_dependent= 0; if (tables->schema_table) table->file->stats.records= 2; table->quick_condition_rows= table->file->stats.records; @@ -2899,9 +2905,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { /* s is the only inner table of an outer join */ #ifdef WITH_PARTITION_STORAGE_ENGINE - if ((!table->file->stats.records || table->no_partitions_used) && !embedding) + if (!table->is_filled_at_execution() && + (!table->file->stats.records || table->no_partitions_used) && !embedding) #else - if (!table->file->stats.records && !embedding) + if (!table->is_filled_at_execution() && + !table->file->stats.records && !embedding) #endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. @@ -2935,7 +2943,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, #else const bool no_partitions_used= FALSE; #endif - if ((table->s->system || table->file->stats.records <= 1 || + if (!table->is_filled_at_execution() && + (table->s->system || table->file->stats.records <= 1 || no_partitions_used) && !s->dependent && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -2988,7 +2997,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { if (s->dependent & s->table->map) { - join->tables=0; // Don't use join->table + join->table_count=0; // Don't use join->table my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0)); goto error; } @@ -2997,7 +3006,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, } if (conds || outer_join) - if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables, + if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count, conds, join->cond_equal, ~outer_join, join->select_lex, &sargables)) goto error; @@ -3047,6 +3056,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { table=s->table; + if (table->is_filled_at_execution()) + 
continue; + /* If equi-join condition by a key is null rejecting and after a substitution of a const table the key value happens to be null @@ -3204,15 +3216,34 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, for (s=stat ; s < stat_end ; s++) { + s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { /* Only one matching row */ - s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0; + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; continue; } /* Approximate found rows and time to read them */ - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + table->quick_condition_rows=s->records; + } + else + { + s->found_records= s->records= s->table->file->stats.records; + s->read_time= s->table->file->scan_time(); + /* + table->quick_condition_rows has already been set to + table->file->stats.records + */ + } + /* Set a max range of how many seeks we can expect when using keys @@ -3235,10 +3266,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, Don't do range analysis if we're on the inner side of an outer join (2). Do range analysis if we're on the inner side of a semi-join (3). 
*/ - if (!s->const_keys.is_clear_all() && // (1) - (!s->table->pos_in_table_list->embedding || // (2) - (s->table->pos_in_table_list->embedding && // (3) - s->table->pos_in_table_list->embedding->sj_on_expr))) // (3) + if (!s->const_keys.is_clear_all() && // (1) + (!s->table->pos_in_table_list->embedding || // (2) + (s->table->pos_in_table_list->embedding && // (3) + s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) + !s->table->is_filled_at_execution()) { ha_rows records; SQL_SELECT *select; @@ -3276,7 +3308,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, if (records != HA_POS_ERROR) { s->found_records=records; - s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0); + s->read_time= s->quick ? s->quick->read_time : 0.0; } delete select; } @@ -3287,18 +3319,18 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, join->join_tab=stat; join->map2table=stat_ref; - join->table= join->all_tables=table_vector; + join->table= table_vector; join->const_tables=const_count; join->found_const_table_map=found_const_table_map; - if (join->const_tables != join->tables) + if (join->const_tables != join->table_count) optimize_keyuse(join, keyuse_array); if (optimize_semijoin_nests(join, all_table_map)) DBUG_RETURN(TRUE); /* purecov: inspected */ /* Find an optimal join order of the non-constant tables. */ - if (join->const_tables != join->tables) + if (join->const_tables != join->table_count) { if (choose_plan(join, all_table_map & ~join->const_table_map)) goto error; @@ -4516,7 +4548,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) for (tablenr=0 ; ! 
(map & 1) ; map>>=1, tablenr++) ; if (map == 1) // Only one table { - TABLE *tmp_table=join->all_tables[tablenr]; + TABLE *tmp_table=join->table[tablenr]; keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100); } } @@ -5073,8 +5105,9 @@ best_access_path(JOIN *join, else tmp= best_time; // Do nothing } - loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); + tmp += s->startup_cost; + loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE) { @@ -5151,13 +5184,19 @@ best_access_path(JOIN *join, Since we have a 'ref' access path, and FORCE INDEX instructs us to choose it over ALL/index, there is no need to consider a full table scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. */ if ((records >= s->found_records || best > s->read_time) && // (1) !(s->quick && best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(s->table->force_index && best_key && !s->quick)) // (4) + !(s->table->force_index && best_key && !s->quick) && // (4) + !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join ha_rows rnd_records= matching_candidates_in_table(s, found_constraint); @@ -5186,8 +5225,7 @@ best_access_path(JOIN *join, } else { - /* Estimate cost of reading table. 
*/ - tmp= s->table->file->scan_time(); + tmp= s->read_time; if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache { /* @@ -5216,6 +5254,7 @@ best_access_path(JOIN *join, } } + tmp += s->startup_cost; /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -5239,7 +5278,7 @@ best_access_path(JOIN *join, join->outer_join))); } } - + /* Update the cost information for the current partial plan */ pos->records_read= records; pos->read_time= best; @@ -5317,7 +5356,7 @@ choose_plan(JOIN *join, table_map join_tables) jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp; } my_qsort2(join->best_ref + join->const_tables, - join->tables - join->const_tables, sizeof(JOIN_TAB*), + join->table_count - join->const_tables, sizeof(JOIN_TAB*), jtab_sort_func, (void*)join->emb_sjm_nest); join->cur_sj_inner_tables= 0; @@ -5493,7 +5532,7 @@ join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2 static uint determine_search_depth(JOIN *join) { - uint table_count= join->tables - join->const_tables; + uint table_count= join->table_count - join->const_tables; uint search_depth; /* TODO: this value should be determined dynamically, based on statistics: */ uint max_tables_for_exhaustive_opt= 7; @@ -6196,8 +6235,8 @@ void JOIN_TAB::calc_used_field_length(bool max_fl) set_if_smaller(rec_length, table->file->stats.mean_rec_length); /* - psergey-todo: why we don't count here rowid that we might need to store - when using DuplicateElimination? + TODO: why we don't count here rowid that we might need to store when + using DuplicateElimination? */ used_fields=fields; used_fieldlength=rec_length; @@ -6381,6 +6420,210 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) /* + Enumerate join tabs in breadth-first fashion, including const tables. 
+*/ + +JOIN_TAB *first_breadth_first_tab(JOIN *join) +{ + return join->join_tab; /* There's always one (i.e. first) table */ +} + + +JOIN_TAB *next_breadth_first_tab(JOIN *join, JOIN_TAB *tab) +{ + if (!tab->bush_root_tab) + { + /* We're at top level. Get the next top-level tab */ + tab++; + if (tab < join->join_tab + join->top_join_tab_count) + return tab; + + /* No more top-level tabs. Switch to enumerating SJM nest children */ + tab= join->join_tab; + } + else + { + /* We're inside of an SJM nest */ + if (!tab->last_leaf_in_bush) + { + /* There's one more table in the nest, return it. */ + return ++tab; + } + else + { + /* + There are no more tables in this nest. Get out of it and then we'll + proceed to the next nest. + */ + tab= tab->bush_root_tab + 1; + } + } + + /* + Ok, "tab" points to a top-level table, and we need to find the next SJM + nest and enter it. + */ + for (; tab < join->join_tab + join->top_join_tab_count; tab++) + { + if (tab->bush_children) + return tab->bush_children->start; + } + return NULL; +} + + +JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const) +{ + JOIN_TAB *tab= join->join_tab; + if (with_const == WITH_CONST_TABLES) + { + if (join->const_tables == join->table_count) + return NULL; + tab += join->const_tables; + } + return tab; +} + + +JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab) +{ + tab= next_breadth_first_tab(join, tab); + if (tab->bush_root_tab) + tab= NULL; + return tab; +} + + +JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls) +{ + JOIN_TAB *first= join->join_tab; + if (const_tbls == WITHOUT_CONST_TABLES) + first+= join->const_tables; + if (first < join->join_tab + join->top_join_tab_count) + return first; + return NULL; /* All tables were const tables */ +} + + +/* + A helper function to loop over all join's join_tab in sequential fashion + + DESCRIPTION + Depending on include_bush_roots parameter, JOIN_TABs that represent + SJM-scan/lookups are either 
returned or omitted. + + SJM-Bush children are returned right after (or in place of) their container + join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems + to) + + For example, if we have this structure: + + ot1--ot2--sjm1----------------ot3-... + | + +--it1--it2--it3 + + calls to next_linear_tab( include_bush_roots=TRUE) will return: + + ot1 ot2 sjm1 it1 it2 it3 ot3 ... + + while calls to next_linear_tab( include_bush_roots=FALSE) will return: + + ot1 ot2 it1 it2 it3 ot3 ... + + (note that sjm1 won't be returned). +*/ + +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, + enum enum_with_bush_roots include_bush_roots) +{ + if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children) + { + /* This JOIN_TAB is a SJM nest; Start from first table in nest */ + return tab->bush_children->start; + } + + DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab); + + if (tab->bush_root_tab) /* Are we inside an SJM nest */ + { + /* Inside SJM nest */ + if (!tab->last_leaf_in_bush) + return tab+1; /* Return next in nest */ + /* Continue from the sjm on the top level */ + tab= tab->bush_root_tab; + } + + /* If no more JOIN_TAB's on the top level */ + if (++tab == join->join_tab + join->top_join_tab_count) + return NULL; + + if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children) + { + /* This JOIN_TAB is a SJM nest; Start from first table in nest */ + tab= tab->bush_children->start; + } + return tab; +} + + +/* + Start to iterate over all join tables in bush-children-first order, excluding + the const tables (see next_depth_first_tab() comment for details) +*/ + +JOIN_TAB *first_depth_first_tab(JOIN* join) +{ + JOIN_TAB* tab; + /* This means we're starting the enumeration */ + if (join->const_tables == join->top_join_tab_count) + return NULL; + + tab= join->join_tab + join->const_tables; + + return (tab->bush_children) ? 
tab->bush_children->start : tab; +} + + +/* + A helper function to iterate over all join tables in bush-children-first order + + DESCRIPTION + + For example, for this join plan + + ot1--ot2--sjm1------------ot3-... + | + | + it1--it2--it3 + + call to first_depth_first_tab() will return ot1, and subsequent calls to + next_depth_first_tab() will return: + + ot2 it1 it2 it3 sjm ot3 ... +*/ + +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) +{ + /* If we're inside SJM nest and have reached its end, get out */ + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing */ + tab++; + + if (tab == join->join_tab +join->top_join_tab_count) + return NULL; /* Outside SJM nest and reached EOF */ + + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +} + + +static Item * const null_ptr= NULL; + +/* Set up join struct according to the picked join order in SYNOPSIS @@ -6395,6 +6638,11 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) - create join->join_tab array and put there the JOIN_TABs in the join order - create data structures describing ref access methods. + NOTE + In this function we switch from pre-join-optimization JOIN_TABs to + post-join-optimization JOIN_TABs. This is achieved by copying the entire + JOIN_TAB objects. 
+ RETURN FALSE OK TRUE Out of memory @@ -6403,7 +6651,7 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) static bool get_best_combination(JOIN *join) { - uint i,tablenr; + uint tablenr; table_map used_tables; JOIN_TAB *join_tab,*j; KEYUSE *keyuse; @@ -6411,7 +6659,7 @@ get_best_combination(JOIN *join) THD *thd=join->thd; DBUG_ENTER("get_best_combination"); - table_count=join->tables; + table_count=join->table_count; if (!(join->join_tab=join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count))) DBUG_RETURN(TRUE); @@ -6422,10 +6670,64 @@ get_best_combination(JOIN *join) fix_semijoin_strategies_for_picked_join_order(join); + JOIN_TAB_RANGE *root_range; + if (!(root_range= new JOIN_TAB_RANGE)) + DBUG_RETURN(TRUE); + root_range->start= join->join_tab; + /* root_range->end will be set later */ + join->join_tab_ranges.empty(); + + if (join->join_tab_ranges.push_back(root_range)) + DBUG_RETURN(TRUE); + + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_nest_root= NULL; + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { TABLE *form; + POSITION *cur_pos= &join->best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + */ + bzero(j, sizeof(JOIN_TAB)); + j->join= join; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->on_expr_ref= (Item**) &null_ptr; + j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */ + + /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? 
sjm->rows : 1); + JOIN_TAB *jt; + JOIN_TAB_RANGE *jt_range; + if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || + !(jt_range= new JOIN_TAB_RANGE)) + DBUG_RETURN(TRUE); + jt_range->start= jt; + jt_range->end= jt + sjm->tables; + join->join_tab_ranges.push_back(jt_range); + j->bush_children= jt_range; + sjm_nest_end= jt + sjm->tables; + sjm_nest_root= j; + + j= jt; + } + *j= *join->best_positions[tablenr].table; + + j->bush_root_tab= sjm_nest_root; + form=join->table[tablenr]=j->table; used_tables|= form->map; form->reginfo.join_tab=j; @@ -6433,14 +6735,14 @@ get_best_combination(JOIN *join) form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN DBUG_PRINT("info",("type: %d", j->type)); if (j->type == JT_CONST) - continue; // Handled in make_join_stat.. + goto loop_end; // Handled in make_join_stat.. j->loosescan_match_tab= NULL; //non-nulls will be set later j->ref.key = -1; j->ref.key_parts=0; if (j->type == JT_SYSTEM) - continue; + goto loop_end; if ( !(keyuse= join->best_positions[tablenr].key) || (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { @@ -6451,10 +6753,27 @@ get_best_combination(JOIN *join) } else if (create_ref_for_key(join, j, keyuse, used_tables)) DBUG_RETURN(TRUE); // Something went wrong + loop_end: + /* + Save records_read in JOIN_TAB so that select_describe()/etc don't have + to access join->best_positions[]. 
+ */ + j->records_read= (ha_rows)join->best_positions[tablenr].records_read; + join->map2table[j->table->tablenr]= j; + + /* If we've reached the end of sjm nest, switch back to main sequence */ + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_nest_root; + sjm_nest_root= NULL; + sjm_nest_end= NULL; + } } + root_range->end= j; - for (i=0 ; i < table_count ; i++) - join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i; + join->top_join_tab_count= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; update_depend_map(join); DBUG_RETURN(0); } @@ -6801,7 +7120,8 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab= parent->join_tab_reexec; table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; - tables= 1; + table_count= top_join_tab_count= 1; + const_tables= 0; const_table_map= 0; eliminated_tables= 0; @@ -6846,6 +7166,9 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab->loosescan_match_tab= NULL; join_tab->emb_sj_nest= NULL; join_tab->pre_idx_push_select_cond= NULL; + join_tab->bush_root_tab= NULL; + join_tab->bush_children= NULL; + join_tab->last_leaf_in_bush= FALSE; bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record)); temp_table->status=0; temp_table->null_row=0; @@ -6926,9 +7249,11 @@ inline void add_cond_and_fix(Item **e1, Item *e2) static void add_not_null_conds(JOIN *join) { DBUG_ENTER("add_not_null_conds"); - for (uint i=join->const_tables ; i < join->tables ; i++) + + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { - JOIN_TAB *tab=join->join_tab+i; if (tab->type == JT_REF || tab->type == JT_EQ_REF || tab->type == JT_REF_OR_NULL) { @@ -7057,14 +7382,21 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab) This function can be called only after the execution plan has been chosen. 
*/ + static void make_outerjoin_info(JOIN *join) { DBUG_ENTER("make_outerjoin_info"); - for (uint i=join->const_tables ; i < join->tables ; i++) + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { - JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; + /* + psergey: The following is probably incorrect, fix it when we get + semi+outer joins processing to work: + */ + if (!table) + continue; TABLE_LIST *tbl= table->pos_in_table_list; TABLE_LIST *embedding= tbl->embedding; @@ -7135,9 +7467,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */ { /* there may be a select without a cond. */ - if (join->tables > 1) + if (join->table_count > 1) cond->update_used_tables(); // Tablenr may have changed - if (join->const_tables == join->tables && + if (join->const_tables == join->table_count && thd->lex->current_select->master_unit() == &thd->lex->unit) // not upper level SELECT join->const_table_map|=RAND_TABLE_BIT; @@ -7159,8 +7491,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) add_cond_and_fix(&const_cond, join->join_tab[i].select_cond); DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY);); - for (JOIN_TAB *tab= join->join_tab+join->const_tables; - tab < join->join_tab+join->tables ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { if (*tab->on_expr_ref) { @@ -7208,16 +7541,22 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); JOIN_TAB *tab; table_map current_map; - for (uint i=join->const_tables ; i < join->tables ; i++) + uint i= join->const_tables; + for (tab= first_depth_first_tab(join); tab; + tab= next_depth_first_tab(join, tab), i++) { bool is_hj; - tab= join->join_tab+i; /* first_inner is the X in queries like: SELECT * FROM t1 LEFT OUTER JOIN 
(t2 JOIN t3) ON X */ - JOIN_TAB *first_inner_tab= tab->first_inner; - current_map= tab->table->map; + JOIN_TAB *first_inner_tab= tab->first_inner; + + if (tab->table) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; + bool use_quick_range=0; COND *tmp; @@ -7240,7 +7579,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) Following force including random expression in last table condition. It solve problem with select like SELECT * FROM t1 WHERE rand() > 0.5 */ - if (i == join->tables-1) + if (tab == join->join_tab + join->top_join_tab_count - 1) current_map|= OUTER_REF_TABLE_BIT | RAND_TABLE_BIT; used_tables|=current_map; @@ -7260,16 +7599,29 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) We will use join cache here : prevent sorting of the first table only and sort at the end. */ - if (i != join->const_tables && join->tables > join->const_tables + 1) + if (i != join->const_tables && join->table_count > join->const_tables + 1) join->full_join= 1; } tmp= NULL; + if (cond) - tmp= make_cond_for_table(cond, used_tables, current_map, FALSE, FALSE); - /* Add conditions added by add_not_null_conds(). */ - if (tab->select_cond) - add_cond_and_fix(&tmp, tab->select_cond); + { + if (tab->bush_children) + { + // Reached the materialization tab + tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables); + used_tables= save_used_tables | used_tables; + save_used_tables= 0; + } + else + tmp= make_cond_for_table(cond, used_tables, current_map, FALSE, FALSE); + + /* Add conditions added by add_not_null_conds(). 
*/ + if (tab->select_cond) + add_cond_and_fix(&tmp, tab->select_cond); + } + is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) && (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && @@ -7308,9 +7660,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL || tab->type == JT_EQ_REF || first_inner_tab) { - DBUG_EXECUTE("where", - print_where(tmp,tab->table->alias.c_ptr(), - QT_ORDINARY);); + DBUG_EXECUTE("where",print_where(tmp, + tab->table? tab->table->alias.c_ptr() :"sjm-nest", + QT_ORDINARY);); SQL_SELECT *sel= tab->select= ((SQL_SELECT*) thd->memdup((uchar*) select, sizeof(*select))); @@ -7334,15 +7686,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->set_select_cond(tmp, __LINE__); /* Push condition to storage engine if this is enabled and the condition is not guarded */ - if (thd->variables.engine_condition_pushdown && !first_inner_tab) + if (tab->table) { - COND *push_cond= - make_cond_for_table(tmp, current_map, current_map, FALSE, FALSE); - if (push_cond) + tab->table->file->pushed_cond= NULL; /* psergey-todo: why we have removed this but get it with merge? */ + if (thd->variables.engine_condition_pushdown && !first_inner_tab) { - /* Push condition to handler */ - if (!tab->table->file->cond_push(push_cond)) - tab->table->file->pushed_cond= push_cond; + COND *push_cond= + make_cond_for_table(tmp, current_map, current_map, FALSE, FALSE); + if (push_cond) + { + /* Push condition to handler */ + if (!tab->table->file->cond_push(push_cond)) + tab->table->file->pushed_cond= push_cond; + } } } } @@ -7375,7 +7731,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } tab->quick=0; } - uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1; + uint ref_key= sel->head? 
(uint) sel->head->reginfo.join_tab->ref.key+1 : 0; if (i == join->const_tables && ref_key) { if (!tab->const_keys.is_clear_all() && @@ -7394,12 +7750,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) the index if we are using limit and this is the first table */ - if ((cond && - (!tab->keys.is_subset(tab->const_keys) && i > 0)) || - (!tab->const_keys.is_clear_all() && i == join->const_tables && - join->unit->select_limit_cnt < - join->best_positions[i].records_read && - !(join->select_options & OPTION_FOUND_ROWS))) + if (!tab->table->is_filled_at_execution() && + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->select_limit_cnt < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) { /* Join with outer join condition */ COND *orig_cond=sel->cond; @@ -7472,7 +7828,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } /* - Push down conditions from all on expressions. + Push down conditions from all ON expressions. Each of these conditions are guarded by a variable that turns if off just before null complemented row for outer joins is formed. 
Thus, the condition from an @@ -7481,8 +7837,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ /* First push down constant conditions from on expressions */ - for (JOIN_TAB *join_tab= join->join_tab+join->const_tables; - join_tab < join->join_tab+join->tables ; join_tab++) + for (JOIN_TAB *join_tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + join_tab; + join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS)) { if (*join_tab->on_expr_ref) { @@ -7507,10 +7864,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } } - /* Push down non-constant conditions from on expressions */ + /* Push down non-constant conditions from ON expressions */ JOIN_TAB *last_tab= tab; while (first_inner_tab && first_inner_tab->last_inner == last_tab) - { + { /* Table tab is the last inner table of an outer join. An on expression is always attached to it. @@ -7519,8 +7876,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) table_map used_tables2= (join->const_table_map | OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= (tab == last_tab)? NULL: next_linear_tab(join, tab, + WITH_BUSH_ROOTS)) { + if (!tab->table) + { + /* + psergey-todo: this is probably incorrect, fix this when we get + correct processing for outer joins + semi joins + */ + continue; + } current_map= tab->table->map; used_tables2|= current_map; COND *tmp_cond= make_cond_for_table(on_expr, used_tables2, @@ -7566,37 +7934,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) (*sel_cond_ref)->update_used_tables(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; - } + } + //if (tab == last_tab) //psergey-todo: fix this. 
+ // break; } first_inner_tab= first_inner_tab->first_upper; } - - if (save_used_tables && !(used_tables & - ~(tab->emb_sj_nest->sj_inner_tables | - join->const_table_map | PSEUDO_TABLE_BITS))) - { - /* - We have reached the end of semi join nest. That is, the join order - looks like this: - - outer_tbl1 SJ-Materialize(inner_tbl1 ... inner_tblN) outer_tbl ... - ^ - \-we're here - At this point, we need to produce two conditions - - A condition that can be checked when we have all of the sj-inner - tables (inner_tbl1 ... inner_tblN). This will be used while doing - materialization. - - A condition that can be checked when we have all of the tables - in the prefix (both inner and outer). - */ - tab->emb_sj_nest->sj_mat_info->join_cond= - cond ? - make_cond_after_sjm(cond, cond, save_used_tables, used_tables): - NULL; - used_tables= save_used_tables | used_tables; - save_used_tables= 0; - } - } } DBUG_RETURN(0); @@ -7627,11 +7970,17 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) static uint make_join_orderinfo(JOIN *join) { + /* + This function needs to be fixed to take into account that we now have SJM + nests. + */ + DBUG_ASSERT(0); + JOIN_TAB *tab; if (join->need_tmp) - return join->tables; + return join->table_count; tab= join->get_sort_by_join_tab(); - return tab ? tab-join->join_tab : join->tables; + return tab ? 
tab-join->join_tab : join->table_count; } /* @@ -7805,7 +8154,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) SYNOPSIS end_sj_materialize() join The join - join_tab Last join table + join_tab Points to right after the last join_tab in materialization bush end_of_records FALSE <=> This call is made to pass another record combination TRUE <=> EOF (no action) @@ -7823,7 +8172,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) NESTED_LOOP_ERROR */ -static enum_nested_loop_state +enum_nested_loop_state end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) { int error; @@ -7998,6 +8347,7 @@ static uint check_join_cache_usage(JOIN_TAB *tab, ulonglong options, uint no_jbuf_after, + uint table_index, JOIN_TAB *prev_tab) { COST_VECT cost; @@ -8013,11 +8363,14 @@ uint check_join_cache_usage(JOIN_TAB *tab, !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT); bool no_bka_cache= !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT); - uint i= tab - join->join_tab; join->return_tab= 0; - if (cache_level == 0 || i == join->const_tables || !prev_tab) + /* + Don't use join cache if @@join_cache_level==0 or this table is the first + one join suborder (either at top level or inside a bush) + */ + if (cache_level == 0 || !prev_tab) return 0; if (force_unlinked_cache && (cache_level%2 == 0)) @@ -8025,10 +8378,7 @@ uint check_join_cache_usage(JOIN_TAB *tab, if (options & SELECT_NO_JOIN_CACHE) goto no_join_cache; - /* - psergey-todo: why the below when execution code seems to handle the - "range checked for each record" case? - */ + if (tab->use_quick == 2) goto no_join_cache; @@ -8051,11 +8401,16 @@ uint check_join_cache_usage(JOIN_TAB *tab, } /* - Don't use join buffering if we're dictated not to by no_jbuf_after (this - ...) 
+ Don't use join buffering if we're dictated not to by no_jbuf_after + (This is not meaningfully used currently) + */ + if (table_index > no_jbuf_after) + goto no_join_cache; + + /* + TODO: BNL join buffer should be perfectly ok with tab->bush_children. */ - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) + if (tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; for (JOIN_TAB *first_inner= tab->first_inner; first_inner; @@ -8185,6 +8540,9 @@ no_join_cache: join join whose tables are to be checked options options of the join no_jbuf_after don't use join buffering after table with this number + (The tables are assumed to be numbered in + first_linear_tab(join, WITHOUT_CONST_TABLES), + next_linear_tab(join, WITH_CONST_TABLES) order). DESCRIPTION For each table after the first non-constant table the function checks @@ -8208,29 +8566,36 @@ no_join_cache: void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, uint no_jbuf_after) { - JOIN_TAB *first_sjm_table= NULL; - JOIN_TAB *last_sjm_table= NULL; + JOIN_TAB *tab; + JOIN_TAB *prev_tab; - for (uint i= join->const_tables; i < join->tables; i++) - join->join_tab[i].used_join_cache_level= join->max_allowed_join_cache_level; - - for (uint i= join->const_tables; i < join->tables; i++) + for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { - JOIN_TAB *tab= join->join_tab+i; - - if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) - { - first_sjm_table= tab; - last_sjm_table= tab + join->best_positions[i].n_sj_tables; - for (JOIN_TAB *sjm_tab= first_sjm_table; - sjm_tab != last_sjm_table; sjm_tab++) - sjm_tab->first_sjm_sibling= first_sjm_table; - } - if (!(tab >= first_sjm_table && tab < last_sjm_table)) - tab->first_sjm_sibling= NULL; - + tab->used_join_cache_level= join->max_allowed_join_cache_level; + } + + uint idx= join->const_tables; + 
for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { +restart: tab->icp_other_tables_ok= TRUE; tab->idx_cond_fact_out= TRUE; + + /* + Check if we have a preceding join_tab, as something that will feed us + records that we could buffer. We don't have it, if + - this is the first non-const table in the join order, + - this is the first table inside an SJM nest. + */ + prev_tab= tab - 1; + if (tab == join->join_tab + join->const_tables || + (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)) + prev_tab= NULL; + switch (tab->type) { case JT_SYSTEM: case JT_CONST: @@ -8240,16 +8605,25 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, case JT_ALL: tab->used_join_cache_level= check_join_cache_usage(tab, options, no_jbuf_after, - tab == last_sjm_table ? - first_sjm_table : - tab-1); + idx, + prev_tab); tab->use_join_cache= test(tab->used_join_cache_level); + /* + psergey-merge: todo: raise the question that this is really stupid that + we can first allocate a join buffer, then decide not to use it and free + it. + */ if (join->return_tab) - i= join->return_tab-join->join_tab-1; // always >= 0 + { + tab= join->return_tab; + goto restart; + } break; default: tab->used_join_cache_level= 0; - } + } + if (!tab->bush_children) + idx++; } } @@ -8279,8 +8653,8 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) { + JOIN_TAB *tab; uint i; - DBUG_ENTER("make_join_readinfo"); bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); @@ -8289,11 +8663,15 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) if (!join->select_lex->sj_nests.is_empty() && setup_semijoin_dups_elimination(join, options, no_jbuf_after)) DBUG_RETURN(TRUE); /* purecov: inspected */ + + /* For const tables, set partial_join_cardinality to 1. 
*/ + for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++) + tab->partial_join_cardinality= 1; - for (i= 0; i < join->const_tables; i++) - join->join_tab[i].partial_join_cardinality= 1; - - for (i=join->const_tables ; i < join->tables ; i++) + JOIN_TAB *prev_tab= NULL; + for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES), i= join->const_tables; + tab; + prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { /* The approximation below for partial join cardinality is not good because @@ -8301,24 +8679,50 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) - it does not differentiate between inner joins, outer joins and semi-joins. Later it should be improved. */ - JOIN_TAB *tab=join->join_tab+i; + + if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) + prev_tab= NULL; + DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); + tab->partial_join_cardinality= join->best_positions[i].records_read * - (i ? (tab-1)->partial_join_cardinality : 1); + (prev_tab? prev_tab->partial_join_cardinality : 1); + if (!tab->bush_children) + i++; } check_join_cache_usage_for_tables(join, options, no_jbuf_after); - - for (i=join->const_tables ; i < join->tables ; i++) + + JOIN_TAB *first_tab; + for (tab= first_tab= first_linear_tab(join, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { - JOIN_TAB *tab=join->join_tab+i; + if (tab->bush_children) + { + if (setup_sj_materialization(tab)) + return TRUE; + } + TABLE *table=tab->table; uint jcl= tab->used_join_cache_level; tab->read_record.table= table; tab->read_record.file=table->file; tab->read_record.unlock_row= rr_unlock_row; - tab->next_select=sub_select; /* normal select */ tab->sorted= sorted; sorted= 0; // only first must be sorted + + + /* + We should not set tab->next_select for the last table in the + SMJ-nest, as setup_sj_materialization() has already set it to + end_sj_materialize. 
+ */ + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + } + if (tab->loosescan_match_tab) { @@ -8327,20 +8731,6 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) return TRUE; /* purecov: inspected */ tab->sorted= TRUE; } - - /* - SJ-Materialization - */ - if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) - { - if (i == join->const_tables) - join->first_select= sub_select_sjm; - else - tab[-1].next_select= sub_select_sjm; - - if (setup_sj_materialization(tab)) - return TRUE; - } table->status=STATUS_NO_RECORD; pick_table_access_method (tab); @@ -8410,8 +8800,9 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } else { - tab->read_first_record= join_init_read_record; - if (i == join->const_tables) + if (!tab->bush_children) + tab->read_first_record= join_init_read_record; + if (tab == first_tab) { if (tab->select && tab->select->quick) { @@ -8486,15 +8877,18 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) abort(); /* purecov: deadcode */ } } - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ + uint n_top_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ -/* + /* If a join buffer is used to join a table the ordering by an index for the first non-constant table cannot be employed anymore. 
*/ - for (i=join->const_tables ; i < join->tables ; i++) + for (tab= join->join_tab + join->const_tables ; + tab != join->join_tab + n_top_tables ; tab++) { - JOIN_TAB *tab=join->join_tab+i; if (tab->use_join_cache) { JOIN_TAB *sort_by_tab= join->group && join->simple_group && @@ -8540,9 +8934,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) bool error_if_full_join(JOIN *join) { - for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables; - tab < end; - tab++) + for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab; + tab= next_top_level_tab(join, tab)) { if (tab->type == JT_ALL && (!tab->select || !tab->select->quick)) { @@ -8559,6 +8952,9 @@ bool error_if_full_join(JOIN *join) /** cleanup JOIN_TAB. + + DESCRIPTION + This is invoked when we've finished all join executions. */ void JOIN_TAB::cleanup() @@ -8577,6 +8973,12 @@ void JOIN_TAB::cleanup() { table->disable_keyread(); table->file->ha_index_or_rnd_end(); + + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -8771,12 +9173,12 @@ void JOIN::cleanup(bool full) if (table) { - JOIN_TAB *tab,*end; + JOIN_TAB *tab; /* Only a sorted table may be cached. 
This sorted table is always the first non const table in join->table */ - if (tables > const_tables) // Test for not-const tables + if (table_count > const_tables) // Test for not-const tables { free_io_cache(table[const_tables]); filesort_free_buffers(table[const_tables],full); @@ -8784,13 +9186,17 @@ void JOIN::cleanup(bool full) if (full) { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { tab->cleanup(); + } table= 0; } else { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { if (tab->table) tab->table->file->ha_index_or_rnd_end(); @@ -8928,9 +9334,8 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables) static void update_depend_map(JOIN *join) { - JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables; - - for (; join_tab != end ; join_tab++) + for (JOIN_TAB *join_tab= first_linear_tab(join, WITH_CONST_TABLES); join_tab; + join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS)) { TABLE_REF *ref= &join_tab->ref; table_map depend_map=0; @@ -8941,11 +9346,11 @@ static void update_depend_map(JOIN *join) ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT; depend_map&= ~OUTER_REF_TABLE_BIT; for (JOIN_TAB **tab=join->map2table; - depend_map ; - tab++,depend_map>>=1 ) + depend_map ; + tab++,depend_map>>=1 ) { if (depend_map & 1) - ref->depend_map|=(*tab)->ref.depend_map; + ref->depend_map|=(*tab)->ref.depend_map; } } } @@ -8953,7 +9358,7 @@ static void update_depend_map(JOIN *join) /** Update the dependency map for the sort order. 
*/ -static void update_depend_map(JOIN *join, ORDER *order) +static void update_depend_map_for_order(JOIN *join, ORDER *order) { for (; order ; order=order->next) { @@ -9000,21 +9405,30 @@ static ORDER * remove_const(JOIN *join,ORDER *first_order, COND *cond, bool change_list, bool *simple_order) { - if (join->tables == join->const_tables) + if (join->table_count == join->const_tables) return change_list ? 0 : first_order; // No need to sort ORDER *order,**prev_ptr; - table_map first_table= join->join_tab[join->const_tables].table->map; + table_map first_table; table_map not_const_tables= ~join->const_table_map; table_map ref; + bool first_is_base_table= FALSE; DBUG_ENTER("remove_const"); + + LINT_INIT(first_table); /* protected by first_is_base_table */ + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + prev_ptr= &first_order; *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1; /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ - update_depend_map(join, first_order); + update_depend_map_for_order(join, first_order); for (order=first_order; order ; order=order->next) { table_map order_tables=order->item[0]->used_tables(); @@ -9029,7 +9443,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, table for all queries containing more than one table, ROLLUP, and an outer join. 
*/ - (join->tables > 1 && join->rollup.state == ROLLUP::STATE_INITED && + (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED && join->outer_join)) *simple_order=0; // Must do a temp table to sort else if (!(order_tables & not_const_tables)) @@ -9052,7 +9466,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); continue; } - if ((ref=order_tables & (not_const_tables ^ first_table))) + if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table))) { if (!(order_tables & first_table) && only_eq_ref_tables(join,first_order, ref)) @@ -9128,8 +9542,11 @@ static void clear_tables(JOIN *join) must clear only the non-const tables, as const tables are not re-calculated. */ - for (uint i=join->const_tables ; i < join->tables ; i++) - mark_as_null_row(join->table[i]); // All fields are NULL + for (uint i= 0 ; i < join->table_count ; i++) + { + if (!(join->table[i]->map & join->const_table_map)) + mark_as_null_row(join->table[i]); // All fields are NULL + } } /***************************************************************************** @@ -9954,7 +10371,25 @@ static int compare_fields_by_table_order(Item_field *field1, if (outer_ref) return cmp; JOIN_TAB **idx= (JOIN_TAB **) table_join_idx; - cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr]; + + JOIN_TAB *tab1= idx[field1->field->table->tablenr]; + JOIN_TAB *tab2= idx[field2->field->table->tablenr]; + + /* + if one of the table is inside a merged SJM nest and another one isn't, + compare SJM bush roots of the tables. 
+ */ + if (tab1->bush_root_tab != tab2->bush_root_tab) + { + if (tab1->bush_root_tab) + tab1= tab1->bush_root_tab; + + if (tab2->bush_root_tab) + tab2= tab2->bush_root_tab; + } + + cmp= tab2 - tab1; + if (!cmp) { JOIN_TAB *tab= idx[field1->field->table->tablenr]; @@ -10078,7 +10513,9 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, /* Pick the "head" item: the constant one or the first in the join order - that's not inside some SJM nest. + (if the first in the join order happends to be inside an SJM nest, that's + ok, because this is where the value will be unpacked after + materialization). */ if (item_const) head= item_const; @@ -11179,6 +11616,20 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, *outer_rec_count= rec_count; for (i= first_tab; i <= last_tab; i++) reopt_remaining_tables |= join->positions[i].table->table->map; + + /* + best_access_path() optimization depends on the value of + join->cur_sj_inner_tables. Our goal in this function is to do a + re-optimization with disabled join buffering, but no other changes. + In order to achieve this, cur_sj_inner_tables needs have the same + value it had during the original invocations of best_access_path. + + We know that this function, optimize_wo_join_buffering() is called to + re-optimize semi-join join order range, which allows to conclude that + the "original" value of cur_sj_inner_tables was 0. + */ + table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; for (i= first_tab; i <= last_tab; i++) { @@ -11205,6 +11656,8 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, if (!rs->emb_sj_nest) *outer_rec_count *= pos.records_read; } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; + *reopt_cost= cost; } @@ -12215,7 +12668,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, for distinct, as we want the distinct index to be usable in this case too. 
*/ - item->marker == 4 || param->bit_fields_as_long, // psergey-feb17 + item->marker == 4 || param->bit_fields_as_long, force_copy_fields, param->convert_blob_length); @@ -12489,6 +12942,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -12504,6 +12959,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -12573,6 +13030,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -12609,8 +13068,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, i < field_count; i++, reg_field++, key_part_info++) { - key_part_info->null_bit=0; key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + key_part_info->offset= (*reg_field)->offset(table->record[0]); key_part_info->length= (uint16) 
(*reg_field)->pack_length(); /* TODO: @@ -13427,14 +13892,13 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) } /* Set up select_end */ Next_select_func end_select= setup_end_select_func(join); - if (join->tables) + if (join->table_count) { - join->join_tab[join->tables-1].next_select= end_select; - + join->join_tab[join->top_join_tab_count - 1].next_select= end_select; join_tab=join->join_tab+join->const_tables; } join->send_records=0; - if (join->tables == join->const_tables) + if (join->table_count == join->const_tables) { /* HAVING will be checked after processing aggregate functions, @@ -13474,13 +13938,13 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) } else { - DBUG_ASSERT(join->tables); + DBUG_ASSERT(join->table_count); if (join->outer_ref_cond && !join->outer_ref_cond->val_int()) error= NESTED_LOOP_NO_MORE_ROWS; else - error= join->first_select(join,join_tab,0); + error= sub_select(join,join_tab,0); if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) - error= join->first_select(join,join_tab,1); + error= sub_select(join,join_tab,1); if (error == NESTED_LOOP_QUERY_LIMIT) error= NESTED_LOOP_OK; /* select_limit used */ } @@ -13547,143 +14011,6 @@ int rr_sequential_and_unpack(READ_RECORD *info) /* - Semi-join materialization join function - - SYNOPSIS - sub_select_sjm() - join The join - join_tab The first table in the materialization nest - end_of_records FALSE <=> This call is made to pass another record - combination - TRUE <=> EOF - - DESCRIPTION - This is a join execution function that does materialization of a join - suborder before joining it to the rest of the join. - - The table pointed by join_tab is the first of the materialized tables. - This function first creates the materialized table and then switches to - joining the materialized table with the rest of the join. 
- - The materialized table can be accessed in two ways: - - index lookups - - full table scan - - RETURN - One of enum_nested_loop_state values -*/ - -enum_nested_loop_state -sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) -{ - int res; - enum_nested_loop_state rc; - - DBUG_ENTER("sub_select_sjm"); - - if (!join_tab->emb_sj_nest) - { - /* - We're handling GROUP BY/ORDER BY, this is the first table, and we've - actually executed the join already and now we're just reading the - result of the join from the temporary table. - Bypass to regular join handling. - Yes, it would be nicer if sub_select_sjm wasn't called at all in this - case but there's no easy way to arrange this. - */ - rc= sub_select(join, join_tab, end_of_records); - DBUG_RETURN(rc); - } - - SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info; - if (end_of_records) - { - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); - } - if (!sjm->materialized) - { - /* - Do the materialization. First, put end_sj_materialize after the last - inner table so we can catch record combinations of sj-inner tables. - */ - Next_select_func next_func= join_tab[sjm->tables - 1].next_select; - join_tab[sjm->tables - 1].next_select= end_sj_materialize; - - /* - Now run the join for the inner tables. The first call is to run the - join, the second one is to signal EOF (this is essential for some - join strategies, e.g. it will make join buffering flush the records) - */ - if ((rc= sub_select(join, join_tab, FALSE)) < 0 || - (rc= sub_select(join, join_tab, TRUE/*EOF*/)) < 0) - { - join_tab[sjm->tables - 1].next_select= next_func; - DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ - } - join_tab[sjm->tables - 1].next_select= next_func; - - /* - Ok, materialization finished. 
Initialize the access to the temptable - */ - sjm->materialized= TRUE; - join_tab->read_record.read_record= join_no_more_records; - if (sjm->is_sj_scan) - { - /* Initialize full scan */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - init_read_record(&last_tab->read_record, join->thd, - sjm->table, NULL, TRUE, TRUE, FALSE); - - DBUG_ASSERT(last_tab->read_record.read_record == rr_sequential); - last_tab->read_first_record= join_read_record_no_init; - last_tab->read_record.copy_field= sjm->copy_field; - last_tab->read_record.copy_field_end= sjm->copy_field + - sjm->sjm_table_cols.elements; - last_tab->read_record.read_record= rr_sequential_and_unpack; - } - } - else - { - if (sjm->is_sj_scan) - { - /* Reset the cursor for a new scan over the table */ - if (sjm->table->file->ha_rnd_init(TRUE)) - DBUG_RETURN(NESTED_LOOP_ERROR); - } - } - - if (sjm->is_sj_scan) - { - /* Do full scan of the materialized table */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - - Item *save_cond= last_tab->select_cond; - last_tab->set_select_cond(sjm->join_cond, __LINE__); - - rc= sub_select(join, last_tab, end_of_records); - last_tab->set_select_cond(save_cond, __LINE__); - DBUG_RETURN(rc); - } - else - { - /* Do index lookup in the materialized table */ - if ((res= join_read_key2(join_tab->join->thd, join_tab, - sjm->table, sjm->tab_ref)) == 1) - DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ - if (res || !sjm->in_equality->val_int()) - DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); - } - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); -} - - -/* Fill the join buffer with partial records, retrieve all full matches for them SYNOPSIS @@ -13908,7 +14235,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) if (join->resume_nested_loop) { /* If not the last table, plunge down the nested loop */ - if (join_tab < join->join_tab + join->tables - 1) + if (join_tab < join->join_tab + 
join->top_join_tab_count - 1) rc= (*join_tab->next_select)(join, join_tab + 1, 0); else { @@ -13935,6 +14262,10 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) } join->thd->row_count= 0; + if (rc != NESTED_LOOP_NO_MORE_ROWS && + (rc= join_tab_execution_startup(join_tab)) < 0) + DBUG_RETURN(rc); + if (join_tab->loosescan_match_tab) join_tab->loosescan_match_tab->found_match= FALSE; @@ -14721,13 +15052,28 @@ int join_init_read_record(JOIN_TAB *tab) return (*tab->read_record.read_record)(&tab->read_record); } -static int +int join_read_record_no_init(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; + + /* + init_read_record resets all elements of tab->read_record(). + Remember things that we don't want to have reset. + */ + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select,1,1, FALSE); + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + tab->read_record.read_record= rr_sequential_and_unpack; + return (*tab->read_record.read_record)(&tab->read_record); } - static int join_read_first(JOIN_TAB *tab) { @@ -14921,7 +15267,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (join->select_options & OPTION_FOUND_ROWS) { JOIN_TAB *jt=join->join_tab; - if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group + if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group && !join->send_group_parts && !join->having && !jt->select_cond && !(jt->select && jt->select->quick) && (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -15229,7 +15575,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), table->file->print_error(error, MYF(0));/* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ } - join->join_tab[join->tables-1].next_select=end_unique_update; + 
join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update; } join->send_records++; DBUG_RETURN(NESTED_LOOP_OK); @@ -15375,9 +15721,23 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), Check if "left_item=right_item" equality is guaranteed to be true by use of [eq]ref access on left_item->field->table. - @return - TRUE if right_item is used removable reference key on left_item - FALSE otherwise + SYNOPSIS + test_if_ref() + root_cond + left_item + right_item + + DESCRIPTION + Check if the given "left_item = right_item" equality is guaranteed to be + true by use of [eq_]ref access method. + + We need root_cond as we can't remove ON expressions even if employed ref + access guarantees that they are true. This is because TODO + + RETURN + TRUE if right_item is used removable reference key on left_item + FALSE Otherwise + */ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) @@ -15440,7 +15800,8 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) SYNOPSIS make_cond_for_table() cond Condition to analyze - tables Tables for which "current field values" are available + tables Tables for which "current field values" are available (this + includes used_table) used_table Table that we're extracting the condition for (may also include PSEUDO_TABLE_BITS exclude_expensive_cond Do not push expensive conditions @@ -15476,7 +15837,8 @@ make_cond_for_table(Item *cond, table_map tables, table_map used_table, exclude_expensive_cond, retain_ref_cond); } - + + static Item * make_cond_for_table_from_pred(Item *root_cond, Item *cond, table_map tables, table_map used_table, @@ -15498,6 +15860,7 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, */ !((used_table & 1) && cond->is_expensive())) return (COND*) 0; // Already checked + if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -15575,6 +15938,7 @@ make_cond_for_table_from_pred(Item 
*root_cond, Item *cond, */ (!used_table && exclude_expensive_cond && cond->is_expensive())) return (COND*) 0; // Can't check this yet + if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK) return cond; // Not boolean op @@ -15601,14 +15965,29 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, } +/* + The difference of this from make_cond_for_table() is that we're in the + following state: + 1. conditions referring to 'tables' have been checked + 2. conditions referring to sjm_tables have been checked, too + 3. We need condition that couldn't be checked in #1 or #2 but + can be checked when we get both (tables | sjm_tables). + +*/ static COND * make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables) { + /* + We assume that conditions that refer to only join prefix tables or + sjm_tables have already been checked. + */ if ((!(cond->used_tables() & ~tables) || !(cond->used_tables() & ~sjm_tables))) return (COND*) 0; // Already checked + + /* AND/OR recursive descent */ if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -16215,8 +16594,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, &usable_keys)) < MAX_KEY) { /* Found key that can be used to retrieve data in sorted order */ - //psergey-mrr:if (tab->pre_idx_push_select_cond) - // tab->select_cond= tab->select->cond= tab->pre_idx_push_select_cond; if (tab->ref.key >= 0) { /* @@ -16322,7 +16699,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, ref_key_quick_rows= table->quick_rows[ref_key]; read_time= join->best_positions[tablenr].read_time; - for (uint i= tablenr+1; i < join->tables; i++) + for (uint i= tablenr+1; i < join->table_count; i++) fanout*= join->best_positions[i].records_read; // fanout is always >= 1 for (nr=0; nr < table->s->keys ; nr++) @@ -16498,7 +16875,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if 
((select_limit >= table_records) && (tab->type == JT_ALL && - tab->join->tables > tab->join->const_tables + 1) && + tab->join->table_count > tab->join->const_tables + 1) && ((unsigned) best_key != table->s->primary_key || !table->file->primary_key_is_clustered())) DBUG_RETURN(0); @@ -16698,7 +17075,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, JOIN_TAB *tab; DBUG_ENTER("create_sort_index"); - if (join->tables == join->const_tables) + if (join->table_count == join->const_tables) DBUG_RETURN(0); // One row, no need to sort tab= join->join_tab + join->const_tables; table= tab->table; @@ -19194,16 +19571,19 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { table_map used_tables=0; - uchar sjm_nests[MAX_TABLES]; - uint sjm_nests_cur=0; - uint sjm_nests_end= 0; - uint end_table= join->tables; bool printing_materialize_nest= FALSE; uint select_id= join->select_lex->select_number; - for (uint i=0 ; i < end_table ; i++) + for (JOIN_TAB *tab= first_breadth_first_tab(join); tab; + tab= next_breadth_first_tab(join, tab)) { - JOIN_TAB *tab=join->join_tab+i; + if (tab->bush_root_tab) + { + JOIN_TAB *first_sibling= tab->bush_root_tab->bush_children->start; + select_id= first_sibling->emb_sj_nest->sj_subq_pred->get_identifier(); + printing_materialize_nest= TRUE; + } + TABLE *table=tab->table; TABLE_LIST *table_list= tab->table->pos_in_table_list; char buff[512]; @@ -19243,84 +19623,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, join->select_lex->type; item_list.push_back(new Item_string(stype, strlen(stype), cs)); - /* - Special processing for SJ-Materialization nests: print the fake table - and delay printing of the SJM nest contents until later. 
- */ - uint sj_strategy= join->best_positions[i].sj_strategy; - if (sj_is_materialize_strategy(sj_strategy) && - !printing_materialize_nest) - { - /* table */ - int len= my_snprintf(table_name_buffer, - sizeof(table_name_buffer)-1, - "subselect%d", - tab->emb_sj_nest->sj_subq_pred->get_identifier()); - item_list.push_back(new Item_string(table_name_buffer, len, cs)); - /* partitions */ - if (join->thd->lex->describe & DESCRIBE_PARTITIONS) - item_list.push_back(item_null); - /* type */ - uint type= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? JT_ALL : JT_EQ_REF; - item_list.push_back(new Item_string(join_type_str[type], - strlen(join_type_str[type]), - cs)); - /* possible_keys */ - item_list.push_back(new Item_string("unique_key", - strlen("unique_key"), cs)); - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - { - item_list.push_back(item_null); /* key */ - item_list.push_back(item_null); /* key_len */ - item_list.push_back(item_null); /* ref */ - } - else - { - /* key */ - item_list.push_back(new Item_string("unique_key", strlen("unique_key"), cs)); - /* key_len */ - uint klen= tab->emb_sj_nest->sj_mat_info->table->key_info[0].key_length; - uint buflen= longlong10_to_str(klen, keylen_str_buf, 10) - keylen_str_buf; - item_list.push_back(new Item_string(keylen_str_buf, buflen, cs)); - /* ref */ - item_list.push_back(new Item_string("func", strlen("func"), cs)); - } - /* rows */ - ha_rows rows= (ha_rows) ((sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? 
- tab->emb_sj_nest->sj_mat_info->rows : 1.0); - item_list.push_back(new Item_int((longlong)rows, - MY_INT64_NUM_DECIMAL_DIGITS)); - /* filtered */ - if (join->thd->lex->describe & DESCRIBE_EXTENDED) - item_list.push_back(new Item_float(1.0, 2)); - - /* Extra */ - if (need_tmp_table) - { - need_tmp_table=0; - extra.append(STRING_WITH_LEN("; Using temporary")); - } - if (need_order) - { - need_order=0; - extra.append(STRING_WITH_LEN("; Using filesort")); - } - /* Skip initial "; "*/ - const char *str= extra.ptr(); - uint32 extra_len= extra.length(); - if (extra_len) - { - str += 2; - extra_len -= 2; - } - item_list.push_back(new Item_string(str, extra_len, cs)); - - /* Register the nest for further processing: */ - sjm_nests[sjm_nests_end++]= i; - i += join->best_positions[i].n_sj_tables-1; - goto loop_end; - } - if ((tab->type == JT_ALL || tab->type == JT_HASH) && tab->select && tab->select->quick) { @@ -19343,6 +19645,16 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, table->derived_select_number); item_list.push_back(new Item_string(table_name_buffer, len, cs)); } + else if (tab->bush_children) + { + JOIN_TAB *ctab= tab->bush_children->start; + /* table */ + int len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<subquery%d>", + ctab->emb_sj_nest->sj_subq_pred->get_identifier()); + item_list.push_back(new Item_string(table_name_buffer, len, cs)); + } else { TABLE_LIST *real_table= table->pos_in_table_list; @@ -19458,7 +19770,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else { - if (table_list->schema_table && + if (table_list && /* SJM bushes don't have table_list */ + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { const char *tmp_buff; @@ -19489,7 +19802,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } /* Add "rows" field to item_list. 
*/ - if (table_list->schema_table) + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table) { /* in_rows */ if (join->thd->lex->describe & DESCRIBE_EXTENDED) @@ -19508,12 +19822,23 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, examined_rows= tab->limit; else { - tab->table->file->info(HA_STATUS_VARIABLE); - examined_rows= tab->table->file->stats.records; + if (!tab->table->pos_in_table_list || + tab->table->is_filled_at_execution()) // temporary, is_filled_at_execution + { + examined_rows= tab->records; + } + else + { + /* + handler->info(HA_STATUS_VARIABLE) has been called in + make_join_statistics() + */ + examined_rows= tab->table->file->stats.records; + } } } else - examined_rows=(ha_rows)join->best_positions[i].records_read; + examined_rows=(ha_rows)tab->records_read; item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows, MY_INT64_NUM_DECIMAL_DIGITS)); @@ -19521,21 +19846,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, /* Add "filtered" field to item_list. */ if (join->thd->lex->describe & DESCRIBE_EXTENDED) { - /* - psergey-todo: - in the code above, we cast to integer when asssigning to - examined_rows. - In the code below, we may divide original value but result of - conversion of the same value to integer, which may produce a - value that's greater than 100%, which looks very odd. - I'm not fixing this right away because that might trigger a wave - of small EXPLAIN EXTENDED output changes, which I don't have time - to deal with right now. 
- */ float f= 0.0; if (examined_rows) - f= (float) (100.0 * join->best_positions[i].records_read / - examined_rows); + f= (float) (100.0 * tab->records_read / examined_rows); item_list.push_back(new Item_float(f, 2)); } } @@ -19620,7 +19933,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, extra.append(STRING_WITH_LEN("; Using where")); } } - if (table_list->schema_table && + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { if (!table_list->table_open_method) @@ -19699,25 +20013,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } } - /* - if (sj_is_materialize_strategy(sj_strategy)) - { - if (join->best_positions[i].n_sj_tables == 1) - extra.append(STRING_WITH_LEN("; Materialize")); - else - { - last_sjm_table= i + join->best_positions[i].n_sj_tables - 1; - extra.append(STRING_WITH_LEN("; Start materialize")); - } - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - extra.append(STRING_WITH_LEN("; Scan")); - } - else if (last_sjm_table == i) - { - extra.append(STRING_WITH_LEN("; End materialize")); - } - */ - for (uint part= 0; part < tab->ref.key_parts; part++) { if (tab->ref.cond_guards[part]) @@ -19743,14 +20038,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } item_list.push_back(new Item_string(str, len, cs)); } - loop_end: - if (i+1 == end_table && sjm_nests_cur != sjm_nests_end) - { - printing_materialize_nest= TRUE; - i= sjm_nests[sjm_nests_cur++] - 1; - end_table= (i+1) + join->best_positions[i+1].n_sj_tables; - select_id= join->join_tab[i+1].emb_sj_nest->sj_subq_pred->get_identifier(); - } // For next iteration used_tables|=table->map; @@ -20004,6 +20291,14 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); str->append(')'); } + else if 
(jtbm_subselect) + { + str->append(STRING_WITH_LEN(" <materialize> (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } else { const char *cmp_name; // Name to compare with alias diff --git a/sql/sql_select.h b/sql/sql_select.h index b08fa7ff0e6..87175f7c1b8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -165,13 +165,25 @@ enum enum_nested_loop_state typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); + +/* + Function prototype for reading first record for a join tab + + RETURN + 0 - OK + -1 - Record not found + Other - A fatal error +*/ typedef int (*Read_record_func)(struct st_join_table *tab); + Next_select_func setup_end_select_func(JOIN *join); int rr_sequential(READ_RECORD *info); +int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; +class JOIN_TAB_RANGE; typedef struct st_join_table { st_join_table() {} /* Remove gcc warning */ @@ -200,6 +212,21 @@ typedef struct st_join_table { st_join_table *last_inner; /**< last table table for embedding outer join */ st_join_table *first_upper; /**< first inner table for embedding outer join */ st_join_table *first_unmatched; /**< used for optimization purposes only */ + + /* + For join tabs that are inside an SJM bush: root of the bush + */ + st_join_table *bush_root_tab; + + /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ + bool last_leaf_in_bush; + + /* + ptr - this is a bush, and ptr points to description of child join_tab + range + NULL - this join tab has no bush children + */ + JOIN_TAB_RANGE *bush_children; /* Special content for EXPLAIN 'Extra' column or NULL if none */ const char *info; @@ -237,7 +264,13 @@ typedef struct st_join_table { method (but not 'index' for some reason), i.e. this matches method which E(#records) is in found_records. 
*/ - ha_rows read_time; + double read_time; + + /* psergey-todo: make the below have type double, like POSITION::records_read? */ + ha_rows records_read; + + /* Startup cost for execution */ + double startup_cost; double partial_join_cardinality; @@ -330,12 +363,10 @@ typedef struct st_join_table { /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the - fix_semijion_strategies_for_picked_join_order. + fix_semijoin_strategies_for_picked_join_order. */ uint sj_strategy; - struct st_join_table *first_sjm_sibling; - void cleanup(); inline bool is_using_loose_index_scan() { @@ -450,9 +481,6 @@ enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool end_of_records); -enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, - bool end_of_records); - enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -598,26 +626,53 @@ inline bool sj_is_materialize_strategy(uint strategy) return strategy >= SJ_OPT_MATERIALIZE; } +class JOIN_TAB_RANGE: public Sql_alloc +{ +public: + JOIN_TAB *start; + JOIN_TAB *end; +}; + class JOIN :public Sql_alloc { JOIN(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */ public: - JOIN_TAB *join_tab,**best_ref; + JOIN_TAB *join_tab, **best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution + + List<JOIN_TAB_RANGE> join_tab_ranges; + + /* + Base tables participating in the join. After join optimization is done, the + tables are stored in the join order (but the only really important part is + that const tables are first). + */ TABLE **table; - TABLE **all_tables; /** The table which has an index that allows to produce the requried ordering. 
A special value of 0x1 means that the ordering will be produced by passing 1st non-const table to filesort(). NULL means no such table exists. */ TABLE *sort_by_table; - uint tables; /**< Number of tables in the join */ + /* + Number of tables in the join. + (In MySQL, it is named 'tables' and is also the number of elements in + join->join_tab array. In MariaDB, the latter is not true, so we've renamed + the variable) + */ + uint table_count; uint outer_tables; /**< Number of tables that are not inside semijoin */ uint const_tables; + /* + Number of tables in the top join_tab array. Normally this matches + (join_tab_ranges.head()->end - join_tab_ranges.head()->start). + + We keep it here so that it is saved/restored with JOIN::restore_tmp. + */ + uint top_join_tab_count; uint send_group_parts; bool group; /**< If query contains GROUP BY clause */ /** @@ -693,7 +748,6 @@ public: /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ - Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into @@ -824,8 +878,12 @@ public: bool union_part; ///< this subselect is part of union bool optimized; ///< flag to avoid double optimization in EXPLAIN + /* + Subqueries that will need to be converted to semi-join nests, including + those converted to jtbm nests. The list is emptied when conversion is done. 
+ */ Array<Item_in_subselect> sj_subselects; - + /* Temporary tables used to weed-out semi-join duplicates */ List<TABLE> sj_tmp_tables; /* SJM nests that are executed with SJ-Materialization strategy */ @@ -858,7 +916,8 @@ public: { join_tab= join_tab_save= 0; table= 0; - tables= 0; + table_count= 0; + top_join_tab_count= 0; const_tables= 0; eliminated_tables= 0; join_list= 0; @@ -913,7 +972,6 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; - first_select= sub_select; outer_ref_cond= 0; } @@ -975,7 +1033,7 @@ public: } inline table_map all_tables_map() { - return (table_map(1) << tables) - 1; + return (table_map(1) << table_count) - 1; } /* Return the table for which an index scan can be used to satisfy @@ -1008,6 +1066,12 @@ private: void cleanup_item_list(List<Item> &items) const; }; +enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; +enum enum_with_const_tables { WITH_CONST_TABLES, WITHOUT_CONST_TABLES}; + +JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls); +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, + enum enum_with_bush_roots include_bush_roots); typedef struct st_select_check { uint const_ref,reg_ref; @@ -1255,6 +1319,7 @@ int safe_index_read(JOIN_TAB *tab); COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value); int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); +int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(Item* cond, Item *item) { diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 833b695e4bf..06d55a661eb 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -6615,14 +6615,15 @@ int make_schema_select(THD *thd, SELECT_LEX *sel, bool get_schema_tables_result(JOIN *join, enum enum_schema_table_state executed_place) { - JOIN_TAB *tmp_join_tab= join->join_tab+join->tables; THD *thd= join->thd; LEX *lex= thd->lex; bool result= 
0; DBUG_ENTER("get_schema_tables_result"); thd->no_warnings_for_error= 1; - for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) { if (!tab->table || !tab->table->pos_in_table_list) break; diff --git a/sql/sql_test.cc b/sql/sql_test.cc index af89b3cd525..cdab2ee8a6f 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -166,58 +166,67 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length) void TEST_join(JOIN *join) { - uint i,ref; + uint ref; + int i; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; DBUG_ENTER("TEST_join"); - /* - Assemble results of all the calls to full_name() first, - in order not to garble the tabular output below. - */ - String ref_key_parts[MAX_TABLES]; - for (i= 0; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab + i; - for (ref= 0; ref < tab->ref.key_parts; ref++) - { - ref_key_parts[i].append(tab->ref.items[ref]->full_name()); - ref_key_parts[i].append(" "); - } - } - DBUG_LOCK_FILE; VOID(fputs("\nInfo about JOIN\n",DBUG_FILE)); - for (i=0 ; i < join->tables ; i++) + + while ((jt_range= it++)) { - JOIN_TAB *tab=join->join_tab+i; - TABLE *form=tab->table; - char key_map_buff[128]; - fprintf(DBUG_FILE,"%-16.16s type: %-7s q_keys: %s refs: %d key: %d len: %d\n", - form->alias.c_ptr(), - join_type_str[tab->type], - tab->keys.print(key_map_buff), - tab->ref.key_parts, - tab->ref.key, - tab->ref.key_length); - if (tab->select) + /* + Assemble results of all the calls to full_name() first, + in order not to garble the tabular output below. 
+ */ + String ref_key_parts[MAX_TABLES]; + int tables_in_range= jt_range->end - jt_range->start; + for (i= 0; i < tables_in_range; i++) { - char buf[MAX_KEY/8+1]; - if (tab->use_quick == 2) - fprintf(DBUG_FILE, - " quick select checked for each record (keys: %s)\n", - tab->select->quick_keys.print(buf)); - else if (tab->select->quick) + JOIN_TAB *tab= jt_range->start + i; + for (ref= 0; ref < tab->ref.key_parts; ref++) { - fprintf(DBUG_FILE, " quick select used:\n"); - tab->select->quick->dbug_dump(18, FALSE); + ref_key_parts[i].append(tab->ref.items[ref]->full_name()); + ref_key_parts[i].append(" "); } - else - VOID(fputs(" select used\n",DBUG_FILE)); } - if (tab->ref.key_parts) + + for (i= 0; i < tables_in_range; i++) { - fprintf(DBUG_FILE, + JOIN_TAB *tab= jt_range->start + i; + TABLE *form=tab->table; + char key_map_buff[128]; + fprintf(DBUG_FILE,"%-16.16s type: %-7s q_keys: %s refs: %d key: %d len: %d\n", + form->alias.c_ptr(), + join_type_str[tab->type], + tab->keys.print(key_map_buff), + tab->ref.key_parts, + tab->ref.key, + tab->ref.key_length); + if (tab->select) + { + char buf[MAX_KEY/8+1]; + if (tab->use_quick == 2) + fprintf(DBUG_FILE, + " quick select checked for each record (keys: %s)\n", + tab->select->quick_keys.print(buf)); + else if (tab->select->quick) + { + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, FALSE); + } + else + VOID(fputs(" select used\n",DBUG_FILE)); + } + if (tab->ref.key_parts) + { + fprintf(DBUG_FILE, " refs: %s\n", ref_key_parts[i].c_ptr_safe()); + } } + VOID(fputs("\n",DBUG_FILE)); } DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; diff --git a/sql/sql_union.cc b/sql/sql_union.cc index a94ad9f3b4b..6897411c036 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -690,7 +690,8 @@ bool st_select_lex_unit::cleanup() if ((join= fake_select_lex->join)) { join->tables_list= 0; - join->tables= 0; + join->table_count= 0; + join->top_join_tab_count= 0; } error|= fake_select_lex->cleanup(); /* diff --git 
a/sql/table.cc b/sql/table.cc index 41fb9a1c8b4..1238bec283f 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5340,6 +5340,19 @@ bool st_table::is_children_attached(void) (parent && parent->children_attached)); } + +/* + Return TRUE if the table is filled at execution phase + + (and so, the optimizer must not do anything that depends on the contents of + the table, like range analysis or constant table detection) +*/ + +bool st_table::is_filled_at_execution() +{ + return test(pos_in_table_list->jtbm_subselect); +} + /* Cleanup this table for re-execution. diff --git a/sql/table.h b/sql/table.h index d43f5c85849..5d414278b02 100644 --- a/sql/table.h +++ b/sql/table.h @@ -967,6 +967,12 @@ struct st_table { file->extra(HA_EXTRA_KEYREAD); DBUG_VOID_RETURN; } + /* + Returns TRUE if the table is filled at execution phase (and so, the + optimizer must not do anything that depends on the contents of the table, + like range analysis or constant table detection) + */ + bool is_filled_at_execution(); inline void disable_keyread() { DBUG_ENTER("disable_keyread"); @@ -1195,7 +1201,7 @@ class Item_in_subselect; 1) table (TABLE_LIST::view == NULL) - base table (TABLE_LIST::derived == NULL) - - subquery - TABLE_LIST::table is a temp table + - FROM-clause subquery - TABLE_LIST::table is a temp table (TABLE_LIST::derived != NULL) - information schema table (TABLE_LIST::schema_table != NULL) @@ -1214,6 +1220,8 @@ class Item_in_subselect; (TABLE_LIST::natural_join != NULL) - JOIN ... 
USING (TABLE_LIST::join_using_fields != NULL) + - semi-join nest (sj_on_expr!= NULL && sj_subq_pred!=NULL) + 4) jtbm semi-join (jtbm_subselect != NULL) */ class Index_hint; @@ -1256,8 +1264,14 @@ struct TABLE_LIST */ table_map sj_inner_tables; /* Number of IN-compared expressions */ - uint sj_in_exprs; + uint sj_in_exprs; + + /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */ Item_in_subselect *sj_subq_pred; + + /* If this is a jtbm semi-join object: corresponding subselect predicate */ + Item_in_subselect *jtbm_subselect; + SJ_MATERIALIZATION_INFO *sj_mat_info; /* |