diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2010-04-06 00:16:45 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2010-04-06 00:16:45 +0400 |
commit | 2775f80f7d287cb0ed478543bf135b9399f56d66 (patch) | |
tree | a7cdf4eb98620b2921959eb94428dc3a02aac7c0 /sql | |
parent | cb325eb2b2f738b63d162fb0d46cf335e4ae84a4 (diff) | |
download | mariadb-git-2775f80f7d287cb0ed478543bf135b9399f56d66.tar.gz |
MWL#90: Subqueries: Inside-out execution for non-semijoin materialized
subqueries that are AND-parts of the WHERE clause
- First version of the code (needs cleanup).
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_subselect.cc | 23 |
-rw-r--r-- | sql/item_subselect.h | 21 |
-rw-r--r-- | sql/opt_range.cc | 2 |
-rw-r--r-- | sql/opt_subselect.cc | 295 |
-rw-r--r-- | sql/sql_base.cc | 11 |
-rw-r--r-- | sql/sql_join_cache.cc | 5 |
-rw-r--r-- | sql/sql_select.cc | 109 |
-rw-r--r-- | sql/sql_select.h | 12 |
-rw-r--r-- | sql/table.h | 1 |
9 files changed, 444 insertions, 35 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 741cd2e3e85..1a962f02779 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -468,6 +468,12 @@ bool Item_subselect::exec() return (res); } +int Item_subselect::optimize() +{ + int res; + res= engine->optimize(); + return res; +} /* Compute the IN predicate if the left operand's cache changed. @@ -2088,7 +2094,7 @@ void Item_in_subselect::update_used_tables() @retval FALSE an execution method was chosen successfully */ -bool Item_in_subselect::setup_engine() +bool Item_in_subselect::setup_engine(bool dont_switch_arena) { subselect_hash_sj_engine *new_engine= NULL; bool res= FALSE; @@ -2103,7 +2109,7 @@ bool Item_in_subselect::setup_engine() old_engine= (subselect_single_select_engine*) engine; - if (arena->is_conventional()) + if (arena->is_conventional() || dont_switch_arena) arena= 0; else thd->set_n_backup_active_arena(arena, &backup); @@ -3457,7 +3463,7 @@ subselect_hash_sj_engine::get_strategy_using_schema() bitmap_set_bit(&partial_match_key_parts, i); ++count_partial_match_columns; } - } + }; } /* If no column contains NULLs use regular hash index lookups. */ @@ -3970,6 +3976,17 @@ void subselect_hash_sj_engine::cleanup() } +int subselect_hash_sj_engine::optimize() +{ + int res= 0; + SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= materialize_join->select_lex; + res= materialize_join->optimize(); + thd->lex->current_select= save_select; + + return res; +} + /** Execute a subquery IN predicate via materialization. 
diff --git a/sql/item_subselect.h b/sql/item_subselect.h index e538f02d80a..5414c2b6bb9 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -52,9 +52,9 @@ public: public: /* unit of subquery */ st_select_lex_unit *unit; -protected: /* engine that perform execution of subselect (single select or union) */ subselect_engine *engine; +protected: /* old engine if engine was changed */ subselect_engine *old_engine; /* cache of used external tables */ @@ -139,6 +139,7 @@ public: bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); + virtual int optimize(); virtual bool exec(); virtual void fix_length_and_dec(); table_map used_tables() const; @@ -333,7 +334,9 @@ protected: all JOIN in UNION */ Item *expr; +public: Item_in_optimizer *optimizer; +protected: bool was_null; bool abort_on_null; public: @@ -379,6 +382,10 @@ public: }; enum_exec_method exec_method; + /* JTBM: temporary measure to tell JTBM predicates from SJ predicates */ + bool convert_to_semi_join; + double startup_cost; + bool *get_cond_guard(int i) { return pushed_cond_guards ? pushed_cond_guards + i : NULL; @@ -428,7 +435,7 @@ public: bool fix_fields(THD *thd, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void update_used_tables(); - bool setup_engine(); + bool setup_engine(bool dont_switch_arena); bool init_left_expr_cache(); /* Inform 'this' that it was computed, and contains a valid result. 
*/ void set_first_execution() { if (first_execution) first_execution= FALSE; } @@ -502,6 +509,7 @@ public: THD * get_thd() { return thd; } virtual int prepare()= 0; virtual void fix_length_and_dec(Item_cache** row)= 0; + virtual int optimize() { DBUG_ASSERT(0); return 0; } /* Execute the engine @@ -734,7 +742,7 @@ inline bool Item_subselect::is_uncacheable() const class subselect_hash_sj_engine : public subselect_engine { -protected: +public: /* The table into which the subquery is materialized. */ TABLE *tmp_table; /* TRUE if the subquery was materialized into a temp table. */ @@ -746,14 +754,16 @@ protected: of subselect_single_select_engine::[prepare | cols]. */ subselect_single_select_engine *materialize_engine; +protected: /* The engine used to compute the IN predicate. */ subselect_engine *lookup_engine; /* QEP to execute the subquery and materialize its result into a temporary table. Created during the first call to exec(). */ +public: JOIN *materialize_join; - +protected: /* Keyparts of the only non-NULL composite index in a rowid merge. */ MY_BITMAP non_null_key_parts; /* Keyparts of the single column indexes with NULL, one keypart per index. */ @@ -766,7 +776,9 @@ protected: IN results because index lookups sometimes match values that are actually not equal to the search key in SQL terms. */ +public: Item_cond_and *semi_join_conds; +protected: /* Possible execution strategies that can be used to compute hash semi-join.*/ enum exec_strategy { UNDEFINED, @@ -806,6 +818,7 @@ public: bool init_runtime(); void cleanup(); int prepare() { return 0; } /* Override virtual function in base class. 
*/ + int optimize(); int exec(); virtual void print(String *str, enum_query_type query_type); uint cols() diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 27ecdea9568..c6aea650f5d 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2285,7 +2285,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, quick=0; needed_reg.clear_all(); quick_keys.clear_all(); - if (keys_to_use.is_clear_all()) + if (keys_to_use.is_clear_all() || head->pos_in_table_list->jtbm_subselect) DBUG_RETURN(0); records= head->file->stats.records; if (!records) diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 1855224440c..8bf263da17e 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -25,6 +25,8 @@ static bool replace_where_subcondition(JOIN *join, Item **expr, static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, Item_in_subselect* const *el2); static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); static void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); @@ -166,6 +168,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (void)subquery_types_allow_materialization(in_subs); in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->convert_to_semi_join= TRUE; //JTBM /* Register the subquery for further processing in flatten_subqueries() */ select_lex-> @@ -220,10 +223,21 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (in_subs->is_top_level_item() || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 + !in_subs->is_correlated) // 5 { + if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) in_subs->exec_method= 
Item_in_subselect::MATERIALIZATION; + + // psergey-jtbm: "if we're top-level, register for + // conversion-to-join-tab". + if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && + thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1) + { + in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->convert_to_semi_join= FALSE; //JTBM + select_lex->outer_select()-> + join->sj_subselects.append(thd->mem_root, in_subs);//JTBM + } } Item_subselect::trans_res trans_res; @@ -338,6 +352,60 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) } +static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +{ + DBUG_ENTER("make_in_exists_conversion"); + JOIN *child_join= item->unit->first_select()->join; + Item_subselect::trans_res res; + item->changed= 0; + item->fixed= 0; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= item->unit->first_select(); + + res= item->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res == Item_subselect::RES_ERROR) + DBUG_RETURN(TRUE); + + item->changed= 1; + item->fixed= 1; + + Item *substitute= item->substitution; + bool do_fix_fields= !item->substitution->fixed; + Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &(item->emb_on_expr_nest->on_expr); + + Item *replace_me= item; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!item->convert_to_semi_join) + { + replace_me= item->optimizer; + } + + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + item->substitution= NULL; + + if (!thd->stmt_arena->is_conventional()) + { + tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? 
+ &join->select_lex->prep_where : + &(item->emb_on_expr_nest->prep_on_expr); + + if (replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} /* Convert semi-join subquery predicates into semi-join join nests @@ -445,6 +513,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES /* Replace all subqueries to be flattened with Item_int(1) */ arena= thd->activate_stmt_arena_if_needed(&backup); +#if 0 for (in_subq= join->sj_subselects.front(); in_subq != in_subq_end && join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; @@ -452,18 +521,58 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) { Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1), + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->convert_to_semi_join) + { + replace_me= (*in_subq)->optimizer; + } + if (replace_where_subcondition(join, tree, replace_me, new Item_int(1), FALSE)) DBUG_RETURN(TRUE); /* purecov: inspected */ } +#endif for (in_subq= join->sj_subselects.front(); in_subq != in_subq_end && join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; in_subq++) { - if (convert_subq_to_sj(join, *in_subq)) - DBUG_RETURN(TRUE); + bool remove_item= TRUE; + //psergey-jtbm: todo: here: check if we should convert to semi-join or + // to JTBM nest. 
+ if ((*in_subq)->convert_to_semi_join) + { + if (convert_subq_to_sj(join, *in_subq)) + DBUG_RETURN(TRUE); + } + else + { + if (convert_subq_to_jtbm(join, *in_subq, &remove_item)) + DBUG_RETURN(TRUE); + } + if (remove_item) + { + Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->convert_to_semi_join) + { + replace_me= (*in_subq)->optimizer; + } + if (replace_where_subcondition(join, tree, replace_me, new Item_int(1), + FALSE)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } } skip_conversion: /* @@ -494,7 +603,19 @@ skip_conversion: bool do_fix_fields= !(*in_subq)->substitution->fixed; Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? 
+ */ + if (!(*in_subq)->convert_to_semi_join) + { + replace_me= (*in_subq)->optimizer; + } + + if (replace_where_subcondition(join, tree, replace_me, substitute, do_fix_fields)) DBUG_RETURN(TRUE); (*in_subq)->substitution= NULL; @@ -505,7 +626,7 @@ skip_conversion: &join->select_lex->prep_where : &((*in_subq)->emb_on_expr_nest->prep_on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + if (replace_where_subcondition(join, tree, replace_me, substitute, FALSE)) DBUG_RETURN(TRUE); } @@ -517,6 +638,36 @@ skip_conversion: DBUG_RETURN(FALSE); } + +void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows, + ha_rows *scan_time) +{ + item->optimize(); + + DBUG_ASSERT(item->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)item->engine); + JOIN *join= hash_sj_engine->materialize_join; + + double rows= 1; + double read_time= 0.0; + + /* Calculate #rows and cost of join execution */ + for (uint i= join->const_tables; i < join->tables; i++) + { + rows *= join->best_positions[i].records_read; + read_time += join->best_positions[i].read_time; + } + *out_rows= rows; + item->startup_cost= read_time; + /* Calculate cost of scanning the temptable */ + double data_size= rows * hash_sj_engine->tmp_table->s->reclength; + /* Do like in handler::read_time */ + *scan_time= data_size/IO_SIZE + 2; +} + /** @brief Replaces an expression destructively inside the expression tree of the WHERE clase. 
@@ -887,6 +1038,114 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) DBUG_RETURN(FALSE); } + +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, + bool *remove_item) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list; + TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins + TABLE_LIST *tl;//, *last_leaf; + DBUG_ENTER("convert_subq_to_jtbm"); + + if (subq_pred->setup_engine(TRUE)) + DBUG_RETURN(TRUE); + + if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE) + { + *remove_item= FALSE; + make_in_exists_conversion(parent_join->thd, parent_join, subq_pred); + DBUG_RETURN(FALSE); + } + *remove_item= TRUE; + + TABLE_LIST *jtbm; + char *tbl_alias; + const char alias_mask[]="SUBQUERY#%d"; + if (!(tbl_alias= (char*)parent_join->thd->calloc(sizeof(alias_mask)+5)) || + !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest! + { + DBUG_RETURN(TRUE); + } + + jtbm->join_list= emb_join_list; + jtbm->embedding= emb_tbl_nest; + jtbm->alias= (char*)"(jtbm)"; + jtbm->jtbm_subselect= subq_pred; + jtbm->nested_join= NULL; + + /* Nests do not participate in those 'chains', so: */ + /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/ + emb_join_list->push_back(jtbm); + + /* Inject ourselves into next-leaf list */ + /* + JTBM: Inject us into next_leaf and lext_local chains.. + so that make_join_statistics et al find us. + */ + + /* + Reconnect the next_leaf chain. + TODO: Do we have to put subquery's tables at the end of the chain? + Inserting them at the beginning would be a bit faster. + NOTE: We actually insert them at the front! That's because the order is + reversed in this list. 
+ */ + for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ; + tl->next_leaf= jtbm; + + /* + Same as above for next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ; + tl->next_local= jtbm; + + /* A theory: no need to re-connect the next_global chain */ + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + jtbm->table= hash_sj_engine->tmp_table; + + jtbm->table->tablenr= parent_join->tables; + jtbm->table->map= table_map(1) << (parent_join->tables); + + parent_join->tables++; + + Item *conds= hash_sj_engine->semi_join_conds; + conds->fix_after_pullout(parent_lex, &conds); + + DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY);); + + my_snprintf(tbl_alias, sizeof(alias_mask)+5, alias_mask, + hash_sj_engine->materialize_join->select_lex->select_number); + jtbm->alias= tbl_alias; + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + DBUG_ASSERT(0); + /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); + */ + } + else + { + /* Inject into the WHERE */ + parent_join->conds= and_items(parent_join->conds, conds); + parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds); + parent_join->select_lex->where= parent_join->conds; + } + + /* Don't unlink the child, the subquery is still there and used */ + + DBUG_RETURN(FALSE); +} + + static TABLE_LIST *alloc_join_nest(THD *thd) { TABLE_LIST *tbl; @@ -3503,4 +3762,26 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) } } +int do_jtbm_materialization_if_needed(JOIN_TAB *tab) +{ + Item_in_subselect *in_subs; + if (tab->table->pos_in_table_list && + (in_subs= tab->table->pos_in_table_list->jtbm_subselect)) + { + 
DBUG_ASSERT(in_subs->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)in_subs->engine); + if (!hash_sj_engine->is_materialized) + { + hash_sj_engine->materialize_join->exec(); + hash_sj_engine->is_materialized= TRUE; + + if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error) + return 1; + } + } + return 0; +} diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 84aa162241b..a2f7cf85b1c 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -7661,6 +7661,17 @@ bool setup_tables(THD *thd, Name_resolution_context *context, if (res) DBUG_RETURN(1); } + if (table_list->jtbm_subselect) + { + Item *item= table_list->jtbm_subselect; + if (item->fix_fields(thd, &item)) + { + my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); + DBUG_RETURN(1); + } + DBUG_ASSERT(item == table_list->jtbm_subselect); + table_list->jtbm_subselect->setup_engine(FALSE); + } } /* Precompute and store the row types of NATURAL/USING joins. */ diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index d88cc7a9f7f..ee863f5ca74 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -33,6 +33,7 @@ #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) +int do_jtbm_materialization_if_needed(JOIN_TAB *tab); /***************************************************************************** * Join cache module @@ -1778,6 +1779,10 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last) } /* Start retrieving all records of the joined table */ + + //jtbm-todo: error handling! + do_jtbm_materialization_if_needed(join_tab); + if ((error= join_init_read_record(join_tab))) { rc= error < 0 ? 
NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bc81628f680..36e5d11d81a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -243,6 +243,10 @@ join_read_record_no_init(JOIN_TAB *tab); Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); +void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows, + ha_rows *scan_time); +int do_jtbm_materialization_if_needed(JOIN_TAB *tab); + /** This handles SELECT with and without UNION. */ @@ -2445,7 +2449,7 @@ bool JOIN::setup_subquery_materialization() { Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate; if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && - in_subs->setup_engine()) + in_subs->setup_engine(FALSE)) return TRUE; } } @@ -2531,9 +2535,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, DBUG_ENTER("make_join_statistics"); table_count=join->tables; - stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count); + + stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count)); stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES); - table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2)); + table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*((table_count)*2)); if (!stat || !stat_ref || !table_vector) DBUG_RETURN(1); // Eom /* purecov: inspected */ @@ -2542,7 +2547,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, stat_end=stat+table_count; found_const_table_map= all_table_map=0; const_count=0; - + for (s= stat, i= 0; tables; s++, tables= tables->next_leaf, i++) @@ -2565,7 +2570,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, table->reginfo.join_tab=s; table->reginfo.not_exists_optimize=0; bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); - all_table_map|= table->map; + all_table_map|= s->table->map; s->join=join; s->info=0; // For describe 
@@ -2574,15 +2579,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, if (tables->schema_table) table->file->stats.records= 2; table->quick_condition_rows= table->file->stats.records; - + s->on_expr_ref= &tables->on_expr; if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ #ifdef WITH_PARTITION_STORAGE_ENGINE - if ((!table->file->stats.records || table->no_partitions_used) && !embedding) + if (!table->pos_in_table_list->jtbm_subselect && + (!table->file->stats.records || table->no_partitions_used) && !embedding) #else - if (!table->file->stats.records && !embedding) + if (!table->pos_in_table_list->jtbm_subselect && + !table->file->stats.records && !embedding) #endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. @@ -2616,7 +2623,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, #else const bool no_partitions_used= FALSE; #endif - if ((table->s->system || table->file->stats.records <= 1 || + if (!table->pos_in_table_list->jtbm_subselect && + (table->s->system || table->file->stats.records <= 1 || no_partitions_used) && !s->dependent && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -2626,6 +2634,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, no_rows_const_tables |= table->map; } } + //psergey-todo: inject jtbm JOIN_TABS here. 
+ stat_vector[i]=0; join->outer_join=outer_join; @@ -2714,6 +2724,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { table=s->table; + if (table->pos_in_table_list->jtbm_subselect) + continue; + /* If equi-join condition by a key is null rejecting and after a substitution of a const table the key value happens to be null @@ -2873,8 +2886,21 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, continue; } /* Approximate found rows and time to read them */ - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + + if (s->table->pos_in_table_list->jtbm_subselect) + { + get_temptable_params(s->table->pos_in_table_list->jtbm_subselect, + &s->records, + &s->read_time); + s->found_records= s->records; + table->quick_condition_rows=s->records; + } + else + { + s->found_records=s->records=s->table->file->stats.records; + s->read_time=(ha_rows) s->table->file->scan_time(); + } + /* Set a max range of how many seeks we can expect when using keys @@ -2897,10 +2923,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, Don't do range analysis if we're on the inner side of an outer join (2). Do range analysis if we're on the inner side of a semi-join (3). 
*/ - if (!s->const_keys.is_clear_all() && // (1) - (!s->table->pos_in_table_list->embedding || // (2) - (s->table->pos_in_table_list->embedding && // (3) - s->table->pos_in_table_list->embedding->sj_on_expr))) // (3) + if (!s->const_keys.is_clear_all() && // (1) + (!s->table->pos_in_table_list->embedding || // (2) + (s->table->pos_in_table_list->embedding && // (3) + s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) + !s->table->pos_in_table_list->jtbm_subselect) { ha_rows records; SQL_SELECT *select; @@ -4265,6 +4292,8 @@ best_access_path(JOIN *join, double tmp; ha_rows rec; bool best_uses_jbuf= FALSE; + Item_in_subselect* jtbm_subselect= s->table->pos_in_table_list->jtbm_subselect; + bool jtbm_ref_used= FALSE; Loose_scan_opt loose_scan_opt; DBUG_ENTER("best_access_path"); @@ -4617,8 +4646,9 @@ best_access_path(JOIN *join, else tmp= best_time; // Do nothing } + if (jtbm_subselect) + tmp += jtbm_subselect->startup_cost; loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); - } /* not ft_key */ if (tmp < best_time - records/(double) TIME_FOR_COMPARE) { @@ -4628,6 +4658,8 @@ best_access_path(JOIN *join, best_key= start_key; best_max_key_part= max_key_part; best_ref_depends_map= found_ref; + if (jtbm_subselect) + jtbm_ref_used= TRUE; } } /* for each key */ records= best_records; @@ -4666,7 +4698,8 @@ best_access_path(JOIN *join, best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(s->table->force_index && best_key && !s->quick)) // (4) + !(s->table->force_index && best_key && !s->quick) && // (4) + !jtbm_ref_used) { // Check full join ha_rows rnd_records= s->found_records; /* @@ -4714,7 +4747,11 @@ best_access_path(JOIN *join, else { /* Estimate cost of reading table. 
*/ - tmp= s->table->file->scan_time(); + if (jtbm_subselect) + tmp= s->read_time; + else + tmp= s->table->file->scan_time(); + if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache { /* @@ -4743,6 +4780,8 @@ best_access_path(JOIN *join, } } + if (jtbm_subselect) + tmp += jtbm_subselect->startup_cost; /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -7684,6 +7723,12 @@ void JOIN_TAB::cleanup() table->file->extra(HA_EXTRA_NO_KEYREAD); } table->file->ha_index_or_rnd_end(); + //psergey-jtbm2: + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -11396,6 +11441,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -11411,6 +11458,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -11481,6 +11530,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; 
keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -11519,6 +11570,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, { key_part_info->null_bit=0; key_part_info->field= *reg_field; + //psergey-jtbm: + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + key_part_info->offset= (*reg_field)->offset(table->record[0]); key_part_info->length= (uint16) (*reg_field)->pack_length(); /* TODO: @@ -12804,7 +12863,9 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) join_tab->last_inner->first_unmatched= join_tab; } join->thd->row_count= 0; - + + //jtbm-todo: error handling! + do_jtbm_materialization_if_needed(join_tab); error= (*join_tab->read_first_record)(join_tab); if (join_tab->keep_current_rowid) @@ -18169,7 +18230,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, if (tab->select && tab->select->quick) examined_rows= tab->select->quick->records; else if (tab->type == JT_NEXT || tab->type == JT_ALL) - examined_rows= tab->limit ? tab->limit : tab->table->file->records(); + examined_rows= tab->limit ? 
tab->limit : tab->records; else examined_rows=(ha_rows)join->best_positions[i].records_read; @@ -18658,6 +18719,14 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); str->append(')'); } + else if (jtbm_subselect) + { + str->append(STRING_WITH_LEN(" <materialize> (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } else { const char *cmp_name; // Name to compare with alias diff --git a/sql/sql_select.h b/sql/sql_select.h index bc2c1b0f2cf..a0a722b2fc8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1551,7 +1551,18 @@ public: bool union_part; ///< this subselect is part of union bool optimized; ///< flag to avoid double optimization in EXPLAIN + /* + Subqueries that will need to be converted to semi-join nests (the list + is emptied when conversion is done + */ Array<Item_in_subselect> sj_subselects; + + /* + Subqueries that will need to be converted to JOIN_TABs + (Note this is different from the above in the respect that it's part + of WHERE clause or something like that?) 
+ */ + //Array<Item_in_subselect> jtbm_subselects; /* Temporary tables used to weed-out semi-join duplicates */ List<TABLE> sj_tmp_tables; @@ -1575,6 +1586,7 @@ public: JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) :fields_list(fields_arg), sj_subselects(thd_arg->mem_root, 4) + //jtbm_subselects(thd_arg->mem_root, 4) { init(thd_arg, fields_arg, select_options_arg, result_arg); } diff --git a/sql/table.h b/sql/table.h index 26d2de22cf5..5898c5bb12d 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1194,6 +1194,7 @@ struct TABLE_LIST /* Number of IN-compared expressions */ uint sj_in_exprs; Item_in_subselect *sj_subq_pred; + Item_in_subselect *jtbm_subselect; SJ_MATERIALIZATION_INFO *sj_mat_info; /* |