diff options
Diffstat (limited to 'sql/item_subselect.cc')
-rw-r--r-- | sql/item_subselect.cc | 2781 |
1 files changed, 2689 insertions, 92 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 5cf585e1a56..53950b70e69 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -39,20 +39,18 @@ #include "sql_select.h" #include "sql_parse.h" // check_stack_overrun -inline Item * and_items(Item* cond, Item *item) -{ - return (cond? (new Item_cond_and(cond, item)) : item); -} Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), - engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), - const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE) + expr_cache(0), engine(0), old_engine(0), used_tables_cache(0), + have_to_be_excluded(0), const_item_cache(1), inside_first_fix_fields(0), + done_first_fix_fields(FALSE), eliminated(FALSE), engine_changed(0), + changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); /* - item value is NULL if select_subselect not changed this value + Item value is NULL if select_result_interceptor didn't change this value (i.e. some rows will be found returned) */ null_value= TRUE; @@ -60,7 +58,7 @@ Item_subselect::Item_subselect(): void Item_subselect::init(st_select_lex *select_lex, - select_subselect *result) + select_result_interceptor *result) { /* Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(), @@ -102,6 +100,8 @@ void Item_subselect::init(st_select_lex *select_lex, SELECT_LEX *upper= unit->outer_select(); if (upper->parsing_place == IN_HAVING) upper->subquery_in_having= 1; + /* The subquery is an expression cache candidate */ + upper->expr_cache_may_be_used[upper->parsing_place]= TRUE; } DBUG_VOID_RETURN; } @@ -125,8 +125,10 @@ void Item_subselect::cleanup() } if (engine) engine->cleanup(); + depends_on.empty(); reset(); value_assigned= 0; + expr_cache= 0; DBUG_VOID_RETURN; } @@ -153,6 +155,22 @@ void Item_singlerow_subselect::cleanup() DBUG_VOID_RETURN; } + +void Item_in_subselect::cleanup() +{ + DBUG_ENTER("Item_in_subselect::cleanup"); + if (left_expr_cache) + { + left_expr_cache->delete_elements(); + delete left_expr_cache; + left_expr_cache= NULL; + } + first_execution= TRUE; + is_constant= FALSE; + Item_subselect::cleanup(); + DBUG_VOID_RETURN; +} + Item_subselect::~Item_subselect() { delete engine; @@ -174,19 +192,42 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) DBUG_ASSERT(fixed == 0); engine->set_thd((thd= thd_param)); + if (!done_first_fix_fields) + { + done_first_fix_fields= TRUE; + inside_first_fix_fields= TRUE; + upper_refs.empty(); + /* + psergey-todo: remove _first_fix_fields calls, we need changes on every + execution + */ + } + + eliminated= FALSE; + parent_select= thd_param->lex->current_select; if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res)) return TRUE; + if (!(res= engine->prepare())) { // all transformation is done (used by prepared statements) changed= 1; + inside_first_fix_fields= FALSE; + + // all transformation is done (used by prepared statements) + changed= 1; + + /* + Substitute the current item with an Item_in_optimizer that was + created by Item_in_subselect::select_in_like_transformer and + call fix_fields for the substituted item which in turn calls + engine->prepare for the subquery predicate. + */ if (substitution) { - int ret= 0; - // did we changed top item of WHERE condition if (unit->outer_select()->where == (*ref)) unit->outer_select()->where= substitution; // correct WHERE for PS @@ -200,20 +241,21 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) substitution= 0; thd->where= "checking transformed subquery"; if (!(*ref)->fixed) - ret= (*ref)->fix_fields(thd, ref); - thd->where= save_where; - return ret; + res= (*ref)->fix_fields(thd, ref); + goto end; +//psergey-merge: done_first_fix_fields= FALSE; } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); - return TRUE; +//psergey-merge: done_first_fix_fields= FALSE; + goto end; } fix_length_and_dec(); } else - goto err; + goto end; if ((uncacheable= engine->uncacheable())) { @@ -223,12 +265,170 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) } fixed= 1; -err: +end: + done_first_fix_fields= FALSE; thd->where= save_where; return res; } +bool Item_subselect::enumerate_field_refs_processor(uchar *arg) +{ + List_iterator<Ref_to_outside> it(upper_refs); + Ref_to_outside *upper; + + while ((upper= it++)) + { + if (upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg)) + return TRUE; + } + return FALSE; +} + +bool Item_subselect::mark_as_eliminated_processor(uchar *arg) +{ + eliminated= TRUE; + return FALSE; +} + + +bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, + Item *item) +{ + if (inside_first_fix_fields) + { + is_correlated= TRUE; + Ref_to_outside *upper; + if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside())) + return TRUE; + upper->select= select; + upper->item= item; + if (upper_refs.push_back(upper, thd->stmt_arena->mem_root)) + return TRUE; + } + return FALSE; +} + + +/* + Adjust attributes after our parent select has been merged into grandparent + + DESCRIPTION + Subquery is a composite object which may be correlated, that is, it may + have + 1. references to tables of the parent select (i.e. one that has the clause + with the subquery predicate) + 2. references to tables of the grandparent select + 3. references to tables of further ancestors. + + Before the pullout, this item indicates: + - #1 with table bits in used_tables() + - #2 and #3 with OUTER_REF_TABLE_BIT. + + After parent has been merged with grandparent: + - references to parent and grandparent tables should be indicated with + table bits. + - references to greatgrandparent and further ancestors - with + OUTER_REF_TABLE_BIT. +*/ + +void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref) +{ + recalc_used_tables(new_parent, TRUE); + parent_select= new_parent; +} + + +class Field_fixer: public Field_enumerator +{ +public: + table_map used_tables; /* Collect used_tables here */ + st_select_lex *new_parent; /* Select we're in */ + virtual void visit_field(Item_field *item) + { + //for (TABLE_LIST *tbl= new_parent->leaf_tables; tbl; tbl= tbl->next_local) + //{ + // if (tbl->table == field->table) + // { + used_tables|= item->field->table->map; + // return; + // } + //} + //used_tables |= OUTER_REF_TABLE_BIT; + } +}; + + +/* + Recalculate used_tables_cache +*/ + +void Item_subselect::recalc_used_tables(st_select_lex *new_parent, + bool after_pullout) +{ + List_iterator<Ref_to_outside> it(upper_refs); + Ref_to_outside *upper; + + used_tables_cache= 0; + while ((upper= it++)) + { + bool found= FALSE; + /* + Check if + 1. the upper reference refers to the new immediate parent select, or + 2. one of the further ancestors. + + We rely on the fact that the tree of selects is modified by some kind of + 'flattening', i.e. a process where child selects are merged into their + parents. + The merged selects are removed from the select tree but keep pointers to + their parents. + */ + for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select()) + { + /* + If we've reached the new parent select by walking upwards from + reference's original select, this means that the reference is now + referring to the direct parent: + */ + if (sel == new_parent) + { + found= TRUE; + /* + upper->item may be NULL when we've referred to a grouping function, + in which case we don't care about what it's table_map really is, + because item->with_sum_func==1 will ensure correct placement of the + item. + */ + if (upper->item) + { + // Now, iterate over fields and collect used_tables() attribute: + Field_fixer fixer; + fixer.used_tables= 0; + fixer.new_parent= new_parent; + upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, + (uchar*)&fixer); + used_tables_cache |= fixer.used_tables; + /* + if (after_pullout) + upper->item->fix_after_pullout(new_parent, &(upper->item)); + upper->item->update_used_tables(); + used_tables_cache |= upper->item->used_tables(); + */ + } + } + } + if (!found) + used_tables_cache|= OUTER_REF_TABLE_BIT; + } + /* + Don't update const_tables_cache yet as we don't yet know which of the + parent's tables are constant. Parent will call update_used_tables() after + he has done const table detection, and that will be our chance to update + const_tables_cache. + */ +} + bool Item_subselect::walk(Item_processor processor, bool walk_subquery, uchar *argument) { @@ -246,6 +446,7 @@ bool Item_subselect::walk(Item_processor processor, bool walk_subquery, if (lex->having && (lex->having)->walk(processor, walk_subquery, argument)) return 1; + /* TODO: why does this walk WHERE/HAVING but not ON expressions of outer joins? */ while ((item=li++)) { @@ -295,6 +496,97 @@ bool Item_subselect::exec() return (res); } + +/** + Check if an expression cache is needed for this subquery + + @param thd Thread handle + + @details + The function checks whether a cache is needed for a subquery and whether + the result of the subquery can be put in cache. + + @retval TRUE cache is needed + @retval FALSE otherwise +*/ + +bool Item_subselect::expr_cache_is_needed(THD *thd) +{ + return (depends_on.elements && + engine->cols() == 1 && + optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) && + !(engine->uncacheable() & (UNCACHEABLE_RAND | + UNCACHEABLE_SIDEEFFECT))); +} + + +/** + Check if an expression cache is needed for this subquery + + @param thd Thread handle + + @details + The function checks whether a cache is needed for a subquery and whether + the result of the subquery can be put in cache. + + @note + This method allows many columns in the subquery because it is supported by + Item_in optimizer and result of the IN subquery will be scalar in this + case. + + @retval TRUE cache is needed + @retval FALSE otherwise +*/ + +bool Item_in_subselect::expr_cache_is_needed(THD *thd) +{ + return (depends_on.elements && + optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) && + !(engine->uncacheable() & (UNCACHEABLE_RAND | + UNCACHEABLE_SIDEEFFECT))); +} + + +/* + Compute the IN predicate if the left operand's cache changed. +*/ + +bool Item_in_subselect::exec() +{ + DBUG_ENTER("Item_in_subselect::exec"); + /* + Initialize the cache of the left predicate operand. This has to be done as + late as now, because Cached_item directly contains a resolved field (not + an item, and in some cases (when temp tables are created), these fields + end up pointing to the wrong field. One solution is to change Cached_item + to not resolve its field upon creation, but to resolve it dynamically + from a given Item_ref object. + TODO: the cache should be applied conditionally based on: + - rules - e.g. only if the left operand is known to be ordered, and/or + - on a cost-based basis, that takes into account the cost of a cache + lookup, the cache hit rate, and the savings per cache hit. + */ + if (!left_expr_cache && exec_method == MATERIALIZATION) + init_left_expr_cache(); + + /* + If the new left operand is already in the cache, reuse the old result. + Use the cached result only if this is not the first execution of IN + because the cache is not valid for the first execution. + */ + if (!first_execution && left_expr_cache && + test_if_item_cache_changed(*left_expr_cache) < 0) + DBUG_RETURN(FALSE); + + /* + The exec() method below updates item::value, and item::null_value, thus if + we don't call it, the next call to item::val_int() will return whatever + result was computed by its previous call. + */ + DBUG_RETURN(Item_subselect::exec()); +} + + Item::Type Item_subselect::type() const { return SUBSELECT_ITEM; @@ -327,6 +619,7 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg) void Item_subselect::update_used_tables() { + recalc_used_tables(parent_select, FALSE); if (!engine->uncacheable()) { // did all used tables become static? @@ -435,6 +728,7 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type) void Item_singlerow_subselect::reset() { + eliminated= FALSE; null_value= TRUE; if (value) value->null_value= TRUE; @@ -454,8 +748,9 @@ void Item_singlerow_subselect::reset() Item_subselect::trans_res Item_singlerow_subselect::select_transformer(JOIN *join) { + DBUG_ENTER("Item_singlerow_subselect::select_transformer"); if (changed) - return RES_OK; + DBUG_RETURN(RES_OK); SELECT_LEX *select_lex= join->select_lex; Query_arena *arena= thd->stmt_arena; @@ -498,15 +793,18 @@ Item_singlerow_subselect::select_transformer(JOIN *join) */ substitution->walk(&Item::remove_dependence_processor, 0, (uchar *) select_lex->outer_select()); - return RES_REDUCE; + DBUG_RETURN(RES_REDUCE); } - return RES_OK; + DBUG_RETURN(RES_OK); } void Item_singlerow_subselect::store(uint i, Item *item) { row[i]->store(item); + //psergey-merge: can do without that: row[i]->cache_value(); + //psergey-backport-timours: ^ really, without that ^ + //psergey-try-merge-again: row[i]->cache_value(); } @@ -547,6 +845,40 @@ void Item_singlerow_subselect::fix_length_and_dec() maybe_null= engine->may_be_null(); } + +/** + Add an expression cache for this subquery if it is needed + + @param thd_arg Thread handle + + @details + The function checks whether an expression cache is needed for this item + and if if so wraps the item into an item of the class + Item_exp_cache_wrapper with an appropriate expression cache set up there. + + @note + used from Item::transform() + + @return + new wrapper item if an expression cache is needed, + this item - otherwise +*/ + +Item* Item_singlerow_subselect::expr_cache_insert_transformer(uchar *thd_arg) +{ + THD *thd= (THD*) thd_arg; + DBUG_ENTER("Item_singlerow_subselect::expr_cache_insert_transformer"); + + if (expr_cache) + DBUG_RETURN(expr_cache); + + if (expr_cache_is_needed(thd) && + (expr_cache= set_expr_cache(thd, depends_on))) + DBUG_RETURN(expr_cache); + DBUG_RETURN(this); +} + + uint Item_singlerow_subselect::cols() { return engine->cols(); @@ -693,8 +1025,9 @@ bool Item_in_subselect::test_limit(st_select_lex_unit *unit_arg) Item_in_subselect::Item_in_subselect(Item * left_exp, st_select_lex *select_lex): - Item_exists_subselect(), optimizer(0), transformed(0), - pushed_cond_guards(NULL), upper_item(0) + Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), + is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL), + exec_method(NOT_TRANSFORMED), upper_item(0) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); left_expr= left_exp; @@ -708,13 +1041,18 @@ Item_in_subselect::Item_in_subselect(Item * left_exp, DBUG_VOID_RETURN; } +int Item_in_subselect::get_identifier() +{ + return engine->get_identifier(); +} + Item_allany_subselect::Item_allany_subselect(Item * left_exp, chooser_compare_func_creator fc, st_select_lex *select_lex, bool all_arg) :Item_in_subselect(), func_creator(fc), all(all_arg) { - DBUG_ENTER("Item_in_subselect::Item_in_subselect"); + DBUG_ENTER("Item_allany_subselect::Item_allany_subselect"); left_expr= left_exp; func= func_creator(all_arg); init(select_lex, new select_exists_subselect(this)); @@ -736,6 +1074,40 @@ void Item_exists_subselect::fix_length_and_dec() unit->global_parameters->select_limit= new Item_int((int32) 1); } + +/** + Add an expression cache for this subquery if it is needed + + @param thd_arg Thread handle + + @details + The function checks whether an expression cache is needed for this item + and if if so wraps the item into an item of the class + Item_exp_cache_wrapper with an appropriate expression cache set up there. + + @note + used from Item::transform() + + @return + new wrapper item if an expression cache is needed, + this item - otherwise +*/ + +Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg) +{ + THD *thd= (THD*) thd_arg; + DBUG_ENTER("Item_exists_subselect::expr_cache_insert_transformer"); + + if (expr_cache) + DBUG_RETURN(expr_cache); + + if (substype() == EXISTS_SUBS && expr_cache_is_needed(thd) && + (expr_cache= set_expr_cache(thd, depends_on))) + DBUG_RETURN(expr_cache); + DBUG_RETURN(this); +} + + double Item_exists_subselect::val_real() { DBUG_ASSERT(fixed == 1); @@ -885,6 +1257,8 @@ bool Item_in_subselect::val_bool() { DBUG_ASSERT(fixed == 1); null_value= was_null= FALSE; + if (is_constant) + return value; if (exec()) { reset(); @@ -960,10 +1334,10 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) HAVING trigcond(<is_not_null_test>(ie)) RETURN - RES_OK - OK, either subquery was transformed, or appopriate - predicates where injected into it. - RES_REDUCE - The subquery was reduced to non-subquery - RES_ERROR - Error + RES_OK Either subquery was transformed, or appopriate + predicates where injected into it. + RES_REDUCE The subquery was reduced to non-subquery + RES_ERROR Error */ Item_subselect::trans_res @@ -977,6 +1351,7 @@ Item_in_subselect::single_value_transformer(JOIN *join, Check that the right part of the subselect contains no more than one column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...) */ + // psergey: duplicated_subselect_card_check if (select_lex->item_list.elements > 1) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); @@ -1071,9 +1446,9 @@ Item_in_subselect::single_value_transformer(JOIN *join, SELECT_LEX_UNIT *master_unit= select_lex->master_unit(); substitution= optimizer; - SELECT_LEX *current= thd->lex->current_select, *up; + SELECT_LEX *current= thd->lex->current_select; - thd->lex->current_select= up= current->return_after_parsing(); + thd->lex->current_select= current->return_after_parsing(); //optimizer never use Item **ref => we can pass 0 as parameter if (!optimizer || optimizer->fix_left(thd, 0)) { @@ -1082,6 +1457,9 @@ Item_in_subselect::single_value_transformer(JOIN *join, } thd->lex->current_select= current; + /* We will refer to upper level cache array => we have to save it for SP */ + optimizer->keep_top_level_cache(); + /* As far as Item_ref_in_optimizer do not substitute itself on fix_fields we can use same item for all selects. @@ -1092,7 +1470,9 @@ Item_in_subselect::single_value_transformer(JOIN *join, (char *)in_left_expr_name); master_unit->uncacheable|= UNCACHEABLE_DEPENDENT; + //psergey: placed then removed: select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; } + if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards) { if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool)))) @@ -1100,6 +1480,60 @@ Item_in_subselect::single_value_transformer(JOIN *join, pushed_cond_guards[0]= TRUE; } + /* + If this IN predicate can be computed via materialization, do not + perform the IN -> EXISTS transformation. + */ + if (exec_method == MATERIALIZATION) + DBUG_RETURN(RES_OK); + + /* Perform the IN=>EXISTS transformation. */ + DBUG_RETURN(single_value_in_to_exists_transformer(join, func)); +} + + +/** + Transofrm an IN predicate into EXISTS via predicate injection. + + @details The transformation injects additional predicates into the subquery + (and makes the subquery correlated) as follows. + + - If the subquery has aggregates, GROUP BY, or HAVING, convert to + + SELECT ie FROM ... HAVING subq_having AND + trigcond(oe $cmp$ ref_or_null_helper<ie>) + + the addition is wrapped into trigger only when we want to distinguish + between NULL and FALSE results. + + - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the + following: + + = If we don't need to distinguish between NULL and FALSE subquery: + + SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where + + = If we need to distinguish between those: + + SELECT 1 FROM ... + WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL)) + HAVING trigcond(<is_not_null_test>(ie)) + + @param join Join object of the subquery (i.e. 'child' join). + @param func Subquery comparison creator + + @retval RES_OK Either subquery was transformed, or appopriate + predicates where injected into it. + @retval RES_REDUCE The subquery was reduced to non-subquery + @retval RES_ERROR Error +*/ + +Item_subselect::trans_res +Item_in_subselect::single_value_in_to_exists_transformer(JOIN * join, Comp_creator *func) +{ + SELECT_LEX *select_lex= join->select_lex; + DBUG_ENTER("Item_in_subselect::single_value_in_to_exists_transformer"); + select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; if (join->having || select_lex->with_sum_func || select_lex->group_list.elements) @@ -1279,27 +1713,29 @@ Item_subselect::trans_res Item_in_subselect::row_value_transformer(JOIN *join) { SELECT_LEX *select_lex= join->select_lex; - Item *having_item= 0; uint cols_num= left_expr->cols(); - bool is_having_used= (join->having || select_lex->with_sum_func || - select_lex->group_list.first || - !select_lex->table_list.elements); + DBUG_ENTER("Item_in_subselect::row_value_transformer"); - if (select_lex->item_list.elements != left_expr->cols()) + // psergey: duplicated_subselect_card_check + if (select_lex->item_list.elements != cols_num) { - my_error(ER_OPERAND_COLUMNS, MYF(0), left_expr->cols()); + my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num); DBUG_RETURN(RES_ERROR); } + /* + Wrap the current IN predicate in an Item_in_optimizer. The actual + substitution in the Item tree takes place in Item_subselect::fix_fields. + */ if (!substitution) { //first call for this unit SELECT_LEX_UNIT *master_unit= select_lex->master_unit(); substitution= optimizer; - SELECT_LEX *current= thd->lex->current_select, *up; - thd->lex->current_select= up= current->return_after_parsing(); + SELECT_LEX *current= thd->lex->current_select; + thd->lex->current_select= current->return_after_parsing(); //optimizer never use Item **ref => we can pass 0 as parameter if (!optimizer || optimizer->fix_left(thd, 0)) { @@ -1323,6 +1759,48 @@ Item_in_subselect::row_value_transformer(JOIN *join) } } + /* + If this IN predicate can be computed via materialization, do not + perform the IN -> EXISTS transformation. + */ + if (exec_method == MATERIALIZATION) + DBUG_RETURN(RES_OK); + + /* Perform the IN=>EXISTS transformation. */ + DBUG_RETURN(row_value_in_to_exists_transformer(join)); +} + + +/** + Tranform a (possibly non-correlated) IN subquery into a correlated EXISTS. + + @todo + The IF-ELSE below can be refactored so that there is no duplication of the + statements that create the new conditions. For this we have to invert the IF + and the FOR statements as this: + for (each left operand) + create the equi-join condition + if (is_having_used || !abort_on_null) + create the "is null" and is_not_null_test items + if (is_having_used) + add the equi-join and the null tests to HAVING + else + add the equi-join and the "is null" to WHERE + add the is_not_null_test to HAVING +*/ + +Item_subselect::trans_res +Item_in_subselect::row_value_in_to_exists_transformer(JOIN * join) +{ + SELECT_LEX *select_lex= join->select_lex; + Item *having_item= 0; + uint cols_num= left_expr->cols(); + bool is_having_used= (join->having || select_lex->with_sum_func || + select_lex->group_list.first || + !select_lex->table_list.elements); + + DBUG_ENTER("Item_in_subselect::row_value_in_to_exists_transformer"); + select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; if (is_having_used) { @@ -1545,7 +2023,7 @@ Item_subselect::trans_res Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) { Query_arena *arena, backup; - SELECT_LEX *current= thd->lex->current_select, *up; + SELECT_LEX *current= thd->lex->current_select; const char *save_where= thd->where; Item_subselect::trans_res res= RES_ERROR; bool result; @@ -1566,9 +2044,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) } if (changed) - { DBUG_RETURN(RES_OK); - } thd->where= "IN/ALL/ANY subquery"; @@ -1576,6 +2052,8 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) In some optimisation cases we will not need this Item_in_optimizer object, but we can't know it here, but here we need address correct reference on left expresion. + + //psergey: he means degenerate cases like "... IN (SELECT 1)" */ if (!optimizer) { @@ -1587,7 +2065,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) goto err; } - thd->lex->current_select= up= current->return_after_parsing(); + thd->lex->current_select= current->return_after_parsing(); result= (!left_expr->fixed && left_expr->fix_fields(thd, optimizer->arguments())); /* fix_fields can change reference to left_expr, we need reassign it */ @@ -1597,7 +2075,12 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) if (result) goto err; - transformed= 1; + /* + If we didn't choose an execution method up to this point, we choose + the IN=>EXISTS transformation. + */ + if (exec_method == NOT_TRANSFORMED) + exec_method= IN_TO_EXISTS; arena= thd->activate_stmt_arena_if_needed(&backup); /* @@ -1631,7 +2114,7 @@ err: void Item_in_subselect::print(String *str, enum_query_type query_type) { - if (transformed) + if (exec_method == IN_TO_EXISTS) str->append(STRING_WITH_LEN("<exists>")); else { @@ -1644,28 +2127,250 @@ void Item_in_subselect::print(String *str, enum_query_type query_type) bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref) { - bool result = 0; - - if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed) - result = left_expr->fix_fields(thd_arg, &left_expr); + uint outer_cols_num; + List<Item> *inner_cols; + + if (exec_method == SEMI_JOIN) + return !( (*ref)= new Item_int(1)); + + /* + Check if the outer and inner IN operands match in those cases when we + will not perform IN=>EXISTS transformation. Currently this is when we + use subquery materialization. + + The condition below is true when this method was called recursively from + inside JOIN::prepare for the JOIN object created by the call chain + Item_subselect::fix_fields -> subselect_single_select_engine::prepare, + which creates a JOIN object for the subquery and calls JOIN::prepare for + the JOIN of the subquery. + Notice that in some cases, this doesn't happen, and the check_cols() + test for each Item happens later in + Item_in_subselect::row_value_in_to_exists_transformer. + The reason for this mess is that our JOIN::prepare phase works top-down + instead of bottom-up, so we first do name resoluton and semantic checks + for the outer selects, then for the inner. + */ + if (engine && + engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE && + ((subselect_single_select_engine*)engine)->join) + { + outer_cols_num= left_expr->cols(); + + if (unit->is_union()) + inner_cols= &(unit->types); + else + inner_cols= &(unit->first_select()->item_list); + if (outer_cols_num != inner_cols->elements) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num); + return TRUE; + } + if (outer_cols_num > 1) + { + List_iterator<Item> inner_col_it(*inner_cols); + Item *inner_col; + for (uint i= 0; i < outer_cols_num; i++) + { + inner_col= inner_col_it++; + if (inner_col->check_cols(left_expr->element_index(i)->cols())) + return TRUE; + } + } + } + + if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed && + left_expr->fix_fields(thd_arg, &left_expr)) + return TRUE; + if (Item_subselect::fix_fields(thd_arg, ref)) + return TRUE; + + fixed= TRUE; + + return FALSE; +} + + +void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref) +{ + left_expr->fix_after_pullout(new_parent, &left_expr); + Item_subselect::fix_after_pullout(new_parent, ref); +} + +void Item_in_subselect::update_used_tables() +{ + Item_subselect::update_used_tables(); + left_expr->update_used_tables(); + used_tables_cache |= left_expr->used_tables(); +} + +/** + Try to create an engine to compute the subselect via materialization, + and if this fails, revert to execution via the IN=>EXISTS transformation. + + @details + The purpose of this method is to hide the implementation details + of this Item's execution. The method creates a new engine for + materialized execution, and initializes the engine. + + If this initialization fails + - either because it wasn't possible to create the needed temporary table + and its index, + - or because of a memory allocation error, + then we revert back to execution via the IN=>EXISTS tranformation. + + The initialization of the new engine is divided in two parts - a permanent + one that lives across prepared statements, and one that is repeated for each + execution. + + @returns + @retval TRUE memory allocation error occurred + @retval FALSE an execution method was chosen successfully +*/ + +bool Item_in_subselect::setup_engine() +{ + subselect_hash_sj_engine *new_engine= NULL; + bool res= FALSE; + + DBUG_ENTER("Item_in_subselect::setup_engine"); + + if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE) + { + /* Create/initialize objects in permanent memory. */ + subselect_single_select_engine *old_engine; + Query_arena *arena= thd->stmt_arena, backup; + + old_engine= (subselect_single_select_engine*) engine; + + if (arena->is_conventional()) + arena= 0; + else + thd->set_n_backup_active_arena(arena, &backup); + + if (!(new_engine= new subselect_hash_sj_engine(thd, this, + old_engine)) || + new_engine->init_permanent(unit->get_unit_column_types())) + { + Item_subselect::trans_res trans_res; + /* + If for some reason we cannot use materialization for this IN predicate, + delete all materialization-related objects, and apply the IN=>EXISTS + transformation. + */ + delete new_engine; + new_engine= NULL; + exec_method= NOT_TRANSFORMED; + if (left_expr->cols() == 1) + trans_res= single_value_in_to_exists_transformer(old_engine->join, + &eq_creator); + else + trans_res= row_value_in_to_exists_transformer(old_engine->join); + res= (trans_res != Item_subselect::RES_OK); + } + if (new_engine) + engine= new_engine; + + if (arena) + thd->restore_active_arena(arena, &backup); + } + else + { + DBUG_ASSERT(engine->engine_type() == subselect_engine::HASH_SJ_ENGINE); + new_engine= (subselect_hash_sj_engine*) engine; + } + + /* Initilizations done in runtime memory, repeated for each execution. */ + if (new_engine) + { + /* + Reset the LIMIT 1 set in Item_exists_subselect::fix_length_and_dec. + TODO: + Currently we set the subquery LIMIT to infinity, and this is correct + because we forbid at parse time LIMIT inside IN subqueries (see + Item_in_subselect::test_limit). However, once we allow this, here + we should set the correct limit if given in the query. + */ + unit->global_parameters->select_limit= NULL; + if ((res= new_engine->init_runtime())) + DBUG_RETURN(res); + } - return result || Item_subselect::fix_fields(thd_arg, ref); + DBUG_RETURN(res); +} + + +/** + Initialize the cache of the left operand of the IN predicate. + + @note This method has the same purpose as alloc_group_fields(), + but it takes a different kind of collection of items, and the + list we push to is dynamically allocated. + + @retval TRUE if a memory allocation error occurred or the cache is + not applicable to the current query + @retval FALSE if success +*/ + +bool Item_in_subselect::init_left_expr_cache() +{ + JOIN *outer_join; + + outer_join= unit->outer_select()->join; + /* + An IN predicate might be evaluated in a query for which all tables have + been optimzied away. + */ + if (!outer_join || !outer_join->tables || !outer_join->tables_list) + return TRUE; + + if (!(left_expr_cache= new List<Cached_item>)) + return TRUE; + + for (uint i= 0; i < left_expr->cols(); i++) + { + Cached_item *cur_item_cache= new_Cached_item(thd, + left_expr->element_index(i), + FALSE); + if (!cur_item_cache || left_expr_cache->push_front(cur_item_cache)) + return TRUE; + } + return FALSE; +} + + +/* + Callback to test if an IN predicate is expensive. + + @details + IN predicates are considered expensive only if they will be executed via + materialization. The return value affects the behavior of + make_cond_for_table() in such a way that it is unchanged when we use + the IN=>EXISTS transformation to compute IN. + + @retval TRUE if the predicate is expensive + @retval FALSE otherwise +*/ + +bool Item_in_subselect::is_expensive_processor(uchar *arg) +{ + return exec_method == MATERIALIZATION; } Item_subselect::trans_res Item_allany_subselect::select_transformer(JOIN *join) { - transformed= 1; + DBUG_ENTER("Item_allany_subselect::select_transformer"); + exec_method= IN_TO_EXISTS; if (upper_item) upper_item->show= 1; - return select_in_like_transformer(join, func); + DBUG_RETURN(select_in_like_transformer(join, func)); } void Item_allany_subselect::print(String *str, enum_query_type query_type) { - if (transformed) + if (exec_method == IN_TO_EXISTS) str->append(STRING_WITH_LEN("<exists>")); else { @@ -1688,20 +2393,23 @@ void subselect_engine::set_thd(THD *thd_arg) subselect_single_select_engine:: subselect_single_select_engine(st_select_lex *select, - select_subselect *result_arg, + select_result_interceptor *result_arg, Item_subselect *item_arg) :subselect_engine(item_arg, result_arg), - prepared(0), optimized(0), executed(0), - select_lex(select), join(0) + prepared(0), executed(0), select_lex(select), join(0) { select_lex->master_unit()->item= item_arg; } +int subselect_single_select_engine::get_identifier() +{ + return select_lex->select_number; +} void subselect_single_select_engine::cleanup() { DBUG_ENTER("subselect_single_select_engine::cleanup"); - prepared= optimized= executed= 0; + prepared= executed= 0; join= 0; result->cleanup(); DBUG_VOID_RETURN; @@ -1744,19 +2452,19 @@ bool subselect_union_engine::no_rows() return test(!unit->fake_select_lex->join->send_records); } + void subselect_uniquesubquery_engine::cleanup() { DBUG_ENTER("subselect_uniquesubquery_engine::cleanup"); - /* - subselect_uniquesubquery_engine have not 'result' assigbed, so we do not - cleanup() it - */ + /* Tell handler we don't need the index anymore */ + if (tab->table->file->inited) + tab->table->file->ha_index_end(); DBUG_VOID_RETURN; } subselect_union_engine::subselect_union_engine(st_select_lex_unit *u, - select_subselect *result_arg, + select_result_interceptor *result_arg, Item_subselect *item_arg) :subselect_engine(item_arg, result_arg) { @@ -1765,10 +2473,40 @@ subselect_union_engine::subselect_union_engine(st_select_lex_unit *u, } +/** + Create and prepare the JOIN object that represents the query execution + plan for the subquery. + + @details + This method is called from Item_subselect::fix_fields. For prepared + statements it is called both during the PREPARE and EXECUTE phases in the + following ways: + - During PREPARE the optimizer needs some properties + (join->fields_list.elements) of the JOIN to proceed with preparation of + the remaining query (namely to complete ::fix_fields for the subselect + related classes. In the end of PREPARE the JOIN is deleted. + - When we EXECUTE the query, Item_subselect::fix_fields is called again, and + the JOIN object is re-created again, prepared and executed. In the end of + execution it is deleted. + In all cases the JOIN is created in runtime memory (not in the permanent + memory root). + + @todo + Re-check what properties of 'join' are needed during prepare, and see if + we can avoid creating a JOIN during JOIN::prepare of the outer join. + + @retval 0 if success + @retval 1 if error +*/ + int subselect_single_select_engine::prepare() { if (prepared) return 0; + if (select_lex->join) + { + select_lex->cleanup(); + } join= new JOIN(thd, select_lex->item_list, select_lex->options | SELECT_NO_UNLOCK, result); if (!join || !result) @@ -1799,8 +2537,8 @@ int subselect_union_engine::prepare() int subselect_uniquesubquery_engine::prepare() { - //this never should be called - DBUG_ASSERT(0); + /* Should never be called. */ + DBUG_ASSERT(FALSE); return 1; } @@ -1847,7 +2585,7 @@ void subselect_engine::set_row(List<Item> &item_list, Item_cache **row) if (!(row[i]= Item_cache::get_cache(sel_item))) return; row[i]->setup(sel_item); - row[i]->store(sel_item); + //psergey-backport-timours: row[i]->store(sel_item); } if (item_list.elements > 1) res_type= ROW_RESULT; @@ -1896,11 +2634,10 @@ int subselect_single_select_engine::exec() char const *save_where= thd->where; SELECT_LEX *save_select= thd->lex->current_select; thd->lex->current_select= select_lex; - if (!optimized) + if (!join->optimized) { SELECT_LEX_UNIT *unit= select_lex->master_unit(); - optimized= 1; unit->set_limit(unit->global_parameters); if (join->optimize()) { @@ -2043,21 +2780,31 @@ int subselect_uniquesubquery_engine::scan_table() if (table->file->inited) table->file->ha_index_end(); - table->file->ha_rnd_init(1); + if (table->file->ha_rnd_init_with_error(1)) + DBUG_RETURN(1); table->file->extra_opt(HA_EXTRA_CACHE, current_thd->variables.read_buff_size); table->null_row= 0; for (;;) { - error=table->file->rnd_next(table->record[0]); - if (error && error != HA_ERR_END_OF_FILE) - { - error= report_error(table, error); - break; + error=table->file->ha_rnd_next(table->record[0]); + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(table, error); + break; + } } - /* No more rows */ - if (table->status) - break; if (!cond || cond->val_int()) { @@ -2168,6 +2915,56 @@ bool subselect_uniquesubquery_engine::copy_ref_key() /* + @retval 1 A NULL was found in the outer reference, index lookup is + not applicable, the outer ref is unsusable as a lookup key, + use some other method to find a match. + @retval 0 The outer ref was copied into an index lookup key. + @retval -1 The outer ref cannot possibly match any row, IN is FALSE. +*/ +/* TIMOUR: this method is a variant of copy_ref_key(), needs refactoring. */ + +int subselect_uniquesubquery_engine::copy_ref_key_simple() +{ + for (store_key **copy= tab->ref.key_copy ; *copy ; copy++) + { + enum store_key::store_key_result store_res; + store_res= (*copy)->copy(); + tab->ref.key_err= store_res; + + /* + When there is a NULL part in the key we don't need to make index + lookup for such key thus we don't need to copy whole key. + If we later should do a sequential scan return OK. Fail otherwise. + + See also the comment for the subselect_uniquesubquery_engine::exec() + function. + */ + null_keypart= (*copy)->null_key; + if (null_keypart) + return 1; + + /* + Check if the error is equal to STORE_KEY_FATAL. This is not expressed + using the store_key::store_key_result enum because ref.key_err is a + boolean and we want to detect both TRUE and STORE_KEY_FATAL from the + space of the union of the values of [TRUE, FALSE] and + store_key::store_key_result. + TODO: fix the variable an return types. + */ + if (store_res == store_key::STORE_KEY_FATAL) + { + /* + Error converting the left IN operand to the column type of the right + IN operand. + */ + return -1; + } + } + return 0; +} + + +/* Execute subselect SYNOPSIS @@ -2207,7 +3004,13 @@ int subselect_uniquesubquery_engine::exec() /* TODO: change to use of 'full_scan' here? */ if (copy_ref_key()) + { + /* + TIMOUR: copy_ref_key() == 1 means NULL result, not error, why return 1? + Check who reiles on this result. + */ DBUG_RETURN(1); + } if (table->status) { /* @@ -2223,10 +3026,11 @@ int subselect_uniquesubquery_engine::exec() if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 0); - error= table->file->index_read_map(table->record[0], - tab->ref.key_buff, - make_prev_keypart_map(tab->ref.key_parts), - HA_READ_KEY_EXACT); + error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab-> + ref.key_parts), + HA_READ_KEY_EXACT); if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) error= report_error(table, error); @@ -2247,10 +3051,51 @@ int subselect_uniquesubquery_engine::exec() } +/* + TIMOUR: write comment +*/ + +int subselect_uniquesubquery_engine::index_lookup() +{ + DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup"); + int error; + TABLE *table= tab->table; + + if (!table->file->inited) + table->file->ha_index_init(tab->ref.key, 0); + error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab-> + ref.key_parts), + HA_READ_KEY_EXACT); + DBUG_PRINT("info", ("lookup result: %i", error)); + + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + /* + TIMOUR: I don't understand at all when do we need to call report_error. + In most places where we access an index, we don't do this. Why here? + */ + error= report_error(table, error); + DBUG_RETURN(error); + } + + table->null_row= 0; + if (!error && (!cond || cond->val_int())) + ((Item_in_subselect *) item)->value= 1; + else + ((Item_in_subselect *) item)->value= 0; + + DBUG_RETURN(0); +} + + + subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine() { /* Tell handler we don't need the index anymore */ - tab->table->file->ha_index_end(); + //psergey-merge-todo: the following was gone in 6.0: + //psergey-merge: don't need this after all: tab->table->file->ha_index_end(); } @@ -2258,7 +3103,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine() Index-lookup subselect 'engine' - run the subquery SYNOPSIS - subselect_uniquesubquery_engine:exec() + subselect_indexsubquery_engine:exec() full_scan DESCRIPTION @@ -2344,10 +3189,11 @@ int subselect_indexsubquery_engine::exec() if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 1); - error= table->file->index_read_map(table->record[0], - tab->ref.key_buff, - make_prev_keypart_map(tab->ref.key_parts), - HA_READ_KEY_EXACT); + error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab-> + ref.key_parts), + HA_READ_KEY_EXACT); if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) error= report_error(table, error); @@ -2368,9 +3214,9 @@ int subselect_indexsubquery_engine::exec() ((Item_in_subselect *) item)->value= 1; break; } - error= table->file->index_next_same(table->record[0], - tab->ref.key_buff, - tab->ref.key_length); + error= table->file->ha_index_next_same(table->record[0], + tab->ref.key_buff, + tab->ref.key_length); if (error && error != HA_ERR_END_OF_FILE) { error= report_error(table, error); @@ -2395,8 +3241,10 @@ int subselect_indexsubquery_engine::exec() uint subselect_single_select_engine::cols() { - DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields() - return select_lex->join->fields_list.elements; + //psergey-sj-backport: the following assert was gone in 6.0: + //DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields() + //return select_lex->join->fields_list.elements; + return select_lex->item_list.elements; } @@ -2478,10 +3326,20 @@ void subselect_union_engine::print(String *str, enum_query_type query_type) void subselect_uniquesubquery_engine::print(String *str, enum_query_type query_type) { + char *table_name= tab->table->s->table_name.str; str->append(STRING_WITH_LEN("<primary_index_lookup>(")); tab->ref.items[0]->print(str, query_type); str->append(STRING_WITH_LEN(" in ")); - str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); + if (tab->table->s->table_category == TABLE_CATEGORY_TEMPORARY) + { + /* + Temporary tables' names change across runs, so they can't be used for + EXPLAIN EXTENDED. + */ + str->append(STRING_WITH_LEN("<temporary table>")); + } + else + str->append(table_name, tab->table->s->table_name.length); KEY *key_info= tab->table->key_info+ tab->ref.key; str->append(STRING_WITH_LEN(" on ")); str->append(key_info->name); @@ -2493,6 +3351,29 @@ void subselect_uniquesubquery_engine::print(String *str, str->append(')'); } +/* +TODO: +The above ::print method should be changed as below. Do it after +all other tests pass. + +void subselect_uniquesubquery_engine::print(String *str) +{ + KEY *key_info= tab->table->key_info + tab->ref.key; + str->append(STRING_WITH_LEN("<primary_index_lookup>(")); + for (uint i= 0; i < key_info->key_parts; i++) + tab->ref.items[i]->print(str); + str->append(STRING_WITH_LEN(" in ")); + str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); + str->append(STRING_WITH_LEN(" on ")); + str->append(key_info->name); + if (cond) + { + str->append(STRING_WITH_LEN(" where ")); + cond->print(str); + } + str->append(')'); +} +*/ void subselect_indexsubquery_engine::print(String *str, enum_query_type query_type) @@ -2532,7 +3413,7 @@ void subselect_indexsubquery_engine::print(String *str, */ bool subselect_single_select_engine::change_result(Item_subselect *si, - select_subselect *res) + select_result_interceptor *res) { item= si; result= res; @@ -2553,7 +3434,7 @@ bool subselect_single_select_engine::change_result(Item_subselect *si, */ bool subselect_union_engine::change_result(Item_subselect *si, - select_subselect *res) + select_result_interceptor *res) { item= si; int rc= unit->change_result(res, result); @@ -2575,7 +3456,7 @@ bool subselect_union_engine::change_result(Item_subselect *si, */ bool subselect_uniquesubquery_engine::change_result(Item_subselect *si, - select_subselect *res) + select_result_interceptor *res) { DBUG_ASSERT(0); return TRUE; @@ -2643,5 +3524,1721 @@ bool subselect_union_engine::no_tables() bool subselect_uniquesubquery_engine::no_tables() { /* returning value is correct, but this method should never be called */ + DBUG_ASSERT(FALSE); + return 0; +} + + +/****************************************************************************** + WL#1110 - Implementation of class subselect_hash_sj_engine +******************************************************************************/ + + +/** + Check if an IN predicate should be executed via partial matching using + only schema information. + + @details + This test essentially has three results: + - partial matching is applicable, but cannot be executed due to a + limitation in the total number of indexes, as a result we can't + use subquery materialization at all. + - partial matching is either applicable or not, and this can be + determined by looking at 'this->max_keys'. + If max_keys > 1, then we need partial matching because there are + more indexes than just the one we use during materialization to + remove duplicates. + + @note + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_schema() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + + if (item_in->is_top_level_item()) + return COMPLETE_MATCH; + else + { + List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); + Item *outer_col, *inner_col; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + outer_col= item_in->left_expr->element_index(i); + inner_col= inner_col_it++; + + if (!inner_col->maybe_null && !outer_col->maybe_null) + bitmap_set_bit(&non_null_key_parts, i); + else + { + bitmap_set_bit(&partial_match_key_parts, i); + ++count_partial_match_columns; + } + } + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (count_partial_match_columns) + return PARTIAL_MATCH; + return COMPLETE_MATCH; +} + + +/** + Test whether an IN predicate must be computed via partial matching + based on the NULL statistics for each column of a materialized subquery. + + @details The procedure analyzes column NULL statistics, updates the + matching type of columns that cannot be NULL or that contain only NULLs. + Based on this, the procedure determines the final execution strategy for + the [NOT] IN predicate. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_data() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + Item *outer_col; + + /* + If we already determined that a complete match is enough based on schema + information, nothing can be better. + */ + if (strategy == COMPLETE_MATCH) + return COMPLETE_MATCH; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + if (!bitmap_is_set(&partial_match_key_parts, i)) + continue; + outer_col= item_in->left_expr->element_index(i); + /* + If column 'i' doesn't contain NULLs, and the corresponding outer reference + cannot have a NULL value, then 'i' is a non-nullable column. + */ + if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null) + { + bitmap_clear_bit(&partial_match_key_parts, i); + bitmap_set_bit(&non_null_key_parts, i); + --count_partial_match_columns; + } + if (result_sink->get_null_count_of_col(i) == + tmp_table->file->stats.records) + ++count_null_only_columns; + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (!count_partial_match_columns) + return COMPLETE_MATCH; + return PARTIAL_MATCH; +} + + +void +subselect_hash_sj_engine::choose_partial_match_strategy( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t pm_buff_size; + + DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + Choose according to global optimizer switch. If only one of the switches is + 'ON', then the remaining strategy is the only possible one. The only cases + when this will be overriden is when the total size of all buffers for the + merge strategy is bigger than the 'rowid_merge_buff_size' system variable, + or if there isn't enough physical memory to allocate the buffers. + */ + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_SCAN; + else if + ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_MERGE; + + /* + If both switches are ON, or both are OFF, we interpret that as "let the + optimizer decide". Perform a cost based choice between the two partial + matching strategies. + */ + /* + TIMOUR: the above interpretation of the switch values could be changed to: + - if both are ON - let the optimizer decide, + - if both are OFF - do not use partial matching, therefore do not use + materialization in non-top-level predicates. + The problem with this is that we know for sure if we need partial matching + only after the subquery is materialized, and this is too late to revert to + the IN=>EXISTS strategy. + */ + if (strategy == PARTIAL_MATCH) + { + /* + TIMOUR: Currently we use a super simplistic measure. This will be + addressed in a separate task. + */ + if (tmp_table->file->stats.records < 100) + strategy= PARTIAL_MATCH_SCAN; + else + strategy= PARTIAL_MATCH_MERGE; + } + + /* Check if there is enough memory for the rowid merge strategy. */ + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_buff_size= rowid_merge_buff_size(has_non_null_key, + has_covering_null_row, + partial_match_key_parts); + if (pm_buff_size > thd->variables.rowid_merge_buff_size) + strategy= PARTIAL_MATCH_SCAN; + } +} + + +/* + Compute the memory size of all buffers proportional to the number of rows + in tmp_table. + + @details + If the result is bigger than thd->variables.rowid_merge_buff_size, partial + matching via merging is not applicable. +*/ + +size_t subselect_hash_sj_engine::rowid_merge_buff_size( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t buff_size; /* Total size of all buffers used by partial matching. */ + ha_rows row_count= tmp_table->file->stats.records; + uint rowid_length= tmp_table->file->ref_length; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */ + buff_size= row_count * rowid_length * sizeof(uchar); + + if (has_non_null_key) + { + /* Add the size of Ordered_key::key_buff of the only non-NULL key. */ + buff_size+= row_count * sizeof(rownum_t); + } + + if (!has_covering_null_row) + { + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i) || + result_sink->get_null_count_of_col(i) == row_count) + continue; /* In these cases we wouldn't construct Ordered keys. */ + + /* Add the size of Ordered_key::key_buff */ + buff_size+= (row_count - result_sink->get_null_count_of_col(i)) * + sizeof(rownum_t); + /* Add the size of Ordered_key::null_key */ + buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i)); + } + } + + return buff_size; +} + + +/* + Initialize a MY_BITMAP with a buffer allocated on the current + memory root. + TIMOUR: move to bitmap C file? +*/ + +static my_bool +bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root, + bitmap_buffer_size(n_bits))) || + bitmap_init(map, bitmap_buf, n_bits, FALSE)) + return TRUE; + bitmap_clear_all(map); + return FALSE; +} + + +/** + Create all structures needed for IN execution that can live between PS + reexecution. + + @param tmp_columns the items that produce the data for the temp table + + @details + - Create a temporary table to store the result of the IN subquery. The + temporary table has one hash index on all its columns. + - Create a new result sink that sends the result stream of the subquery to + the temporary table, + + @notice: + Currently Item_subselect::init() already chooses and creates at parse + time an engine with a corresponding JOIN to execute the subquery. + + @retval TRUE if error + @retval FALSE otherwise +*/ + +bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) +{ + /* Options to create_tmp_table. */ + ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; + /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */ + + DBUG_ENTER("subselect_hash_sj_engine::init_permanent"); + + if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements, + thd->mem_root) || + bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements, + thd->mem_root)) + DBUG_RETURN(TRUE); + + /* + Create and initialize a select result interceptor that stores the + result stream in a temporary table. The temporary table itself is + managed (created/filled/etc) internally by the interceptor. + */ +/* + TIMOUR: + Select a more efficient result sink when we know there is no need to collect + data statistics. + + if (strategy == COMPLETE_MATCH) + { + if (!(result= new select_union)) + DBUG_RETURN(TRUE); + } + else if (strategy == PARTIAL_MATCH) + { + if (!(result= new select_materialize_with_stats)) + DBUG_RETURN(TRUE); + } +*/ + if (!(result= new select_materialize_with_stats)) + DBUG_RETURN(TRUE); + + if (((select_union*) result)->create_result_table( + thd, tmp_columns, TRUE, tmp_create_options, + "materialized subselect", TRUE)) + DBUG_RETURN(TRUE); + + tmp_table= ((select_union*) result)->table; + + /* + If the subquery has blobs, or the total key lenght is bigger than + some length, or the total number of key parts is more than the + allowed maximum (currently MAX_REF_PARTS == 16), then the created + index cannot be used for lookups and we can't use hash semi + join. If this is the case, delete the temporary table since it + will not be used, and tell the caller we failed to initialize the + engine. + */ + if (tmp_table->s->keys == 0) + { + DBUG_ASSERT( + tmp_table->s->uniques || + tmp_table->key_info->key_length >= tmp_table->file->max_key_length() || + tmp_table->key_info->key_parts > tmp_table->file->max_key_parts()); + free_tmp_table(thd, tmp_table); + tmp_table= NULL; + delete result; + result= NULL; + DBUG_RETURN(TRUE); + } + + /* + Make sure there is only one index on the temp table, and it doesn't have + the extra key part created when s->uniques > 0. + */ + DBUG_ASSERT(tmp_table->s->keys == 1 && + ((Item_in_subselect *) item)->left_expr->cols() == + tmp_table->key_info->key_parts); + + if (make_semi_join_conds() || + /* A unique_engine is used both for complete and partial matching. */ + !(lookup_engine= make_unique_engine())) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/* + Create an artificial condition to post-filter those rows matched by index + lookups that cannot be distinguished by the index lookup procedure. + + @notes + The need for post-filtering may occur e.g. because of + truncation. Prepared statements execution requires that fix_fields is + called for every execution. In order to call fix_fields we need to + create a Name_resolution_context and a corresponding TABLE_LIST for + the temporary table for the subquery, so that all column references + to the materialized subquery table can be resolved correctly. + + @returns + @retval TRUE memory allocation error occurred + @retval FALSE the conditions were created and resolved (fixed) +*/ + +bool subselect_hash_sj_engine::make_semi_join_conds() +{ + /* + Table reference for tmp_table that is used to resolve column references + (Item_fields) to columns in tmp_table. + */ + TABLE_LIST *tmp_table_ref; + /* Name resolution context for all tmp_table columns created below. */ + Name_resolution_context *context; + Item_in_subselect *item_in= (Item_in_subselect *) item; + + DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds"); + DBUG_ASSERT(semi_join_conds == NULL); + + if (!(semi_join_conds= new Item_cond_and)) + DBUG_RETURN(TRUE); + + if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) + DBUG_RETURN(TRUE); + + tmp_table_ref->init_one_table("", "materialized subselect", TL_READ); + tmp_table_ref->table= tmp_table; + + context= new Name_resolution_context; + context->init(); + context->first_name_resolution_table= + context->last_name_resolution_table= tmp_table_ref; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + Item_func_eq *eq_cond; /* New equi-join condition for the current column. */ + /* Item for the corresponding field from the materialized temp table. */ + Item_field *right_col_item; + + if (!(right_col_item= new Item_field(thd, context, tmp_table->field[i])) || + !(eq_cond= new Item_func_eq(item_in->left_expr->element_index(i), + right_col_item)) || + (((Item_cond_and*)semi_join_conds)->add(eq_cond))) + { + delete semi_join_conds; + semi_join_conds= NULL; + DBUG_RETURN(TRUE); + } + } + if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/** + Create a new uniquesubquery engine for the execution of an IN predicate. + + @details + Create and initialize a new JOIN_TAB, and Table_ref objects to perform + lookups into the indexed temporary table. + + @retval A new subselect_hash_sj_engine object + @retval NULL if a memory allocation error occurs +*/ + +subselect_uniquesubquery_engine* +subselect_hash_sj_engine::make_unique_engine() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + Item_iterator_row it(item_in->left_expr); + /* The only index on the temporary table. */ + KEY *tmp_key= tmp_table->key_info; + JOIN_TAB *tab; + + DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine"); + + /* + Create and initialize the JOIN_TAB that represents an index lookup + plan operator into the materialized subquery result. Notice that: + - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and + - here we initialize only those members that are used by + subselect_uniquesubquery_engine, so these objects are incomplete. + */ + if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) + DBUG_RETURN(NULL); + + tab->table= tmp_table; + tab->ref.tmp_table_index_lookup_init(thd, tmp_key, it, FALSE); + + DBUG_RETURN(new subselect_uniquesubquery_engine(thd, tab, item, + semi_join_conds)); +} + + +/** + Initialize members of the engine that need to be re-initilized at each + execution. + + @retval TRUE if a memory allocation error occurred + @retval FALSE if success +*/ + +bool subselect_hash_sj_engine::init_runtime() +{ + /* + Create and optimize the JOIN that will be used to materialize + the subquery if not yet created. + */ + materialize_engine->prepare(); + /* + Repeat name resolution for 'cond' since cond is not part of any + clause of the query, and it is not 'fixed' during JOIN::prepare. + */ + if (semi_join_conds && !semi_join_conds->fixed && + semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) + return TRUE; + /* Let our engine reuse this query plan for materialization. */ + materialize_join= materialize_engine->join; + materialize_join->change_result(result); + return FALSE; +} + + +subselect_hash_sj_engine::~subselect_hash_sj_engine() +{ + delete lookup_engine; + delete result; + if (tmp_table) + free_tmp_table(thd, tmp_table); +} + + +/** + Cleanup performed after each PS execution. + + @details + Called in the end of JOIN::prepare for PS from Item_subselect::cleanup. +*/ + +void subselect_hash_sj_engine::cleanup() +{ + enum_engine_type lookup_engine_type= lookup_engine->engine_type(); + is_materialized= FALSE; + bitmap_clear_all(&non_null_key_parts); + bitmap_clear_all(&partial_match_key_parts); + count_partial_match_columns= 0; + count_null_only_columns= 0; + strategy= UNDEFINED; + materialize_engine->cleanup(); + if (lookup_engine_type == TABLE_SCAN_ENGINE || + lookup_engine_type == ROWID_MERGE_ENGINE) + { + subselect_engine *inner_lookup_engine; + inner_lookup_engine= + ((subselect_partial_match_engine*) lookup_engine)->lookup_engine; + /* + Partial match engines are recreated for each PS execution inside + subselect_hash_sj_engine::exec(). + */ + delete lookup_engine; + lookup_engine= inner_lookup_engine; + } + DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); + lookup_engine->cleanup(); + result->cleanup(); /* Resets the temp table as well. */ +} + + +/** + Execute a subquery IN predicate via materialization. + + @details + If needed materialize the subquery into a temporary table, then + copmpute the predicate via a lookup into this table. + + @retval TRUE if error + @retval FALSE otherwise +*/ + +int subselect_hash_sj_engine::exec() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + SELECT_LEX *save_select= thd->lex->current_select; + subselect_partial_match_engine *pm_engine= NULL; + int res= 0; + + DBUG_ENTER("subselect_hash_sj_engine::exec"); + + /* + Optimize and materialize the subquery during the first execution of + the subquery predicate. + */ + thd->lex->current_select= materialize_engine->select_lex; + if ((res= materialize_join->optimize())) + goto err; /* purecov: inspected */ + DBUG_ASSERT(!is_materialized); /* We should materialize only once. */ + materialize_join->exec(); + if ((res= test(materialize_join->error || thd->is_fatal_error))) + goto err; + + /* + TODO: + - Unlock all subquery tables as we don't need them. To implement this + we need to add new functionality to JOIN::join_free that can unlock + all tables in a subquery (and all its subqueries). + - The temp table used for grouping in the subquery can be freed + immediately after materialization (yet it's done together with + unlocking). + */ + is_materialized= TRUE; + /* + If the subquery returned no rows, the temporary table is empty, so we know + directly that the result of IN is FALSE. We first update the table + statistics, then we test if the temporary table for the query result is + empty. + */ + tmp_table->file->info(HA_STATUS_VARIABLE); + if (!tmp_table->file->stats.records) + { + item_in->value= FALSE; + /* The value of IN will not change during this execution. */ + item_in->is_constant= TRUE; + item_in->set_first_execution(); + /* TIMOUR: check if we need this: item_in->null_value= FALSE; */ + DBUG_RETURN(FALSE); + } + + /* + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + */ + strategy= get_strategy_using_schema(); + /* This call may discover that we don't need partial matching at all. */ + strategy= get_strategy_using_data(); + if (strategy == PARTIAL_MATCH) + { + uint count_pm_keys; /* Total number of keys needed for partial matching. */ + MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */ + uint covering_null_row_width; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ? + &non_null_key_parts : NULL; + + if (result_sink->get_max_nulls_in_row() == + tmp_table->s->fields - + (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)) + covering_null_row_width= result_sink->get_max_nulls_in_row(); + else + covering_null_row_width= 0; + + if (covering_null_row_width) + count_pm_keys= nn_key_parts ? 1 : 0; + else + count_pm_keys= count_partial_match_columns - count_null_only_columns + + (nn_key_parts ? 1 : 0); + + choose_partial_match_strategy(test(nn_key_parts), + test(covering_null_row_width), + &partial_match_key_parts); + DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE || + strategy == PARTIAL_MATCH_SCAN); + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_engine= + new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + count_pm_keys, + covering_null_row_width, + item, result, + semi_join_conds->argument_list()); + if (!pm_engine || + ((subselect_rowid_merge_engine*) pm_engine)-> + init(nn_key_parts, &partial_match_key_parts)) + { + /* + The call to init() would fail if there was not enough memory to allocate + all buffers for the rowid merge strategy. In this case revert to table + scanning which doesn't need any big buffers. + */ + delete pm_engine; + pm_engine= NULL; + strategy= PARTIAL_MATCH_SCAN; + } + } + + if (strategy == PARTIAL_MATCH_SCAN) + { + if (!(pm_engine= + new subselect_table_scan_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + item, result, + semi_join_conds->argument_list(), + covering_null_row_width))) + { + /* This is an irrecoverable error. */ + res= 1; + goto err; + } + } + } + + if (pm_engine) + lookup_engine= pm_engine; + item_in->change_engine(lookup_engine); + +err: + thd->lex->current_select= save_select; + DBUG_RETURN(res); +} + + +/** + Print the state of this engine into a string for debugging and views. +*/ + +void subselect_hash_sj_engine::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN(" <materialize> (")); + materialize_engine->print(str, query_type); + str->append(STRING_WITH_LEN(" ), ")); + + if (lookup_engine) + lookup_engine->print(str, query_type); + else + str->append(STRING_WITH_LEN( + "<engine selected at execution time>" + )); +} + +void subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row) +{ + DBUG_ASSERT(FALSE); +} + +void subselect_hash_sj_engine::exclude() +{ + DBUG_ASSERT(FALSE); +} + +bool subselect_hash_sj_engine::no_tables() +{ + DBUG_ASSERT(FALSE); + return FALSE; +} + +bool subselect_hash_sj_engine::change_result(Item_subselect *si, + select_result_interceptor *res) +{ + DBUG_ASSERT(FALSE); + return TRUE; +} + + +Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, + ha_rows null_count_arg, ha_rows min_null_row_arg, + ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg) + : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg), + row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg) +{ + DBUG_ASSERT(tbl->file->stats.records > null_count); + key_buff_elements= tbl->file->stats.records - null_count; + cur_key_idx= HA_POS_ERROR; + + DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) || + (!null_count && !min_null_row_arg && !max_null_row_arg)); + if (null_count) + { + /* The counters are 1-based, for key access we need 0-based indexes. */ + min_null_row= min_null_row_arg - 1; + max_null_row= max_null_row_arg - 1; + } + else + min_null_row= max_null_row= 0; +} + + +Ordered_key::~Ordered_key() +{ + my_free((char*) key_buff, MYF(0)); + bitmap_free(&null_key); +} + + +/* + Cleanup that needs to be done for each PS (re)execution. +*/ + +void Ordered_key::cleanup() +{ + /* + Currently these keys are recreated for each PS re-execution, thus + there is nothing to cleanup, the whole object goes away after execution + is over. All handler related initialization/deinitialization is done by + the parent subselect_rowid_merge_engine object. + */ +} + + +/* + Initialize a multi-column index. +*/ + +bool Ordered_key::init(MY_BITMAP *columns_to_index) +{ + THD *thd= tbl->in_use; + uint cur_key_col= 0; + Item_field *cur_tmp_field; + Item_func_lt *fn_less_than; + + key_column_count= bitmap_bits_set(columns_to_index); + + // TIMOUR: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(key_column_count * + sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(key_column_count * + sizeof(Item_func_lt*)); + + for (uint i= 0; i < columns_to_index->n_bits; i++) + { + if (!bitmap_is_set(columns_to_index, i)) + continue; + cur_tmp_field= new Item_field(tbl->field[i]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + fn_less_than= new Item_func_lt(cur_tmp_field, + search_key->element_index(i)); + fn_less_than->fix_fields(thd, (Item**) &fn_less_than); + key_columns[cur_key_col]= cur_tmp_field; + compare_pred[cur_key_col]= fn_less_than; + ++cur_key_col; + } + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Initialize a single-column index. +*/ + +bool Ordered_key::init(int col_idx) +{ + THD *thd= tbl->in_use; + + key_column_count= 1; + + // TIMOUR: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*)); + + key_columns[0]= new Item_field(tbl->field[col_idx]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + compare_pred[0]= new Item_func_lt(key_columns[0], + search_key->element_index(col_idx)); + compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]); + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Allocate the buffers for both the row number, and the NULL-bitmap indexes. +*/ + +bool Ordered_key::alloc_keys_buffers() +{ + DBUG_ASSERT(key_buff_elements > 0); + + if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t), + MYF(MY_WME)))) + return TRUE; + + /* + TIMOUR: it is enough to create bitmaps with size + (max_null_row - min_null_row), and then use min_null_row as + lookup offset. + */ + /* Notice that max_null_row is max array index, we need count, so +1. */ + if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE)) + return TRUE; + + cur_key_idx= HA_POS_ERROR; + + return FALSE; +} + + +/* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. + + @retval -1 + @retval 0 + @retval +1 +*/ + +int +Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b) +{ + uchar *rowid_a, *rowid_b; + int error, cmp_res; + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + + if (a == b) + return 0; + /* Get the corresponding rowids. */ + rowid_a= row_num_to_rowid + a * rowid_length; + rowid_b= row_num_to_rowid + b * rowid_length; + /* Fetch the rows for comparison. */ + error= tbl->file->ha_rnd_pos(tbl->record[0], rowid_a); + DBUG_ASSERT(!error); + error= tbl->file->ha_rnd_pos(tbl->record[1], rowid_b); + DBUG_ASSERT(!error); + /* + Compare the two rows by the corresponding values of the indexed + columns. + */ + for (uint i= 0; i < key_column_count; i++) + { + Field *cur_field= key_columns[i]->field; + if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length))) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +int +Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key, + rownum_t* a, rownum_t* b) +{ + /* The result of comparing the two keys according to their row data. */ + int cmp_row_res= key->cmp_keys_by_row_data(*a, *b); + if (cmp_row_res) + return cmp_row_res; + return (*a < *b) ? -1 : (*a > *b) ? 1 : 0; +} + + +void Ordered_key::sort_keys() +{ + my_qsort2(key_buff, key_buff_elements, sizeof(rownum_t), + (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this); + /* Invalidate the current row position. */ + cur_key_idx= HA_POS_ERROR; +} + + +/* + The fraction of rows that do not contain NULL in the columns indexed by + this key. + + @retval 1 if there are no NULLs + @retval 0 if only NULLs +*/ + +double Ordered_key::null_selectivity() +{ + /* We should not be processing empty tables. */ + DBUG_ASSERT(tbl->file->stats.records); + return (1 - (double) null_count / (double) tbl->file->stats.records); +} + + +/* + Compare the value(s) of the current key in 'search_key' with the + data of the current table record. + + @notes The comparison result follows from the way compare_pred + is created in Ordered_key::init. Currently compare_pred compares + a field in of the current row with the corresponding Item that + contains the search key. + + @param row_num Number of the row (not index in the key_buff array) + + @retval -1 if (current row < search_key) + @retval 0 if (current row == search_key) + @retval +1 if (current row > search_key) +*/ + +int Ordered_key::cmp_key_with_search_key(rownum_t row_num) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length; + int error, cmp_res; + + error= tbl->file->ha_rnd_pos(tbl->record[0], cur_rowid); + DBUG_ASSERT(!error); + + for (uint i= 0; i < key_column_count; i++) + { + cmp_res= compare_pred[i]->get_comparator()->compare(); + /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */ + DBUG_ASSERT(!compare_pred[i]->null_value); + if (cmp_res) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +/* + Find a key in a sorted array of keys via binary search. + + see create_subq_in_equalities() +*/ + +bool Ordered_key::lookup() +{ + DBUG_ASSERT(key_buff_elements); + + ha_rows lo= 0; + ha_rows hi= key_buff_elements - 1; + ha_rows mid; + int cmp_res; + + while (lo <= hi) + { + mid= lo + (hi - lo) / 2; + cmp_res= cmp_key_with_search_key(key_buff[mid]); + /* + In order to find the minimum match, check if the pevious element is + equal or smaller than the found one. If equal, we need to search further + to the left. + */ + if (!cmp_res && mid > 0) + cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0; + + if (cmp_res == -1) + { + /* row[mid] < search_key */ + lo= mid + 1; + } + else if (cmp_res == 1) + { + /* row[mid] > search_key */ + if (!mid) + goto not_found; + hi= mid - 1; + } + else + { + /* row[mid] == search_key */ + cur_key_idx= mid; + return TRUE; + } + } +not_found: + cur_key_idx= HA_POS_ERROR; + return FALSE; +} + + +/* + Move the current index pointer to the next key with the same column + values as the current key. Since the index is sorted, all such keys + are contiguous. +*/ + +bool Ordered_key::next_same() +{ + DBUG_ASSERT(key_buff_elements); + + if (cur_key_idx < key_buff_elements - 1) + { + /* + TIMOUR: + The below is quite inefficient, since as a result we will fetch every + row (except the last one) twice. There must be a more efficient way, + e.g. swapping record[0] and record[1], and reading only the new record. + */ + if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1])) + { + ++cur_key_idx; + return TRUE; + } + } + return FALSE; +} + + +void Ordered_key::print(String *str) +{ + uint i; + str->append("{idx="); + str->qs_append(keyid); + str->append(", ("); + for (i= 0; i < key_column_count - 1; i++) + { + str->append(key_columns[i]->field->field_name); + str->append(", "); + } + str->append(key_columns[i]->field->field_name); + str->append("), "); + + str->append("null_bitmap: (bits="); + str->qs_append(null_key.n_bits); + str->append(", nulls= "); + str->qs_append((double)null_count); + str->append(", min_null= "); + str->qs_append((double)min_null_row); + str->append(", max_null= "); + str->qs_append((double)max_null_row); + str->append("), "); + + str->append('}'); +} + + +subselect_partial_match_engine::subselect_partial_match_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(engine_arg), + equi_join_conds(equi_join_conds_arg), + covering_null_row_width(covering_null_row_width_arg) +{} + + +int subselect_partial_match_engine::exec() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + int res; + + /* Try to find a matching row by index lookup. */ + res= lookup_engine->copy_ref_key_simple(); + if (res == -1) + { + /* The result is FALSE based on the outer reference. */ + item_in->value= 0; + item_in->null_value= 0; + return 0; + } + else if (res == 0) + { + /* Search for a complete match. */ + if ((res= lookup_engine->index_lookup())) + { + /* An error occured during lookup(). */ + item_in->value= 0; + item_in->null_value= 0; + return res; + } + else if (item_in->value) + { + /* + A complete match was found, the result of IN is TRUE. + Notice: (this->item == lookup_engine->item) + */ + return 0; + } + } + + if (covering_null_row_width == tmp_table->s->fields) + { + /* + If there is a NULL-only row that coveres all columns the result of IN + is UNKNOWN. + */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + return 0; + } + + /* + There is no complete match. Look for a partial match (UNKNOWN result), or + no match (FALSE). + */ + if (tmp_table->file->inited) + tmp_table->file->ha_index_end(); + + if (partial_match()) + { + /* The result of IN is UNKNOWN. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + } + else + { + /* The result of IN is FALSE. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 0; + item_in->null_value= 0; + } + return 0; } + + +void subselect_partial_match_engine::print(String *str, + enum_query_type query_type) +{ + /* + Should never be called as the actual engine cannot be known at query + optimization time. + DBUG_ASSERT(FALSE); + */ +} + + +/* + @param non_null_key_parts + @param partial_match_key_parts A union of all single-column NULL key parts. + @param count_partial_match_columns Number of NULL keyparts (set bits above). + + @retval FALSE the engine was initialized successfully + @retval TRUE there was some (memory allocation) error during initialization, + such errors should be interpreted as revert to other strategy +*/ + +bool +subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, + MY_BITMAP *partial_match_key_parts) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tmp_table->file->ref_length; + ha_rows row_count= tmp_table->file->stats.records; + rownum_t cur_rownum= 0; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + uint cur_keyid= 0; + Item_in_subselect *item_in= (Item_in_subselect*) item; + int error; + + if (keys_count == 0) + { + /* There is nothing to initialize, we will only do regular lookups. */ + return FALSE; + } + + DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width && + keys_count == 1 && + non_null_key_parts)); + /* + Allocate buffers to hold the merged keys and the mapping between rowids and + row numbers. + */ + if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count * + sizeof(Ordered_key*))) || + !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length * + sizeof(uchar), MYF(MY_WME)))) + return TRUE; + + /* Create the only non-NULL key if there is any. */ + if (non_null_key_parts) + { + non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, + 0, 0, 0, row_num_to_rowid); + if (non_null_key->init(non_null_key_parts)) + return TRUE; + merge_keys[cur_keyid]= non_null_key; + merge_keys[cur_keyid]->first(); + ++cur_keyid; + } + + /* + If there is a covering NULL row, the only key that is needed is the + only non-NULL key that is already created above. We create keys on + NULL-able columns only if there is no covering NULL row. + */ + if (!covering_null_row_width) + { + if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) || + bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) || + bitmap_init_memroot(&null_only_columns, keys_count, thd->mem_root)) + return TRUE; + + /* + Create one single-column NULL-key for each column in + partial_match_key_parts. + */ + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i)) + continue; + + if (result_sink->get_null_count_of_col(i) == row_count) + { + bitmap_set_bit(&null_only_columns, cur_keyid); + continue; + } + else + { + merge_keys[cur_keyid]= new Ordered_key( + cur_keyid, tmp_table, + item_in->left_expr->element_index(i), + result_sink->get_null_count_of_col(i), + result_sink->get_min_null_of_col(i), + result_sink->get_max_null_of_col(i), + row_num_to_rowid); + if (merge_keys[cur_keyid]->init(i)) + return TRUE; + merge_keys[cur_keyid]->first(); + } + ++cur_keyid; + } + } + DBUG_ASSERT(cur_keyid == keys_count); + + /* Populate the indexes with data from the temporary table. */ + if (tmp_table->file->ha_rnd_init_with_error(1)) + return TRUE; + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + tmp_table->null_row= 0; + while (TRUE) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error == HA_ERR_RECORD_DELETED) + { + /* We get this for duplicate records that should not be in tmp_table. */ + continue; + } + /* + This is a temp table that we fully own, there should be no other + cause to stop the iteration than EOF. + */ + DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE); + if (error == HA_ERR_END_OF_FILE) + { + DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records); + break; + } + + /* + Save the position of this record in the row_num -> rowid mapping. + */ + tmp_table->file->position(tmp_table->record[0]); + memcpy(row_num_to_rowid + cur_rownum * rowid_length, + tmp_table->file->ref, rowid_length); + + /* Add the current row number to the corresponding keys. */ + if (non_null_key) + { + /* By definition there are no NULLs in the non-NULL key. */ + non_null_key->add_key(cur_rownum); + } + + for (uint i= (non_null_key ? 1 : 0); i < keys_count; i++) + { + /* + Check if the first and only indexed column contains NULL in the curent + row, and add the row number to the corresponding key. + */ + if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null()) + merge_keys[i]->set_null(cur_rownum); + else + merge_keys[i]->add_key(cur_rownum); + } + ++cur_rownum; + } + + tmp_table->file->ha_rnd_end(); + + /* Sort all the keys by their NULL selectivity. */ + my_qsort(merge_keys, keys_count, sizeof(Ordered_key*), + (qsort_cmp) cmp_keys_by_null_selectivity); + + /* Sort the keys in each of the indexes. */ + for (uint i= 0; i < keys_count; i++) + merge_keys[i]->sort_keys(); + + if (init_queue(&pq, keys_count, 0, FALSE, + subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL, + 0, 0)) + return TRUE; + + return FALSE; +} + + +subselect_rowid_merge_engine::~subselect_rowid_merge_engine() +{ + /* None of the resources below is allocated if there are no ordered keys. */ + if (keys_count) + { + my_free((char*) row_num_to_rowid, MYF(0)); + for (uint i= 0; i < keys_count; i++) + delete merge_keys[i]; + delete_queue(&pq); + if (tmp_table->file->inited == handler::RND) + tmp_table->file->ha_rnd_end(); + } +} + + +void subselect_rowid_merge_engine::cleanup() +{ +} + + +/* + Quick sort comparison function to compare keys in order of decreasing bitmap + selectivity, so that the most selective keys come first. + + @param k1 first key to compare + @param k2 second key to compare + + @retval 1 if k1 is less selective than k2 + @retval 0 if k1 is equally selective as k2 + @retval -1 if k1 is more selective than k2 +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1, + Ordered_key **k2) +{ + double k1_sel= (*k1)->null_selectivity(); + double k2_sel= (*k2)->null_selectivity(); + if (k1_sel < k2_sel) + return 1; + if (k1_sel > k2_sel) + return -1; + return 0; +} + + +/* +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg, + uchar *k1, uchar *k2) +{ + rownum_t r1= ((Ordered_key*) k1)->current(); + rownum_t r2= ((Ordered_key*) k2)->current(); + + return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0; +} + + +/* + Check if certain table row contains a NULL in all columns for which there is + no match in the corresponding value index. + + @retval TRUE if a NULL row exists + @retval FALSE otherwise +*/ + +bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num) +{ + Ordered_key *cur_key; + uint cur_id; + for (uint i = 0; i < keys_count; i++) + { + cur_key= merge_keys[i]; + cur_id= cur_key->get_keyid(); + if (bitmap_is_set(&matching_keys, cur_id)) + { + /* + The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num', + thus we skip it as it can't possibly match a NULL. + */ + continue; + } + if (!cur_key->is_null(row_num)) + return FALSE; + } + return TRUE; +} + + +/* + @retval TRUE there is a partial match (UNKNOWN) + @retval FALSE there is no match at all (FALSE) +*/ + +bool subselect_rowid_merge_engine::partial_match() +{ + Ordered_key *min_key; /* Key that contains the current minimum position. */ + rownum_t min_row_num; /* Current row number of min_key. */ + Ordered_key *cur_key; + rownum_t cur_row_num; + uint count_nulls_in_search_key= 0; + bool res= FALSE; + + /* If there is a non-NULL key, it must be the first key in the keys array. */ + DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + /* The prioryty queue for keys must be empty. */ + DBUG_ASSERT(!pq.elements); + + /* All data accesses during execution are via handler::ha_rnd_pos() */ + if (tmp_table->file->ha_rnd_init_with_error(0)) + { + res= FALSE; + goto end; + } + + /* Check if there is a match for the columns of the only non-NULL key. */ + if (non_null_key && !non_null_key->lookup()) + { + res= FALSE; + goto end; + } + + /* + If there is a NULL (sub)row that covers all NULL-able columns, + then there is a guranteed partial match, and we don't need to search + for the matching row. + */ + if (covering_null_row_width) + { + res= TRUE; + goto end; + } + + if (non_null_key) + queue_insert(&pq, (uchar *) non_null_key); + /* + Do not add the non_null_key, since it was already processed above. + */ + bitmap_clear_all(&matching_outer_cols); + for (uint i= test(non_null_key); i < keys_count; i++) + { + DBUG_ASSERT(merge_keys[i]->get_column_count() == 1); + if (merge_keys[i]->get_search_key(0)->is_null()) + { + ++count_nulls_in_search_key; + bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid()); + } + else if (merge_keys[i]->lookup()) + queue_insert(&pq, (uchar *) merge_keys[i]); + } + + /* + If the outer reference consists of only NULLs, or if it has NULLs in all + nullable columns, the result is UNKNOWN. + */ + if (count_nulls_in_search_key == + ((Item_in_subselect *) item)->left_expr->cols() - + (non_null_key ? non_null_key->get_column_count() : 0)) + { + res= TRUE; + goto end; + } + + /* + If there is no NULL (sub)row that covers all NULL columns, and there is no + single match for any of the NULL columns, the result is FALSE. + */ + if (pq.elements - test(non_null_key) == 0) + { + res= FALSE; + goto end; + } + + DBUG_ASSERT(pq.elements); + + min_key= (Ordered_key*) queue_remove_top(&pq); + min_row_num= min_key->current(); + bitmap_copy(&matching_keys, &null_only_columns); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + if (min_key->next_same()) + queue_insert(&pq, (uchar *) min_key); + + if (pq.elements == 0) + { + /* + Check the only matching row of the only key min_key for NULL matches + in the other columns. + */ + res= test_null_row(min_row_num); + goto end; + } + + while (TRUE) + { + cur_key= (Ordered_key*) queue_remove_top(&pq); + cur_row_num= cur_key->current(); + + if (cur_row_num == min_row_num) + bitmap_set_bit(&matching_keys, cur_key->get_keyid()); + else + { + /* Follows from the correct use of priority queue. */ + DBUG_ASSERT(cur_row_num > min_row_num); + if (test_null_row(min_row_num)) + { + res= TRUE; + goto end; + } + else + { + min_key= cur_key; + min_row_num= cur_row_num; + bitmap_copy(&matching_keys, &null_only_columns); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + } + } + + if (cur_key->next_same()) + queue_insert(&pq, (uchar *) cur_key); + + if (pq.elements == 0) + { + /* Check the last row of the last column in PQ for NULL matches. */ + res= test_null_row(min_row_num); + goto end; + } + } + + /* We should never get here - all branches must be handled explicitly above. */ + DBUG_ASSERT(FALSE); + +end: + queue_remove_all(&pq); + tmp_table->file->ha_rnd_end(); + return res; +} + + +subselect_table_scan_engine::subselect_table_scan_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg) +{} + + +/* + TIMOUR: + This method is based on subselect_uniquesubquery_engine::scan_table(). + Consider refactoring somehow, 80% of the code is the same. + + for each row_i in tmp_table + { + count_matches= 0; + for each row element row_i[j] + { + if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j]) + ++count_matches; + } + if (count_matches == outer_ref.elements) + return TRUE + } + return FALSE +*/ + +bool subselect_table_scan_engine::partial_match() +{ + List_iterator_fast<Item> equality_it(*equi_join_conds); + Item *cur_eq; + uint count_matches; + int error; + bool res; + + if (tmp_table->file->ha_rnd_init_with_error(1)) + { + res= FALSE; + goto end; + } + + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + /* + TIMOUR: + scan_table() also calls "table->null_row= 0;", why, do we need it? + */ + for (;;) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(tmp_table, error); + break; + } + } + + equality_it.rewind(); + count_matches= 0; + while ((cur_eq= equality_it++)) + { + DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM && + ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC); + if (!cur_eq->val_int() && !cur_eq->null_value) + break; + ++count_matches; + } + if (count_matches == tmp_table->s->fields) + { + res= TRUE; /* Found a matching row. */ + goto end; + } + } + + res= FALSE; +end: + tmp_table->file->ha_rnd_end(); + return res; +} + + +void subselect_table_scan_engine::cleanup() +{ +} |