diff options
author | unknown <timour@askmonty.org> | 2010-03-09 12:14:06 +0200 |
---|---|---|
committer | unknown <timour@askmonty.org> | 2010-03-09 12:14:06 +0200 |
commit | 6a138b7f014204d50686c67046963553ca96f1cf (patch) | |
tree | 6bbb1c4a226e291a50c10b9540ff856c466c5110 /sql/item_subselect.cc | |
parent | 96cf9a667c92df5da9b83bd22cb8833e95ed3b06 (diff) | |
download | mariadb-git-6a138b7f014204d50686c67046963553ca96f1cf.tar.gz |
MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
* Implemented a second partial matching strategy via table scan.
This strategy is a fallback when there is no memory for rowid merging.
* Refactored the selection and creation of partial matching strategies,
so that the choice of strategy is encapsulated in a separate method
choose_partial_match_strategy().
* Refactored the representation of partial match strategies so that:
- each strategy is represented by a polymorphic class, and
- the base class for all partial match strategies contains common
execution code.
* Added an estimate of the memory needed for the rowid merge strategy,
and the system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
* Fixed multiple problems with deallocation of resources by the partial
match strategies.
sql/mysql_priv.h:
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
sql/mysqld.cc:
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
sql/set_var.cc:
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
sql/sql_class.h:
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
support-files/build-tags:
Newer versions of BZR require the recursive flag in order to list all files.
Diffstat (limited to 'sql/item_subselect.cc')
-rw-r--r-- | sql/item_subselect.cc | 715 |
1 files changed, 504 insertions, 211 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 43c399fd07c..4e9267429cd 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exec() /* - TIMOUR: this needs more thinking, as exec() is a wrong IMO because: - - we don't need empty_result_set, as it is == 1 <=> when - item->value == 0 - - scan_table() returns >0 even when there was no actuall error, - but we only found EOF while scanning. - - scan_table should not check table->status, but it should check - HA_ERR_END_OF_FILE + TIMOUR: write comment */ int subselect_uniquesubquery_engine::index_lookup() @@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::index_lookup() DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup"); int error; TABLE *table= tab->table; - empty_result_set= TRUE; - table->status= 0; if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 0); @@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::index_lookup() make_prev_keypart_map(tab-> ref.key_parts), HA_READ_KEY_EXACT); - DBUG_PRINT("info", ("lookup result: %i", error)); - if (error && - error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) - error= report_error(table, error); - else + + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { - error= 0; - table->null_row= 0; - if (!table->status && (!cond || cond->val_int())) - { - ((Item_in_subselect *) item)->value= 1; - empty_result_set= FALSE; - } - else - ((Item_in_subselect *) item)->value= 0; + /* + TIMOUR: I don't understand at all when do we need to call report_error. + In most places where we access an index, we don't do this. Why here? + */ + error= report_error(table, error); + DBUG_RETURN(error); } - DBUG_RETURN(error); + table->null_row= 0; + if (!error && (!cond || cond->val_int())) + ((Item_in_subselect *) item)->value= 1; + else + ((Item_in_subselect *) item)->value= 0; + + DBUG_RETURN(0); } @@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no_tables() If max_keys > 1, then we need partial matching because there are more indexes than just the one we use during materialization to remove duplicates. + + @note + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used */ -void subselect_hash_sj_engine::set_strategy_using_schema() +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_schema() { Item_in_subselect *item_in= (Item_in_subselect *) item; - DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema"); - if (item_in->is_top_level_item()) - { - strategy= COMPLETE_MATCH; - DBUG_VOID_RETURN; - } + return COMPLETE_MATCH; else { List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); @@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strategy_using_schema() /* If no column contains NULLs use regular hash index lookups. */ if (count_partial_match_columns) - strategy= PARTIAL_MATCH; - else - strategy= COMPLETE_MATCH; - DBUG_VOID_RETURN; + return PARTIAL_MATCH; + return COMPLETE_MATCH; } @@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strategy_using_schema() matching type of columns that cannot be NULL or that contain only NULLs. Based on this, the procedure determines the final execution strategy for the [NOT] IN predicate. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used */ -void subselect_hash_sj_engine::set_strategy_using_data() +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_data() { Item_in_subselect *item_in= (Item_in_subselect *) item; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; Item *outer_col; - DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data"); - - /* Call this procedure only if already selected partial matching. */ - DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + If we already determined that a complete match is enough based on schema + information, nothing can be better. + */ + if (strategy == COMPLETE_MATCH) + return COMPLETE_MATCH; for (uint i= 0; i < item_in->left_expr->cols(); i++) { @@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strategy_using_data() /* If no column contains NULLs use regular hash index lookups. */ if (!count_partial_match_columns) - strategy= COMPLETE_MATCH; + return COMPLETE_MATCH; + return PARTIAL_MATCH; +} - DBUG_VOID_RETURN; + +void +subselect_hash_sj_engine::choose_partial_match_strategy( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t pm_buff_size; + + DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + Choose according to global optimizer switch. If only one of the switches is + 'ON', then the remaining strategy is the only possible one. The only cases + when this will be overriden is when the total size of all buffers for the + merge strategy is bigger than the 'rowid_merge_buff_size' system variable, + or if there isn't enough physical memory to allocate the buffers. + */ + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_SCAN; + else if + ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_MERGE; + + /* + If both switches are ON, or both are OFF, we interpret that as "let the + optimizer decide". Perform a cost based choice between the two partial + matching strategies. + */ + /* + TIMOUR: the above interpretation of the switch values could be changed to: + - if both are ON - let the optimizer decide, + - if both are OFF - do not use partial matching, therefore do not use + materialization in non-top-level predicates. + The problem with this is that we know for sure if we need partial matching + only after the subquery is materialized, and this is too late to revert to + the IN=>EXISTS strategy. + */ + if (strategy == PARTIAL_MATCH) + { + /* + TIMOUR: Currently we use a super simplistic measure. This will be + addressed in a separate task. + */ + if (tmp_table->file->stats.records < 100) + strategy= PARTIAL_MATCH_SCAN; + else + strategy= PARTIAL_MATCH_MERGE; + } + + /* Check if there is enough memory for the rowid merge strategy. */ + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_buff_size= rowid_merge_buff_size(has_non_null_key, + has_covering_null_row, + partial_match_key_parts); + if (pm_buff_size > thd->variables.rowid_merge_buff_size) + strategy= PARTIAL_MATCH_SCAN; + } +} + + +/* + Compute the memory size of all buffers proportional to the number of rows + in tmp_table. + + @details + If the result is bigger than thd->variables.rowid_merge_buff_size, partial + matching via merging is not applicable. +*/ + +size_t subselect_hash_sj_engine::rowid_merge_buff_size( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t buff_size; /* Total size of all buffers used by partial matching. */ + ha_rows row_count= tmp_table->file->stats.records; + uint rowid_length= tmp_table->file->ref_length; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */ + buff_size= row_count * rowid_length * sizeof(uchar); + + if (has_non_null_key) + { + /* Add the size of Ordered_key::key_buff of the only non-NULL key. */ + buff_size+= row_count * sizeof(rownum_t); + } + + if (!has_covering_null_row) + { + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i) || + result_sink->get_null_count_of_col(i) == row_count) + continue; /* In these cases we wouldn't construct Ordered keys. */ + + /* Add the size of Ordered_key::key_buff */ + buff_size+= (row_count - result_sink->get_null_count_of_col(i)) * + sizeof(rownum_t); + /* Add the size of Ordered_key::null_key */ + buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i)); + } + } + + return buff_size; } @@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) thd->mem_root)) DBUG_RETURN(TRUE); - set_strategy_using_schema(); /* Create and initialize a select result interceptor that stores the result stream in a temporary table. The temporary table itself is @@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) ((Item_in_subselect *) item)->left_expr->cols() == tmp_table->key_info->key_parts); - if (make_semi_join_conds()) + if (make_semi_join_conds() || + /* A unique_engine is used both for complete and partial matching. */ + !(lookup_engine= make_unique_engine())) DBUG_RETURN(TRUE); DBUG_RETURN(FALSE); @@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() DBUG_RETURN(TRUE); } } - if (semi_join_conds->fix_fields(thd, &semi_join_conds)) + if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) DBUG_RETURN(TRUE); DBUG_RETURN(FALSE); @@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runtime() clause of the query, and it is not 'fixed' during JOIN::prepare. */ if (semi_join_conds && !semi_join_conds->fixed && - semi_join_conds->fix_fields(thd, &semi_join_conds)) + semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) return TRUE; /* Let our engine reuse this query plan for materialization. */ materialize_join= materialize_engine->join; @@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runtime() subselect_hash_sj_engine::~subselect_hash_sj_engine() { + delete lookup_engine; delete result; if (tmp_table) free_tmp_table(thd, tmp_table); @@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_hash_sj_engine() void subselect_hash_sj_engine::cleanup() { + enum_engine_type lookup_engine_type= lookup_engine->engine_type(); is_materialized= FALSE; - result->cleanup(); /* Resets the temp table as well. */ + bitmap_clear_all(&non_null_key_parts); + bitmap_clear_all(&partial_match_key_parts); + count_partial_match_columns= 0; + count_null_only_columns= 0; + strategy= UNDEFINED; materialize_engine->cleanup(); + if (lookup_engine_type == TABLE_SCAN_ENGINE || + lookup_engine_type == ROWID_MERGE_ENGINE) + { + subselect_engine *inner_lookup_engine; + inner_lookup_engine= + ((subselect_partial_match_engine*) lookup_engine)->lookup_engine; + /* + Partial match engines are recreated for each PS execution inside + subselect_hash_sj_engine::exec(). + */ + delete lookup_engine; + lookup_engine= inner_lookup_engine; + } + DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); + lookup_engine->cleanup(); + result->cleanup(); /* Resets the temp table as well. */ } @@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec() { Item_in_subselect *item_in= (Item_in_subselect *) item; SELECT_LEX *save_select= thd->lex->current_select; + subselect_partial_match_engine *pm_engine= NULL; int res= 0; DBUG_ENTER("subselect_hash_sj_engine::exec"); @@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec() DBUG_RETURN(FALSE); } - if (strategy == PARTIAL_MATCH) - set_strategy_using_data(); - - /* A unique_engine is used both for complete and partial matching. */ - if (!(lookup_engine= make_unique_engine())) - { - res= 1; - goto err; - } - + /* + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + */ + strategy= get_strategy_using_schema(); + /* This call may discover that we don't need partial matching at all. */ + strategy= get_strategy_using_data(); if (strategy == PARTIAL_MATCH) { - subselect_rowid_merge_engine *rowid_merge_engine; - uint count_pm_keys; - MY_BITMAP *nn_key_parts; - bool has_covering_null_row; + uint count_pm_keys; /* Total number of keys needed for partial matching. */ + MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */ + uint covering_null_row_width; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; - /* Total number of keys needed for partial matching. */ nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ? &non_null_key_parts : NULL; - has_covering_null_row= (result_sink->get_max_nulls_in_row() == - tmp_table->s->fields - - (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)); + if (result_sink->get_max_nulls_in_row() == + tmp_table->s->fields - + (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)) + covering_null_row_width= result_sink->get_max_nulls_in_row(); + else + covering_null_row_width= 0; - if (has_covering_null_row) + if (covering_null_row_width) count_pm_keys= nn_key_parts ? 1 : 0; else count_pm_keys= count_partial_match_columns - count_null_only_columns + (nn_key_parts ? 1 : 0); - if (!(rowid_merge_engine= - new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) - lookup_engine, - tmp_table, - count_pm_keys, - has_covering_null_row, - item, result)) || - rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts)) + choose_partial_match_strategy(test(nn_key_parts), + test(covering_null_row_width), + &partial_match_key_parts); + DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE || + strategy == PARTIAL_MATCH_SCAN); + if (strategy == PARTIAL_MATCH_MERGE) { - strategy= PARTIAL_MATCH_SCAN; - delete rowid_merge_engine; - /* TIMOUR: setup execution structures for partial match via scanning. */ + pm_engine= + new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + count_pm_keys, + covering_null_row_width, + item, result, + semi_join_conds->argument_list()); + if (!pm_engine || + ((subselect_rowid_merge_engine*) pm_engine)-> + init(nn_key_parts, &partial_match_key_parts)) + { + /* + The call to init() would fail if there was not enough memory to allocate + all buffers for the rowid merge strategy. In this case revert to table + scanning which doesn't need any big buffers. + */ + delete pm_engine; + pm_engine= NULL; + strategy= PARTIAL_MATCH_SCAN; + } } - else + + if (strategy == PARTIAL_MATCH_SCAN) { - strategy= PARTIAL_MATCH_INDEX; - lookup_engine= rowid_merge_engine; + if (!(pm_engine= + new subselect_table_scan_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + item, result, + semi_join_conds->argument_list(), + covering_null_row_width))) + { + /* This is an irrecoverable error. */ + res= 1; + goto err; + } } } + if (pm_engine) + lookup_engine= pm_engine; item_in->change_engine(lookup_engine); err: @@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, Ordered_key::~Ordered_key() { - /* - All data structures are allocated on thd->mem_root, thus we don't - free them here. - */ + my_free((char*) key_buff, MYF(0)); + bitmap_free(&null_key); } @@ -4030,6 +4188,7 @@ void Ordered_key::cleanup() */ } + /* Initialize a multi-column index. */ @@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx) } +/* + Allocate the buffers for both the row number, and the NULL-bitmap indexes. +*/ + bool Ordered_key::alloc_keys_buffers() { - THD *thd= tbl->in_use; - DBUG_ASSERT(key_buff_elements > 0); - if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements * - sizeof(rownum_t)))) + if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t), + MYF(MY_WME)))) return TRUE; /* @@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers() (max_null_row - min_null_row), and then use min_null_row as lookup offset. */ - if (bitmap_init_memroot(&null_key, - /* this is max array index, we need count, so +1. */ - max_null_row + 1, - thd->mem_root)) + /* Notice that max_null_row is max array index, we need count, so +1. */ + if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE)) return TRUE; cur_key_idx= HA_POS_ERROR; @@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys() /* - The probability that a certain row does not contain a NULL in some row in - a NULL-indexed column. + The fraction of rows that do not contain NULL in the columns indexed by + this key. + @retval 1 if there are no NULLs @retval 0 if only NULLs */ @@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str) } +subselect_partial_match_engine::subselect_partial_match_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(engine_arg), + equi_join_conds(equi_join_conds_arg), + covering_null_row_width(covering_null_row_width_arg) +{} + + +int subselect_partial_match_engine::exec() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + int res; + + /* Try to find a matching row by index lookup. */ + res= lookup_engine->copy_ref_key_simple(); + if (res == -1) + { + /* The result is FALSE based on the outer reference. */ + item_in->value= 0; + item_in->null_value= 0; + return 0; + } + else if (res == 0) + { + /* Search for a complete match. */ + if ((res= lookup_engine->index_lookup())) + { + /* An error occured during lookup(). */ + item_in->value= 0; + item_in->null_value= 0; + return res; + } + else if (item_in->value) + { + /* + A complete match was found, the result of IN is TRUE. + Notice: (this->item == lookup_engine->item) + */ + return 0; + } + } + + if (covering_null_row_width == tmp_table->s->fields) + { + /* + If there is a NULL-only row that coveres all columns the result of IN + is UNKNOWN. + */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + return 0; + } + + /* + There is no complete match. Look for a partial match (UNKNOWN result), or + no match (FALSE). + */ + if (tmp_table->file->inited) + tmp_table->file->ha_index_end(); + + if (partial_match()) + { + /* The result of IN is UNKNOWN. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + } + else + { + /* The result of IN is FALSE. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 0; + item_in->null_value= 0; + } + + return 0; +} + + +void subselect_partial_match_engine::print(String *str, + enum_query_type query_type) +{ + /* + Should never be called as the actual engine cannot be known at query + optimization time. + */ + DBUG_ASSERT(FALSE); +} + + /* @param non_null_key_parts @param partial_match_key_parts A union of all single-column NULL key parts. @param count_partial_match_columns Number of NULL keyparts (set bits above). + + @retval FALSE the engine was initialized successfully + @retval TRUE there was some (memory allocation) error during initialization, + such errors should be interpreted as revert to other strategy */ bool @@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, return FALSE; } - DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row && - keys_count == 1 && - non_null_key_parts)); - + DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width && + keys_count == 1 && + non_null_key_parts)); + /* + Allocate buffers to hold the merged keys and the mapping between rowids and + row numbers. + */ if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count * sizeof(Ordered_key*))) || - !(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length * - sizeof(uchar)))) + !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length * + sizeof(uchar), MYF(MY_WME)))) return TRUE; /* Create the only non-NULL key if there is any. */ @@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, 0, 0, 0, row_num_to_rowid); if (non_null_key->init(non_null_key_parts)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } merge_keys[cur_keyid]= non_null_key; merge_keys[cur_keyid]->first(); ++cur_keyid; @@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, /* If there is a covering NULL row, the only key that is needed is the - only non-NULL key that is already created above. + only non-NULL key that is already created above. We create keys on + NULL-able columns only if there is no covering NULL row. */ - if (!has_covering_null_row) + if (!covering_null_row_width) { if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) || bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) || @@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, result_sink->get_max_null_of_col(i), row_num_to_rowid); if (merge_keys[cur_keyid]->init(i)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } merge_keys[cur_keyid]->first(); } ++cur_keyid; @@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, if (init_queue(&pq, keys_count, 0, FALSE, subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } return FALSE; } @@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, subselect_rowid_merge_engine::~subselect_rowid_merge_engine() { - delete_queue(&pq); + /* None of the resources below is allocated if there are no ordered keys. */ + if (keys_count) + { + my_free((char*) row_num_to_rowid, MYF(0)); + for (uint i= 0; i < keys_count; i++) + delete merge_keys[i]; + delete_queue(&pq); + if (tmp_table->file->inited == handler::RND) + tmp_table->file->ha_rnd_end(); + } } void subselect_rowid_merge_engine::cleanup() { - lookup_engine->cleanup(); - /* Tell handler we don't need the index anymore */ - if (tmp_table->file->inited) - tmp_table->file->ha_rnd_end(); - queue_remove_all(&pq); -} - - -void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type) -{ - str->append(STRING_WITH_LEN("<rowid_merge>(")); - for (uint i= 0; i < keys_count; i++) - merge_keys[i]->print(str); - str->append(')'); } @@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::partial_match() Ordered_key *cur_key; rownum_t cur_row_num; uint count_nulls_in_search_key= 0; + bool res= FALSE; /* If there is a non-NULL key, it must be the first key in the keys array. */ DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + + /* All data accesses during execution are via handler::ha_rnd_pos() */ + tmp_table->file->ha_rnd_init(0); + /* Check if there is a match for the columns of the only non-NULL key. */ if (non_null_key && !non_null_key->lookup()) - return FALSE; + { + res= FALSE; + goto end; + } /* If there is a NULL (sub)row that covers all NULL-able columns, then there is a guranteed partial match, and we don't need to search for the matching row. */ - if (has_covering_null_row) - return TRUE; + if (covering_null_row_width) + { + res= TRUE; + goto end; + } if (non_null_key) queue_insert(&pq, (uchar *) non_null_key); @@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::partial_match() if (count_nulls_in_search_key == ((Item_in_subselect *) item)->left_expr->cols() - (non_null_key ? non_null_key->get_column_count() : 0)) - return TRUE; + { + res= TRUE; + goto end; + } /* If there is no NULL (sub)row that covers all NULL columns, and there is no single match for any of the NULL columns, the result is FALSE. */ if (pq.elements - test(non_null_key) == 0) - return FALSE; + { + res= FALSE; + goto end; + } DBUG_ASSERT(pq.elements); @@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::partial_match() Check the only matching row of the only key min_key for NULL matches in the other columns. */ - if (test_null_row(min_row_num)) - return TRUE; - else - return FALSE; + res= test_null_row(min_row_num); + goto end; } while (TRUE) @@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::partial_match() /* Follows from the correct use of priority queue. */ DBUG_ASSERT(cur_row_num > min_row_num); if (test_null_row(min_row_num)) - return TRUE; + { + res= TRUE; + goto end; + } else { min_key= cur_key; @@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::partial_match() if (pq.elements == 0) { /* Check the last row of the last column in PQ for NULL matches. */ - if (test_null_row(min_row_num)) - return TRUE; - else - return FALSE; + res= test_null_row(min_row_num); + goto end; } } - /* We should never get here. */ + /* We should never get here - all branches must be handled explicitly above. */ DBUG_ASSERT(FALSE); - return FALSE; + +end: + tmp_table->file->ha_rnd_end(); + return res; } -int subselect_rowid_merge_engine::exec() -{ - Item_in_subselect *item_in= (Item_in_subselect *) item; - int res; +subselect_table_scan_engine::subselect_table_scan_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg) +{} - /* Try to find a matching row by index lookup. */ - res= lookup_engine->copy_ref_key_simple(); - if (res == -1) - { - /* The result is FALSE based on the outer reference. */ - item_in->value= 0; - item_in->null_value= 0; - return 0; - } - else if (res == 0) + +/* + TIMOUR: + This method is based on subselect_uniquesubquery_engine::scan_table(). + Consider refactoring somehow, 80% of the code is the same. + + for each row_i in tmp_table { - if ((res= lookup_engine->index_lookup())) + count_matches= 0; + for each row element row_i[j] { - /* An error occured during lookup(). */ - item_in->value= 0; - item_in->null_value= 0; - return res; - } - else if (item_in->value) - { - /* - A complete match was found, the result of IN is TRUE. - Notice: (this->item == lookup_engine->item) - */ - return 0; + if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j]) + ++count_matches; } + if (count_matches == outer_ref.elements) + return TRUE } + return FALSE +*/ - if (has_covering_null_row && !keys_count) - { - /* - If there is a NULL-only row that coveres all columns the result of IN - is UNKNOWN. - */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 1; - item_in->null_value= 1; - return 0; - } +bool subselect_table_scan_engine::partial_match() +{ + List_iterator_fast<Item> equality_it(*equi_join_conds); + Item *cur_eq; + uint count_matches; + int error; + bool res; - /* All data accesses during execution are via handler::ha_rnd_pos() */ - if (tmp_table->file->inited) - tmp_table->file->ha_index_end(); - tmp_table->file->ha_rnd_init(0); + tmp_table->file->ha_rnd_init(1); + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); /* - There is no complete match. Look for a partial match (UNKNOWN result), or - no match (FALSE). + TIMOUR: + scan_table() also calls "table->null_row= 0;", why, do we need it? */ - if (partial_match()) - { - /* The result of IN is UNKNOWN. */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 1; - item_in->null_value= 1; - } - else + for (;;) { - /* The result of IN is FALSE. */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 0; - item_in->null_value= 0; + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(tmp_table, error); + break; + } + } + + equality_it.rewind(); + count_matches= 0; + while ((cur_eq= equality_it++)) + { + DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM && + ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC); + if (!cur_eq->val_int() && !cur_eq->null_value) + break; + ++count_matches; + } + if (count_matches == tmp_table->s->fields) + { + res= TRUE; /* Found a matching row. */ + goto end; + } } + + res= FALSE; +end: tmp_table->file->ha_rnd_end(); + return res; +} - return 0; + +void subselect_table_scan_engine::cleanup() +{ } |