MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs

* Implemented a second partial matching strategy via table scan. This strategy is a fallback when there is no memory for rowid merging. * Refactored the selection and creation of partial matching strategies, so that the choice of strategy is encapsulated in a separate method choose_partial_match_strategy(). * Refactored the representation of partial match strategies so that: - each strategy is represented by a polymorphic class, and - the base class for all partial match strategies contains common execution code. * Added an estimate of the memory needed for the rowid merge strategy, and the system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". * Fixed multiple problems with deallocation of resources by the partial match strategies. sql/mysql_priv.h: * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". sql/mysqld.cc: * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. sql/set_var.cc: * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. sql/sql_class.h: * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. support-files/build-tags: Newer versions of BZR require the recursive flag in order to list all files.
author: unknown <timour@askmonty.org> 2010-03-09 12:14:06 +0200
committer: unknown <timour@askmonty.org> 2010-03-09 12:14:06 +0200
commit: 6a138b7f014204d50686c67046963553ca96f1cf (patch)
tree: 6bbb1c4a226e291a50c10b9540ff856c466c5110 /sql/item_subselect.cc
parent: 96cf9a667c92df5da9b83bd22cb8833e95ed3b06 (diff)
download: mariadb-git-6a138b7f014204d50686c67046963553ca96f1cf.tar.gz
1 files changed, 504 insertions, 211 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 43c399fd07c..4e9267429cd 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exec()
 
 
 /*
-  TIMOUR: this needs more thinking, as exec() is a wrong IMO because:
-  - we don't need empty_result_set, as it is == 1 <=> when
-    item->value == 0
-  - scan_table() returns >0 even when there was no actuall error,
-    but we only found EOF while scanning.
-  - scan_table should not check table->status, but it should check
-    HA_ERR_END_OF_FILE
+  TIMOUR: write comment
 */
 
 int subselect_uniquesubquery_engine::index_lookup()
@@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::index_lookup()
   DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
   int error;
   TABLE *table= tab->table;
-  empty_result_set= TRUE;
-  table->status= 0;
  
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 0);
@@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::index_lookup()
                                         make_prev_keypart_map(tab->
                                                               ref.key_parts),
                                         HA_READ_KEY_EXACT);
-
   DBUG_PRINT("info", ("lookup result: %i", error));
-  if (error &&
-      error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
-    error= report_error(table, error);
-  else
+
+  if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
   {
-    error= 0;
-    table->null_row= 0;
-    if (!table->status && (!cond || cond->val_int()))
-    {
-      ((Item_in_subselect *) item)->value= 1;
-      empty_result_set= FALSE;
-    }
-    else
-      ((Item_in_subselect *) item)->value= 0;
+    /*
+      TIMOUR: I don't understand at all when do we need to call report_error.
+      In most places where we access an index, we don't do this. Why here?
+    */
+    error= report_error(table, error);
+    DBUG_RETURN(error);
   }
 
-  DBUG_RETURN(error);
+  table->null_row= 0;
+  if (!error && (!cond || cond->val_int()))
+    ((Item_in_subselect *) item)->value= 1;
+  else
+    ((Item_in_subselect *) item)->value= 0;
+
+  DBUG_RETURN(0);
 }
 
 
@@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no_tables()
   If max_keys > 1, then we need partial matching because there are
   more indexes than just the one we use during materialization to
   remove duplicates.
+
+  @note
+  TIMOUR: The schema-based analysis for partial matching can be done once for
+  prepared statement and remembered. It is done here to remove the need to
+  save/restore all related variables between each re-execution, thus making
+  the code simpler.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
 */
 
-void subselect_hash_sj_engine::set_strategy_using_schema()
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_schema()
 {
   Item_in_subselect *item_in= (Item_in_subselect *) item;
 
-  DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
-
   if (item_in->is_top_level_item())
-  {
-    strategy= COMPLETE_MATCH;
-    DBUG_VOID_RETURN;
-  }
+    return COMPLETE_MATCH;
   else
   {
     List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
@@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strategy_using_schema()
 
   /* If no column contains NULLs use regular hash index lookups. */
   if (count_partial_match_columns)
-    strategy= PARTIAL_MATCH;
-  else
-    strategy= COMPLETE_MATCH;
-  DBUG_VOID_RETURN;
+    return PARTIAL_MATCH;
+  return COMPLETE_MATCH;
 }
 
 
@@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strategy_using_schema()
   matching type of columns that cannot be NULL or that contain only NULLs.
   Based on this, the procedure determines the final execution strategy for
   the [NOT] IN predicate.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
 */
 
-void subselect_hash_sj_engine::set_strategy_using_data()
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_data()
 {
   Item_in_subselect *item_in= (Item_in_subselect *) item;
   select_materialize_with_stats *result_sink=
     (select_materialize_with_stats *) result;
   Item *outer_col;
 
-  DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data");
-
-  /* Call this procedure only if already selected partial matching. */
-  DBUG_ASSERT(strategy == PARTIAL_MATCH);
+  /*
+    If we already determined that a complete match is enough based on schema
+    information, nothing can be better.
+  */
+  if (strategy == COMPLETE_MATCH)
+    return COMPLETE_MATCH;
 
   for (uint i= 0; i < item_in->left_expr->cols(); i++)
   {
@@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strategy_using_data()
 
   /* If no column contains NULLs use regular hash index lookups. */
   if (!count_partial_match_columns)
-    strategy= COMPLETE_MATCH;
+    return COMPLETE_MATCH;
+  return PARTIAL_MATCH;
+}
 
-  DBUG_VOID_RETURN;
+
+void
+subselect_hash_sj_engine::choose_partial_match_strategy(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  size_t pm_buff_size;
+
+  DBUG_ASSERT(strategy == PARTIAL_MATCH);
+  /*
+    Choose according to global optimizer switch. If only one of the switches is
+    'ON', then the remaining strategy is the only possible one. The only cases
+    when this will be overriden is when the total size of all buffers for the
+    merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
+    or if there isn't enough physical memory to allocate the buffers.
+  */
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+       optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_SCAN;
+  else if
+     ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+      !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_MERGE;
+
+  /*
+    If both switches are ON, or both are OFF, we interpret that as "let the
+    optimizer decide". Perform a cost based choice between the two partial
+    matching strategies.
+  */
+  /*
+    TIMOUR: the above interpretation of the switch values could be changed to:
+    - if both are ON - let the optimizer decide,
+    - if both are OFF - do not use partial matching, therefore do not use
+      materialization in non-top-level predicates.
+    The problem with this is that we know for sure if we need partial matching
+    only after the subquery is materialized, and this is too late to revert to
+    the IN=>EXISTS strategy.
+  */
+  if (strategy == PARTIAL_MATCH)
+  {
+    /*
+      TIMOUR: Currently we use a super simplistic measure. This will be
+      addressed in a separate task.
+    */
+    if (tmp_table->file->stats.records < 100)
+      strategy= PARTIAL_MATCH_SCAN;
+    else
+      strategy= PARTIAL_MATCH_MERGE;
+  }
+
+  /* Check if there is enough memory for the rowid merge strategy. */
+  if (strategy == PARTIAL_MATCH_MERGE)
+  {
+    pm_buff_size= rowid_merge_buff_size(has_non_null_key,
+                                        has_covering_null_row,
+                                        partial_match_key_parts);
+    if (pm_buff_size > thd->variables.rowid_merge_buff_size)
+      strategy= PARTIAL_MATCH_SCAN;
+  }
+}
+
+
+/*
+  Compute the memory size of all buffers proportional to the number of rows
+  in tmp_table.
+
+  @details
+  If the result is bigger than thd->variables.rowid_merge_buff_size, partial
+  matching via merging is not applicable.
+*/
+
+size_t subselect_hash_sj_engine::rowid_merge_buff_size(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  size_t buff_size; /* Total size of all buffers used by partial matching. */
+  ha_rows row_count= tmp_table->file->stats.records;
+  uint rowid_length= tmp_table->file->ref_length;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+
+  /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
+  buff_size= row_count * rowid_length * sizeof(uchar);
+
+  if (has_non_null_key)
+  {
+    /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
+    buff_size+= row_count * sizeof(rownum_t);
+  }
+
+  if (!has_covering_null_row)
+  {
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue; /* In these cases we wouldn't construct Ordered keys. */
+
+      /* Add the size of Ordered_key::key_buff */
+      buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
+                         sizeof(rownum_t);
+      /* Add the size of Ordered_key::null_key */
+      buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i));
+    }
+  }
+
+  return buff_size;
 }
 
 
@@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
                             thd->mem_root))
     DBUG_RETURN(TRUE);
 
-  set_strategy_using_schema();
   /*
     Create and initialize a select result interceptor that stores the
     result stream in a temporary table. The temporary table itself is
@@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
               ((Item_in_subselect *) item)->left_expr->cols() ==
               tmp_table->key_info->key_parts);
 
-  if (make_semi_join_conds())
+  if (make_semi_join_conds() ||
+      /* A unique_engine is used both for complete and partial matching. */
+      !(lookup_engine= make_unique_engine()))
     DBUG_RETURN(TRUE);
 
   DBUG_RETURN(FALSE);
@@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds()
       DBUG_RETURN(TRUE);
     }
   }
-  if (semi_join_conds->fix_fields(thd, &semi_join_conds))
+  if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
     DBUG_RETURN(TRUE);
 
   DBUG_RETURN(FALSE);
@@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runtime()
     clause of the query, and it is not 'fixed' during JOIN::prepare.
   */
   if (semi_join_conds && !semi_join_conds->fixed &&
-      semi_join_conds->fix_fields(thd, &semi_join_conds))
+      semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
     return TRUE;
   /* Let our engine reuse this query plan for materialization. */
   materialize_join= materialize_engine->join;
@@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runtime()
 
 subselect_hash_sj_engine::~subselect_hash_sj_engine()
 {
+  delete lookup_engine;
   delete result;
   if (tmp_table)
     free_tmp_table(thd, tmp_table);
@@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_hash_sj_engine()
 
 void subselect_hash_sj_engine::cleanup()
 {
+  enum_engine_type lookup_engine_type= lookup_engine->engine_type();
   is_materialized= FALSE;
-  result->cleanup(); /* Resets the temp table as well. */
+  bitmap_clear_all(&non_null_key_parts);
+  bitmap_clear_all(&partial_match_key_parts);
+  count_partial_match_columns= 0;
+  count_null_only_columns= 0;
+  strategy= UNDEFINED;
   materialize_engine->cleanup();
+  if (lookup_engine_type == TABLE_SCAN_ENGINE ||
+      lookup_engine_type == ROWID_MERGE_ENGINE)
+  {
+    subselect_engine *inner_lookup_engine;
+    inner_lookup_engine=
+      ((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
+    /*
+      Partial match engines are recreated for each PS execution inside
+      subselect_hash_sj_engine::exec().
+    */
+    delete lookup_engine;
+    lookup_engine= inner_lookup_engine;
+  }
+  DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
+  lookup_engine->cleanup();
+  result->cleanup(); /* Resets the temp table as well. */
 }
 
 
@@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec()
 {
   Item_in_subselect *item_in= (Item_in_subselect *) item;
   SELECT_LEX *save_select= thd->lex->current_select;
+  subselect_partial_match_engine *pm_engine= NULL;
   int res= 0;
 
   DBUG_ENTER("subselect_hash_sj_engine::exec");
@@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec()
     DBUG_RETURN(FALSE);
   }
 
-  if (strategy == PARTIAL_MATCH)
-    set_strategy_using_data();
-
-  /* A unique_engine is used both for complete and partial matching. */
-  if (!(lookup_engine= make_unique_engine()))
-  {
-    res= 1;
-    goto err;
-  }
-
+  /*
+    TIMOUR: The schema-based analysis for partial matching can be done once for
+    prepared statement and remembered. It is done here to remove the need to
+    save/restore all related variables between each re-execution, thus making
+    the code simpler.
+  */
+  strategy= get_strategy_using_schema();
+  /* This call may discover that we don't need partial matching at all. */
+  strategy= get_strategy_using_data();
   if (strategy == PARTIAL_MATCH)
   {
-    subselect_rowid_merge_engine *rowid_merge_engine;
-    uint count_pm_keys;
-    MY_BITMAP *nn_key_parts;
-    bool has_covering_null_row;
+    uint count_pm_keys; /* Total number of keys needed for partial matching. */
+    MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */
+    uint covering_null_row_width;
     select_materialize_with_stats *result_sink=
       (select_materialize_with_stats *) result;
 
-    /* Total number of keys needed for partial matching. */
     nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ?
                   &non_null_key_parts : NULL;
 
-    has_covering_null_row= (result_sink->get_max_nulls_in_row() ==
-                            tmp_table->s->fields -
-                            (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0));
+    if (result_sink->get_max_nulls_in_row() ==
+        tmp_table->s->fields -
+        (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0))
+      covering_null_row_width= result_sink->get_max_nulls_in_row();
+    else
+      covering_null_row_width= 0;
 
-    if (has_covering_null_row)
+    if (covering_null_row_width)
       count_pm_keys= nn_key_parts ? 1 : 0;
     else
       count_pm_keys= count_partial_match_columns - count_null_only_columns +
         (nn_key_parts ? 1 : 0);
 
-    if (!(rowid_merge_engine=
-          new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
-                                           lookup_engine,
-                                           tmp_table,
-                                           count_pm_keys,
-                                           has_covering_null_row,
-                                           item, result)) ||
-        rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts))
+    choose_partial_match_strategy(test(nn_key_parts),
+                                  test(covering_null_row_width),
+                                  &partial_match_key_parts);
+    DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
+                strategy == PARTIAL_MATCH_SCAN);
+    if (strategy == PARTIAL_MATCH_MERGE)
     {
-      strategy= PARTIAL_MATCH_SCAN;
-      delete rowid_merge_engine;
-      /* TIMOUR: setup execution structures for partial match via scanning. */
+      pm_engine=
+        new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
+                                         lookup_engine, tmp_table,
+                                         count_pm_keys,
+                                         covering_null_row_width,
+                                         item, result,
+                                         semi_join_conds->argument_list());
+      if (!pm_engine ||
+          ((subselect_rowid_merge_engine*) pm_engine)->
+            init(nn_key_parts, &partial_match_key_parts))
+      {
+        /*
+          The call to init() would fail if there was not enough memory to allocate
+          all buffers for the rowid merge strategy. In this case revert to table
+          scanning which doesn't need any big buffers.
+        */
+        delete pm_engine;
+        pm_engine= NULL;
+        strategy= PARTIAL_MATCH_SCAN;
+      }
     }
-    else
+
+    if (strategy == PARTIAL_MATCH_SCAN)
     {
-      strategy= PARTIAL_MATCH_INDEX;
-      lookup_engine= rowid_merge_engine;
+      if (!(pm_engine=
+            new subselect_table_scan_engine((subselect_uniquesubquery_engine*)
+                                            lookup_engine, tmp_table,
+                                            item, result,
+                                            semi_join_conds->argument_list(),
+                                            covering_null_row_width)))
+      {
+        /* This is an irrecoverable error. */
+        res= 1;
+        goto err;
+      }
     }
   }
 
+  if (pm_engine)
+    lookup_engine= pm_engine;
   item_in->change_engine(lookup_engine);
 
 err:
@@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
 
 Ordered_key::~Ordered_key()
 {
-  /*
-    All data structures are allocated on thd->mem_root, thus we don't
-    free them here.
-  */
+  my_free((char*) key_buff, MYF(0));
+  bitmap_free(&null_key);
 }
 
 
@@ -4030,6 +4188,7 @@ void Ordered_key::cleanup()
   */
 }
 
+
 /*
   Initialize a multi-column index.
 */
@@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx)
 }
 
 
+/*
+  Allocate the buffers for both the row number, and the NULL-bitmap indexes.
+*/
+
 bool Ordered_key::alloc_keys_buffers()
 {
-  THD *thd= tbl->in_use;
-
   DBUG_ASSERT(key_buff_elements > 0);
 
-  if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements *
-                                         sizeof(rownum_t))))
+  if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t),
+                                        MYF(MY_WME))))
     return TRUE;
 
   /*
@@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers()
     (max_null_row - min_null_row), and then use min_null_row as
     lookup offset.
   */
-  if (bitmap_init_memroot(&null_key,
-                          /* this is max array index, we need count, so +1. */
-                          max_null_row + 1,
-                          thd->mem_root))
+  /* Notice that max_null_row is max array index, we need count, so +1. */
+  if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE))
     return TRUE;
 
   cur_key_idx= HA_POS_ERROR;
@@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys()
 
 
 /*
-  The probability that a certain row does not contain a NULL in some row in
-  a NULL-indexed column.
+  The fraction of rows that do not contain NULL in the columns indexed by
+  this key.
+
   @retval  1  if there are no NULLs
   @retval  0  if only NULLs
 */
@@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str)
 }
 
 
+subselect_partial_match_engine::subselect_partial_match_engine(
+  subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg, Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  uint covering_null_row_width_arg)
+  :subselect_engine(item_arg, result_arg),
+   tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+   equi_join_conds(equi_join_conds_arg),
+   covering_null_row_width(covering_null_row_width_arg)
+{}
+
+
+int subselect_partial_match_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  int res;
+
+  /* Try to find a matching row by index lookup. */
+  res= lookup_engine->copy_ref_key_simple();
+  if (res == -1)
+  {
+    /* The result is FALSE based on the outer reference. */
+    item_in->value= 0;
+    item_in->null_value= 0;
+    return 0;
+  }
+  else if (res == 0)
+  {
+    /* Search for a complete match. */
+    if ((res= lookup_engine->index_lookup()))
+    {
+      /* An error occured during lookup(). */
+      item_in->value= 0;
+      item_in->null_value= 0;
+      return res;
+    }
+    else if (item_in->value)
+    {
+      /*
+        A complete match was found, the result of IN is TRUE.
+        Notice: (this->item == lookup_engine->item)
+      */
+      return 0;
+    }
+  }
+
+  if (covering_null_row_width == tmp_table->s->fields)
+  {
+    /*
+      If there is a NULL-only row that coveres all columns the result of IN
+      is UNKNOWN. 
+    */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+    return 0;
+  }
+
+  /*
+    There is no complete match. Look for a partial match (UNKNOWN result), or
+    no match (FALSE).
+  */
+  if (tmp_table->file->inited)
+    tmp_table->file->ha_index_end();
+
+  if (partial_match())
+  {
+    /* The result of IN is UNKNOWN. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+  }
+  else
+  {
+    /* The result of IN is FALSE. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 0;
+    item_in->null_value= 0;
+  }
+
+  return 0;
+}
+
+
+void subselect_partial_match_engine::print(String *str,
+                                           enum_query_type query_type)
+{
+  /*
+    Should never be called as the actual engine cannot be known at query
+    optimization time.
+  */
+  DBUG_ASSERT(FALSE);
+}
+
+
 /*
   @param non_null_key_parts  
   @param partial_match_key_parts  A union of all single-column NULL key parts.
   @param count_partial_match_columns Number of NULL keyparts (set bits above).
+
+  @retval FALSE  the engine was initialized successfully
+  @retval TRUE   there was some (memory allocation) error during initialization,
+                 such errors should be interpreted as revert to other strategy
 */
 
 bool
@@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
     return FALSE;
   }
 
-  DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row &&
-                                         keys_count == 1 &&
-                                         non_null_key_parts));
-
+  DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width &&
+                                           keys_count == 1 &&
+                                           non_null_key_parts));
+  /*
+    Allocate buffers to hold the merged keys and the mapping between rowids and
+    row numbers.
+  */
   if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count *
                                                sizeof(Ordered_key*))) ||
-      !(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length *
-                                              sizeof(uchar))))
+      !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length *
+                                             sizeof(uchar), MYF(MY_WME))))
     return TRUE;
 
   /* Create the only non-NULL key if there is any. */
@@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
     non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
                                   0, 0, 0, row_num_to_rowid);
     if (non_null_key->init(non_null_key_parts))
-    {
-      // TIMOUR: revert to partial matching via scanning
       return TRUE;
-    }
     merge_keys[cur_keyid]= non_null_key;
     merge_keys[cur_keyid]->first();
     ++cur_keyid;
@@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
 
   /*
     If there is a covering NULL row, the only key that is needed is the
-    only non-NULL key that is already created above.
+    only non-NULL key that is already created above. We create keys on
+    NULL-able columns only if there is no covering NULL row.
   */
-  if (!has_covering_null_row)
+  if (!covering_null_row_width)
   {
     if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) ||
         bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) ||
@@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
                                      result_sink->get_max_null_of_col(i),
                                      row_num_to_rowid);
         if (merge_keys[cur_keyid]->init(i))
-        {
-          // TIMOUR: revert to partial matching via scanning
           return TRUE;
-        }
         merge_keys[cur_keyid]->first();
       }
       ++cur_keyid;
@@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
 
   if (init_queue(&pq, keys_count, 0, FALSE,
                  subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
-  {
-    // TIMOUR: revert to partial matching via scanning
     return TRUE;
-  }
 
   return FALSE;
 }
@@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
 
 subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
 {
-  delete_queue(&pq);
+  /* None of the resources below is allocated if there are no ordered keys. */
+  if (keys_count)
+  {
+    my_free((char*) row_num_to_rowid, MYF(0));
+    for (uint i= 0; i < keys_count; i++)
+      delete merge_keys[i];
+    delete_queue(&pq);
+    if (tmp_table->file->inited == handler::RND)
+      tmp_table->file->ha_rnd_end();
+  }
 }
 
 
 void subselect_rowid_merge_engine::cleanup()
 {
-  lookup_engine->cleanup();
-  /* Tell handler we don't need the index anymore */
-  if (tmp_table->file->inited)
-    tmp_table->file->ha_rnd_end();
-  queue_remove_all(&pq);
-}
-
-
-void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
-{
-  str->append(STRING_WITH_LEN("<rowid_merge>("));
-  for (uint i= 0; i < keys_count; i++)
-    merge_keys[i]->print(str);
-  str->append(')');
 }
 
 
@@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::partial_match()
   Ordered_key *cur_key;
   rownum_t cur_row_num;
   uint count_nulls_in_search_key= 0;
+  bool res= FALSE;
 
   /* If there is a non-NULL key, it must be the first key in the keys array. */
   DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
+
+  /* All data accesses during execution are via handler::ha_rnd_pos() */
+  tmp_table->file->ha_rnd_init(0);
+
   /* Check if there is a match for the columns of the only non-NULL key. */
   if (non_null_key && !non_null_key->lookup())
-    return FALSE;
+  {
+    res= FALSE;
+    goto end;
+  }
 
   /*
     If there is a NULL (sub)row that covers all NULL-able columns,
     then there is a guranteed partial match, and we don't need to search
     for the matching row.
    */
-  if (has_covering_null_row)
-    return TRUE;
+  if (covering_null_row_width)
+  {
+    res= TRUE;
+    goto end;
+  }
 
   if (non_null_key)
     queue_insert(&pq, (uchar *) non_null_key);
@@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::partial_match()
   if (count_nulls_in_search_key ==
       ((Item_in_subselect *) item)->left_expr->cols() -
       (non_null_key ? non_null_key->get_column_count() : 0))
-    return TRUE;
+  {
+    res= TRUE;
+    goto end;
+  }
 
   /*
     If there is no NULL (sub)row that covers all NULL columns, and there is no
     single match for any of the NULL columns, the result is FALSE.
   */
   if (pq.elements - test(non_null_key) == 0)
-    return FALSE;
+  {
+    res= FALSE;
+    goto end;
+  }
 
   DBUG_ASSERT(pq.elements);
 
@@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::partial_match()
       Check the only matching row of the only key min_key for NULL matches
       in the other columns.
     */
-    if (test_null_row(min_row_num))
-      return TRUE;
-    else
-      return FALSE;
+    res= test_null_row(min_row_num);
+    goto end;
   }
 
   while (TRUE)
@@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::partial_match()
       /* Follows from the correct use of priority queue. */
       DBUG_ASSERT(cur_row_num > min_row_num);
       if (test_null_row(min_row_num))
-        return TRUE;
+      {
+        res= TRUE;
+        goto end;
+      }
       else
       {
         min_key= cur_key;
@@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::partial_match()
     if (pq.elements == 0)
     {
       /* Check the last row of the last column in PQ for NULL matches. */
-      if (test_null_row(min_row_num))
-        return TRUE;
-      else
-        return FALSE;
+      res= test_null_row(min_row_num);
+      goto end;
     }
   }
 
-  /* We should never get here. */
+  /* We should never get here - all branches must be handled explicitly above. */
   DBUG_ASSERT(FALSE);
-  return FALSE;
+
+end:
+  tmp_table->file->ha_rnd_end();
+  return res;
 }
 
 
-int subselect_rowid_merge_engine::exec()
-{
-  Item_in_subselect *item_in= (Item_in_subselect *) item;
-  int res;
+subselect_table_scan_engine::subselect_table_scan_engine(
+  subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg,
+  Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  uint covering_null_row_width_arg)
+  :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
+                                  result_arg, equi_join_conds_arg,
+                                  covering_null_row_width_arg)
+{}
 
-  /* Try to find a matching row by index lookup. */
-  res= lookup_engine->copy_ref_key_simple();
-  if (res == -1)
-  {
-    /* The result is FALSE based on the outer reference. */
-    item_in->value= 0;
-    item_in->null_value= 0;
-    return 0;
-  }
-  else if (res == 0)
+
+/*
+  TIMOUR:
+  This method is based on subselect_uniquesubquery_engine::scan_table().
+  Consider refactoring somehow, 80% of the code is the same.
+
+  for each row_i in tmp_table
   {
-    if ((res= lookup_engine->index_lookup()))
+    count_matches= 0;
+    for each row element row_i[j]
     {
-      /* An error occured during lookup(). */
-      item_in->value= 0;
-      item_in->null_value= 0;
-      return res;
-    }
-    else if (item_in->value)
-    {
-      /*
-        A complete match was found, the result of IN is TRUE.
-        Notice: (this->item == lookup_engine->item)
-      */
-      return 0;
+      if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
+        ++count_matches;
     }
+    if (count_matches == outer_ref.elements)
+      return TRUE
   }
+  return FALSE
+*/
 
-  if (has_covering_null_row && !keys_count)
-  {
-    /*
-      If there is a NULL-only row that coveres all columns the result of IN
-      is UNKNOWN. 
-    */
-    item_in->value= 0;
-    /*
-      TIMOUR: which one is the right way to propagate an UNKNOWN result?
-      Should we also set empty_result_set= FALSE; ???
-    */
-    //item_in->was_null= 1;
-    item_in->null_value= 1;
-    return 0;
-  }
+bool subselect_table_scan_engine::partial_match()
+{
+  List_iterator_fast<Item> equality_it(*equi_join_conds);
+  Item *cur_eq;
+  uint count_matches;
+  int error;
+  bool res;
 
-  /* All data accesses during execution are via handler::ha_rnd_pos() */
-  if (tmp_table->file->inited)
-    tmp_table->file->ha_index_end();
-  tmp_table->file->ha_rnd_init(0);
+  tmp_table->file->ha_rnd_init(1);
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
   /*
-    There is no complete match. Look for a partial match (UNKNOWN result), or
-    no match (FALSE).
+  TIMOUR:
+  scan_table() also calls "table->null_row= 0;", why, do we need it?
   */
-  if (partial_match())
-  {
-    /* The result of IN is UNKNOWN. */
-    item_in->value= 0;
-    /*
-      TIMOUR: which one is the right way to propagate an UNKNOWN result?
-      Should we also set empty_result_set= FALSE; ???
-    */
-    //item_in->was_null= 1;
-    item_in->null_value= 1;
-  }
-  else
+  for (;;)
   {
-    /* The result of IN is FALSE. */
-    item_in->value= 0;
-    /*
-      TIMOUR: which one is the right way to propagate an UNKNOWN result?
-      Should we also set empty_result_set= FALSE; ???
-    */
-    //item_in->was_null= 0;
-    item_in->null_value= 0;
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(tmp_table, error);
+        break;
+      }
+    }
+
+    equality_it.rewind();
+    count_matches= 0;
+    while ((cur_eq= equality_it++))
+    {
+      DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
+                  ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
+      if (!cur_eq->val_int() && !cur_eq->null_value)
+        break;
+      ++count_matches;
+    }
+    if (count_matches == tmp_table->s->fields)
+    {
+      res= TRUE; /* Found a matching row. */
+      goto end;
+    }
   }
+
+  res= FALSE;
+end:
   tmp_table->file->ha_rnd_end();
+  return res;
+}
 
-  return 0;
+
+void subselect_table_scan_engine::cleanup()
+{
 }
author	unknown <timour@askmonty.org>	2010-03-09 12:14:06 +0200
committer	unknown <timour@askmonty.org>	2010-03-09 12:14:06 +0200
commit	6a138b7f014204d50686c67046963553ca96f1cf (patch)
tree	6bbb1c4a226e291a50c10b9540ff856c466c5110 /sql/item_subselect.cc
parent	96cf9a667c92df5da9b83bd22cb8833e95ed3b06 (diff)
download	mariadb-git-6a138b7f014204d50686c67046963553ca96f1cf.tar.gz