MWL#17: Table elimination

- Moved table elimination code to sql/opt_table_elimination.cc - Added comments .bzrignore: MWL#17: Table elimination - Moved table elimination code to sql/opt_table_elimination.cc libmysqld/Makefile.am: MWL#17: Table elimination - Moved table elimination code to sql/opt_table_elimination.cc sql/CMakeLists.txt: MWL#17: Table elimination - Moved table elimination code to sql/opt_table_elimination.cc sql/Makefile.am: MWL#17: Table elimination - Moved table elimination code to sql/opt_table_elimination.cc
author: Sergey Petrunia <psergey@askmonty.org> 2009-06-25 14:05:53 +0400
committer: Sergey Petrunia <psergey@askmonty.org> 2009-06-25 14:05:53 +0400
commit: 4102605fba3edab2935b7b1d46b7c7569bd889e7 (patch)
tree: 045921bc2673d3dff38340a417242024dc08e936 /sql
parent: defbdce7e8c69df85cdd8c630643ef1152330660 (diff)
download: mariadb-git-4102605fba3edab2935b7b1d46b7c7569bd889e7.tar.gz
9 files changed, 546 insertions, 403 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 83872803dd4..6c15f607f58 100755
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -73,7 +73,7 @@ ADD_EXECUTABLE(mysqld
                partition_info.cc rpl_utility.cc rpl_injector.cc sql_locale.cc
                rpl_rli.cc rpl_mi.cc sql_servers.cc
                sql_connect.cc scheduler.cc 
-               sql_profile.cc event_parse_data.cc
+               sql_profile.cc event_parse_data.cc opt_table_elimination.cc
                ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
                ${PROJECT_SOURCE_DIR}/sql/sql_yacc.h
                ${PROJECT_SOURCE_DIR}/include/mysqld_error.h
diff --git a/sql/Makefile.am b/sql/Makefile.am
index a3559b38ce4..1237bcb21cb 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -121,7 +121,8 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
                         event_queue.cc event_db_repository.cc events.cc \
 			sql_plugin.cc sql_binlog.cc \
 			sql_builtin.cc sql_tablespace.cc partition_info.cc \
-			sql_servers.cc event_parse_data.cc
+			sql_servers.cc event_parse_data.cc \
+                        opt_table_elimination.cc
 
 nodist_mysqld_SOURCES =	mini_client_errors.c pack.c client.c my_time.c my_user.c 
 
diff --git a/sql/item.cc b/sql/item.cc
index d317b16a264..3cecb8b6e34 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1915,17 +1915,22 @@ void Item_field::reset_field(Field *f)
   name= (char*) f->field_name;
 }
 
+
 bool Item_field::check_column_usage_processor(uchar *arg)
 {
   Field_processor_info* info=(Field_processor_info*)arg;
+
+  /* It is ok if this is a column of an allowed table: */
   if (used_tables() & ~info->allowed_tables)
     return FALSE;
 
   if (field->table == info->table)
   {
+    /* It is not ok to use columns that are not part of the key of interest: */
     if (!(field->part_of_key.is_set(info->keyno)))
        return TRUE;
-
+    
+    /* Find which key part we're using and mark it in needed_key_parts */
     KEY *key= &field->table->key_info[info->keyno];
     for (uint part= 0; part < key->key_parts; part++)
     {
@@ -1935,10 +1940,17 @@ bool Item_field::check_column_usage_processor(uchar *arg)
         break;
       }
     }
+    return FALSE;
   }
-  return FALSE;
+
+  /* 
+    We get here when this refers to a table that's neither the table of
+    interest, nor one of the allowed tables. 
+  */
+  return TRUE;
 }
 
+
 const char *Item_ident::full_name() const
 {
   char *tmp;
diff --git a/sql/item.h b/sql/item.h
index e9fb39443bc..6f0bf02bdae 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -731,7 +731,11 @@ public:
   virtual bool val_bool_result() { return val_bool(); }
   virtual bool is_null_result() { return is_null(); }
 
-  /* bit map of tables used by item */
+  /* 
+    Bitmap of tables used by item
+    (note: if you need to check dependencies on individual columns, check out
+     check_column_usage_processor)
+  */
   virtual table_map used_tables() const { return (table_map) 0L; }
   /*
     Return table map of tables that can't be NULL tables (tables that are
@@ -1013,7 +1017,7 @@ public:
   bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); 
 };
 
-
+/* Data for Item::check_column_usage_processor */
 typedef struct  
 {
   table_map allowed_tables;
@@ -1022,6 +1026,7 @@ typedef struct
   uint needed_key_parts;
 } Field_processor_info;
 
+
 class sp_head;
 
 
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 7559a183e60..45b540f5009 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -250,7 +250,7 @@ bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
       if (lex->having && (lex->having)->walk(processor, walk_subquery,
                                              argument))
         return 1;
-      /* TODO: why doesn't this walk the OUTER JOINs' ON expressions */
+      /* TODO: why does this walk WHERE/HAVING but not ON expressions of outer joins? */
 
       while ((item=li++))
       {
diff --git a/sql/item_sum.h b/sql/item_sum.h
index aec5830f381..e884452d6e6 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -351,9 +351,10 @@ public:
     Return bitmap of tables that are needed to evaluate the item.
 
     The implementation takes into account the used strategy: items resolved
-    at optimization phase report 0.
-    Items that depend on the number of rows only, e.g. COUNT(*) will report
-    zero, but will still false from const_item().
+    at optimization phase will report 0.
+    Items that depend on the number of join output records, but not columns
+    of any particular table (like COUNT(*)) will report 0 from used_tables(),
+    but will still return false from const_item().
   */
   table_map used_tables() const { return used_tables_cache; }
   void update_used_tables ();
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
new file mode 100644
index 00000000000..2ba287ea1f8
--- /dev/null
+++ b/sql/opt_table_elimination.cc
@@ -0,0 +1,493 @@
+/**
+  @file
+
+  @brief
+    Table Elimination Module
+
+  @defgroup Table_Elimination Table Elimination Module
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/*
+  OVERVIEW
+    The module has one entry point - eliminate_tables() function, which one 
+    needs to call (once) sometime after update_ref_and_keys() but before the
+    join optimization.  
+    eliminate_tables() operates over the JOIN structures. Logically, it
+    removes the right sides of outer join nests. Physically, it changes the
+    following members:
+
+    * Eliminated tables are marked as constant and moved to the front of the
+      join order.
+    * In addition to this, they are recorded in JOIN::eliminated_tables bitmap.
+
+    * All join nests have their NESTED_JOIN::n_tables updated to discount
+      the eliminated tables
+
+    * Items that became disused because they were in the ON expression of an 
+      eliminated outer join are notified by means of the Item tree walk which 
+      calls Item::mark_as_eliminated_processor for every item
+      - At the moment the only Item that cares is Item_subselect with its 
+        Item_subselect::eliminated flag which is used by EXPLAIN code to
+        check if the subquery should be shown in EXPLAIN.
+
+    Table elimination is intended to be done on every PS re-execution.
+*/
+
+static int
+eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list,
+                               table_map used_tables_elsewhere, 
+                               uint *const_tbl_count, table_map *const_tables);
+static bool table_has_one_match(TABLE *table, table_map bound_tables);
+static void 
+mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count,
+                         table_map *const_tables);
+static bool 
+extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, 
+                                KEYUSE *key_start, KEYUSE *key_end, 
+                                uint n_keyuses, table_map bound_parts);
+
+/*
+  Perform table elimination
+
+  SYNOPSIS
+    eliminate_tables()
+      join                   Join to work on
+      const_tbl_count INOUT  Number of constant tables (this includes
+                             eliminated tables)
+      const_tables    INOUT  Bitmap of constant tables
+
+  DESCRIPTION
+   
+   TODO fix comment
+  
+    SELECT * FROM t1 LEFT JOIN 
+      (t2 JOIN t3) ON t3.primary_key=t1.col AND 
+                      t4.primary_key= t2.col
+
+  CRITERIA FOR REMOVING ONE OJ NEST
+    we can't rely on sole presense of eq_refs. Because if we do, we'll miss
+    things like this:
+
+    SELECT * FROM flights LEFT JOIN 
+      (pax as S1 JOIN pax as S2 ON S2.id=S1.spouse AND s1.id=s2.spouse)
+  
+   (no-polygamy schema/query but there can be many couples on the flight)
+   ..
+  
+  REMOVAL PROCESS
+    We can remove an inner side of an outer join if it there is a warranty
+    that it will produce not more than one record:
+
+     ... t1 LEFT JOIN t2 ON (t2.unique_key = expr) ...
+
+    For nested outer joins:
+    - The process naturally occurs bottom-up (in order to remove an
+      outer-join we need to analyze its contents)
+    - If we failed to remove an outer join nest, it makes no sense to 
+      try removing its ancestors, as the 
+         ot LEFT JOIN it ON cond
+      pair may possibly produce two records (one record via match and
+      another one as access-method record).
+
+  Q: If we haven't removed an OUTER JOIN, does it make sense to attempt
+  removing its ancestors? 
+  A: No as the innermost outer join will produce two records => no ancestor 
+  outer join nest will be able to provide the max_fanout==1 guarantee.
+*/
+
+void eliminate_tables(JOIN *join, uint *const_tbl_count, 
+                      table_map *const_tables)
+{
+  Item *item;
+  table_map used_tables;
+  DBUG_ENTER("eliminate_tables");
+  
+  DBUG_ASSERT(join->eliminated_tables == 0);
+  
+  /* MWL#17 is only about outer join elimination, so: */
+  if (!join->outer_join)
+    DBUG_VOID_RETURN;
+
+  /* Find the tables that are referred to from WHERE/HAVING */
+  used_tables= (join->conds?  join->conds->used_tables() : 0) | 
+               (join->having? join->having->used_tables() : 0);
+  
+  /* Add tables referred to from the select list */
+  List_iterator<Item> it(join->fields_list);
+  while ((item= it++))
+    used_tables |= item->used_tables();
+ 
+  /* Add tables referred to from ORDER BY and GROUP BY lists */
+  ORDER *all_lists[]= { join->order, join->group_list};
+  for (int i=0; i < 2; i++)
+  {
+    for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next)
+      used_tables |= (*(cur_list->item))->used_tables();
+  }
+  
+  THD* thd= join->thd;
+  if (join->select_lex == &thd->lex->select_lex)
+  {
+    /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */
+    used_tables |= thd->table_map_for_update;
+
+    /* Multi-table UPDATE: don't eliminate tables referred from SET statement */
+    if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
+    {
+      List_iterator<Item> it2(thd->lex->value_list);
+      while ((item= it2++))
+        used_tables |= item->used_tables();
+    }
+  }
+
+  if (((1 << join->tables) - 1) & ~used_tables)
+  {
+    /* There are some time tables that we probably could eliminate */
+    eliminate_tables_for_join_list(join, join->join_list, used_tables,
+                                   const_tbl_count, const_tables);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+
+
+/*
+  Now on to traversal. There can be a situation like this:
+
+    FROM t1 
+         LEFT JOIN t2 ON cond(t1,t2)
+         LEFT JOIN t3 ON cond(..., possibly-t2)  // <--(*)
+         LEFT JOIN t4 ON cond(..., possibly-t2)
+
+  Besides that, simplify_joins() may have created back references, so when
+  we're e.g. looking at outer join (*) we need to look both forward and
+  backward to check if there are any references in preceding/following
+  outer joins'
+
+  TODO would it create only following-sibling references or
+  preceding-sibling as well?
+    And if not, should we rely on that?
+    
+*/
+
+static int
+eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list,
+                               table_map used_tables_elsewhere, 
+                               uint *const_tbl_count, table_map *const_tables)
+{
+  List_iterator<TABLE_LIST> it(*join_list);
+  table_map used_tables_on_right[MAX_TABLES]; // todo change to alloca
+  table_map used_tables_on_left;
+  TABLE_LIST *tbl;
+  int i, n_tables;
+  int eliminated=0;
+
+  /* Collect the reverse-bitmap-array */
+  for (i=0; (tbl= it++); i++)
+  {
+    used_tables_on_right[i]= 0;
+    if (tbl->on_expr)
+      used_tables_on_right[i]= tbl->on_expr->used_tables();
+    if (tbl->nested_join)
+      used_tables_on_right[i]= tbl->nested_join->used_tables;
+  }
+  n_tables= i;
+
+  for (i= n_tables - 2; i > 0; i--)
+    used_tables_on_right[i] |= used_tables_on_right[i+1];
+  
+  it.rewind();
+  
+  /* Walk through tables and join nests and see if we can eliminate them */
+  used_tables_on_left= 0;
+  i= 1;
+  while ((tbl= it++))
+  {
+    table_map tables_used_outside= used_tables_on_left |
+                                   used_tables_on_right[i] |
+                                   used_tables_elsewhere;
+    table_map cur_tables= 0;
+
+    if (tbl->nested_join)
+    {
+      DBUG_ASSERT(tbl->on_expr); 
+      /*
+        There can be cases where table removal is applicable for tables
+        within the outer join but not for the outer join itself. Ask to
+        remove the children first.
+
+        TODO: NoHopelessEliminationAttempts: the below call can return 
+        information about whether it would make any sense to try removing 
+        this entire outer join nest.
+      */
+      int eliminated_in_children= 
+        eliminate_tables_for_join_list(join, &tbl->nested_join->join_list,
+                                       tables_used_outside,
+                                       const_tbl_count, const_tables);
+      tbl->nested_join->n_tables -=eliminated_in_children; 
+      cur_tables= tbl->nested_join->used_tables;
+      if (!(cur_tables & tables_used_outside))
+      {
+        /* 
+          Check if all embedded tables together can produce at most one
+          record combination. This is true when
+           - each of them has one_match(outer-tables) property
+             (this is a stronger condition than all of them together having
+              this property but that's irrelevant here)
+           - there are no outer joins among them
+             (except for the case  of outer join which has all inner tables
+              to be constant and is guaranteed to produce only one record.
+              that record will be null-complemented)
+        */
+        bool one_match= TRUE;
+        List_iterator<TABLE_LIST> it2(tbl->nested_join->join_list);
+        TABLE_LIST *inner;
+        while ((inner= it2++))
+        {
+          /*
+            Bail out if we see an outer join (TODO: handle the above
+            null-complemntated-rows-only case)
+          */
+          if (inner->on_expr)
+          {
+            one_match= FALSE;
+            break;
+          }
+
+          if (inner->table && // <-- to be removed after NoHopelessEliminationAttempts
+              !table_has_one_match(inner->table,
+                                   ~tbl->nested_join->used_tables))
+          {
+            one_match= FALSE;
+            break;
+          }
+        }
+        if (one_match)
+        {
+          it2.rewind();
+          while ((inner= it2++))
+          {
+            mark_table_as_eliminated(join, inner->table, const_tbl_count, 
+                                     const_tables);
+          }
+          eliminated += tbl->nested_join->join_list.elements;
+          //psergey-todo: do we need to do anything about removing the join
+          //nest?
+          tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
+        }
+        else
+        {
+          eliminated += eliminated_in_children;
+        }
+      }
+    }
+    else if (tbl->on_expr)
+    {
+      cur_tables= tbl->on_expr->used_tables();
+      /* Check and remove */
+      if (!(tbl->table->map & tables_used_outside) && 
+          table_has_one_match(tbl->table, (table_map)-1))
+      {
+        mark_table_as_eliminated(join, tbl->table, const_tbl_count, 
+                                 const_tables);
+        tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
+        eliminated += 1;
+      }
+    }
+
+    /* Update bitmap of tables we've seen on the left */
+    i++;
+    used_tables_on_left |= cur_tables;
+  }
+  return eliminated;
+}
+
+
+/*
+  Mark table as eliminated:
+   - Mark it as constant table
+   - Move it to the front of join order
+   - Record it in join->eliminated_tables
+*/
+
+static 
+void mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count,
+                              table_map *const_tables)
+{
+  JOIN_TAB *tab= table->reginfo.join_tab;
+  if (!(*const_tables & tab->table->map))
+  {
+    DBUG_PRINT("info", ("Eliminated table %s", table->alias));
+    tab->type= JT_CONST;
+    join->eliminated_tables |= table->map;
+    *const_tables |= table->map;
+    join->const_table_map|= table->map;
+    set_position(join, (*const_tbl_count)++, tab, (KEYUSE*)0);
+  }
+}
+
+
+/*
+  Check the table will produce at most one matching record
+
+  SYNOPSIS
+    table_has_one_match()
+      table          The [base] table being checked 
+      bound_tables   Tables that should be considered bound.
+
+  DESCRIPTION
+    Check if the given table will produce at most one matching record for 
+    each record combination of tables in bound_tables.
+
+  RETURN 
+    TRUE   Yes, at most one match
+    FALSE  No
+*/
+
+static bool table_has_one_match(TABLE *table, table_map bound_tables)
+{
+  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
+  if (keyuse)
+  {
+    /*
+      Walk through all of the KEYUSE elements and
+       - locate unique keys
+       - check if we have eq_ref access for them
+          TODO any other reqs?
+      loops are constructed like in best_access_path
+    */
+    while (keyuse->table == table)
+    {
+      uint key= keyuse->key;
+      key_part_map bound_parts=0;
+      uint n_unusable=0;
+      bool ft_key= test(keyuse->keypart == FT_KEYPART);
+      KEY *keyinfo= table->key_info + key; 
+      KEYUSE *key_start = keyuse;
+      
+      do  /* For each keypart and each way to read it */
+      {
+        if (keyuse->usable)
+        {
+          if(!(keyuse->used_tables & ~bound_tables) &&
+             !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
+          {
+            bound_parts |= keyuse->keypart_map;
+          }
+        }
+        else
+          n_unusable++;
+        keyuse++;
+      } while (keyuse->table == table && keyuse->key == key);
+      
+      if (ft_key || ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) 
+                     != HA_NOSAME))
+      {
+        continue; 
+      }
+
+      if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts) ||
+          extra_keyuses_bind_all_keyparts(bound_tables, table, key_start,
+                                          keyuse, n_unusable, bound_parts))
+      {
+        return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+typedef struct st_keyuse_w_needed_reg
+{
+  KEYUSE *keyuse;
+  key_part_map dependency_parts;
+} Keyuse_w_needed_reg;
+
+
+/*
+  SYNOPSIS
+    extra_keyuses_bind_all_keyparts()
+      bound_tables   Tables which can be considered constants
+      table          Table we're examining
+      key_start      Start of KEYUSE array with elements describing the key
+                     of interest
+      key_end        End of the array + 1
+      n_keyuses      Number
+      bound_parts    Key parts whose values are known to be bound.
+
+  DESCRIPTION
+    Check if unusable KEYUSE elements cause all parts of key to be bound. An 
+    unusable keyuse element makes a keypart bound when it
+    represents the following:
+
+      keyXpartY=func(bound_columns, preceding_tables)
+
+  RETURN 
+    TRUE   Yes, at most one match
+    FALSE  No
+*/
+
+static bool 
+extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, 
+                                KEYUSE *key_start, KEYUSE *key_end, 
+                                uint n_keyuses, table_map bound_parts)
+{
+  if (n_keyuses && bound_parts)
+  {
+    KEY *keyinfo= table->key_info + key_start->key; 
+    Keyuse_w_needed_reg *uses;
+    if (!(uses= (Keyuse_w_needed_reg*)my_alloca(sizeof(Keyuse_w_needed_reg)*
+                                                n_keyuses)))
+      return FALSE;
+    uint n_uses=0;
+    /* First, collect an array<keyuse, key_parts_it_depends_on>*/
+    for (KEYUSE *k= key_start; k!=key_end; k++)
+    {
+      if (!k->usable && !(k->used_tables & ~bound_tables))
+      {
+        Field_processor_info fp= {bound_tables, table, k->key, 0};
+        if (!k->val->walk(&Item::check_column_usage_processor, FALSE, 
+                          (uchar*)&fp))
+        {
+          uses[n_uses].keyuse= k;
+          uses[n_uses].dependency_parts= fp.needed_key_parts;
+          n_uses++;
+        }
+      }
+    }
+
+    /* Now compute transitive closure */
+    uint n_bounded;
+    do 
+    {
+      n_bounded= 0;
+      for (uint i=0; i< n_uses; i++)
+      {
+        /* needed_parts is covered by what is already bound*/
+        if (!(uses[i].dependency_parts & ~bound_parts))
+        {
+          bound_parts|= key_part_map(1) << uses[i].keyuse->keypart;
+          n_bounded++;
+        }
+        if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
+          return TRUE;
+      }
+    } while (n_bounded != 0);
+  }
+  return FALSE;
+}
+
+/**
+  @} (end of group Table_Elimination)
+*/
+
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 79fa62fd37c..4d85570b6d2 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -42,11 +42,6 @@
 #define TMP_ENGINE_HTON myisam_hton
 #endif
 
-#define FT_KEYPART   (MAX_REF_PARTS+10)
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS		1
-#define KEY_OPTIMIZE_REF_OR_NULL	2
-
 const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
 			      "MAYBE_REF","ALL","range","index","fulltext",
 			      "ref_or_null","unique_subquery","index_subquery",
@@ -65,7 +60,6 @@ static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 table_map table_map, SELECT_LEX *select_lex,
                                 st_sargable_param **sargables);
 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
-static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 			       table_map used_tables);
 static bool choose_plan(JOIN *join,table_map join_tables);
@@ -2386,10 +2380,13 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
   }
   else
   {
-    // psergey{
+    /*
+      When in EXPLAIN, delay deleting the joins so that they are still
+      available when we're producing EXPLAIN EXTENDED warning text.
+    */
     if (select_options & SELECT_DESCRIBE)
       free_join= 0;
-    // }psergey
+
     if (!(join= new JOIN(thd, fields, select_options, result)))
 	DBUG_RETURN(TRUE);
     thd_proc_info(thd, "init");
@@ -2477,383 +2474,6 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
   DBUG_RETURN(HA_POS_ERROR);			/* This shouldn't happend */
 }
 
-/********************************************************************
- * Table elimination code starts
- ********************************************************************/
-typedef struct st_keyuse_w_needed_reg
-{
-  KEYUSE *first;
-  key_part_map second;
-  
-} Keyuse_w_needed_reg;
-
-static
-bool has_eq_ref_access_candidate(TABLE *table, table_map can_refer_to_these)
-{
-  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
-  if (keyuse)
-  {
-    /*
-      walk through all of the KEYUSE elements and
-       - locate unique keys
-       - check if we have eq_ref access for them
-          TODO any other reqs?
-      loops are constructed like in best_access_path
-    */
-    while (keyuse->table == table)
-    {
-      uint key= keyuse->key;
-      key_part_map bound_parts=0;
-      uint n_unusable=0;
-      bool ft_key= test(keyuse->keypart == FT_KEYPART);
-      KEY *keyinfo= table->key_info + key; 
-      KEYUSE *key_start = keyuse;
-      
-      do  /* For each keypart and each way to read it */
-      {
-        if (keyuse->usable)
-        {
-          if(!(keyuse->used_tables & ~can_refer_to_these) &&
-             !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
-          {
-            bound_parts |= keyuse->keypart_map;
-          }
-        }
-        else
-          n_unusable++;
-        keyuse++;
-      } while (keyuse->table == table && keyuse->key == key);
-      
-      if (ft_key || ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) 
-                     != HA_NOSAME))
-      { 
-        continue; 
-      }
-
-      if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
-        return TRUE;
-      /* 
-        Ok, usable keyuse elements didn't help us. Try making use of
-        unusable KEYUSEs (psergey-todo: sane comments:)
-      */
-      if (n_unusable && bound_parts)
-      {
-        /*
-          Check if unusable KEYUSE elements cause all parts of key to be
-          bound. An unusable keyuse element makes a key part bound when it
-          represents the following:
-
-            keyXpartY=func(bound_columns, preceding_tables)
-
-          .
-        */
-        Keyuse_w_needed_reg *uses;
-        if (!(uses= (Keyuse_w_needed_reg*)my_alloca(sizeof(Keyuse_w_needed_reg)*n_unusable)))
-          return FALSE;
-        uint n_uses=0;
-        for (KEYUSE *k= key_start; k!=keyuse; k++)
-        {
-          if (!k->usable && !(k->used_tables & ~can_refer_to_these))
-          {
-            //Walk k->val and check which key parts it depends on.
-            Field_processor_info fp= {can_refer_to_these, table, k->key, 0};
-            if (!k->val->walk(&Item::check_column_usage_processor, FALSE, 
-                              (uchar*)&fp))
-            {
-              uses[n_uses].first= k;
-              uses[n_uses].second= fp.needed_key_parts;
-              n_uses++;
-            }
-          }
-        }
-        /* Now compute transitive closure */
-        uint n_bounded;
-        do 
-        {
-          n_bounded= 0;
-          for (uint i=0; i< n_uses; i++)
-          {
-            /* needed_parts is covered by what is already bound*/
-            if (!(uses[i].second & ~bound_parts))
-            {
-              bound_parts|= key_part_map(1) << uses[i].first->keypart;
-              n_bounded++;
-            }
-            if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
-              return TRUE;
-          }
-        } while (n_bounded != 0);
-      }
-    }
-  }
-  return FALSE;
-}
-
-
-static void mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count, 
-                                      table_map *const_tables)
-{
-  JOIN_TAB *tab= table->reginfo.join_tab;
-  if (!(*const_tables & tab->table->map))
-  {
-    DBUG_PRINT("info", ("Eliminated table %s", table->alias));
-    tab->type= JT_CONST;
-    join->eliminated_tables |= table->map;
-    *const_tables |= table->map;
-    join->const_table_map|= table->map;
-    set_position(join, (*const_tbl_count)++, tab, (KEYUSE*)0);
-  }
-}
-
-
-/*
-  Now on to traversal. There can be a situation like this:
-
-    FROM t1 
-         LEFT JOIN t2 ON cond(t1,t2)
-         LEFT JOIN t3 ON cond(..., possibly-t2)  // <--(*)
-         LEFT JOIN t4 ON cond(..., possibly-t2)
-
-  Besides that, simplify_joins() may have created back references, so when
-  we're e.g. looking at outer join (*) we need to look both forward and
-  backward to check if there are any references in preceding/following
-  outer joins'
-
-  TODO would it create only following-sibling references or
-  preceding-sibling as well?
-    And if not, should we rely on that?
-    
-*/
-
-int
-eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list, 
-                               table_map used_tables_elsewhere, 
-                               uint *const_tbl_count, table_map *const_tables)
-{
-  List_iterator<TABLE_LIST> it(*join_list);
-  table_map used_tables_on_right[MAX_TABLES]; // todo change to alloca
-  table_map used_tables_on_left;
-  TABLE_LIST *tbl;
-  int i, n_tables;
-  int eliminated=0;
-
-  /* Collect the reverse-bitmap-array */
-  for (i=0; (tbl= it++); i++)
-  {
-    used_tables_on_right[i]= 0;
-    if (tbl->on_expr)
-      used_tables_on_right[i]= tbl->on_expr->used_tables();
-    if (tbl->nested_join)
-      used_tables_on_right[i]= tbl->nested_join->used_tables;
-  }
-  n_tables= i;
-
-  for (i= n_tables - 2; i > 0; i--)
-    used_tables_on_right[i] |= used_tables_on_right[i+1];
-  
-  it.rewind();
-  
-  /* Walk through tables and join nests and see if we can eliminate them */
-  used_tables_on_left= 0;
-  i= 1;
-  while ((tbl= it++))
-  {
-    table_map tables_used_outside= used_tables_on_left |
-                                   used_tables_on_right[i] |
-                                   used_tables_elsewhere;
-    table_map cur_tables= 0;
-
-    if (tbl->nested_join)
-    {
-      DBUG_ASSERT(tbl->on_expr); 
-      /*
-        There can be cases where table removal is applicable for tables
-        within the outer join but not for the outer join itself. Ask to
-        remove the children first.
-
-        TODO: NoHopelessEliminationAttempts: the below call can return 
-        information about whether it would make any sense to try removing 
-        this entire outer join nest.
-      */
-      int eliminated_in_children= 
-        eliminate_tables_for_join_list(join, &tbl->nested_join->join_list,
-                                       tables_used_outside,
-                                       const_tbl_count, const_tables);
-      tbl->nested_join->n_tables -=eliminated_in_children; 
-      cur_tables= tbl->nested_join->used_tables;
-      if (!(cur_tables & tables_used_outside))
-      {
-        /* 
-          Check if all embedded tables together can produce at most one
-          record combination. This is true when
-           - each of them has one_match(outer-tables) property
-             (this is a stronger condition than all of them together having
-              this property but that's irrelevant here)
-           - there are no outer joins among them
-             (except for the case  of outer join which has all inner tables
-              to be constant and is guaranteed to produce only one record.
-              that record will be null-complemented)
-        */
-        bool one_match= TRUE;
-        List_iterator<TABLE_LIST> it2(tbl->nested_join->join_list);
-        TABLE_LIST *inner;
-        while ((inner= it2++))
-        {
-          /*
-            Bail out if we see an outer join (TODO: handle the above
-            null-complemntated-rows-only case)
-          */
-          if (inner->on_expr)
-          {
-            one_match= FALSE;
-            break;
-          }
-
-          if (inner->table && // <-- to be removed after NoHopelessEliminationAttempts
-              !has_eq_ref_access_candidate(inner->table,
-                                           ~tbl->nested_join->used_tables))
-          {
-            one_match= FALSE;
-            break;
-          }
-        }
-        if (one_match)
-        {
-          it2.rewind();
-          while ((inner= it2++))
-          {
-            mark_table_as_eliminated(join, inner->table, const_tbl_count, 
-                                     const_tables);
-          }
-          eliminated += tbl->nested_join->join_list.elements;
-          //psergey-todo: do we need to do anything about removing the join
-          //nest?
-          tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
-        }
-        else
-        {
-          eliminated += eliminated_in_children;
-        }
-      }
-    }
-    else if (tbl->on_expr)
-    {
-      cur_tables= tbl->on_expr->used_tables();
-      /* Check and remove */
-      if (!(tbl->table->map & tables_used_outside) && 
-          has_eq_ref_access_candidate(tbl->table, (table_map)-1))
-      {
-        mark_table_as_eliminated(join, tbl->table, const_tbl_count, 
-                                 const_tables);
-        tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
-        eliminated += 1;
-      }
-    }
-
-    /* Update bitmap of tables we've seen on the left */
-    i++;
-    used_tables_on_left |= cur_tables;
-  }
-  return eliminated;
-}
-
-
-/*
-  Perform table elimination based on outer join
-
-  SELECT * FROM t1 LEFT JOIN 
-    (t2 JOIN t3) ON t3.primary_key=t1.col AND 
-                    t4.primary_key= t2.col
-
-  CRITERIA FOR REMOVING ONE OJ NEST
-    we can't rely on sole presense of eq_refs. Because if we do, we'll miss
-    things like this:
-
-    SELECT * FROM flights LEFT JOIN 
-      (pax as S1 JOIN pax as S2 ON S2.id=S1.spouse AND s1.id=s2.spouse)
-  
-   (no-polygamy schema/query but there can be many couples on the flight)
-   ..
-  
-  REMOVAL PROCESS
-    We can remove an inner side of an outer join if it there is a warranty
-    that it will produce not more than one record:
-
-     ... t1 LEFT JOIN t2 ON (t2.unique_key = expr) ...
-
-    For nested outer joins:
-    - The process naturally occurs bottom-up (in order to remove an
-      outer-join we need to analyze its contents)
-    - If we failed to remove an outer join nest, it makes no sense to 
-      try removing its ancestors, as the 
-         ot LEFT JOIN it ON cond
-      pair may possibly produce two records (one record via match and
-      another one as access-method record).
-
-  Q: If we haven't removed an OUTER JOIN, does it make sense to attempt
-  removing its ancestors? 
-  A: No as the innermost outer join will produce two records => no ancestor 
-  outer join nest will be able to provide the max_fanout==1 guarantee.
-
-  psergey-todo: .
-*/
-
-static void eliminate_tables(JOIN *join, uint *const_tbl_count, table_map *const_tables)
-{
-  Item *item;
-  table_map used_tables;
-  DBUG_ENTER("eliminate_tables");
-  
-  DBUG_ASSERT(join->eliminated_tables == 0);
-  
-  /* MWL#17 is only about outer join elimination, so: */
-  if (!join->outer_join)
-    DBUG_VOID_RETURN;
-
-  /* Find the tables that are referred to from WHERE/HAVING */
-  used_tables= (join->conds?  join->conds->used_tables() : 0) | 
-               (join->having? join->having->used_tables() : 0);
-  
-  /* Add tables referred to from the select list */
-  List_iterator<Item> it(join->fields_list);
-  while ((item= it++))
-    used_tables |= item->used_tables();
- 
-  /* Add tables referred to from ORDER BY and GROUP BY lists */
-  ORDER *all_lists[]= { join->order, join->group_list};
-  for (int i=0; i < 2; i++)
-  {
-    for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next)
-      used_tables |= (*(cur_list->item))->used_tables();
-  }
-  
-  THD* thd= join->thd;
-  if (join->select_lex == &thd->lex->select_lex)
-  {
-    /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */
-    used_tables |= thd->table_map_for_update;
-
-    /* Multi-table UPDATE: don't eliminate tables referred from SET statement */
-    if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
-    {
-      List_iterator<Item> it2(thd->lex->value_list);
-      while ((item= it2++))
-        used_tables |= item->used_tables();
-    }
-  }
-
-  if (((1 << join->tables) - 1) & ~used_tables)
-  {
-    /* There are some time tables that we probably could eliminate */
-    eliminate_tables_for_join_list(join, join->join_list, used_tables,
-                                   const_tbl_count, const_tables);
-  }
-  DBUG_VOID_RETURN;
-}
-
-/********************************************************************
- * Table elimination code ends
- ********************************************************************/
 
 /*
    This structure is used to collect info on potentially sargable
@@ -3219,10 +2839,6 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     }
   }
 
-  //psergey-todo: table elimination
-  //eliminate_tables(join, &const_count, &found_const_table_map);
-  //:psergey-todo
-
   /* Calc how many (possible) matched records in each table */
 
   for (s=stat ; s < stat_end ; s++)
@@ -3354,7 +2970,7 @@ typedef struct key_field_t {
   */
   bool          null_rejecting; 
   bool         *cond_guard; /* See KEYUSE::cond_guard */
-  bool          usable;
+  bool          usable; /* See KEYUSE::usable */
 } KEY_FIELD;
 
 
@@ -4428,8 +4044,7 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
 
 /** Save const tables first as used tables. */
 
-static void
-set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
 {
   join->positions[idx].table= table;
   join->positions[idx].key=key;
@@ -17021,7 +16636,6 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
     unit->fake_select_lex->options|= SELECT_DESCRIBE;
     if (!(res= unit->prepare(thd, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
       res= unit->exec();
-    //psergey-move: res|= unit->cleanup();
   }
   else
   {
diff --git a/sql/sql_select.h b/sql/sql_select.h
index a1e382d1bcd..9bce799b02d 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -28,6 +28,11 @@
 #include "procedure.h"
 #include <myisam.h>
 
+#define FT_KEYPART   (MAX_REF_PARTS+10)
+/* Values in optimize */
+#define KEY_OPTIMIZE_EXISTS		1
+#define KEY_OPTIMIZE_REF_OR_NULL	2
+
 typedef struct keyuse_t {
   TABLE *table;
   Item	*val;				/**< or value if no field */
@@ -51,6 +56,14 @@ typedef struct keyuse_t {
     NULL  - Otherwise (the source equality can't be turned off)
   */
   bool *cond_guard;
+  /*
+    TRUE  <=> This keyuse can be used to construct key access.
+    FALSE <=> Otherwise. Currently unusable KEYUSEs represent equalities
+              where one table column refers to another one, like this:
+                t.keyXpartA=func(t.keyXpartB)
+              This equality cannot be used for index access but is useful
+              for table elimination.
+  */
   bool usable;
 } KEYUSE;
 
@@ -734,9 +747,13 @@ bool error_if_full_join(JOIN *join);
 int report_error(TABLE *table, int error);
 int safe_index_read(JOIN_TAB *tab);
 COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value);
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
 
 inline bool optimizer_flag(THD *thd, uint flag)
 { 
   return (thd->variables.optimizer_switch & flag);
 }
 
+void eliminate_tables(JOIN *join, uint *const_tbl_count, 
+                      table_map *const_tables);
+
author	Sergey Petrunia <psergey@askmonty.org>	2009-06-25 14:05:53 +0400
committer	Sergey Petrunia <psergey@askmonty.org>	2009-06-25 14:05:53 +0400
commit	4102605fba3edab2935b7b1d46b7c7569bd889e7 (patch)
tree	045921bc2673d3dff38340a417242024dc08e936 /sql
parent	defbdce7e8c69df85cdd8c630643ef1152330660 (diff)
download	mariadb-git-4102605fba3edab2935b7b1d46b7c7569bd889e7.tar.gz