merging in index_merge (in progress, not yet working)

BitKeeper/etc/logging_ok: auto-union
author: unknown <psergey@psergey.(none)> 2003-11-13 22:14:37 +0300
committer: unknown <psergey@psergey.(none)> 2003-11-13 22:14:37 +0300
commit: 738728bd1144a29a9b8b380c6a129afc3acdcfc4 (patch)
tree: ee449f3b5dcb528030efbeea367fa2b4c2183c94
parent: 4696bb41b4cce563ffff8d7b6c32576214109113 (diff)
parent: 6e464cc06d8340cb5f0f26fd6894301eef55af1f (diff)
download: mariadb-git-738728bd1144a29a9b8b380c6a129afc3acdcfc4.tar.gz
12 files changed, 1232 insertions, 152 deletions
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok
index 0874f6117df..9efd243ee92 100644
--- a/BitKeeper/etc/logging_ok
+++ b/BitKeeper/etc/logging_ok
@@ -102,6 +102,7 @@ peter@mysql.com
 peterg@mysql.com
 pgulutzan@linux.local
 pmartin@build.mysql2.com
+psergey@psergey.(none)
 ram@gw.mysql.r18.ru
 ram@gw.udmsearch.izhnet.ru
 ram@mysql.r18.ru
diff --git a/sql/opt_ft.cc b/sql/opt_ft.cc
index 74349819937..9d1fc55c777 100644
--- a/sql/opt_ft.cc
+++ b/sql/opt_ft.cc
@@ -26,7 +26,7 @@
 ** Create a FT or QUICK RANGE based on a key
 ****************************************************************************/
 
-QUICK_SELECT *get_ft_or_quick_select_for_ref(THD *thd, TABLE *table,
+QUICK_RANGE_SELECT *get_ft_or_quick_select_for_ref(THD *thd, TABLE *table,
 					     JOIN_TAB *tab)
 {
   if (tab->type == JT_FT)
diff --git a/sql/opt_ft.h b/sql/opt_ft.h
index 69b6b72f3fc..65a36ea495d 100644
--- a/sql/opt_ft.h
+++ b/sql/opt_ft.h
@@ -24,18 +24,17 @@
 #pragma interface			/* gcc class implementation */
 #endif
 
-class FT_SELECT: public QUICK_SELECT {
+class FT_SELECT: public QUICK_RANGE_SELECT {
 public:
   TABLE_REF *ref;
 
   FT_SELECT(THD *thd, TABLE *table, TABLE_REF *tref) :
-      QUICK_SELECT (thd, table, tref->key, 1), ref(tref) { init(); }
-
-  int init() { return error=file->ft_init(); }
+      QUICK_RANGE_SELECT (thd, table, tref->key, 1), ref(tref) { init(); }
   int get_next() { return error=file->ft_read(record); }
+  int get_type() { return QS_TYPE_FULLTEXT; }
 };
 
-QUICK_SELECT *get_ft_or_quick_select_for_ref(THD *thd, TABLE *table,
+QUICK_RANGE_SELECT *get_ft_or_quick_select_for_ref(THD *thd, TABLE *table,
 					     JOIN_TAB *tab);
 
 #endif
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index b356bda6112..a25de57107b 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -267,14 +267,17 @@ public:
   SEL_ARG *clone_tree();
 };
 
+class SEL_IMERGE;
 
 class SEL_TREE :public Sql_alloc
 {
 public:
   enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
   SEL_TREE(enum Type type_arg) :type(type_arg) {}
-  SEL_TREE() :type(KEY) { bzero((char*) keys,sizeof(keys));}
+  SEL_TREE() :type(KEY), keys_map(0) { bzero((char*) keys,sizeof(keys));}
   SEL_ARG *keys[MAX_KEY];
+  key_map keys_map;        /* bitmask of non-NULL elements in keys         */
+  List<SEL_IMERGE> merges; /* possible ways to read rows using index_merge */
 };
 
 
@@ -302,10 +305,19 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
 				char *min_key,uint min_key_flag,
 				char *max_key, uint max_key_flag);
 
-static QUICK_SELECT *get_quick_select(PARAM *param,uint index,
-				      SEL_ARG *key_tree);
+QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
+                                     SEL_ARG *key_tree, MEM_ROOT *alloc = NULL);
+static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
+                                   key_map& needed_reg, TABLE *head,
+                                   bool index_read_can_be_used,
+                                   double* read_time, 
+                                   ha_rows* records,
+                                   SEL_ARG*** key_to_read);
 #ifndef DBUG_OFF
-static void print_quick(QUICK_SELECT *quick,key_map needed_reg);
+void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick,
+                            key_map needed_reg);
+void print_quick_sel_range(QUICK_RANGE_SELECT *quick,key_map needed_reg);
+
 #endif
 static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
@@ -313,16 +325,234 @@ static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
 static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
 static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
-static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
+bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
 			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
 			   char *max_key,uint max_key_flag);
 static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
 
 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
 static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length);
+bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param);
+
+
+/*
+  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is 
+  a condition in the following form:
+   (t_1||t_2||...||t_N) && (next) 
+
+  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair 
+  (t_i,t_j) contains SEL_ARGS for the same index.
+
+  SEL_TREE contained in SEL_IMERGE always has merges=NULL.
+
+  This class relies on memory manager to do the cleanup.
+*/
+
+class SEL_IMERGE : public Sql_alloc
+{
+  enum { PREALLOCED_TREES= 10};
+public:
+  SEL_TREE *trees_prealloced[PREALLOCED_TREES];  
+  SEL_TREE **trees;             /* trees used to do index_merge   */
+  SEL_TREE **trees_next;        /* last of these trees            */
+  SEL_TREE **trees_end;         /* end of allocated space         */
+
+  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */
+
+  SEL_IMERGE() :
+    trees(&trees_prealloced[0]),
+    trees_next(trees),
+    trees_end(trees + PREALLOCED_TREES)
+  {}
+  int or_sel_tree(PARAM *param, SEL_TREE *tree);
+  int or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree);
+  int or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge);
+};
+
+
+/* 
+  Add SEL_TREE to this index_merge without any checks,
+
+  NOTES 
+    This function implements the following: 
+      (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
+
+  RETURN
+     0 - OK
+    -1 - Out of memory.
+*/
+
+int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree)
+{
+  if (trees_next == trees_end)
+  {
+    const int realloc_ratio= 2;		/* Double size for next round */
+    uint old_elements= (trees_end - trees);
+    uint old_size= sizeof(SEL_TREE**) * old_elements;
+    uint new_size= old_size * realloc_ratio;
+    SEL_TREE **new_trees;
+    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
+      return -1;
+    memcpy(new_trees, trees, old_size);
+    trees=      new_trees;
+    trees_next= trees + old_elements;
+    trees_end=  trees + old_elements * realloc_ratio;
+  }
+  *(trees_next++)= tree;
+  return 0;
+}
+
+
+/*
+  Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
+  combining new_tree with one of the trees in this SEL_IMERGE if they both
+  have SEL_ARGs for the same key.
+ 
+  SYNOPSIS
+    or_sel_tree_with_checks()
+      param    PARAM from SQL_SELECT::test_quick_select
+      new_tree SEL_TREE with type KEY or KEY_SMALLER.
+
+  NOTES 
+    This does the following:
+    (t_1||...||t_k)||new_tree = 
+     either 
+       = (t_1||...||t_k||new_tree)
+     or
+       = (t_1||....||(t_j|| new_tree)||...||t_k),
+    
+     where t_i, y are SEL_TREEs.
+    new_tree is combined with the first t_j it has a SEL_ARG on common 
+    key with. As a consequence of this, choice of keys to do index_merge 
+    read may depend on the order of conditions in WHERE part of the query.
+
+  RETURN 
+    0  OK
+    1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS, 
+       and (*this) should be discarded.
+   -1  An error occurred.
+*/
+
+int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree)
+{
+  for (SEL_TREE** tree = trees;
+       tree != trees_next;
+       tree++)
+  {
+    if (sel_trees_can_be_ored(*tree, new_tree, param))
+    {
+      *tree = tree_or(param, *tree, new_tree);
+      if (!*tree)
+        return 1;
+      if (((*tree)->type == SEL_TREE::MAYBE) ||
+          ((*tree)->type == SEL_TREE::ALWAYS))
+        return 1;
+      /* SEL_TREE::IMPOSSIBLE is impossible here */
+      return 0;
+    }
+  }
+
+  /* new tree cannot be combined with any of existing trees */
+  return or_sel_tree(param, new_tree);
+}
+
+
+/*
+  Perform OR operation on this index_merge and supplied index_merge list.
+
+  RETURN
+    0 - OK
+    1 - One of conditions in result is always TRUE and this SEL_IMERGE 
+        should be discarded.
+   -1 - An error occurred
+*/
+
+int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge)
+{
+  for (SEL_TREE** tree= imerge->trees;
+       tree != imerge->trees_next;
+       tree++)
+  {
+    if (or_sel_tree_with_checks(param, *tree))
+      return 1;
+  }
+  return 0;
+}
+
+
+/* 
+  Perform AND operation on two index_merge lists, storing result in *im1.
+
+*/
+
+inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
+{
+  im1->concat(im2);
+}
+
+
+/*
+  Perform OR operation on 2 index_merge lists, storing result in first list.
+
+  NOTES 
+    The following conversion is implemented:
+     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
+      => (a_1||b_1).
+     
+    i.e. all conjuncts except the first one are currently dropped. 
+    This is done to avoid producing N*K ways to do index_merge.
+
+    If (a_1||b_1) produce a condition that is always true, NULL is 
+    returned and index_merge is discarded. (while it is actually 
+    possible to try harder).
+
+    As a consequence of this, choice of keys to do index_merge 
+    read may depend on the order of conditions in WHERE part of 
+    the query.
+
+  RETURN
+    0     OK, result is stored in *im1 
+    other Error, both passed lists are unusable
+
+*/
+
+int imerge_list_or_list(PARAM *param, 
+                        List<SEL_IMERGE> *im1,
+                        List<SEL_IMERGE> *im2)
+{
+  SEL_IMERGE *imerge= im1->head();
+  im1->empty();
+  im1->push_back(imerge);
+  
+  return imerge->or_sel_imerge_with_checks(param, im2->head());
+}
+
+
+/*
+  Perform OR operation on index_merge list and key tree.
+
+  RETURN
+    0     OK, result is stored in *im1 
+    other Error
+  
+*/
+
+int imerge_list_or_tree(PARAM *param, 
+                        List<SEL_IMERGE> *im1,
+                        SEL_TREE *tree)
+{
+  SEL_IMERGE *imerge;
+  List_iterator<SEL_IMERGE> it(*im1);
+  while((imerge= it++))
+  {
+    if (imerge->or_sel_tree_with_checks(param, tree))
+      it.remove();
+  }
+  return im1->is_empty();
+}
 
 /***************************************************************************
-** Basic functions for SQL_SELECT and QUICK_SELECT
+** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
 ***************************************************************************/
 
 	/* make a select from mysql info
@@ -383,6 +613,7 @@ QUICK_SELECT::QUICK_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc)
   :dont_free(0),error(0),index(key_nr),max_used_key_length(0),
    used_key_parts(0), head(table), it(ranges),range(0)
 {
+  //!!psergey: split this again to QUICK_SELECT_I
   if (!no_alloc)
   {
     // Allocates everything through the internal memroot
@@ -391,12 +622,16 @@ QUICK_SELECT::QUICK_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc)
   }
   else
     bzero((char*) &alloc,sizeof(alloc));
-  file=head->file;
-  record=head->record[0];
-  init();
+  file= head->file;
+  record= head->record[0];
 }
 
-QUICK_SELECT::~QUICK_SELECT()
+int QUICK_RANGE_SELECT::init()
+{
+  return (error= file->index_init(index));
+}
+
+QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
 {
   if (!dont_free)
   {
@@ -405,6 +640,42 @@ QUICK_SELECT::~QUICK_SELECT()
   }
 }
 
+
+QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table)
+  :cur_quick_it(quick_selects), index_merge(thd)
+{
+  index= MAX_KEY;
+  head= table;
+  init_sql_alloc(&alloc,1024,0);
+}
+
+int QUICK_INDEX_MERGE_SELECT::init()
+{
+  int error;
+  cur_quick_it.rewind();
+  cur_quick_select= cur_quick_it++;
+  if (error= index_merge.init(head))
+    return error;
+  return cur_quick_select->init();
+}
+
+void QUICK_INDEX_MERGE_SELECT::reset()
+{
+  cur_quick_select->reset();
+}
+
+bool 
+QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
+{
+  return quick_selects.push_back(quick_sel_range);
+}
+
+QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
+{
+  quick_selects.delete_elements();
+  free_root(&alloc,MYF(0));
+}
+
 QUICK_RANGE::QUICK_RANGE()
   :min_key(0),max_key(0),min_length(0),max_length(0),
    flag(NO_MIN_RANGE | NO_MAX_RANGE)
@@ -588,6 +859,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   uint basflag;
   uint idx;
   double scan_time;
+  QUICK_INDEX_MERGE_SELECT *quick_imerge;
+  THD *thd= current_thd;
   DBUG_ENTER("test_quick_select");
   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
 		      (ulong) keys_to_use, (ulong) prev_tables,
@@ -636,6 +909,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 
     thd->no_errors=1;				// Don't warn about NULL
     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
+    //!!todo:remerge this.
     if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc,
 						   sizeof(KEY_PART)*
 						   head->key_parts)))
@@ -681,70 +955,210 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 	read_time= (double) HA_POS_ERROR;
       }
       else if (tree->type == SEL_TREE::KEY ||
-	       tree->type == SEL_TREE::KEY_SMALLER)
+               tree->type == SEL_TREE::KEY_SMALLER)
       {
-	SEL_ARG **key,**end,**best_key=0;
-
-
-	for (idx=0,key=tree->keys, end=key+param.keys ;
-	     key != end ;
-	     key++,idx++)
-	{
-	  ha_rows found_records;
-	  double found_read_time;
-	  if (*key)
-	  {
-	    uint keynr= param.real_keynr[idx];
-	    if ((*key)->type == SEL_ARG::MAYBE_KEY ||
-		(*key)->maybe_flag)
-	        needed_reg|= (key_map) 1 << keynr;
-
-	    found_records=check_quick_select(&param, idx, *key);
-	    if (found_records != HA_POS_ERROR && found_records > 2 &&
-		head->used_keys & ((table_map) 1 << keynr) &&
-		(head->file->index_flags(keynr) & HA_KEY_READ_ONLY))
-	    {
-	      /*
-		We can resolve this by only reading through this key.
-		Assume that we will read trough the whole key range
-		and that all key blocks are half full (normally things are
-		much better).
-	      */
-	      uint keys_per_block= (head->file->block_size/2/
-				    (head->key_info[keynr].key_length+
-				     head->file->ref_length) + 1);
-	      found_read_time=((double) (found_records+keys_per_block-1)/
-			       (double) keys_per_block);
-	    }
-	    else
-	      found_read_time= (head->file->read_time(keynr,
-						      param.range_count,
-						      found_records)+
-				(double) found_records / TIME_FOR_COMPARE);
-	    if (read_time > found_read_time && found_records != HA_POS_ERROR)
-	    {
-	      read_time=found_read_time;
-	      records=found_records;
-	      best_key=key;
-	    }
-	  }
-	}
-	if (best_key && records)
-	{
-	  if ((quick=get_quick_select(&param,(uint) (best_key-tree->keys),
-				      *best_key)))
-	  {
-	    quick->records=records;
-	    quick->read_time=read_time;
-	  }
-	}
+        /*
+          It is possible to use a quick select (but maybe it would be slower
+          than 'all' table scan).
+        */
+	SEL_ARG **best_key= 0;
+	ha_rows found_records;
+	double found_read_time= read_time;
+
+        if (!get_quick_select_params(tree, param, needed_reg, head, true,
+                                     &found_read_time, &found_records,
+                                     &best_key))
+        {
+          /* 
+            Ok, quick select is better than 'all' table scan and we have its 
+            parameters, so construct it.
+          */
+          read_time= found_read_time;
+          records= found_records;
+
+          if ((quick= get_quick_select(&param,(uint) (best_key-tree->keys),
+                                       *best_key)) && (!quick->init()))
+          {
+            quick->records= records;
+            quick->read_time= read_time;
+          }
+        }
+
+        /* 
+           btw, tree type SEL_TREE::INDEX_MERGE was not introduced 
+           intentionally
+        */
+
+        /* if no range select could be built, try using index_merge */
+        if (!quick && !tree->merges.is_empty())
+        {
+          DBUG_PRINT("info",("No range reads possible,"
+                             " trying to construct index_merge"));
+          SEL_IMERGE *imerge;
+          SEL_IMERGE *min_imerge= NULL;
+          double  min_imerge_cost= DBL_MAX;
+          ha_rows min_imerge_records;
+          
+          List_iterator_fast<SEL_IMERGE> it(tree->merges);
+          while ((imerge= it++))
+          {
+            double  imerge_cost= 0;
+            ha_rows imerge_total_records= 0;
+            double  tree_read_time;
+            ha_rows tree_records;
+            imerge->best_keys=
+              (SEL_ARG***)alloc_root(&alloc,
+                                     (imerge->trees_next - imerge->trees)*
+                                     sizeof(void*));
+            for (SEL_TREE **ptree= imerge->trees;
+                 ptree != imerge->trees_next;
+                 ptree++)
+            {
+              tree_read_time= read_time;              
+              if (get_quick_select_params(*ptree, param, needed_reg, head, 
+                                          false,
+                                          &tree_read_time, &tree_records,
+                                          &(imerge->best_keys[ptree - 
+                                          imerge->trees])))
+                goto imerge_fail;
+
+              imerge_cost += tree_read_time;
+              imerge_total_records += tree_records;
+            }
+            imerge_total_records= min(imerge_total_records, 
+                                      head->file->records);
+            imerge_cost += imerge_total_records / TIME_FOR_COMPARE;
+            if (imerge_cost < min_imerge_cost)
+            {
+              min_imerge= imerge;
+              min_imerge_cost= imerge_cost;
+              min_imerge_records= imerge_total_records;
+            }
+imerge_fail:;
+          }
+          
+          if (!min_imerge)
+            goto end_free;
+          
+          records= min_imerge_records;
+          /* ok, got minimal imerge, *min_imerge, with cost min_imerge_cost */
+          
+          if (head->used_keys)
+          {
+            /* check if "ALL" +"using index" read would be faster */
+            int key_for_use= find_shortest_key(head, head->used_keys);
+            ha_rows total_table_records= (0 == head->file->records)? 1 : 
+                                          head->file->records;
+            uint keys_per_block= (head->file->block_size/2/
+                                  (head->key_info[key_for_use].key_length+
+                                  head->file->ref_length) + 1);
+            double all_index_scan_read_time= ((double)(total_table_records+
+                                              keys_per_block-1)/
+                                              (double) keys_per_block);
+
+            DBUG_PRINT("info", 
+                       ("'all' scan will be using key %d, read time %g",
+                       key_for_use, all_index_scan_read_time));
+            if (all_index_scan_read_time < min_imerge_cost)
+            {
+              DBUG_PRINT("info", 
+                         ("index merge would be slower, "
+                         "will do full 'index' scan"));
+              goto end_free;
+            }
+          }
+          else
+          {
+            /* check if "ALL" would be faster */
+            if (read_time < min_imerge_cost)
+            {
+              DBUG_PRINT("info", 
+                         ("index merge would be slower, "
+                         "will do full table scan"));
+              goto end_free;
+            }
+          }
+
+          if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head)))
+            goto end_free;
+
+          quick->records= min_imerge_records;
+          quick->read_time= min_imerge_cost;
+          
+          my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc);
+
+          QUICK_RANGE_SELECT *new_quick;
+          for (SEL_TREE **ptree = min_imerge->trees;
+               ptree != min_imerge->trees_next;
+               ptree++)
+          {
+            SEL_ARG **tree_best_key= 
+              min_imerge->best_keys[ptree - min_imerge->trees];
+            if ((new_quick= get_quick_select(&param,
+                                             (uint)(tree_best_key-
+                                             (*ptree)->keys),
+                                             *tree_best_key,
+                                             &quick_imerge->alloc)))
+            {
+              new_quick->records= min_imerge_records;
+              new_quick->read_time= min_imerge_cost;
+              /*
+                QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC
+                pointing to its allocator, restore it back
+              */
+              //my_pthread_setspecific_ptr(THR_MALLOC,old_root);
+              quick_imerge->last_quick_select= new_quick;
+
+              if (quick_imerge->push_quick_back(new_quick))
+              {
+                delete new_quick;
+                delete quick;
+                quick= quick_imerge= NULL;
+                goto end_free;
+              }
+            }
+            else
+            {
+              delete quick;
+              quick= quick_imerge= NULL;
+              goto end_free;
+            }
+          }
+
+          free_root(&alloc,MYF(0));
+          my_pthread_setspecific_ptr(THR_MALLOC,old_root);          
+          if (quick->init())
+          {
+            delete quick;
+            quick= quick_imerge= NULL;
+            DBUG_PRINT("error", 
+                       ("Failed to allocate index merge structures,"
+                       "falling back to full scan."));
+          }
+          else
+          {
+            /* with 'using filesort' quick->reset() is not called */
+            quick->reset();
+          }
+
+	        needed_reg|= (key_map) 1 << keynr; /!!todo entire function is not merged properly
       }
     }
+end_free:
     free_root(&alloc,MYF(0));			// Return memory & allocator
     my_pthread_setspecific_ptr(THR_MALLOC,old_root);
     thd->no_errors=0;
   }
-  DBUG_EXECUTE("info",print_quick(quick,needed_reg););
+
+  DBUG_EXECUTE("info",
+    {
+      if (quick_imerge)
+        print_quick_sel_imerge(quick_imerge, needed_reg);
+      else
+        print_quick_sel_range((QUICK_RANGE_SELECT*)quick, needed_reg);
+    }
+  );
+
   /*
     Assume that if the user is using 'limit' we will only need to scan
     limit rows if we are using a key
@@ -752,6 +1166,77 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   DBUG_RETURN(records ? test(quick) : -1);
 }
 
+
+/*
+  Calculate quick select read time, # of records, and best key to use 
+  without constructing QUICK_SELECT
+*/
+
+static int get_quick_select_params(SEL_TREE *tree, PARAM& param, 
+                                   key_map& needed_reg, TABLE *head,
+                                   bool index_read_can_be_used,
+                                   double* read_time, ha_rows* records,
+                                   SEL_ARG*** key_to_read)
+{
+  int idx;
+  int result = 1;
+  /*
+    Note that there may be trees that have type SEL_TREE::KEY but contain 
+    no key reads at all. For example, tree for expression "key1 is not null"
+    where key1 is defined as "not null".
+  */
+  SEL_ARG **key,**end;
+
+  for (idx= 0,key=tree->keys, end=key+param.keys ;
+       key != end ;
+       key++,idx++)
+  {
+    ha_rows found_records;
+    double found_read_time;
+    if (*key)
+    {
+      uint keynr= param.real_keynr[idx];
+      if ((*key)->type == SEL_ARG::MAYBE_KEY ||
+          (*key)->maybe_flag)
+        needed_reg|= (key_map) 1 << keynr;
+      
+      key_map usable_keys = index_read_can_be_used? 
+                            (head->used_keys & ((key_map) 1 << keynr)) : 0;
+
+      found_records=check_quick_select(&param, idx, *key);
+      if (found_records != HA_POS_ERROR && found_records > 2 &&
+          usable_keys &&
+          (head->file->index_flags(keynr) & HA_KEY_READ_ONLY))
+      {
+        /*
+          We can resolve this by only reading through this key.
+          Assume that we will read trough the whole key range
+          and that all key blocks are half full (normally things are
+          much better).
+        */
+        uint keys_per_block= (head->file->block_size/2/
+			      (head->key_info[keynr].key_length+
+			       head->file->ref_length) + 1);
+	found_read_time=((double) (found_records+keys_per_block-1)/
+			 (double) keys_per_block);
+      }
+      else
+	found_read_time= (head->file->read_time(keynr,
+						param.range_count,
+						found_records)+
+			  (double) found_records / TIME_FOR_COMPARE);
+      if (*read_time > found_read_time)
+      {
+        *read_time=   found_read_time;
+        *records=     found_records;
+        *key_to_read= key;
+        result = 0;
+      }
+    }
+  }
+  return result;
+}
+
 	/* make a select tree of all keys in condition */
 
 static SEL_TREE *get_mm_tree(PARAM *param,COND *cond)
@@ -931,6 +1416,7 @@ get_mm_parts(PARAM *param, Field *field, Item_func::Functype type,
       }
       sel_arg->part=(uchar) key_part->part;
       tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
+      tree->keys_map |= 1 << key_part->key;
     }
   }
   DBUG_RETURN(tree);
@@ -1195,6 +1681,8 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
     DBUG_RETURN(tree1);
   }
 
+  bool trees_have_key = false;
+  key_map  result_keys= 0;
   /* Join the trees key per key */
   SEL_ARG **key1,**key2,**end;
   for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
@@ -1203,6 +1691,7 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
     uint flag=0;
     if (*key1 || *key2)
     {
+      trees_have_key = true;
       if (*key1 && !(*key1)->simple_key())
 	flag|=CLONE_KEY1_MAYBE;
       if (*key2 && !(*key2)->simple_key())
@@ -1211,17 +1700,57 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
       if ((*key1)->type == SEL_ARG::IMPOSSIBLE)
       {
 	tree1->type= SEL_TREE::IMPOSSIBLE;
-	break;
+        DBUG_RETURN(tree1);
       }
+      result_keys |= 1 << (key1 - tree1->keys);
 #ifdef EXTRA_DEBUG
       (*key1)->test_use_count(*key1);
 #endif
     }
   }
+  tree1->keys_map= result_keys;
+  /* dispose index_merge if there is a "range" option */
+  if (trees_have_key)
+  {
+    tree1->merges.empty();
+    DBUG_RETURN(tree1);
+  }
+
+  /* ok, both trees are index_merge trees */
+  imerge_list_and_list(&tree1->merges, &tree2->merges);
+  
   DBUG_RETURN(tree1);
 }
 
 
+/*
+  Check if two SEL_TREES can be combined into one without using index_merge
+*/
+
+bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param)
+{
+  key_map common_keys= tree1->keys_map & tree2->keys_map;
+  DBUG_ENTER("sel_trees_can_be_ored");
+
+  if (!common_keys)
+    DBUG_RETURN(false);
+  
+  /* trees have a common key, check if they refer to same key part */  
+  SEL_ARG **key1,**key2;
+  for (uint key_no=0; key_no < param->keys; key_no++, common_keys= common_keys >> 1)
+  {
+    if (common_keys & 1)
+    {
+      key1= tree1->keys + key_no;
+      key2= tree2->keys + key_no;
+      if ((*key1)->part == (*key2)->part)
+      {
+        DBUG_RETURN(true);
+      }
+    }
+  }
+  DBUG_RETURN(false);
+}
 
 static SEL_TREE *
 tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
@@ -1238,19 +1767,61 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
   if (tree2->type == SEL_TREE::MAYBE)
     DBUG_RETURN(tree2);
 
-  /* Join the trees key per key */
-  SEL_ARG **key1,**key2,**end;
-  SEL_TREE *result=0;
-  for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
-       key1 != end ; key1++,key2++)
+  SEL_TREE *result= 0;
+  key_map  result_keys= 0;
+  if (sel_trees_can_be_ored(tree1, tree2, param))
   {
-    *key1=key_or(*key1,*key2);
-    if (*key1)
+    /* Join the trees key per key */
+    SEL_ARG **key1,**key2,**end;
+    for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
+         key1 != end ; key1++,key2++)
     {
-      result=tree1;				// Added to tree1
+      *key1=key_or(*key1,*key2);
+      if (*key1)
+      {
+        result=tree1;				// Added to tree1
+        result_keys |= 1 << (key1 - tree1->keys);
 #ifdef EXTRA_DEBUG
-      (*key1)->test_use_count(*key1);
+        (*key1)->test_use_count(*key1);
 #endif
+      }
+    }
+    if (result)
+      result->keys_map= result_keys;
+  }
+  else
+  {
+    /* ok, two trees have KEY type but cannot be used without index merge */
+    if (tree1->merges.is_empty() && tree2->merges.is_empty())
+    {
+      SEL_IMERGE *merge;
+      /* both trees are "range" trees, produce new index merge structure */
+      if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
+          (result->merges.push_back(merge)) ||
+          (merge->or_sel_tree(param, tree1)) ||
+          (merge->or_sel_tree(param, tree2)))
+        result= NULL;
+      else
+        result->type= tree1->type;
+    }
+    else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
+    {
+      if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
+        result= new SEL_TREE(SEL_TREE::ALWAYS);
+      else
+        result= tree1;
+    }
+    else
+    {
+      /* one tree is index merge tree and another is range tree */
+      if (tree1->merges.is_empty())
+        swap(SEL_TREE*, tree1, tree2);
+
+      /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
+      if (imerge_list_or_tree(param, &tree1->merges, tree2))
+        result= new SEL_TREE(SEL_TREE::ALWAYS);
+      else
+        result= tree1;
     }
   }
   DBUG_RETURN(result);
@@ -2243,15 +2814,17 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
 /****************************************************************************
 ** change a tree to a structure to be used by quick_select
 ** This uses it's own malloc tree
+** The caller should call QUICK_SELCT::init for returned quick select
 ****************************************************************************/
-
-static QUICK_SELECT *
-get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree)
+QUICK_RANGE_SELECT *
+get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
+                 MEM_ROOT *parent_alloc)
 {
-  QUICK_SELECT *quick;
+  QUICK_RANGE_SELECT *quick;
   DBUG_ENTER("get_quick_select");
-  if ((quick=new QUICK_SELECT(param->thd, param->table,
-			      param->real_keynr[idx])))
+  if ((quick=new QUICK_RANGE_SELECT(param->table,param->real_keynr[idx],
+                                    test(parent_alloc),
+                                    parent_alloc))) //!!todo: add thd to param
   {
     if (quick->error ||
 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
@@ -2263,9 +2836,10 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree)
     else
     {
       quick->key_parts=(KEY_PART*)
-	memdup_root(&quick->alloc,(char*) param->key[idx],
-		   sizeof(KEY_PART)*
-		   param->table->key_info[param->real_keynr[idx]].key_parts);
+        memdup_root(parent_alloc? parent_alloc : &quick->alloc,
+                    (char*) param->key[idx],
+                    sizeof(KEY_PART)*
+                    param->table->key_info[param->real_keynr[idx]].key_parts);
     }
   }
   DBUG_RETURN(quick);
@@ -2275,9 +2849,8 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree)
 /*
 ** Fix this to get all possible sub_ranges
 */
-
-static bool
-get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
+bool
+get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
 	       SEL_ARG *key_tree,char *min_key,uint min_key_flag,
 	       char *max_key, uint max_key_flag)
 {
@@ -2386,7 +2959,7 @@ get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
   Return 1 if there is only one range and this uses the whole primary key
 */
 
-bool QUICK_SELECT::unique_key_range()
+bool QUICK_RANGE_SELECT::unique_key_range()
 {
   if (ranges.elements == 1)
   {
@@ -2423,16 +2996,22 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
 ** Create a QUICK RANGE based on a key
 ****************************************************************************/
 
-QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref)
+QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref) //!!todo: make use of thd
 {
   table->file->index_end();			// Remove old cursor
-  QUICK_SELECT *quick=new QUICK_SELECT(thd, table, ref->key, 1);
+  QUICK_RANGE_SELECT *quick=new QUICK_RANGE_SELECT(thd, table, ref->key, 1);  
   KEY *key_info = &table->key_info[ref->key];
   KEY_PART *key_part;
   uint part;
 
   if (!quick)
     return 0;			/* no ranges found */
+  if (quick->init())            /* !!todo: psergey: check if init is called exactly one time.*/
+  {
+    delete quick;
+    return 0;
+  }
+
   if (cp_buffer_from_ref(ref))
   {
     if (thd->is_fatal_error)
@@ -2470,11 +3049,204 @@ err:
   return 0;
 }
 
+INDEX_MERGE::INDEX_MERGE(THD *thd_arg) :
+  dont_save(false), thd(thd_arg)  
+{}
+
+String *INDEX_MERGE::Item_rowid::val_str(String *str)
+{
+  str->set_quick((char*)head->file->ref, head->file->ref_length, collation.collation);
+  return str;
+}
+
+
+/* 
+  Initialize index_merge operation.
+  RETURN 
+    0     - OK 
+    other - error.
+*/
+
+int INDEX_MERGE::init(TABLE *table)
+{
+  DBUG_ENTER("INDEX_MERGE::init");
+  
+  head= table;
+  if (!(rowid_item= new Item_rowid(table)))
+    DBUG_RETURN(1);  
+
+  tmp_table_param.copy_field= 0;
+  tmp_table_param.end_write_records= HA_POS_ERROR;
+  tmp_table_param.group_length= table->file->ref_length;
+  tmp_table_param.group_parts= 1;
+  tmp_table_param.group_null_parts= 0;
+  tmp_table_param.hidden_field_count= 0;
+  tmp_table_param.field_count= 0;
+  tmp_table_param.func_count= 1;
+  tmp_table_param.sum_func_count= 0;
+  tmp_table_param.quick_group= 1;
+
+  bzero(&order, sizeof(ORDER));
+  order.item= (Item**)&rowid_item;
+  order.asc= 1;
+
+  fields.push_back(rowid_item);
+
+  temp_table= create_tmp_table(thd,
+                               &tmp_table_param,
+                               fields,
+                               &order,
+                               false,
+                               0,
+                               SELECT_DISTINCT,
+                               HA_POS_ERROR,
+                               (char *)"");
+  DBUG_RETURN(!temp_table);
+}
+
+/*
+  Check if record with ROWID record_pos has already been processed and 
+  if not - store the ROWID value.
+
+  RETURN
+    0 - record has not been processed yet
+    1 - record has already been processed.
+   -1 - an error occurred and query processing should be terminated.
+        Error code is stored in INDEX_MERGE::error
+*/
+
+int INDEX_MERGE::check_record_in()
+{ 
+  return (dont_save)? 
+           check_record() : 
+           put_record();
+}
+
+
+/*
+  Stop remembering records in check(). 
+  (this should be called just before the last key scan)
+
+  RETURN 
+    0 - OK
+    1 - error occurred initializing table index. 
+*/
+
+int INDEX_MERGE::start_last_quick_select()
+{
+  int result= 0;
+  if (!temp_table->uniques)
+  {
+    dont_save= true;
+    result= temp_table->file->index_init(0);
+  }
+  return result;
+}
+
+
+inline int INDEX_MERGE::put_record()
+{
+  DBUG_ENTER("INDEX_MERGE::put_record");
+  
+  copy_funcs(tmp_table_param.items_to_copy);
+  
+  if ((error= temp_table->file->write_row(temp_table->record[0])))
+  {
+    if (error == HA_ERR_FOUND_DUPP_KEY ||
+	error == HA_ERR_FOUND_DUPP_UNIQUE)
+      DBUG_RETURN(1);
+
+    DBUG_PRINT("info", 
+               ("Error writing row to temp. table: %d, converting to myisam", 
+               error));
+    if (create_myisam_from_heap(current_thd, temp_table, &tmp_table_param,
+				error,1))
+    {
+      DBUG_PRINT("info", ("Table conversion failed, bailing out"));
+      DBUG_RETURN(-1);
+    }
+  }
+
+  DBUG_RETURN(0);
+}
+
+inline int INDEX_MERGE::check_record()
+{
+  int result= 1;
+  DBUG_ENTER("INDEX_MERGE::check_record");  
+
+  if ((error= temp_table->file->index_read(temp_table->record[0],
+                                           head->file->ref,
+                                           head->file->ref_length,
+                                           HA_READ_KEY_EXACT)))
+  {
+    if (error != HA_ERR_KEY_NOT_FOUND)
+      result= -1;
+    else
+      result= 0;
+  }
+
+  DBUG_RETURN(result);
+}
+
+INDEX_MERGE::~INDEX_MERGE()
+{
+  if (temp_table)
+  {
+    DBUG_PRINT("info", ("Freeing temp. table"));
+    free_tmp_table(current_thd, temp_table);
+  }
+  /* rowid_item is freed automatically */
+  list_node* node;
+  node= fields.first_node();
+  fields.remove(&node);
+}
+
+int QUICK_INDEX_MERGE_SELECT::get_next()
+{
+  int       result;
+  int       put_result;
+  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
+
+  do
+  { 
+    while ((result= cur_quick_select->get_next()) == HA_ERR_END_OF_FILE)
+    {      
+      cur_quick_select= cur_quick_it++;
+      if (!cur_quick_select)
+        break;
+
+      cur_quick_select->init();
+      cur_quick_select->reset();
+      
+      if (last_quick_select == cur_quick_select)
+      {
+        if ((result= index_merge.start_last_quick_select()))
+          DBUG_RETURN(result);
+      }
+    }
+
+    if (result)
+    {
+      /* 
+        table read error (including HA_ERR_END_OF_FILE on last quick select
+        in index_merge)
+      */
+      DBUG_RETURN(result);
+    }
+    
+    cur_quick_select->file->position(cur_quick_select->record);
+    put_result= index_merge.check_record_in();
+  }while(put_result == 1); /* While record is processed */
+
+  DBUG_RETURN((put_result != -1) ? result : index_merge.error);
+}
+
 	/* get next possible record using quick-struct */
 
-int QUICK_SELECT::get_next()
+int QUICK_RANGE_SELECT::get_next()
 {
-  DBUG_ENTER("get_next");
+  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
 
   for (;;)
   {
@@ -2561,7 +3333,7 @@ int QUICK_SELECT::get_next()
   Returns 0 if key <= range->max_key
 */
 
-int QUICK_SELECT::cmp_next(QUICK_RANGE *range_arg)
+int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
 {
   if (range_arg->flag & NO_MAX_RANGE)
     return 0;					/* key can't be to large */
@@ -2602,8 +3374,9 @@ int QUICK_SELECT::cmp_next(QUICK_RANGE *range_arg)
   for now, this seems to work right at least.
  */
 
-QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_SELECT *q, uint used_key_parts)
-  : QUICK_SELECT(*q), rev_it(rev_ranges)
+QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, 
+                                     uint used_key_parts)
+ : QUICK_RANGE_SELECT(*q), rev_it(rev_ranges)
 {
   bool not_read_after_key = file->table_flags() & HA_NOT_READ_AFTER_KEY;
   QUICK_RANGE *r;
@@ -2870,7 +3643,23 @@ print_key(KEY_PART *key_part,const char *key,uint used_length)
   }
 }
 
-static void print_quick(QUICK_SELECT *quick,key_map needed_reg)
+void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, 
+                            key_map needed_reg)
+{
+  DBUG_ENTER("print_param");
+  if (! _db_on_ || !quick)
+    DBUG_VOID_RETURN;
+
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick->quick_selects);
+  QUICK_RANGE_SELECT* quick_range_sel;
+  while ((quick_range_sel= it++))
+  {
+    print_quick_sel_range(quick_range_sel, needed_reg);
+  }
+  DBUG_VOID_RETURN;
+}
+
+void print_quick_sel_range(QUICK_RANGE_SELECT *quick,key_map needed_reg)
 {
   QUICK_RANGE *range;
   DBUG_ENTER("print_param");
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 128f6259055..e4fcbf657b7 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -65,41 +65,199 @@ class QUICK_RANGE :public Sql_alloc {
     }
 };
 
+class INDEX_MERGE; 
 
-class QUICK_SELECT {
+/*
+  Quick select interface. 
+  This class is parent for all QUICK_*_SELECT and FT_SELECT classes.
+*/
+
+class QUICK_SELECT_I
+{
 public:
+  ha_rows records;  /* estimate of # of records to be retrieved */
+  double  read_time; /* time to perform this retrieval          */
+  TABLE   *head;
+
+  /*
+    the only index this quick select uses, or MAX_KEY for 
+    QUICK_INDEX_MERGE_SELECT
+  */
+  uint index; 
+  uint max_used_key_length, used_key_parts;
+
+  QUICK_SELECT_I();
+  virtual ~QUICK_SELECT_I(){};
+  virtual int  init() = 0;
+  virtual void reset(void) = 0;
+  virtual int  get_next() = 0;   /* get next record to retrieve */
+  virtual bool reverse_sorted() = 0;
+  virtual bool unique_key_range() { return false; }
+
+  enum { 
+    QS_TYPE_RANGE = 0,
+    QS_TYPE_INDEX_MERGE = 1,
+    QS_TYPE_RANGE_DESC = 2,
+    QS_TYPE_FULLTEXT   = 3
+  };
+
+  /* Get type of this quick select - one of the QS_* values */
+  virtual int get_type() = 0; 
+};
+
+struct st_qsel_param;
+class SEL_ARG;
+
+class QUICK_RANGE_SELECT : public QUICK_SELECT_I 
+{
+protected:
   bool next,dont_free;
+public:
   int error;
-  uint index, max_used_key_length, used_key_parts;
-  TABLE *head;
   handler *file;
   byte    *record;
+protected:
+  friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
+                                    key_map needed_reg);
+  friend QUICK_RANGE_SELECT *get_quick_select_for_ref(TABLE *table, 
+                                                      struct st_table_ref *ref);
+  friend bool get_quick_keys(struct st_qsel_param *param,
+                             QUICK_RANGE_SELECT *quick,KEY_PART *key,
+                             SEL_ARG *key_tree,char *min_key,uint min_key_flag,
+                             char *max_key, uint max_key_flag);
+  friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx,
+                                              SEL_ARG *key_tree,
+                                              MEM_ROOT *alloc);
+  friend class QUICK_SELECT_DESC;
+
   List<QUICK_RANGE> ranges;
   List_iterator<QUICK_RANGE> it;
   QUICK_RANGE *range;
   MEM_ROOT alloc;
-
   KEY_PART *key_parts;
-  ha_rows records;
-  double read_time;
-
-  QUICK_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0);
-  virtual ~QUICK_SELECT();
-  void reset(void) { next=0; it.rewind(); }
-  int init() { return error=file->index_init(index); }
-  virtual int get_next();
-  virtual bool reverse_sorted() { return 0; }
+  QUICK_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0); //!!todo: add thd to my ctor below
   int cmp_next(QUICK_RANGE *range);
+public:
+  QUICK_RANGE_SELECT(TABLE *table,uint index_arg,bool no_alloc=0, 
+                     MEM_ROOT *parent_alloc=NULL);
+  ~QUICK_RANGE_SELECT();
+  
+  void reset(void) { next=0; it.rewind(); }
+  int init();
+  int get_next();
+  bool reverse_sorted() { return 0; }
   bool unique_key_range();
+  int get_type() { return QS_TYPE_RANGE; }
+};
+
+/*
+  Helper class for keeping track of rows that have been passed to output 
+  in index_merge access method. 
+
+  NOTES
+    Current implementation uses a temporary table to store ROWIDs of rows that
+    have been passed to output. In the future it might be changed to use more 
+    efficient mechanisms, like Unique class.
+*/
+
+class INDEX_MERGE
+{
+public:
+  INDEX_MERGE(THD *thd_arg);
+  ~INDEX_MERGE(); 
+  
+  int init(TABLE *table);
+  int check_record_in();
+  int start_last_quick_select();
+  int error;
+private:
+  /* The only field in temporary table */
+  class Item_rowid : public Item_str_func
+  {
+    TABLE *head; /* source table */
+  public:
+    Item_rowid(TABLE *table) : head(table) 
+    {
+      max_length= table->file->ref_length;
+      collation.set(&my_charset_bin);
+    };
+    const char *func_name() const { return "rowid"; }
+    bool const_item() const { return 0; }
+    String *val_str(String *);
+    void fix_length_and_dec()
+    {}
+  };
+
+  /* Check if record has been processed and save it if it wasn't  */
+  inline int put_record(); 
+  
+  /* Check if record has been processed without saving it         */
+  inline int check_record();
+  
+  /* If true, check_record_in does't store ROWIDs it is passed.   */
+  bool  dont_save;
+
+  THD *thd;
+  TABLE *head;                     /* source table                        */
+  TABLE *temp_table;               /* temp. table used for values storage */
+  TMP_TABLE_PARAM tmp_table_param; /* temp. table creation parameters     */
+  Item_rowid *rowid_item;          /* the only field in temp. table       */
+  List<Item> fields;               /* temp. table fields list 
+                                      (the only element is rowid_item)    */
+  ORDER order;                     /* key for temp. table (rowid_item)    */
 };
 
 
-class QUICK_SELECT_DESC: public QUICK_SELECT
+/*
+  Index merge quick select. 
+  It is implemented as a container for several QUICK_RANGE_SELECTs.
+*/
+
+class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I 
+{
+public:
+  QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table);
+  ~QUICK_INDEX_MERGE_SELECT();
+
+  int  init();
+  void reset(void);
+  int  get_next();
+  bool reverse_sorted() { return false; }
+  bool unique_key_range() { return false; }
+  int get_type() { return QS_TYPE_INDEX_MERGE; }
+
+  bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
+
+  /* range quick selects this index_merge read consists of */
+  List<QUICK_RANGE_SELECT> quick_selects;
+  
+  /* quick select which is currently used for rows retrieval */
+  List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it;
+  QUICK_RANGE_SELECT* cur_quick_select;
+  
+  /*
+    Last element in quick_selects list. 
+    INDEX_MERGE::start_last_quick_select is called before retrieving
+    rows for it. 
+  */
+  QUICK_RANGE_SELECT* last_quick_select;
+  
+  /*
+    Used to keep track of what records have been already passed to output 
+    when doing index_merge access (NULL means no index_merge) 
+  */
+  INDEX_MERGE index_merge;
+
+  MEM_ROOT    alloc;
+};
+
+class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
 {
 public:
-  QUICK_SELECT_DESC(QUICK_SELECT *q, uint used_key_parts);
+  QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts);
   int get_next();
   bool reverse_sorted() { return 1; }
+  int get_type() { return QS_TYPE_RANGE_DESC; }
 private:
   int cmp_prev(QUICK_RANGE *range);
   bool range_reads_after_key(QUICK_RANGE *range);
@@ -114,7 +272,7 @@ private:
 
 class SQL_SELECT :public Sql_alloc {
  public:
-  QUICK_SELECT *quick;		// If quick-select used
+  QUICK_SELECT_I *quick;	// If quick-select used
   COND		*cond;		// where condition
   TABLE	*head;
   IO_CACHE file;		// Positions to used records
@@ -134,7 +292,7 @@ class SQL_SELECT :public Sql_alloc {
 			ha_rows limit, bool force_quick_range=0);
 };
 
-QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
+QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
 				       struct st_table_ref *ref);
 
 #endif
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index f3e57a0abbf..9a1221bbfd7 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -562,8 +562,8 @@ int THD::send_explain_fields(select_result *result)
   item->maybe_null=1;
   field_list.push_back(item=new Item_empty_string("key",NAME_LEN));
   item->maybe_null=1;
-  field_list.push_back(item=new Item_return_int("key_len",3,
-						MYSQL_TYPE_LONGLONG));
+  field_list.push_back(item=new Item_empty_string("key_len",
+						  NAME_LEN*MAX_KEY));
   item->maybe_null=1;
   field_list.push_back(item=new Item_empty_string("ref",
 						  NAME_LEN*MAX_REF_PARTS));
diff --git a/sql/sql_list.h b/sql/sql_list.h
index 7200046e6c5..2f0425f1ddc 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -135,6 +135,12 @@ public:
       last= &first;
     return tmp->info;
   }
+  inline void concat(base_list *list)
+  {
+    *last= list->first;
+    last= list->last;
+    elements+= list->elements;
+  }
   inline list_node* last_node() { return *last; }
   inline list_node* first_node() { return first;}
   inline void *head() { return first->info; }
@@ -255,6 +261,7 @@ public:
     }
     empty();
   }
+  inline void concat(List<T> *list) { base_list::concat(list); }
 };
 
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 718ba141e3d..b26be9c44d9 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -32,7 +32,8 @@
 
 const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
 			      "MAYBE_REF","ALL","range","index","fulltext",
-			      "ref_or_null","unique_subquery","index_subquery"
+			      "ref_or_null","unique_subquery","index_subquery",
+                              "index_merge" //!!todo: psergey: check if constant values are same
 };
 
 static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
@@ -114,7 +115,6 @@ static int join_read_next_same_or_null(READ_RECORD *info);
 static COND *make_cond_for_table(COND *cond,table_map table,
 				 table_map used_table);
 static Item* part_of_refkey(TABLE *form,Field *field);
-static uint find_shortest_key(TABLE *table, key_map usable_keys);
 static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
 				    ha_rows select_limit, bool no_changes);
 static int create_sort_index(THD *thd, JOIN *join, ORDER *order,
@@ -3340,7 +3340,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	     with key reading */
 	  if (tab->needed_reg == 0 && tab->type != JT_EQ_REF
 	      && tab->type != JT_FT && (tab->type != JT_REF ||
-	       (uint) tab->ref.key == tab->quick->index))
+               (uint) tab->ref.key == tab->quick->index))
 	  {
 	    sel->quick=tab->quick;		// Use value from get_quick_...
 	    sel->quick_keys=0;
@@ -6589,7 +6589,7 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
   return reverse;
 }
 
-static uint find_shortest_key(TABLE *table, key_map usable_keys)
+uint find_shortest_key(TABLE *table, key_map usable_keys)
 {
   uint min_length= (uint) ~0;
   uint best= MAX_KEY;
@@ -6719,6 +6719,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
   }
   else if (select && select->quick)		// Range found by opt_range
   {
+    /* assume results are not ordered when index merge is used */
+    if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+      DBUG_RETURN(0);
     ref_key=	   select->quick->index;
     ref_key_parts= select->quick->used_key_parts;
   }
@@ -6753,6 +6756,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	}
 	else
 	{
+          /* 
+            We have verified above that select->quick is not 
+            index_merge quick select. 
+          */
 	  select->quick->index= new_ref_key;
 	  select->quick->init();
 	}
@@ -6774,10 +6781,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	  */
 	  if (!select->quick->reverse_sorted())
 	  {
-            if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST)
+            if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST ||
+                (select->quick->get_type() == 
+                QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
               DBUG_RETURN(0);			// Use filesort
-	    // ORDER BY range_key DESC
-	    QUICK_SELECT_DESC *tmp=new QUICK_SELECT_DESC(select->quick,
+            
+            // ORDER BY range_key DESC
+	    QUICK_SELECT_DESC *tmp=new QUICK_SELECT_DESC((QUICK_RANGE_SELECT*)(select->quick),
 							 used_key_parts);
 	    if (!tmp || tmp->error)
 	    {
@@ -6912,8 +6922,11 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     {
       select->quick=tab->quick;
       tab->quick=0;
-      /* We can only use 'Only index' if quick key is same as ref_key */
-      if (table->key_read && (uint) tab->ref.key != select->quick->index)
+      /* 
+        We can only use 'Only index' if quick key is same as ref_key
+        and in index_merge 'Only index' cannot be used
+      */
+      if (table->key_read && ((uint) tab->ref.key != select->quick->index))
       {
 	table->key_read=0;
 	table->file->extra(HA_EXTRA_NO_KEYREAD);
@@ -8717,12 +8730,15 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       JOIN_TAB *tab=join->join_tab+i;
       TABLE *table=tab->table;
       char buff[512],*buff_ptr=buff;
-      char buff1[512], buff2[512];
+      char buff1[512], buff2[512], buff3[512];
+      char keylen_str_buf[64];
       char derived_name[64];
       String tmp1(buff1,sizeof(buff1),cs);
       String tmp2(buff2,sizeof(buff2),cs);
+      String tmp3(buff3,sizeof(buff3),cs);
       tmp1.length(0);
       tmp2.length(0);
+      tmp3.length(0);
 
       item_list.empty();
       item_list.push_back(new Item_int((int32) 
@@ -8731,7 +8747,13 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 					  strlen(join->select_lex->type),
 					  cs));
       if (tab->type == JT_ALL && tab->select && tab->select->quick)
-	tab->type= JT_RANGE;
+      {
+        if (tab->select->quick->get_type() == 
+            QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+          tab->type = JT_INDEX_MERGE;
+        else
+	  tab->type = JT_RANGE;
+      }
       if (table->derived_select_number)
       {
 	/* Derived table name generation */
@@ -8765,10 +8787,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       if (tab->ref.key_parts)
       {
 	KEY *key_info=table->key_info+ tab->ref.key;
+        register uint length;
 	item_list.push_back(new Item_string(key_info->name,
 					    strlen(key_info->name),
 					    system_charset_info));
-	item_list.push_back(new Item_int((int32) tab->ref.key_length));
+        length= longlong2str(tab->ref.key_length, keylen_str_buf, 10) - 
+                keylen_str_buf;
+        item_list.push_back(new Item_string(keylen_str_buf, length,
+                                            system_charset_info));
 	for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
 	{
 	  if (tmp2.length())
@@ -8780,18 +8806,60 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       else if (tab->type == JT_NEXT)
       {
 	KEY *key_info=table->key_info+ tab->index;
+        register uint length;
 	item_list.push_back(new Item_string(key_info->name,
 					    strlen(key_info->name),cs));
-	item_list.push_back(new Item_int((int32) key_info->key_length));
+        length= longlong2str(key_info->key_length, keylen_str_buf, 10) - 
+                keylen_str_buf;
+        item_list.push_back(new Item_string(keylen_str_buf, 
+                                            length,
+                                            system_charset_info));
 	item_list.push_back(item_null);
       }
       else if (tab->select && tab->select->quick)
       {
-	KEY *key_info=table->key_info+ tab->select->quick->index;
-	item_list.push_back(new Item_string(key_info->name,
-					    strlen(key_info->name),cs));
-	item_list.push_back(new Item_int((int32) tab->select->quick->
-					 max_used_key_length));
+        if (tab->select->quick->get_type() == 
+            QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+        {
+          QUICK_INDEX_MERGE_SELECT *quick_imerge=
+            (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
+          QUICK_RANGE_SELECT *quick;
+
+          List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->
+                                                    quick_selects);
+          while ((quick= it++))
+          {
+	    KEY *key_info= table->key_info + quick->index;
+            register uint length;
+            if (tmp3.length())
+	      tmp3.append(',');
+	    
+            tmp3.append(key_info->name);
+
+	    if (tmp2.length())
+	      tmp2.append(',');
+
+            length= longlong2str(quick->max_used_key_length, keylen_str_buf,
+                                 10) - 
+                    keylen_str_buf;
+
+            tmp2.append(keylen_str_buf, length);
+          }
+        }
+        else
+        {
+	  KEY *key_info= table->key_info + tab->select->quick->index;
+          register uint length;
+          tmp3.append(key_info->name);
+          
+          length= longlong2str(tab->select->quick->max_used_key_length, 
+                               keylen_str_buf, 10) -
+                  keylen_str_buf;
+          tmp2.append(keylen_str_buf, length);
+        }
+
+	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
 	item_list.push_back(item_null);
       }
       else
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 6c17a646ee6..5bea2f1cf17 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -76,7 +76,7 @@ typedef struct st_join_cache {
 
 enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF,
 		 JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL,
-		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY};
+		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE};
 
 class JOIN;
 
@@ -85,7 +85,7 @@ typedef struct st_join_table {
   KEYUSE	*keyuse;			/* pointer to first used key */
   SQL_SELECT	*select;
   COND		*select_cond;
-  QUICK_SELECT	*quick;
+  QUICK_SELECT_I *quick;
   Item		*on_expr;
   const char	*info;
   byte		*null_ref_key;
@@ -307,6 +307,7 @@ void copy_fields(TMP_TABLE_PARAM *param);
 void copy_funcs(Item **func_ptr);
 bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
 			     int error, bool ignore_last_dupp_error);
+uint find_shortest_key(TABLE *table, key_map usable_keys);
 
 /* functions from opt_sum.cc */
 int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 112d42e4643..e884e0e0e07 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -178,9 +178,39 @@ TEST_join(JOIN *join)
 		"                  quick select checked for each record (keys: %d)\n",
 		(int) tab->select->quick_keys);
       else if (tab->select->quick)
-	fprintf(DBUG_FILE,"                  quick select used on key %s, length: %d\n",
+      {
+        int quick_type= tab->select->quick->get_type();
+        if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) || 
+            (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC))
+        {
+	  fprintf(DBUG_FILE,
+                "                  quick select used on key %s, length: %d\n",
 		form->key_info[tab->select->quick->index].name,
 		tab->select->quick->max_used_key_length);
+        }
+        else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+        {
+          QUICK_INDEX_MERGE_SELECT *quick_imerge= 
+              (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
+          QUICK_RANGE_SELECT *quick;
+          fprintf(DBUG_FILE,
+                  "                  index_merge quick select used\n");
+                    
+          List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->quick_selects);
+          while ((quick = it++))
+          {
+  	    fprintf(DBUG_FILE,
+                "                  range quick select: key %s, length: %d\n",
+		form->key_info[quick->index].name,
+		quick->max_used_key_length);
+          }
+        }
+        else
+        {
+          fprintf(DBUG_FILE,
+                  "                  quick select of unknown nature used\n");
+        }
+      }
       else
 	VOID(fputs("                  select used\n",DBUG_FILE));
     }
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 934cda3b1a8..ff68f26d765 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -117,7 +117,8 @@ int st_select_lex_unit::prepare(THD *thd, select_result *sel_result,
 {
   SELECT_LEX *lex_select_save= thd->lex->current_select;
   SELECT_LEX *select_cursor,*sl;
-  DBUG_ENTER("st_select_lex_unit::prepare");
+  SELECT_LEX *sl;
+  DBUG_ENTER("st_select_lex_unit::prepare");  
 
   if (prepared)
     DBUG_RETURN(0);
@@ -187,7 +188,7 @@ int st_select_lex_unit::prepare(THD *thd, select_result *sel_result,
   union_result->not_describe=1;
   union_result->tmp_table_param=tmp_table_param;
 
-  for (;sl; sl= sl->next_select())
+  for (;sl; sl= sl->next_select()) //!!todo: psergey: all my changes around this were to shut up the compiler. check they didn't make it here
   {
     JOIN *join= new JOIN(thd, sl->item_list, 
 			 sl->options | thd->options | SELECT_NO_UNLOCK,
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 8c400b2e98e..4ea14fcc68f 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -177,10 +177,18 @@ int mysql_update(THD *thd,
   init_ftfuncs(thd, &thd->lex->select_lex, 1);
   /* Check if we are modifying a key that we are used to search with */
   if (select && select->quick)
-    used_key_is_modified= (!select->quick->unique_key_range() &&
-			   check_if_key_used(table,
-					     (used_index=select->quick->index),
-					     fields));
+  {
+    if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+    {
+      used_index= select->quick->index;
+      used_key_is_modified= (!select->quick->unique_key_range() &&
+			      check_if_key_used(table,used_index,fields));
+    }
+    else
+    {
+      used_key_is_modified= true;
+    }
+  }
   else if ((used_index=table->file->key_used_on_scan) < MAX_KEY)
     used_key_is_modified=check_if_key_used(table, used_index, fields);
   else
@@ -699,8 +707,26 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields)
   case JT_ALL:
     /* If range search on index */
     if (join_tab->quick)
-      return !check_if_key_used(table, join_tab->quick->index,
-				*fields);
+    {
+      if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+      {
+        return !check_if_key_used(table,join_tab->quick->index,*fields);
+      }
+      else
+      {
+        QUICK_INDEX_MERGE_SELECT *qsel_imerge=
+          (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick);
+        List_iterator_fast<QUICK_RANGE_SELECT> it(qsel_imerge->quick_selects);
+        QUICK_RANGE_SELECT *quick;
+        while ((quick= it++))
+        {
+          if (check_if_key_used(table, quick->index, *fields))
+            return 0;
+        }
+        return 1;
+      }
+    }
+
     /* If scanning in clustered key */
     if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
 	table->primary_key < MAX_KEY)
author	unknown <psergey@psergey.(none)>	2003-11-13 22:14:37 +0300
committer	unknown <psergey@psergey.(none)>	2003-11-13 22:14:37 +0300
commit	738728bd1144a29a9b8b380c6a129afc3acdcfc4 (patch)
tree	ee449f3b5dcb528030efbeea367fa2b4c2183c94
parent	4696bb41b4cce563ffff8d7b6c32576214109113 (diff)
parent	6e464cc06d8340cb5f0f26fd6894301eef55af1f (diff)
download	mariadb-git-738728bd1144a29a9b8b380c6a129afc3acdcfc4.tar.gz