From 3600d09ab4323098676fa51c869a787fec9d42cc Mon Sep 17 00:00:00 2001
From: unknown <sergefp@mysql.com>
Date: Thu, 13 May 2004 01:38:40 +0400
Subject: This is first cset for WL#1394 "Optimize index merge when all
 involved index ranges include only values with equal keys" The main idea is
 to exploit the fact that key scans for "key=const" return ordered sequences
 of rowids.

include/my_base.h:
  Added HA_EXTRA_KEYREAD_PRESERVE_FIELDS flag
include/my_bitmap.h:
  Added a couple of utility functions
include/my_sys.h:
  Added my_conunt_bits_ushort function
innobase/include/row0mysql.h:
  Added support for HA_EXTRA_KEYREAD_PRESERVE_FIELDS
innobase/row/row0sel.c:
  Added support for HA_EXTRA_KEYREAD_PRESERVE_FIELDS
mysys/my_bit.c:
  Added my_count_bits_ushort function
mysys/my_bitmap.c:
  Added a couple of utility functions
sql/ha_berkeley.cc:
  Added cmp_ref rowid comparison function.
sql/ha_berkeley.h:
  Added cmp_ref rowid comparison function.
sql/ha_heap.h:
  Added cmp_ref rowid comparison function.
sql/ha_innodb.cc:
  Added cmp_ref rowid comparison function and support from HA_EXTRA_KEYREAD_PRESERVE_FIELDS
sql/ha_innodb.h:
  Added cmp_ref rowid comparison function.
sql/handler.h:
  Added cmp_ref rowid comparison function.
sql/opt_range.cc:
  Added QUICK_ROR_{INTERSECT,UNION}_SELECT classes and related optimizer code
sql/opt_range.h:
  Added QUICK_ROR_{INTERSECT,UNION}_SELECT classes
sql/sql_delete.cc:
  Changed to use new ROWID comparison function
  also always call quick->reset() for quick selects
sql/sql_select.cc:
  Account for new quick select types
sql/sql_select.h:
  New, proper rowid ordering/comparison function to be used with Unique class etc.
sql/sql_test.cc:
  Account for new quick select types
sql/sql_update.cc:
  Account for new quick select types
---
 sql/ha_berkeley.cc |   25 +
 sql/ha_berkeley.h  |    1 +
 sql/ha_heap.h      |    7 +-
 sql/ha_innodb.cc   |   57 ++
 sql/ha_innodb.h    |    1 +
 sql/handler.h      |    5 +
 sql/opt_range.cc   | 2565 ++++++++++++++++++++++++++++++++++++++++++++--------
 sql/opt_range.h    |  163 +++-
 sql/sql_delete.cc  |   16 +-
 sql/sql_select.cc  |   76 +-
 sql/sql_select.h   |    2 +-
 sql/sql_test.cc    |   33 +-
 sql/sql_update.cc  |   41 +-
 13 files changed, 2484 insertions(+), 508 deletions(-)

(limited to 'sql')

diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc
index c4735403267..3daeae160cd 100644
--- a/sql/ha_berkeley.cc
+++ b/sql/ha_berkeley.cc
@@ -2457,4 +2457,29 @@ ha_rows ha_berkeley::estimate_number_of_rows()
   return share->rows + HA_BERKELEY_EXTRA_ROWS;
 }
 
+int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2)
+{
+  if (hidden_primary_key)
+    return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH);
+
+  int result;
+  Field *field;
+  KEY *key_info=table->key_info+table->primary_key;
+  KEY_PART_INFO *key_part=key_info->key_part;
+  KEY_PART_INFO *end=key_part+key_info->key_parts;
+
+  for (; key_part != end; key_part++)
+  {
+    field=  key_part->field; 
+    result= field->pack_cmp((const char*)ref1, (const char*)ref2, 
+                            key_part->length);
+    if (result)
+      return result;
+    ref1 += field->packed_col_length((const char*)ref1, key_part->length);
+    ref2 += field->packed_col_length((const char*)ref2, key_part->length);
+  }
+
+  return 0;
+}
+
 #endif /* HAVE_BERKELEY_DB */
diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h
index f225c24eaf7..f5f6194ee77 100644
--- a/sql/ha_berkeley.h
+++ b/sql/ha_berkeley.h
@@ -168,6 +168,7 @@ class ha_berkeley: public handler
   void print_error(int error, myf errflag);
   uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
   bool primary_key_is_clustered() { return true; }
+  int cmp_ref(const byte *ref1, const byte *ref2);
 };
 
 extern bool berkeley_skip, berkeley_shared_data;
diff --git a/sql/ha_heap.h b/sql/ha_heap.h
index c369c7029b4..80cd143eec3 100644
--- a/sql/ha_heap.h
+++ b/sql/ha_heap.h
@@ -93,5 +93,10 @@ class ha_heap: public handler
 
   THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
 			     enum thr_lock_type lock_type);
-
+  int cmp_ref(const byte *ref1, const byte *ref2)
+  {
+    HEAP_PTR ptr1=*(HEAP_PTR*)ref1;
+    HEAP_PTR ptr2=*(HEAP_PTR*)ref2;
+    return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0);
+  }
 };
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 8bd43ea92cd..da46dc799e9 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -711,6 +711,8 @@ ha_innobase::init_table_handle_for_HANDLER(void)
         prebuilt->read_just_key = FALSE;
 
 	prebuilt->used_in_HANDLER = TRUE;
+
+	prebuilt->keep_other_fields_on_keyread = FALSE;
 }
 
 /*************************************************************************
@@ -4454,9 +4456,11 @@ ha_innobase::extra(
                         if (prebuilt->blob_heap) {
                                 row_mysql_prebuilt_free_blob_heap(prebuilt);
                         }
+                        prebuilt->keep_other_fields_on_keyread = 0;
                         prebuilt->read_just_key = 0;
                         break;
   		case HA_EXTRA_RESET_STATE:
+	        	prebuilt->keep_other_fields_on_keyread = 0;
 	        	prebuilt->read_just_key = 0;
     	        	break;
 		case HA_EXTRA_NO_KEYREAD:
@@ -4468,6 +4472,9 @@ ha_innobase::extra(
 	        case HA_EXTRA_KEYREAD:
 	        	prebuilt->read_just_key = 1;
 	        	break;
+		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+			prebuilt->keep_other_fields_on_keyread = 1;
+			break;
 		default:/* Do nothing */
 			;
 	}
@@ -4526,6 +4533,7 @@ ha_innobase::start_stmt(
 	prebuilt->sql_stat_start = TRUE;
 	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
 	prebuilt->read_just_key = 0;
+        prebuilt->keep_other_fields_on_keyread = FALSE;
 
 	if (!prebuilt->mysql_has_locked) {
 	        /* This handle is for a temporary table created inside
@@ -4604,6 +4612,7 @@ ha_innobase::external_lock(
 	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
 
 	prebuilt->read_just_key = 0;
+	prebuilt->keep_other_fields_on_keyread = FALSE;
 
 	if (lock_type == F_WRLCK) {
 
@@ -5000,4 +5009,52 @@ ha_innobase::get_auto_increment()
 	return(nr);
 }
 
+int
+ha_innobase::cmp_ref(
+	const mysql_byte *ref1,
+	const mysql_byte *ref2)
+{
+	row_prebuilt_t*	 prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	enum_field_types mysql_type;
+	Field*		 field;
+	int result;
+
+	if (prebuilt->clust_index_was_generated)
+		return memcmp(ref1, ref2, DATA_ROW_ID_LEN);
+
+	/* Do type-aware comparison of Primary Key members. PK members
+	are always NOT NULL, so no checks for NULL are performed */
+	KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part;
+	KEY_PART_INFO *key_part_end= 
+	  key_part + table->key_info[table->primary_key].key_parts;
+	for (; key_part != key_part_end; ++key_part) {
+		field = key_part->field;
+		mysql_type = field->type();
+		if (mysql_type == FIELD_TYPE_TINY_BLOB
+		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
+		    || mysql_type == FIELD_TYPE_BLOB
+		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
+		    
+			ut_a(!ref1[1]);
+			ut_a(!ref2[1]);
+			byte len1= *ref1;
+			byte len2= *ref2;
+			ref1 += 2;
+			ref2 += 2;
+			result =
+			  ((Field_blob*)field)->cmp((const char*)ref1, len1,
+			                            (const char*)ref2, len2);
+		} else {
+			result = 
+			  field->cmp((const char*)ref1, (const char*)ref2);
+		}
+
+		if (result)
+			return result;
+		ref1 += key_part->length;
+		ref2 += key_part->length;
+	}
+	return 0;
+}
+
 #endif /* HAVE_INNOBASE_DB */
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index c305a019fcd..ff224370df1 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -188,6 +188,7 @@ class ha_innobase: public handler
 	longlong get_auto_increment();
         uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
         bool primary_key_is_clustered() { return true; }
+        int cmp_ref(const byte *ref1, const byte *ref2);
 };
 
 extern bool innodb_skip;
diff --git a/sql/handler.h b/sql/handler.h
index 4a8abf5f131..386b9fa4d7d 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -382,6 +382,11 @@ public:
      false otherwise
   */
   virtual bool primary_key_is_clustered() { return false; }
+
+  virtual int cmp_ref(const byte *ref1, const byte *ref2)
+  {
+    return memcmp(ref1, ref2, ref_length);
+  }
 };
 
 	/* Some extern variables used with handlers */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index fa1b80f007e..ad48f3050c0 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -23,6 +23,19 @@
 
 */
 
+/*
+  Classes in this file are used in the following way:
+  1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects 
+     is created. #of rows in table and index statistics are ignored at this 
+     step.
+  2. Created SEL_TREE and index stats data are used to construct a 
+     TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read 
+     plans may be created. 
+  3. The least expensive table read plan is used to create a tree of 
+     QUICK_SELECT_I-derived objects which are later used for row retrieval.
+     QUICK_RANGEs are also created in this step.
+*/
+
 #ifdef __GNUC__
 #pragma implementation				// gcc: Class implementation
 #endif
@@ -167,6 +180,22 @@ public:
     min_value=arg->max_value;
     min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
   }
+  void store_min(uint length,char **min_key,uint min_key_flag)
+  {
+    if ((min_flag & GEOM_FLAG) ||
+        (!(min_flag & NO_MIN_RANGE) &&
+	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
+    {
+      if (maybe_null && *min_value)
+      {
+	**min_key=1;
+	bzero(*min_key+1,length);
+      }
+      else
+	memcpy(*min_key,min_value,length+(int) maybe_null);
+      (*min_key)+= length+(int) maybe_null;
+    }
+  }
   void store(uint length,char **min_key,uint min_key_flag,
 	     char **max_key, uint max_key_flag)
   {
@@ -280,8 +309,21 @@ public:
     bzero((char*) keys,sizeof(keys));
   }
   SEL_ARG *keys[MAX_KEY];
-  key_map keys_map;        /* bitmask of non-NULL elements in keys         */
-  List<SEL_IMERGE> merges; /* possible ways to read rows using index_merge */
+  key_map keys_map;        /* bitmask of non-NULL elements in keys */
+
+  /* 
+    Possible ways to read rows using index_merge. The list is non-empty only 
+    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
+  */
+  List<SEL_IMERGE> merges;
+  
+  /* The members below are filled/used only after get_mm_tree is done */
+  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
+  uint    n_ror_scans;
+
+  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
+  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
+  /* Note that #records for each key scan is stored in table->quick_rows */
 };
 
 
@@ -291,22 +333,44 @@ typedef struct st_qsel_param {
   KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY];
   MEM_ROOT *mem_root;
   table_map prev_tables,read_tables,current_table;
-  uint baseflag, keys, max_key_part, range_count;
-  uint real_keynr[MAX_KEY];
+  uint baseflag, max_key_part, range_count;
+    
+  uint keys; /* number of keys used in the query */
+
+  /* used_key_no -> table_key_no translation table */
+  uint real_keynr[MAX_KEY]; 
+
   char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
     max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
   bool quick;				// Don't calulate possible keys
 
+  uint fields_bitmap_size;    
+  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
+
+  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */
+
   uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
   uint imerge_cost_buff_size; /* size of the buffer */
+  
+  bool is_ror_scan; /* true if last checked tree->key can be used for ROR-scan */
 } PARAM;
 
+class TABLE_READ_PLAN;
+  class TRP_RANGE;
+  class TRP_ROR_INTERSECT;
+  class TRP_ROR_UNION;
+  class TRP_ROR_INDEX_MERGE;
+
+struct st_ror_scan_info;
+
 static SEL_TREE * get_mm_parts(PARAM *param,Field *field,
 			       Item_func::Functype type,Item *value,
 			       Item_result cmp_type);
 static SEL_ARG *get_mm_leaf(PARAM *param,Field *field,KEY_PART *key_part,
 			    Item_func::Functype type,Item *value);
 static SEL_TREE *get_mm_tree(PARAM *param,COND *cond);
+
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
 static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
 static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
 				char *min_key,uint min_key_flag,
@@ -315,25 +379,37 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
                                      SEL_ARG *key_tree, 
                                      MEM_ROOT *alloc = NULL);
-static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
-                                   key_map& needed_reg,
-                                   bool index_read_can_be_used,
-                                   double *read_time, 
-                                   ha_rows *records,
-                                   SEL_ARG*** key_to_read);
+static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+                                       bool index_read_must_be_used, 
+                                       double read_time);
+static
+TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+                                          bool force_index_only, 
+                                          double read_time,
+                                          bool *are_all_covering);
+static
+TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, 
+                                                   SEL_TREE *tree, 
+                                                   double read_time);
+static
+TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
+                                         double read_time);
 static int get_index_merge_params(PARAM *param, key_map& needed_reg,
                            SEL_IMERGE *imerge, double *read_time, 
                            ha_rows* imerge_rows);
-inline double get_index_only_read_time(PARAM* param, ha_rows records, 
+inline double get_index_only_read_time(const PARAM* param, ha_rows records, 
                                        int keynr);
 
 #ifndef DBUG_OFF
-static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick,
-                                   const key_map *needed_reg);
-void print_quick_sel_range(QUICK_RANGE_SELECT *quick, 
-                                  const key_map *needed_reg);
-
+static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
+                           const char *msg);
+static void print_ror_scans_arr(TABLE *table, const char *msg, 
+                                struct st_ror_scan_info **start, 
+                                struct st_ror_scan_info **end);
+static void print_rowid(byte* val, int len);
+static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
 #endif
+
 static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
@@ -629,7 +705,7 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, 
                                        bool no_alloc, MEM_ROOT *parent_alloc)
-  :dont_free(0),error(0),cur_range(NULL),range(0)
+  :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0)
 {
   index= key_nr;
   head=  table;
@@ -654,12 +730,22 @@ int QUICK_RANGE_SELECT::init()
 
 QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
 {  
+  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
   if (!dont_free)
   {
     file->index_end();
-    delete_dynamic(&ranges); /* ranges are allocated in alloc */
-    free_root(&alloc,MYF(0));
-  }
+    file->extra(HA_EXTRA_NO_KEYREAD);
+    delete_dynamic(&ranges); /* ranges are allocated in alloc */ 
+    if (free_file) 
+    {
+      DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
+                          free_file));
+      file->reset(); 
+      file->close(); 
+    } 
+    free_root(&alloc,MYF(0)); 
+  } 
+  DBUG_VOID_RETURN; 
 }
 
 
@@ -717,6 +803,257 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
   free_root(&alloc,MYF(0));
 }
 
+
+QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
+                                                       TABLE *table,
+                                                       bool retrieve_full_rows,
+                                                       MEM_ROOT *parent_alloc)
+  : cpk_quick(NULL), thd(thd_param), reset_called(false), 
+    need_to_fetch_row(retrieve_full_rows)
+{
+  index= MAX_KEY;
+  head= table;  
+  record= head->record[0];
+  if (!parent_alloc)
+    init_sql_alloc(&alloc,1024,0);
+  else
+    bzero(&alloc, sizeof(MEM_ROOT));
+  last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, 
+                                head->file->ref_length);
+}
+
+int QUICK_ROR_INTERSECT_SELECT::init()
+{
+  /* Check if last_rowid was allocated in ctor */
+  return !last_rowid;
+}
+
+
+/*
+  Init this quick select to be a ROR child scan. 
+  NOTE
+    QUICK_ROR_INTERSECT_SELECT::reset() may choose not to call this function
+    but reuse its handler object for doing one of range scans. It duplicates
+    a part of this function' code.
+  RETURN 
+    0  ROR child scan initialized, ok to use.
+    1  error
+*/
+
+int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler)
+{
+  handler *save_file= file;
+  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_child_scan");
+  
+  if (reuse_handler)
+  {
+    DBUG_PRINT("info", ("Reusing handler %p", file));
+    if (file->extra(HA_EXTRA_KEYREAD) ||
+        file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) |
+        init() || reset())
+    {
+      DBUG_RETURN(1);
+    }
+    else
+      DBUG_RETURN(0);
+  }
+
+  /* Create a separate handler object for this quick select */
+  if (free_file)
+  {
+    /* already have own 'handler' object. */
+    DBUG_RETURN(0);
+  }
+  
+  if (!(file= get_new_handler(head, head->db_type)))
+    goto failure;
+  DBUG_PRINT("info", ("Allocated new handler %p", file));
+  if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED))
+  {
+    /* Caller will free the memory */ 
+    goto failure;
+  }
+  
+  if (file->extra(HA_EXTRA_KEYREAD) || 
+      file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) ||
+      init() || reset())
+  {
+    file->close();
+    goto failure;
+  }
+  free_file=  true;
+  last_rowid= file->ref;
+  DBUG_RETURN(0);
+
+failure:
+  file= save_file;
+  DBUG_RETURN(1);
+}
+
+int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler)
+{
+  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
+  QUICK_RANGE_SELECT* quick;
+  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan");
+
+  /* Initialize all merged "children" quick selects */
+  quick_it.rewind();
+  DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler));
+  if (!need_to_fetch_row && reuse_handler)
+  {
+    quick= quick_it++;
+    /* 
+      There is no use for this->file. Reuse it for first of merged range 
+      selects.
+    */
+    if (quick->init_ror_child_scan(true))
+      DBUG_RETURN(1);
+    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
+  }
+  while((quick= quick_it++))
+  {
+    if (quick->init_ror_child_scan(false))
+      DBUG_RETURN(1);
+    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
+    /* Share the same record structure in intersect*/
+    quick->record= head->record[0];
+  }
+
+  if (need_to_fetch_row && head->file->rnd_init())
+  {
+    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
+    DBUG_RETURN(1);
+  }
+  DBUG_RETURN(0);
+}
+
+int QUICK_ROR_INTERSECT_SELECT::reset()
+{
+  int result;
+  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
+  result= init_ror_child_scan(true);
+  DBUG_RETURN(result);
+}
+
+bool 
+QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
+{
+  bool res;
+  if (head->file->primary_key_is_clustered() && 
+      quick->index == head->primary_key)
+  {
+    cpk_quick= quick;
+    res= 0;
+  }
+  else
+    res= quick_selects.push_back(quick);
+  return res;
+}
+
+QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
+{  
+  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
+  quick_selects.delete_elements(); 
+  delete cpk_quick;
+  free_root(&alloc,MYF(0));
+  DBUG_VOID_RETURN;
+}
+
+QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
+                                               TABLE *table)
+  : thd(thd_param), reset_called(false)
+{
+  index= MAX_KEY;
+  head= table;
+  rowid_length= table->file->ref_length;
+  record= head->record[0];
+  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
+  my_pthread_setspecific_ptr(THR_MALLOC,&alloc);
+}
+
+int QUICK_ROR_UNION_SELECT::init()
+{
+  if (init_queue(&queue, quick_selects.elements, 0,
+                 false , QUICK_ROR_UNION_SELECT::queue_cmp,
+                 (void*) this))
+  {
+    bzero(&queue, sizeof(QUEUE));
+    return 1;
+  }
+  
+  if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length)))
+    return 1;
+  prev_rowid= cur_rowid + head->file->ref_length;
+  return 0;
+}
+
+/*
+  Comparison function to be used by priority queue.
+  SYNPOSIS
+    QUICK_ROR_UNION_SELECT::queue_cmp()
+      arg   Pointer to QUICK_ROR_UNION_SELECT
+      val1  First merged select
+      val2  Second merged select
+*/
+int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2)
+{
+  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;    
+  return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
+                                   ((QUICK_SELECT_I*)val2)->last_rowid);
+}
+
+int QUICK_ROR_UNION_SELECT::reset()
+{
+  QUICK_SELECT_I* quick;
+  int error;
+  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
+  have_prev_rowid= false;
+  /* 
+    Initialize scans for merged quick selects and put all merged quick 
+    selects into the queue.
+  */
+  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  while ((quick= it++))
+  {
+    if (quick->init_ror_child_scan(false))
+      DBUG_RETURN(1);    
+    if ((error= quick->get_next()))
+    {
+      if (error == HA_ERR_END_OF_FILE)
+        continue;
+      else
+        DBUG_RETURN(error);
+    }
+    quick->save_last_pos();
+    queue_insert(&queue, (byte*)quick);
+  }
+
+  if (head->file->rnd_init())
+  {
+    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
+    DBUG_RETURN(1);
+  }
+
+  DBUG_RETURN(0);
+}
+
+
+bool 
+QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
+{
+  return quick_selects.push_back(quick_sel_range);
+}
+
+QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
+{
+  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
+  delete_queue(&queue);
+  quick_selects.delete_elements();  
+  free_root(&alloc,MYF(0));
+  DBUG_VOID_RETURN;
+}
+
+
 QUICK_RANGE::QUICK_RANGE()
   :min_key(0),max_key(0),min_length(0),max_length(0),
    flag(NO_MIN_RANGE | NO_MAX_RANGE)
@@ -879,6 +1216,136 @@ SEL_ARG *SEL_ARG::clone_tree()
   return root;
 }
 
+
+/*
+  Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived 
+  objects from table read plans.
+*/
+class TABLE_READ_PLAN
+{
+public:
+  /* 
+    Plan read cost, with or without cost of full row retrieval, depending 
+    on plan creation parameters.
+  */
+  double read_cost; 
+  ha_rows records; /* estimate of #rows to be examined */
+
+  bool is_ror;
+  /* Create quck select for this plan */
+  virtual QUICK_SELECT_I *make_quick(PARAM *param,
+                                     bool retrieve_full_rows,
+                                     MEM_ROOT *parent_alloc=NULL) = 0;
+
+  /* Table read plans are allocated on MEM_ROOT and must not be deleted */
+  static void *operator new(size_t size, MEM_ROOT *mem_root)
+  { return (void*) alloc_root(mem_root, (uint) size); }
+  static void operator delete(void *ptr,size_t size) {}
+};
+
+class TRP_ROR_INTERSECT;
+class TRP_ROR_UNION;
+class TRP_INDEX_MERGE;
+
+
+class TRP_RANGE : public TABLE_READ_PLAN
+{
+public:
+  SEL_ARG *key;
+  uint     key_idx; /* key number in param->keys */
+  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) 
+   : key(key_arg), key_idx(idx_arg)
+  {}
+  
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+                             MEM_ROOT *parent_alloc)
+  {
+    DBUG_ENTER("TRP_RANGE::make_quick");
+    QUICK_RANGE_SELECT *quick;
+    /* ignore retrieve_full_rows there as it is set/used elsewhere */
+    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
+    {
+      quick->records= records;
+      quick->read_time= read_cost;
+    }
+    DBUG_RETURN(quick);
+  }
+};
+
+class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
+{
+public:
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
+                             MEM_ROOT *parent_alloc);
+  struct st_ror_scan_info **first_scan;
+  struct st_ror_scan_info **last_scan;
+  bool is_covering; /* true if no row retrieval phase is necessary */
+  double index_scan_costs;
+};
+
+/* 
+  ROR-union is currently never covering.
+*/
+class TRP_ROR_UNION : public TABLE_READ_PLAN
+{
+public:
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
+                             MEM_ROOT *parent_alloc);
+  TABLE_READ_PLAN **first_ror;  
+  TABLE_READ_PLAN **last_ror;
+  
+};
+
+class TRP_INDEX_MERGE : public TABLE_READ_PLAN
+{
+public:
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
+                             MEM_ROOT *parent_alloc);
+  TRP_RANGE **range_scans;
+  TRP_RANGE **range_scans_end;
+};
+
+
+/* 
+  Fill param->needed_fields with bitmap of fields used in the query
+  NOTE
+    Do not put clustered PK members into it as they are implicitly present in
+    all keys.
+*/
+
+static int fill_used_fields_bitmap(PARAM *param)
+{
+  TABLE *table= param->table;
+  param->fields_bitmap_size= (table->fields/8 + 1);
+  uchar *tmp;
+  uint pk;
+  if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) ||
+      bitmap_init(&param->needed_fields, tmp, param->fields_bitmap_size*8, 
+                  false))
+    return 1;
+  
+  bitmap_clear_all(&param->needed_fields);
+  for (uint i= 0; i < table->fields; i++)
+  {
+    if (param->thd->query_id == table->field[i]->query_id)
+      bitmap_set_bit(&param->needed_fields, i+1);
+  }
+
+  pk= param->table->primary_key;
+  if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY)
+  {
+    KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
+    KEY_PART_INFO *key_part_end= key_part + 
+                                 param->table->key_info[pk].key_parts;
+    for(;key_part != key_part_end; ++key_part)
+    {
+      bitmap_clear_bit(&param->needed_fields, key_part->fieldnr);
+    }
+  }
+  return 0;
+}
+
+
 /*
   Test if a key can be used in different ranges
 
@@ -887,11 +1354,12 @@ SEL_ARG *SEL_ARG::clone_tree()
                                  limit, force_quick_range)
 
    Updates the following in the select parameter:
-    needed_reg - Bits for keys with may be used if all prev regs are read
-    quick      - Parameter to use when reading records.
+     needed_reg - Bits for keys with may be used if all prev regs are read
+     quick      - Parameter to use when reading records.
+   
    In the table struct the following information is updated:
-    quick_keys - Which keys can be used
-    quick_rows - How many rows the key matches
+     quick_keys - Which keys can be used
+     quick_rows - How many rows the key matches
 
  RETURN VALUES
   -1 if impossible select
@@ -910,7 +1378,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   uint basflag;
   uint idx;
   double scan_time;
-  QUICK_INDEX_MERGE_SELECT *quick_imerge= NULL;
   DBUG_ENTER("test_quick_select");
   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
 		      keys_to_use.to_ulonglong(), (ulong) prev_tables,
@@ -956,13 +1423,17 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     param.table=head;
     param.keys=0;
     param.mem_root= &alloc;
+    param.needed_reg= &needed_reg;
+
     param.imerge_cost_buff_size= 0;
+    
 
     thd->no_errors=1;				// Don't warn about NULL
     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
     if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc,
 						   sizeof(KEY_PART)*
-						   head->key_parts)))
+						   head->key_parts))
+                              || fill_used_fields_bitmap(&param))
     {
       thd->no_errors=0;
       free_root(&alloc,MYF(0));			// Return memory & allocator
@@ -971,7 +1442,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     key_parts= param.key_parts;
     old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC);
     my_pthread_setspecific_ptr(THR_MALLOC,&alloc);
-
+    
+    /* 
+      Make an array with description of all key parts of all table keys. 
+      This is used in get_mm_parts function. 
+    */
     for (idx=0 ; idx < head->keys ; idx++)
     {
       if (!keys_to_use.is_set(idx))
@@ -1001,55 +1476,74 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     {
       if (tree->type == SEL_TREE::IMPOSSIBLE)
       {
-	records=0L;				// Return -1 from this function
+	records=0L;			// Return -1 from this function
 	read_time= (double) HA_POS_ERROR;
       }
       else if (tree->type == SEL_TREE::KEY ||
                tree->type == SEL_TREE::KEY_SMALLER)
       {
-        /*
+        TABLE_READ_PLAN *best_trp;
+        /* 
           It is possible to use a quick select (but maybe it would be slower
           than 'all' table scan).
+          Btw, tree type SEL_TREE::INDEX_MERGE was not introduced 
+          intentionally.
         */
-	SEL_ARG **best_key= 0;
-	ha_rows found_records;
-	double found_read_time= read_time;
-
-        if (!get_quick_select_params(tree, &param, needed_reg, false,
-                                     &found_read_time, &found_records,
-                                     &best_key))
+        if (tree->merges.is_empty())
         {
+          double best_read_time= read_time;
+          TRP_ROR_INTERSECT *new_trp;
+          bool can_build_covering= false;
+        
+          /* Get best 'range' plan and prepare data for making other plans */
+          if ((best_trp= get_key_scans_params(&param, tree, false, 
+                                              best_read_time)))
+            best_read_time= best_trp->read_cost;
+          
           /* 
-            Ok, quick select is better than 'all' table scan and we have its 
-            parameters, so construct it.
+            Simultaneous key scans and row deletes on several handler
+            objects are not allowed so don't use ROR-intersection for 
+            table deletes.
           */
-          read_time= found_read_time;
-          records= found_records;
-
-          if ((quick= get_quick_select(&param,(uint) (best_key-tree->keys),
-                                       *best_key)) && (!quick->init()))
+          if (thd->lex->sql_command != SQLCOM_DELETE)
           {
-            quick->records= records;
-            quick->read_time= read_time;
+            /* 
+              Get best non-covering ROR-intersection plan and prepare data for 
+              building covering ROR-intersection
+            */
+            if ((new_trp= get_best_ror_intersect(&param, tree, false, 
+                                                 best_read_time, 
+                                                 &can_build_covering)))
+            {
+              best_trp= new_trp;
+              best_read_time= best_trp->read_cost;
+            }
+          
+            /* 
+              Try constructing covering ROR-intersect only if it looks possible 
+              and worth doing.
+            */
+            if (new_trp && !new_trp->is_covering && can_build_covering &&
+                (new_trp= get_best_covering_ror_intersect(&param, tree, 
+                                                          best_read_time)))
+              best_trp= new_trp;
           }
         }
-
-        /* 
-           Btw, tree type SEL_TREE::INDEX_MERGE was not introduced 
-           intentionally.
-        */
-
-        /* If no range select could be built, try using index_merge. */
-        if (!quick && !tree->merges.is_empty())
+        else
         {
+          /* Try creating index_merge/ROR-union scan. */
+          SEL_IMERGE *imerge;
+          TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
+          LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
           DBUG_PRINT("info",("No range reads possible,"
                              " trying to construct index_merge"));
-          SEL_IMERGE *imerge;
-          SEL_IMERGE *min_imerge= NULL;
-          double  min_imerge_read_time;
-          ha_rows min_imerge_records;
-	  LINT_INIT(min_imerge_records); // Protected by min_imerge
           
+          /* 
+            Calculate cost of 'all'+index_only scan if it is possible. 
+            It is possible that table can be read in two ways:
+             a) 'all' + index_only
+             b) index_merge without index_only.
+          */
           if (!head->used_keys.is_clear_all())
           {           
             int key_for_use= find_shortest_key(head, &head->used_keys);
@@ -1058,104 +1552,39 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
             read_time = get_index_only_read_time(&param, total_table_records,
                                                  key_for_use);
             DBUG_PRINT("info", 
-                       ("'all' scan will be using key %d, read time %g",
-                       key_for_use, read_time));
+                       ("'all'+'using index' scan will be using key %d, "
+                        "read time %g", key_for_use, read_time));
           }
-          
-          min_imerge_read_time=read_time;
-          /* 
-            Ok, read_time contains best 'all' read time.
-            Now look for index_merge with cost < read_time 
-           */
+
           List_iterator_fast<SEL_IMERGE> it(tree->merges);
           while ((imerge= it++))
           {
-            if (!get_index_merge_params(&param, needed_reg, imerge, 
-                                        &min_imerge_read_time, 
-                                        &min_imerge_records))
-              min_imerge= imerge;
-          }
-          
-          if (!min_imerge)
-            goto end_free;
-          
-          records= min_imerge_records;
-                    
-          /* Ok, using index_merge is the best option, so construct it. */
-          if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head)))
-            goto end_free;
-
-          quick->records= min_imerge_records;
-          quick->read_time= min_imerge_read_time;
-          
-          my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc);
-
-          QUICK_RANGE_SELECT *new_quick;
-          for (SEL_TREE **ptree = min_imerge->trees;
-               ptree != min_imerge->trees_next;
-               ptree++)
-          {
-            SEL_ARG **tree_best_key= 
-              min_imerge->best_keys[ptree - min_imerge->trees];
-            if ((new_quick= get_quick_select(&param,
-                                             (uint)(tree_best_key-
-                                             (*ptree)->keys),
-                                             *tree_best_key,
-                                             &quick_imerge->alloc)))
-            {
-              new_quick->records= min_imerge_records;
-              new_quick->read_time= min_imerge_read_time;
-              /*
-                QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC
-                pointing to its allocator, restore it back.
-              */
-              quick_imerge->last_quick_select= new_quick;
-
-              if (quick_imerge->push_quick_back(new_quick))
-              {
-                delete new_quick;
-                delete quick;
-                quick= quick_imerge= NULL;
-                goto end_free;
-              }
-            }
-            else
-            {
-              delete quick;
-              quick= quick_imerge= NULL;
-              goto end_free;
-            }
+            new_conj_trp= get_best_disjunct_quick(&param, imerge, read_time);
+            if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < 
+                                                   best_conj_trp->read_cost))
+              best_conj_trp= new_conj_trp;
           }
+          best_trp= best_conj_trp;
+        }
 
-          free_root(&alloc,MYF(0));
-          my_pthread_setspecific_ptr(THR_MALLOC,old_root);          
-          if (quick->init())
+        my_pthread_setspecific_ptr(THR_MALLOC, old_root);        
+        if (best_trp)
+        {
+          records= best_trp->records;
+          if (!(quick= best_trp->make_quick(&param, true)) || quick->init())
           {
             delete quick;
-            quick= quick_imerge= NULL;
-            DBUG_PRINT("error", 
-                       ("Failed to allocate index merge structures,"
-                       "falling back to full scan."));
+            quick= NULL;
           }
-          goto end;
-        }
+        }        
       }
     }
-end_free:
+    my_pthread_setspecific_ptr(THR_MALLOC, old_root);
     free_root(&alloc,MYF(0));			// Return memory & allocator
-    my_pthread_setspecific_ptr(THR_MALLOC,old_root);
-end:
     thd->no_errors=0;
   }
 
-  DBUG_EXECUTE("info",
-    {
-      if (quick_imerge)
-        print_quick_sel_imerge(quick_imerge, &needed_reg);
-      else
-        print_quick_sel_range((QUICK_RANGE_SELECT*)quick, &needed_reg);
-    }
-  );
+  DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
 
   /*
     Assume that if the user is using 'limit' we will only need to scan
@@ -1165,27 +1594,73 @@ end:
 }
 
 
-/*
-  Calculate index merge cost and save parameters for its construction.
-
-  SYNOPSIS 
-    get_index_merge_params()
-      param       in      parameter with structure.
-      needed_reg  in/out  needed_reg from this SQL_SELECT
-      imerge      in      index_merge description structure
-      read_time   in/out  in:  cost of an existing way to read a table
-                          out: cost of index merge
-      imerge_rows out     pessimistic estimate of # of rows to be retrieved
-
+/* 
+  Get cost of 'sweep' full row retrieveal of #records rows.
   RETURN
-    0   Cost of this index_merge is less than passed *read_time, 
-        *imerge_rows and *read_time contain new index_merge parameters.
-    1   Cost of this index_merge is more than *read_time, 
-        *imerge_rows and *read_time are not modified.
-   -1   error
+    0 - ok 
+    1 - sweep is more expensive then full table scan.
+*/
 
-  NOTES
-    index_merge_cost = 
+bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
+                          double index_reads_cost, double max_cost)
+{
+  if (param->table->file->primary_key_is_clustered())
+  {
+    *cost= param->table->file->read_time(param->table->primary_key, 
+                                         records, records);
+  }
+  else
+  {    
+    double n_blocks=
+      ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE);
+    double busy_blocks=
+      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
+    if (busy_blocks < 1.0)
+      busy_blocks= 1.0;
+    DBUG_PRINT("info",("sweep: nblocks= %g, busy_blocks=%g index_blocks=%g",
+                       n_blocks, busy_blocks, index_reads_cost));
+    /*
+      Disabled: Bail out if # of blocks to read is bigger than # of blocks in 
+      table data file.
+    if (max_cost != DBL_MAX  && (busy_blocks+index_reads_cost) >= n_blocks)
+      return 1;
+    */
+    JOIN *join= param->thd->lex->select_lex.join;
+    if (!join || join->tables == 1)
+    {
+      /* No join, assume reading is done in one 'sweep' */
+      *cost= busy_blocks*(DISK_SEEK_BASE_COST + 
+                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+    }
+    else
+    {
+      /* 
+        Possibly this is a join with source table being non-last table, so
+        assume that disk seeks are random here.
+      */
+      *cost = busy_blocks;
+    }
+  }
+  DBUG_PRINT("info",("returning cost=%g", *cost));
+  return 0;
+}
+
+
+/*
+  Get best plan for a SEL_IMERGE disjunctive expression.
+  SYNOPSIS
+    get_best_disjunct_quick()
+      param
+      imerge
+      read_time Don't create scans with cost > read_time
+  RETURN 
+    read plan
+    NULL - OOM or no read scan could be built.
+  
+  NOTES
+
+    index_merge cost is calculated as follows:
+    index_merge_cost = 
       cost(index_reads) +         (see #1)
       cost(rowid_to_row_scan) +   (see #2)
       cost(unique_use)            (see #3)
@@ -1231,158 +1706,247 @@ end:
        (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
 
     3. Cost of Unique use is calculated in Unique::get_use_cost function.
-*/
-
-static int get_index_merge_params(PARAM *param, key_map& needed_reg,
-                                  SEL_IMERGE *imerge, double *read_time,
-                                  ha_rows* imerge_rows)
-{
-  double  imerge_cost= 0.0; /* cost of this index_merge */
-  bool    imerge_too_expensive= false;
-  double  tree_read_time;
-  ha_rows tree_records;
-  bool    pk_is_clustered= param->table->file->primary_key_is_clustered();
-  bool    have_cpk_scan= false;
-  ha_rows records_for_unique= 0;
-  ha_rows cpk_records= 0;
   
-  DBUG_ENTER("get_index_merge_params");
-
-  /* allocate structs to save construction info */
-  imerge->best_keys=
-    (SEL_ARG***)alloc_root(param->mem_root,
-                           (imerge->trees_next - imerge->trees)*
-                           sizeof(void*));  
-  /*
-    PHASE 1: get the best keys to use for this index_merge 
-  */
+  ROR-union cost is calculated in the same way index_merge, but instead of 
+  Unique a priority queue is used. 
+     
+*/
 
+static
+TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
+                                           double read_time)
+{
+  SEL_TREE **ptree;
+  TRP_INDEX_MERGE *imerge_trp= NULL;
+  uint n_child_scans= imerge->trees_next - imerge->trees;
+  TRP_RANGE **range_scans;
+  TRP_RANGE **cur_child;
+  TRP_RANGE **cpk_scan= NULL;
+  bool imerge_too_expensive= false;
+  double imerge_cost= 0.0;
+  ha_rows cpk_scan_records= 0;
+  ha_rows non_cpk_scan_records= 0;
+  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+  bool all_scans_ror_able= true;
+  bool all_scans_rors= true;
+  uint unique_calc_buff_size;
+  TABLE_READ_PLAN **roru_read_plans;
+  TABLE_READ_PLAN **cur_roru_plan;
+  double roru_index_costs;
+  double blocks_in_index_read;
+  ha_rows roru_total_records;
+  double roru_intersect_part= 1.0;
+  double sweep_cost;
+  DBUG_ENTER("get_best_disjunct_quick");
+  DBUG_PRINT("info", ("Full table scan cost =%g", read_time));
+
+  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, 
+                                             sizeof(TRP_RANGE*)*
+                                             n_child_scans)))
+    DBUG_RETURN(NULL);
   /*
-    It may be possible to use different keys for index_merge scans, e.g. for
-    query like 
-    ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
-    we have to make choice between key1 and key2 for one scan and between
-    key3, key4 for another.
-    We assume we'll get the best if we choose the best key read inside each
-    of the conjuncts.
+    Collect best 'range' scan for each of disjuncts, and, while doing so,
+    analyze possibility of ROR scans. Also calculate some values needed by
+    other parts of the code.
   */
-  for (SEL_TREE **ptree= imerge->trees;
+  for (ptree= imerge->trees, cur_child= range_scans;
        ptree != imerge->trees_next;
-       ptree++)
+       ptree++, cur_child++)
   {
-    SEL_ARG **tree_best_key;
-    tree_read_time= *read_time;
-
-    if (get_quick_select_params(*ptree, param, needed_reg, true,
-                                &tree_read_time, &tree_records,
-                                &tree_best_key))
+    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
+                                        "tree in SEL_IMERGE"););
+    if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time)))
     {
       /*
         One of index scans in this index_merge is more expensive than entire
-        table read for another available option. The entire index_merge will 
-        be more expensive then, too. We continue here only to update 
-        SQL_SELECT members.
+        table read for another available option. The entire index_merge (and
+        any possible ROR-union) will be more expensive then, too. We continue
+        here only to update SQL_SELECT members.
       */
       imerge_too_expensive= true;
     }
-
     if (imerge_too_expensive)
       continue;
     
-    uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];
-    imerge->best_keys[ptree - imerge->trees]= tree_best_key;
-    imerge_cost += tree_read_time;
-
-    if (pk_is_clustered && keynr == param->table->primary_key)
+    imerge_cost += (*cur_child)->read_cost;
+    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
+    all_scans_rors &= (*cur_child)->is_ror;
+    if (pk_is_clustered && 
+        param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key)
     {
-      have_cpk_scan= true;
-      cpk_records= tree_records;
+      cpk_scan= cur_child;
+      cpk_scan_records= (*cur_child)->records;
     }
     else
-      records_for_unique += tree_records;
-  }  
-  DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost));
-
-  if (imerge_too_expensive)
-    DBUG_RETURN(1);
-  
-  if ((imerge_cost > *read_time) || 
-      ((records_for_unique + cpk_records) >= param->table->file->records) &&
-       *read_time != DBL_MAX)
-  {
-    /* Bail out if it is obvious that index_merge would be more expensive */
-    DBUG_RETURN(1);
+      non_cpk_scan_records += (*cur_child)->records;
   }
-
-  if (have_cpk_scan)
+  
+  DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost));
+  if (imerge_too_expensive || (imerge_cost > read_time) || 
+      (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) &&
+      read_time != DBL_MAX)
   {
     /* 
-      Add one ROWID comparison for each row retrieved on non-CPK scan.
-      (it is done in QUICK_RANGE_SELECT::row_in_ranges)
+      Bail out if it is obvious that both index_merge and ROR-union will be 
+      more expensive
     */
-    imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID;
+    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
+                        "full table scan, bailing out"));
+    DBUG_RETURN(NULL); 
   }
-
-  /* PHASE 2: Calculate cost(rowid_to_row_scan) */
-  if (pk_is_clustered)
+  if (all_scans_rors)
   {
-    imerge_cost +=
-      param->table->file->read_time(param->table->primary_key,
-                                    records_for_unique,
-                                    records_for_unique)
-       + rows2double(records_for_unique) / TIME_FOR_COMPARE;
-  }
-  else
-  {    
-    double n_blocks=
-      ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE);
-    double busy_blocks=
-       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, (double) records_for_unique));
-    
-    JOIN *join= param->thd->lex->select_lex.join;
-    if (!join || join->tables == 1)
-      imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST + 
-                                DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
-    else
-    {
-      /* 
-        It can be a join with source table being non-last table, so assume
-        that disk seeks are random here.
-        (TODO it is possible to determine if this is a last table in 'index
-         checked for each record' join)
-      */
-      imerge_cost += busy_blocks;
-    }
+    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
+    goto skip_to_ror_scan;
   }
-  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost));
-
-  /* PHASE 3: Add Unique operations cost */
-  register uint unique_calc_buff_size= 
-    Unique::get_cost_calc_buff_size(records_for_unique, 
+  blocks_in_index_read= imerge_cost; 
+  if (cpk_scan) 
+  { 
+    /*
+      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
+      is done in QUICK_RANGE_SELECT::row_in_ranges) 
+     */ 
+    imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; 
+  }
+
+  /* Calculate cost(rowid_to_row_scan) */
+  if (get_sweep_read_cost(param, non_cpk_scan_records, &sweep_cost, 
+                           blocks_in_index_read, read_time))
+    goto build_ror_index_merge;
+  imerge_cost += sweep_cost;
+  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", 
+                     imerge_cost));
+
+  /* Add Unique operations cost */
+  unique_calc_buff_size= 
+    Unique::get_cost_calc_buff_size(non_cpk_scan_records, 
                                     param->table->file->ref_length,
                                     param->thd->variables.sortbuff_size);
   if (param->imerge_cost_buff_size < unique_calc_buff_size)
   {
     if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                      unique_calc_buff_size)))
-      DBUG_RETURN(1);
+      DBUG_RETURN(NULL);
     param->imerge_cost_buff_size= unique_calc_buff_size;
   }
 
   imerge_cost += 
-    Unique::get_use_cost(param->imerge_cost_buff, records_for_unique,
+    Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records,
                          param->table->file->ref_length,
                          param->thd->variables.sortbuff_size);
+  DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", 
+                     imerge_cost, read_time));
+  if (imerge_cost < read_time)
+  {
+    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
+    {
+      imerge_trp->read_cost= imerge_cost;
+      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
+      imerge_trp->records= min(imerge_trp->records, 
+                               param->table->file->records);
+      imerge_trp->range_scans= range_scans;
+      imerge_trp->range_scans_end= range_scans + n_child_scans;
+      read_time= imerge_cost;
+    }
+  }
 
-  DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost));
-  if (imerge_cost < *read_time)
+build_ror_index_merge:  
+  if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE)
+    DBUG_RETURN(imerge_trp);
+  
+  /* Ok, it is possible to build a ROR-union, try it. */
+  bool dummy;
+  if (!(roru_read_plans= 
+          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
+                                        sizeof(TABLE_READ_PLAN*)*
+                                        n_child_scans)))
+    DBUG_RETURN(imerge_trp);
+skip_to_ror_scan:
+  roru_index_costs= 0.0;
+  roru_total_records= 0;
+  cur_roru_plan= roru_read_plans;
+
+  /* Find 'best' ROR scan for each of trees in disjunction */
+  for (ptree= imerge->trees, cur_child= range_scans;
+       ptree != imerge->trees_next;
+       ptree++, cur_child++, cur_roru_plan++)
   {
-    *read_time=   imerge_cost;
-    records_for_unique += cpk_records;
-    *imerge_rows= min(records_for_unique, param->table->file->records);    
-    DBUG_RETURN(0);
+    /*
+      Assume the best ROR scan is the one that has cheapest full-row-retrieval
+      scan cost. 
+      Also accumulate index_only scan costs as we'll need them to calculate 
+      overall index_intersection cost.
+    */
+    double cost;
+    if ((*cur_child)->is_ror)
+    {
+      /* Ok, we have index_only cost, now get full rows scan cost */
+      cost= param->table->file->
+              read_time(param->real_keynr[(*cur_child)->key_idx], 1, 
+                        (*cur_child)->records) +
+              rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
+    }
+    else
+      cost= read_time;
+
+    TABLE_READ_PLAN *prev_plan= *cur_child;
+    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost,
+                                                 &dummy)))
+    {
+      if (prev_plan->is_ror)
+        *cur_roru_plan= prev_plan;
+      else
+        DBUG_RETURN(imerge_trp);
+      roru_index_costs += (*cur_roru_plan)->read_cost;
+    }
+    else
+      roru_index_costs += 
+        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;     
+    roru_total_records += (*cur_roru_plan)->records;
+    roru_intersect_part *= (*cur_roru_plan)->records / 
+                           param->table->file->records;
   }
-  DBUG_RETURN(1);
+
+  /* 
+    rows to retrieve= 
+      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
+    This is valid because index_merge constructuion guarantees that conditions
+    in disjunction do not share key parts.
+  */
+  roru_total_records -= (ha_rows)(roru_intersect_part*
+                                  param->table->file->records); 
+  /* ok, got a ROR read plan for each of the disjuncts 
+    Calculate cost: 
+    cost(index_union_scan(scan_1, ... scan_n)) =
+      SUM_i(cost_of_index_only_scan(scan_i)) +
+      queue_use_cost(rowid_len, n) +
+      cost_of_row_retrieval
+    See get_merge_buffers_cost function for queue_use_cost formula derivation.
+  */
+  
+  if (get_sweep_read_cost(param, roru_total_records, &sweep_cost, 
+                           roru_index_costs, read_time))
+    DBUG_RETURN(NULL);
+  double roru_total_cost;
+  roru_total_cost= roru_index_costs +  
+                   rows2double(roru_total_records)*log(n_child_scans) / 
+                   (TIME_FOR_COMPARE_ROWID * M_LN2) +
+                   sweep_cost;  
+  DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, 
+                      n_child_scans));
+  TRP_ROR_UNION* roru;
+  if (roru_total_cost < read_time)
+  {
+    if ((roru= new (param->mem_root) TRP_ROR_UNION))
+    {
+      roru->first_ror= roru_read_plans;
+      roru->last_ror= roru_read_plans + n_child_scans;
+      roru->read_cost= roru_total_cost;
+      roru->records= roru_total_records;
+      DBUG_RETURN(roru);
+    }
+  }
+  DBUG_RETURN(imerge_trp);
 }
 
 
@@ -1401,7 +1965,7 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
     key blocks are half full (normally things are much better).
 */
 
-inline double get_index_only_read_time(PARAM* param, ha_rows records, 
+inline double get_index_only_read_time(const PARAM* param, ha_rows records, 
                                        int keynr)
 {
   double read_time;
@@ -1413,39 +1977,693 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
   return read_time; 
 }
 
+typedef struct st_ror_scan_info
+{
+  uint      idx;   /* # of used key in param->keys */
+  uint      keynr; /* # of used key in table */
+  ha_rows   records;
+  SEL_ARG   *sel_arg;
+  /* Fields used in the query and covered by this ROR scan */
+  MY_BITMAP covered_fields;  
+  uint      used_fields_covered;
+  int       key_rec_length; /* length of key record with rowid */
+
+  /*
+    Array of 
+     #rows(keypart_1=c1 AND ... AND key_part_i=c_i) / 
+     #rows(keypart_1=c1 AND ... AND key_part_{i+1}=c_{i+1}) values
+  */
+  double    *key_part_rows;
+  double    index_read_cost;
+  uint      first_uncovered_field;
+  uint      key_components;
+}ROR_SCAN_INFO;
+
+
+/*
+  Create ROR_SCAN_INFO* structure for condition sel_arg on key idx.
+  SYNOPSIS
+    make_ror_scan()
+      param
+      idx   index of key in param->keys
+  RETURN
+    NULL - OOM.
+*/
+
+static
+ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
+{
+  ROR_SCAN_INFO *ror_scan;
+  uchar *bitmap_buf;
+  uint keynr;
+  DBUG_ENTER("make_ror_scan");
+  if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
+                                             sizeof(ROR_SCAN_INFO))))
+    DBUG_RETURN(NULL);
+
+  ror_scan->idx= idx;
+  ror_scan->keynr= keynr= param->real_keynr[idx];
+  ror_scan->key_rec_length= param->table->key_info[keynr].key_length + 
+                            param->table->file->ref_length;
+  ror_scan->sel_arg= sel_arg;
+  ror_scan->records= param->table->quick_rows[keynr];
+  
+  if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, 
+                                      param->fields_bitmap_size)))
+    DBUG_RETURN(NULL);
+  
+  if (bitmap_init(&ror_scan->covered_fields, bitmap_buf,
+                  param->fields_bitmap_size*8, false))
+    DBUG_RETURN(NULL);
+  bitmap_clear_all(&ror_scan->covered_fields);
+  if (!(ror_scan->key_part_rows= 
+        (double*)alloc_root(param->mem_root, sizeof(double)*
+                            param->table->key_info[keynr].key_parts)))
+    DBUG_RETURN(NULL);
+  
+  KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
+  KEY_PART_INFO *key_part_end= key_part + 
+                               param->table->key_info[keynr].key_parts;
+  uint n_used_covered= 0;
+  for (;key_part != key_part_end; ++key_part)
+  {
+    if (bitmap_is_set(&param->needed_fields, key_part->fieldnr))
+    {
+      n_used_covered++;
+      bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr);
+    }
+  }
+  ror_scan->index_read_cost= 
+    get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
+                             ror_scan->keynr);
+  /* 
+    Calculate # rows estimates for 
+      (key_part1=c1)
+      (key_part1=c1 AND key_part2=c2) 
+      ...and so on
+  */
+  char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* values of current key */
+  char *key_ptr= key_val;
+  ha_rows records;
+  ha_rows prev_records= param->table->file->records;
+  double *rows_diff= ror_scan->key_part_rows;
+  key_part= param->table->key_info[keynr].key_part;
+  SEL_ARG *arg= ror_scan->sel_arg;
+  /*
+    We have #rows estimate already for first key part, so do first loop 
+    iteration separately:
+  */
+  arg->store_min(key_part->length, &key_ptr, 0);
+  prev_records= param->table->quick_rows[ror_scan->keynr]; 
+  *(rows_diff++)= rows2double(prev_records) / param->table->file->records;
+  arg=arg->next_key_part; 
+  for(; arg; ++key_part, arg= arg->next_key_part)
+  {    
+    arg->store_min(key_part->length, &key_ptr, 0);
+    records= 
+      param->table->file->records_in_range(ror_scan->keynr,
+                                           (byte*)key_val, key_ptr - key_val,
+                                           HA_READ_KEY_EXACT,
+                                           (byte*)key_val, key_ptr - key_val,
+                                           HA_READ_AFTER_KEY);
+    if (records == HA_POS_ERROR)
+      return NULL; /* shouldn't happen actually */
+    *(rows_diff++)= rows2double(records) / rows2double(prev_records);
+    prev_records= records;
+  }
+  DBUG_RETURN(ror_scan);
+}
+
+
+/*
+  Order ROR_SCAN_INFO** by 
+    E(#records_matched) * key_record_length
+*/
+int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+{
+  double val1= rows2double((*a)->records) * (*a)->key_rec_length;
+  double val2= rows2double((*b)->records) * (*b)->key_rec_length;
+  return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
+}
+
+/*
+  Order ROR_SCAN_INFO** by 
+   (#covered fields in F desc, 
+    #components asc,           
+    number of first not covered component asc)
+*/
+int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+{
+  if ((*a)->used_fields_covered > (*b)->used_fields_covered)
+    return -1;
+  if ((*a)->used_fields_covered < (*b)->used_fields_covered)
+    return 1;
+  if ((*a)->key_components < (*b)->key_components)
+    return -1;
+  if ((*a)->key_components > (*b)->key_components)
+    return 1;
+  if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
+    return -1;
+  if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
+    return 1;
+  return 0;
+}
+
+/* Auxiliary structure for incremental ROR-intersection creation */
+typedef struct 
+{
+  const PARAM *param;
+  MY_BITMAP covered_fields; /* union of fields covered by all scans */
+  
+  /* true if covered_fields is a superset of needed_fields */
+  bool is_covering;  
+  
+  double index_scan_costs; /* SUM(cost of 'index-only' scans) */  
+  double total_cost;
+  /* 
+    Fraction of table records that satisfies conditions of all scans.
+    This is the number of full records that will be retrieved if a 
+    non-index_only index intersection will be employed.
+  */
+  double records_fract;
+  ha_rows index_records; /* sum(#records to look in indexes) */
+}ROR_INTERSECT_INFO;
+
+
+/* Allocate a ROR_INTERSECT and initialize it to contain zero scans */
+ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
+{
+  ROR_INTERSECT_INFO *info;
+  uchar* buf;
+  if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, 
+                                              sizeof(ROR_INTERSECT_INFO))))
+    return NULL;
+  info->param= param;
+  info->is_covering= is_index_only;
+  info->index_scan_costs= 0.0f;
+  info->records_fract= 1.0f;
+
+  if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size)))
+    return NULL;
+  if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
+                  false))
+    return NULL;
+  bitmap_clear_all(&info->covered_fields);
+  return info;
+}
+
+/*
+  Check if it makes sense to add a ROR scan to ROR-intersection, and if yes 
+  update parameters of ROR-intersection, including its cost.
+
+  RETURN
+    true   ROR scan added to ROR-intersection, cost updated.
+    false  It doesn't make sense to add this ROR scan to this ROR-intersection.
+  
+  NOTE 
+    Adding a ROR scan to ROR-intersect "makes sense" iff selectivt
+
+    Cost of ROR-intersection is calulated as follows:
+     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
+ 
+    if (union of indexes used covers all needed fields)
+      cost_of_full_rows_retrieval= 0;
+    else
+    {
+      cost_of_full_rows_retrieval= 
+        cost_of_sweep_read(E(rows_to_retrive), rows_in_table);
+    }
+
+    E(rows_to_retrive) is calulated as follows: 
+    Suppose we have a condition on several keys
+    cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key 
+         k_21=c_21 AND k_22=c_22 AND ...  // parts of second key 
+          ...
+         k_n1=c_n1 AND k_n3=c_n3 AND ...  (1)
+    
+    where k_ij may be the same as any k_pq (i.e. keys may have common parts).
+
+    A full row is retrieved iff entire cond holds.
+
+    The recursive procedure for finding P(cond) is as follows:
+    
+    First step:
+    Pick 1st part of 1st key and break conjunction (1) into two parts: 
+      cond= (k_11=c_11 AND R)
+
+    Here R may still contain condition(s) equivalent to k_11=c_11. 
+    Nevertheless, the following holds:
+
+      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11). 
+
+    Mark k_11 as fixed field (and satisfied condition) F, save P(F),
+    save R to be cond and proceed to recursion step.
+
+    Recursion step:
+    We have set of fixed fields/satisfied conditions) F, probability P(F),
+    and remaining conjunction R
+    Pick next key part on current key and its condition "k_ij=c_ij".
+    We will add "k_ij=c_ij" into F and update P(F).
+    Lets denote k_ij as t,  R = t AND R1, where i1 may still contain t. Then
+
+     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R|(t AND F)) (2)
+
+    (where '|' mean conditional probability, not "or")
+
+    Consider the first multiplier in (2). One of the following holds:
+    a) F contains condition on field used in t (i.e. t AND F = F).
+      Then P(t|F) = 1
+
+    b) F doesn't contain condition on field used in t. Then F and t are 
+     considered independent. 
+
+     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = 
+          = P(t|fields_before_t_in_key).
+
+     P(t|fields_before_t_in_key)= #distinct_values(fields_before_t_in_key) / 
+                                  #distinct_values(fields_before_t_in_key, t)
+    
+    The second multiplier is calculated by applying this step recusively. 
+
+    This function applies recursion steps for all fixed key members of 
+    one key, accumulating sets of covered fields and 
+    The very first step described is done as recursion step with 
+    P(fixed fields)=1 and empty set of fixed fields.
+*/
+
+bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan)
+{
+  int i;
+  SEL_ARG *sel_arg;
+  KEY_PART_INFO *key_part= 
+    info->param->table->key_info[ror_scan->keynr].key_part;
+  double selectivity_mult= 1.0;
+  DBUG_ENTER("ror_intersect_add");  
+  DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); 
+  DBUG_PRINT("info", ("Adding scan on %s", 
+                      info->param->table->key_info[ror_scan->keynr].name));
+  for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; 
+      i++, sel_arg= sel_arg->next_key_part)
+  {
+    if (!bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr))
+    {
+      /*
+        P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
+               = P(t|fields_before_t_in_key).
+      */
+      selectivity_mult *= ror_scan->key_part_rows[i];
+    }
+  }
+  if (selectivity_mult == 1.0)
+  {
+    /* Don't add this scan if it doesn't improve selectivity. */
+    DBUG_PRINT("info", ("This scan doesn't improve selectivity.")); 
+    DBUG_RETURN(false);
+  }
+  info->records_fract *= selectivity_mult;
+
+  bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
+
+  ha_rows scan_records= info->param->table->quick_rows[ror_scan->keynr];
+  info->index_scan_costs += ror_scan->index_read_cost;
+
+  if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
+                                             &info->covered_fields))
+  {
+    DBUG_PRINT("info", ("ROR-intersect is covering now"));
+    /* ROR-intersect became covering */ 
+    info->is_covering= true;
+  }
+  
+  info->index_records += scan_records;
+  info->total_cost= info->index_scan_costs;
+  if (!info->is_covering)
+  {
+    ha_rows table_recs= info->param->table->file->records;
+    double sweep_cost;
+    get_sweep_read_cost(info->param, 
+                         (ha_rows)(table_recs*info->records_fract), 
+                         &sweep_cost, info->index_scan_costs, DBL_MAX);
+
+    info->total_cost += sweep_cost;
+  }
+  DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); 
+  DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, 
+                      info->is_covering?"" : "non-"));
+  DBUG_RETURN(true);
+}
+
+/* 
+  Get best ROR-intersection plan using non-covering ROR-intersection search 
+  algorithm. The returned plan may be covering.
+
+  SYNOPSIS
+    get_best_ror_intersect()
+      param
+      tree
+      force_index_only If true, don't calculate costs of full rows retrieval.
+      read_time        Do not return read plans with cost > read_time.
+      are_all_covering [out] set to true if union of all scans covers all 
+                       fields needed by the query (and it is possible to build
+                       a covering ROR-intersection)
+  RETURN
+    ROR-intersection table read plan 
+    NULL if OOM or no plan found.
+
+  NOTE
+    get_key_scans_params must be called before for the same SEL_TREE before 
+    this function can be called.
+    
+    The approximate best non-covering plan search algorithm is as follows:
+
+    find_min_ror_intersection_scan()
+    {
+      R= select all ROR scans;
+      order R by (E(#records_matched) * key_record_length).
+  
+      S= first(R); -- set of scans that will be used for ROR-intersection
+      R= R-first(S);
+      min_cost= cost(S);
+      min_scan= make_scan(S);
+      while (R is not empty)
+      {
+        if (!selectivity(S + first(R) < selectivity(S)))
+          continue;
+
+        S= S + first(R);
+        R= R - first(R);
+        if (cost(S) < min_cost)
+        {
+          min_cost= cost(S);
+          min_scan= make_scan(S);
+        }
+      }
+      return min_scan;
+    }
+
+    See ror_intersect_add function for ROR intersection costs.
+
+    Special handling for Clustered PK scans
+    Clustered PK contains all table fields, so using it as a regular scan in 
+    index intersection doesn't make sense: a range scan on CPK will be less 
+    expensive in this case.
+    Clustered PK scan has special handling in ROR-intersection: it is not used
+    to retrieve rows, instead its condition is used to filter row references 
+    we get from scans on other keys.
+*/
+
+static
+TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+                                          bool force_index_only, 
+                                          double read_time,
+                                          bool *are_all_covering)
+{
+  uint idx;
+  double min_cost= read_time;
+  DBUG_ENTER("get_best_ror_intersect");
+
+  if (tree->n_ror_scans < 2)
+    DBUG_RETURN(NULL);
+
+  /* Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them */
+  ROR_SCAN_INFO **cur_ror_scan;
+  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+                                                     sizeof(ROR_SCAN_INFO*)*
+                                                     param->keys)))
+    return NULL;
+  
+  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
+  {
+    if (!tree->ror_scans_map.is_set(idx))
+      continue;
+    if (!(*cur_ror_scan= make_ror_scan(param, idx, tree->keys[idx])))
+      return NULL;
+    cur_ror_scan++;
+  }
+
+  tree->ror_scans_end= cur_ror_scan;
+  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", 
+                                          tree->ror_scans, 
+                                          tree->ror_scans_end););
+  /*
+    Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized 
+    ROR_SCAN_INFOs.
+    Get a minimal key scan using an approximate algorithm.
+  */
+  qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
+        (qsort_cmp)cmp_ror_scan_info);
+  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", 
+                                          tree->ror_scans, 
+                                          tree->ror_scans_end););
+  
+  ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
+  ROR_SCAN_INFO **intersect_scans_end;
+  if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+                                                     sizeof(ROR_SCAN_INFO*)*
+                                                     tree->n_ror_scans)))
+    return NULL;
+  intersect_scans_end= intersect_scans;
+
+  /* Create and incrementally update ROR intersection. */
+  ROR_INTERSECT_INFO *intersect;
+  if (!(intersect= ror_intersect_init(param, false)))
+    return NULL;
+  
+  /* [intersect_scans, intersect_scans_best) will hold the best combination */
+  ROR_SCAN_INFO **intersect_scans_best= NULL; 
+  ha_rows       best_rows;
+  bool          is_best_covering;
+  double        best_index_scan_costs;
+  LINT_INIT(best_rows);       /* protected by intersect_scans_best */
+  LINT_INIT(is_best_covering);
+  LINT_INIT(best_index_scan_costs);
+
+  cur_ror_scan= tree->ror_scans;
+  while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
+  {
+    /* S= S + first(R); */
+    if (ror_intersect_add(intersect, *cur_ror_scan))
+      *(intersect_scans_end++)= *cur_ror_scan; 
+    /* R= R-first(R); */
+    cur_ror_scan++;
+    
+    if (intersect->total_cost < min_cost)
+    {
+      /* Local minimum found, save it */
+      min_cost= intersect->total_cost;
+      best_rows= (ha_rows)(intersect->records_fract* 
+                           rows2double(param->table->file->records));
+      is_best_covering= intersect->is_covering;
+      intersect_scans_best= intersect_scans_end;
+      best_index_scan_costs= intersect->index_scan_costs;
+    }
+  }
+
+  /* Ok, return ROR-intersect plan if we have found one */
+  *are_all_covering= intersect->is_covering;
+  uint best_num= intersect_scans_best - intersect_scans;  
+  TRP_ROR_INTERSECT *trp= NULL;
+  if (intersect_scans_best && best_num > 1)
+  {
+    DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
+                                            "used for ROR-intersect",
+                                            intersect_scans,
+                                            intersect_scans_best););
+    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
+      DBUG_RETURN(trp);
+    if (!(trp->first_scan= 
+           (ROR_SCAN_INFO**)alloc_root(param->mem_root, 
+                                       sizeof(ROR_SCAN_INFO*)*best_num)))
+      DBUG_RETURN(NULL);
+    memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
+    trp->last_scan=  trp->first_scan + best_num;
+    trp->is_covering= is_best_covering;
+    trp->read_cost= min_cost;
+    trp->records= best_rows? best_rows : 1;
+    trp->index_scan_costs= best_index_scan_costs;
+  }  
+  DBUG_RETURN(trp);
+}
+
 
 /*
-  Calculate quick range select read time, # of records, and best key to use
-  without constructing QUICK_RANGE_SELECT object.
+  Get best covering ROR-intersection.
+  SYNOPSIS
+    get_best_covering_ror_intersect()
+      param
+      tree      SEL_TREE
+      read_time Dont return table read plans with cost > read_time.
+
+  RETURN 
+    Best covering ROR-intersection plan 
+    NULL if no plan found.
+
+  NOTE
+    get_best_ror_intersect must be called for a tree before calling this
+    function for it. 
+    This function invalidates tree->ror_scans member values.
+  
+  The following approximate algorithm is used:
+    I=set of all covering indexes
+    F=set of all fields to cover
+    S={}
+
+    do {
+      Order I by (#covered fields in F desc,
+                  #components asc,
+                  number of first not covered component asc);
+      F=F-covered by first(I);
+      S=S+first(I);
+      I=I-first(I);
+    } while F is not empty.
+*/
+
+static
+TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, 
+                                                   SEL_TREE *tree, 
+                                                   double read_time)
+{
+  ROR_SCAN_INFO **ror_scan_mark;
+  ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;  
+  DBUG_ENTER("get_best_covering_ror_intersect");
+  uint nbits= param->fields_bitmap_size*8;
+
+  for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
+    (*scan)->key_components= 
+      param->table->key_info[(*scan)->keynr].key_parts;
+    
+  /*
+    Run covering-ROR-search algorithm.
+    Assume set I is [ror_scan .. ror_scans_end) 
+  */
+  
+  /*I=set of all covering indexes */
+  ror_scan_mark= tree->ror_scans;
+  
+  uchar buf[MAX_KEY/8+1];
+  MY_BITMAP covered_fields;
+  if (bitmap_init(&covered_fields, buf, nbits, false))
+    DBUG_RETURN(0);
+  bitmap_clear_all(&covered_fields);
+
+  double total_cost= 0.0f;
+  ha_rows records=0;
+  bool all_covered;  
+  
+  /* Start will all scans and remove one by one until */
+  DBUG_PRINT("info", ("Building covering ROR-intersection"));
+  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+                                           "building covering ROR-I",
+                                           ror_scan_mark, ror_scans_end););
+  do {
+    /*
+      Update changed sorting info: 
+        #covered fields,
+	number of first not covered component 
+      Calculate and save these values for each of remaining scans.
+    */
+    for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
+    {
+      bitmap_subtract(&(*scan)->covered_fields, &covered_fields);
+      (*scan)->used_fields_covered= 
+        bitmap_bits_set(&(*scan)->covered_fields);
+      (*scan)->first_uncovered_field= 
+        bitmap_get_first(&(*scan)->covered_fields);
+    }
+
+    qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*),
+          (qsort_cmp)cmp_ror_scan_info_covering);
+
+    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+                                             "remaining scans",
+                                             ror_scan_mark, ror_scans_end););
+    
+    /* I=I-first(I) */
+    total_cost += (*ror_scan_mark)->index_read_cost;
+    records += (*ror_scan_mark)->records;
+    DBUG_PRINT("info", ("Adding scan on %s", 
+                        param->table->key_info[(*ror_scan_mark)->keynr].name));
+    if (total_cost > read_time)
+      DBUG_RETURN(NULL);
+    /* F=F-covered by first(I) */
+    bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields);
+    all_covered= bitmap_is_subset(&param->needed_fields, &covered_fields);
+  } while (!all_covered && (++ror_scan_mark < ror_scans_end));
+  
+  if (!all_covered)
+    DBUG_RETURN(NULL); /* should not happen actually */
+
+  /*
+    Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
+    cost total_cost.
+  */
+  DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
+  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+                                           "creating covering ROR-intersect",
+                                           tree->ror_scans, ror_scan_mark););
+  
+  /* Add priority queue use cost. */
+  total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) / 
+                (TIME_FOR_COMPARE_ROWID * M_LN2);
+  DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));
+
+  if (total_cost > read_time)
+    DBUG_RETURN(NULL);
+
+  TRP_ROR_INTERSECT *trp;
+  if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
+    DBUG_RETURN(trp);
+  uint best_num= (ror_scan_mark - tree->ror_scans);
+  if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+                                                     sizeof(ROR_SCAN_INFO*)*
+                                                     best_num)))
+    DBUG_RETURN(NULL);
+  memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*));
+  trp->last_scan=  trp->first_scan + best_num;
+  trp->is_covering= true;
+  trp->read_cost= total_cost;
+  trp->records= records;
+
+  DBUG_RETURN(trp);
+}
+
+
+/*
+  Get best "range" table read plan for given SEL_TREE. 
+  Also update PARAM members and store ROR scans info in the SEL_TREE.
   SYNOPSIS
     get_quick_select_params
-      tree        in         make range select for this SEL_TREE 
-      param       in         parameters from test_quick_select
-      needed_reg  in/out     other table data needed by this quick_select
-      index_read_must_be_used if true, assume 'index only' option will be set 
+      param        parameters from test_quick_select
+      tree         make range select for this SEL_TREE 
+      index_read_must_be_used if true, assume 'index only' option will be set
                              (except for clustered PK indexes)
-      read_time   out        read time estimate
-      records     out        # of records estimate
-      key_to_read out        SEL_ARG to be used for creating quick select
+      read_time    don't create read plans with cost > read_time.
+  RETURN
+    Best range read plan 
+    NULL if no plan found or error occurred
 */
 
-static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
-                                   key_map& needed_reg,
-                                   bool index_read_must_be_used,
-                                   double *read_time, ha_rows *records,
-                                   SEL_ARG ***key_to_read)
+static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+                                       bool index_read_must_be_used, 
+                                       double read_time)
 {
   int idx;
-  int result = 1;
+  SEL_ARG **key,**end, **key_to_read= NULL;
+  ha_rows best_records;
+  TRP_RANGE* read_plan= NULL;
   bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+  DBUG_ENTER("get_key_scans_params");
+  LINT_INIT(best_records); /* protected by key_to_read */
   /*
     Note that there may be trees that have type SEL_TREE::KEY but contain no 
     key reads at all, e.g. tree for expression "key1 is not null" where key1 
     is defined as "not null".
-  */
-  SEL_ARG **key,**end;
-
-  for (idx= 0,key=tree->keys, end=key+param->keys ;
+  */  
+  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, 
+                                      "tree scans"););
+  tree->ror_scans_map.clear_all();
+  tree->n_ror_scans= 0;
+  for (idx= 0,key=tree->keys, end=key+param->keys;
        key != end ;
        key++,idx++)
   {
@@ -1456,43 +2674,157 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
       uint keynr= param->real_keynr[idx];
       if ((*key)->type == SEL_ARG::MAYBE_KEY ||
           (*key)->maybe_flag)
-        needed_reg.set_bit(keynr);
+        param->needed_reg->set_bit(keynr);
       
       bool read_index_only= index_read_must_be_used? true :
                             (bool)param->table->used_keys.is_set(keynr);
-      found_records=check_quick_select(param, idx, *key);
 
-      if (found_records != HA_POS_ERROR && found_records > 2 &&
-          read_index_only &&
-          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) &&
-          !(pk_is_clustered && keynr == param->table->primary_key))
-      {
-        /* We can resolve this by only reading through this key. */
-        found_read_time=get_index_only_read_time(param, found_records, keynr);
-      }
-      else
-      {
-        /* 
-          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
-          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
-        */
-	found_read_time= (param->table->file->read_time(keynr,
-						        param->range_count,
-						        found_records)+
-			  (double) found_records / TIME_FOR_COMPARE);
-      }
-      if (*read_time > found_read_time && found_records != HA_POS_ERROR)
+      found_records= check_quick_select(param, idx, *key);
+      if (param->is_ror_scan)
+      {
+        tree->n_ror_scans++;
+        tree->ror_scans_map.set_bit(idx);
+      }
+      if (found_records != HA_POS_ERROR && found_records > 2 &&
+          read_index_only &&
+          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) &&
+          !(pk_is_clustered && keynr == param->table->primary_key))
+      {
+        /* We can resolve this by only reading through this key. */
+        found_read_time= get_index_only_read_time(param,found_records,keynr);
+      }
+      else
+      {
+        /* 
+          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
+          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
+        */
+	found_read_time= (param->table->file->read_time(keynr,
+						        param->range_count,
+						        found_records)+
+			  (double) found_records / TIME_FOR_COMPARE);
+      }
+      if (read_time > found_read_time && found_records != HA_POS_ERROR
+          /*|| read_time == DBL_MAX*/ )
+      {        
+        read_time=    found_read_time;
+        best_records= found_records; 
+        key_to_read=  key;
+      }
+
+    }
+  }
+
+  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
+                                      "ROR scans"););
+  if (key_to_read)
+  {
+    idx= key_to_read - tree->keys;
+    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+    {
+      read_plan->records= best_records;
+      read_plan->is_ror= tree->ror_scans_map.is_set(idx);
+      read_plan->read_cost= read_time;
+      DBUG_PRINT("info",("Returning range plan for key %s, cost %g", 
+                         param->table->key_info[param->real_keynr[idx]].name,
+                         read_plan->read_cost));
+    }
+  }
+  else
+    DBUG_PRINT("info", ("No 'range' table read plan found"));
+
+  DBUG_RETURN(read_plan);
+}
+
+
+QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, 
+                                            bool retrieve_full_rows,
+                                            MEM_ROOT *parent_alloc)
+{
+  QUICK_INDEX_MERGE_SELECT *quick_imerge;
+  QUICK_RANGE_SELECT *quick;
+  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
+  if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
+    return NULL;
+
+  quick_imerge->records= records;
+  quick_imerge->read_time= read_cost;
+  for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
+      range_scan++)
+  {
+    if (!(quick= (QUICK_RANGE_SELECT*)
+           ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))||
+        quick_imerge->push_quick_back(quick))
+    {
+      delete quick;
+      delete quick_imerge;
+      return NULL;
+    }
+  }
+  return quick_imerge;
+}
+
+QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, 
+                                              bool retrieve_full_rows,
+                                              MEM_ROOT *parent_alloc)
+{
+  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
+  QUICK_RANGE_SELECT *quick;
+  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
+  MEM_ROOT *alloc;
+  
+  if ((quick_intrsect= 
+         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
+                                        retrieve_full_rows? (!is_covering):false,
+                                        parent_alloc)))
+  {
+    DBUG_EXECUTE("info", print_ror_scans_arr(param->table, 
+                                             "creating ROR-intersect",
+                                             first_scan, last_scan););
+    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
+    for(; first_scan != last_scan;++first_scan)
+    {
+      if (!(quick= get_quick_select(param, (*first_scan)->idx,
+                                    (*first_scan)->sel_arg, alloc)) ||
+          quick_intrsect->push_quick_back(quick))
       {
-        *read_time=   found_read_time;
-        *records=     found_records; 
-        *key_to_read= key;
-        result = 0;
+        delete quick_intrsect;
+        DBUG_RETURN(NULL);
       }
     }
+    quick_intrsect->records= records; 
+    quick_intrsect->read_time= read_cost;
+  }
+  DBUG_RETURN(quick_intrsect);
+}
+
+QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 
+                                          bool retrieve_full_rows,
+                                          MEM_ROOT *parent_alloc)
+{
+  QUICK_ROR_UNION_SELECT *quick_roru;
+  TABLE_READ_PLAN **scan;
+  QUICK_SELECT_I *quick;
+  DBUG_ENTER("TRP_ROR_UNION::make_quick");
+  /* 
+    It is currently impossible to construct a ROR-union that will 
+    not retrieve full rows, ingore retrieve_full_rows.
+  */
+  if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
+  {
+    for(scan= first_ror; scan != last_ror; scan++)
+    {
+      if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) || 
+          quick_roru->push_quick_back(quick))
+        DBUG_RETURN(NULL);
+    }
+    quick_roru->records= records;
+    quick_roru->read_time= read_cost;
   }
-  return result;
+  DBUG_RETURN(quick_roru);
 }
 
+/****************************************************************************/
 	/* make a select tree of all keys in condition */
 
 static SEL_TREE *get_mm_tree(PARAM *param,COND *cond)
@@ -2975,34 +4307,88 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
 
 /*****************************************************************************
 ** Check how many records we will find by using the found tree
+** NOTE
+**  param->table->quick_* and param->range_count (and maybe others) are 
+**  updated with data of given key scan.
 *****************************************************************************/
 
 static ha_rows
 check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
 {
   ha_rows records;
+  bool    cpk_scan;
+  uint key;
   DBUG_ENTER("check_quick_select");
 
   if (!tree)
     DBUG_RETURN(HA_POS_ERROR);			// Can't use it
   param->max_key_part=0;
   param->range_count=0;
+  key= param->real_keynr[idx];
+
   if (tree->type == SEL_ARG::IMPOSSIBLE)
     DBUG_RETURN(0L);				// Impossible select. return
   if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
     DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+
+  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
+  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
+  {
+    /* Records are not ordered by rowid for other types of indexes. */
+    param->is_ror_scan= false;
+    cpk_scan= false;
+  }
+  else
+  {
+    /*
+      Clustered PK scan is a special case, check_quick_keys doesn't recognize
+      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
+    */
+    cpk_scan= (param->table->primary_key == param->real_keynr[idx]) &&
+              param->table->file->primary_key_is_clustered();
+    param->is_ror_scan= !cpk_scan; 
+  }
+
   records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
   if (records != HA_POS_ERROR)
-  {
-    uint key=param->real_keynr[idx];
+  {    
     param->table->quick_keys.set_bit(key);
     param->table->quick_rows[key]=records;
     param->table->quick_key_parts[key]=param->max_key_part+1;
+    
+    if (cpk_scan)
+      param->is_ror_scan= true;
   }
   DBUG_RETURN(records);
 }
 
 
+/* 
+  SYNOPSIS
+    check_quick_keys()
+      param
+      idx          key to use, its number in list of used keys (that is, 
+                   param->real_keynr[idx] holds the key number in table)
+      
+      key_tree     SEL_ARG tree which cost is calculated.
+      min_key      buffer with min key value tuple
+      min_key_flag 
+      max_key      buffer with max key value tuple
+      max_key_flag
+
+  NOTE
+    The function does the recursive descent on the tree via left, right, and
+    next_key_part edges. The #rows estimates are calculated at the leaf nodes.
+
+    param->min_key and param->max_key are used to hold key segment values.
+
+    The side effects are:
+    param->max_key_part is updated to hold the maximum number of key parts used
+      in scan minus 1.
+    param->range_count is updated.
+    param->is_ror_scan is updated.
+*/
+
 static ha_rows
 check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
 		 char *min_key,uint min_key_flag, char *max_key,
@@ -3013,6 +4399,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   param->max_key_part=max(param->max_key_part,key_tree->part);
   if (key_tree->left != &null_element)
   {
+    param->is_ror_scan= false;
     records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
 			     max_key,max_key_flag);
     if (records == HA_POS_ERROR)			// Impossible
@@ -3040,6 +4427,9 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
 			   tmp_max_key, max_key_flag | key_tree->max_flag);
       goto end;					// Ugly, but efficient
     }
+    else
+      param->is_ror_scan= false;
+
     tmp_min_flag=key_tree->min_flag;
     tmp_max_flag=key_tree->max_flag;
     if (!tmp_min_flag)
@@ -3068,6 +4458,15 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
     tmp=1;					// Max one record
   else
   {
+    if (param->is_ror_scan)
+    {
+      if (!(min_key_length == max_key_length &&
+            !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
+            !key_tree->min_flag && !key_tree->max_flag && 
+            is_key_scan_ror(param, keynr, key_tree->part + 1)))
+        param->is_ror_scan= false;
+    }
+
     if (tmp_min_flag & GEOM_FLAG)
     {
       tmp= param->table->file->
@@ -3098,6 +4497,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   records+=tmp;
   if (key_tree->right != &null_element)
   {
+    param->is_ror_scan= false;
     tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
 			 max_key,max_key_flag);
     if (tmp == HA_POS_ERROR)
@@ -3107,12 +4507,50 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   return records;
 }
 
+/*
+  Check if key scan on key keynr with first nparts key parts fixed is a
+  ROR scan. This function doesn't handle clustered PK scans or HASH index 
+  scans.
+*/
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
+{
+  KEY *table_key= param->table->key_info + keynr;
+  KEY_PART_INFO *key_part= table_key->key_part + nparts;
+  KEY_PART_INFO *key_part_end= table_key->key_part + 
+                               table_key->key_parts;
+  
+  if (key_part == key_part_end)
+    return true;
+  uint pk_number= param->table->primary_key;
+  if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
+    return false;
+
+  KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
+  KEY_PART_INFO *pk_part_end= pk_part + 
+                              param->table->key_info[pk_number].key_parts;
+  for(;(key_part!=key_part_end) && (pk_part != pk_part_end); 
+      ++key_part, ++pk_part)
+  {
+    if (key_part->field != pk_part->field)
+      return false; 
+  }
+  return (key_part == key_part_end);
+}
 
-/****************************************************************************
-** change a tree to a structure to be used by quick_select
-** This uses it's own malloc tree
-** The caller should call QUICK_SELCT::init for returned quick select
-****************************************************************************/
+
+/*
+  Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
+  This uses it's own malloc tree.
+  SYNOPSIS
+    get_quick_select()
+      param 
+      idx          index of used key in param->key.
+      key_tree     SEL_ARG tree for the used key 
+      parent_alloc if not NULL, use it to allocate memory for 
+                   quick select data. Otherwise use quick->alloc.
+
+  The caller should call QUICK_SELCT::init for returned quick select
+*/
 QUICK_RANGE_SELECT *
 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
                  MEM_ROOT *parent_alloc)
@@ -3292,6 +4730,47 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
   return 0;
 }
 
+bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields)
+{
+  return check_if_key_used(head, index, *fields); 
+}
+
+bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields)
+{
+  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  while ((quick= it++))
+  {
+    if (check_if_key_used(head, quick->index, *fields))
+      return 1;
+  }
+  return 0;
+}
+
+bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields)
+{
+  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  while ((quick= it++))
+  {
+    if (check_if_key_used(head, quick->index, *fields))
+      return 1;
+  }
+  return 0;
+}
+
+bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields)
+{
+  QUICK_SELECT_I *quick;
+  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  while ((quick= it++))
+  {
+    if (quick->check_if_keys_used(fields))
+      return 1;
+  }
+  return 0;
+}
+
 /****************************************************************************
 ** Create a QUICK RANGE based on a key
 ****************************************************************************/
@@ -3351,8 +4830,6 @@ err:
 }
 
 
-#define MEM_STRIP_BUF_SIZE thd->variables.sortbuff_size
-
 /*
   Fetch all row ids into unique.
 
@@ -3386,9 +4863,9 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
 
   cur_quick_select->init();
 
-  unique= new Unique(refposcmp2, (void *) &head->file->ref_length,
+  unique= new Unique(refpos_order_cmp, (void *)head->file,
                      head->file->ref_length,
-                     MEM_STRIP_BUF_SIZE);
+                     thd->variables.sortbuff_size);
   if (!unique)
     DBUG_RETURN(1);
   do
@@ -3421,8 +4898,7 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
       continue;
 
     cur_quick_select->file->position(cur_quick_select->record);
-    result= unique->unique_add((char*)cur_quick_select->file->ref);
-    
+    result= unique->unique_add((char*)cur_quick_select->file->ref);    
     if (result)
       DBUG_RETURN(1);
 
@@ -3475,6 +4951,143 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
   DBUG_RETURN(result);
 }
 
+
+/*
+  NOTES
+    Invariant on enter/exit: all intersected selects have retrieved index 
+    records with rowid <= some_rowid_val and no intersected select has 
+    retrieved any index records with rowid > some_rowid_val.
+    We start fresh and loop until we have retrieved the same rowid in each of
+    the key scans or we got an error.
+
+    If a Clustered PK scan is present, it is used only to check if row 
+    satisfies its conditions (and never used for row retrieval).
+*/
+
+int QUICK_ROR_INTERSECT_SELECT::get_next()
+{
+  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
+  QUICK_RANGE_SELECT* quick;
+  int error, cmp;
+  uint last_rowid_count=0;
+  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");
+  
+  /* Get a rowid for first quick and save it as a 'candidate' */
+  quick= quick_it++;
+  if (cpk_quick)
+  {
+    do {
+      error= quick->get_next();
+    }while (!error && !cpk_quick->row_in_ranges());
+  }
+  else
+    error= quick->get_next();
+  
+  if (error)
+    DBUG_RETURN(error);
+
+  quick->file->position(quick->record);
+  memcpy(last_rowid, quick->file->ref, head->file->ref_length);
+  last_rowid_count= 1;
+ 
+  while (last_rowid_count < quick_selects.elements)
+  {
+    if (!(quick= quick_it++))
+    {
+      quick_it.rewind();
+      quick= quick_it++;
+    }
+   
+    do {
+      if ((error= quick->get_next()))
+        DBUG_RETURN(error);
+      quick->file->position(quick->record);
+      cmp= head->file->cmp_ref(quick->file->ref, last_rowid);      
+    } while (cmp < 0);
+
+    /* Ok, current select 'caught up' and returned ref >= cur_ref */
+    if (cmp > 0)
+    {
+      /* Found a row with ref > cur_ref. Make it a new 'candidate' */
+      if (cpk_quick)
+      {
+        while (!cpk_quick->row_in_ranges())
+        {
+          if ((error= quick->get_next()))
+            DBUG_RETURN(error);
+        }
+      }
+      memcpy(last_rowid, quick->file->ref, head->file->ref_length);
+      last_rowid_count= 1;      
+    }
+    else
+    {
+      /* current 'candidate' row confirmed by this select */
+      last_rowid_count++;
+    }
+  }
+
+  /* We get here iff we got the same row ref in all scans. */
+  if (need_to_fetch_row)
+    error= head->file->rnd_pos(head->record[0], last_rowid);
+  DBUG_RETURN(error);
+}
+
+
+/* 
+  NOTES
+    Enter/exit invariant: 
+    For each quick select in the queue a {key,rowid} tuple has been 
+    retrieved but the corresponding row hasn't been passed to output.
+*/
+
+int QUICK_ROR_UNION_SELECT::get_next()
+{
+  int error, dup_row;
+  QUICK_SELECT_I *quick;
+  byte *tmp;
+  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
+  
+  do
+  {
+    if (!queue.elements)
+      DBUG_RETURN(HA_ERR_END_OF_FILE);
+    /* Ok, we have a queue with > 1 scans */
+
+    quick= (QUICK_SELECT_I*)queue_top(&queue);
+    memcpy(cur_rowid, quick->last_rowid, rowid_length);
+
+    /* put into queue rowid from the same stream as top element */
+    if ((error= quick->get_next()))
+    {
+      if (error != HA_ERR_END_OF_FILE)
+        DBUG_RETURN(error);
+      queue_remove(&queue, 0);
+    }
+    else
+    {
+      quick->save_last_pos();
+      queue_replaced(&queue);
+    }
+    
+    if (!have_prev_rowid)
+    {
+      /* No rows have been returned yet */
+      dup_row= false;
+      have_prev_rowid= true;
+    }
+    else
+      dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
+  }while (dup_row);
+  
+  tmp= cur_rowid;
+  cur_rowid= prev_rowid;
+  prev_rowid= tmp;
+
+  error= head->file->rnd_pos(quick->record, prev_rowid);
+  DBUG_RETURN(error);
+}
+
 	/* get next possible record using quick-struct */
 
 int QUICK_RANGE_SELECT::get_next()
@@ -3883,6 +5496,156 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
 #endif
 
 
+void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, 
+                                               String *used_lengths)
+{
+  char buf[64];
+  uint length;
+  KEY *key_info= head->key_info + index;
+  key_names->append(key_info->name);
+  length= longlong2str(max_used_key_length, buf, 10) - buf;
+  used_lengths->append(buf, length);
+}
+
+void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names,
+                                                     String *used_lengths)
+{
+  char buf[64];
+  uint length;
+  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  while ((quick= it++))
+  {
+    KEY *key_info= head->key_info + quick->index;
+    if (key_names->length())
+      key_names->append(',');
+    key_names->append(key_info->name);
+
+    if (used_lengths->length())
+      used_lengths->append(',');
+    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+    used_lengths->append(buf, length);
+  }
+  if (pk_quick_select)
+  {
+    KEY *key_info= head->key_info + pk_quick_select->index;
+    key_names->append(',');
+    key_names->append(key_info->name);
+    length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
+    used_lengths->append(',');
+    used_lengths->append(buf, length);
+  }
+}
+
+void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names,
+                                                       String *used_lengths)
+{
+  char buf[64];
+  uint length;
+  bool first= true; 
+  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  while ((quick= it++))
+  {
+    KEY *key_info= head->key_info + quick->index;
+    if (!first)
+      key_names->append(',');
+    key_names->append(key_info->name);
+
+    if (first)
+      first= false;
+    else
+      used_lengths->append(',');
+    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+    used_lengths->append(buf, length);
+  }
+  if (cpk_quick)
+  {
+    KEY *key_info= head->key_info + cpk_quick->index;
+    key_names->append(',');
+    key_names->append(key_info->name);
+    length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
+    used_lengths->append(',');
+    used_lengths->append(buf, length);
+  }
+}
+
+void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names,
+                                                   String *used_lengths)
+{
+  bool first= true; 
+  QUICK_SELECT_I *quick;
+  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  while ((quick= it++))
+  {
+    if (first)
+      first= false;
+    else
+    {  
+      used_lengths->append(',');
+      key_names->append(',');
+    }
+    quick->fill_keys_and_lengths(key_names, used_lengths);
+  }
+}
+
+#ifndef DBUG_OFF
+
+static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, 
+                           const char *msg)
+{
+  SEL_ARG **key,**end;
+  int idx;
+  char buff[1024];
+  DBUG_ENTER("print_sel_tree");
+  if (! _db_on_)
+    DBUG_VOID_RETURN;
+
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  tmp.length(0);
+  for (idx= 0,key=tree->keys, end=key+param->keys ;
+       key != end ;
+       key++,idx++)
+  {
+    if (tree_map->is_set(idx))
+    {
+      uint keynr= param->real_keynr[idx];
+      if (tmp.length())
+        tmp.append(',');
+      tmp.append(param->table->key_info[keynr].name);
+    }
+  }
+  if (!tmp.length())
+    tmp.append("(empty)");
+
+  DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr()));
+
+  DBUG_VOID_RETURN;
+}
+
+static void print_ror_scans_arr(TABLE *table, const char *msg,
+                                struct st_ror_scan_info **start, 
+                                struct st_ror_scan_info **end)
+{
+  DBUG_ENTER("print_ror_scans");
+  if (! _db_on_)
+    DBUG_VOID_RETURN;
+
+  char buff[1024];
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  tmp.length(0);
+  for(;start != end; start++)
+  {
+    if (tmp.length())
+      tmp.append(',');
+    tmp.append(table->key_info[(*start)->keynr].name);
+  }
+  if (!tmp.length())
+    tmp.append("(empty)");
+  DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
+  DBUG_VOID_RETURN;
+}
+
 /*****************************************************************************
 ** Print a quick range for debugging
 ** TODO:
@@ -3890,8 +5653,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
 ** of locking the DEBUG stream !
 *****************************************************************************/
 
-#ifndef DBUG_OFF
-
 static void
 print_key(KEY_PART *key_part,const char *key,uint used_length)
 {
@@ -3923,65 +5684,115 @@ print_key(KEY_PART *key_part,const char *key,uint used_length)
   }
 }
 
-static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, 
-                            const key_map *needed_reg)
+static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
 {
+  char buf[MAX_KEY/8+1];
   DBUG_ENTER("print_param");
   if (! _db_on_ || !quick)
     DBUG_VOID_RETURN;
+  DBUG_LOCK_FILE;
+ 
+  quick->dbug_dump(0, true);
+  fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
 
-  List_iterator_fast<QUICK_RANGE_SELECT> it(quick->quick_selects);
-  QUICK_RANGE_SELECT* quick_range_sel;
-  while ((quick_range_sel= it++))
-  {
-    print_quick_sel_range(quick_range_sel, needed_reg);
-  }
-  if (quick->pk_quick_select)
-    print_quick_sel_range(quick->pk_quick_select, needed_reg);
-
+  DBUG_UNLOCK_FILE;
   DBUG_VOID_RETURN;
 }
 
-void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
-                           const key_map *needed_reg)
+static void print_rowid(byte* val, int len)
 {
-  QUICK_RANGE *range;
-  char buf[MAX_KEY/8+1];
-  DBUG_ENTER("print_param");
-  if (! _db_on_ || !quick)
-    DBUG_VOID_RETURN;
-
+  byte *pb;
   DBUG_LOCK_FILE;
-  fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n",
-	  quick->index, needed_reg->print(buf));
+  fputc('\"', DBUG_FILE);
+  for (pb= val; pb!= val + len; ++pb)
+    fprintf(DBUG_FILE, "%c", *pb);
+  fprintf(DBUG_FILE, "\", hex: ");
+
+  for (pb= val; pb!= val + len; ++pb)
+    fprintf(DBUG_FILE, "%x ", *pb);
+  fputc('\n', DBUG_FILE);
+  DBUG_UNLOCK_FILE;
+}
 
-  QUICK_RANGE **pr= (QUICK_RANGE**)quick->ranges.buffer;
-  QUICK_RANGE **last_range= pr + quick->ranges.elements;
-  for (; pr!=last_range; ++pr)
+void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
+{
+  fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
+	  indent, "", head->key_info[index].name, max_used_key_length);
+  
+  if (verbose)
   {
-    range= *pr;
-    if (!(range->flag & NO_MIN_RANGE))
+    QUICK_RANGE *range;
+    QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
+    QUICK_RANGE **last_range= pr + ranges.elements;    
+    for (; pr!=last_range; ++pr)
     {
-      print_key(quick->key_parts,range->min_key,range->min_length);
-      if (range->flag & NEAR_MIN)
-	fputs(" < ",DBUG_FILE);
-      else
-	fputs(" <= ",DBUG_FILE);
-    }
-    fputs("X",DBUG_FILE);
+      fprintf(DBUG_FILE, "%*s", indent + 2, "");
+      range= *pr;
+      if (!(range->flag & NO_MIN_RANGE))
+      {
+        print_key(key_parts,range->min_key,range->min_length);
+        if (range->flag & NEAR_MIN)
+	  fputs(" < ",DBUG_FILE);
+        else
+	  fputs(" <= ",DBUG_FILE);
+      }
+      fputs("X",DBUG_FILE);
 
-    if (!(range->flag & NO_MAX_RANGE))
-    {
-      if (range->flag & NEAR_MAX)
-	fputs(" < ",DBUG_FILE);
-      else
-	fputs(" <= ",DBUG_FILE);
-      print_key(quick->key_parts,range->max_key,range->max_length);
+      if (!(range->flag & NO_MAX_RANGE))
+      {
+        if (range->flag & NEAR_MAX)
+	  fputs(" < ",DBUG_FILE);
+        else
+	  fputs(" <= ",DBUG_FILE);
+        print_key(key_parts,range->max_key,range->max_length);
+      }
+      fputs("\n",DBUG_FILE);
     }
-    fputs("\n",DBUG_FILE);
   }
-  DBUG_UNLOCK_FILE;
-  DBUG_VOID_RETURN;
+}
+
+void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
+{
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  QUICK_RANGE_SELECT *quick;
+  fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
+  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+  while ((quick= it++))
+    quick->dbug_dump(indent+2, verbose);
+  if (pk_quick_select)
+  {
+    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");    
+    pk_quick_select->dbug_dump(indent+2, verbose);
+  }
+  fprintf(DBUG_FILE, "%*s}\n", indent, "");
+}
+
+void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
+{
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  QUICK_RANGE_SELECT *quick;
+  fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", 
+          indent, "", need_to_fetch_row? "":"non-");
+  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+  while ((quick= it++))
+    quick->dbug_dump(indent+2, verbose); 
+  if (cpk_quick)
+  {
+    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");    
+    cpk_quick->dbug_dump(indent+2, verbose);
+  }
+  fprintf(DBUG_FILE, "%*s}\n", indent, "");
+}
+
+void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
+{
+  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  QUICK_SELECT_I *quick;
+  fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
+  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+  while ((quick= it++))
+    quick->dbug_dump(indent+2, verbose);
+  fprintf(DBUG_FILE, "%*s}\n", indent, "");
 }
 
 #endif
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 7c2981795a2..cd9664ba759 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -79,14 +79,18 @@ public:
   TABLE   *head;
 
   /*
-    the only index this quick select uses, or MAX_KEY for 
-    QUICK_INDEX_MERGE_SELECT
+    index this quick select uses, or MAX_KEY for quick selects 
+    that use several indexes
   */
-  uint index; 
+  uint index;
+
+  /* applicable iff index!= MAX_KEY */
   uint max_used_key_length, used_key_parts;
 
   QUICK_SELECT_I();
   virtual ~QUICK_SELECT_I(){};
+  
+  /**/
   virtual int  init() = 0;
   virtual int  reset(void) = 0;
   virtual int  get_next() = 0;   /* get next record to retrieve */
@@ -97,27 +101,64 @@ public:
     QS_TYPE_RANGE = 0,
     QS_TYPE_INDEX_MERGE = 1,
     QS_TYPE_RANGE_DESC = 2,
-    QS_TYPE_FULLTEXT   = 3
+    QS_TYPE_FULLTEXT   = 3,
+    QS_TYPE_ROR_INTERSECT = 4,
+    QS_TYPE_ROR_UNION = 5,
   };
 
-  /* Get type of this quick select - one of the QS_* values */
-  virtual int get_type() = 0; 
+  /* Get type of this quick select - one of the QS_TYPE_* values */
+  virtual int get_type() = 0;
+
+  /*
+    Initialize this quick select as a child of a index union or intersection 
+    scan. This call replaces init() call.
+  */
+  virtual int init_ror_child_scan(bool reuse_handler)
+  { DBUG_ASSERT(0); return 1; }
+  
+  virtual void cleanup_ror_child_scan() { DBUG_ASSERT(0); }
+  virtual void save_last_pos(){};
+
+  /* 
+    Fill key_names with list of keys this quick select used;
+    fill used_lenghth with correponding used lengths.
+    This is used by select_describe.
+  */
+  virtual void fill_keys_and_lengths(String *key_names, 
+                                     String *used_lengths)=0;
+
+  virtual bool check_if_keys_used(List<Item> *fields);
+
+  /*
+    rowid of last row retrieved by this quick select. This is used only 
+    when doing ROR-index_merge selects 
+  */
+  byte    *last_rowid;
+  byte    *record;
+#ifndef DBUG_OFF
+  /*
+    Print quick select information to DBUG_FILE. Caller is responsible 
+    for locking DBUG_FILE before this call and unlocking it afterwards.
+  */
+  virtual void dbug_dump(int indent, bool verbose)= 0;
+#endif 
 };
 
+
 struct st_qsel_param;
 class SEL_ARG;
 
-class QUICK_RANGE_SELECT : public QUICK_SELECT_I 
+class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
 protected:
   bool next,dont_free;
 public:
   int error;
+protected:
   handler *file;
-  byte    *record;
+  bool free_file; /* if true, this quick select "owns" file and will free it */
+
 protected:
-  friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
-                                    const key_map* needed_reg);
   friend
   QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, 
                                                struct st_table_ref *ref);
@@ -131,17 +172,19 @@ protected:
                                               MEM_ROOT *alloc);
   friend class QUICK_SELECT_DESC;
   friend class QUICK_INDEX_MERGE_SELECT;
+  friend class QUICK_ROR_INTERSECT_SELECT;
 
   DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */
   QUICK_RANGE **cur_range;  /* current element in ranges  */
 
   QUICK_RANGE *range;
-  MEM_ROOT alloc;
   KEY_PART *key_parts;  
   int cmp_next(QUICK_RANGE *range);
   int cmp_prev(QUICK_RANGE *range);
   bool row_in_ranges();
 public:
+  MEM_ROOT alloc;
+
   QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
                      MEM_ROOT *parent_alloc=NULL);
   ~QUICK_RANGE_SELECT();
@@ -157,7 +200,16 @@ public:
   int get_next();
   bool reverse_sorted() { return 0; }
   bool unique_key_range();
+  int init_ror_child_scan(bool reuse_handler);
+  void save_last_pos()
+  {
+    file->position(record);
+  };
   int get_type() { return QS_TYPE_RANGE; }
+  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+#ifndef DBUG_OFF
+  virtual void dbug_dump(int indent, bool verbose);
+#endif
 };
 
 
@@ -232,6 +284,11 @@ public:
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
   int get_type() { return QS_TYPE_INDEX_MERGE; }
+  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+  virtual void dbug_dump(int indent, bool verbose);
+#endif
 
   bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
 
@@ -242,9 +299,6 @@ public:
   List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it;
   QUICK_RANGE_SELECT* cur_quick_select;
   
-  /* last element in quick_selects list */
-  QUICK_RANGE_SELECT* last_quick_select;
-
   /* quick select that uses clustered primary key (NULL if none) */
   QUICK_RANGE_SELECT* pk_quick_select;
   
@@ -262,6 +316,87 @@ public:
   READ_RECORD read_record;
 };
 
+
+/*
+  Rowid-Ordered Retrieval (ROR) index intersection quick select.
+  This quick select produces an intersection of records returned by several
+  QUICK_RANGE_SELECTs that return data ordered by rowid.
+*/
+class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I 
+{
+public:
+  QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, 
+                             bool retrieve_full_rows,
+                             MEM_ROOT *parent_alloc);
+  ~QUICK_ROR_INTERSECT_SELECT();
+
+  int  init();
+  int  reset(void);
+  int  get_next();
+  bool reverse_sorted() { return false; }
+  bool unique_key_range() { return false; }
+  int get_type() { return QS_TYPE_ROR_INTERSECT; }
+  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+  virtual void dbug_dump(int indent, bool verbose);
+#endif
+  int init_ror_child_scan(bool reuse_handler);
+  bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
+
+  /* range quick selects this intersection consists of */
+  List<QUICK_RANGE_SELECT> quick_selects;
+  
+  QUICK_RANGE_SELECT *cpk_quick;
+  MEM_ROOT alloc;
+  THD *thd;
+  bool reset_called;    
+  bool need_to_fetch_row;
+};
+
+/*
+  Rowid-Ordered Retrieval index union select.
+
+*/
+class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I 
+{
+public:
+  QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table);
+  ~QUICK_ROR_UNION_SELECT();
+
+  int  init();
+  int  reset(void);
+  int  get_next();
+  bool reverse_sorted() { return false; }
+  bool unique_key_range() { return false; }
+  int get_type() { return QS_TYPE_ROR_UNION; }
+  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+  virtual void dbug_dump(int indent, bool verbose);
+#endif
+
+  bool push_quick_back(QUICK_SELECT_I *quick_sel_range);
+
+  /* range quick selects this index_merge read consists of */
+  List<QUICK_SELECT_I> quick_selects;
+  
+  QUEUE queue;
+  MEM_ROOT alloc;
+
+  THD *thd;
+  byte *cur_rowid;
+  byte *prev_rowid;
+  uint rowid_length;
+  bool reset_called;
+  bool have_prev_rowid;
+  
+private:
+  static int queue_cmp(void *arg, byte *val1, byte *val2);
+};
+
+
+
 class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
 {
 public:
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 22cb11eed41..72a39e347b1 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -151,6 +151,12 @@ int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order,
     select= 0;
   }
 
+  if (select && select->quick && select->quick->reset())
+  {
+    delete select;
+    free_underlaid_joins(thd, &thd->lex->select_lex);
+    DBUG_RETURN(-1);			// This will force out message
+  }
   init_read_record(&info,thd,table,select,1,1);
   deleted=0L;
   init_ftfuncs(thd, &thd->lex->select_lex, 1);
@@ -250,10 +256,10 @@ cleanup:
 
 #define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size
 
-extern "C" int refposcmp2(void* arg, const void *a,const void *b)
+extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b)
 {
-  /* arg is a pointer to file->ref_length */
-  return memcmp(a,b, *(int*) arg);
+  handler *file= (handler*)arg;
+  return file->cmp_ref((const byte*)a, (const byte*)b);
 }
 
 multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt,
@@ -317,8 +323,8 @@ multi_delete::initialize_tables(JOIN *join)
   for (walk=walk->next ; walk ; walk=walk->next)
   {
     TABLE *table=walk->table;
-    *tempfiles_ptr++= new Unique (refposcmp2,
-				  (void *) &table->file->ref_length,
+    *tempfiles_ptr++= new Unique (refpos_order_cmp,
+				  (void *) table->file,
 				  table->file->ref_length,
 				  MEM_STRIP_BUF_SIZE);
   }
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 9f26d7458d0..528fe38ed3f 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -6976,8 +6976,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
   }
   else if (select && select->quick)		// Range found by opt_range
   {
-    /* assume results are not ordered when index merge is used */
-    if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+    int quick_type= select->quick->get_type();
+    /* 
+      assume results are not ordered when index merge is used 
+      TODO: sergeyp: Results of all index merge selects actually are ordered 
+      by clustered PK values.
+    */
+  
+    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || 
+        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
+        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
       DBUG_RETURN(0);
     ref_key=	   select->quick->index;
     ref_key_parts= select->quick->used_key_parts;
@@ -7038,9 +7046,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	  */
 	  if (!select->quick->reverse_sorted())
 	  {
+            int quick_type= select->quick->get_type();
             if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST ||
-                (select->quick->get_type() == 
-                QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
+                quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+                quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+                quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)
               DBUG_RETURN(0);			// Use filesort
             
             // ORDER BY range_key DESC
@@ -8961,6 +8971,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
   select_result *result=join->result;
   Item *item_null= new Item_null();
   CHARSET_INFO *cs= &my_charset_latin1;
+  int quick_type= -1;
   DBUG_ENTER("select_describe");
   DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s",
 		      (ulong)join->select_lex, join->select_lex->type,
@@ -9006,8 +9017,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 					  cs));
       if (tab->type == JT_ALL && tab->select && tab->select->quick)
       {
-        if (tab->select->quick->get_type() == 
-            QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+        quick_type= tab->select->quick->get_type();
+        if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
+            (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
+            (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
           tab->type = JT_INDEX_MERGE;
         else
 	  tab->type = JT_RANGE;
@@ -9028,6 +9041,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 					  strlen(join_type_str[tab->type]),
 					  cs));
       uint j;
+      /* Build "possible_keys" value and add it to item_list */
       if (!tab->keys.is_clear_all())
       {
         for (j=0 ; j < table->keys ; j++)
@@ -9044,6 +9058,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs));
       else
 	item_list.push_back(item_null);
+
+      /* Build key,key_len, and ref values and add them to item_list */
       if (tab->ref.key_parts)
       {
 	KEY *key_info=table->key_info+ tab->ref.key;
@@ -9078,48 +9094,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       else if (tab->select && tab->select->quick)
       {
-        if (tab->select->quick->get_type() == 
-            QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
-        {
-          QUICK_INDEX_MERGE_SELECT *quick_imerge=
-            (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
-          QUICK_RANGE_SELECT *quick;
-
-          List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->
-                                                    quick_selects);
-          while ((quick= it++))
-          {
-	    KEY *key_info= table->key_info + quick->index;
-            register uint length;
-            if (tmp3.length())
-	      tmp3.append(',');
-	    
-            tmp3.append(key_info->name);
-
-	    if (tmp2.length())
-	      tmp2.append(',');
-
-            length= longlong2str(quick->max_used_key_length, keylen_str_buf,
-                                 10) - 
-                    keylen_str_buf;
-
-            tmp2.append(keylen_str_buf, length);
-          }
-        }
-        else
-        {
-	  KEY *key_info= table->key_info + tab->select->quick->index;
-          register uint length;
-          tmp3.append(key_info->name);
-          
-          length= longlong2str(tab->select->quick->max_used_key_length, 
-                               keylen_str_buf, 10) -
-                  keylen_str_buf;
-          tmp2.append(keylen_str_buf, length);
-        }
-
-	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+        tab->select->quick->fill_keys_and_lengths(&tmp2, &tmp3);
 	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
 	item_list.push_back(item_null);
       }
       else
@@ -9134,7 +9111,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       my_bool key_read=table->key_read;
       if (tab->type == JT_NEXT && table->used_keys.is_set(tab->index))
 	key_read=1;
-
+      if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&
+          !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row)
+        key_read=1;
+        
       if (tab->info)
 	item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs));
       else
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 9bba4e9318e..9d784e585b1 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -329,7 +329,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);
 
 /* from sql_delete.cc, used by opt_range.cc */
-extern "C" int refposcmp2(void* arg, const void *a,const void *b);
+extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b);
 
 /* class to copying an field/item to a key struct */
 
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index cd493fcac30..9e8b6f551c6 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -182,37 +182,8 @@ TEST_join(JOIN *join)
 		tab->select->quick_keys.print(buf));
       else if (tab->select->quick)
       {
-        int quick_type= tab->select->quick->get_type();
-        if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) || 
-            (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC))
-        {
-	  fprintf(DBUG_FILE,
-                "                  quick select used on key %s, length: %d\n",
-		form->key_info[tab->select->quick->index].name,
-		tab->select->quick->max_used_key_length);
-        }
-        else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
-        {
-          QUICK_INDEX_MERGE_SELECT *quick_imerge= 
-              (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
-          QUICK_RANGE_SELECT *quick;
-          fprintf(DBUG_FILE,
-                  "                  index_merge quick select used\n");
-                    
-          List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->quick_selects);
-          while ((quick = it++))
-          {
-  	    fprintf(DBUG_FILE,
-                "                  range quick select: key %s, length: %d\n",
-		form->key_info[quick->index].name,
-		quick->max_used_key_length);
-          }
-        }
-        else
-        {
-          fprintf(DBUG_FILE,
-                  "                  quick select of unknown nature used\n");
-        }
+	fprintf(DBUG_FILE, "                  quick select used:\n");
+        tab->select->quick->dbug_dump(18, false);
       }
       else
 	VOID(fputs("                  select used\n",DBUG_FILE));
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 6b4d2f9b659..15d86b63aa1 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -176,18 +176,11 @@ int mysql_update(THD *thd,
   }
   init_ftfuncs(thd, &thd->lex->select_lex, 1);
   /* Check if we are modifying a key that we are used to search with */
+  
   if (select && select->quick)
   {
-    if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
-    {
-      used_index= select->quick->index;
-      used_key_is_modified= (!select->quick->unique_key_range() &&
-			      check_if_key_used(table,used_index,fields));
-    }
-    else
-    {
-      used_key_is_modified= true;
-    }
+    used_key_is_modified= (!select->quick->unique_key_range() &&
+                          select->quick->check_if_keys_used(&fields));
   }
   else if ((used_index=table->file->key_used_on_scan) < MAX_KEY)
     used_key_is_modified=check_if_key_used(table, used_index, fields);
@@ -246,7 +239,9 @@ int mysql_update(THD *thd,
       if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX,
 			   DISK_BUFFER_SIZE, MYF(MY_WME)))
 	goto err;
-
+      
+      if (select && select->quick && select->quick->reset())
+        goto err;
       init_read_record(&info,thd,table,select,0,1);
       thd->proc_info="Searching rows for update";
       uint tmp_limit= limit;
@@ -300,6 +295,9 @@ int mysql_update(THD *thd,
 
   if (handle_duplicates == DUP_IGNORE)
     table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  
+  if (select && select->quick && select->quick->reset())
+        goto err;
   init_read_record(&info,thd,table,select,0,1);
 
   updated= found= 0;
@@ -732,26 +730,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields)
   case JT_ALL:
     /* If range search on index */
     if (join_tab->quick)
-    {
-      if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
-      {
-        return !check_if_key_used(table,join_tab->quick->index,*fields);
-      }
-      else
-      {
-        QUICK_INDEX_MERGE_SELECT *qsel_imerge=
-          (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick);
-        List_iterator_fast<QUICK_RANGE_SELECT> it(qsel_imerge->quick_selects);
-        QUICK_RANGE_SELECT *quick;
-        while ((quick= it++))
-        {
-          if (check_if_key_used(table, quick->index, *fields))
-            return 0;
-        }
-        return 1;
-      }
-    }
-
+      return !join_tab->quick->check_if_keys_used(fields);
     /* If scanning in clustered key */
     if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
 	table->primary_key < MAX_KEY)
-- 
cgit v1.2.1


From 1ce1880204149404e09411505d457c452a29d55b Mon Sep 17 00:00:00 2001
From: unknown <sergefp@mysql.com>
Date: Sat, 29 May 2004 02:04:01 +0400
Subject:  * New index_merge EXPLAIN output format  * Fixed a problem with
 wrong query results for partially covering keys in ROR-index_merge  *
 ROR-intersection retrieval plan choice algorithm now uses less disk IO    -
 and properly processes clustered PK range scans  * Fixed several minor range
 optimizer problems  * Added more comments  * Code cleanup

mysql-test/r/index_merge.result:
  New index_merge EXPLAIN output format changes
mysql-test/r/index_merge_innodb.result:
  New index_merge EXPLAIN output format changes
mysql-test/r/index_merge_ror.result:
  New index_merge EXPLAIN output format changes
  Added test for scans on keys that cover fields partially
  Fixed a minor ROR optimizer problem
mysql-test/r/index_merge_ror_cpk.result:
  More tests added
mysql-test/t/index_merge_ror.test:
  New index_merge EXPLAIN output format changes
  Added test for scans on keys that cover fields partially
  Fixed a minor ROR optimizer problem
mysql-test/t/index_merge_ror_cpk.test:
  More tests added
mysys/my_bitmap.c:
  Comments added, code cleanup
sql/opt_range.cc:
  Comments added, code cleanup
  Numerous fixes, see changeset description
sql/opt_range.h:
  Comments added, code cleanup
  New index_merge EXPLAIN output format related changes
sql/sql_select.cc:
  Comments added, code cleanup
  New index_merge EXPLAIN output
---
 sql/opt_range.cc  | 1156 +++++++++++++++++++++++++++++++++++++----------------
 sql/opt_range.h   |  197 ++++++---
 sql/sql_select.cc |   52 ++-
 3 files changed, 992 insertions(+), 413 deletions(-)

(limited to 'sql')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index eb6eefccf02..ee328dadad6 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -199,19 +199,7 @@ public:
   void store(uint length,char **min_key,uint min_key_flag,
 	     char **max_key, uint max_key_flag)
   {
-    if ((min_flag & GEOM_FLAG) ||
-        (!(min_flag & NO_MIN_RANGE) &&
-	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
-    {
-      if (maybe_null && *min_value)
-      {
-	**min_key=1;
-	bzero(*min_key+1,length);
-      }
-      else
-	memcpy(*min_key,min_value,length+(int) maybe_null);
-      (*min_key)+= length+(int) maybe_null;
-    }
+    store_min(length, min_key, min_key_flag);
     if (!(max_flag & NO_MAX_RANGE) &&
 	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
     {
@@ -298,6 +286,7 @@ public:
 
 class SEL_IMERGE;
 
+
 class SEL_TREE :public Sql_alloc
 {
 public:
@@ -319,7 +308,7 @@ public:
   
   /* The members below are filled/used only after get_mm_tree is done */
   key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
-  uint    n_ror_scans;
+  uint    n_ror_scans;     /* number of set bits in ror_scans_map */
 
   struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
   struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
@@ -330,7 +319,8 @@ public:
 typedef struct st_qsel_param {
   THD	*thd;
   TABLE *table;
-  KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY];
+  KEY_PART *key_parts,*key_parts_end;
+  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
   MEM_ROOT *mem_root;
   table_map prev_tables,read_tables,current_table;
   uint baseflag, max_key_part, range_count;
@@ -353,7 +343,8 @@ typedef struct st_qsel_param {
   uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
   uint imerge_cost_buff_size; /* size of the buffer */
   
-  bool is_ror_scan; /* true if last checked tree->key can be used for ROR-scan */
+ /* true if last checked tree->key can be used for ROR-scan */ 
+  bool is_ror_scan; 
 } PARAM;
 
 class TABLE_READ_PLAN;
@@ -625,7 +616,6 @@ int imerge_list_or_list(PARAM *param,
   RETURN
     0     OK, result is stored in *im1.
     other Error
-  
 */
 
 int imerge_list_or_tree(PARAM *param, 
@@ -767,11 +757,12 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
   :cur_quick_it(quick_selects),pk_quick_select(NULL),unique(NULL),
    thd(thd_param)
 {
+  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
   index= MAX_KEY;
   head= table;
-  reset_called= false;
   bzero(&read_record, sizeof(read_record));
   init_sql_alloc(&alloc,1024,0);
+  DBUG_VOID_RETURN;
 }
 
 int QUICK_INDEX_MERGE_SELECT::init()
@@ -785,11 +776,7 @@ int QUICK_INDEX_MERGE_SELECT::reset()
 {
   int result;
   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
-  if (reset_called)
-    DBUG_RETURN(0);
-
-  reset_called= true;
-  result = cur_quick_select->reset() || prepare_unique();
+  result= cur_quick_select->reset() || prepare_unique();
   DBUG_RETURN(result);
 }
 
@@ -810,10 +797,12 @@ QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
 
 QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
 {
+  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
   delete unique;
   quick_selects.delete_elements();
   delete pk_quick_select;
   free_root(&alloc,MYF(0));
+  DBUG_VOID_RETURN;
 }
 
 
@@ -821,8 +810,7 @@ QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                        TABLE *table,
                                                        bool retrieve_full_rows,
                                                        MEM_ROOT *parent_alloc)
-  : cpk_quick(NULL), thd(thd_param), reset_called(false), 
-    need_to_fetch_row(retrieve_full_rows)
+  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows)
 {
   index= MAX_KEY;
   head= table;  
@@ -835,28 +823,50 @@ QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                 head->file->ref_length);
 }
 
+
+/* 
+  Do post-constructor initialization.
+  SYNOPSIS
+    QUICK_ROR_INTERSECT_SELECT::init()
+  
+  RETURN
+    0      OK
+    other  Error code
+*/
+
 int QUICK_ROR_INTERSECT_SELECT::init()
 {
-  /* Check if last_rowid was allocated in ctor */
+  /* Check if last_rowid was successfully allocated in ctor */
   return !last_rowid;
 }
 
 
 /*
-  Init this quick select to be a ROR child scan. 
-  NOTE
-    QUICK_ROR_INTERSECT_SELECT::reset() may choose not to call this function
-    but reuse its handler object for doing one of range scans. It duplicates
-    a part of this function' code.
+  Initialize this quick select to be a ROR-merged scan.
+
+  SYNOPSIS
+    QUICK_RANGE_SELECT::init_ror_merged_scan()
+      reuse_handler If true, use head->file, otherwise create a separate
+                    handler object
+
+  NOTES
+    This function creates and prepares for subsequent use a separate handler
+    object if it can't reuse head->file. The reason for this is that during 
+    ROR-merge several key scans are performed simultaneously, and a single
+    handler is only capable of preserving context of a single key scan.
+
+    In ROR-merge the quick select doing merge does full records retrieval, 
+    merged quick selects read only keys.
+     
   RETURN 
     0  ROR child scan initialized, ok to use.
     1  error
 */
 
-int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler)
+int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
 {
   handler *save_file= file;
-  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_child_scan");
+  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
   
   if (reuse_handler)
   {
@@ -894,7 +904,7 @@ int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler)
     file->close();
     goto failure;
   }
-  free_file=  true;
+  free_file= true;
   last_rowid= file->ref;
   DBUG_RETURN(0);
 
@@ -903,32 +913,42 @@ failure:
   DBUG_RETURN(1);
 }
 
-int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler)
+
+/*
+  Initialize this quick select to be a part of a ROR-merged scan.
+  SYNOPSIS
+    QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
+      reuse_handler If true, use head->file, otherwise create separate
+                    handler object.
+  RETURN 
+    0     OK
+    other error code
+*/
+int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
 {
   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
   QUICK_RANGE_SELECT* quick;
-  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan");
+  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
 
   /* Initialize all merged "children" quick selects */
-  quick_it.rewind();
   DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler));
   if (!need_to_fetch_row && reuse_handler)
   {
     quick= quick_it++;
     /* 
-      There is no use for this->file. Reuse it for first of merged range 
+      There is no use of this->file. Use it for the first of merged range
       selects.
     */
-    if (quick->init_ror_child_scan(true))
+    if (quick->init_ror_merged_scan(true))
       DBUG_RETURN(1);
     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
   }
   while((quick= quick_it++))
   {
-    if (quick->init_ror_child_scan(false))
+    if (quick->init_ror_merged_scan(false))
       DBUG_RETURN(1);
     quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
-    /* Share the same record structure in intersect*/
+    /* All merged scans share the same record buffer in intersection. */
     quick->record= head->record[0];
   }
 
@@ -940,27 +960,44 @@ int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler)
   DBUG_RETURN(0);
 }
 
+
+/* 
+  Initialize quick select for row retrieval.
+  SYNOPSIS
+    reset()
+  RETURN
+    0      OK
+    other  Error code
+*/
+
 int QUICK_ROR_INTERSECT_SELECT::reset()
 {
   int result;
   DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
-  result= init_ror_child_scan(true);
+  result= init_ror_merged_scan(true);
   DBUG_RETURN(result);
 }
 
+
+/*
+  Add a merged quick select to this ROR-intersection quick select.
+  
+  SYNOPSIS
+    QUICK_ROR_INTERSECT_SELECT::push_quick_back()
+      quick Quick select to be added. The quick select must return
+            rows in rowid order.
+  NOTES
+    This call can only be made before init() is called.
+    
+  RETURN
+    false OK    
+    true  Out of memory.
+*/
+
 bool 
 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
 {
-  bool res;
-  if (head->file->primary_key_is_clustered() && 
-      quick->index == head->primary_key)
-  {
-    cpk_quick= quick;
-    res= 0;
-  }
-  else
-    res= quick_selects.push_back(quick);
-  return res;
+  return quick_selects.push_back(quick);
 }
 
 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
@@ -974,7 +1011,7 @@ QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
 
 QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                                TABLE *table)
-  : thd(thd_param), reset_called(false)
+  : thd(thd_param)
 {
   index= MAX_KEY;
   head= table;
@@ -984,6 +1021,17 @@ QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
   my_pthread_setspecific_ptr(THR_MALLOC,&alloc);
 }
 
+
+/*
+  Do post-constructor initialization.
+  SYNOPSIS
+    QUICK_ROR_UNION_SELECT::init()
+  
+  RETURN
+    0      OK
+    other  Error code
+*/
+
 int QUICK_ROR_UNION_SELECT::init()
 {
   if (init_queue(&queue, quick_selects.elements, 0,
@@ -1000,8 +1048,11 @@ int QUICK_ROR_UNION_SELECT::init()
   return 0;
 }
 
+
 /*
-  Comparison function to be used by priority queue.
+  Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority 
+  queue.
+
   SYNPOSIS
     QUICK_ROR_UNION_SELECT::queue_cmp()
       arg   Pointer to QUICK_ROR_UNION_SELECT
@@ -1010,11 +1061,22 @@ int QUICK_ROR_UNION_SELECT::init()
 */
 int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2)
 {
-  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;    
+  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
   return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
                                    ((QUICK_SELECT_I*)val2)->last_rowid);
 }
 
+
+/* 
+  Initialize quick select for row retrieval.
+  SYNOPSIS
+    reset()
+  
+  RETURN
+    0      OK
+    other  Error code
+*/
+
 int QUICK_ROR_UNION_SELECT::reset()
 {
   QUICK_SELECT_I* quick;
@@ -1028,7 +1090,7 @@ int QUICK_ROR_UNION_SELECT::reset()
   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
   while ((quick= it++))
   {
-    if (quick->init_ror_child_scan(false))
+    if (quick->init_ror_merged_scan(false))
       DBUG_RETURN(1);    
     if ((error= quick->get_next()))
     {
@@ -1245,13 +1307,33 @@ public:
   double read_cost; 
   ha_rows records; /* estimate of #rows to be examined */
 
+  /* 
+    If true, the scan returns rows in rowid order. This is used only for 
+    scans that can be both ROR and non-ROR.
+  */
   bool is_ror;
-  /* Create quck select for this plan */
+  
+  /*
+    Create quick select for this plan.
+    SYNOPSIS
+     make_quick()
+       param               Parameter from test_quick_select
+       retrieve_full_rows  If true, created quick select will do full record
+                           retrieval.
+       parent_alloc        Memory pool to use, if any.
+    
+    NOTES
+      retrieve_full_rows is ignored by some implementations.
+    
+    RETURN 
+      created quick select
+      NULL on any error.
+  */
   virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                      bool retrieve_full_rows,
                                      MEM_ROOT *parent_alloc=NULL) = 0;
 
-  /* Table read plans are allocated on MEM_ROOT and must not be deleted */
+  /* Table read plans are allocated on MEM_ROOT and are never deleted */
   static void *operator new(size_t size, MEM_ROOT *mem_root)
   { return (void*) alloc_root(mem_root, (uint) size); }
   static void operator delete(void *ptr,size_t size) {}
@@ -1262,11 +1344,19 @@ class TRP_ROR_UNION;
 class TRP_INDEX_MERGE;
 
 
+/*
+  Plan for a QUICK_RANGE_SELECT scan. 
+  TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
+  QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
+  record retrieval scans.
+*/
+
 class TRP_RANGE : public TABLE_READ_PLAN
 {
 public:
-  SEL_ARG *key;
-  uint     key_idx; /* key number in param->keys */
+  SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
+  uint     key_idx; /* key number in PARAM::key */
+
   TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) 
    : key(key_arg), key_idx(idx_arg)
   {}
@@ -1276,7 +1366,6 @@ public:
   {
     DBUG_ENTER("TRP_RANGE::make_quick");
     QUICK_RANGE_SELECT *quick;
-    /* ignore retrieve_full_rows there as it is set/used elsewhere */
     if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
     {
       quick->records= records;
@@ -1286,45 +1375,68 @@ public:
   }
 };
 
+
+/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
+
 class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
 {
 public:
   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
                              MEM_ROOT *parent_alloc);
+  
+  /* Array of pointers to ROR range scans used in this intersection */
   struct st_ror_scan_info **first_scan;
-  struct st_ror_scan_info **last_scan;
+  struct st_ror_scan_info **last_scan; /* End of the above array */
+  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
   bool is_covering; /* true if no row retrieval phase is necessary */
-  double index_scan_costs;
+  double index_scan_costs; /* SUM(cost(index_scan)) */
 };
 
+
 /* 
-  ROR-union is currently never covering.
+  Plan for QUICK_ROR_UNION_SELECT scan.
+  QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
+  is ignored by make_quick. 
 */
+
 class TRP_ROR_UNION : public TABLE_READ_PLAN
 {
 public:
   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
                              MEM_ROOT *parent_alloc);
-  TABLE_READ_PLAN **first_ror;  
-  TABLE_READ_PLAN **last_ror;
-  
+  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
+  TABLE_READ_PLAN **last_ror;  /* end of the above array */
 };
 
+
+/*
+  Plan for QUICK_INDEX_MERGE_SELECT scan.
+  QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
+  is ignored by make_quick. 
+*/
+
 class TRP_INDEX_MERGE : public TABLE_READ_PLAN
 {
 public:
   QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 
                              MEM_ROOT *parent_alloc);
-  TRP_RANGE **range_scans;
-  TRP_RANGE **range_scans_end;
+  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
+  TRP_RANGE **range_scans_end; /* end of the array */
 };
 
 
 /* 
-  Fill param->needed_fields with bitmap of fields used in the query
-  NOTE
-    Do not put clustered PK members into it as they are implicitly present in
-    all keys.
+  Fill param->needed_fields with bitmap of fields used in the query.
+  SYNOPSIS 
+    fill_used_fields_bitmap()
+      param Parameter from test_quick_select function.
+      
+  NOTES
+    Clustered PK members are not put into the bitmap as they are implicitly
+    present in all keys (and it is impossible to avoid reading them).
+  RETURN 
+    0  Ok 
+    1  Out of memory. 
 */
 
 static int fill_used_fields_bitmap(PARAM *param)
@@ -1348,6 +1460,7 @@ static int fill_used_fields_bitmap(PARAM *param)
   pk= param->table->primary_key;
   if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY)
   {
+    /* The table uses clustered PK and it is not internally generated */
     KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
     KEY_PART_INFO *key_part_end= key_part + 
                                  param->table->key_info[pk].key_parts;
@@ -1361,28 +1474,39 @@ static int fill_used_fields_bitmap(PARAM *param)
 
 
 /*
-  Test if a key can be used in different ranges
+  Test if a key can be used in different ranges 
 
   SYNOPSIS
-   SQL_SELECT::test_quick_select(thd,keys_to_use, prev_tables,
-                                 limit, force_quick_range)
+    SQL_SELECT::test_quick_select()
+      thd               Current thread
+      keys_to_use       Keys to use for range retrieval
+      prev_tables       Tables assumed to be already read when the scan is
+                        performed (but not read at the moment of this call)
+      limit             Query limit 
+      force_quick_range Prefer to use range (instead of full table scan) even 
+                        if it is more expensive. 
+
+  NOTES
+    Updates the following in the select parameter:
+      needed_reg - Bits for keys with may be used if all prev regs are read
+      quick      - Parameter to use when reading records.
+   
+    In the table struct the following information is updated:
+      quick_keys - Which keys can be used
+      quick_rows - How many rows the key matches
 
-   Updates the following in the select parameter:
-     needed_reg - Bits for keys with may be used if all prev regs are read
-     quick      - Parameter to use when reading records.
+  TODO
+   Check if this function really needs to modify keys_to_use, and change the
+   code to pass it by reference if it doesn't.
+
+   In addition to force_quick_range other means can be (an usually are) used 
+   to make this function prefer range over full table scan. Figure out if
+   force_quick_range is really needed.
    
-   In the table struct the following information is updated:
-     quick_keys - Which keys can be used
-     quick_rows - How many rows the key matches
-
- RETURN VALUES
-  -1 if impossible select
-   0 if can't use quick_select
-   1 if found usable range
-
- TODO
-   check if the function really needs to modify keys_to_use, and change the
-   code to pass it by reference if not
+  RETURN
+   -1 if impossible select (i.e. certainly no rows will be selected)
+    0 if can't use quick_select
+    1 if found usable ranges and quick select has been successfully created.
 */
 
 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
@@ -1393,6 +1517,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   uint idx;
   double scan_time;
   DBUG_ENTER("test_quick_select");
+  printf("\nQUERY: %s\n", thd->query);
   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
 		      keys_to_use.to_ulonglong(), (ulong) prev_tables,
 		      (ulong) const_tables));
@@ -1485,6 +1610,18 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     }
     param.key_parts_end=key_parts;
 
+    /* Calculate cost of full index read for the shortest covering index */
+    if (!head->used_keys.is_clear_all())
+    {           
+      int key_for_use= find_shortest_key(head, &head->used_keys);
+      double key_read_time= get_index_only_read_time(&param, records, 
+                                                     key_for_use);      
+      DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
+                           "read time %g", key_for_use, key_read_time));
+      if (key_read_time < read_time)
+        read_time= key_read_time;
+    }
+          
     if ((tree=get_mm_tree(&param,cond)))
     {
       if (tree->type == SEL_TREE::IMPOSSIBLE)
@@ -1499,8 +1636,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
         /* 
           It is possible to use a quick select (but maybe it would be slower
           than 'all' table scan).
-          Btw, tree type SEL_TREE::INDEX_MERGE was not introduced 
-          intentionally.
         */
         if (tree->merges.is_empty())
         {
@@ -1522,7 +1657,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
           {
             /* 
               Get best non-covering ROR-intersection plan and prepare data for 
-              building covering ROR-intersection
+              building covering ROR-intersection.
             */
             if ((new_trp= get_best_ror_intersect(&param, tree, false, 
                                                  best_read_time, 
@@ -1550,25 +1685,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
           LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
           DBUG_PRINT("info",("No range reads possible,"
                              " trying to construct index_merge"));
-          
-          /* 
-            Calculate cost of 'all'+index_only scan if it is possible. 
-            It is possible that table can be read in two ways:
-             a) 'all' + index_only
-             b) index_merge without index_only.
-          */
-          if (!head->used_keys.is_clear_all())
-          {           
-            int key_for_use= find_shortest_key(head, &head->used_keys);
-            ha_rows total_table_records= (0 == head->file->records)? 1 : 
-                                          head->file->records;
-            read_time = get_index_only_read_time(&param, total_table_records,
-                                                 key_for_use);
-            DBUG_PRINT("info", 
-                       ("'all'+'using index' scan will be using key %d, "
-                        "read time %g", key_for_use, read_time));
-          }
-
           List_iterator_fast<SEL_IMERGE> it(tree->merges);
           while ((imerge= it++))
           {
@@ -1579,8 +1695,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
           }
           best_trp= best_conj_trp;
         }
+        my_pthread_setspecific_ptr(THR_MALLOC, old_root);
 
-        my_pthread_setspecific_ptr(THR_MALLOC, old_root);        
+        /* If we got a read plan, create a quick select from it. */
         if (best_trp)
         {
           records= best_trp->records;
@@ -1589,7 +1706,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
             delete quick;
             quick= NULL;
           }
-        }        
+        } 
       }
     }
     my_pthread_setspecific_ptr(THR_MALLOC, old_root);
@@ -1608,19 +1725,22 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 
 
 /* 
-  Get cost of 'sweep' full row retrieveal of #records rows.
+  Get cost of 'sweep' full records retrieval.
+  SYNOPSIS
+    get_sweep_read_cost()
+      param            Parameter from test_quick_select
+      records          # of records to be retrieved 
   RETURN
-    0 - ok 
-    1 - sweep is more expensive then full table scan.
+    cost of sweep  
 */
 
-bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
-                          double index_reads_cost, double max_cost)
+double get_sweep_read_cost(const PARAM *param, ha_rows records)
 {
+  double result;
   if (param->table->file->primary_key_is_clustered())
   {
-    *cost= param->table->file->read_time(param->table->primary_key, 
-                                         records, records);
+    result= param->table->file->read_time(param->table->primary_key,
+                                          records, records);
   }
   else
   {    
@@ -1630,8 +1750,8 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
     if (busy_blocks < 1.0)
       busy_blocks= 1.0;
-    DBUG_PRINT("info",("sweep: nblocks= %g, busy_blocks=%g index_blocks=%g",
-                       n_blocks, busy_blocks, index_reads_cost));
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, 
+                       busy_blocks));
     /*
       Disabled: Bail out if # of blocks to read is bigger than # of blocks in 
       table data file.
@@ -1642,7 +1762,7 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
     if (!join || join->tables == 1)
     {
       /* No join, assume reading is done in one 'sweep' */
-      *cost= busy_blocks*(DISK_SEEK_BASE_COST + 
+      result= busy_blocks*(DISK_SEEK_BASE_COST + 
                           DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
     }
     else
@@ -1651,11 +1771,11 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
         Possibly this is a join with source table being non-last table, so
         assume that disk seeks are random here.
       */
-      *cost = busy_blocks;
+      result= busy_blocks;
     }
   }
-  DBUG_PRINT("info",("returning cost=%g", *cost));
-  return 0;
+  DBUG_PRINT("info",("returning cost=%g", result));
+  return result;
 }
 
 
@@ -1663,15 +1783,11 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
   Get best plan for a SEL_IMERGE disjunctive expression.
   SYNOPSIS
     get_best_disjunct_quick()
-      param
-      imerge
+      param     Parameter from check_quick_select function
+      imerge    Expression to use
       read_time Don't create scans with cost > read_time
-  RETURN 
-    read plan
-    NULL - OOM or no read scan could be built.
   
   NOTES
-
     index_merge cost is calculated as follows:
     index_merge_cost = 
       cost(index_reads) +         (see #1)
@@ -1723,11 +1839,14 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost,
   ROR-union cost is calculated in the same way index_merge, but instead of 
   Unique a priority queue is used. 
      
+  RETURN 
+    Created read plan
+    NULL - Out of memory or no read scan could be built.
 */
 
 static
 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
-                                           double read_time)
+                                         double read_time)
 {
   SEL_TREE **ptree;
   TRP_INDEX_MERGE *imerge_trp= NULL;
@@ -1823,12 +1942,11 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
   }
 
   /* Calculate cost(rowid_to_row_scan) */
-  if (get_sweep_read_cost(param, non_cpk_scan_records, &sweep_cost, 
-                           blocks_in_index_read, read_time))
-    goto build_ror_index_merge;
-  imerge_cost += sweep_cost;
+  imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
   DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", 
                      imerge_cost));
+  if (imerge_cost > read_time)
+    goto build_ror_index_merge;
 
   /* Add Unique operations cost */
   unique_calc_buff_size= 
@@ -1923,7 +2041,7 @@ skip_to_ror_scan:
   /* 
     rows to retrieve= 
       SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
-    This is valid because index_merge constructuion guarantees that conditions
+    This is valid because index_merge construction guarantees that conditions
     in disjunction do not share key parts.
   */
   roru_total_records -= (ha_rows)(roru_intersect_part*
@@ -1937,14 +2055,12 @@ skip_to_ror_scan:
     See get_merge_buffers_cost function for queue_use_cost formula derivation.
   */
   
-  if (get_sweep_read_cost(param, roru_total_records, &sweep_cost, 
-                           roru_index_costs, read_time))
-    DBUG_RETURN(NULL);
   double roru_total_cost;
   roru_total_cost= roru_index_costs +  
                    rows2double(roru_total_records)*log(n_child_scans) / 
-                   (TIME_FOR_COMPARE_ROWID * M_LN2) +
-                   sweep_cost;  
+                   (TIME_FOR_COMPARE_ROWID * M_LN2) + 
+                   get_sweep_read_cost(param, roru_total_records);
+
   DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, 
                       n_child_scans));
   TRP_ROR_UNION* roru;
@@ -1965,10 +2081,9 @@ skip_to_ror_scan:
 
 /*
   Calculate cost of 'index only' scan for given index and number of records.
-    (We can resolve this by only reading through this key.)
 
   SYNOPSIS
-    get_whole_index_read_time()
+    get_index_only_read_time()
       param    parameters structure
       records  #of records to read
       keynr    key to read
@@ -1992,35 +2107,40 @@ inline double get_index_only_read_time(const PARAM* param, ha_rows records,
 
 typedef struct st_ror_scan_info
 {
-  uint      idx;   /* # of used key in param->keys */
-  uint      keynr; /* # of used key in table */
-  ha_rows   records;
-  SEL_ARG   *sel_arg;
-  /* Fields used in the query and covered by this ROR scan */
+  uint      idx;      /* # of used key in param->keys */
+  uint      keynr;    /* # of used key in table */
+  ha_rows   records;  /* estimate of # records this scan will return */
+
+  /* Set of intervals over key fields that will be used for row retrieval. */
+  SEL_ARG   *sel_arg; 
+
+  /* Fields used in the query and covered by this ROR scan. */
   MY_BITMAP covered_fields;  
-  uint      used_fields_covered;
-  int       key_rec_length; /* length of key record with rowid */
+  uint      used_fields_covered; /* # of set bits in covered_fields */ 
+  int       key_rec_length; /* length of key record (including rowid) */
 
   /*
-    Array of 
-     #rows(keypart_1=c1 AND ... AND key_part_i=c_i) / 
-     #rows(keypart_1=c1 AND ... AND key_part_{i+1}=c_{i+1}) values
-  */
-  double    *key_part_rows;
-  double    index_read_cost;
-  uint      first_uncovered_field;
-  uint      key_components;
-}ROR_SCAN_INFO;
+    Cost of reading all index records with values in sel_arg intervals set
+    (assuming there is no need to access full table records)
+  */ 
+  double    index_read_cost; 
+  uint      first_uncovered_field; /* first unused bit in covered_fields */
+  uint      key_components; /* # of parts in the key */
+} ROR_SCAN_INFO;
 
 
 /*
-  Create ROR_SCAN_INFO* structure for condition sel_arg on key idx.
+  Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using 
+  sel_arg set of intervals.
+  
   SYNOPSIS
     make_ror_scan()
-      param
-      idx   index of key in param->keys
+      param    Parameter from test_quick_select function
+      idx      Index of key in param->keys
+      sel_arg  Set of intervals for a given key
   RETURN
-    NULL - OOM.
+    NULL - out of memory 
+    ROR scan structure containing a scan for {idx, sel_arg}
 */
 
 static
@@ -2049,10 +2169,6 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
                   param->fields_bitmap_size*8, false))
     DBUG_RETURN(NULL);
   bitmap_clear_all(&ror_scan->covered_fields);
-  if (!(ror_scan->key_part_rows= 
-        (double*)alloc_root(param->mem_root, sizeof(double)*
-                            param->table->key_info[keynr].key_parts)))
-    DBUG_RETURN(NULL);
   
   KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
   KEY_PART_INFO *key_part_end= key_part + 
@@ -2069,50 +2185,23 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
   ror_scan->index_read_cost= 
     get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
                              ror_scan->keynr);
-  /* 
-    Calculate # rows estimates for 
-      (key_part1=c1)
-      (key_part1=c1 AND key_part2=c2) 
-      ...and so on
-  */
-  char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* values of current key */
-  char *key_ptr= key_val;
-  ha_rows records;
-  ha_rows prev_records= param->table->file->records;
-  double *rows_diff= ror_scan->key_part_rows;
-  key_part= param->table->key_info[keynr].key_part;
-  SEL_ARG *arg= ror_scan->sel_arg;
-  /*
-    We have #rows estimate already for first key part, so do first loop 
-    iteration separately:
-  */
-  arg->store_min(key_part->length, &key_ptr, 0);
-  prev_records= param->table->quick_rows[ror_scan->keynr]; 
-  *(rows_diff++)= rows2double(prev_records) / param->table->file->records;
-  arg=arg->next_key_part; 
-  for(; arg; ++key_part, arg= arg->next_key_part)
-  {    
-    arg->store_min(key_part->length, &key_ptr, 0);
-    records= 
-      param->table->file->records_in_range(ror_scan->keynr,
-                                           (byte*)key_val, key_ptr - key_val,
-                                           HA_READ_KEY_EXACT,
-                                           (byte*)key_val, key_ptr - key_val,
-                                           HA_READ_AFTER_KEY);
-    if (records == HA_POS_ERROR)
-      return NULL; /* shouldn't happen actually */
-    *(rows_diff++)= rows2double(records) / rows2double(prev_records);
-    prev_records= records;
-  }
   DBUG_RETURN(ror_scan);
 }
 
 
-/*
-  Order ROR_SCAN_INFO** by 
-    E(#records_matched) * key_record_length
+/* 
+  Compare two ROR_SCAN_INFO** by  E(#records_matched) * key_record_length.
+  SYNOPSIS
+    cmp_ror_scan_info()
+      a ptr to first compared value
+      b ptr to second compared value
+
+  RETURN
+   -1 a < b  
+    0 a = b
+    1 a > b
 */
-int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
 {
   double val1= rows2double((*a)->records) * (*a)->key_rec_length;
   double val2= rows2double((*b)->records) * (*b)->key_rec_length;
@@ -2120,12 +2209,22 @@ int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
 }
 
 /*
-  Order ROR_SCAN_INFO** by 
+  Compare two ROR_SCAN_INFO** by 
    (#covered fields in F desc, 
     #components asc,           
     number of first not covered component asc)
+
+  SYNOPSIS
+    cmp_ror_scan_info_covering()
+      a ptr to first compared value
+      b ptr to second compared value
+
+  RETURN
+   -1 a < b  
+    0 a = b
+    1 a > b
 */
-int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
 {
   if ((*a)->used_fields_covered > (*b)->used_fields_covered)
     return -1;
@@ -2160,10 +2259,37 @@ typedef struct
   */
   double records_fract;
   ha_rows index_records; /* sum(#records to look in indexes) */
-}ROR_INTERSECT_INFO;
+} ROR_INTERSECT_INFO;
 
 
-/* Allocate a ROR_INTERSECT and initialize it to contain zero scans */
+/*
+  Re-initialize an allocated intersect info to contain zero scans.
+  SYNOPSIS
+    info Intersection info structure to re-initialize.
+*/
+
+static void ror_intersect_reinit(ROR_INTERSECT_INFO *info)
+{
+  info->is_covering= false;
+  info->index_scan_costs= 0.0f;
+  info->records_fract= 1.0f;
+  bitmap_clear_all(&info->covered_fields);
+}
+
+/*
+  Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
+
+  SYNOPSIS
+    ror_intersect_init() 
+      param         Parameter from test_quick_select 
+      is_index_only If true, set ROR_INTERSECT_INFO to be covering
+  
+  RETURN
+    allocated structure
+    NULL on error
+*/
+
+static
 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
 {
   ROR_INTERSECT_INFO *info;
@@ -2172,42 +2298,46 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
                                               sizeof(ROR_INTERSECT_INFO))))
     return NULL;
   info->param= param;
-  info->is_covering= is_index_only;
-  info->index_scan_costs= 0.0f;
-  info->records_fract= 1.0f;
-
   if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size)))
     return NULL;
   if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
                   false))
     return NULL;
-  bitmap_clear_all(&info->covered_fields);
+  ror_intersect_reinit(info);
   return info;
 }
 
+
 /*
-  Check if it makes sense to add a ROR scan to ROR-intersection, and if yes 
-  update parameters of ROR-intersection, including its cost.
+  Check if adding a ROR scan to a ROR-intersection reduces its cost of 
+  ROR-intersection and if yes, update parameters of ROR-intersection,
+  including its cost.
 
-  RETURN
-    true   ROR scan added to ROR-intersection, cost updated.
-    false  It doesn't make sense to add this ROR scan to this ROR-intersection.
+  SYNOPSIS
+    ror_intersect_add()
+      param        Parameter from test_quick_select 
+      info         ROR-intersection structure to add the scan to.
+      ror_scan     ROR scan info to add.
+      is_cpk_scan  If true, add the scan as CPK scan (this can be inferred
+                   from other parameters and is passed separately only to
+                   avoid duplicating the inference code)
   
-  NOTE 
-    Adding a ROR scan to ROR-intersect "makes sense" iff selectivt
+  NOTES
+    Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
+    intersection decreases. The cost of ROR-intersection is caclulated as
+    follows:
 
-    Cost of ROR-intersection is calulated as follows:
-     cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
+    cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
  
     if (union of indexes used covers all needed fields)
       cost_of_full_rows_retrieval= 0;
     else
     {
       cost_of_full_rows_retrieval= 
-        cost_of_sweep_read(E(rows_to_retrive), rows_in_table);
+        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table);
     }
 
-    E(rows_to_retrive) is calulated as follows: 
+    E(rows_to_retrieve) is caclulated as follows: 
     Suppose we have a condition on several keys
     cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key 
          k_21=c_21 AND k_22=c_22 AND ...  // parts of second key 
@@ -2233,13 +2363,13 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
     save R to be cond and proceed to recursion step.
 
     Recursion step:
-    We have set of fixed fields/satisfied conditions) F, probability P(F),
+    We have a set of fixed fields/satisfied conditions) F, probability P(F),
     and remaining conjunction R
     Pick next key part on current key and its condition "k_ij=c_ij".
     We will add "k_ij=c_ij" into F and update P(F).
-    Lets denote k_ij as t,  R = t AND R1, where i1 may still contain t. Then
+    Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
 
-     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R|(t AND F)) (2)
+     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
 
     (where '|' mean conditional probability, not "or")
 
@@ -2253,72 +2383,153 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
      P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = 
           = P(t|fields_before_t_in_key).
 
-     P(t|fields_before_t_in_key)= #distinct_values(fields_before_t_in_key) / 
-                                  #distinct_values(fields_before_t_in_key, t)
+     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
+                                   #records(fields_before_t_in_key, t)
     
-    The second multiplier is calculated by applying this step recusively. 
+    The second multiplier is calculated by applying this step recursively. 
 
-    This function applies recursion steps for all fixed key members of 
-    one key, accumulating sets of covered fields and 
-    The very first step described is done as recursion step with 
-    P(fixed fields)=1 and empty set of fixed fields.
+  IMPLEMENTATION
+    This function calculates the result of application of the "recursion step"
+    described above for all fixed key members of a single key, accumulating set
+    of covered fields, selectivity, etc.
+
+    The calculation is conducted as follows: 
+    Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
+    
+     n_{k1}      n_{k_2}
+    --------- * ---------  * .... (3)
+     n_{k1-1}    n_{k2_1} 
+
+    where k1,k2,... are key parts which fields were not yet marked as fixed 
+    ( this is result of application of option b) of the recursion step for 
+      parts of a single key).  
+    Since it is reasonable to expect that most of the fields are not marked 
+    as fixed, we calcualate (3) as 
+
+                                  n_{i1}      n_{i_2}
+    (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
+                                  n_{i1-1}    n_{i2_1} 
+      
+    where i1,i2, .. are key parts that were already marked as fixed. 
+    
+    In order to minimize number of expensive records_in_range calls we group
+    and reduce adjacent fractions.
+ 
+  RETURN
+    true   ROR scan added to ROR-intersection, cost updated.
+    false  It doesn't make sense to add this ROR scan to this ROR-intersection.
 */
 
-bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan)
+bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
+                       ROR_SCAN_INFO* ror_scan, bool is_cpk_scan=false)
 {
   int i;
   SEL_ARG *sel_arg;
   KEY_PART_INFO *key_part= 
     info->param->table->key_info[ror_scan->keynr].key_part;
   double selectivity_mult= 1.0;
+  char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
+
   DBUG_ENTER("ror_intersect_add");  
   DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); 
   DBUG_PRINT("info", ("Adding scan on %s", 
                       info->param->table->key_info[ror_scan->keynr].name));
+  SEL_ARG *tuple_arg= NULL;
+  char *key_ptr= key_val;
+  bool cur_covered, prev_covered=
+     bitmap_is_set(&info->covered_fields, key_part->fieldnr);
+
+  ha_rows prev_records= param->table->file->records;
+
   for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; 
       i++, sel_arg= sel_arg->next_key_part)
   {
-    if (!bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr))
+    cur_covered= bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr);
+    if (cur_covered != prev_covered)
     {
-      /*
-        P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
-               = P(t|fields_before_t_in_key).
-      */
-      selectivity_mult *= ror_scan->key_part_rows[i];
+      /* create (part1val, ..., part{n-1}val) tuple. */
+      {
+        if (!tuple_arg)
+        {
+          tuple_arg= ror_scan->sel_arg;
+          tuple_arg->store_min(key_part->length, &key_ptr, 0); 
+        }
+        while (tuple_arg->next_key_part != sel_arg)
+        {
+          tuple_arg= tuple_arg->next_key_part; 
+          tuple_arg->store_min(key_part->length, &key_ptr, 0); 
+        }
+      } 
+      ha_rows records;
+      records= param->table->file->
+                 records_in_range(ror_scan->keynr,
+                                  (byte*)key_val, key_ptr - key_val, 
+                                  HA_READ_KEY_EXACT,
+                                  (byte*)key_val, key_ptr - key_val,
+                                  HA_READ_AFTER_KEY);
+      if (cur_covered)
+      {
+        /* uncovered -> covered */
+        double tmp= rows2double(records)/rows2double(prev_records);
+        printf("  Selectivity multiplier: %g\n", tmp);
+        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
+        selectivity_mult *= tmp;
+        prev_records= HA_POS_ERROR;
+      }
+      else
+      {
+        /* covered -> uncovered */
+        prev_records= records; 
+      }
     }
+    else printf("no change, skipping\n");
+    prev_covered= cur_covered;
+  }
+  if (!prev_covered)
+  {
+    double tmp= rows2double(param->table->quick_rows[ror_scan->keynr]) / 
+                rows2double(prev_records);
+    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
+    selectivity_mult *= tmp; 
   }
+
   if (selectivity_mult == 1.0)
   {
     /* Don't add this scan if it doesn't improve selectivity. */
-    DBUG_PRINT("info", ("This scan doesn't improve selectivity.")); 
+    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
     DBUG_RETURN(false);
   }
-  info->records_fract *= selectivity_mult;
-
-  bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
-
-  ha_rows scan_records= info->param->table->quick_rows[ror_scan->keynr];
-  info->index_scan_costs += ror_scan->index_read_cost;
 
+  info->records_fract *= selectivity_mult;
+  ha_rows cur_scan_records= info->param->table->quick_rows[ror_scan->keynr];  
+  if (is_cpk_scan)
+  {    
+    info->index_scan_costs += rows2double(cur_scan_records)*
+                              TIME_FOR_COMPARE_ROWID;
+  }
+  else
+  {
+    info->index_records += cur_scan_records;
+    info->index_scan_costs += ror_scan->index_read_cost;
+    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
+  }
+  
   if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                              &info->covered_fields))
   {
     DBUG_PRINT("info", ("ROR-intersect is covering now"));
-    /* ROR-intersect became covering */ 
     info->is_covering= true;
   }
   
-  info->index_records += scan_records;
+  printf("  records: %g\n", rows2double(param->table->file->records) *
+        info->records_fract);
   info->total_cost= info->index_scan_costs;
   if (!info->is_covering)
   {
     ha_rows table_recs= info->param->table->file->records;
-    double sweep_cost;
-    get_sweep_read_cost(info->param, 
-                         (ha_rows)(table_recs*info->records_fract), 
-                         &sweep_cost, info->index_scan_costs, DBL_MAX);
-
-    info->total_cost += sweep_cost;
+    info->total_cost += 
+      get_sweep_read_cost(info->param, 
+                          (ha_rows)(info->records_fract*table_recs));
   }
   DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); 
   DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, 
@@ -2326,24 +2537,23 @@ bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan)
   DBUG_RETURN(true);
 }
 
+
 /* 
   Get best ROR-intersection plan using non-covering ROR-intersection search 
   algorithm. The returned plan may be covering.
 
   SYNOPSIS
     get_best_ror_intersect()
-      param
-      tree
+      param            Parameter from test_quick_select function.
+      tree             Transformed restriction condition to be used to look
+                       for ROR scans.
       force_index_only If true, don't calculate costs of full rows retrieval.
       read_time        Do not return read plans with cost > read_time.
       are_all_covering [out] set to true if union of all scans covers all 
                        fields needed by the query (and it is possible to build
                        a covering ROR-intersection)
-  RETURN
-    ROR-intersection table read plan 
-    NULL if OOM or no plan found.
 
-  NOTE
+  NOTES
     get_key_scans_params must be called before for the same SEL_TREE before 
     this function can be called.
     
@@ -2383,6 +2593,10 @@ bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan)
     Clustered PK scan has special handling in ROR-intersection: it is not used
     to retrieve rows, instead its condition is used to filter row references 
     we get from scans on other keys.
+
+  RETURN
+    ROR-intersection table read plan 
+    NULL if out of memory or no suitable plan found.
 */
 
 static
@@ -2398,20 +2612,34 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   if (tree->n_ror_scans < 2)
     DBUG_RETURN(NULL);
 
-  /* Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them */
+  /*
+    Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them.
+    Also find and save clustered PK scan if there is one.
+  */
   ROR_SCAN_INFO **cur_ror_scan;
+  ROR_SCAN_INFO *cpk_scan= NULL;
+  bool cpk_scan_used= false;
   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                      sizeof(ROR_SCAN_INFO*)*
                                                      param->keys)))
     return NULL;
-  
+  uint cpk_no= (param->table->file->primary_key_is_clustered())?
+               param->table->primary_key : MAX_KEY;
+ 
   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
   {
+    ROR_SCAN_INFO *scan;
     if (!tree->ror_scans_map.is_set(idx))
       continue;
-    if (!(*cur_ror_scan= make_ror_scan(param, idx, tree->keys[idx])))
+    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
       return NULL;
-    cur_ror_scan++;
+    if (param->real_keynr[idx] == cpk_no)
+    {
+      cpk_scan= scan;
+      tree->n_ror_scans--;
+    }
+    else
+      *(cur_ror_scan++)= scan;
   }
 
   tree->ror_scans_end= cur_ror_scan;
@@ -2421,7 +2649,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   /*
     Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized 
     ROR_SCAN_INFOs.
-    Get a minimal key scan using an approximate algorithm.
+    Now, get a minimal key scan using an approximate algorithm.
   */
   qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
         (qsort_cmp)cmp_ror_scan_info);
@@ -2452,12 +2680,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   LINT_INIT(best_index_scan_costs);
 
   cur_ror_scan= tree->ror_scans;
+  /* Start with one scan */
+  intersect_scans_best= intersect_scans;
   while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
   {
     /* S= S + first(R); */
-    if (ror_intersect_add(intersect, *cur_ror_scan))
+    if (ror_intersect_add(param, intersect, *cur_ror_scan))
       *(intersect_scans_end++)= *cur_ror_scan; 
-    /* R= R-first(R); */
+    /* R= R - first(R); */
     cur_ror_scan++;
     
     if (intersect->total_cost < min_cost)
@@ -2471,17 +2701,47 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
       best_index_scan_costs= intersect->index_scan_costs;
     }
   }
-
-  /* Ok, return ROR-intersect plan if we have found one */
+  
+  DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
+                                          "best ROR-intersection",
+                                          intersect_scans,
+                                          intersect_scans_best););
+  
   *are_all_covering= intersect->is_covering;
   uint best_num= intersect_scans_best - intersect_scans;  
+  /*
+    Ok, found the best ROR-intersection of non-CPK key scans.
+    Check if we should add a CPK scan.
+    
+    If the obtained ROR-intersection is covering, it doesn't make sense 
+    to add CPK scan - Clustered PK contains all fields and if we're doing
+    CPK scan doing other CPK scans will only add more overhead.
+  */
+  if (cpk_scan && !intersect->is_covering)
+  {
+    /*
+      Handle the special case: ROR-intersect(PRIMARY, key1) is 
+      the best, but cost(range(key1)) > cost(best_non_ror_range_scan)
+    */
+    if (best_num == 0)
+    { 
+      ror_intersect_reinit(intersect);
+      if (!ror_intersect_add(param, intersect, cpk_scan))
+        DBUG_RETURN(NULL); /* shouldn't happen actually actually */
+      *(intersect_scans_end++)= *cur_ror_scan;
+      best_num++;
+    }
+
+    if (ror_intersect_add(param, intersect, cpk_scan))
+      cpk_scan_used= true;
+    best_rows= (ha_rows)(intersect->records_fract* 
+                         rows2double(param->table->file->records));
+  }
+
+  /* Ok, return ROR-intersect plan if we have found one */
   TRP_ROR_INTERSECT *trp= NULL;
-  if (intersect_scans_best && best_num > 1)
+  if (best_num > 1 || cpk_scan_used)
   {
-    DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
-                                            "used for ROR-intersect",
-                                            intersect_scans,
-                                            intersect_scans_best););
     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
       DBUG_RETURN(trp);
     if (!(trp->first_scan= 
@@ -2494,7 +2754,8 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
     trp->read_cost= min_cost;
     trp->records= best_rows? best_rows : 1;
     trp->index_scan_costs= best_index_scan_costs;
-  }  
+    trp->cpk_scan= cpk_scan;
+  } 
   DBUG_RETURN(trp);
 }
 
@@ -2503,15 +2764,15 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   Get best covering ROR-intersection.
   SYNOPSIS
     get_best_covering_ror_intersect()
-      param
-      tree      SEL_TREE
-      read_time Dont return table read plans with cost > read_time.
+      param     Parameter from test_quick_select function.
+      tree      SEL_TREE with sets of intervals for different keys.
+      read_time Don't return table read plans with cost > read_time.
 
   RETURN 
     Best covering ROR-intersection plan 
     NULL if no plan found.
 
-  NOTE
+  NOTES
     get_best_ror_intersect must be called for a tree before calling this
     function for it. 
     This function invalidates tree->ror_scans member values.
@@ -2563,7 +2824,6 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
   ha_rows records=0;
   bool all_covered;  
   
-  /* Start will all scans and remove one by one until */
   DBUG_PRINT("info", ("Building covering ROR-intersection"));
   DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                            "building covering ROR-I",
@@ -2805,12 +3065,23 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
         DBUG_RETURN(NULL);
       }
     }
+    if (cpk_scan)
+    {
+      if (!(quick= get_quick_select(param, cpk_scan->idx,
+                                    cpk_scan->sel_arg, alloc)))
+      {
+        delete quick_intrsect;
+        DBUG_RETURN(NULL);
+      }
+      quick_intrsect->cpk_quick= quick;
+    }
     quick_intrsect->records= records; 
     quick_intrsect->read_time= read_cost;
   }
   DBUG_RETURN(quick_intrsect);
 }
 
+
 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 
                                           bool retrieve_full_rows,
                                           MEM_ROOT *parent_alloc)
@@ -2820,8 +3091,8 @@ QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
   QUICK_SELECT_I *quick;
   DBUG_ENTER("TRP_ROR_UNION::make_quick");
   /* 
-    It is currently impossible to construct a ROR-union that will 
-    not retrieve full rows, ingore retrieve_full_rows.
+    It is impossible to construct a ROR-union that will not retrieve full 
+    rows, ignore retrieve_full_rows parameter.
   */
   if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
   {
@@ -4248,7 +4519,7 @@ SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
 }
 
 
-	/* Test that the proporties for a red-black tree holds */
+	/* Test that the properties for a red-black tree hold */
 
 #ifdef EXTRA_DEBUG
 int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
@@ -4336,13 +4607,26 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
 #endif
 
 
+/*
+  Calculate estimate of number records that will be retrieved by a range
+  scan on given index using given SEL_ARG intervals tree.
+  SYNOPSIS
+    check_quick_select
+      param  Parameter from test_quick_select
+      idx    Number of index to use in PARAM::key SEL_TREE::key
+      tree   Transformed selection condition, tree->key[idx] holds intervals
+             tree to be used for scanning.
+  NOTES
+    param->is_ror_scan is set to reflect if the key scan is a ROR (see 
+    is_key_scan_ror function for more info)
+    param->table->quick_*, param->range_count (and maybe others) are 
+    updated with data of given key scan, see check_quick_keys for details.
+    
+  RETURN 
+    Estimate # of records to be retrieved.
+    HA_POS_ERROR if estimate calculation failed due to table handler problems. 
 
-/*****************************************************************************
-** Check how many records we will find by using the found tree
-** NOTE
-**  param->table->quick_* and param->range_count (and maybe others) are 
-**  updated with data of given key scan.
-*****************************************************************************/
+*/
 
 static ha_rows
 check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
@@ -4396,30 +4680,40 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
 }
 
 
-/* 
+/*
+  Recursively calculate estimate of # rows that will be retrieved by 
+  key scan on key idx. 
   SYNOPSIS
     check_quick_keys()
-      param
-      idx          key to use, its number in list of used keys (that is, 
-                   param->real_keynr[idx] holds the key number in table)
-      
-      key_tree     SEL_ARG tree which cost is calculated.
-      min_key      buffer with min key value tuple
-      min_key_flag 
-      max_key      buffer with max key value tuple
+      param         Parameter from test_quick select function.
+      idx           Number of key to use in PARAM::keys in list of used keys 
+                    (param->real_keynr[idx] holds the key number in table)
+      key_tree      SEL_ARG tree being examined.
+      min_key       Buffer with partial min key value tuple
+      min_key_flag  
+      max_key       Buffer with partial max key value tuple
       max_key_flag
 
-  NOTE
-    The function does the recursive descent on the tree via left, right, and
-    next_key_part edges. The #rows estimates are calculated at the leaf nodes.
+  NOTES
+    The function does the recursive descent on the tree via SEL_ARG::left, 
+    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates 
+    are calculated using records_in_range calls at the leaf nodes and then
+    summed.
 
-    param->min_key and param->max_key are used to hold key segment values.
+    param->min_key and param->max_key are used to hold prefixes of key value
+    tuples.
 
     The side effects are:
+    
     param->max_key_part is updated to hold the maximum number of key parts used
       in scan minus 1.
-    param->range_count is updated.
-    param->is_ror_scan is updated.
+   
+    param->range_count is incremented if the function finds a range that 
+      wasn't counted by the caller.
+    
+    param->is_ror_scan is cleared if the function detects that the key scan is
+      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
+      function for description of which key scans are ROR scans)
 */
 
 static ha_rows
@@ -4432,6 +4726,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   param->max_key_part=max(param->max_key_part,key_tree->part);
   if (key_tree->left != &null_element)
   {
+    /*
+      There are at least two intervals for current key part, i.e. condition
+      was converted to something like
+        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
+      This is not a ROR scan if the key is not Clustered Primary Key.
+    */
     param->is_ror_scan= false;
     records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
 			     max_key,max_key_flag);
@@ -4441,12 +4741,25 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
 
   uint tmp_min_flag,tmp_max_flag,keynr;
   char *tmp_min_key=min_key,*tmp_max_key=max_key;
-
+  
   key_tree->store(param->key[idx][key_tree->part].part_length,
 		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
   uint min_key_length= (uint) (tmp_min_key- param->min_key);
   uint max_key_length= (uint) (tmp_max_key- param->max_key);
 
+  if (param->is_ror_scan)
+  {
+    /* 
+      If the index doesn't cover entire key, mark the scan as non-ROR scan.
+      (TODO sergeyp: investigate if we could do better here)
+    */
+    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
+                    key_part[key_tree->part].fieldnr - 1;
+    if (param->table->field[fieldnr]->key_length() != 
+        param->key[idx][key_tree->part].part_length)
+      param->is_ror_scan= false;
+  }
+
   if (key_tree->next_key_part &&
       key_tree->next_key_part->part == key_tree->part+1 &&
       key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
@@ -4461,7 +4774,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
       goto end;					// Ugly, but efficient
     }
     else
+    {
+      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
       param->is_ror_scan= false;
+    }
 
     tmp_min_flag=key_tree->min_flag;
     tmp_max_flag=key_tree->max_flag;
@@ -4493,6 +4809,15 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   {
     if (param->is_ror_scan)
     {
+      /*
+        If we get here, the condition on the key was converted to form
+        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+          somecond(keyXpart{key_tree->part})"
+        Check if
+          somecond is "keyXpart{key_tree->part} = const" and
+          uncovered "tail" of KeyX parts is either empty or is identical to
+          first members of clustered primary key.
+      */
       if (!(min_key_length == max_key_length &&
             !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
             !key_tree->min_flag && !key_tree->max_flag && 
@@ -4530,6 +4855,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   records+=tmp;
   if (key_tree->right != &null_element)
   {
+    /*
+      There are at least two intervals for current key part, i.e. condition
+      was converted to something like
+        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
+      This is not a ROR scan if the key is not Clustered Primary Key.
+    */
     param->is_ror_scan= false;
     tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
 			 max_key,max_key_flag);
@@ -4540,11 +4871,46 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   return records;
 }
 
+
 /*
-  Check if key scan on key keynr with first nparts key parts fixed is a
-  ROR scan. This function doesn't handle clustered PK scans or HASH index 
-  scans.
+  Check if key scan on given index with equality conditions on first n key 
+  parts is a ROR scan.
+
+  SYNOPSIS
+    is_key_scan_ror()
+      param  Parameter from test_quick_select 
+      keynr  Number of key in the table. The key must not be a clustered
+             primary key.
+      nparts Number of first key parts for which equality conditions
+             are present.
+    
+  NOTES
+    ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
+    ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
+    
+    An index scan is a ROR scan if it is done using a condition in form
+
+        "key1_1=c_1 AND ... AND key1_n=c_n"  (1)
+    
+    where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
+
+    and the table has a clustered Primary Key 
+
+    PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being 
+    identical to uncovered parts ot the key being scanned (2)
+    
+    Scans on HASH indexes are not ROR scans, 
+    any range scan on clustered primary key is ROR scan  (3)
+
+    Check (1) is made in check_quick_keys()
+    Check (3) is made check_quick_select()
+    Check (2) is made by this function.
+
+  RETURN 
+    true  If the scan is ROR-scan
+    false otherwise
 */
+
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
 {
   KEY *table_key= param->table->key_info + keynr;
@@ -4564,7 +4930,8 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   for(;(key_part!=key_part_end) && (pk_part != pk_part_end); 
       ++key_part, ++pk_part)
   {
-    if (key_part->field != pk_part->field)
+    if ((key_part->field != pk_part->field) || 
+        (key_part->length != pk_part->length))
       return false; 
   }
   return (key_part == key_part_end);
@@ -4573,17 +4940,26 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
 
 /*
   Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
-  This uses it's own malloc tree.
+  
   SYNOPSIS
     get_quick_select()
       param 
-      idx          index of used key in param->key.
+      idx          Index of used key in param->key.
       key_tree     SEL_ARG tree for the used key 
-      parent_alloc if not NULL, use it to allocate memory for 
+      parent_alloc If not NULL, use it to allocate memory for 
                    quick select data. Otherwise use quick->alloc.
+  
+  RETURN 
+    NULL on error
+    otherwise created quick select
+
+  NOTES
+    The caller must call QUICK_SELCT::init for returned quick select
 
-  The caller should call QUICK_SELCT::init for returned quick select
+    CAUTION! This function may change THR_MALLOC to a MEM_ROOT which will be 
+    deallocated when the returned quick select is deleted.
 */
+
 QUICK_RANGE_SELECT *
 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
                  MEM_ROOT *parent_alloc)
@@ -5008,15 +5384,23 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
 
 
 /*
+  Retrieve next record.  
+  SYNOPSIS
+     QUICK_ROR_INTERSECT_SELECT::get_next()  
+  
   NOTES
-    Invariant on enter/exit: all intersected selects have retrieved index 
-    records with rowid <= some_rowid_val and no intersected select has 
+    Invariant on enter/exit: all intersected selects have retrieved all index
+    records with rowid <= some_rowid_val and no intersected select has
     retrieved any index records with rowid > some_rowid_val.
     We start fresh and loop until we have retrieved the same rowid in each of
     the key scans or we got an error.
 
     If a Clustered PK scan is present, it is used only to check if row 
-    satisfies its conditions (and never used for row retrieval).
+    satisfies its condition (and never used for row retrieval).
+
+  RETURN
+   0     - Ok
+   other - Error code if any error occurred.
 */
 
 int QUICK_ROR_INTERSECT_SELECT::get_next()
@@ -5090,10 +5474,18 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
 
 
 /* 
+  Retrieve next record.  
+  SYNOPSIS
+    QUICK_ROR_UNION_SELECT::get_next()
+  
   NOTES
     Enter/exit invariant: 
     For each quick select in the queue a {key,rowid} tuple has been 
     retrieved but the corresponding row hasn't been passed to output.
+
+  RETURN 
+   0     - Ok
+   other - Error code if any error occurred.
 */
 
 int QUICK_ROR_UNION_SELECT::get_next()
@@ -5107,7 +5499,7 @@ int QUICK_ROR_UNION_SELECT::get_next()
   {
     if (!queue.elements)
       DBUG_RETURN(HA_ERR_END_OF_FILE);
-    /* Ok, we have a queue with > 1 scans */
+    /* Ok, we have a queue with >= 1 scans */
 
     quick= (QUICK_SELECT_I*)queue_top(&queue);
     memcpy(cur_rowid, quick->last_rowid, rowid_length);
@@ -5548,8 +5940,78 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
 #endif
 
 
-void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, 
-                                               String *used_lengths)
+void QUICK_RANGE_SELECT::add_info_string(String *str)
+{
+  KEY *key_info= head->key_info + index;
+  str->append(key_info->name);
+}
+
+void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
+{
+  QUICK_RANGE_SELECT *quick;
+  bool first= true;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  str->append("sort_union(");
+  while ((quick= it++))
+  {
+    if (!first)
+      str->append(',');
+    else
+      first= false;
+    quick->add_info_string(str);
+  }
+  if (pk_quick_select)
+  {
+    str->append(',');
+    pk_quick_select->add_info_string(str);
+  }
+  str->append(')');
+}
+
+void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
+{
+  bool first= true; 
+  QUICK_RANGE_SELECT *quick;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+  str->append("intersect(");
+  while ((quick= it++))
+  {
+    KEY *key_info= head->key_info + quick->index;
+    if (!first)
+      str->append(',');
+    else 
+      first= false;
+    str->append(key_info->name);
+  }
+  if (cpk_quick)
+  {
+    KEY *key_info= head->key_info + cpk_quick->index;
+    str->append(',');
+    str->append(key_info->name);
+  }
+  str->append(')');
+}
+
+void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
+{
+  bool first= true; 
+  QUICK_SELECT_I *quick;
+  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+  str->append("union(");
+  while ((quick= it++))
+  {
+    if (!first)
+      str->append(',');
+    else
+      first= false;
+    quick->add_info_string(str);
+  }
+  str->append(')');
+}
+
+
+void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, 
+                                              String *used_lengths)
 {
   char buf[64];
   uint length;
@@ -5559,22 +6021,27 @@ void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names,
   used_lengths->append(buf, length);
 }
 
-void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names,
-                                                     String *used_lengths)
+void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
+                                                    String *used_lengths)
 {
   char buf[64];
   uint length;
+  bool first= true;
   QUICK_RANGE_SELECT *quick;
+  
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
   while ((quick= it++))
   {
-    KEY *key_info= head->key_info + quick->index;
-    if (key_names->length())
+    if (first)
+      first= false;
+    else
+    {
       key_names->append(',');
-    key_names->append(key_info->name);
-
-    if (used_lengths->length())
       used_lengths->append(',');
+    }
+      
+    KEY *key_info= head->key_info + quick->index;
+    key_names->append(key_info->name);
     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
     used_lengths->append(buf, length);
   }
@@ -5589,8 +6056,8 @@ void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names,
   }
 }
 
-void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names,
-                                                       String *used_lengths)
+void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
+                                                      String *used_lengths)
 {
   char buf[64];
   uint length;
@@ -5600,17 +6067,18 @@ void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names,
   while ((quick= it++))
   {
     KEY *key_info= head->key_info + quick->index;
-    if (!first)
-      key_names->append(',');
-    key_names->append(key_info->name);
-
     if (first)
       first= false;
     else
+    {
+      key_names->append(',');
       used_lengths->append(',');
+    }
+    key_names->append(key_info->name);
     length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
     used_lengths->append(buf, length);
   }
+  
   if (cpk_quick)
   {
     KEY *key_info= head->key_info + cpk_quick->index;
@@ -5622,8 +6090,8 @@ void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names,
   }
 }
 
-void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names,
-                                                   String *used_lengths)
+void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
+                                                  String *used_lengths)
 {
   bool first= true; 
   QUICK_SELECT_I *quick;
@@ -5637,7 +6105,7 @@ void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names,
       used_lengths->append(',');
       key_names->append(',');
     }
-    quick->fill_keys_and_lengths(key_names, used_lengths);
+    quick->add_keys_and_lengths(key_names, used_lengths);
   }
 }
 
@@ -5850,7 +6318,7 @@ void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
 #endif
 
 /*****************************************************************************
-** Instansiate templates
+** Instantiate templates
 *****************************************************************************/
 
 #ifdef __GNUC__
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 1b522421a01..d992bbb2456 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -77,27 +77,59 @@ public:
   ha_rows records;  /* estimate of # of records to be retrieved */
   double  read_time; /* time to perform this retrieval          */
   TABLE   *head;
-
   /*
-    index this quick select uses, or MAX_KEY for quick selects 
+    Index this quick select uses, or MAX_KEY for quick selects 
     that use several indexes
   */
   uint index;
 
-  /* applicable iff index!= MAX_KEY */
-  uint max_used_key_length, used_key_parts;
+  /*
+    Total length of first used_key_parts parts of the key.
+    Applicable if index!= MAX_KEY.
+  */
+  uint max_used_key_length;
+
+  /*
+    Max. number of (first) key parts this quick select uses for retrieval.
+    eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2.
+    Applicable if index!= MAX_KEY.
+  */
+  uint used_key_parts;
 
   QUICK_SELECT_I();
   virtual ~QUICK_SELECT_I(){};
-  /* 
-    Call init() immediately after creation of quick select. if init() call 
-    fails, reset() or get_next() must not be called.
+  
+  /*
+    Do post-constructor initialization.
+    SYNOPSIS
+      init()
+    
+    init() performs initializations that should have been in constructor if 
+    it was possible to return errors from constructors. The join optimizer may 
+    create and then delete quick selects without retrieving any rows so init()
+    must not contain any IO or CPU intensive code.
+
+    If init() call fails the only valid action is to delete this quick select, 
+    reset() and get_next() must not be called.
+    
+    RETURN
+      0      OK
+      other  Error code
   */
   virtual int  init() = 0;
   
   /* 
-    Call reset() before first get_next call. get_next must not be called if 
-    reset() call fails.
+    Initialize quick select for row retrieval.
+    SYNOPSIS
+      reset()
+    
+    reset() should be called when it is certain that row retrieval will be 
+    necessary. This call may do heavyweight initialization like buffering first
+    N records etc. If reset() call fails get_next() must not be called.
+    
+    RETURN
+      0      OK
+      other  Error code
   */
   virtual int  reset(void) = 0;
   virtual int  get_next() = 0;   /* get next record to retrieve */
@@ -117,30 +149,54 @@ public:
   virtual int get_type() = 0;
 
   /*
-    Initialize this quick select as a child of a index union or intersection 
-    scan. This call replaces init() call.
+    Initialize this quick select as a merged scan inside a ROR-union or a ROR-
+    intersection scan. The caller must not additionally call init() if this 
+    function is called.
+    SYNOPSIS
+      init_ror_merged_scan()
+        reuse_handler quick select may use (q: psergey??)
+        (q: is this natural that we do it this way)
+    NOTES 
+      psergey?  
   */
-  virtual int init_ror_child_scan(bool reuse_handler)
+  virtual int init_ror_merged_scan(bool reuse_handler)
   { DBUG_ASSERT(0); return 1; }
   
-  virtual void cleanup_ror_child_scan() { DBUG_ASSERT(0); }
+  /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */
   virtual void save_last_pos(){};
 
   /* 
-    Fill key_names with list of keys this quick select used;
-    fill used_lenghth with correponding used lengths.
+    Append comma-separated list of keys this quick select uses to key_names;
+    append comma-separated list of corresponding used lengths to used_lengths.
     This is used by select_describe.
   */
-  virtual void fill_keys_and_lengths(String *key_names, 
-                                     String *used_lengths)=0;
-
+  virtual void add_keys_and_lengths(String *key_names, 
+                                    String *used_lengths)=0;
+ 
+  /* 
+    Append text representation of quick select structure (what and how is 
+    merged) to str. The result is added to "Extra" field in EXPLAIN output.
+    This function is implemented only by quick selects that merge other quick
+    selects output and/or can produce output suitable for merging.
+  */
+  virtual void add_info_string(String *str) {};
+  /*
+    Return 1 if any index used by this quick select 
+     a) uses field that is listed in passed field list or 
+     b) is automatically updated (like a timestamp)
+  */
   virtual bool check_if_keys_used(List<Item> *fields);
 
   /*
-    rowid of last row retrieved by this quick select. This is used only 
-    when doing ROR-index_merge selects 
+    rowid of last row retrieved by this quick select. This is used only when 
+    doing ROR-index_merge selects 
   */
   byte    *last_rowid;
+
+  /*
+    Table record buffer used by this quick select. 
+    Currently this is always the same as  head->record[0]. psergey: check that!
+  */
   byte    *record;
 #ifndef DBUG_OFF
   /*
@@ -155,6 +211,10 @@ public:
 struct st_qsel_param;
 class SEL_ARG;
 
+/*
+  Quick select that does a range scan on a single key. The records are 
+  returned in key order.
+*/
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
 protected:
@@ -163,7 +223,11 @@ public:
   int error;
 protected:
   handler *file;
-  bool free_file; /* if true, this quick select "owns" file and will free it */
+  /*
+    If true, this quick select has its "own" handler object which should be
+    closed no later then this quick select is deleted.
+  */
+  bool free_file;
 
 protected:
   friend
@@ -207,15 +271,16 @@ public:
   int get_next();
   bool reverse_sorted() { return 0; }
   bool unique_key_range();
-  int init_ror_child_scan(bool reuse_handler);
+  int init_ror_merged_scan(bool reuse_handler);
   void save_last_pos()
   {
     file->position(record);
   };
   int get_type() { return QS_TYPE_RANGE; }
-  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
 #ifndef DBUG_OFF
-  virtual void dbug_dump(int indent, bool verbose);
+  void dbug_dump(int indent, bool verbose);
 #endif
 };
 
@@ -291,10 +356,11 @@ public:
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
   int get_type() { return QS_TYPE_INDEX_MERGE; }
-  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
   bool check_if_keys_used(List<Item> *fields);
 #ifndef DBUG_OFF
-  virtual void dbug_dump(int indent, bool verbose);
+  void dbug_dump(int indent, bool verbose);
 #endif
 
   bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
@@ -317,7 +383,6 @@ public:
 
   THD *thd;
   int prepare_unique();
-  bool reset_called;
   
   /* used to get rows collected in Unique */
   READ_RECORD read_record;
@@ -326,9 +391,22 @@ public:
 
 /*
   Rowid-Ordered Retrieval (ROR) index intersection quick select.
-  This quick select produces an intersection of records returned by several
-  QUICK_RANGE_SELECTs that return data ordered by rowid.
+  This quick select produces intersection of row sequences returned 
+  by several QUICK_RANGE_SELECTs it "merges".
+  
+  All merged QUICK_RANGE_SELECTs must return rowids in rowid order. 
+  QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too.
+
+  All merged quick selects retrieve {rowid, covered_fields} tuples (not full 
+  table records).
+  QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used 
+  by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't 
+  cover needed all fields.
+  
+  If one of the merged quick selects is a Clustered PK range scan, it is
+  used only to filter rowid sequence produced by other merged quick selects.
 */
+
 class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I 
 {
 public:
@@ -343,28 +421,46 @@ public:
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
   int get_type() { return QS_TYPE_ROR_INTERSECT; }
-  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
   bool check_if_keys_used(List<Item> *fields);
 #ifndef DBUG_OFF
-  virtual void dbug_dump(int indent, bool verbose);
+  void dbug_dump(int indent, bool verbose);
 #endif
-  int init_ror_child_scan(bool reuse_handler);
+  int init_ror_merged_scan(bool reuse_handler);
   bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
 
-  /* range quick selects this intersection consists of */
+  /* 
+    Range quick selects this intersection consists of, not including
+    cpk_quick.
+  */
   List<QUICK_RANGE_SELECT> quick_selects;
   
+  /* 
+    Merged quick select that uses Clustered PK, if there is one. This quick 
+    select is not used for row retrieval, it is used for row retrieval.
+  */
   QUICK_RANGE_SELECT *cpk_quick;
-  MEM_ROOT alloc;
-  THD *thd;
-  bool reset_called;    
-  bool need_to_fetch_row;
+
+  MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */
+  THD *thd;       /* current thread */
+  bool need_to_fetch_row; /* if true, do retrieve full table records. */
 };
 
+
 /*
   Rowid-Ordered Retrieval index union select.
+  This quick select produces union of row sequences returned by several
+  quick select it "merges".
 
+  All merged quick selects must return rowids in rowid order. 
+  QUICK_ROR_UNION_SELECT will return rows in rowid order, too.
+
+  All merged quick selects are set not to retrieve full table records.
+  ROR-union quick select always retrieves full records.
+ 
 */
+
 class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I 
 {
 public:
@@ -377,33 +473,30 @@ public:
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
   int get_type() { return QS_TYPE_ROR_UNION; }
-  void fill_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
   bool check_if_keys_used(List<Item> *fields);
 #ifndef DBUG_OFF
-  virtual void dbug_dump(int indent, bool verbose);
+  void dbug_dump(int indent, bool verbose);
 #endif
 
   bool push_quick_back(QUICK_SELECT_I *quick_sel_range);
 
-  /* range quick selects this index_merge read consists of */
-  List<QUICK_SELECT_I> quick_selects;
-  
-  QUEUE queue;
-  MEM_ROOT alloc;
-
-  THD *thd;
-  byte *cur_rowid;
-  byte *prev_rowid;
-  uint rowid_length;
-  bool reset_called;
-  bool have_prev_rowid;
+  List<QUICK_SELECT_I> quick_selects; /* Merged quick selects */
   
+  QUEUE queue;    /* Priority queue for merge operation */
+  MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */
+
+  THD *thd;             /* current thread */
+  byte *cur_rowid;      /* buffer used in get_next() */
+  byte *prev_rowid;     /* rowid of last row returned by get_next() */
+  bool have_prev_rowid; /* true if prev_rowid has valid data */
+  uint rowid_length;    /* table rowid length */ 
 private:
   static int queue_cmp(void *arg, byte *val1, byte *val2);
 };
 
 
-
 class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
 {
 public:
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 037aedd5f61..27015dbd04d 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -10077,7 +10077,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
   select_result *result=join->result;
   Item *item_null= new Item_null();
   CHARSET_INFO *cs= &my_charset_latin1;
-  int quick_type= -1;
+  int quick_type;
   DBUG_ENTER("select_describe");
   DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s",
 		      (ulong)join->select_lex, join->select_lex->type,
@@ -10104,17 +10104,20 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
     {
       JOIN_TAB *tab=join->join_tab+i;
       TABLE *table=tab->table;
-      char buff[512],*buff_ptr=buff;
+      char buff[512]; 
       char buff1[512], buff2[512], buff3[512];
       char keylen_str_buf[64];
       char derived_name[64];
+      String extra(buff, sizeof(buff),cs);
       String tmp1(buff1,sizeof(buff1),cs);
       String tmp2(buff2,sizeof(buff2),cs);
       String tmp3(buff3,sizeof(buff3),cs);
+      extra.length(0);
       tmp1.length(0);
       tmp2.length(0);
       tmp3.length(0);
 
+      quick_type= -1;
       item_list.empty();
       item_list.push_back(new Item_int((int32)
 				       join->select_lex->select_number));
@@ -10165,7 +10168,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       else
 	item_list.push_back(item_null);
 
-      /* Build key,key_len, and ref values and add them to item_list */
+      /* Build "key", "key_len", and "ref" values and add them to item_list */
       if (tab->ref.key_parts)
       {
 	KEY *key_info=table->key_info+ tab->ref.key;
@@ -10200,7 +10203,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       else if (tab->select && tab->select->quick)
       {
-        tab->select->quick->fill_keys_and_lengths(&tmp2, &tmp3);
+        tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3);
 	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
 	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
 	item_list.push_back(item_null);
@@ -10211,9 +10214,11 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(item_null);
 	item_list.push_back(item_null);
       }
+      /* Add "rows" field to item_list. */
       item_list.push_back(new Item_int((longlong) (ulonglong)
 				       join->best_positions[i]. records_read,
 				       21));
+      /* Build "Extra" field and add it to item_list. */
       my_bool key_read=table->key_read;
       if (tab->type == JT_NEXT && table->used_keys.is_set(tab->index))
 	key_read=1;
@@ -10225,38 +10230,51 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs));
       else
       {
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
+            quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+            quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+        {
+          extra.append("; Using ");
+          tab->select->quick->add_info_string(&extra);
+        }
 	if (tab->select)
 	{
 	  if (tab->use_quick == 2)
 	  {
             char buf[MAX_KEY/8+1];
-	    sprintf(buff_ptr,"; Range checked for each record (index map: 0x%s)",
-                tab->keys.print(buf));
-	    buff_ptr=strend(buff_ptr);
+            extra.append("; Range checked for each record (index map: 0x");
+            extra.append(tab->keys.print(buf));
+            extra.append(')');
 	  }
 	  else
-	    buff_ptr=strmov(buff_ptr,"; Using where");
+            extra.append("; Using where");
 	}
 	if (key_read)
-	  buff_ptr= strmov(buff_ptr,"; Using index");
+	  extra.append("; Using index");
 	if (table->reginfo.not_exists_optimize)
-	  buff_ptr= strmov(buff_ptr,"; Not exists");
+	  extra.append("; Not exists");
 	if (need_tmp_table)
 	{
 	  need_tmp_table=0;
-	  buff_ptr= strmov(buff_ptr,"; Using temporary");
+	  extra.append("; Using temporary");
 	}
 	if (need_order)
 	{
 	  need_order=0;
-	  buff_ptr= strmov(buff_ptr,"; Using filesort");
+	  extra.append("; Using filesort");
 	}
 	if (distinct & test_all_bits(used_tables,thd->used_tables))
-	  buff_ptr= strmov(buff_ptr,"; Distinct");
-	if (buff_ptr == buff)
-	  buff_ptr+= 2;				// Skip inital "; "
-	item_list.push_back(new Item_string(buff+2,(uint) (buff_ptr - buff)-2,
-					    cs));
+	  extra.append("; Distinct");
+        
+        /* Skip initial "; "*/
+        const char *str= extra.ptr();
+        uint32 len= extra.length();
+        if (len)
+        {
+          str += 2;
+          len -= 2;
+        }
+	item_list.push_back(new Item_string(str, len, cs));
       }
       // For next iteration
       used_tables|=table->map;
-- 
cgit v1.2.1


From 2164db3c8c4c086dcc16d73baf7557a344d82129 Mon Sep 17 00:00:00 2001
From: unknown <sergefp@mysql.com>
Date: Sat, 29 May 2004 17:50:05 +0400
Subject:  * Undo of range optimizer fix from previous changeset  * Fixed test
 results.

mysql-test/r/index_merge_ror.result:
  Typo fix
mysql-test/r/rowid_order_bdb.result:
  new index_merge EXPLAIN output format changes
mysql-test/r/rowid_order_innodb.result:
  new index_merge EXPLAIN output format changes
sql/opt_range.cc:
  Undo of previous fix:
  If cost(full_scan_on_covering_index) < cost(best_range_scan) <  cost(full_table_scan)
  use full_scan_on_covering_index, not best_range_scan.
  The fix affects read plan choice for more queries then initially anticipated, so I'm reverting it for now, will get back to this later
---
 sql/opt_range.cc | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'sql')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index ee328dadad6..59ded7354f1 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -1610,17 +1610,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     }
     param.key_parts_end=key_parts;
 
-    /* Calculate cost of full index read for the shortest covering index */
-    if (!head->used_keys.is_clear_all())
-    {           
-      int key_for_use= find_shortest_key(head, &head->used_keys);
-      double key_read_time= get_index_only_read_time(&param, records, 
-                                                     key_for_use);      
-      DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
-                           "read time %g", key_for_use, key_read_time));
-      if (key_read_time < read_time)
-        read_time= key_read_time;
-    }
           
     if ((tree=get_mm_tree(&param,cond)))
     {
@@ -1683,6 +1672,19 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
           SEL_IMERGE *imerge;
           TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
           LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
+
+          /* Calculate cost of full index read for the shortest covering index */
+          if (!head->used_keys.is_clear_all())
+          {           
+            int key_for_use= find_shortest_key(head, &head->used_keys);
+            double key_read_time= get_index_only_read_time(&param, records, 
+                                                           key_for_use);      
+            DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
+                                 "read time %g", key_for_use, key_read_time));
+            if (key_read_time < read_time)
+              read_time= key_read_time;
+          }
+          
           DBUG_PRINT("info",("No range reads possible,"
                              " trying to construct index_merge"));
           List_iterator_fast<SEL_IMERGE> it(tree->merges);
-- 
cgit v1.2.1


From 40e65508792422e896dad764f3901c7e5971a132 Mon Sep 17 00:00:00 2001
From: unknown <sergefp@mysql.com>
Date: Tue, 1 Jun 2004 15:31:58 +0400
Subject: More code cleanup, debug prints removed Fixed an incorrect optimizer
 choice for ror-intersect(key, clustered_primary) Typo bug fixed in multitable
 update

sql/opt_range.cc:
  More code cleanup, debug prints removed
  Fixed an incorrect optimizer choice for ror-intersect(key, clustered_primary)
sql/opt_range.h:
  More code cleanup, debug prints removed
  Fixed an incorrect optimizer choice for ror-intersect(key, clustered_primary)
sql/sql_update.cc:
  Typo bug fixed
---
 sql/opt_range.cc  | 42 +++++++++++++++++++++---------------------
 sql/opt_range.h   | 14 ++++++++------
 sql/sql_update.cc |  3 ++-
 3 files changed, 31 insertions(+), 28 deletions(-)

(limited to 'sql')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index e86a389ccb9..07cc97ad32d 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -377,7 +377,6 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        double read_time);
 static
 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
-                                          bool force_index_only, 
                                           double read_time,
                                           bool *are_all_covering);
 static
@@ -1517,7 +1516,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   uint idx;
   double scan_time;
   DBUG_ENTER("test_quick_select");
-  printf("\nQUERY: %s\n", thd->query);
+  //printf("\nQUERY: %s\n", thd->query);
   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
 		      keys_to_use.to_ulonglong(), (ulong) prev_tables,
 		      (ulong) const_tables));
@@ -1651,8 +1650,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
               Get best non-covering ROR-intersection plan and prepare data for 
               building covering ROR-intersection.
             */
-            if ((new_trp= get_best_ror_intersect(&param, tree, false, 
-                                                 best_read_time, 
+            if ((new_trp= get_best_ror_intersect(&param, tree, best_read_time,
                                                  &can_build_covering)))
             {
               best_trp= new_trp;
@@ -1750,7 +1748,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
   else
   {    
     double n_blocks=
-      ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE);
+      ceil((double)((longlong)param->table->file->data_file_length / IO_SIZE));
     double busy_blocks=
       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
     if (busy_blocks < 1.0)
@@ -2026,7 +2024,7 @@ skip_to_ror_scan:
       cost= read_time;
 
     TABLE_READ_PLAN *prev_plan= *cur_child;
-    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost,
+    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost, 
                                                  &dummy)))
     {
       if (prev_plan->is_ror)
@@ -2476,7 +2474,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
       {
         /* uncovered -> covered */
         double tmp= rows2double(records)/rows2double(prev_records);
-        printf("  Selectivity multiplier: %g\n", tmp);
         DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
         selectivity_mult *= tmp;
         prev_records= HA_POS_ERROR;
@@ -2487,7 +2484,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
         prev_records= records; 
       }
     }
-    else printf("no change, skipping\n");
     prev_covered= cur_covered;
   }
   if (!prev_covered)
@@ -2526,8 +2522,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
     info->is_covering= true;
   }
   
-  printf("  records: %g\n", rows2double(param->table->file->records) *
-        info->records_fract);
   info->total_cost= info->index_scan_costs;
   if (!info->is_covering)
   {
@@ -2552,7 +2546,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
       param            Parameter from test_quick_select function.
       tree             Transformed restriction condition to be used to look
                        for ROR scans.
-      force_index_only If true, don't calculate costs of full rows retrieval.
       read_time        Do not return read plans with cost > read_time.
       are_all_covering [out] set to true if union of all scans covers all 
                        fields needed by the query (and it is possible to build
@@ -2606,7 +2599,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
 
 static
 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
-                                          bool force_index_only, 
                                           double read_time,
                                           bool *are_all_covering)
 {
@@ -2676,7 +2668,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
     return NULL;
   
   /* [intersect_scans, intersect_scans_best) will hold the best combination */
-  ROR_SCAN_INFO **intersect_scans_best= NULL; 
+  ROR_SCAN_INFO **intersect_scans_best; 
   ha_rows       best_rows;
   bool          is_best_covering;
   double        best_index_scan_costs;
@@ -2730,17 +2722,24 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
     */
     if (best_num == 0)
     { 
+      cur_ror_scan= tree->ror_scans;
+      intersect_scans_end= intersect_scans;
       ror_intersect_reinit(intersect);
-      if (!ror_intersect_add(param, intersect, cpk_scan))
+      if (!ror_intersect_add(param, intersect, *cur_ror_scan))
         DBUG_RETURN(NULL); /* shouldn't happen actually actually */
       *(intersect_scans_end++)= *cur_ror_scan;
       best_num++;
     }
 
     if (ror_intersect_add(param, intersect, cpk_scan))
+    {
       cpk_scan_used= true;
-    best_rows= (ha_rows)(intersect->records_fract* 
-                         rows2double(param->table->file->records));
+      min_cost= intersect->total_cost;
+      best_rows= (ha_rows)(intersect->records_fract* 
+                           rows2double(param->table->file->records));
+      is_best_covering= intersect->is_covering;
+      best_index_scan_costs= intersect->index_scan_costs;
+    }
   }
 
   /* Ok, return ROR-intersect plan if we have found one */
@@ -2910,7 +2909,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
   Get best "range" table read plan for given SEL_TREE. 
   Also update PARAM members and store ROR scans info in the SEL_TREE.
   SYNOPSIS
-    get_quick_select_params
+    get_key_scans_params
       param        parameters from test_quick_select
       tree         make range select for this SEL_TREE 
       index_read_must_be_used if true, assume 'index only' option will be set
@@ -4649,7 +4648,9 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
   bool    cpk_scan;
   uint key;
   DBUG_ENTER("check_quick_select");
-
+  
+  param->is_ror_scan= false;
+  
   if (!tree)
     DBUG_RETURN(HA_POS_ERROR);			// Can't use it
   param->max_key_part=0;
@@ -4665,7 +4666,6 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
   if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
   {
     /* Records are not ordered by rowid for other types of indexes. */
-    param->is_ror_scan= false;
     cpk_scan= false;
   }
   else
@@ -4765,12 +4765,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
   {
     /* 
       If the index doesn't cover entire key, mark the scan as non-ROR scan.
-      (TODO sergeyp: investigate if we could do better here)
+      Actually we're cutting off some ROR scans here.
     */
     uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
                     key_part[key_tree->part].fieldnr - 1;
     if (param->table->field[fieldnr]->key_length() != 
-        param->key[idx][key_tree->part].part_length)
+        param->key[idx][key_tree->part].length)
       param->is_ror_scan= false;
   }
 
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 22168a63f8e..654a7e3983c 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -155,15 +155,18 @@ public:
     function is called.
     SYNOPSIS
       init_ror_merged_scan()
-        reuse_handler quick select may use (q: psergey??)
-        (q: is this natural that we do it this way)
-    NOTES 
-      psergey?  
+        reuse_handler If true, the quick select may use table->handler, otherwise
+                      it must create and use a separate handler object.
+    RETURN
+      0     Ok
+      other Error 
   */
   virtual int init_ror_merged_scan(bool reuse_handler)
   { DBUG_ASSERT(0); return 1; }
   
-  /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */
+  /*
+    Save ROWID of last retrieved row in file->ref. This used in ROR-merging.
+  */
   virtual void save_last_pos(){};
 
   /* 
@@ -196,7 +199,6 @@ public:
 
   /*
     Table record buffer used by this quick select. 
-    Currently this is always the same as  head->record[0]. psergey: check that!
   */
   byte    *record;
 #ifndef DBUG_OFF
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index b85ddff84f9..0953e63765a 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -159,6 +159,7 @@ int mysql_update(THD *thd,
   
   if (select && select->quick)
   {
+    used_index= select->quick->index;
     used_key_is_modified= (!select->quick->unique_key_range() &&
                           select->quick->check_if_keys_used(&fields));
   }
@@ -173,7 +174,7 @@ int mysql_update(THD *thd,
       matching rows before updating the table!
     */
     table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
-    if (old_used_keys.is_set(used_index))
+    if ( (used_index != MAX_KEY) && old_used_keys.is_set(used_index))
     {
       table->key_read=1;
       table->file->extra(HA_EXTRA_KEYREAD);
-- 
cgit v1.2.1