summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <sergefp@mysql.com>2004-05-29 20:55:46 +0400
committerunknown <sergefp@mysql.com>2004-05-29 20:55:46 +0400
commitb7130450950898892f14f2a1fef462ecac49e92a (patch)
tree0107d5b2c95123b7a602a9f396d6aca05ce21a54 /sql
parentb0921b1f5a1f610b04641a83d0d7b21e3fe46d84 (diff)
parent2164db3c8c4c086dcc16d73baf7557a344d82129 (diff)
downloadmariadb-git-b7130450950898892f14f2a1fef462ecac49e92a.tar.gz
Manual merge
include/my_base.h: Auto merged innobase/include/row0mysql.h: Auto merged innobase/row/row0sel.c: Auto merged mysql-test/r/index_merge.result: Auto merged mysql-test/r/index_merge_innodb.result: Auto merged sql/ha_berkeley.cc: Auto merged sql/ha_berkeley.h: Auto merged sql/ha_heap.h: Auto merged sql/ha_innodb.cc: Auto merged sql/ha_innodb.h: Auto merged sql/handler.h: Auto merged sql/sql_delete.cc: Auto merged sql/sql_select.cc: Auto merged sql/sql_select.h: Auto merged sql/sql_test.cc: Auto merged sql/sql_update.cc: Auto merged sql/opt_range.cc: Hand merged sql/opt_range.h: Hand merged
Diffstat (limited to 'sql')
-rw-r--r--sql/ha_berkeley.cc25
-rw-r--r--sql/ha_berkeley.h1
-rw-r--r--sql/ha_heap.h7
-rw-r--r--sql/ha_innodb.cc57
-rw-r--r--sql/ha_innodb.h1
-rw-r--r--sql/handler.h5
-rw-r--r--sql/opt_range.cc3071
-rw-r--r--sql/opt_range.h268
-rw-r--r--sql/sql_delete.cc10
-rw-r--r--sql/sql_select.cc122
-rw-r--r--sql/sql_select.h2
-rw-r--r--sql/sql_test.cc33
-rw-r--r--sql/sql_update.cc39
13 files changed, 3094 insertions, 547 deletions
diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc
index df5a45480c6..9f2039d4b69 100644
--- a/sql/ha_berkeley.cc
+++ b/sql/ha_berkeley.cc
@@ -2500,4 +2500,29 @@ ha_rows ha_berkeley::estimate_number_of_rows()
return share->rows + HA_BERKELEY_EXTRA_ROWS;
}
+int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2)
+{
+ if (hidden_primary_key)
+ return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH);
+
+ int result;
+ Field *field;
+ KEY *key_info=table->key_info+table->primary_key;
+ KEY_PART_INFO *key_part=key_info->key_part;
+ KEY_PART_INFO *end=key_part+key_info->key_parts;
+
+ for (; key_part != end; key_part++)
+ {
+ field= key_part->field;
+ result= field->pack_cmp((const char*)ref1, (const char*)ref2,
+ key_part->length);
+ if (result)
+ return result;
+ ref1 += field->packed_col_length((const char*)ref1, key_part->length);
+ ref2 += field->packed_col_length((const char*)ref2, key_part->length);
+ }
+
+ return 0;
+}
+
#endif /* HAVE_BERKELEY_DB */
diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h
index 525c82febd0..f053fd84046 100644
--- a/sql/ha_berkeley.h
+++ b/sql/ha_berkeley.h
@@ -168,6 +168,7 @@ class ha_berkeley: public handler
void print_error(int error, myf errflag);
uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
bool primary_key_is_clustered() { return true; }
+ int cmp_ref(const byte *ref1, const byte *ref2);
};
extern bool berkeley_shared_data;
diff --git a/sql/ha_heap.h b/sql/ha_heap.h
index 68406202c76..ffe448a6b6c 100644
--- a/sql/ha_heap.h
+++ b/sql/ha_heap.h
@@ -93,5 +93,10 @@ class ha_heap: public handler
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
-
+ int cmp_ref(const byte *ref1, const byte *ref2)
+ {
+ HEAP_PTR ptr1=*(HEAP_PTR*)ref1;
+ HEAP_PTR ptr2=*(HEAP_PTR*)ref2;
+ return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0);
+ }
};
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 4c4c32691e5..c3862c17994 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -701,6 +701,8 @@ ha_innobase::init_table_handle_for_HANDLER(void)
prebuilt->read_just_key = FALSE;
prebuilt->used_in_HANDLER = TRUE;
+
+ prebuilt->keep_other_fields_on_keyread = FALSE;
}
/*************************************************************************
@@ -4540,9 +4542,11 @@ ha_innobase::extra(
if (prebuilt->blob_heap) {
row_mysql_prebuilt_free_blob_heap(prebuilt);
}
+ prebuilt->keep_other_fields_on_keyread = 0;
prebuilt->read_just_key = 0;
break;
case HA_EXTRA_RESET_STATE:
+ prebuilt->keep_other_fields_on_keyread = 0;
prebuilt->read_just_key = 0;
break;
case HA_EXTRA_NO_KEYREAD:
@@ -4561,6 +4565,9 @@ ha_innobase::extra(
case HA_EXTRA_KEYREAD:
prebuilt->read_just_key = 1;
break;
+ case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ prebuilt->keep_other_fields_on_keyread = 1;
+ break;
default:/* Do nothing */
;
}
@@ -4609,6 +4616,7 @@ ha_innobase::start_stmt(
prebuilt->sql_stat_start = TRUE;
prebuilt->hint_need_to_fetch_extra_cols = 0;
prebuilt->read_just_key = 0;
+ prebuilt->keep_other_fields_on_keyread = FALSE;
if (!prebuilt->mysql_has_locked) {
/* This handle is for a temporary table created inside
@@ -4686,6 +4694,7 @@ ha_innobase::external_lock(
prebuilt->hint_need_to_fetch_extra_cols = 0;
prebuilt->read_just_key = 0;
+ prebuilt->keep_other_fields_on_keyread = FALSE;
if (lock_type == F_WRLCK) {
@@ -5122,4 +5131,52 @@ ha_innobase::get_auto_increment()
return(nr);
}
+int
+ha_innobase::cmp_ref(
+ const mysql_byte *ref1,
+ const mysql_byte *ref2)
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ enum_field_types mysql_type;
+ Field* field;
+ int result;
+
+ if (prebuilt->clust_index_was_generated)
+ return memcmp(ref1, ref2, DATA_ROW_ID_LEN);
+
+ /* Do type-aware comparison of Primary Key members. PK members
+ are always NOT NULL, so no checks for NULL are performed */
+ KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part;
+ KEY_PART_INFO *key_part_end=
+ key_part + table->key_info[table->primary_key].key_parts;
+ for (; key_part != key_part_end; ++key_part) {
+ field = key_part->field;
+ mysql_type = field->type();
+ if (mysql_type == FIELD_TYPE_TINY_BLOB
+ || mysql_type == FIELD_TYPE_MEDIUM_BLOB
+ || mysql_type == FIELD_TYPE_BLOB
+ || mysql_type == FIELD_TYPE_LONG_BLOB) {
+
+ ut_a(!ref1[1]);
+ ut_a(!ref2[1]);
+ byte len1= *ref1;
+ byte len2= *ref2;
+ ref1 += 2;
+ ref2 += 2;
+ result =
+ ((Field_blob*)field)->cmp((const char*)ref1, len1,
+ (const char*)ref2, len2);
+ } else {
+ result =
+ field->cmp((const char*)ref1, (const char*)ref2);
+ }
+
+ if (result)
+ return result;
+ ref1 += key_part->length;
+ ref2 += key_part->length;
+ }
+ return 0;
+}
+
#endif /* HAVE_INNOBASE_DB */
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index 62f5e85b5d4..2160d40f5ad 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -188,6 +188,7 @@ class ha_innobase: public handler
longlong get_auto_increment();
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
bool primary_key_is_clustered() { return true; }
+ int cmp_ref(const byte *ref1, const byte *ref2);
};
extern uint innobase_init_flags, innobase_lock_type;
diff --git a/sql/handler.h b/sql/handler.h
index 15fc6a201d8..00e0ad8bcd0 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -447,6 +447,11 @@ public:
false otherwise
*/
virtual bool primary_key_is_clustered() { return false; }
+
+ virtual int cmp_ref(const byte *ref1, const byte *ref2)
+ {
+ return memcmp(ref1, ref2, ref_length);
+ }
};
/* Some extern variables used with handlers */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 28b9cd79fdd..e86a389ccb9 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -23,6 +23,19 @@
*/
+/*
+ Classes in this file are used in the following way:
+ 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects
+ is created. #of rows in table and index statistics are ignored at this
+ step.
+ 2. Created SEL_TREE and index stats data are used to construct a
+ TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read
+ plans may be created.
+ 3. The least expensive table read plan is used to create a tree of
+ QUICK_SELECT_I-derived objects which are later used for row retrieval.
+ QUICK_RANGEs are also created in this step.
+*/
+
#ifdef __GNUC__
#pragma implementation // gcc: Class implementation
#endif
@@ -167,8 +180,7 @@ public:
min_value=arg->max_value;
min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
}
- void store(uint length,char **min_key,uint min_key_flag,
- char **max_key, uint max_key_flag)
+ void store_min(uint length,char **min_key,uint min_key_flag)
{
if ((min_flag & GEOM_FLAG) ||
(!(min_flag & NO_MIN_RANGE) &&
@@ -183,6 +195,11 @@ public:
memcpy(*min_key,min_value,length);
(*min_key)+= length;
}
+ }
+ void store(uint length,char **min_key,uint min_key_flag,
+ char **max_key, uint max_key_flag)
+ {
+ store_min(length, min_key, min_key_flag);
if (!(max_flag & NO_MAX_RANGE) &&
!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
{
@@ -269,6 +286,7 @@ public:
class SEL_IMERGE;
+
class SEL_TREE :public Sql_alloc
{
public:
@@ -280,28 +298,63 @@ public:
bzero((char*) keys,sizeof(keys));
}
SEL_ARG *keys[MAX_KEY];
- key_map keys_map; /* bitmask of non-NULL elements in keys */
- List<SEL_IMERGE> merges; /* possible ways to read rows using index_merge */
+ key_map keys_map; /* bitmask of non-NULL elements in keys */
+
+ /*
+ Possible ways to read rows using index_merge. The list is non-empty only
+ if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
+ */
+ List<SEL_IMERGE> merges;
+
+ /* The members below are filled/used only after get_mm_tree is done */
+ key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */
+ uint n_ror_scans; /* number of set bits in ror_scans_map */
+
+ struct st_ror_scan_info **ror_scans; /* list of ROR key scans */
+ struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
+ /* Note that #records for each key scan is stored in table->quick_rows */
};
typedef struct st_qsel_param {
THD *thd;
TABLE *table;
- KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY];
+ KEY_PART *key_parts,*key_parts_end;
+ KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
MEM_ROOT *mem_root;
table_map prev_tables,read_tables,current_table;
- uint baseflag, keys, max_key_part, range_count;
- uint real_keynr[MAX_KEY];
+ uint baseflag, max_key_part, range_count;
+
+ uint keys; /* number of keys used in the query */
+
+ /* used_key_no -> table_key_no translation table */
+ uint real_keynr[MAX_KEY];
+
char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
bool quick; // Don't calulate possible keys
COND *cond;
+ uint fields_bitmap_size;
+ MY_BITMAP needed_fields; /* bitmask of fields needed by the query */
+
+ key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */
+
uint *imerge_cost_buff; /* buffer for index_merge cost estimates */
uint imerge_cost_buff_size; /* size of the buffer */
+
+ /* true if last checked tree->key can be used for ROR-scan */
+ bool is_ror_scan;
} PARAM;
+class TABLE_READ_PLAN;
+ class TRP_RANGE;
+ class TRP_ROR_INTERSECT;
+ class TRP_ROR_UNION;
+ class TRP_ROR_INDEX_MERGE;
+
+struct st_ror_scan_info;
+
static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field,
Item_func::Functype type,Item *value,
Item_result cmp_type);
@@ -309,6 +362,8 @@ static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field,
KEY_PART *key_part,
Item_func::Functype type,Item *value);
static SEL_TREE *get_mm_tree(PARAM *param,COND *cond);
+
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
char *min_key,uint min_key_flag,
@@ -317,25 +372,37 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
SEL_ARG *key_tree,
MEM_ROOT *alloc = NULL);
-static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
- key_map& needed_reg,
- bool index_read_can_be_used,
- double *read_time,
- ha_rows *records,
- SEL_ARG*** key_to_read);
+static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+ bool index_read_must_be_used,
+ double read_time);
+static
+TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+ bool force_index_only,
+ double read_time,
+ bool *are_all_covering);
+static
+TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
+ SEL_TREE *tree,
+ double read_time);
+static
+TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
+ double read_time);
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
SEL_IMERGE *imerge, double *read_time,
ha_rows* imerge_rows);
-inline double get_index_only_read_time(PARAM* param, ha_rows records,
+inline double get_index_only_read_time(const PARAM* param, ha_rows records,
int keynr);
#ifndef DBUG_OFF
-static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick,
- const key_map *needed_reg);
-void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
- const key_map *needed_reg);
-
+static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
+ const char *msg);
+static void print_ror_scans_arr(TABLE *table, const char *msg,
+ struct st_ror_scan_info **start,
+ struct st_ror_scan_info **end);
+static void print_rowid(byte* val, int len);
+static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
#endif
+
static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
@@ -549,7 +616,6 @@ int imerge_list_or_list(PARAM *param,
RETURN
0 OK, result is stored in *im1.
other Error
-
*/
int imerge_list_or_tree(PARAM *param,
@@ -642,7 +708,7 @@ QUICK_SELECT_I::QUICK_SELECT_I()
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
bool no_alloc, MEM_ROOT *parent_alloc)
- :dont_free(0),error(0),cur_range(NULL),range(0)
+ :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0)
{
index= key_nr;
head= table;
@@ -667,12 +733,22 @@ int QUICK_RANGE_SELECT::init()
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
+ DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
if (!dont_free)
{
file->index_end();
- delete_dynamic(&ranges); /* ranges are allocated in alloc */
- free_root(&alloc,MYF(0));
- }
+ file->extra(HA_EXTRA_NO_KEYREAD);
+ delete_dynamic(&ranges); /* ranges are allocated in alloc */
+ if (free_file)
+ {
+ DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
+ free_file));
+ file->reset();
+ file->close();
+ }
+ free_root(&alloc,MYF(0));
+ }
+ DBUG_VOID_RETURN;
}
@@ -681,11 +757,12 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
:cur_quick_it(quick_selects),pk_quick_select(NULL),unique(NULL),
thd(thd_param)
{
+ DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
index= MAX_KEY;
head= table;
- reset_called= false;
bzero(&read_record, sizeof(read_record));
init_sql_alloc(&alloc,1024,0);
+ DBUG_VOID_RETURN;
}
int QUICK_INDEX_MERGE_SELECT::init()
@@ -699,11 +776,7 @@ int QUICK_INDEX_MERGE_SELECT::reset()
{
int result;
DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
- if (reset_called)
- DBUG_RETURN(0);
-
- reset_called= true;
- result = cur_quick_select->reset() || prepare_unique();
+ result= cur_quick_select->reset() || prepare_unique();
DBUG_RETURN(result);
}
@@ -724,12 +797,338 @@ QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
+ DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
delete unique;
quick_selects.delete_elements();
delete pk_quick_select;
free_root(&alloc,MYF(0));
+ DBUG_VOID_RETURN;
+}
+
+
+QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
+ TABLE *table,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc)
+ : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows)
+{
+ index= MAX_KEY;
+ head= table;
+ record= head->record[0];
+ if (!parent_alloc)
+ init_sql_alloc(&alloc,1024,0);
+ else
+ bzero(&alloc, sizeof(MEM_ROOT));
+ last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc,
+ head->file->ref_length);
+}
+
+
+/*
+ Do post-constructor initialization.
+ SYNOPSIS
+ QUICK_ROR_INTERSECT_SELECT::init()
+
+ RETURN
+ 0 OK
+ other Error code
+*/
+
+int QUICK_ROR_INTERSECT_SELECT::init()
+{
+ /* Check if last_rowid was successfully allocated in ctor */
+ return !last_rowid;
+}
+
+
+/*
+ Initialize this quick select to be a ROR-merged scan.
+
+ SYNOPSIS
+ QUICK_RANGE_SELECT::init_ror_merged_scan()
+ reuse_handler If true, use head->file, otherwise create a separate
+ handler object
+
+ NOTES
+ This function creates and prepares for subsequent use a separate handler
+ object if it can't reuse head->file. The reason for this is that during
+ ROR-merge several key scans are performed simultaneously, and a single
+ handler is only capable of preserving context of a single key scan.
+
+ In ROR-merge the quick select doing merge does full records retrieval,
+ merged quick selects read only keys.
+
+ RETURN
+ 0 ROR child scan initialized, ok to use.
+ 1 error
+*/
+
+int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
+{
+ handler *save_file= file;
+ DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
+
+ if (reuse_handler)
+ {
+ DBUG_PRINT("info", ("Reusing handler %p", file));
+ if (file->extra(HA_EXTRA_KEYREAD) ||
+ file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) |
+ init() || reset())
+ {
+ DBUG_RETURN(1);
+ }
+ else
+ DBUG_RETURN(0);
+ }
+
+ /* Create a separate handler object for this quick select */
+ if (free_file)
+ {
+ /* already have own 'handler' object. */
+ DBUG_RETURN(0);
+ }
+
+ if (!(file= get_new_handler(head, head->db_type)))
+ goto failure;
+ DBUG_PRINT("info", ("Allocated new handler %p", file));
+ if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED))
+ {
+ /* Caller will free the memory */
+ goto failure;
+ }
+
+ if (file->extra(HA_EXTRA_KEYREAD) ||
+ file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) ||
+ init() || reset())
+ {
+ file->close();
+ goto failure;
+ }
+ free_file= true;
+ last_rowid= file->ref;
+ DBUG_RETURN(0);
+
+failure:
+ file= save_file;
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Initialize this quick select to be a part of a ROR-merged scan.
+ SYNOPSIS
+ QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
+ reuse_handler If true, use head->file, otherwise create separate
+ handler object.
+ RETURN
+ 0 OK
+ other error code
+*/
+int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
+{
+ List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
+ QUICK_RANGE_SELECT* quick;
+ DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
+
+ /* Initialize all merged "children" quick selects */
+ DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler));
+ if (!need_to_fetch_row && reuse_handler)
+ {
+ quick= quick_it++;
+ /*
+ There is no use of this->file. Use it for the first of merged range
+ selects.
+ */
+ if (quick->init_ror_merged_scan(true))
+ DBUG_RETURN(1);
+ quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
+ }
+ while((quick= quick_it++))
+ {
+ if (quick->init_ror_merged_scan(false))
+ DBUG_RETURN(1);
+ quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
+ /* All merged scans share the same record buffer in intersection. */
+ quick->record= head->record[0];
+ }
+
+ if (need_to_fetch_row && head->file->rnd_init())
+ {
+ DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Initialize quick select for row retrieval.
+ SYNOPSIS
+ reset()
+ RETURN
+ 0 OK
+ other Error code
+*/
+
+int QUICK_ROR_INTERSECT_SELECT::reset()
+{
+ int result;
+ DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
+ result= init_ror_merged_scan(true);
+ DBUG_RETURN(result);
}
+
+/*
+ Add a merged quick select to this ROR-intersection quick select.
+
+ SYNOPSIS
+ QUICK_ROR_INTERSECT_SELECT::push_quick_back()
+ quick Quick select to be added. The quick select must return
+ rows in rowid order.
+ NOTES
+ This call can only be made before init() is called.
+
+ RETURN
+ false OK
+ true Out of memory.
+*/
+
+bool
+QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
+{
+ return quick_selects.push_back(quick);
+}
+
+QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
+{
+ DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
+ quick_selects.delete_elements();
+ delete cpk_quick;
+ free_root(&alloc,MYF(0));
+ DBUG_VOID_RETURN;
+}
+
+QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
+ TABLE *table)
+ : thd(thd_param)
+{
+ index= MAX_KEY;
+ head= table;
+ rowid_length= table->file->ref_length;
+ record= head->record[0];
+ init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
+ my_pthread_setspecific_ptr(THR_MALLOC,&alloc);
+}
+
+
+/*
+ Do post-constructor initialization.
+ SYNOPSIS
+ QUICK_ROR_UNION_SELECT::init()
+
+ RETURN
+ 0 OK
+ other Error code
+*/
+
+int QUICK_ROR_UNION_SELECT::init()
+{
+ if (init_queue(&queue, quick_selects.elements, 0,
+ false , QUICK_ROR_UNION_SELECT::queue_cmp,
+ (void*) this))
+ {
+ bzero(&queue, sizeof(QUEUE));
+ return 1;
+ }
+
+ if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length)))
+ return 1;
+ prev_rowid= cur_rowid + head->file->ref_length;
+ return 0;
+}
+
+
+/*
+ Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority
+ queue.
+
+ SYNPOSIS
+ QUICK_ROR_UNION_SELECT::queue_cmp()
+ arg Pointer to QUICK_ROR_UNION_SELECT
+ val1 First merged select
+ val2 Second merged select
+*/
+int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2)
+{
+ QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
+ return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
+ ((QUICK_SELECT_I*)val2)->last_rowid);
+}
+
+
+/*
+ Initialize quick select for row retrieval.
+ SYNOPSIS
+ reset()
+
+ RETURN
+ 0 OK
+ other Error code
+*/
+
+int QUICK_ROR_UNION_SELECT::reset()
+{
+ QUICK_SELECT_I* quick;
+ int error;
+ DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
+ have_prev_rowid= false;
+ /*
+ Initialize scans for merged quick selects and put all merged quick
+ selects into the queue.
+ */
+ List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (quick->init_ror_merged_scan(false))
+ DBUG_RETURN(1);
+ if ((error= quick->get_next()))
+ {
+ if (error == HA_ERR_END_OF_FILE)
+ continue;
+ else
+ DBUG_RETURN(error);
+ }
+ quick->save_last_pos();
+ queue_insert(&queue, (byte*)quick);
+ }
+
+ if (head->file->rnd_init())
+ {
+ DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
+ DBUG_RETURN(1);
+ }
+
+ DBUG_RETURN(0);
+}
+
+
+bool
+QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
+{
+ return quick_selects.push_back(quick_sel_range);
+}
+
+QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
+{
+ DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
+ delete_queue(&queue);
+ quick_selects.delete_elements();
+ free_root(&alloc,MYF(0));
+ DBUG_VOID_RETURN;
+}
+
+
QUICK_RANGE::QUICK_RANGE()
:min_key(0),max_key(0),min_length(0),max_length(0),
flag(NO_MIN_RANGE | NO_MAX_RANGE)
@@ -893,28 +1292,221 @@ SEL_ARG *SEL_ARG::clone_tree()
return root;
}
+
+/*
+ Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
+ objects from table read plans.
+*/
+class TABLE_READ_PLAN
+{
+public:
+ /*
+ Plan read cost, with or without cost of full row retrieval, depending
+ on plan creation parameters.
+ */
+ double read_cost;
+ ha_rows records; /* estimate of #rows to be examined */
+
+ /*
+ If true, the scan returns rows in rowid order. This is used only for
+ scans that can be both ROR and non-ROR.
+ */
+ bool is_ror;
+
+ /*
+ Create quick select for this plan.
+ SYNOPSIS
+ make_quick()
+ param Parameter from test_quick_select
+ retrieve_full_rows If true, created quick select will do full record
+ retrieval.
+ parent_alloc Memory pool to use, if any.
+
+ NOTES
+ retrieve_full_rows is ignored by some implementations.
+
+ RETURN
+ created quick select
+ NULL on any error.
+ */
+ virtual QUICK_SELECT_I *make_quick(PARAM *param,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc=NULL) = 0;
+
+ /* Table read plans are allocated on MEM_ROOT and are never deleted */
+ static void *operator new(size_t size, MEM_ROOT *mem_root)
+ { return (void*) alloc_root(mem_root, (uint) size); }
+ static void operator delete(void *ptr,size_t size) {}
+};
+
+class TRP_ROR_INTERSECT;
+class TRP_ROR_UNION;
+class TRP_INDEX_MERGE;
+
+
+/*
+ Plan for a QUICK_RANGE_SELECT scan.
+ TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
+ QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
+ record retrieval scans.
+*/
+
+class TRP_RANGE : public TABLE_READ_PLAN
+{
+public:
+ SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
+ uint key_idx; /* key number in PARAM::key */
+
+ TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
+ : key(key_arg), key_idx(idx_arg)
+ {}
+
+ QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc)
+ {
+ DBUG_ENTER("TRP_RANGE::make_quick");
+ QUICK_RANGE_SELECT *quick;
+ if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
+ {
+ quick->records= records;
+ quick->read_time= read_cost;
+ }
+ DBUG_RETURN(quick);
+ }
+};
+
+
+/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
+
+class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
+{
+public:
+ QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc);
+
+ /* Array of pointers to ROR range scans used in this intersection */
+ struct st_ror_scan_info **first_scan;
+ struct st_ror_scan_info **last_scan; /* End of the above array */
+ struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
+ bool is_covering; /* true if no row retrieval phase is necessary */
+ double index_scan_costs; /* SUM(cost(index_scan)) */
+};
+
+
+/*
+ Plan for QUICK_ROR_UNION_SELECT scan.
+ QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
+ is ignored by make_quick.
+*/
+
+class TRP_ROR_UNION : public TABLE_READ_PLAN
+{
+public:
+ QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc);
+ TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
+ TABLE_READ_PLAN **last_ror; /* end of the above array */
+};
+
+
+/*
+ Plan for QUICK_INDEX_MERGE_SELECT scan.
+ QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
+ is ignored by make_quick.
+*/
+
+class TRP_INDEX_MERGE : public TABLE_READ_PLAN
+{
+public:
+ QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc);
+ TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
+ TRP_RANGE **range_scans_end; /* end of the array */
+};
+
+
+/*
+ Fill param->needed_fields with bitmap of fields used in the query.
+ SYNOPSIS
+ fill_used_fields_bitmap()
+ param Parameter from test_quick_select function.
+
+ NOTES
+ Clustered PK members are not put into the bitmap as they are implicitly
+ present in all keys (and it is impossible to avoid reading them).
+ RETURN
+ 0 Ok
+ 1 Out of memory.
+*/
+
+static int fill_used_fields_bitmap(PARAM *param)
+{
+ TABLE *table= param->table;
+ param->fields_bitmap_size= (table->fields/8 + 1);
+ uchar *tmp;
+ uint pk;
+ if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) ||
+ bitmap_init(&param->needed_fields, tmp, param->fields_bitmap_size*8,
+ false))
+ return 1;
+
+ bitmap_clear_all(&param->needed_fields);
+ for (uint i= 0; i < table->fields; i++)
+ {
+ if (param->thd->query_id == table->field[i]->query_id)
+ bitmap_set_bit(&param->needed_fields, i+1);
+ }
+
+ pk= param->table->primary_key;
+ if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY)
+ {
+ /* The table uses clustered PK and it is not internally generated */
+ KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
+ KEY_PART_INFO *key_part_end= key_part +
+ param->table->key_info[pk].key_parts;
+ for(;key_part != key_part_end; ++key_part)
+ {
+ bitmap_clear_bit(&param->needed_fields, key_part->fieldnr);
+ }
+ }
+ return 0;
+}
+
+
/*
- Test if a key can be used in different ranges
+ Test if a key can be used in different ranges
SYNOPSIS
- SQL_SELECT::test_quick_select(thd,keys_to_use, prev_tables,
- limit, force_quick_range)
-
- Updates the following in the select parameter:
- needed_reg - Bits for keys with may be used if all prev regs are read
- quick - Parameter to use when reading records.
- In the table struct the following information is updated:
- quick_keys - Which keys can be used
- quick_rows - How many rows the key matches
-
- RETURN VALUES
- -1 if impossible select
- 0 if can't use quick_select
- 1 if found usable range
-
- TODO
- check if the function really needs to modify keys_to_use, and change the
- code to pass it by reference if not
+ SQL_SELECT::test_quick_select()
+ thd Current thread
+ keys_to_use Keys to use for range retrieval
+ prev_tables Tables assumed to be already read when the scan is
+ performed (but not read at the moment of this call)
+ limit Query limit
+ force_quick_range Prefer to use range (instead of full table scan) even
+ if it is more expensive.
+
+ NOTES
+ Updates the following in the select parameter:
+ needed_reg - Bits for keys with may be used if all prev regs are read
+ quick - Parameter to use when reading records.
+
+ In the table struct the following information is updated:
+ quick_keys - Which keys can be used
+ quick_rows - How many rows the key matches
+
+ TODO
+ Check if this function really needs to modify keys_to_use, and change the
+ code to pass it by reference if it doesn't.
+
+ In addition to force_quick_range other means can be (an usually are) used
+ to make this function prefer range over full table scan. Figure out if
+ force_quick_range is really needed.
+
+ RETURN
+ -1 if impossible select (i.e. certainly no rows will be selected)
+ 0 if can't use quick_select
+ 1 if found usable ranges and quick select has been successfully created.
*/
int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
@@ -924,8 +1516,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
uint basflag;
uint idx;
double scan_time;
- QUICK_INDEX_MERGE_SELECT *quick_imerge= NULL;
DBUG_ENTER("test_quick_select");
+ printf("\nQUERY: %s\n", thd->query);
DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
keys_to_use.to_ulonglong(), (ulong) prev_tables,
(ulong) const_tables));
@@ -971,12 +1563,16 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
param.table=head;
param.keys=0;
param.mem_root= &alloc;
+ param.needed_reg= &needed_reg;
+
param.imerge_cost_buff_size= 0;
+
thd->no_errors=1; // Don't warn about NULL
init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc,
sizeof(KEY_PART)*
- head->key_parts)))
+ head->key_parts))
+ || fill_used_fields_bitmap(&param))
{
thd->no_errors=0;
free_root(&alloc,MYF(0)); // Return memory & allocator
@@ -985,7 +1581,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
key_parts= param.key_parts;
old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC);
my_pthread_setspecific_ptr(THR_MALLOC,&alloc);
-
+
+ /*
+ Make an array with description of all key parts of all table keys.
+ This is used in get_mm_parts function.
+ */
key_info= head->key_info;
for (idx=0 ; idx < head->keys ; idx++, key_info++)
{
@@ -1013,165 +1613,113 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
}
param.key_parts_end=key_parts;
+
if ((tree=get_mm_tree(&param,cond)))
{
if (tree->type == SEL_TREE::IMPOSSIBLE)
{
- records=0L; // Return -1 from this function
+ records=0L; // Return -1 from this function
read_time= (double) HA_POS_ERROR;
}
else if (tree->type == SEL_TREE::KEY ||
tree->type == SEL_TREE::KEY_SMALLER)
{
- /*
+ TABLE_READ_PLAN *best_trp;
+ /*
It is possible to use a quick select (but maybe it would be slower
than 'all' table scan).
*/
- SEL_ARG **best_key= 0;
- ha_rows found_records;
- double found_read_time= read_time;
-
- if (!get_quick_select_params(tree, &param, needed_reg, false,
- &found_read_time, &found_records,
- &best_key))
+ if (tree->merges.is_empty())
{
+ double best_read_time= read_time;
+ TRP_ROR_INTERSECT *new_trp;
+ bool can_build_covering= false;
+
+ /* Get best 'range' plan and prepare data for making other plans */
+ if ((best_trp= get_key_scans_params(&param, tree, false,
+ best_read_time)))
+ best_read_time= best_trp->read_cost;
+
/*
- Ok, quick select is better than 'all' table scan and we have its
- parameters, so construct it.
+ Simultaneous key scans and row deletes on several handler
+ objects are not allowed so don't use ROR-intersection for
+ table deletes.
*/
- read_time= found_read_time;
- records= found_records;
-
- if ((quick= get_quick_select(&param,(uint) (best_key-tree->keys),
- *best_key)) && (!quick->init()))
+ if (thd->lex->sql_command != SQLCOM_DELETE)
{
- quick->records= records;
- quick->read_time= read_time;
+ /*
+ Get best non-covering ROR-intersection plan and prepare data for
+ building covering ROR-intersection.
+ */
+ if ((new_trp= get_best_ror_intersect(&param, tree, false,
+ best_read_time,
+ &can_build_covering)))
+ {
+ best_trp= new_trp;
+ best_read_time= best_trp->read_cost;
+ }
+
+ /*
+ Try constructing covering ROR-intersect only if it looks possible
+ and worth doing.
+ */
+ if (new_trp && !new_trp->is_covering && can_build_covering &&
+ (new_trp= get_best_covering_ror_intersect(&param, tree,
+ best_read_time)))
+ best_trp= new_trp;
}
}
-
- /*
- Btw, tree type SEL_TREE::INDEX_MERGE was not introduced
- intentionally.
- */
-
- /* If no range select could be built, try using index_merge. */
- if (!quick && !tree->merges.is_empty())
+ else
{
- DBUG_PRINT("info",("No range reads possible,"
- " trying to construct index_merge"));
+ /* Try creating index_merge/ROR-union scan. */
SEL_IMERGE *imerge;
- SEL_IMERGE *min_imerge= NULL;
- double min_imerge_read_time;
- ha_rows min_imerge_records;
- LINT_INIT(min_imerge_records); // Protected by min_imerge
-
+ TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
+ LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
+
+ /* Calculate cost of full index read for the shortest covering index */
if (!head->used_keys.is_clear_all())
{
int key_for_use= find_shortest_key(head, &head->used_keys);
- ha_rows total_table_records= (0 == head->file->records)? 1 :
- head->file->records;
- read_time = get_index_only_read_time(&param, total_table_records,
- key_for_use);
- DBUG_PRINT("info",
- ("'all' scan will be using key %d, read time %g",
- key_for_use, read_time));
+ double key_read_time= get_index_only_read_time(&param, records,
+ key_for_use);
+ DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
+ "read time %g", key_for_use, key_read_time));
+ if (key_read_time < read_time)
+ read_time= key_read_time;
}
- min_imerge_read_time=read_time;
- /*
- Ok, read_time contains best 'all' read time.
- Now look for index_merge with cost < read_time
- */
+ DBUG_PRINT("info",("No range reads possible,"
+ " trying to construct index_merge"));
List_iterator_fast<SEL_IMERGE> it(tree->merges);
while ((imerge= it++))
{
- if (!get_index_merge_params(&param, needed_reg, imerge,
- &min_imerge_read_time,
- &min_imerge_records))
- min_imerge= imerge;
- }
-
- if (!min_imerge)
- goto end_free;
-
- records= min_imerge_records;
-
- /* Ok, using index_merge is the best option, so construct it. */
- if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head)))
- goto end_free;
-
- quick->records= min_imerge_records;
- quick->read_time= min_imerge_read_time;
-
- my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc);
-
- QUICK_RANGE_SELECT *new_quick;
- for (SEL_TREE **ptree = min_imerge->trees;
- ptree != min_imerge->trees_next;
- ptree++)
- {
- SEL_ARG **tree_best_key=
- min_imerge->best_keys[ptree - min_imerge->trees];
- if ((new_quick= get_quick_select(&param,
- (uint)(tree_best_key-
- (*ptree)->keys),
- *tree_best_key,
- &quick_imerge->alloc)))
- {
- new_quick->records= min_imerge_records;
- new_quick->read_time= min_imerge_read_time;
- /*
- QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC
- pointing to its allocator, restore it back.
- */
- quick_imerge->last_quick_select= new_quick;
-
- if (quick_imerge->push_quick_back(new_quick))
- {
- delete new_quick;
- delete quick;
- quick= quick_imerge= NULL;
- goto end_free;
- }
- }
- else
- {
- delete quick;
- quick= quick_imerge= NULL;
- goto end_free;
- }
+ new_conj_trp= get_best_disjunct_quick(&param, imerge, read_time);
+ if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost <
+ best_conj_trp->read_cost))
+ best_conj_trp= new_conj_trp;
}
+ best_trp= best_conj_trp;
+ }
+ my_pthread_setspecific_ptr(THR_MALLOC, old_root);
- free_root(&alloc,MYF(0));
- my_pthread_setspecific_ptr(THR_MALLOC,old_root);
- if (quick->init())
+ /* If we got a read plan, create a quick select from it. */
+ if (best_trp)
+ {
+ records= best_trp->records;
+ if (!(quick= best_trp->make_quick(&param, true)) || quick->init())
{
delete quick;
- quick= quick_imerge= NULL;
- DBUG_PRINT("error",
- ("Failed to allocate index merge structures,"
- "falling back to full scan."));
+ quick= NULL;
}
- goto end;
- }
+ }
}
}
-end_free:
+ my_pthread_setspecific_ptr(THR_MALLOC, old_root);
free_root(&alloc,MYF(0)); // Return memory & allocator
- my_pthread_setspecific_ptr(THR_MALLOC,old_root);
-end:
thd->no_errors=0;
}
- DBUG_EXECUTE("info",
- {
- if (quick_imerge)
- print_quick_sel_imerge(quick_imerge, &needed_reg);
- else
- print_quick_sel_range((QUICK_RANGE_SELECT*)quick, &needed_reg);
- }
- );
+ DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
/*
Assume that if the user is using 'limit' we will only need to scan
@@ -1181,26 +1729,71 @@ end:
}
-/*
- Calculate index merge cost and save parameters for its construction.
+/*
+ Get cost of 'sweep' full records retrieval.
+ SYNOPSIS
+ get_sweep_read_cost()
+ param Parameter from test_quick_select
+ records # of records to be retrieved
+ RETURN
+ cost of sweep
+*/
- SYNOPSIS
- get_index_merge_params()
- param in parameter with structure.
- needed_reg in/out needed_reg from this SQL_SELECT
- imerge in index_merge description structure
- read_time in/out in: cost of an existing way to read a table
- out: cost of index merge
- imerge_rows out pessimistic estimate of # of rows to be retrieved
+double get_sweep_read_cost(const PARAM *param, ha_rows records)
+{
+ double result;
+ if (param->table->file->primary_key_is_clustered())
+ {
+ result= param->table->file->read_time(param->table->primary_key,
+ records, records);
+ }
+ else
+ {
+ double n_blocks=
+ ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE);
+ double busy_blocks=
+ n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
+ if (busy_blocks < 1.0)
+ busy_blocks= 1.0;
+ DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
+ busy_blocks));
+ /*
+ Disabled: Bail out if # of blocks to read is bigger than # of blocks in
+ table data file.
+ if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks)
+ return 1;
+ */
+ JOIN *join= param->thd->lex->select_lex.join;
+ if (!join || join->tables == 1)
+ {
+ /* No join, assume reading is done in one 'sweep' */
+ result= busy_blocks*(DISK_SEEK_BASE_COST +
+ DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+ }
+ else
+ {
+ /*
+ Possibly this is a join with source table being non-last table, so
+ assume that disk seeks are random here.
+ */
+ result= busy_blocks;
+ }
+ }
+ DBUG_PRINT("info",("returning cost=%g", result));
+ return result;
+}
- RETURN
- 0 Cost of this index_merge is less than passed *read_time,
- *imerge_rows and *read_time contain new index_merge parameters.
- 1 Cost of this index_merge is more than *read_time,
- *imerge_rows and *read_time are not modified.
- -1 error
+/*
+ Get best plan for a SEL_IMERGE disjunctive expression.
+ SYNOPSIS
+ get_best_disjunct_quick()
+ param Parameter from check_quick_select function
+ imerge Expression to use
+ read_time Don't create scans with cost > read_time
+
NOTES
+ index_merge cost is calculated as follows:
index_merge_cost =
cost(index_reads) + (see #1)
cost(rowid_to_row_scan) + (see #2)
@@ -1247,167 +1840,255 @@ end:
(DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
3. Cost of Unique use is calculated in Unique::get_use_cost function.
-*/
-
-static int get_index_merge_params(PARAM *param, key_map& needed_reg,
- SEL_IMERGE *imerge, double *read_time,
- ha_rows* imerge_rows)
-{
- double imerge_cost= 0.0; /* cost of this index_merge */
- bool imerge_too_expensive= false;
- double tree_read_time;
- ha_rows tree_records;
- bool pk_is_clustered= param->table->file->primary_key_is_clustered();
- bool have_cpk_scan= false;
- ha_rows records_for_unique= 0;
- ha_rows cpk_records= 0;
- DBUG_ENTER("get_index_merge_params");
-
- /* allocate structs to save construction info */
- imerge->best_keys=
- (SEL_ARG***)alloc_root(param->mem_root,
- (imerge->trees_next - imerge->trees)*
- sizeof(void*));
- /*
- PHASE 1: get the best keys to use for this index_merge
- */
+ ROR-union cost is calculated in the same way index_merge, but instead of
+ Unique a priority queue is used.
+
+ RETURN
+ Created read plan
+ NULL - Out of memory or no read scan could be built.
+*/
+static
+TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
+ double read_time)
+{
+ SEL_TREE **ptree;
+ TRP_INDEX_MERGE *imerge_trp= NULL;
+ uint n_child_scans= imerge->trees_next - imerge->trees;
+ TRP_RANGE **range_scans;
+ TRP_RANGE **cur_child;
+ TRP_RANGE **cpk_scan= NULL;
+ bool imerge_too_expensive= false;
+ double imerge_cost= 0.0;
+ ha_rows cpk_scan_records= 0;
+ ha_rows non_cpk_scan_records= 0;
+ bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+ bool all_scans_ror_able= true;
+ bool all_scans_rors= true;
+ uint unique_calc_buff_size;
+ TABLE_READ_PLAN **roru_read_plans;
+ TABLE_READ_PLAN **cur_roru_plan;
+ double roru_index_costs;
+ double blocks_in_index_read;
+ ha_rows roru_total_records;
+ double roru_intersect_part= 1.0;
+ double sweep_cost;
+ DBUG_ENTER("get_best_disjunct_quick");
+ DBUG_PRINT("info", ("Full table scan cost =%g", read_time));
+
+ if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
+ sizeof(TRP_RANGE*)*
+ n_child_scans)))
+ DBUG_RETURN(NULL);
/*
- It may be possible to use different keys for index_merge scans, e.g. for
- query like
- ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
- we have to make choice between key1 and key2 for one scan and between
- key3, key4 for another.
- We assume we'll get the best if we choose the best key read inside each
- of the conjuncts.
+ Collect best 'range' scan for each of disjuncts, and, while doing so,
+ analyze possibility of ROR scans. Also calculate some values needed by
+ other parts of the code.
*/
- for (SEL_TREE **ptree= imerge->trees;
+ for (ptree= imerge->trees, cur_child= range_scans;
ptree != imerge->trees_next;
- ptree++)
+ ptree++, cur_child++)
{
- SEL_ARG **tree_best_key;
- tree_read_time= *read_time;
-
- if (get_quick_select_params(*ptree, param, needed_reg, true,
- &tree_read_time, &tree_records,
- &tree_best_key))
+ DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
+ "tree in SEL_IMERGE"););
+ if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time)))
{
/*
One of index scans in this index_merge is more expensive than entire
- table read for another available option. The entire index_merge will
- be more expensive then, too. We continue here only to update
- SQL_SELECT members.
+ table read for another available option. The entire index_merge (and
+ any possible ROR-union) will be more expensive then, too. We continue
+ here only to update SQL_SELECT members.
*/
imerge_too_expensive= true;
}
-
if (imerge_too_expensive)
continue;
- uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];
- imerge->best_keys[ptree - imerge->trees]= tree_best_key;
- imerge_cost += tree_read_time;
-
- if (pk_is_clustered && keynr == param->table->primary_key)
+ imerge_cost += (*cur_child)->read_cost;
+ all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
+ all_scans_rors &= (*cur_child)->is_ror;
+ if (pk_is_clustered &&
+ param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key)
{
- have_cpk_scan= true;
- cpk_records= tree_records;
+ cpk_scan= cur_child;
+ cpk_scan_records= (*cur_child)->records;
}
else
- records_for_unique += tree_records;
- }
- DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost));
-
- if (imerge_too_expensive)
- DBUG_RETURN(1);
-
- if ((imerge_cost > *read_time) ||
- ((records_for_unique + cpk_records) >= param->table->file->records) &&
- *read_time != DBL_MAX)
- {
- /* Bail out if it is obvious that index_merge would be more expensive */
- DBUG_RETURN(1);
+ non_cpk_scan_records += (*cur_child)->records;
}
-
- if (have_cpk_scan)
+
+ DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost));
+ if (imerge_too_expensive || (imerge_cost > read_time) ||
+ (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) &&
+ read_time != DBL_MAX)
{
/*
- Add one ROWID comparison for each row retrieved on non-CPK scan.
- (it is done in QUICK_RANGE_SELECT::row_in_ranges)
+ Bail out if it is obvious that both index_merge and ROR-union will be
+ more expensive
*/
- imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID;
+ DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
+ "full table scan, bailing out"));
+ DBUG_RETURN(NULL);
}
-
- /* PHASE 2: Calculate cost(rowid_to_row_scan) */
- if (pk_is_clustered)
+ if (all_scans_rors)
{
- imerge_cost +=
- param->table->file->read_time(param->table->primary_key,
- records_for_unique,
- records_for_unique)
- + rows2double(records_for_unique) / TIME_FOR_COMPARE;
+ roru_read_plans= (TABLE_READ_PLAN**)range_scans;
+ goto skip_to_ror_scan;
}
- else
- {
- double n_blocks=
- ceil((double) ((longlong)param->table->file->data_file_length / IO_SIZE));
- double busy_blocks=
- n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, (double) records_for_unique));
-
- JOIN *join= param->thd->lex->select_lex.join;
- if (!join || join->tables == 1)
- imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST +
- DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
- else
- {
- /*
- It can be a join with source table being non-last table, so assume
- that disk seeks are random here.
- (TODO it is possible to determine if this is a last table in 'index
- checked for each record' join)
- */
- imerge_cost += busy_blocks;
- }
- }
- DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost));
-
- /* PHASE 3: Add Unique operations cost */
- register uint unique_calc_buff_size=
- Unique::get_cost_calc_buff_size(records_for_unique,
+ blocks_in_index_read= imerge_cost;
+ if (cpk_scan)
+ {
+ /*
+ Add one ROWID comparison for each row retrieved on non-CPK scan. (it
+ is done in QUICK_RANGE_SELECT::row_in_ranges)
+ */
+ imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
+ }
+
+ /* Calculate cost(rowid_to_row_scan) */
+ imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
+ DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
+ imerge_cost));
+ if (imerge_cost > read_time)
+ goto build_ror_index_merge;
+
+ /* Add Unique operations cost */
+ unique_calc_buff_size=
+ Unique::get_cost_calc_buff_size(non_cpk_scan_records,
param->table->file->ref_length,
param->thd->variables.sortbuff_size);
if (param->imerge_cost_buff_size < unique_calc_buff_size)
{
if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
unique_calc_buff_size)))
- DBUG_RETURN(1);
+ DBUG_RETURN(NULL);
param->imerge_cost_buff_size= unique_calc_buff_size;
}
imerge_cost +=
- Unique::get_use_cost(param->imerge_cost_buff, records_for_unique,
+ Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records,
param->table->file->ref_length,
param->thd->variables.sortbuff_size);
+ DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
+ imerge_cost, read_time));
+ if (imerge_cost < read_time)
+ {
+ if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
+ {
+ imerge_trp->read_cost= imerge_cost;
+ imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
+ imerge_trp->records= min(imerge_trp->records,
+ param->table->file->records);
+ imerge_trp->range_scans= range_scans;
+ imerge_trp->range_scans_end= range_scans + n_child_scans;
+ read_time= imerge_cost;
+ }
+ }
- DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost));
- if (imerge_cost < *read_time)
+build_ror_index_merge:
+ if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE)
+ DBUG_RETURN(imerge_trp);
+
+ /* Ok, it is possible to build a ROR-union, try it. */
+ bool dummy;
+ if (!(roru_read_plans=
+ (TABLE_READ_PLAN**)alloc_root(param->mem_root,
+ sizeof(TABLE_READ_PLAN*)*
+ n_child_scans)))
+ DBUG_RETURN(imerge_trp);
+skip_to_ror_scan:
+ roru_index_costs= 0.0;
+ roru_total_records= 0;
+ cur_roru_plan= roru_read_plans;
+
+ /* Find 'best' ROR scan for each of trees in disjunction */
+ for (ptree= imerge->trees, cur_child= range_scans;
+ ptree != imerge->trees_next;
+ ptree++, cur_child++, cur_roru_plan++)
{
- *read_time= imerge_cost;
- records_for_unique += cpk_records;
- *imerge_rows= min(records_for_unique, param->table->file->records);
- DBUG_RETURN(0);
+ /*
+ Assume the best ROR scan is the one that has cheapest full-row-retrieval
+ scan cost.
+ Also accumulate index_only scan costs as we'll need them to calculate
+ overall index_intersection cost.
+ */
+ double cost;
+ if ((*cur_child)->is_ror)
+ {
+ /* Ok, we have index_only cost, now get full rows scan cost */
+ cost= param->table->file->
+ read_time(param->real_keynr[(*cur_child)->key_idx], 1,
+ (*cur_child)->records) +
+ rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
+ }
+ else
+ cost= read_time;
+
+ TABLE_READ_PLAN *prev_plan= *cur_child;
+ if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost,
+ &dummy)))
+ {
+ if (prev_plan->is_ror)
+ *cur_roru_plan= prev_plan;
+ else
+ DBUG_RETURN(imerge_trp);
+ roru_index_costs += (*cur_roru_plan)->read_cost;
+ }
+ else
+ roru_index_costs +=
+ ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
+ roru_total_records += (*cur_roru_plan)->records;
+ roru_intersect_part *= (*cur_roru_plan)->records /
+ param->table->file->records;
}
- DBUG_RETURN(1);
+
+ /*
+ rows to retrieve=
+ SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
+ This is valid because index_merge construction guarantees that conditions
+ in disjunction do not share key parts.
+ */
+ roru_total_records -= (ha_rows)(roru_intersect_part*
+ param->table->file->records);
+ /* ok, got a ROR read plan for each of the disjuncts
+ Calculate cost:
+ cost(index_union_scan(scan_1, ... scan_n)) =
+ SUM_i(cost_of_index_only_scan(scan_i)) +
+ queue_use_cost(rowid_len, n) +
+ cost_of_row_retrieval
+ See get_merge_buffers_cost function for queue_use_cost formula derivation.
+ */
+
+ double roru_total_cost;
+ roru_total_cost= roru_index_costs +
+ rows2double(roru_total_records)*log(n_child_scans) /
+ (TIME_FOR_COMPARE_ROWID * M_LN2) +
+ get_sweep_read_cost(param, roru_total_records);
+
+ DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost,
+ n_child_scans));
+ TRP_ROR_UNION* roru;
+ if (roru_total_cost < read_time)
+ {
+ if ((roru= new (param->mem_root) TRP_ROR_UNION))
+ {
+ roru->first_ror= roru_read_plans;
+ roru->last_ror= roru_read_plans + n_child_scans;
+ roru->read_cost= roru_total_cost;
+ roru->records= roru_total_records;
+ DBUG_RETURN(roru);
+ }
+ }
+ DBUG_RETURN(imerge_trp);
}
/*
Calculate cost of 'index only' scan for given index and number of records.
- (We can resolve this by only reading through this key.)
SYNOPSIS
- get_whole_index_read_time()
+ get_index_only_read_time()
param parameters structure
records #of records to read
keynr key to read
@@ -1417,7 +2098,7 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
key blocks are half full (normally things are much better).
*/
-inline double get_index_only_read_time(PARAM* param, ha_rows records,
+inline double get_index_only_read_time(const PARAM* param, ha_rows records,
int keynr)
{
double read_time;
@@ -1429,39 +2110,838 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
return read_time;
}
+typedef struct st_ror_scan_info
+{
+ uint idx; /* # of used key in param->keys */
+ uint keynr; /* # of used key in table */
+ ha_rows records; /* estimate of # records this scan will return */
+
+ /* Set of intervals over key fields that will be used for row retrieval. */
+ SEL_ARG *sel_arg;
+
+ /* Fields used in the query and covered by this ROR scan. */
+ MY_BITMAP covered_fields;
+ uint used_fields_covered; /* # of set bits in covered_fields */
+ int key_rec_length; /* length of key record (including rowid) */
+
+ /*
+ Cost of reading all index records with values in sel_arg intervals set
+ (assuming there is no need to access full table records)
+ */
+ double index_read_cost;
+ uint first_uncovered_field; /* first unused bit in covered_fields */
+ uint key_components; /* # of parts in the key */
+} ROR_SCAN_INFO;
+
+
+/*
+ Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
+ sel_arg set of intervals.
+
+ SYNOPSIS
+ make_ror_scan()
+ param Parameter from test_quick_select function
+ idx Index of key in param->keys
+ sel_arg Set of intervals for a given key
+ RETURN
+ NULL - out of memory
+ ROR scan structure containing a scan for {idx, sel_arg}
+*/
+
+static
+ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
+{
+ ROR_SCAN_INFO *ror_scan;
+ uchar *bitmap_buf;
+ uint keynr;
+ DBUG_ENTER("make_ror_scan");
+ if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
+ sizeof(ROR_SCAN_INFO))))
+ DBUG_RETURN(NULL);
+
+ ror_scan->idx= idx;
+ ror_scan->keynr= keynr= param->real_keynr[idx];
+ ror_scan->key_rec_length= param->table->key_info[keynr].key_length +
+ param->table->file->ref_length;
+ ror_scan->sel_arg= sel_arg;
+ ror_scan->records= param->table->quick_rows[keynr];
+
+ if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root,
+ param->fields_bitmap_size)))
+ DBUG_RETURN(NULL);
+
+ if (bitmap_init(&ror_scan->covered_fields, bitmap_buf,
+ param->fields_bitmap_size*8, false))
+ DBUG_RETURN(NULL);
+ bitmap_clear_all(&ror_scan->covered_fields);
+
+ KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
+ KEY_PART_INFO *key_part_end= key_part +
+ param->table->key_info[keynr].key_parts;
+ uint n_used_covered= 0;
+ for (;key_part != key_part_end; ++key_part)
+ {
+ if (bitmap_is_set(&param->needed_fields, key_part->fieldnr))
+ {
+ n_used_covered++;
+ bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr);
+ }
+ }
+ ror_scan->index_read_cost=
+ get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
+ ror_scan->keynr);
+ DBUG_RETURN(ror_scan);
+}
+
+
+/*
+ Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length.
+ SYNOPSIS
+ cmp_ror_scan_info()
+ a ptr to first compared value
+ b ptr to second compared value
+
+ RETURN
+ -1 a < b
+ 0 a = b
+ 1 a > b
+*/
+static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+{
+ double val1= rows2double((*a)->records) * (*a)->key_rec_length;
+ double val2= rows2double((*b)->records) * (*b)->key_rec_length;
+ return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
+}
+
+/*
+ Compare two ROR_SCAN_INFO** by
+ (#covered fields in F desc,
+ #components asc,
+ number of first not covered component asc)
+
+ SYNOPSIS
+ cmp_ror_scan_info_covering()
+ a ptr to first compared value
+ b ptr to second compared value
+
+ RETURN
+ -1 a < b
+ 0 a = b
+ 1 a > b
+*/
+static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
+{
+ if ((*a)->used_fields_covered > (*b)->used_fields_covered)
+ return -1;
+ if ((*a)->used_fields_covered < (*b)->used_fields_covered)
+ return 1;
+ if ((*a)->key_components < (*b)->key_components)
+ return -1;
+ if ((*a)->key_components > (*b)->key_components)
+ return 1;
+ if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
+ return -1;
+ if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
+ return 1;
+ return 0;
+}
+
+/* Auxiliary structure for incremental ROR-intersection creation */
+typedef struct
+{
+ const PARAM *param;
+ MY_BITMAP covered_fields; /* union of fields covered by all scans */
+
+ /* true if covered_fields is a superset of needed_fields */
+ bool is_covering;
+
+ double index_scan_costs; /* SUM(cost of 'index-only' scans) */
+ double total_cost;
+ /*
+ Fraction of table records that satisfies conditions of all scans.
+ This is the number of full records that will be retrieved if a
+ non-index_only index intersection will be employed.
+ */
+ double records_fract;
+ ha_rows index_records; /* sum(#records to look in indexes) */
+} ROR_INTERSECT_INFO;
+
+
+/*
+ Re-initialize an allocated intersect info to contain zero scans.
+ SYNOPSIS
+ info Intersection info structure to re-initialize.
+*/
+
+static void ror_intersect_reinit(ROR_INTERSECT_INFO *info)
+{
+ info->is_covering= false;
+ info->index_scan_costs= 0.0f;
+ info->records_fract= 1.0f;
+ bitmap_clear_all(&info->covered_fields);
+}
+
+/*
+ Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.
+
+ SYNOPSIS
+ ror_intersect_init()
+ param Parameter from test_quick_select
+ is_index_only If true, set ROR_INTERSECT_INFO to be covering
+
+ RETURN
+ allocated structure
+ NULL on error
+*/
+
+static
+ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only)
+{
+ ROR_INTERSECT_INFO *info;
+ uchar* buf;
+ if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
+ sizeof(ROR_INTERSECT_INFO))))
+ return NULL;
+ info->param= param;
+ if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size)))
+ return NULL;
+ if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
+ false))
+ return NULL;
+ ror_intersect_reinit(info);
+ return info;
+}
+
+
+/*
+ Check if adding a ROR scan to a ROR-intersection reduces its cost of
+ ROR-intersection and if yes, update parameters of ROR-intersection,
+ including its cost.
+
+ SYNOPSIS
+ ror_intersect_add()
+ param Parameter from test_quick_select
+ info ROR-intersection structure to add the scan to.
+ ror_scan ROR scan info to add.
+ is_cpk_scan If true, add the scan as CPK scan (this can be inferred
+ from other parameters and is passed separately only to
+ avoid duplicating the inference code)
+
+ NOTES
+ Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
+ intersection decreases. The cost of ROR-intersection is caclulated as
+ follows:
+
+ cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval
+
+ if (union of indexes used covers all needed fields)
+ cost_of_full_rows_retrieval= 0;
+ else
+ {
+ cost_of_full_rows_retrieval=
+ cost_of_sweep_read(E(rows_to_retrieve), rows_in_table);
+ }
+
+ E(rows_to_retrieve) is caclulated as follows:
+ Suppose we have a condition on several keys
+ cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key
+ k_21=c_21 AND k_22=c_22 AND ... // parts of second key
+ ...
+ k_n1=c_n1 AND k_n3=c_n3 AND ... (1)
+
+ where k_ij may be the same as any k_pq (i.e. keys may have common parts).
+
+ A full row is retrieved iff entire cond holds.
+
+ The recursive procedure for finding P(cond) is as follows:
+
+ First step:
+ Pick 1st part of 1st key and break conjunction (1) into two parts:
+ cond= (k_11=c_11 AND R)
+
+ Here R may still contain condition(s) equivalent to k_11=c_11.
+ Nevertheless, the following holds:
+
+ P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11).
+
+ Mark k_11 as fixed field (and satisfied condition) F, save P(F),
+ save R to be cond and proceed to recursion step.
+
+ Recursion step:
+ We have a set of fixed fields/satisfied conditions) F, probability P(F),
+ and remaining conjunction R
+ Pick next key part on current key and its condition "k_ij=c_ij".
+ We will add "k_ij=c_ij" into F and update P(F).
+ Lets denote k_ij as t, R = t AND R1, where R1 may still contain t. Then
+
+ P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
+
+ (where '|' mean conditional probability, not "or")
+
+ Consider the first multiplier in (2). One of the following holds:
+ a) F contains condition on field used in t (i.e. t AND F = F).
+ Then P(t|F) = 1
+
+ b) F doesn't contain condition on field used in t. Then F and t are
+ considered independent.
+
+ P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
+ = P(t|fields_before_t_in_key).
+
+ P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
+ #records(fields_before_t_in_key, t)
+
+ The second multiplier is calculated by applying this step recursively.
+
+ IMPLEMENTATION
+ This function calculates the result of application of the "recursion step"
+ described above for all fixed key members of a single key, accumulating set
+ of covered fields, selectivity, etc.
+
+ The calculation is conducted as follows:
+ Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
+
+ n_{k1} n_{k_2}
+ --------- * --------- * .... (3)
+ n_{k1-1} n_{k2_1}
+
+ where k1,k2,... are key parts which fields were not yet marked as fixed
+ ( this is result of application of option b) of the recursion step for
+ parts of a single key).
+ Since it is reasonable to expect that most of the fields are not marked
+ as fixed, we calcualate (3) as
+
+ n_{i1} n_{i_2}
+ (3) = n_{max_key_part} / ( --------- * --------- * .... )
+ n_{i1-1} n_{i2_1}
+
+ where i1,i2, .. are key parts that were already marked as fixed.
+
+ In order to minimize number of expensive records_in_range calls we group
+ and reduce adjacent fractions.
+
+ RETURN
+ true ROR scan added to ROR-intersection, cost updated.
+ false It doesn't make sense to add this ROR scan to this ROR-intersection.
+*/
+
+bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info,
+ ROR_SCAN_INFO* ror_scan, bool is_cpk_scan=false)
+{
+ int i;
+ SEL_ARG *sel_arg;
+ KEY_PART_INFO *key_part=
+ info->param->table->key_info[ror_scan->keynr].key_part;
+ double selectivity_mult= 1.0;
+ char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
+
+ DBUG_ENTER("ror_intersect_add");
+ DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract));
+ DBUG_PRINT("info", ("Adding scan on %s",
+ info->param->table->key_info[ror_scan->keynr].name));
+ SEL_ARG *tuple_arg= NULL;
+ char *key_ptr= key_val;
+ bool cur_covered, prev_covered=
+ bitmap_is_set(&info->covered_fields, key_part->fieldnr);
+
+ ha_rows prev_records= param->table->file->records;
+
+ for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg;
+ i++, sel_arg= sel_arg->next_key_part)
+ {
+ cur_covered= bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr);
+ if (cur_covered != prev_covered)
+ {
+ /* create (part1val, ..., part{n-1}val) tuple. */
+ {
+ if (!tuple_arg)
+ {
+ tuple_arg= ror_scan->sel_arg;
+ tuple_arg->store_min(key_part->length, &key_ptr, 0);
+ }
+ while (tuple_arg->next_key_part != sel_arg)
+ {
+ tuple_arg= tuple_arg->next_key_part;
+ tuple_arg->store_min(key_part->length, &key_ptr, 0);
+ }
+ }
+ ha_rows records;
+ records= param->table->file->
+ records_in_range(ror_scan->keynr,
+ (byte*)key_val, key_ptr - key_val,
+ HA_READ_KEY_EXACT,
+ (byte*)key_val, key_ptr - key_val,
+ HA_READ_AFTER_KEY);
+ if (cur_covered)
+ {
+ /* uncovered -> covered */
+ double tmp= rows2double(records)/rows2double(prev_records);
+ printf(" Selectivity multiplier: %g\n", tmp);
+ DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
+ selectivity_mult *= tmp;
+ prev_records= HA_POS_ERROR;
+ }
+ else
+ {
+ /* covered -> uncovered */
+ prev_records= records;
+ }
+ }
+ else printf("no change, skipping\n");
+ prev_covered= cur_covered;
+ }
+ if (!prev_covered)
+ {
+ double tmp= rows2double(param->table->quick_rows[ror_scan->keynr]) /
+ rows2double(prev_records);
+ DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
+ selectivity_mult *= tmp;
+ }
+
+ if (selectivity_mult == 1.0)
+ {
+ /* Don't add this scan if it doesn't improve selectivity. */
+ DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
+ DBUG_RETURN(false);
+ }
+
+ info->records_fract *= selectivity_mult;
+ ha_rows cur_scan_records= info->param->table->quick_rows[ror_scan->keynr];
+ if (is_cpk_scan)
+ {
+ info->index_scan_costs += rows2double(cur_scan_records)*
+ TIME_FOR_COMPARE_ROWID;
+ }
+ else
+ {
+ info->index_records += cur_scan_records;
+ info->index_scan_costs += ror_scan->index_read_cost;
+ bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
+ }
+
+ if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
+ &info->covered_fields))
+ {
+ DBUG_PRINT("info", ("ROR-intersect is covering now"));
+ info->is_covering= true;
+ }
+
+ printf(" records: %g\n", rows2double(param->table->file->records) *
+ info->records_fract);
+ info->total_cost= info->index_scan_costs;
+ if (!info->is_covering)
+ {
+ ha_rows table_recs= info->param->table->file->records;
+ info->total_cost +=
+ get_sweep_read_cost(info->param,
+ (ha_rows)(info->records_fract*table_recs));
+ }
+ DBUG_PRINT("info", ("New selectivity= %g", info->records_fract));
+ DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost,
+ info->is_covering?"" : "non-"));
+ DBUG_RETURN(true);
+}
+
+
+/*
+ Get best ROR-intersection plan using non-covering ROR-intersection search
+ algorithm. The returned plan may be covering.
+
+ SYNOPSIS
+ get_best_ror_intersect()
+ param Parameter from test_quick_select function.
+ tree Transformed restriction condition to be used to look
+ for ROR scans.
+ force_index_only If true, don't calculate costs of full rows retrieval.
+ read_time Do not return read plans with cost > read_time.
+ are_all_covering [out] set to true if union of all scans covers all
+ fields needed by the query (and it is possible to build
+ a covering ROR-intersection)
+
+ NOTES
+ get_key_scans_params must be called before for the same SEL_TREE before
+ this function can be called.
+
+ The approximate best non-covering plan search algorithm is as follows:
+
+ find_min_ror_intersection_scan()
+ {
+ R= select all ROR scans;
+ order R by (E(#records_matched) * key_record_length).
+
+ S= first(R); -- set of scans that will be used for ROR-intersection
+ R= R-first(S);
+ min_cost= cost(S);
+ min_scan= make_scan(S);
+ while (R is not empty)
+ {
+ if (!selectivity(S + first(R) < selectivity(S)))
+ continue;
+
+ S= S + first(R);
+ R= R - first(R);
+ if (cost(S) < min_cost)
+ {
+ min_cost= cost(S);
+ min_scan= make_scan(S);
+ }
+ }
+ return min_scan;
+ }
+
+ See ror_intersect_add function for ROR intersection costs.
+
+ Special handling for Clustered PK scans
+ Clustered PK contains all table fields, so using it as a regular scan in
+ index intersection doesn't make sense: a range scan on CPK will be less
+ expensive in this case.
+ Clustered PK scan has special handling in ROR-intersection: it is not used
+ to retrieve rows, instead its condition is used to filter row references
+ we get from scans on other keys.
+
+ RETURN
+ ROR-intersection table read plan
+ NULL if out of memory or no suitable plan found.
+*/
+
+static
+TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+ bool force_index_only,
+ double read_time,
+ bool *are_all_covering)
+{
+ uint idx;
+ double min_cost= read_time;
+ DBUG_ENTER("get_best_ror_intersect");
+
+ if (tree->n_ror_scans < 2)
+ DBUG_RETURN(NULL);
+
+ /*
+ Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them.
+ Also find and save clustered PK scan if there is one.
+ */
+ ROR_SCAN_INFO **cur_ror_scan;
+ ROR_SCAN_INFO *cpk_scan= NULL;
+ bool cpk_scan_used= false;
+ if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+ sizeof(ROR_SCAN_INFO*)*
+ param->keys)))
+ return NULL;
+ uint cpk_no= (param->table->file->primary_key_is_clustered())?
+ param->table->primary_key : MAX_KEY;
+
+ for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
+ {
+ ROR_SCAN_INFO *scan;
+ if (!tree->ror_scans_map.is_set(idx))
+ continue;
+ if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
+ return NULL;
+ if (param->real_keynr[idx] == cpk_no)
+ {
+ cpk_scan= scan;
+ tree->n_ror_scans--;
+ }
+ else
+ *(cur_ror_scan++)= scan;
+ }
+
+ tree->ror_scans_end= cur_ror_scan;
+ DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
+ tree->ror_scans,
+ tree->ror_scans_end););
+ /*
+ Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
+ ROR_SCAN_INFOs.
+ Now, get a minimal key scan using an approximate algorithm.
+ */
+ qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
+ (qsort_cmp)cmp_ror_scan_info);
+ DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
+ tree->ror_scans,
+ tree->ror_scans_end););
+
+ ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
+ ROR_SCAN_INFO **intersect_scans_end;
+ if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+ sizeof(ROR_SCAN_INFO*)*
+ tree->n_ror_scans)))
+ return NULL;
+ intersect_scans_end= intersect_scans;
+
+ /* Create and incrementally update ROR intersection. */
+ ROR_INTERSECT_INFO *intersect;
+ if (!(intersect= ror_intersect_init(param, false)))
+ return NULL;
+
+ /* [intersect_scans, intersect_scans_best) will hold the best combination */
+ ROR_SCAN_INFO **intersect_scans_best= NULL;
+ ha_rows best_rows;
+ bool is_best_covering;
+ double best_index_scan_costs;
+ LINT_INIT(best_rows); /* protected by intersect_scans_best */
+ LINT_INIT(is_best_covering);
+ LINT_INIT(best_index_scan_costs);
+
+ cur_ror_scan= tree->ror_scans;
+ /* Start with one scan */
+ intersect_scans_best= intersect_scans;
+ while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
+ {
+ /* S= S + first(R); */
+ if (ror_intersect_add(param, intersect, *cur_ror_scan))
+ *(intersect_scans_end++)= *cur_ror_scan;
+ /* R= R - first(R); */
+ cur_ror_scan++;
+
+ if (intersect->total_cost < min_cost)
+ {
+ /* Local minimum found, save it */
+ min_cost= intersect->total_cost;
+ best_rows= (ha_rows)(intersect->records_fract*
+ rows2double(param->table->file->records));
+ is_best_covering= intersect->is_covering;
+ intersect_scans_best= intersect_scans_end;
+ best_index_scan_costs= intersect->index_scan_costs;
+ }
+ }
+
+ DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
+ "best ROR-intersection",
+ intersect_scans,
+ intersect_scans_best););
+
+ *are_all_covering= intersect->is_covering;
+ uint best_num= intersect_scans_best - intersect_scans;
+ /*
+ Ok, found the best ROR-intersection of non-CPK key scans.
+ Check if we should add a CPK scan.
+
+ If the obtained ROR-intersection is covering, it doesn't make sense
+ to add CPK scan - Clustered PK contains all fields and if we're doing
+ CPK scan doing other CPK scans will only add more overhead.
+ */
+ if (cpk_scan && !intersect->is_covering)
+ {
+ /*
+ Handle the special case: ROR-intersect(PRIMARY, key1) is
+ the best, but cost(range(key1)) > cost(best_non_ror_range_scan)
+ */
+ if (best_num == 0)
+ {
+ ror_intersect_reinit(intersect);
+ if (!ror_intersect_add(param, intersect, cpk_scan))
+ DBUG_RETURN(NULL); /* shouldn't happen actually actually */
+ *(intersect_scans_end++)= *cur_ror_scan;
+ best_num++;
+ }
+
+ if (ror_intersect_add(param, intersect, cpk_scan))
+ cpk_scan_used= true;
+ best_rows= (ha_rows)(intersect->records_fract*
+ rows2double(param->table->file->records));
+ }
+
+ /* Ok, return ROR-intersect plan if we have found one */
+ TRP_ROR_INTERSECT *trp= NULL;
+ if (best_num > 1 || cpk_scan_used)
+ {
+ if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
+ DBUG_RETURN(trp);
+ if (!(trp->first_scan=
+ (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+ sizeof(ROR_SCAN_INFO*)*best_num)))
+ DBUG_RETURN(NULL);
+ memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
+ trp->last_scan= trp->first_scan + best_num;
+ trp->is_covering= is_best_covering;
+ trp->read_cost= min_cost;
+ trp->records= best_rows? best_rows : 1;
+ trp->index_scan_costs= best_index_scan_costs;
+ trp->cpk_scan= cpk_scan;
+ }
+ DBUG_RETURN(trp);
+}
+
/*
- Calculate quick range select read time, # of records, and best key to use
- without constructing QUICK_RANGE_SELECT object.
+ Get best covering ROR-intersection.
+ SYNOPSIS
+ get_best_covering_ror_intersect()
+ param Parameter from test_quick_select function.
+ tree SEL_TREE with sets of intervals for different keys.
+ read_time Don't return table read plans with cost > read_time.
+
+ RETURN
+ Best covering ROR-intersection plan
+ NULL if no plan found.
+
+ NOTES
+ get_best_ror_intersect must be called for a tree before calling this
+ function for it.
+ This function invalidates tree->ror_scans member values.
+
+ The following approximate algorithm is used:
+ I=set of all covering indexes
+ F=set of all fields to cover
+ S={}
+
+ do {
+ Order I by (#covered fields in F desc,
+ #components asc,
+ number of first not covered component asc);
+ F=F-covered by first(I);
+ S=S+first(I);
+ I=I-first(I);
+ } while F is not empty.
+*/
+
+static
+TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
+ SEL_TREE *tree,
+ double read_time)
+{
+ ROR_SCAN_INFO **ror_scan_mark;
+ ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;
+ DBUG_ENTER("get_best_covering_ror_intersect");
+ uint nbits= param->fields_bitmap_size*8;
+
+ for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
+ (*scan)->key_components=
+ param->table->key_info[(*scan)->keynr].key_parts;
+
+ /*
+ Run covering-ROR-search algorithm.
+ Assume set I is [ror_scan .. ror_scans_end)
+ */
+
+ /*I=set of all covering indexes */
+ ror_scan_mark= tree->ror_scans;
+
+ uchar buf[MAX_KEY/8+1];
+ MY_BITMAP covered_fields;
+ if (bitmap_init(&covered_fields, buf, nbits, false))
+ DBUG_RETURN(0);
+ bitmap_clear_all(&covered_fields);
+
+ double total_cost= 0.0f;
+ ha_rows records=0;
+ bool all_covered;
+
+ DBUG_PRINT("info", ("Building covering ROR-intersection"));
+ DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+ "building covering ROR-I",
+ ror_scan_mark, ror_scans_end););
+ do {
+ /*
+ Update changed sorting info:
+ #covered fields,
+ number of first not covered component
+ Calculate and save these values for each of remaining scans.
+ */
+ for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
+ {
+ bitmap_subtract(&(*scan)->covered_fields, &covered_fields);
+ (*scan)->used_fields_covered=
+ bitmap_bits_set(&(*scan)->covered_fields);
+ (*scan)->first_uncovered_field=
+ bitmap_get_first(&(*scan)->covered_fields);
+ }
+
+ qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*),
+ (qsort_cmp)cmp_ror_scan_info_covering);
+
+ DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+ "remaining scans",
+ ror_scan_mark, ror_scans_end););
+
+ /* I=I-first(I) */
+ total_cost += (*ror_scan_mark)->index_read_cost;
+ records += (*ror_scan_mark)->records;
+ DBUG_PRINT("info", ("Adding scan on %s",
+ param->table->key_info[(*ror_scan_mark)->keynr].name));
+ if (total_cost > read_time)
+ DBUG_RETURN(NULL);
+ /* F=F-covered by first(I) */
+ bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields);
+ all_covered= bitmap_is_subset(&param->needed_fields, &covered_fields);
+ } while (!all_covered && (++ror_scan_mark < ror_scans_end));
+
+ if (!all_covered)
+ DBUG_RETURN(NULL); /* should not happen actually */
+
+ /*
+ Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
+ cost total_cost.
+ */
+ DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
+ DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+ "creating covering ROR-intersect",
+ tree->ror_scans, ror_scan_mark););
+
+ /* Add priority queue use cost. */
+ total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) /
+ (TIME_FOR_COMPARE_ROWID * M_LN2);
+ DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));
+
+ if (total_cost > read_time)
+ DBUG_RETURN(NULL);
+
+ TRP_ROR_INTERSECT *trp;
+ if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
+ DBUG_RETURN(trp);
+ uint best_num= (ror_scan_mark - tree->ror_scans);
+ if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
+ sizeof(ROR_SCAN_INFO*)*
+ best_num)))
+ DBUG_RETURN(NULL);
+ memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*));
+ trp->last_scan= trp->first_scan + best_num;
+ trp->is_covering= true;
+ trp->read_cost= total_cost;
+ trp->records= records;
+
+ DBUG_RETURN(trp);
+}
+
+
+/*
+ Get best "range" table read plan for given SEL_TREE.
+ Also update PARAM members and store ROR scans info in the SEL_TREE.
SYNOPSIS
get_quick_select_params
- tree in make range select for this SEL_TREE
- param in parameters from test_quick_select
- needed_reg in/out other table data needed by this quick_select
- index_read_must_be_used if true, assume 'index only' option will be set
+ param parameters from test_quick_select
+ tree make range select for this SEL_TREE
+ index_read_must_be_used if true, assume 'index only' option will be set
(except for clustered PK indexes)
- read_time out read time estimate
- records out # of records estimate
- key_to_read out SEL_ARG to be used for creating quick select
+ read_time don't create read plans with cost > read_time.
+ RETURN
+ Best range read plan
+ NULL if no plan found or error occurred
*/
-static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
- key_map& needed_reg,
- bool index_read_must_be_used,
- double *read_time, ha_rows *records,
- SEL_ARG ***key_to_read)
+static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+ bool index_read_must_be_used,
+ double read_time)
{
int idx;
- int result = 1;
+ SEL_ARG **key,**end, **key_to_read= NULL;
+ ha_rows best_records;
+ TRP_RANGE* read_plan= NULL;
bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+ DBUG_ENTER("get_key_scans_params");
+ LINT_INIT(best_records); /* protected by key_to_read */
/*
Note that there may be trees that have type SEL_TREE::KEY but contain no
key reads at all, e.g. tree for expression "key1 is not null" where key1
is defined as "not null".
- */
- SEL_ARG **key,**end;
-
- for (idx= 0,key=tree->keys, end=key+param->keys ;
+ */
+ DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
+ "tree scans"););
+ tree->ror_scans_map.clear_all();
+ tree->n_ror_scans= 0;
+ for (idx= 0,key=tree->keys, end=key+param->keys;
key != end ;
key++,idx++)
{
@@ -1472,19 +2952,24 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
uint keynr= param->real_keynr[idx];
if ((*key)->type == SEL_ARG::MAYBE_KEY ||
(*key)->maybe_flag)
- needed_reg.set_bit(keynr);
+ param->needed_reg->set_bit(keynr);
bool read_index_only= index_read_must_be_used? true :
(bool)param->table->used_keys.is_set(keynr);
- found_records=check_quick_select(param, idx, *key);
+ found_records= check_quick_select(param, idx, *key);
+ if (param->is_ror_scan)
+ {
+ tree->n_ror_scans++;
+ tree->ror_scans_map.set_bit(idx);
+ }
if (found_records != HA_POS_ERROR && found_records > 2 &&
read_index_only &&
(param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) &&
!(pk_is_clustered && keynr == param->table->primary_key))
{
/* We can resolve this by only reading through this key. */
- found_read_time=get_index_only_read_time(param, found_records, keynr);
+ found_read_time= get_index_only_read_time(param,found_records,keynr);
}
else
{
@@ -1497,18 +2982,138 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
found_records)+
(double) found_records / TIME_FOR_COMPARE);
}
- if (*read_time > found_read_time && found_records != HA_POS_ERROR)
+ if (read_time > found_read_time && found_records != HA_POS_ERROR
+ /*|| read_time == DBL_MAX*/ )
+ {
+ read_time= found_read_time;
+ best_records= found_records;
+ key_to_read= key;
+ }
+
+ }
+ }
+
+ DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
+ "ROR scans"););
+ if (key_to_read)
+ {
+ idx= key_to_read - tree->keys;
+ if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+ {
+ read_plan->records= best_records;
+ read_plan->is_ror= tree->ror_scans_map.is_set(idx);
+ read_plan->read_cost= read_time;
+ DBUG_PRINT("info",("Returning range plan for key %s, cost %g",
+ param->table->key_info[param->real_keynr[idx]].name,
+ read_plan->read_cost));
+ }
+ }
+ else
+ DBUG_PRINT("info", ("No 'range' table read plan found"));
+
+ DBUG_RETURN(read_plan);
+}
+
+
+QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc)
+{
+ QUICK_INDEX_MERGE_SELECT *quick_imerge;
+ QUICK_RANGE_SELECT *quick;
+ /* index_merge always retrieves full rows, ignore retrieve_full_rows */
+ if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table)))
+ return NULL;
+
+ quick_imerge->records= records;
+ quick_imerge->read_time= read_cost;
+ for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
+ range_scan++)
+ {
+ if (!(quick= (QUICK_RANGE_SELECT*)
+ ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))||
+ quick_imerge->push_quick_back(quick))
+ {
+ delete quick;
+ delete quick_imerge;
+ return NULL;
+ }
+ }
+ return quick_imerge;
+}
+
+QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc)
+{
+ QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
+ QUICK_RANGE_SELECT *quick;
+ DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
+ MEM_ROOT *alloc;
+
+ if ((quick_intrsect=
+ new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
+ retrieve_full_rows? (!is_covering):false,
+ parent_alloc)))
+ {
+ DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
+ "creating ROR-intersect",
+ first_scan, last_scan););
+ alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
+ for(; first_scan != last_scan;++first_scan)
+ {
+ if (!(quick= get_quick_select(param, (*first_scan)->idx,
+ (*first_scan)->sel_arg, alloc)) ||
+ quick_intrsect->push_quick_back(quick))
+ {
+ delete quick_intrsect;
+ DBUG_RETURN(NULL);
+ }
+ }
+ if (cpk_scan)
+ {
+ if (!(quick= get_quick_select(param, cpk_scan->idx,
+ cpk_scan->sel_arg, alloc)))
{
- *read_time= found_read_time;
- *records= found_records;
- *key_to_read= key;
- result = 0;
+ delete quick_intrsect;
+ DBUG_RETURN(NULL);
}
+ quick_intrsect->cpk_quick= quick;
}
+ quick_intrsect->records= records;
+ quick_intrsect->read_time= read_cost;
}
- return result;
+ DBUG_RETURN(quick_intrsect);
}
+
+QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc)
+{
+ QUICK_ROR_UNION_SELECT *quick_roru;
+ TABLE_READ_PLAN **scan;
+ QUICK_SELECT_I *quick;
+ DBUG_ENTER("TRP_ROR_UNION::make_quick");
+ /*
+ It is impossible to construct a ROR-union that will not retrieve full
+ rows, ignore retrieve_full_rows parameter.
+ */
+ if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
+ {
+ for(scan= first_ror; scan != last_ror; scan++)
+ {
+ if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) ||
+ quick_roru->push_quick_back(quick))
+ DBUG_RETURN(NULL);
+ }
+ quick_roru->records= records;
+ quick_roru->read_time= read_cost;
+ }
+ DBUG_RETURN(quick_roru);
+}
+
+/****************************************************************************/
/* make a select tree of all keys in condition */
static SEL_TREE *get_mm_tree(PARAM *param,COND *cond)
@@ -2928,7 +4533,7 @@ SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
}
- /* Test that the proporties for a red-black tree holds */
+ /* Test that the properties for a red-black tree hold */
#ifdef EXTRA_DEBUG
int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
@@ -3016,38 +4621,115 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
#endif
+/*
+ Calculate estimate of number records that will be retrieved by a range
+ scan on given index using given SEL_ARG intervals tree.
+ SYNOPSIS
+ check_quick_select
+ param Parameter from test_quick_select
+ idx Number of index to use in PARAM::key SEL_TREE::key
+ tree Transformed selection condition, tree->key[idx] holds intervals
+ tree to be used for scanning.
+ NOTES
+ param->is_ror_scan is set to reflect if the key scan is a ROR (see
+ is_key_scan_ror function for more info)
+ param->table->quick_*, param->range_count (and maybe others) are
+ updated with data of given key scan, see check_quick_keys for details.
+
+ RETURN
+ Estimate # of records to be retrieved.
+ HA_POS_ERROR if estimate calculation failed due to table handler problems.
-/*****************************************************************************
-** Check how many records we will find by using the found tree
-*****************************************************************************/
+*/
static ha_rows
check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
{
ha_rows records;
+ bool cpk_scan;
+ uint key;
DBUG_ENTER("check_quick_select");
if (!tree)
DBUG_RETURN(HA_POS_ERROR); // Can't use it
param->max_key_part=0;
param->range_count=0;
+ key= param->real_keynr[idx];
+
if (tree->type == SEL_ARG::IMPOSSIBLE)
DBUG_RETURN(0L); // Impossible select. return
if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
DBUG_RETURN(HA_POS_ERROR); // Don't use tree
+
+ enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
+ if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
+ {
+ /* Records are not ordered by rowid for other types of indexes. */
+ param->is_ror_scan= false;
+ cpk_scan= false;
+ }
+ else
+ {
+ /*
+ Clustered PK scan is a special case, check_quick_keys doesn't recognize
+ CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
+ */
+ cpk_scan= (param->table->primary_key == param->real_keynr[idx]) &&
+ param->table->file->primary_key_is_clustered();
+ param->is_ror_scan= !cpk_scan;
+ }
+
records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
if (records != HA_POS_ERROR)
- {
- uint key=param->real_keynr[idx];
+ {
param->table->quick_keys.set_bit(key);
param->table->quick_rows[key]=records;
param->table->quick_key_parts[key]=param->max_key_part+1;
+
+ if (cpk_scan)
+ param->is_ror_scan= true;
}
DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
DBUG_RETURN(records);
}
+/*
+ Recursively calculate estimate of # rows that will be retrieved by
+ key scan on key idx.
+ SYNOPSIS
+ check_quick_keys()
+ param Parameter from test_quick select function.
+ idx Number of key to use in PARAM::keys in list of used keys
+ (param->real_keynr[idx] holds the key number in table)
+ key_tree SEL_ARG tree being examined.
+ min_key Buffer with partial min key value tuple
+ min_key_flag
+ max_key Buffer with partial max key value tuple
+ max_key_flag
+
+ NOTES
+ The function does the recursive descent on the tree via SEL_ARG::left,
+ SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
+ are calculated using records_in_range calls at the leaf nodes and then
+ summed.
+
+ param->min_key and param->max_key are used to hold prefixes of key value
+ tuples.
+
+ The side effects are:
+
+ param->max_key_part is updated to hold the maximum number of key parts used
+ in scan minus 1.
+
+ param->range_count is incremented if the function finds a range that
+ wasn't counted by the caller.
+
+ param->is_ror_scan is cleared if the function detects that the key scan is
+ not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
+ function for description of which key scans are ROR scans)
+*/
+
static ha_rows
check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
char *min_key,uint min_key_flag, char *max_key,
@@ -3058,6 +4740,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
param->max_key_part=max(param->max_key_part,key_tree->part);
if (key_tree->left != &null_element)
{
+ /*
+ There are at least two intervals for current key part, i.e. condition
+ was converted to something like
+ (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
+ This is not a ROR scan if the key is not Clustered Primary Key.
+ */
+ param->is_ror_scan= false;
records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
max_key,max_key_flag);
if (records == HA_POS_ERROR) // Impossible
@@ -3066,12 +4755,25 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
uint tmp_min_flag,tmp_max_flag,keynr;
char *tmp_min_key=min_key,*tmp_max_key=max_key;
-
+
key_tree->store(param->key[idx][key_tree->part].store_length,
&tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
uint min_key_length= (uint) (tmp_min_key- param->min_key);
uint max_key_length= (uint) (tmp_max_key- param->max_key);
+ if (param->is_ror_scan)
+ {
+ /*
+ If the index doesn't cover entire key, mark the scan as non-ROR scan.
+ (TODO sergeyp: investigate if we could do better here)
+ */
+ uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
+ key_part[key_tree->part].fieldnr - 1;
+ if (param->table->field[fieldnr]->key_length() !=
+ param->key[idx][key_tree->part].part_length)
+ param->is_ror_scan= false;
+ }
+
if (key_tree->next_key_part &&
key_tree->next_key_part->part == key_tree->part+1 &&
key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
@@ -3085,6 +4787,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
tmp_max_key, max_key_flag | key_tree->max_flag);
goto end; // Ugly, but efficient
}
+ else
+ {
+ /* The interval for current key part is not c1 <= keyXpartY <= c1 */
+ param->is_ror_scan= false;
+ }
+
tmp_min_flag=key_tree->min_flag;
tmp_max_flag=key_tree->max_flag;
if (!tmp_min_flag)
@@ -3113,6 +4821,24 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
tmp=1; // Max one record
else
{
+ if (param->is_ror_scan)
+ {
+ /*
+ If we get here, the condition on the key was converted to form
+ "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+ somecond(keyXpart{key_tree->part})"
+ Check if
+ somecond is "keyXpart{key_tree->part} = const" and
+ uncovered "tail" of KeyX parts is either empty or is identical to
+ first members of clustered primary key.
+ */
+ if (!(min_key_length == max_key_length &&
+ !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
+ !key_tree->min_flag && !key_tree->max_flag &&
+ is_key_scan_ror(param, keynr, key_tree->part + 1)))
+ param->is_ror_scan= false;
+ }
+
if (tmp_min_flag & GEOM_FLAG)
{
tmp= param->table->file->
@@ -3143,6 +4869,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
records+=tmp;
if (key_tree->right != &null_element)
{
+ /*
+ There are at least two intervals for current key part, i.e. condition
+ was converted to something like
+ (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
+ This is not a ROR scan if the key is not Clustered Primary Key.
+ */
+ param->is_ror_scan= false;
tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
max_key,max_key_flag);
if (tmp == HA_POS_ERROR)
@@ -3153,11 +4886,94 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
}
-/****************************************************************************
-** change a tree to a structure to be used by quick_select
-** This uses it's own malloc tree
-** The caller should call QUICK_SELCT::init for returned quick select
-****************************************************************************/
+/*
+ Check if key scan on given index with equality conditions on first n key
+ parts is a ROR scan.
+
+ SYNOPSIS
+ is_key_scan_ror()
+ param Parameter from test_quick_select
+ keynr Number of key in the table. The key must not be a clustered
+ primary key.
+ nparts Number of first key parts for which equality conditions
+ are present.
+
+ NOTES
+ ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
+ ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
+
+ An index scan is a ROR scan if it is done using a condition in form
+
+ "key1_1=c_1 AND ... AND key1_n=c_n" (1)
+
+ where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
+
+ and the table has a clustered Primary Key
+
+ PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being
+ identical to uncovered parts ot the key being scanned (2)
+
+ Scans on HASH indexes are not ROR scans,
+ any range scan on clustered primary key is ROR scan (3)
+
+ Check (1) is made in check_quick_keys()
+ Check (3) is made check_quick_select()
+ Check (2) is made by this function.
+
+ RETURN
+ true If the scan is ROR-scan
+ false otherwise
+*/
+
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
+{
+ KEY *table_key= param->table->key_info + keynr;
+ KEY_PART_INFO *key_part= table_key->key_part + nparts;
+ KEY_PART_INFO *key_part_end= table_key->key_part +
+ table_key->key_parts;
+
+ if (key_part == key_part_end)
+ return true;
+ uint pk_number= param->table->primary_key;
+ if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
+ return false;
+
+ KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
+ KEY_PART_INFO *pk_part_end= pk_part +
+ param->table->key_info[pk_number].key_parts;
+ for(;(key_part!=key_part_end) && (pk_part != pk_part_end);
+ ++key_part, ++pk_part)
+ {
+ if ((key_part->field != pk_part->field) ||
+ (key_part->length != pk_part->length))
+ return false;
+ }
+ return (key_part == key_part_end);
+}
+
+
+/*
+ Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
+
+ SYNOPSIS
+ get_quick_select()
+ param
+ idx Index of used key in param->key.
+ key_tree SEL_ARG tree for the used key
+ parent_alloc If not NULL, use it to allocate memory for
+ quick select data. Otherwise use quick->alloc.
+
+ RETURN
+ NULL on error
+ otherwise created quick select
+
+ NOTES
+ The caller must call QUICK_SELCT::init for returned quick select
+
+ CAUTION! This function may change THR_MALLOC to a MEM_ROOT which will be
+ deallocated when the returned quick select is deleted.
+*/
+
QUICK_RANGE_SELECT *
get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
MEM_ROOT *parent_alloc)
@@ -3346,6 +5162,47 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
}
+bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields)
+{
+ return check_if_key_used(head, index, *fields);
+}
+
+bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields)
+{
+ QUICK_RANGE_SELECT *quick;
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (check_if_key_used(head, quick->index, *fields))
+ return 1;
+ }
+ return 0;
+}
+
+bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields)
+{
+ QUICK_RANGE_SELECT *quick;
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (check_if_key_used(head, quick->index, *fields))
+ return 1;
+ }
+ return 0;
+}
+
+bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields)
+{
+ QUICK_SELECT_I *quick;
+ List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (quick->check_if_keys_used(fields))
+ return 1;
+ }
+ return 0;
+}
+
/****************************************************************************
Create a QUICK RANGE based on a key
****************************************************************************/
@@ -3426,8 +5283,6 @@ err:
}
-#define MEM_STRIP_BUF_SIZE thd->variables.sortbuff_size
-
/*
Fetch all row ids into unique.
@@ -3461,9 +5316,9 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
cur_quick_select->init();
- unique= new Unique(refposcmp2, (void *) &head->file->ref_length,
+ unique= new Unique(refpos_order_cmp, (void *)head->file,
head->file->ref_length,
- MEM_STRIP_BUF_SIZE);
+ thd->variables.sortbuff_size);
if (!unique)
DBUG_RETURN(1);
do
@@ -3496,8 +5351,7 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
continue;
cur_quick_select->file->position(cur_quick_select->record);
- result= unique->unique_add((char*)cur_quick_select->file->ref);
-
+ result= unique->unique_add((char*)cur_quick_select->file->ref);
if (result)
DBUG_RETURN(1);
@@ -3550,6 +5404,159 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
DBUG_RETURN(result);
}
+
+/*
+ Retrieve next record.
+ SYNOPSIS
+ QUICK_ROR_INTERSECT_SELECT::get_next()
+
+ NOTES
+ Invariant on enter/exit: all intersected selects have retrieved all index
+ records with rowid <= some_rowid_val and no intersected select has
+ retrieved any index records with rowid > some_rowid_val.
+ We start fresh and loop until we have retrieved the same rowid in each of
+ the key scans or we got an error.
+
+ If a Clustered PK scan is present, it is used only to check if row
+ satisfies its condition (and never used for row retrieval).
+
+ RETURN
+ 0 - Ok
+ other - Error code if any error occurred.
+*/
+
+int QUICK_ROR_INTERSECT_SELECT::get_next()
+{
+ List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
+ QUICK_RANGE_SELECT* quick;
+ int error, cmp;
+ uint last_rowid_count=0;
+ DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");
+
+ /* Get a rowid for first quick and save it as a 'candidate' */
+ quick= quick_it++;
+ if (cpk_quick)
+ {
+ do {
+ error= quick->get_next();
+ }while (!error && !cpk_quick->row_in_ranges());
+ }
+ else
+ error= quick->get_next();
+
+ if (error)
+ DBUG_RETURN(error);
+
+ quick->file->position(quick->record);
+ memcpy(last_rowid, quick->file->ref, head->file->ref_length);
+ last_rowid_count= 1;
+
+ while (last_rowid_count < quick_selects.elements)
+ {
+ if (!(quick= quick_it++))
+ {
+ quick_it.rewind();
+ quick= quick_it++;
+ }
+
+ do {
+ if ((error= quick->get_next()))
+ DBUG_RETURN(error);
+ quick->file->position(quick->record);
+ cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
+ } while (cmp < 0);
+
+ /* Ok, current select 'caught up' and returned ref >= cur_ref */
+ if (cmp > 0)
+ {
+ /* Found a row with ref > cur_ref. Make it a new 'candidate' */
+ if (cpk_quick)
+ {
+ while (!cpk_quick->row_in_ranges())
+ {
+ if ((error= quick->get_next()))
+ DBUG_RETURN(error);
+ }
+ }
+ memcpy(last_rowid, quick->file->ref, head->file->ref_length);
+ last_rowid_count= 1;
+ }
+ else
+ {
+ /* current 'candidate' row confirmed by this select */
+ last_rowid_count++;
+ }
+ }
+
+ /* We get here iff we got the same row ref in all scans. */
+ if (need_to_fetch_row)
+ error= head->file->rnd_pos(head->record[0], last_rowid);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Retrieve next record.
+ SYNOPSIS
+ QUICK_ROR_UNION_SELECT::get_next()
+
+ NOTES
+ Enter/exit invariant:
+ For each quick select in the queue a {key,rowid} tuple has been
+ retrieved but the corresponding row hasn't been passed to output.
+
+ RETURN
+ 0 - Ok
+ other - Error code if any error occurred.
+*/
+
+int QUICK_ROR_UNION_SELECT::get_next()
+{
+ int error, dup_row;
+ QUICK_SELECT_I *quick;
+ byte *tmp;
+ DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
+
+ do
+ {
+ if (!queue.elements)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ /* Ok, we have a queue with >= 1 scans */
+
+ quick= (QUICK_SELECT_I*)queue_top(&queue);
+ memcpy(cur_rowid, quick->last_rowid, rowid_length);
+
+ /* put into queue rowid from the same stream as top element */
+ if ((error= quick->get_next()))
+ {
+ if (error != HA_ERR_END_OF_FILE)
+ DBUG_RETURN(error);
+ queue_remove(&queue, 0);
+ }
+ else
+ {
+ quick->save_last_pos();
+ queue_replaced(&queue);
+ }
+
+ if (!have_prev_rowid)
+ {
+ /* No rows have been returned yet */
+ dup_row= false;
+ have_prev_rowid= true;
+ }
+ else
+ dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
+ }while (dup_row);
+
+ tmp= cur_rowid;
+ cur_rowid= prev_rowid;
+ prev_rowid= tmp;
+
+ error= head->file->rnd_pos(quick->record, prev_rowid);
+ DBUG_RETURN(error);
+}
+
/* get next possible record using quick-struct */
int QUICK_RANGE_SELECT::get_next()
@@ -3969,6 +5976,232 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
#endif
+void QUICK_RANGE_SELECT::add_info_string(String *str)
+{
+ KEY *key_info= head->key_info + index;
+ str->append(key_info->name);
+}
+
+void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
+{
+ QUICK_RANGE_SELECT *quick;
+ bool first= true;
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ str->append("sort_union(");
+ while ((quick= it++))
+ {
+ if (!first)
+ str->append(',');
+ else
+ first= false;
+ quick->add_info_string(str);
+ }
+ if (pk_quick_select)
+ {
+ str->append(',');
+ pk_quick_select->add_info_string(str);
+ }
+ str->append(')');
+}
+
+void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
+{
+ bool first= true;
+ QUICK_RANGE_SELECT *quick;
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ str->append("intersect(");
+ while ((quick= it++))
+ {
+ KEY *key_info= head->key_info + quick->index;
+ if (!first)
+ str->append(',');
+ else
+ first= false;
+ str->append(key_info->name);
+ }
+ if (cpk_quick)
+ {
+ KEY *key_info= head->key_info + cpk_quick->index;
+ str->append(',');
+ str->append(key_info->name);
+ }
+ str->append(')');
+}
+
+void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
+{
+ bool first= true;
+ QUICK_SELECT_I *quick;
+ List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+ str->append("union(");
+ while ((quick= it++))
+ {
+ if (!first)
+ str->append(',');
+ else
+ first= false;
+ quick->add_info_string(str);
+ }
+ str->append(')');
+}
+
+
+void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
+ String *used_lengths)
+{
+ char buf[64];
+ uint length;
+ KEY *key_info= head->key_info + index;
+ key_names->append(key_info->name);
+ length= longlong2str(max_used_key_length, buf, 10) - buf;
+ used_lengths->append(buf, length);
+}
+
+void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
+ String *used_lengths)
+{
+ char buf[64];
+ uint length;
+ bool first= true;
+ QUICK_RANGE_SELECT *quick;
+
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (first)
+ first= false;
+ else
+ {
+ key_names->append(',');
+ used_lengths->append(',');
+ }
+
+ KEY *key_info= head->key_info + quick->index;
+ key_names->append(key_info->name);
+ length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+ used_lengths->append(buf, length);
+ }
+ if (pk_quick_select)
+ {
+ KEY *key_info= head->key_info + pk_quick_select->index;
+ key_names->append(',');
+ key_names->append(key_info->name);
+ length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
+ used_lengths->append(',');
+ used_lengths->append(buf, length);
+ }
+}
+
+void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
+ String *used_lengths)
+{
+ char buf[64];
+ uint length;
+ bool first= true;
+ QUICK_RANGE_SELECT *quick;
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ while ((quick= it++))
+ {
+ KEY *key_info= head->key_info + quick->index;
+ if (first)
+ first= false;
+ else
+ {
+ key_names->append(',');
+ used_lengths->append(',');
+ }
+ key_names->append(key_info->name);
+ length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+ used_lengths->append(buf, length);
+ }
+
+ if (cpk_quick)
+ {
+ KEY *key_info= head->key_info + cpk_quick->index;
+ key_names->append(',');
+ key_names->append(key_info->name);
+ length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
+ used_lengths->append(',');
+ used_lengths->append(buf, length);
+ }
+}
+
+void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
+ String *used_lengths)
+{
+ bool first= true;
+ QUICK_SELECT_I *quick;
+ List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+ while ((quick= it++))
+ {
+ if (first)
+ first= false;
+ else
+ {
+ used_lengths->append(',');
+ key_names->append(',');
+ }
+ quick->add_keys_and_lengths(key_names, used_lengths);
+ }
+}
+
+#ifndef DBUG_OFF
+
+static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
+ const char *msg)
+{
+ SEL_ARG **key,**end;
+ int idx;
+ char buff[1024];
+ DBUG_ENTER("print_sel_tree");
+ if (! _db_on_)
+ DBUG_VOID_RETURN;
+
+ String tmp(buff,sizeof(buff),&my_charset_bin);
+ tmp.length(0);
+ for (idx= 0,key=tree->keys, end=key+param->keys ;
+ key != end ;
+ key++,idx++)
+ {
+ if (tree_map->is_set(idx))
+ {
+ uint keynr= param->real_keynr[idx];
+ if (tmp.length())
+ tmp.append(',');
+ tmp.append(param->table->key_info[keynr].name);
+ }
+ }
+ if (!tmp.length())
+ tmp.append("(empty)");
+
+ DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr()));
+
+ DBUG_VOID_RETURN;
+}
+
+static void print_ror_scans_arr(TABLE *table, const char *msg,
+ struct st_ror_scan_info **start,
+ struct st_ror_scan_info **end)
+{
+ DBUG_ENTER("print_ror_scans");
+ if (! _db_on_)
+ DBUG_VOID_RETURN;
+
+ char buff[1024];
+ String tmp(buff,sizeof(buff),&my_charset_bin);
+ tmp.length(0);
+ for(;start != end; start++)
+ {
+ if (tmp.length())
+ tmp.append(',');
+ tmp.append(table->key_info[(*start)->keynr].name);
+ }
+ if (!tmp.length())
+ tmp.append("(empty)");
+ DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr()));
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************************
** Print a quick range for debugging
** TODO:
@@ -3976,8 +6209,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
** of locking the DEBUG stream !
*****************************************************************************/
-#ifndef DBUG_OFF
-
static void
print_key(KEY_PART *key_part,const char *key,uint used_length)
{
@@ -4010,72 +6241,122 @@ print_key(KEY_PART *key_part,const char *key,uint used_length)
}
-static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick,
- const key_map *needed_reg)
+static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
{
+ char buf[MAX_KEY/8+1];
DBUG_ENTER("print_param");
if (! _db_on_ || !quick)
DBUG_VOID_RETURN;
+ DBUG_LOCK_FILE;
+
+ quick->dbug_dump(0, true);
+ fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));
- List_iterator_fast<QUICK_RANGE_SELECT> it(quick->quick_selects);
- QUICK_RANGE_SELECT* quick_range_sel;
- while ((quick_range_sel= it++))
- {
- print_quick_sel_range(quick_range_sel, needed_reg);
- }
- if (quick->pk_quick_select)
- print_quick_sel_range(quick->pk_quick_select, needed_reg);
-
+ DBUG_UNLOCK_FILE;
DBUG_VOID_RETURN;
}
-void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
- const key_map *needed_reg)
+static void print_rowid(byte* val, int len)
{
- QUICK_RANGE *range;
- char buf[MAX_KEY/8+1];
- DBUG_ENTER("print_param");
- if (! _db_on_ || !quick)
- DBUG_VOID_RETURN;
-
+ byte *pb;
DBUG_LOCK_FILE;
- fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n",
- quick->index, needed_reg->print(buf));
+ fputc('\"', DBUG_FILE);
+ for (pb= val; pb!= val + len; ++pb)
+ fprintf(DBUG_FILE, "%c", *pb);
+ fprintf(DBUG_FILE, "\", hex: ");
+
+ for (pb= val; pb!= val + len; ++pb)
+ fprintf(DBUG_FILE, "%x ", *pb);
+ fputc('\n', DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+}
- QUICK_RANGE **pr= (QUICK_RANGE**)quick->ranges.buffer;
- QUICK_RANGE **last_range= pr + quick->ranges.elements;
- for (; pr!=last_range; ++pr)
+void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
+{
+ fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
+ indent, "", head->key_info[index].name, max_used_key_length);
+
+ if (verbose)
{
- range= *pr;
- if (!(range->flag & NO_MIN_RANGE))
+ QUICK_RANGE *range;
+ QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
+ QUICK_RANGE **last_range= pr + ranges.elements;
+ for (; pr!=last_range; ++pr)
{
- print_key(quick->key_parts,range->min_key,range->min_length);
- if (range->flag & NEAR_MIN)
- fputs(" < ",DBUG_FILE);
- else
- fputs(" <= ",DBUG_FILE);
- }
- fputs("X",DBUG_FILE);
+ fprintf(DBUG_FILE, "%*s", indent + 2, "");
+ range= *pr;
+ if (!(range->flag & NO_MIN_RANGE))
+ {
+ print_key(key_parts,range->min_key,range->min_length);
+ if (range->flag & NEAR_MIN)
+ fputs(" < ",DBUG_FILE);
+ else
+ fputs(" <= ",DBUG_FILE);
+ }
+ fputs("X",DBUG_FILE);
- if (!(range->flag & NO_MAX_RANGE))
- {
- if (range->flag & NEAR_MAX)
- fputs(" < ",DBUG_FILE);
- else
- fputs(" <= ",DBUG_FILE);
- print_key(quick->key_parts,range->max_key,range->max_length);
+ if (!(range->flag & NO_MAX_RANGE))
+ {
+ if (range->flag & NEAR_MAX)
+ fputs(" < ",DBUG_FILE);
+ else
+ fputs(" <= ",DBUG_FILE);
+ print_key(key_parts,range->max_key,range->max_length);
+ }
+ fputs("\n",DBUG_FILE);
}
- fputs("\n",DBUG_FILE);
}
- DBUG_UNLOCK_FILE;
- DBUG_VOID_RETURN;
+}
+
+void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
+{
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ QUICK_RANGE_SELECT *quick;
+ fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
+ fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+ while ((quick= it++))
+ quick->dbug_dump(indent+2, verbose);
+ if (pk_quick_select)
+ {
+ fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
+ pk_quick_select->dbug_dump(indent+2, verbose);
+ }
+ fprintf(DBUG_FILE, "%*s}\n", indent, "");
+}
+
+void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
+{
+ List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+ QUICK_RANGE_SELECT *quick;
+ fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
+ indent, "", need_to_fetch_row? "":"non-");
+ fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+ while ((quick= it++))
+ quick->dbug_dump(indent+2, verbose);
+ if (cpk_quick)
+ {
+ fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
+ cpk_quick->dbug_dump(indent+2, verbose);
+ }
+ fprintf(DBUG_FILE, "%*s}\n", indent, "");
+}
+
+void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
+{
+ List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+ QUICK_SELECT_I *quick;
+ fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
+ fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
+ while ((quick= it++))
+ quick->dbug_dump(indent+2, verbose);
+ fprintf(DBUG_FILE, "%*s}\n", indent, "");
}
#endif
/*****************************************************************************
-** Instansiate templates
+** Instantiate templates
*****************************************************************************/
#ifdef __GNUC__
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 30e0fcd7be5..22168a63f8e 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -78,25 +78,59 @@ public:
ha_rows records; /* estimate of # of records to be retrieved */
double read_time; /* time to perform this retrieval */
TABLE *head;
+ /*
+ Index this quick select uses, or MAX_KEY for quick selects
+ that use several indexes
+ */
+ uint index;
/*
- the only index this quick select uses, or MAX_KEY for
- QUICK_INDEX_MERGE_SELECT
+ Total length of first used_key_parts parts of the key.
+ Applicable if index!= MAX_KEY.
*/
- uint index;
- uint max_used_key_length, used_key_parts;
+ uint max_used_key_length;
+
+ /*
+ Max. number of (first) key parts this quick select uses for retrieval.
+ eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2.
+ Applicable if index!= MAX_KEY.
+ */
+ uint used_key_parts;
QUICK_SELECT_I();
virtual ~QUICK_SELECT_I(){};
- /*
- Call init() immediately after creation of quick select. if init() call
- fails, reset() or get_next() must not be called.
+
+ /*
+ Do post-constructor initialization.
+ SYNOPSIS
+ init()
+
+ init() performs initializations that should have been in constructor if
+ it was possible to return errors from constructors. The join optimizer may
+ create and then delete quick selects without retrieving any rows so init()
+ must not contain any IO or CPU intensive code.
+
+ If init() call fails the only valid action is to delete this quick select,
+ reset() and get_next() must not be called.
+
+ RETURN
+ 0 OK
+ other Error code
*/
virtual int init() = 0;
/*
- Call reset() before first get_next call. get_next must not be called if
- reset() call fails.
+ Initialize quick select for row retrieval.
+ SYNOPSIS
+ reset()
+
+ reset() should be called when it is certain that row retrieval will be
+ necessary. This call may do heavyweight initialization like buffering first
+ N records etc. If reset() call fails get_next() must not be called.
+
+ RETURN
+ 0 OK
+ other Error code
*/
virtual int reset(void) = 0;
virtual int get_next() = 0; /* get next record to retrieve */
@@ -107,27 +141,96 @@ public:
QS_TYPE_RANGE = 0,
QS_TYPE_INDEX_MERGE = 1,
QS_TYPE_RANGE_DESC = 2,
- QS_TYPE_FULLTEXT = 3
+ QS_TYPE_FULLTEXT = 3,
+ QS_TYPE_ROR_INTERSECT = 4,
+ QS_TYPE_ROR_UNION = 5,
};
- /* Get type of this quick select - one of the QS_* values */
- virtual int get_type() = 0;
+ /* Get type of this quick select - one of the QS_TYPE_* values */
+ virtual int get_type() = 0;
+
+ /*
+ Initialize this quick select as a merged scan inside a ROR-union or a ROR-
+ intersection scan. The caller must not additionally call init() if this
+ function is called.
+ SYNOPSIS
+ init_ror_merged_scan()
+ reuse_handler quick select may use (q: psergey??)
+ (q: is this natural that we do it this way)
+ NOTES
+ psergey?
+ */
+ virtual int init_ror_merged_scan(bool reuse_handler)
+ { DBUG_ASSERT(0); return 1; }
+
+ /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */
+ virtual void save_last_pos(){};
+
+ /*
+ Append comma-separated list of keys this quick select uses to key_names;
+ append comma-separated list of corresponding used lengths to used_lengths.
+ This is used by select_describe.
+ */
+ virtual void add_keys_and_lengths(String *key_names,
+ String *used_lengths)=0;
+
+ /*
+ Append text representation of quick select structure (what and how is
+ merged) to str. The result is added to "Extra" field in EXPLAIN output.
+ This function is implemented only by quick selects that merge other quick
+ selects output and/or can produce output suitable for merging.
+ */
+ virtual void add_info_string(String *str) {};
+ /*
+ Return 1 if any index used by this quick select
+ a) uses field that is listed in passed field list or
+ b) is automatically updated (like a timestamp)
+ */
+ virtual bool check_if_keys_used(List<Item> *fields);
+
+ /*
+ rowid of last row retrieved by this quick select. This is used only when
+ doing ROR-index_merge selects
+ */
+ byte *last_rowid;
+
+ /*
+ Table record buffer used by this quick select.
+ Currently this is always the same as head->record[0]. psergey: check that!
+ */
+ byte *record;
+#ifndef DBUG_OFF
+ /*
+ Print quick select information to DBUG_FILE. Caller is responsible
+ for locking DBUG_FILE before this call and unlocking it afterwards.
+ */
+ virtual void dbug_dump(int indent, bool verbose)= 0;
+#endif
};
+
struct st_qsel_param;
class SEL_ARG;
-class QUICK_RANGE_SELECT : public QUICK_SELECT_I
+/*
+ Quick select that does a range scan on a single key. The records are
+ returned in key order.
+*/
+class QUICK_RANGE_SELECT : public QUICK_SELECT_I
{
protected:
bool next,dont_free;
public:
int error;
+protected:
handler *file;
- byte *record;
+ /*
+ If true, this quick select has its "own" handler object which should be
+ closed no later then this quick select is deleted.
+ */
+ bool free_file;
+
protected:
- friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
- const key_map* needed_reg);
friend
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
@@ -141,17 +244,19 @@ protected:
MEM_ROOT *alloc);
friend class QUICK_SELECT_DESC;
friend class QUICK_INDEX_MERGE_SELECT;
+ friend class QUICK_ROR_INTERSECT_SELECT;
DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */
QUICK_RANGE **cur_range; /* current element in ranges */
QUICK_RANGE *range;
- MEM_ROOT alloc;
KEY_PART *key_parts;
int cmp_next(QUICK_RANGE *range);
int cmp_prev(QUICK_RANGE *range);
bool row_in_ranges();
public:
+ MEM_ROOT alloc;
+
QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
MEM_ROOT *parent_alloc=NULL);
~QUICK_RANGE_SELECT();
@@ -167,7 +272,17 @@ public:
int get_next();
bool reverse_sorted() { return 0; }
bool unique_key_range();
+ int init_ror_merged_scan(bool reuse_handler);
+ void save_last_pos()
+ {
+ file->position(record);
+ };
int get_type() { return QS_TYPE_RANGE; }
+ void add_keys_and_lengths(String *key_names, String *used_lengths);
+ void add_info_string(String *str);
+#ifndef DBUG_OFF
+ void dbug_dump(int indent, bool verbose);
+#endif
};
@@ -253,6 +368,12 @@ public:
bool reverse_sorted() { return false; }
bool unique_key_range() { return false; }
int get_type() { return QS_TYPE_INDEX_MERGE; }
+ void add_keys_and_lengths(String *key_names, String *used_lengths);
+ void add_info_string(String *str);
+ bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+ void dbug_dump(int indent, bool verbose);
+#endif
bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
@@ -263,9 +384,6 @@ public:
List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it;
QUICK_RANGE_SELECT* cur_quick_select;
- /* last element in quick_selects list */
- QUICK_RANGE_SELECT* last_quick_select;
-
/* quick select that uses clustered primary key (NULL if none) */
QUICK_RANGE_SELECT* pk_quick_select;
@@ -277,12 +395,120 @@ public:
THD *thd;
int prepare_unique();
- bool reset_called;
/* used to get rows collected in Unique */
READ_RECORD read_record;
};
+
+/*
+ Rowid-Ordered Retrieval (ROR) index intersection quick select.
+ This quick select produces intersection of row sequences returned
+ by several QUICK_RANGE_SELECTs it "merges".
+
+ All merged QUICK_RANGE_SELECTs must return rowids in rowid order.
+ QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too.
+
+ All merged quick selects retrieve {rowid, covered_fields} tuples (not full
+ table records).
+ QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used
+ by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't
+ cover needed all fields.
+
+ If one of the merged quick selects is a Clustered PK range scan, it is
+ used only to filter rowid sequence produced by other merged quick selects.
+*/
+
+class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I
+{
+public:
+ QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table,
+ bool retrieve_full_rows,
+ MEM_ROOT *parent_alloc);
+ ~QUICK_ROR_INTERSECT_SELECT();
+
+ int init();
+ int reset(void);
+ int get_next();
+ bool reverse_sorted() { return false; }
+ bool unique_key_range() { return false; }
+ int get_type() { return QS_TYPE_ROR_INTERSECT; }
+ void add_keys_and_lengths(String *key_names, String *used_lengths);
+ void add_info_string(String *str);
+ bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+ void dbug_dump(int indent, bool verbose);
+#endif
+ int init_ror_merged_scan(bool reuse_handler);
+ bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
+
+ /*
+ Range quick selects this intersection consists of, not including
+ cpk_quick.
+ */
+ List<QUICK_RANGE_SELECT> quick_selects;
+
+ /*
+ Merged quick select that uses Clustered PK, if there is one. This quick
+ select is not used for row retrieval, it is used for row retrieval.
+ */
+ QUICK_RANGE_SELECT *cpk_quick;
+
+ MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */
+ THD *thd; /* current thread */
+ bool need_to_fetch_row; /* if true, do retrieve full table records. */
+};
+
+
+/*
+ Rowid-Ordered Retrieval index union select.
+ This quick select produces union of row sequences returned by several
+ quick select it "merges".
+
+ All merged quick selects must return rowids in rowid order.
+ QUICK_ROR_UNION_SELECT will return rows in rowid order, too.
+
+ All merged quick selects are set not to retrieve full table records.
+ ROR-union quick select always retrieves full records.
+
+*/
+
+class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I
+{
+public:
+ QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table);
+ ~QUICK_ROR_UNION_SELECT();
+
+ int init();
+ int reset(void);
+ int get_next();
+ bool reverse_sorted() { return false; }
+ bool unique_key_range() { return false; }
+ int get_type() { return QS_TYPE_ROR_UNION; }
+ void add_keys_and_lengths(String *key_names, String *used_lengths);
+ void add_info_string(String *str);
+ bool check_if_keys_used(List<Item> *fields);
+#ifndef DBUG_OFF
+ void dbug_dump(int indent, bool verbose);
+#endif
+
+ bool push_quick_back(QUICK_SELECT_I *quick_sel_range);
+
+ List<QUICK_SELECT_I> quick_selects; /* Merged quick selects */
+
+ QUEUE queue; /* Priority queue for merge operation */
+ MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */
+
+ THD *thd; /* current thread */
+ byte *cur_rowid; /* buffer used in get_next() */
+ byte *prev_rowid; /* rowid of last row returned by get_next() */
+ bool have_prev_rowid; /* true if prev_rowid has valid data */
+ uint rowid_length; /* table rowid length */
+private:
+ static int queue_cmp(void *arg, byte *val1, byte *val2);
+};
+
+
class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
{
public:
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index b9769e3a940..440d404056b 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -291,10 +291,10 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
#define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size
-extern "C" int refposcmp2(void* arg, const void *a,const void *b)
+extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b)
{
- /* arg is a pointer to file->ref_length */
- return memcmp(a,b, *(int*) arg);
+ handler *file= (handler*)arg;
+ return file->cmp_ref((const byte*)a, (const byte*)b);
}
multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt,
@@ -358,8 +358,8 @@ multi_delete::initialize_tables(JOIN *join)
for (walk=walk->next ; walk ; walk=walk->next)
{
TABLE *table=walk->table;
- *tempfiles_ptr++= new Unique (refposcmp2,
- (void *) &table->file->ref_length,
+ *tempfiles_ptr++= new Unique (refpos_order_cmp,
+ (void *) table->file,
table->file->ref_length,
MEM_STRIP_BUF_SIZE);
}
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 9d6a6999591..89a08d5a4ea 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8058,8 +8058,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
}
else if (select && select->quick) // Range found by opt_range
{
- /* assume results are not ordered when index merge is used */
- if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+ int quick_type= select->quick->get_type();
+ /*
+ assume results are not ordered when index merge is used
+ TODO: sergeyp: Results of all index merge selects actually are ordered
+ by clustered PK values.
+ */
+
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
DBUG_RETURN(0);
ref_key= select->quick->index;
ref_key_parts= select->quick->used_key_parts;
@@ -8120,9 +8128,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
*/
if (!select->quick->reverse_sorted())
{
+ int quick_type= select->quick->get_type();
if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST ||
- (select->quick->get_type() ==
- QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
+ quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)
DBUG_RETURN(0); // Use filesort
// ORDER BY range_key DESC
@@ -10102,6 +10112,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
select_result *result=join->result;
Item *item_null= new Item_null();
CHARSET_INFO *cs= &my_charset_latin1;
+ int quick_type;
DBUG_ENTER("select_describe");
DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s",
(ulong)join->select_lex, join->select_lex->type,
@@ -10128,17 +10139,20 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
{
JOIN_TAB *tab=join->join_tab+i;
TABLE *table=tab->table;
- char buff[512],*buff_ptr=buff;
+ char buff[512];
char buff1[512], buff2[512], buff3[512];
char keylen_str_buf[64];
char derived_name[64];
+ String extra(buff, sizeof(buff),cs);
String tmp1(buff1,sizeof(buff1),cs);
String tmp2(buff2,sizeof(buff2),cs);
String tmp3(buff3,sizeof(buff3),cs);
+ extra.length(0);
tmp1.length(0);
tmp2.length(0);
tmp3.length(0);
+ quick_type= -1;
item_list.empty();
item_list.push_back(new Item_int((int32)
join->select_lex->select_number));
@@ -10147,8 +10161,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
cs));
if (tab->type == JT_ALL && tab->select && tab->select->quick)
{
- if (tab->select->quick->get_type() ==
- QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+ quick_type= tab->select->quick->get_type();
+ if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
+ (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
+ (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
tab->type = JT_INDEX_MERGE;
else
tab->type = JT_RANGE;
@@ -10169,6 +10185,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
strlen(join_type_str[tab->type]),
cs));
uint j;
+ /* Build "possible_keys" value and add it to item_list */
if (!tab->keys.is_clear_all())
{
for (j=0 ; j < table->keys ; j++)
@@ -10185,6 +10202,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs));
else
item_list.push_back(item_null);
+
+ /* Build "key", "key_len", and "ref" values and add them to item_list */
if (tab->ref.key_parts)
{
KEY *key_info=table->key_info+ tab->ref.key;
@@ -10219,48 +10238,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
}
else if (tab->select && tab->select->quick)
{
- if (tab->select->quick->get_type() ==
- QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
- {
- QUICK_INDEX_MERGE_SELECT *quick_imerge=
- (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
- QUICK_RANGE_SELECT *quick;
-
- List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->
- quick_selects);
- while ((quick= it++))
- {
- KEY *key_info= table->key_info + quick->index;
- register uint length;
- if (tmp3.length())
- tmp3.append(',');
-
- tmp3.append(key_info->name);
-
- if (tmp2.length())
- tmp2.append(',');
-
- length= longlong2str(quick->max_used_key_length, keylen_str_buf,
- 10) -
- keylen_str_buf;
-
- tmp2.append(keylen_str_buf, length);
- }
- }
- else
- {
- KEY *key_info= table->key_info + tab->select->quick->index;
- register uint length;
- tmp3.append(key_info->name);
-
- length= longlong2str(tab->select->quick->max_used_key_length,
- keylen_str_buf, 10) -
- keylen_str_buf;
- tmp2.append(keylen_str_buf, length);
- }
-
- item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+ tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3);
item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+ item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
item_list.push_back(item_null);
}
else
@@ -10269,50 +10249,68 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
item_list.push_back(item_null);
item_list.push_back(item_null);
}
+ /* Add "rows" field to item_list. */
item_list.push_back(new Item_int((longlong) (ulonglong)
join->best_positions[i]. records_read,
21));
+ /* Build "Extra" field and add it to item_list. */
my_bool key_read=table->key_read;
if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
table->used_keys.is_set(tab->index))
key_read=1;
-
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&
+ !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row)
+ key_read=1;
+
if (tab->info)
item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs));
else
{
+ if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+ quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
+ {
+ extra.append("; Using ");
+ tab->select->quick->add_info_string(&extra);
+ }
if (tab->select)
{
if (tab->use_quick == 2)
{
char buf[MAX_KEY/8+1];
- sprintf(buff_ptr,"; Range checked for each record (index map: 0x%s)",
- tab->keys.print(buf));
- buff_ptr=strend(buff_ptr);
+ extra.append("; Range checked for each record (index map: 0x");
+ extra.append(tab->keys.print(buf));
+ extra.append(')');
}
else
- buff_ptr=strmov(buff_ptr,"; Using where");
+ extra.append("; Using where");
}
if (key_read)
- buff_ptr= strmov(buff_ptr,"; Using index");
+ extra.append("; Using index");
if (table->reginfo.not_exists_optimize)
- buff_ptr= strmov(buff_ptr,"; Not exists");
+ extra.append("; Not exists");
if (need_tmp_table)
{
need_tmp_table=0;
- buff_ptr= strmov(buff_ptr,"; Using temporary");
+ extra.append("; Using temporary");
}
if (need_order)
{
need_order=0;
- buff_ptr= strmov(buff_ptr,"; Using filesort");
+ extra.append("; Using filesort");
}
if (distinct & test_all_bits(used_tables,thd->used_tables))
- buff_ptr= strmov(buff_ptr,"; Distinct");
- if (buff_ptr == buff)
- buff_ptr+= 2; // Skip inital "; "
- item_list.push_back(new Item_string(buff+2,(uint) (buff_ptr - buff)-2,
- cs));
+ extra.append("; Distinct");
+
+ /* Skip initial "; "*/
+ const char *str= extra.ptr();
+ uint32 len= extra.length();
+ if (len)
+ {
+ str += 2;
+ len -= 2;
+ }
+ item_list.push_back(new Item_string(str, len, cs));
}
// For next iteration
used_tables|=table->map;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index b8a56fda757..b2864c166ce 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -339,7 +339,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys);
int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);
/* from sql_delete.cc, used by opt_range.cc */
-extern "C" int refposcmp2(void* arg, const void *a,const void *b);
+extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b);
/* class to copying an field/item to a key struct */
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 90a43f82a94..5c467402497 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -182,37 +182,8 @@ TEST_join(JOIN *join)
tab->select->quick_keys.print(buf));
else if (tab->select->quick)
{
- int quick_type= tab->select->quick->get_type();
- if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) ||
- (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC))
- {
- fprintf(DBUG_FILE,
- " quick select used on key %s, length: %d\n",
- form->key_info[tab->select->quick->index].name,
- tab->select->quick->max_used_key_length);
- }
- else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
- {
- QUICK_INDEX_MERGE_SELECT *quick_imerge=
- (QUICK_INDEX_MERGE_SELECT*)tab->select->quick;
- QUICK_RANGE_SELECT *quick;
- fprintf(DBUG_FILE,
- " index_merge quick select used\n");
-
- List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->quick_selects);
- while ((quick = it++))
- {
- fprintf(DBUG_FILE,
- " range quick select: key %s, length: %d\n",
- form->key_info[quick->index].name,
- quick->max_used_key_length);
- }
- }
- else
- {
- fprintf(DBUG_FILE,
- " quick select of unknown nature used\n");
- }
+ fprintf(DBUG_FILE, " quick select used:\n");
+ tab->select->quick->dbug_dump(18, false);
}
else
VOID(fputs(" select used\n",DBUG_FILE));
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 5c6ed023485..b85ddff84f9 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -156,18 +156,11 @@ int mysql_update(THD *thd,
}
init_ftfuncs(thd, &thd->lex->select_lex, 1);
/* Check if we are modifying a key that we are used to search with */
+
if (select && select->quick)
{
- if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
- {
- used_index= select->quick->index;
- used_key_is_modified= (!select->quick->unique_key_range() &&
- check_if_key_used(table,used_index,fields));
- }
- else
- {
- used_key_is_modified= true;
- }
+ used_key_is_modified= (!select->quick->unique_key_range() &&
+ select->quick->check_if_keys_used(&fields));
}
else if ((used_index=table->file->key_used_on_scan) < MAX_KEY)
used_key_is_modified=check_if_key_used(table, used_index, fields);
@@ -226,7 +219,7 @@ int mysql_update(THD *thd,
if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
goto err;
-
+
/* If quick select is used, initialize it before retrieving rows. */
if (select && select->quick && select->quick->reset())
goto err;
@@ -286,6 +279,9 @@ int mysql_update(THD *thd,
if (handle_duplicates == DUP_IGNORE)
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+
+ if (select && select->quick && select->quick->reset())
+ goto err;
init_read_record(&info,thd,table,select,0,1);
updated= found= 0;
@@ -833,26 +829,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields)
case JT_ALL:
/* If range search on index */
if (join_tab->quick)
- {
- if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
- {
- return !check_if_key_used(table,join_tab->quick->index,*fields);
- }
- else
- {
- QUICK_INDEX_MERGE_SELECT *qsel_imerge=
- (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick);
- List_iterator_fast<QUICK_RANGE_SELECT> it(qsel_imerge->quick_selects);
- QUICK_RANGE_SELECT *quick;
- while ((quick= it++))
- {
- if (check_if_key_used(table, quick->index, *fields))
- return 0;
- }
- return 1;
- }
- }
-
+ return !join_tab->quick->check_if_keys_used(fields);
/* If scanning in clustered key */
if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
table->primary_key < MAX_KEY)