diff options
author | unknown <sergefp@mysql.com> | 2004-05-29 20:55:46 +0400 |
---|---|---|
committer | unknown <sergefp@mysql.com> | 2004-05-29 20:55:46 +0400 |
commit | b7130450950898892f14f2a1fef462ecac49e92a (patch) | |
tree | 0107d5b2c95123b7a602a9f396d6aca05ce21a54 /sql | |
parent | b0921b1f5a1f610b04641a83d0d7b21e3fe46d84 (diff) | |
parent | 2164db3c8c4c086dcc16d73baf7557a344d82129 (diff) | |
download | mariadb-git-b7130450950898892f14f2a1fef462ecac49e92a.tar.gz |
Manual merge
include/my_base.h:
Auto merged
innobase/include/row0mysql.h:
Auto merged
innobase/row/row0sel.c:
Auto merged
mysql-test/r/index_merge.result:
Auto merged
mysql-test/r/index_merge_innodb.result:
Auto merged
sql/ha_berkeley.cc:
Auto merged
sql/ha_berkeley.h:
Auto merged
sql/ha_heap.h:
Auto merged
sql/ha_innodb.cc:
Auto merged
sql/ha_innodb.h:
Auto merged
sql/handler.h:
Auto merged
sql/sql_delete.cc:
Auto merged
sql/sql_select.cc:
Auto merged
sql/sql_select.h:
Auto merged
sql/sql_test.cc:
Auto merged
sql/sql_update.cc:
Auto merged
sql/opt_range.cc:
Hand merged
sql/opt_range.h:
Hand merged
Diffstat (limited to 'sql')
-rw-r--r-- | sql/ha_berkeley.cc | 25 | ||||
-rw-r--r-- | sql/ha_berkeley.h | 1 | ||||
-rw-r--r-- | sql/ha_heap.h | 7 | ||||
-rw-r--r-- | sql/ha_innodb.cc | 57 | ||||
-rw-r--r-- | sql/ha_innodb.h | 1 | ||||
-rw-r--r-- | sql/handler.h | 5 | ||||
-rw-r--r-- | sql/opt_range.cc | 3071 | ||||
-rw-r--r-- | sql/opt_range.h | 268 | ||||
-rw-r--r-- | sql/sql_delete.cc | 10 | ||||
-rw-r--r-- | sql/sql_select.cc | 122 | ||||
-rw-r--r-- | sql/sql_select.h | 2 | ||||
-rw-r--r-- | sql/sql_test.cc | 33 | ||||
-rw-r--r-- | sql/sql_update.cc | 39 |
13 files changed, 3094 insertions, 547 deletions
diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index df5a45480c6..9f2039d4b69 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -2500,4 +2500,29 @@ ha_rows ha_berkeley::estimate_number_of_rows() return share->rows + HA_BERKELEY_EXTRA_ROWS; } +int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) +{ + if (hidden_primary_key) + return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH); + + int result; + Field *field; + KEY *key_info=table->key_info+table->primary_key; + KEY_PART_INFO *key_part=key_info->key_part; + KEY_PART_INFO *end=key_part+key_info->key_parts; + + for (; key_part != end; key_part++) + { + field= key_part->field; + result= field->pack_cmp((const char*)ref1, (const char*)ref2, + key_part->length); + if (result) + return result; + ref1 += field->packed_col_length((const char*)ref1, key_part->length); + ref2 += field->packed_col_length((const char*)ref2, key_part->length); + } + + return 0; +} + #endif /* HAVE_BERKELEY_DB */ diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 525c82febd0..f053fd84046 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -168,6 +168,7 @@ class ha_berkeley: public handler void print_error(int error, myf errflag); uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool berkeley_shared_data; diff --git a/sql/ha_heap.h b/sql/ha_heap.h index 68406202c76..ffe448a6b6c 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -93,5 +93,10 @@ class ha_heap: public handler THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); - + int cmp_ref(const byte *ref1, const byte *ref2) + { + HEAP_PTR ptr1=*(HEAP_PTR*)ref1; + HEAP_PTR ptr2=*(HEAP_PTR*)ref2; + return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); + } }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 4c4c32691e5..c3862c17994 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -701,6 +701,8 @@ ha_innobase::init_table_handle_for_HANDLER(void) prebuilt->read_just_key = FALSE; prebuilt->used_in_HANDLER = TRUE; + + prebuilt->keep_other_fields_on_keyread = FALSE; } /************************************************************************* @@ -4540,9 +4542,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_RESET_STATE: + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_NO_KEYREAD: @@ -4561,6 +4565,9 @@ ha_innobase::extra( case HA_EXTRA_KEYREAD: prebuilt->read_just_key = 1; break; + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + prebuilt->keep_other_fields_on_keyread = 1; + break; default:/* Do nothing */ ; } @@ -4609,6 +4616,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -4686,6 +4694,7 @@ ha_innobase::external_lock( prebuilt->hint_need_to_fetch_extra_cols = 0; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (lock_type == F_WRLCK) { @@ -5122,4 +5131,52 @@ ha_innobase::get_auto_increment() return(nr); } +int +ha_innobase::cmp_ref( + const mysql_byte *ref1, + const mysql_byte *ref2) +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + enum_field_types mysql_type; + Field* field; + int result; + + if (prebuilt->clust_index_was_generated) + return memcmp(ref1, ref2, DATA_ROW_ID_LEN); + + /* Do type-aware comparison of Primary Key members. PK members + are always NOT NULL, so no checks for NULL are performed */ + KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part; + KEY_PART_INFO *key_part_end= + key_part + table->key_info[table->primary_key].key_parts; + for (; key_part != key_part_end; ++key_part) { + field = key_part->field; + mysql_type = field->type(); + if (mysql_type == FIELD_TYPE_TINY_BLOB + || mysql_type == FIELD_TYPE_MEDIUM_BLOB + || mysql_type == FIELD_TYPE_BLOB + || mysql_type == FIELD_TYPE_LONG_BLOB) { + + ut_a(!ref1[1]); + ut_a(!ref2[1]); + byte len1= *ref1; + byte len2= *ref2; + ref1 += 2; + ref2 += 2; + result = + ((Field_blob*)field)->cmp((const char*)ref1, len1, + (const char*)ref2, len2); + } else { + result = + field->cmp((const char*)ref1, (const char*)ref2); + } + + if (result) + return result; + ref1 += key_part->length; + ref2 += key_part->length; + } + return 0; +} + #endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 62f5e85b5d4..2160d40f5ad 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -188,6 +188,7 @@ class ha_innobase: public handler longlong get_auto_increment(); uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern uint innobase_init_flags, innobase_lock_type; diff --git a/sql/handler.h b/sql/handler.h index 15fc6a201d8..00e0ad8bcd0 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -447,6 +447,11 @@ public: false otherwise */ virtual bool primary_key_is_clustered() { return false; } + + virtual int cmp_ref(const byte *ref1, const byte *ref2) + { + return memcmp(ref1, ref2, ref_length); + } }; /* Some extern variables used with handlers */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 28b9cd79fdd..e86a389ccb9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -23,6 +23,19 @@ */ +/* + Classes in this file are used in the following way: + 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects + is created. #of rows in table and index statistics are ignored at this + step. + 2. Created SEL_TREE and index stats data are used to construct a + TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read + plans may be created. + 3. The least expensive table read plan is used to create a tree of + QUICK_SELECT_I-derived objects which are later used for row retrieval. + QUICK_RANGEs are also created in this step. +*/ + #ifdef __GNUC__ #pragma implementation // gcc: Class implementation #endif @@ -167,8 +180,7 @@ public: min_value=arg->max_value; min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN; } - void store(uint length,char **min_key,uint min_key_flag, - char **max_key, uint max_key_flag) + void store_min(uint length,char **min_key,uint min_key_flag) { if ((min_flag & GEOM_FLAG) || (!(min_flag & NO_MIN_RANGE) && @@ -183,6 +195,11 @@ public: memcpy(*min_key,min_value,length); (*min_key)+= length; } + } + void store(uint length,char **min_key,uint min_key_flag, + char **max_key, uint max_key_flag) + { + store_min(length, min_key, min_key_flag); if (!(max_flag & NO_MAX_RANGE) && !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX))) { @@ -269,6 +286,7 @@ public: class SEL_IMERGE; + class SEL_TREE :public Sql_alloc { public: @@ -280,28 +298,63 @@ public: bzero((char*) keys,sizeof(keys)); } SEL_ARG *keys[MAX_KEY]; - key_map keys_map; /* bitmask of non-NULL elements in keys */ - List<SEL_IMERGE> merges; /* possible ways to read rows using index_merge */ + key_map keys_map; /* bitmask of non-NULL elements in keys */ + + /* + Possible ways to read rows using index_merge. The list is non-empty only + if type==KEY. Currently can be non empty only if keys_map.is_clear_all(). + */ + List<SEL_IMERGE> merges; + + /* The members below are filled/used only after get_mm_tree is done */ + key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ + uint n_ror_scans; /* number of set bits in ror_scans_map */ + + struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ + struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ + /* Note that #records for each key scan is stored in table->quick_rows */ }; typedef struct st_qsel_param { THD *thd; TABLE *table; - KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY]; + KEY_PART *key_parts,*key_parts_end; + KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ MEM_ROOT *mem_root; table_map prev_tables,read_tables,current_table; - uint baseflag, keys, max_key_part, range_count; - uint real_keynr[MAX_KEY]; + uint baseflag, max_key_part, range_count; + + uint keys; /* number of keys used in the query */ + + /* used_key_no -> table_key_no translation table */ + uint real_keynr[MAX_KEY]; + char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys COND *cond; + uint fields_bitmap_size; + MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ + + key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ + uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ + + /* true if last checked tree->key can be used for ROR-scan */ + bool is_ror_scan; } PARAM; +class TABLE_READ_PLAN; + class TRP_RANGE; + class TRP_ROR_INTERSECT; + class TRP_ROR_UNION; + class TRP_ROR_INDEX_MERGE; + +struct st_ror_scan_info; + static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); @@ -309,6 +362,8 @@ static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, char *min_key,uint min_key_flag, @@ -317,25 +372,37 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index, SEL_ARG *key_tree, MEM_ROOT *alloc = NULL); -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_can_be_used, - double *read_time, - ha_rows *records, - SEL_ARG*** key_to_read); +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time); +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering); +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time); +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time); static int get_index_merge_params(PARAM *param, key_map& needed_reg, SEL_IMERGE *imerge, double *read_time, ha_rows* imerge_rows); -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr); #ifndef DBUG_OFF -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg); -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg); - +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg); +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end); +static void print_rowid(byte* val, int len); +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif + static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); @@ -549,7 +616,6 @@ int imerge_list_or_list(PARAM *param, RETURN 0 OK, result is stored in *im1. other Error - */ int imerge_list_or_tree(PARAM *param, @@ -642,7 +708,7 @@ QUICK_SELECT_I::QUICK_SELECT_I() QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc, MEM_ROOT *parent_alloc) - :dont_free(0),error(0),cur_range(NULL),range(0) + :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0) { index= key_nr; head= table; @@ -667,12 +733,22 @@ int QUICK_RANGE_SELECT::init() QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { + DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); if (!dont_free) { file->index_end(); - delete_dynamic(&ranges); /* ranges are allocated in alloc */ - free_root(&alloc,MYF(0)); - } + file->extra(HA_EXTRA_NO_KEYREAD); + delete_dynamic(&ranges); /* ranges are allocated in alloc */ + if (free_file) + { + DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, + free_file)); + file->reset(); + file->close(); + } + free_root(&alloc,MYF(0)); + } + DBUG_VOID_RETURN; } @@ -681,11 +757,12 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, :cur_quick_it(quick_selects),pk_quick_select(NULL),unique(NULL), thd(thd_param) { + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT"); index= MAX_KEY; head= table; - reset_called= false; bzero(&read_record, sizeof(read_record)); init_sql_alloc(&alloc,1024,0); + DBUG_VOID_RETURN; } int QUICK_INDEX_MERGE_SELECT::init() @@ -699,11 +776,7 @@ int QUICK_INDEX_MERGE_SELECT::reset() { int result; DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset"); - if (reset_called) - DBUG_RETURN(0); - - reset_called= true; - result = cur_quick_select->reset() || prepare_unique(); + result= cur_quick_select->reset() || prepare_unique(); DBUG_RETURN(result); } @@ -724,12 +797,338 @@ QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() { + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT"); delete unique; quick_selects.delete_elements(); delete pk_quick_select; free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + +QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, + TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows) +{ + index= MAX_KEY; + head= table; + record= head->record[0]; + if (!parent_alloc) + init_sql_alloc(&alloc,1024,0); + else + bzero(&alloc, sizeof(MEM_ROOT)); + last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, + head->file->ref_length); +} + + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::init() +{ + /* Check if last_rowid was successfully allocated in ctor */ + return !last_rowid; +} + + +/* + Initialize this quick select to be a ROR-merged scan. + + SYNOPSIS + QUICK_RANGE_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create a separate + handler object + + NOTES + This function creates and prepares for subsequent use a separate handler + object if it can't reuse head->file. The reason for this is that during + ROR-merge several key scans are performed simultaneously, and a single + handler is only capable of preserving context of a single key scan. + + In ROR-merge the quick select doing merge does full records retrieval, + merged quick selects read only keys. + + RETURN + 0 ROR child scan initialized, ok to use. + 1 error +*/ + +int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) +{ + handler *save_file= file; + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); + + if (reuse_handler) + { + DBUG_PRINT("info", ("Reusing handler %p", file)); + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) | + init() || reset()) + { + DBUG_RETURN(1); + } + else + DBUG_RETURN(0); + } + + /* Create a separate handler object for this quick select */ + if (free_file) + { + /* already have own 'handler' object. */ + DBUG_RETURN(0); + } + + if (!(file= get_new_handler(head, head->db_type))) + goto failure; + DBUG_PRINT("info", ("Allocated new handler %p", file)); + if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + { + /* Caller will free the memory */ + goto failure; + } + + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) || + init() || reset()) + { + file->close(); + goto failure; + } + free_file= true; + last_rowid= file->ref; + DBUG_RETURN(0); + +failure: + file= save_file; + DBUG_RETURN(1); +} + + +/* + Initialize this quick select to be a part of a ROR-merged scan. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create separate + handler object. + RETURN + 0 OK + other error code +*/ +int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler) +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan"); + + /* Initialize all merged "children" quick selects */ + DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler)); + if (!need_to_fetch_row && reuse_handler) + { + quick= quick_it++; + /* + There is no use of this->file. Use it for the first of merged range + selects. + */ + if (quick->init_ror_merged_scan(true)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + } + while((quick= quick_it++)) + { + if (quick->init_ror_merged_scan(false)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + /* All merged scans share the same record buffer in intersection. */ + quick->record= head->record[0]; + } + + if (need_to_fetch_row && head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::reset() +{ + int result; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); + result= init_ror_merged_scan(true); + DBUG_RETURN(result); } + +/* + Add a merged quick select to this ROR-intersection quick select. + + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::push_quick_back() + quick Quick select to be added. The quick select must return + rows in rowid order. + NOTES + This call can only be made before init() is called. + + RETURN + false OK + true Out of memory. +*/ + +bool +QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) +{ + return quick_selects.push_back(quick); +} + +QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); + quick_selects.delete_elements(); + delete cpk_quick; + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + +QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, + TABLE *table) + : thd(thd_param) +{ + index= MAX_KEY; + head= table; + rowid_length= table->file->ref_length; + record= head->record[0]; + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + my_pthread_setspecific_ptr(THR_MALLOC,&alloc); +} + + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_UNION_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::init() +{ + if (init_queue(&queue, quick_selects.elements, 0, + false , QUICK_ROR_UNION_SELECT::queue_cmp, + (void*) this)) + { + bzero(&queue, sizeof(QUEUE)); + return 1; + } + + if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length))) + return 1; + prev_rowid= cur_rowid + head->file->ref_length; + return 0; +} + + +/* + Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority + queue. + + SYNPOSIS + QUICK_ROR_UNION_SELECT::queue_cmp() + arg Pointer to QUICK_ROR_UNION_SELECT + val1 First merged select + val2 Second merged select +*/ +int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) +{ + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, + ((QUICK_SELECT_I*)val2)->last_rowid); +} + + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::reset() +{ + QUICK_SELECT_I* quick; + int error; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); + have_prev_rowid= false; + /* + Initialize scans for merged quick selects and put all merged quick + selects into the queue. + */ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->init_ror_merged_scan(false)) + DBUG_RETURN(1); + if ((error= quick->get_next())) + { + if (error == HA_ERR_END_OF_FILE) + continue; + else + DBUG_RETURN(error); + } + quick->save_last_pos(); + queue_insert(&queue, (byte*)quick); + } + + if (head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +bool +QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) +{ + return quick_selects.push_back(quick_sel_range); +} + +QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); + delete_queue(&queue); + quick_selects.delete_elements(); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + QUICK_RANGE::QUICK_RANGE() :min_key(0),max_key(0),min_length(0),max_length(0), flag(NO_MIN_RANGE | NO_MAX_RANGE) @@ -893,28 +1292,221 @@ SEL_ARG *SEL_ARG::clone_tree() return root; } + +/* + Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived + objects from table read plans. +*/ +class TABLE_READ_PLAN +{ +public: + /* + Plan read cost, with or without cost of full row retrieval, depending + on plan creation parameters. + */ + double read_cost; + ha_rows records; /* estimate of #rows to be examined */ + + /* + If true, the scan returns rows in rowid order. This is used only for + scans that can be both ROR and non-ROR. + */ + bool is_ror; + + /* + Create quick select for this plan. + SYNOPSIS + make_quick() + param Parameter from test_quick_select + retrieve_full_rows If true, created quick select will do full record + retrieval. + parent_alloc Memory pool to use, if any. + + NOTES + retrieve_full_rows is ignored by some implementations. + + RETURN + created quick select + NULL on any error. + */ + virtual QUICK_SELECT_I *make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc=NULL) = 0; + + /* Table read plans are allocated on MEM_ROOT and are never deleted */ + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) {} +}; + +class TRP_ROR_INTERSECT; +class TRP_ROR_UNION; +class TRP_INDEX_MERGE; + + +/* + Plan for a QUICK_RANGE_SELECT scan. + TRP_RANGE::make_quick ignores retrieve_full_rows parameter because + QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full + record retrieval scans. +*/ + +class TRP_RANGE : public TABLE_READ_PLAN +{ +public: + SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ + uint key_idx; /* key number in PARAM::key */ + + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) + : key(key_arg), key_idx(idx_arg) + {} + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + { + DBUG_ENTER("TRP_RANGE::make_quick"); + QUICK_RANGE_SELECT *quick; + if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) + { + quick->records= records; + quick->read_time= read_cost; + } + DBUG_RETURN(quick); + } +}; + + +/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */ + +class TRP_ROR_INTERSECT : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + + /* Array of pointers to ROR range scans used in this intersection */ + struct st_ror_scan_info **first_scan; + struct st_ror_scan_info **last_scan; /* End of the above array */ + struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ + bool is_covering; /* true if no row retrieval phase is necessary */ + double index_scan_costs; /* SUM(cost(index_scan)) */ +}; + + +/* + Plan for QUICK_ROR_UNION_SELECT scan. + QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_ROR_UNION : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ + TABLE_READ_PLAN **last_ror; /* end of the above array */ +}; + + +/* + Plan for QUICK_INDEX_MERGE_SELECT scan. + QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_INDEX_MERGE : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ + TRP_RANGE **range_scans_end; /* end of the array */ +}; + + +/* + Fill param->needed_fields with bitmap of fields used in the query. + SYNOPSIS + fill_used_fields_bitmap() + param Parameter from test_quick_select function. + + NOTES + Clustered PK members are not put into the bitmap as they are implicitly + present in all keys (and it is impossible to avoid reading them). + RETURN + 0 Ok + 1 Out of memory. +*/ + +static int fill_used_fields_bitmap(PARAM *param) +{ + TABLE *table= param->table; + param->fields_bitmap_size= (table->fields/8 + 1); + uchar *tmp; + uint pk; + if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, + false)) + return 1; + + bitmap_clear_all(¶m->needed_fields); + for (uint i= 0; i < table->fields; i++) + { + if (param->thd->query_id == table->field[i]->query_id) + bitmap_set_bit(¶m->needed_fields, i+1); + } + + pk= param->table->primary_key; + if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY) + { + /* The table uses clustered PK and it is not internally generated */ + KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[pk].key_parts; + for(;key_part != key_part_end; ++key_part) + { + bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr); + } + } + return 0; +} + + /* - Test if a key can be used in different ranges + Test if a key can be used in different ranges SYNOPSIS - SQL_SELECT::test_quick_select(thd,keys_to_use, prev_tables, - limit, force_quick_range) - - Updates the following in the select parameter: - needed_reg - Bits for keys with may be used if all prev regs are read - quick - Parameter to use when reading records. - In the table struct the following information is updated: - quick_keys - Which keys can be used - quick_rows - How many rows the key matches - - RETURN VALUES - -1 if impossible select - 0 if can't use quick_select - 1 if found usable range - - TODO - check if the function really needs to modify keys_to_use, and change the - code to pass it by reference if not + SQL_SELECT::test_quick_select() + thd Current thread + keys_to_use Keys to use for range retrieval + prev_tables Tables assumed to be already read when the scan is + performed (but not read at the moment of this call) + limit Query limit + force_quick_range Prefer to use range (instead of full table scan) even + if it is more expensive. + + NOTES + Updates the following in the select parameter: + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. + + In the table struct the following information is updated: + quick_keys - Which keys can be used + quick_rows - How many rows the key matches + + TODO + Check if this function really needs to modify keys_to_use, and change the + code to pass it by reference if it doesn't. + + In addition to force_quick_range other means can be (an usually are) used + to make this function prefer range over full table scan. Figure out if + force_quick_range is really needed. + + RETURN + -1 if impossible select (i.e. certainly no rows will be selected) + 0 if can't use quick_select + 1 if found usable ranges and quick select has been successfully created. */ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, @@ -924,8 +1516,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint basflag; uint idx; double scan_time; - QUICK_INDEX_MERGE_SELECT *quick_imerge= NULL; DBUG_ENTER("test_quick_select"); + printf("\nQUERY: %s\n", thd->query); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, (ulong) const_tables)); @@ -971,12 +1563,16 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.table=head; param.keys=0; param.mem_root= &alloc; + param.needed_reg= &needed_reg; + param.imerge_cost_buff_size= 0; + thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc, sizeof(KEY_PART)* - head->key_parts))) + head->key_parts)) + || fill_used_fields_bitmap(¶m)) { thd->no_errors=0; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -985,7 +1581,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, key_parts= param.key_parts; old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC); my_pthread_setspecific_ptr(THR_MALLOC,&alloc); - + + /* + Make an array with description of all key parts of all table keys. + This is used in get_mm_parts function. + */ key_info= head->key_info; for (idx=0 ; idx < head->keys ; idx++, key_info++) { @@ -1013,165 +1613,113 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } param.key_parts_end=key_parts; + if ((tree=get_mm_tree(¶m,cond))) { if (tree->type == SEL_TREE::IMPOSSIBLE) { - records=0L; // Return -1 from this function + records=0L; // Return -1 from this function read_time= (double) HA_POS_ERROR; } else if (tree->type == SEL_TREE::KEY || tree->type == SEL_TREE::KEY_SMALLER) { - /* + TABLE_READ_PLAN *best_trp; + /* It is possible to use a quick select (but maybe it would be slower than 'all' table scan). */ - SEL_ARG **best_key= 0; - ha_rows found_records; - double found_read_time= read_time; - - if (!get_quick_select_params(tree, ¶m, needed_reg, false, - &found_read_time, &found_records, - &best_key)) + if (tree->merges.is_empty()) { + double best_read_time= read_time; + TRP_ROR_INTERSECT *new_trp; + bool can_build_covering= false; + + /* Get best 'range' plan and prepare data for making other plans */ + if ((best_trp= get_key_scans_params(¶m, tree, false, + best_read_time))) + best_read_time= best_trp->read_cost; + /* - Ok, quick select is better than 'all' table scan and we have its - parameters, so construct it. + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. */ - read_time= found_read_time; - records= found_records; - - if ((quick= get_quick_select(¶m,(uint) (best_key-tree->keys), - *best_key)) && (!quick->init())) + if (thd->lex->sql_command != SQLCOM_DELETE) { - quick->records= records; - quick->read_time= read_time; + /* + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection. + */ + if ((new_trp= get_best_ror_intersect(¶m, tree, false, + best_read_time, + &can_build_covering))) + { + best_trp= new_trp; + best_read_time= best_trp->read_cost; + } + + /* + Try constructing covering ROR-intersect only if it looks possible + and worth doing. + */ + if (new_trp && !new_trp->is_covering && can_build_covering && + (new_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) + best_trp= new_trp; } } - - /* - Btw, tree type SEL_TREE::INDEX_MERGE was not introduced - intentionally. - */ - - /* If no range select could be built, try using index_merge. */ - if (!quick && !tree->merges.is_empty()) + else { - DBUG_PRINT("info",("No range reads possible," - " trying to construct index_merge")); + /* Try creating index_merge/ROR-union scan. */ SEL_IMERGE *imerge; - SEL_IMERGE *min_imerge= NULL; - double min_imerge_read_time; - ha_rows min_imerge_records; - LINT_INIT(min_imerge_records); // Protected by min_imerge - + TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; + LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ + + /* Calculate cost of full index read for the shortest covering index */ if (!head->used_keys.is_clear_all()) { int key_for_use= find_shortest_key(head, &head->used_keys); - ha_rows total_table_records= (0 == head->file->records)? 1 : - head->file->records; - read_time = get_index_only_read_time(¶m, total_table_records, - key_for_use); - DBUG_PRINT("info", - ("'all' scan will be using key %d, read time %g", - key_for_use, read_time)); + double key_read_time= get_index_only_read_time(¶m, records, + key_for_use); + DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, key_read_time)); + if (key_read_time < read_time) + read_time= key_read_time; } - min_imerge_read_time=read_time; - /* - Ok, read_time contains best 'all' read time. - Now look for index_merge with cost < read_time - */ + DBUG_PRINT("info",("No range reads possible," + " trying to construct index_merge")); List_iterator_fast<SEL_IMERGE> it(tree->merges); while ((imerge= it++)) { - if (!get_index_merge_params(¶m, needed_reg, imerge, - &min_imerge_read_time, - &min_imerge_records)) - min_imerge= imerge; - } - - if (!min_imerge) - goto end_free; - - records= min_imerge_records; - - /* Ok, using index_merge is the best option, so construct it. */ - if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head))) - goto end_free; - - quick->records= min_imerge_records; - quick->read_time= min_imerge_read_time; - - my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc); - - QUICK_RANGE_SELECT *new_quick; - for (SEL_TREE **ptree = min_imerge->trees; - ptree != min_imerge->trees_next; - ptree++) - { - SEL_ARG **tree_best_key= - min_imerge->best_keys[ptree - min_imerge->trees]; - if ((new_quick= get_quick_select(¶m, - (uint)(tree_best_key- - (*ptree)->keys), - *tree_best_key, - &quick_imerge->alloc))) - { - new_quick->records= min_imerge_records; - new_quick->read_time= min_imerge_read_time; - /* - QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC - pointing to its allocator, restore it back. - */ - quick_imerge->last_quick_select= new_quick; - - if (quick_imerge->push_quick_back(new_quick)) - { - delete new_quick; - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } - } - else - { - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } + new_conj_trp= get_best_disjunct_quick(¶m, imerge, read_time); + if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < + best_conj_trp->read_cost)) + best_conj_trp= new_conj_trp; } + best_trp= best_conj_trp; + } + my_pthread_setspecific_ptr(THR_MALLOC, old_root); - free_root(&alloc,MYF(0)); - my_pthread_setspecific_ptr(THR_MALLOC,old_root); - if (quick->init()) + /* If we got a read plan, create a quick select from it. */ + if (best_trp) + { + records= best_trp->records; + if (!(quick= best_trp->make_quick(¶m, true)) || quick->init()) { delete quick; - quick= quick_imerge= NULL; - DBUG_PRINT("error", - ("Failed to allocate index merge structures," - "falling back to full scan.")); + quick= NULL; } - goto end; - } + } } } -end_free: + my_pthread_setspecific_ptr(THR_MALLOC, old_root); free_root(&alloc,MYF(0)); // Return memory & allocator - my_pthread_setspecific_ptr(THR_MALLOC,old_root); -end: thd->no_errors=0; } - DBUG_EXECUTE("info", - { - if (quick_imerge) - print_quick_sel_imerge(quick_imerge, &needed_reg); - else - print_quick_sel_range((QUICK_RANGE_SELECT*)quick, &needed_reg); - } - ); + DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); /* Assume that if the user is using 'limit' we will only need to scan @@ -1181,26 +1729,71 @@ end: } -/* - Calculate index merge cost and save parameters for its construction. +/* + Get cost of 'sweep' full records retrieval. + SYNOPSIS + get_sweep_read_cost() + param Parameter from test_quick_select + records # of records to be retrieved + RETURN + cost of sweep +*/ - SYNOPSIS - get_index_merge_params() - param in parameter with structure. - needed_reg in/out needed_reg from this SQL_SELECT - imerge in index_merge description structure - read_time in/out in: cost of an existing way to read a table - out: cost of index merge - imerge_rows out pessimistic estimate of # of rows to be retrieved +double get_sweep_read_cost(const PARAM *param, ha_rows records) +{ + double result; + if (param->table->file->primary_key_is_clustered()) + { + result= param->table->file->read_time(param->table->primary_key, + records, records); + } + else + { + double n_blocks= + ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, + busy_blocks)); + /* + Disabled: Bail out if # of blocks to read is bigger than # of blocks in + table data file. + if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) + return 1; + */ + JOIN *join= param->thd->lex->select_lex.join; + if (!join || join->tables == 1) + { + /* No join, assume reading is done in one 'sweep' */ + result= busy_blocks*(DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + else + { + /* + Possibly this is a join with source table being non-last table, so + assume that disk seeks are random here. + */ + result= busy_blocks; + } + } + DBUG_PRINT("info",("returning cost=%g", result)); + return result; +} - RETURN - 0 Cost of this index_merge is less than passed *read_time, - *imerge_rows and *read_time contain new index_merge parameters. - 1 Cost of this index_merge is more than *read_time, - *imerge_rows and *read_time are not modified. - -1 error +/* + Get best plan for a SEL_IMERGE disjunctive expression. + SYNOPSIS + get_best_disjunct_quick() + param Parameter from check_quick_select function + imerge Expression to use + read_time Don't create scans with cost > read_time + NOTES + index_merge cost is calculated as follows: index_merge_cost = cost(index_reads) + (see #1) cost(rowid_to_row_scan) + (see #2) @@ -1247,167 +1840,255 @@ end: (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). 3. Cost of Unique use is calculated in Unique::get_use_cost function. -*/ - -static int get_index_merge_params(PARAM *param, key_map& needed_reg, - SEL_IMERGE *imerge, double *read_time, - ha_rows* imerge_rows) -{ - double imerge_cost= 0.0; /* cost of this index_merge */ - bool imerge_too_expensive= false; - double tree_read_time; - ha_rows tree_records; - bool pk_is_clustered= param->table->file->primary_key_is_clustered(); - bool have_cpk_scan= false; - ha_rows records_for_unique= 0; - ha_rows cpk_records= 0; - DBUG_ENTER("get_index_merge_params"); - - /* allocate structs to save construction info */ - imerge->best_keys= - (SEL_ARG***)alloc_root(param->mem_root, - (imerge->trees_next - imerge->trees)* - sizeof(void*)); - /* - PHASE 1: get the best keys to use for this index_merge - */ + ROR-union cost is calculated in the same way index_merge, but instead of + Unique a priority queue is used. + + RETURN + Created read plan + NULL - Out of memory or no read scan could be built. +*/ +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time) +{ + SEL_TREE **ptree; + TRP_INDEX_MERGE *imerge_trp= NULL; + uint n_child_scans= imerge->trees_next - imerge->trees; + TRP_RANGE **range_scans; + TRP_RANGE **cur_child; + TRP_RANGE **cpk_scan= NULL; + bool imerge_too_expensive= false; + double imerge_cost= 0.0; + ha_rows cpk_scan_records= 0; + ha_rows non_cpk_scan_records= 0; + bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + bool all_scans_ror_able= true; + bool all_scans_rors= true; + uint unique_calc_buff_size; + TABLE_READ_PLAN **roru_read_plans; + TABLE_READ_PLAN **cur_roru_plan; + double roru_index_costs; + double blocks_in_index_read; + ha_rows roru_total_records; + double roru_intersect_part= 1.0; + double sweep_cost; + DBUG_ENTER("get_best_disjunct_quick"); + DBUG_PRINT("info", ("Full table scan cost =%g", read_time)); + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE*)* + n_child_scans))) + DBUG_RETURN(NULL); /* - It may be possible to use different keys for index_merge scans, e.g. for - query like - ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4) - we have to make choice between key1 and key2 for one scan and between - key3, key4 for another. - We assume we'll get the best if we choose the best key read inside each - of the conjuncts. + Collect best 'range' scan for each of disjuncts, and, while doing so, + analyze possibility of ROR scans. Also calculate some values needed by + other parts of the code. */ - for (SEL_TREE **ptree= imerge->trees; + for (ptree= imerge->trees, cur_child= range_scans; ptree != imerge->trees_next; - ptree++) + ptree++, cur_child++) { - SEL_ARG **tree_best_key; - tree_read_time= *read_time; - - if (get_quick_select_params(*ptree, param, needed_reg, true, - &tree_read_time, &tree_records, - &tree_best_key)) + DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, + "tree in SEL_IMERGE");); + if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time))) { /* One of index scans in this index_merge is more expensive than entire - table read for another available option. The entire index_merge will - be more expensive then, too. We continue here only to update - SQL_SELECT members. + table read for another available option. The entire index_merge (and + any possible ROR-union) will be more expensive then, too. We continue + here only to update SQL_SELECT members. */ imerge_too_expensive= true; } - if (imerge_too_expensive) continue; - uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)]; - imerge->best_keys[ptree - imerge->trees]= tree_best_key; - imerge_cost += tree_read_time; - - if (pk_is_clustered && keynr == param->table->primary_key) + imerge_cost += (*cur_child)->read_cost; + all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); + all_scans_rors &= (*cur_child)->is_ror; + if (pk_is_clustered && + param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key) { - have_cpk_scan= true; - cpk_records= tree_records; + cpk_scan= cur_child; + cpk_scan_records= (*cur_child)->records; } else - records_for_unique += tree_records; - } - DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost)); - - if (imerge_too_expensive) - DBUG_RETURN(1); - - if ((imerge_cost > *read_time) || - ((records_for_unique + cpk_records) >= param->table->file->records) && - *read_time != DBL_MAX) - { - /* Bail out if it is obvious that index_merge would be more expensive */ - DBUG_RETURN(1); + non_cpk_scan_records += (*cur_child)->records; } - - if (have_cpk_scan) + + DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost)); + if (imerge_too_expensive || (imerge_cost > read_time) || + (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) && + read_time != DBL_MAX) { /* - Add one ROWID comparison for each row retrieved on non-CPK scan. - (it is done in QUICK_RANGE_SELECT::row_in_ranges) + Bail out if it is obvious that both index_merge and ROR-union will be + more expensive */ - imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID; + DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " + "full table scan, bailing out")); + DBUG_RETURN(NULL); } - - /* PHASE 2: Calculate cost(rowid_to_row_scan) */ - if (pk_is_clustered) + if (all_scans_rors) { - imerge_cost += - param->table->file->read_time(param->table->primary_key, - records_for_unique, - records_for_unique) - + rows2double(records_for_unique) / TIME_FOR_COMPARE; + roru_read_plans= (TABLE_READ_PLAN**)range_scans; + goto skip_to_ror_scan; } - else - { - double n_blocks= - ceil((double) ((longlong)param->table->file->data_file_length / IO_SIZE)); - double busy_blocks= - n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, (double) records_for_unique)); - - JOIN *join= param->thd->lex->select_lex.join; - if (!join || join->tables == 1) - imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST + - DISK_SEEK_PROP_COST*n_blocks/busy_blocks); - else - { - /* - It can be a join with source table being non-last table, so assume - that disk seeks are random here. - (TODO it is possible to determine if this is a last table in 'index - checked for each record' join) - */ - imerge_cost += busy_blocks; - } - } - DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost)); - - /* PHASE 3: Add Unique operations cost */ - register uint unique_calc_buff_size= - Unique::get_cost_calc_buff_size(records_for_unique, + blocks_in_index_read= imerge_cost; + if (cpk_scan) + { + /* + Add one ROWID comparison for each row retrieved on non-CPK scan. (it + is done in QUICK_RANGE_SELECT::row_in_ranges) + */ + imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; + } + + /* Calculate cost(rowid_to_row_scan) */ + imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records); + DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", + imerge_cost)); + if (imerge_cost > read_time) + goto build_ror_index_merge; + + /* Add Unique operations cost */ + unique_calc_buff_size= + Unique::get_cost_calc_buff_size(non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); if (param->imerge_cost_buff_size < unique_calc_buff_size) { if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, unique_calc_buff_size))) - DBUG_RETURN(1); + DBUG_RETURN(NULL); param->imerge_cost_buff_size= unique_calc_buff_size; } imerge_cost += - Unique::get_use_cost(param->imerge_cost_buff, records_for_unique, + Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); + DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", + imerge_cost, read_time)); + if (imerge_cost < read_time) + { + if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) + { + imerge_trp->read_cost= imerge_cost; + imerge_trp->records= non_cpk_scan_records + cpk_scan_records; + imerge_trp->records= min(imerge_trp->records, + param->table->file->records); + imerge_trp->range_scans= range_scans; + imerge_trp->range_scans_end= range_scans + n_child_scans; + read_time= imerge_cost; + } + } - DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost)); - if (imerge_cost < *read_time) +build_ror_index_merge: + if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE) + DBUG_RETURN(imerge_trp); + + /* Ok, it is possible to build a ROR-union, try it. */ + bool dummy; + if (!(roru_read_plans= + (TABLE_READ_PLAN**)alloc_root(param->mem_root, + sizeof(TABLE_READ_PLAN*)* + n_child_scans))) + DBUG_RETURN(imerge_trp); +skip_to_ror_scan: + roru_index_costs= 0.0; + roru_total_records= 0; + cur_roru_plan= roru_read_plans; + + /* Find 'best' ROR scan for each of trees in disjunction */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++, cur_roru_plan++) { - *read_time= imerge_cost; - records_for_unique += cpk_records; - *imerge_rows= min(records_for_unique, param->table->file->records); - DBUG_RETURN(0); + /* + Assume the best ROR scan is the one that has cheapest full-row-retrieval + scan cost. + Also accumulate index_only scan costs as we'll need them to calculate + overall index_intersection cost. + */ + double cost; + if ((*cur_child)->is_ror) + { + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= param->table->file-> + read_time(param->real_keynr[(*cur_child)->key_idx], 1, + (*cur_child)->records) + + rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + } + else + cost= read_time; + + TABLE_READ_PLAN *prev_plan= *cur_child; + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost, + &dummy))) + { + if (prev_plan->is_ror) + *cur_roru_plan= prev_plan; + else + DBUG_RETURN(imerge_trp); + roru_index_costs += (*cur_roru_plan)->read_cost; + } + else + roru_index_costs += + ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; + roru_total_records += (*cur_roru_plan)->records; + roru_intersect_part *= (*cur_roru_plan)->records / + param->table->file->records; } - DBUG_RETURN(1); + + /* + rows to retrieve= + SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). + This is valid because index_merge construction guarantees that conditions + in disjunction do not share key parts. + */ + roru_total_records -= (ha_rows)(roru_intersect_part* + param->table->file->records); + /* ok, got a ROR read plan for each of the disjuncts + Calculate cost: + cost(index_union_scan(scan_1, ... scan_n)) = + SUM_i(cost_of_index_only_scan(scan_i)) + + queue_use_cost(rowid_len, n) + + cost_of_row_retrieval + See get_merge_buffers_cost function for queue_use_cost formula derivation. + */ + + double roru_total_cost; + roru_total_cost= roru_index_costs + + rows2double(roru_total_records)*log(n_child_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2) + + get_sweep_read_cost(param, roru_total_records); + + DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, + n_child_scans)); + TRP_ROR_UNION* roru; + if (roru_total_cost < read_time) + { + if ((roru= new (param->mem_root) TRP_ROR_UNION)) + { + roru->first_ror= roru_read_plans; + roru->last_ror= roru_read_plans + n_child_scans; + roru->read_cost= roru_total_cost; + roru->records= roru_total_records; + DBUG_RETURN(roru); + } + } + DBUG_RETURN(imerge_trp); } /* Calculate cost of 'index only' scan for given index and number of records. - (We can resolve this by only reading through this key.) SYNOPSIS - get_whole_index_read_time() + get_index_only_read_time() param parameters structure records #of records to read keynr key to read @@ -1417,7 +2098,7 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg, key blocks are half full (normally things are much better). */ -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr) { double read_time; @@ -1429,39 +2110,838 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records, return read_time; } +typedef struct st_ror_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + ha_rows records; /* estimate of # records this scan will return */ + + /* Set of intervals over key fields that will be used for row retrieval. */ + SEL_ARG *sel_arg; + + /* Fields used in the query and covered by this ROR scan. */ + MY_BITMAP covered_fields; + uint used_fields_covered; /* # of set bits in covered_fields */ + int key_rec_length; /* length of key record (including rowid) */ + + /* + Cost of reading all index records with values in sel_arg intervals set + (assuming there is no need to access full table records) + */ + double index_read_cost; + uint first_uncovered_field; /* first unused bit in covered_fields */ + uint key_components; /* # of parts in the key */ +} ROR_SCAN_INFO; + + +/* + Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using + sel_arg set of intervals. + + SYNOPSIS + make_ror_scan() + param Parameter from test_quick_select function + idx Index of key in param->keys + sel_arg Set of intervals for a given key + RETURN + NULL - out of memory + ROR scan structure containing a scan for {idx, sel_arg} +*/ + +static +ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) +{ + ROR_SCAN_INFO *ror_scan; + uchar *bitmap_buf; + uint keynr; + DBUG_ENTER("make_ror_scan"); + if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO)))) + DBUG_RETURN(NULL); + + ror_scan->idx= idx; + ror_scan->keynr= keynr= param->real_keynr[idx]; + ror_scan->key_rec_length= param->table->key_info[keynr].key_length + + param->table->file->ref_length; + ror_scan->sel_arg= sel_arg; + ror_scan->records= param->table->quick_rows[keynr]; + + if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, + param->fields_bitmap_size))) + DBUG_RETURN(NULL); + + if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, + param->fields_bitmap_size*8, false)) + DBUG_RETURN(NULL); + bitmap_clear_all(&ror_scan->covered_fields); + + KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[keynr].key_parts; + uint n_used_covered= 0; + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr)) + { + n_used_covered++; + bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr); + } + } + ror_scan->index_read_cost= + get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], + ror_scan->keynr); + DBUG_RETURN(ror_scan); +} + + +/* + Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length. + SYNOPSIS + cmp_ror_scan_info() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ +static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + double val1= rows2double((*a)->records) * (*a)->key_rec_length; + double val2= rows2double((*b)->records) * (*b)->key_rec_length; + return (val1 < val2)? -1: (val1 == val2)? 0 : 1; +} + +/* + Compare two ROR_SCAN_INFO** by + (#covered fields in F desc, + #components asc, + number of first not covered component asc) + + SYNOPSIS + cmp_ror_scan_info_covering() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ +static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + if ((*a)->used_fields_covered > (*b)->used_fields_covered) + return -1; + if ((*a)->used_fields_covered < (*b)->used_fields_covered) + return 1; + if ((*a)->key_components < (*b)->key_components) + return -1; + if ((*a)->key_components > (*b)->key_components) + return 1; + if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) + return -1; + if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) + return 1; + return 0; +} + +/* Auxiliary structure for incremental ROR-intersection creation */ +typedef struct +{ + const PARAM *param; + MY_BITMAP covered_fields; /* union of fields covered by all scans */ + + /* true if covered_fields is a superset of needed_fields */ + bool is_covering; + + double index_scan_costs; /* SUM(cost of 'index-only' scans) */ + double total_cost; + /* + Fraction of table records that satisfies conditions of all scans. + This is the number of full records that will be retrieved if a + non-index_only index intersection will be employed. + */ + double records_fract; + ha_rows index_records; /* sum(#records to look in indexes) */ +} ROR_INTERSECT_INFO; + + +/* + Re-initialize an allocated intersect info to contain zero scans. + SYNOPSIS + info Intersection info structure to re-initialize. +*/ + +static void ror_intersect_reinit(ROR_INTERSECT_INFO *info) +{ + info->is_covering= false; + info->index_scan_costs= 0.0f; + info->records_fract= 1.0f; + bitmap_clear_all(&info->covered_fields); +} + +/* + Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans. + + SYNOPSIS + ror_intersect_init() + param Parameter from test_quick_select + is_index_only If true, set ROR_INTERSECT_INFO to be covering + + RETURN + allocated structure + NULL on error +*/ + +static +ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) +{ + ROR_INTERSECT_INFO *info; + uchar* buf; + if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, + sizeof(ROR_INTERSECT_INFO)))) + return NULL; + info->param= param; + if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + return NULL; + if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, + false)) + return NULL; + ror_intersect_reinit(info); + return info; +} + + +/* + Check if adding a ROR scan to a ROR-intersection reduces its cost of + ROR-intersection and if yes, update parameters of ROR-intersection, + including its cost. + + SYNOPSIS + ror_intersect_add() + param Parameter from test_quick_select + info ROR-intersection structure to add the scan to. + ror_scan ROR scan info to add. + is_cpk_scan If true, add the scan as CPK scan (this can be inferred + from other parameters and is passed separately only to + avoid duplicating the inference code) + + NOTES + Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR- + intersection decreases. The cost of ROR-intersection is caclulated as + follows: + + cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval + + if (union of indexes used covers all needed fields) + cost_of_full_rows_retrieval= 0; + else + { + cost_of_full_rows_retrieval= + cost_of_sweep_read(E(rows_to_retrieve), rows_in_table); + } + + E(rows_to_retrieve) is caclulated as follows: + Suppose we have a condition on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key + k_21=c_21 AND k_22=c_22 AND ... // parts of second key + ... + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) + + where k_ij may be the same as any k_pq (i.e. keys may have common parts). + + A full row is retrieved iff entire cond holds. + + The recursive procedure for finding P(cond) is as follows: + + First step: + Pick 1st part of 1st key and break conjunction (1) into two parts: + cond= (k_11=c_11 AND R) + + Here R may still contain condition(s) equivalent to k_11=c_11. + Nevertheless, the following holds: + + P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11). + + Mark k_11 as fixed field (and satisfied condition) F, save P(F), + save R to be cond and proceed to recursion step. + + Recursion step: + We have a set of fixed fields/satisfied conditions) F, probability P(F), + and remaining conjunction R + Pick next key part on current key and its condition "k_ij=c_ij". + We will add "k_ij=c_ij" into F and update P(F). + Lets denote k_ij as t, R = t AND R1, where R1 may still contain t. Then + + P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2) + + (where '|' mean conditional probability, not "or") + + Consider the first multiplier in (2). One of the following holds: + a) F contains condition on field used in t (i.e. t AND F = F). + Then P(t|F) = 1 + + b) F doesn't contain condition on field used in t. Then F and t are + considered independent. + + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + + P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) / + #records(fields_before_t_in_key, t) + + The second multiplier is calculated by applying this step recursively. + + IMPLEMENTATION + This function calculates the result of application of the "recursion step" + described above for all fixed key members of a single key, accumulating set + of covered fields, selectivity, etc. + + The calculation is conducted as follows: + Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate + + n_{k1} n_{k_2} + --------- * --------- * .... (3) + n_{k1-1} n_{k2_1} + + where k1,k2,... are key parts which fields were not yet marked as fixed + ( this is result of application of option b) of the recursion step for + parts of a single key). + Since it is reasonable to expect that most of the fields are not marked + as fixed, we calcualate (3) as + + n_{i1} n_{i_2} + (3) = n_{max_key_part} / ( --------- * --------- * .... ) + n_{i1-1} n_{i2_1} + + where i1,i2, .. are key parts that were already marked as fixed. + + In order to minimize number of expensive records_in_range calls we group + and reduce adjacent fractions. + + RETURN + true ROR scan added to ROR-intersection, cost updated. + false It doesn't make sense to add this ROR scan to this ROR-intersection. +*/ + +bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, + ROR_SCAN_INFO* ror_scan, bool is_cpk_scan=false) +{ + int i; + SEL_ARG *sel_arg; + KEY_PART_INFO *key_part= + info->param->table->key_info[ror_scan->keynr].key_part; + double selectivity_mult= 1.0; + char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */ + + DBUG_ENTER("ror_intersect_add"); + DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("Adding scan on %s", + info->param->table->key_info[ror_scan->keynr].name)); + SEL_ARG *tuple_arg= NULL; + char *key_ptr= key_val; + bool cur_covered, prev_covered= + bitmap_is_set(&info->covered_fields, key_part->fieldnr); + + ha_rows prev_records= param->table->file->records; + + for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; + i++, sel_arg= sel_arg->next_key_part) + { + cur_covered= bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr); + if (cur_covered != prev_covered) + { + /* create (part1val, ..., part{n-1}val) tuple. */ + { + if (!tuple_arg) + { + tuple_arg= ror_scan->sel_arg; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + while (tuple_arg->next_key_part != sel_arg) + { + tuple_arg= tuple_arg->next_key_part; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + } + ha_rows records; + records= param->table->file-> + records_in_range(ror_scan->keynr, + (byte*)key_val, key_ptr - key_val, + HA_READ_KEY_EXACT, + (byte*)key_val, key_ptr - key_val, + HA_READ_AFTER_KEY); + if (cur_covered) + { + /* uncovered -> covered */ + double tmp= rows2double(records)/rows2double(prev_records); + printf(" Selectivity multiplier: %g\n", tmp); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; + prev_records= HA_POS_ERROR; + } + else + { + /* covered -> uncovered */ + prev_records= records; + } + } + else printf("no change, skipping\n"); + prev_covered= cur_covered; + } + if (!prev_covered) + { + double tmp= rows2double(param->table->quick_rows[ror_scan->keynr]) / + rows2double(prev_records); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; + } + + if (selectivity_mult == 1.0) + { + /* Don't add this scan if it doesn't improve selectivity. */ + DBUG_PRINT("info", ("The scan doesn't improve selectivity.")); + DBUG_RETURN(false); + } + + info->records_fract *= selectivity_mult; + ha_rows cur_scan_records= info->param->table->quick_rows[ror_scan->keynr]; + if (is_cpk_scan) + { + info->index_scan_costs += rows2double(cur_scan_records)* + TIME_FOR_COMPARE_ROWID; + } + else + { + info->index_records += cur_scan_records; + info->index_scan_costs += ror_scan->index_read_cost; + bitmap_union(&info->covered_fields, &ror_scan->covered_fields); + } + + if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, + &info->covered_fields)) + { + DBUG_PRINT("info", ("ROR-intersect is covering now")); + info->is_covering= true; + } + + printf(" records: %g\n", rows2double(param->table->file->records) * + info->records_fract); + info->total_cost= info->index_scan_costs; + if (!info->is_covering) + { + ha_rows table_recs= info->param->table->file->records; + info->total_cost += + get_sweep_read_cost(info->param, + (ha_rows)(info->records_fract*table_recs)); + } + DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, + info->is_covering?"" : "non-")); + DBUG_RETURN(true); +} + + +/* + Get best ROR-intersection plan using non-covering ROR-intersection search + algorithm. The returned plan may be covering. + + SYNOPSIS + get_best_ror_intersect() + param Parameter from test_quick_select function. + tree Transformed restriction condition to be used to look + for ROR scans. + force_index_only If true, don't calculate costs of full rows retrieval. + read_time Do not return read plans with cost > read_time. + are_all_covering [out] set to true if union of all scans covers all + fields needed by the query (and it is possible to build + a covering ROR-intersection) + + NOTES + get_key_scans_params must be called before for the same SEL_TREE before + this function can be called. + + The approximate best non-covering plan search algorithm is as follows: + + find_min_ror_intersection_scan() + { + R= select all ROR scans; + order R by (E(#records_matched) * key_record_length). + + S= first(R); -- set of scans that will be used for ROR-intersection + R= R-first(S); + min_cost= cost(S); + min_scan= make_scan(S); + while (R is not empty) + { + if (!selectivity(S + first(R) < selectivity(S))) + continue; + + S= S + first(R); + R= R - first(R); + if (cost(S) < min_cost) + { + min_cost= cost(S); + min_scan= make_scan(S); + } + } + return min_scan; + } + + See ror_intersect_add function for ROR intersection costs. + + Special handling for Clustered PK scans + Clustered PK contains all table fields, so using it as a regular scan in + index intersection doesn't make sense: a range scan on CPK will be less + expensive in this case. + Clustered PK scan has special handling in ROR-intersection: it is not used + to retrieve rows, instead its condition is used to filter row references + we get from scans on other keys. + + RETURN + ROR-intersection table read plan + NULL if out of memory or no suitable plan found. +*/ + +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering) +{ + uint idx; + double min_cost= read_time; + DBUG_ENTER("get_best_ror_intersect"); + + if (tree->n_ror_scans < 2) + DBUG_RETURN(NULL); + + /* + Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them. + Also find and save clustered PK scan if there is one. + */ + ROR_SCAN_INFO **cur_ror_scan; + ROR_SCAN_INFO *cpk_scan= NULL; + bool cpk_scan_used= false; + if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + param->keys))) + return NULL; + uint cpk_no= (param->table->file->primary_key_is_clustered())? + param->table->primary_key : MAX_KEY; + + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) + { + ROR_SCAN_INFO *scan; + if (!tree->ror_scans_map.is_set(idx)) + continue; + if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) + return NULL; + if (param->real_keynr[idx] == cpk_no) + { + cpk_scan= scan; + tree->n_ror_scans--; + } + else + *(cur_ror_scan++)= scan; + } + + tree->ror_scans_end= cur_ror_scan; + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", + tree->ror_scans, + tree->ror_scans_end);); + /* + Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized + ROR_SCAN_INFOs. + Now, get a minimal key scan using an approximate algorithm. + */ + qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", + tree->ror_scans, + tree->ror_scans_end);); + + ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ + ROR_SCAN_INFO **intersect_scans_end; + if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + tree->n_ror_scans))) + return NULL; + intersect_scans_end= intersect_scans; + + /* Create and incrementally update ROR intersection. */ + ROR_INTERSECT_INFO *intersect; + if (!(intersect= ror_intersect_init(param, false))) + return NULL; + + /* [intersect_scans, intersect_scans_best) will hold the best combination */ + ROR_SCAN_INFO **intersect_scans_best= NULL; + ha_rows best_rows; + bool is_best_covering; + double best_index_scan_costs; + LINT_INIT(best_rows); /* protected by intersect_scans_best */ + LINT_INIT(is_best_covering); + LINT_INIT(best_index_scan_costs); + + cur_ror_scan= tree->ror_scans; + /* Start with one scan */ + intersect_scans_best= intersect_scans; + while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) + { + /* S= S + first(R); */ + if (ror_intersect_add(param, intersect, *cur_ror_scan)) + *(intersect_scans_end++)= *cur_ror_scan; + /* R= R - first(R); */ + cur_ror_scan++; + + if (intersect->total_cost < min_cost) + { + /* Local minimum found, save it */ + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + intersect_scans_best= intersect_scans_end; + best_index_scan_costs= intersect->index_scan_costs; + } + } + + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "best ROR-intersection", + intersect_scans, + intersect_scans_best);); + + *are_all_covering= intersect->is_covering; + uint best_num= intersect_scans_best - intersect_scans; + /* + Ok, found the best ROR-intersection of non-CPK key scans. + Check if we should add a CPK scan. + + If the obtained ROR-intersection is covering, it doesn't make sense + to add CPK scan - Clustered PK contains all fields and if we're doing + CPK scan doing other CPK scans will only add more overhead. + */ + if (cpk_scan && !intersect->is_covering) + { + /* + Handle the special case: ROR-intersect(PRIMARY, key1) is + the best, but cost(range(key1)) > cost(best_non_ror_range_scan) + */ + if (best_num == 0) + { + ror_intersect_reinit(intersect); + if (!ror_intersect_add(param, intersect, cpk_scan)) + DBUG_RETURN(NULL); /* shouldn't happen actually actually */ + *(intersect_scans_end++)= *cur_ror_scan; + best_num++; + } + + if (ror_intersect_add(param, intersect, cpk_scan)) + cpk_scan_used= true; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + } + + /* Ok, return ROR-intersect plan if we have found one */ + TRP_ROR_INTERSECT *trp= NULL; + if (best_num > 1 || cpk_scan_used) + { + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + if (!(trp->first_scan= + (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)*best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= is_best_covering; + trp->read_cost= min_cost; + trp->records= best_rows? best_rows : 1; + trp->index_scan_costs= best_index_scan_costs; + trp->cpk_scan= cpk_scan; + } + DBUG_RETURN(trp); +} + /* - Calculate quick range select read time, # of records, and best key to use - without constructing QUICK_RANGE_SELECT object. + Get best covering ROR-intersection. + SYNOPSIS + get_best_covering_ror_intersect() + param Parameter from test_quick_select function. + tree SEL_TREE with sets of intervals for different keys. + read_time Don't return table read plans with cost > read_time. + + RETURN + Best covering ROR-intersection plan + NULL if no plan found. + + NOTES + get_best_ror_intersect must be called for a tree before calling this + function for it. + This function invalidates tree->ror_scans member values. + + The following approximate algorithm is used: + I=set of all covering indexes + F=set of all fields to cover + S={} + + do { + Order I by (#covered fields in F desc, + #components asc, + number of first not covered component asc); + F=F-covered by first(I); + S=S+first(I); + I=I-first(I); + } while F is not empty. +*/ + +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time) +{ + ROR_SCAN_INFO **ror_scan_mark; + ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; + DBUG_ENTER("get_best_covering_ror_intersect"); + uint nbits= param->fields_bitmap_size*8; + + for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) + (*scan)->key_components= + param->table->key_info[(*scan)->keynr].key_parts; + + /* + Run covering-ROR-search algorithm. + Assume set I is [ror_scan .. ror_scans_end) + */ + + /*I=set of all covering indexes */ + ror_scan_mark= tree->ror_scans; + + uchar buf[MAX_KEY/8+1]; + MY_BITMAP covered_fields; + if (bitmap_init(&covered_fields, buf, nbits, false)) + DBUG_RETURN(0); + bitmap_clear_all(&covered_fields); + + double total_cost= 0.0f; + ha_rows records=0; + bool all_covered; + + DBUG_PRINT("info", ("Building covering ROR-intersection")); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "building covering ROR-I", + ror_scan_mark, ror_scans_end);); + do { + /* + Update changed sorting info: + #covered fields, + number of first not covered component + Calculate and save these values for each of remaining scans. + */ + for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) + { + bitmap_subtract(&(*scan)->covered_fields, &covered_fields); + (*scan)->used_fields_covered= + bitmap_bits_set(&(*scan)->covered_fields); + (*scan)->first_uncovered_field= + bitmap_get_first(&(*scan)->covered_fields); + } + + qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); + + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "remaining scans", + ror_scan_mark, ror_scans_end);); + + /* I=I-first(I) */ + total_cost += (*ror_scan_mark)->index_read_cost; + records += (*ror_scan_mark)->records; + DBUG_PRINT("info", ("Adding scan on %s", + param->table->key_info[(*ror_scan_mark)->keynr].name)); + if (total_cost > read_time) + DBUG_RETURN(NULL); + /* F=F-covered by first(I) */ + bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); + } while (!all_covered && (++ror_scan_mark < ror_scans_end)); + + if (!all_covered) + DBUG_RETURN(NULL); /* should not happen actually */ + + /* + Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with + cost total_cost. + */ + DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating covering ROR-intersect", + tree->ror_scans, ror_scan_mark);); + + /* Add priority queue use cost. */ + total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2); + DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); + + if (total_cost > read_time) + DBUG_RETURN(NULL); + + TRP_ROR_INTERSECT *trp; + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + uint best_num= (ror_scan_mark - tree->ror_scans); + if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= true; + trp->read_cost= total_cost; + trp->records= records; + + DBUG_RETURN(trp); +} + + +/* + Get best "range" table read plan for given SEL_TREE. + Also update PARAM members and store ROR scans info in the SEL_TREE. SYNOPSIS get_quick_select_params - tree in make range select for this SEL_TREE - param in parameters from test_quick_select - needed_reg in/out other table data needed by this quick_select - index_read_must_be_used if true, assume 'index only' option will be set + param parameters from test_quick_select + tree make range select for this SEL_TREE + index_read_must_be_used if true, assume 'index only' option will be set (except for clustered PK indexes) - read_time out read time estimate - records out # of records estimate - key_to_read out SEL_ARG to be used for creating quick select + read_time don't create read plans with cost > read_time. + RETURN + Best range read plan + NULL if no plan found or error occurred */ -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_must_be_used, - double *read_time, ha_rows *records, - SEL_ARG ***key_to_read) +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time) { int idx; - int result = 1; + SEL_ARG **key,**end, **key_to_read= NULL; + ha_rows best_records; + TRP_RANGE* read_plan= NULL; bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + DBUG_ENTER("get_key_scans_params"); + LINT_INIT(best_records); /* protected by key_to_read */ /* Note that there may be trees that have type SEL_TREE::KEY but contain no key reads at all, e.g. tree for expression "key1 is not null" where key1 is defined as "not null". - */ - SEL_ARG **key,**end; - - for (idx= 0,key=tree->keys, end=key+param->keys ; + */ + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, + "tree scans");); + tree->ror_scans_map.clear_all(); + tree->n_ror_scans= 0; + for (idx= 0,key=tree->keys, end=key+param->keys; key != end ; key++,idx++) { @@ -1472,19 +2952,24 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param, uint keynr= param->real_keynr[idx]; if ((*key)->type == SEL_ARG::MAYBE_KEY || (*key)->maybe_flag) - needed_reg.set_bit(keynr); + param->needed_reg->set_bit(keynr); bool read_index_only= index_read_must_be_used? true : (bool)param->table->used_keys.is_set(keynr); - found_records=check_quick_select(param, idx, *key); + found_records= check_quick_select(param, idx, *key); + if (param->is_ror_scan) + { + tree->n_ror_scans++; + tree->ror_scans_map.set_bit(idx); + } if (found_records != HA_POS_ERROR && found_records > 2 && read_index_only && (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) && !(pk_is_clustered && keynr == param->table->primary_key)) { /* We can resolve this by only reading through this key. */ - found_read_time=get_index_only_read_time(param, found_records, keynr); + found_read_time= get_index_only_read_time(param,found_records,keynr); } else { @@ -1497,18 +2982,138 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param, found_records)+ (double) found_records / TIME_FOR_COMPARE); } - if (*read_time > found_read_time && found_records != HA_POS_ERROR) + if (read_time > found_read_time && found_records != HA_POS_ERROR + /*|| read_time == DBL_MAX*/ ) + { + read_time= found_read_time; + best_records= found_records; + key_to_read= key; + } + + } + } + + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, + "ROR scans");); + if (key_to_read) + { + idx= key_to_read - tree->keys; + if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx))) + { + read_plan->records= best_records; + read_plan->is_ror= tree->ror_scans_map.is_set(idx); + read_plan->read_cost= read_time; + DBUG_PRINT("info",("Returning range plan for key %s, cost %g", + param->table->key_info[param->real_keynr[idx]].name, + read_plan->read_cost)); + } + } + else + DBUG_PRINT("info", ("No 'range' table read plan found")); + + DBUG_RETURN(read_plan); +} + + +QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_MERGE_SELECT *quick_imerge; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) + return NULL; + + quick_imerge->records= records; + quick_imerge->read_time= read_cost; + for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))|| + quick_imerge->push_quick_back(quick)) + { + delete quick; + delete quick_imerge; + return NULL; + } + } + return quick_imerge; +} + +QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_INTERSECT_SELECT *quick_intrsect; + QUICK_RANGE_SELECT *quick; + DBUG_ENTER("TRP_ROR_INTERSECT::make_quick"); + MEM_ROOT *alloc; + + if ((quick_intrsect= + new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table, + retrieve_full_rows? (!is_covering):false, + parent_alloc))) + { + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating ROR-intersect", + first_scan, last_scan);); + alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc; + for(; first_scan != last_scan;++first_scan) + { + if (!(quick= get_quick_select(param, (*first_scan)->idx, + (*first_scan)->sel_arg, alloc)) || + quick_intrsect->push_quick_back(quick)) + { + delete quick_intrsect; + DBUG_RETURN(NULL); + } + } + if (cpk_scan) + { + if (!(quick= get_quick_select(param, cpk_scan->idx, + cpk_scan->sel_arg, alloc))) { - *read_time= found_read_time; - *records= found_records; - *key_to_read= key; - result = 0; + delete quick_intrsect; + DBUG_RETURN(NULL); } + quick_intrsect->cpk_quick= quick; } + quick_intrsect->records= records; + quick_intrsect->read_time= read_cost; } - return result; + DBUG_RETURN(quick_intrsect); } + +QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_UNION_SELECT *quick_roru; + TABLE_READ_PLAN **scan; + QUICK_SELECT_I *quick; + DBUG_ENTER("TRP_ROR_UNION::make_quick"); + /* + It is impossible to construct a ROR-union that will not retrieve full + rows, ignore retrieve_full_rows parameter. + */ + if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) + { + for(scan= first_ror; scan != last_ror; scan++) + { + if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) || + quick_roru->push_quick_back(quick)) + DBUG_RETURN(NULL); + } + quick_roru->records= records; + quick_roru->read_time= read_cost; + } + DBUG_RETURN(quick_roru); +} + +/****************************************************************************/ /* make a select tree of all keys in condition */ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) @@ -2928,7 +4533,7 @@ SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par) } - /* Test that the proporties for a red-black tree holds */ + /* Test that the properties for a red-black tree hold */ #ifdef EXTRA_DEBUG int test_rb_tree(SEL_ARG *element,SEL_ARG *parent) @@ -3016,38 +4621,115 @@ void SEL_ARG::test_use_count(SEL_ARG *root) #endif +/* + Calculate estimate of number records that will be retrieved by a range + scan on given index using given SEL_ARG intervals tree. + SYNOPSIS + check_quick_select + param Parameter from test_quick_select + idx Number of index to use in PARAM::key SEL_TREE::key + tree Transformed selection condition, tree->key[idx] holds intervals + tree to be used for scanning. + NOTES + param->is_ror_scan is set to reflect if the key scan is a ROR (see + is_key_scan_ror function for more info) + param->table->quick_*, param->range_count (and maybe others) are + updated with data of given key scan, see check_quick_keys for details. + + RETURN + Estimate # of records to be retrieved. + HA_POS_ERROR if estimate calculation failed due to table handler problems. -/***************************************************************************** -** Check how many records we will find by using the found tree -*****************************************************************************/ +*/ static ha_rows check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) { ha_rows records; + bool cpk_scan; + uint key; DBUG_ENTER("check_quick_select"); if (!tree) DBUG_RETURN(HA_POS_ERROR); // Can't use it param->max_key_part=0; param->range_count=0; + key= param->real_keynr[idx]; + if (tree->type == SEL_ARG::IMPOSSIBLE) DBUG_RETURN(0L); // Impossible select. return if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0) DBUG_RETURN(HA_POS_ERROR); // Don't use tree + + enum ha_key_alg key_alg= param->table->key_info[key].algorithm; + if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) + { + /* Records are not ordered by rowid for other types of indexes. */ + param->is_ror_scan= false; + cpk_scan= false; + } + else + { + /* + Clustered PK scan is a special case, check_quick_keys doesn't recognize + CPK scans as ROR scans (while actually any CPK scan is a ROR scan). + */ + cpk_scan= (param->table->primary_key == param->real_keynr[idx]) && + param->table->file->primary_key_is_clustered(); + param->is_ror_scan= !cpk_scan; + } + records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); if (records != HA_POS_ERROR) - { - uint key=param->real_keynr[idx]; + { param->table->quick_keys.set_bit(key); param->table->quick_rows[key]=records; param->table->quick_key_parts[key]=param->max_key_part+1; + + if (cpk_scan) + param->is_ror_scan= true; } DBUG_PRINT("exit", ("Records: %lu", (ulong) records)); DBUG_RETURN(records); } +/* + Recursively calculate estimate of # rows that will be retrieved by + key scan on key idx. + SYNOPSIS + check_quick_keys() + param Parameter from test_quick select function. + idx Number of key to use in PARAM::keys in list of used keys + (param->real_keynr[idx] holds the key number in table) + key_tree SEL_ARG tree being examined. + min_key Buffer with partial min key value tuple + min_key_flag + max_key Buffer with partial max key value tuple + max_key_flag + + NOTES + The function does the recursive descent on the tree via SEL_ARG::left, + SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates + are calculated using records_in_range calls at the leaf nodes and then + summed. + + param->min_key and param->max_key are used to hold prefixes of key value + tuples. + + The side effects are: + + param->max_key_part is updated to hold the maximum number of key parts used + in scan minus 1. + + param->range_count is incremented if the function finds a range that + wasn't counted by the caller. + + param->is_ror_scan is cleared if the function detects that the key scan is + not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror + function for description of which key scans are ROR scans) +*/ + static ha_rows check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, char *min_key,uint min_key_flag, char *max_key, @@ -3058,6 +4740,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, param->max_key_part=max(param->max_key_part,key_tree->part); if (key_tree->left != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. + */ + param->is_ror_scan= false; records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag, max_key,max_key_flag); if (records == HA_POS_ERROR) // Impossible @@ -3066,12 +4755,25 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, uint tmp_min_flag,tmp_max_flag,keynr; char *tmp_min_key=min_key,*tmp_max_key=max_key; - + key_tree->store(param->key[idx][key_tree->part].store_length, &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag); uint min_key_length= (uint) (tmp_min_key- param->min_key); uint max_key_length= (uint) (tmp_max_key- param->max_key); + if (param->is_ror_scan) + { + /* + If the index doesn't cover entire key, mark the scan as non-ROR scan. + (TODO sergeyp: investigate if we could do better here) + */ + uint16 fieldnr= param->table->key_info[param->real_keynr[idx]]. + key_part[key_tree->part].fieldnr - 1; + if (param->table->field[fieldnr]->key_length() != + param->key[idx][key_tree->part].part_length) + param->is_ror_scan= false; + } + if (key_tree->next_key_part && key_tree->next_key_part->part == key_tree->part+1 && key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) @@ -3085,6 +4787,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp_max_key, max_key_flag | key_tree->max_flag); goto end; // Ugly, but efficient } + else + { + /* The interval for current key part is not c1 <= keyXpartY <= c1 */ + param->is_ror_scan= false; + } + tmp_min_flag=key_tree->min_flag; tmp_max_flag=key_tree->max_flag; if (!tmp_min_flag) @@ -3113,6 +4821,24 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp=1; // Max one record else { + if (param->is_ror_scan) + { + /* + If we get here, the condition on the key was converted to form + "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND + somecond(keyXpart{key_tree->part})" + Check if + somecond is "keyXpart{key_tree->part} = const" and + uncovered "tail" of KeyX parts is either empty or is identical to + first members of clustered primary key. + */ + if (!(min_key_length == max_key_length && + !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) && + !key_tree->min_flag && !key_tree->max_flag && + is_key_scan_ror(param, keynr, key_tree->part + 1))) + param->is_ror_scan= false; + } + if (tmp_min_flag & GEOM_FLAG) { tmp= param->table->file-> @@ -3143,6 +4869,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, records+=tmp; if (key_tree->right != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. + */ + param->is_ror_scan= false; tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag, max_key,max_key_flag); if (tmp == HA_POS_ERROR) @@ -3153,11 +4886,94 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, } -/**************************************************************************** -** change a tree to a structure to be used by quick_select -** This uses it's own malloc tree -** The caller should call QUICK_SELCT::init for returned quick select -****************************************************************************/ +/* + Check if key scan on given index with equality conditions on first n key + parts is a ROR scan. + + SYNOPSIS + is_key_scan_ror() + param Parameter from test_quick_select + keynr Number of key in the table. The key must not be a clustered + primary key. + nparts Number of first key parts for which equality conditions + are present. + + NOTES + ROR (Rowid Ordered Retrieval) key scan is a key scan that produces + ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function) + + An index scan is a ROR scan if it is done using a condition in form + + "key1_1=c_1 AND ... AND key1_n=c_n" (1) + + where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n]) + + and the table has a clustered Primary Key + + PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being + identical to uncovered parts ot the key being scanned (2) + + Scans on HASH indexes are not ROR scans, + any range scan on clustered primary key is ROR scan (3) + + Check (1) is made in check_quick_keys() + Check (3) is made check_quick_select() + Check (2) is made by this function. + + RETURN + true If the scan is ROR-scan + false otherwise +*/ + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) +{ + KEY *table_key= param->table->key_info + keynr; + KEY_PART_INFO *key_part= table_key->key_part + nparts; + KEY_PART_INFO *key_part_end= table_key->key_part + + table_key->key_parts; + + if (key_part == key_part_end) + return true; + uint pk_number= param->table->primary_key; + if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY) + return false; + + KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part; + KEY_PART_INFO *pk_part_end= pk_part + + param->table->key_info[pk_number].key_parts; + for(;(key_part!=key_part_end) && (pk_part != pk_part_end); + ++key_part, ++pk_part) + { + if ((key_part->field != pk_part->field) || + (key_part->length != pk_part->length)) + return false; + } + return (key_part == key_part_end); +} + + +/* + Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. + + SYNOPSIS + get_quick_select() + param + idx Index of used key in param->key. + key_tree SEL_ARG tree for the used key + parent_alloc If not NULL, use it to allocate memory for + quick select data. Otherwise use quick->alloc. + + RETURN + NULL on error + otherwise created quick select + + NOTES + The caller must call QUICK_SELCT::init for returned quick select + + CAUTION! This function may change THR_MALLOC to a MEM_ROOT which will be + deallocated when the returned quick select is deleted. +*/ + QUICK_RANGE_SELECT * get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, MEM_ROOT *parent_alloc) @@ -3346,6 +5162,47 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length) } +bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields) +{ + return check_if_key_used(head, index, *fields); +} + +bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->check_if_keys_used(fields)) + return 1; + } + return 0; +} + /**************************************************************************** Create a QUICK RANGE based on a key ****************************************************************************/ @@ -3426,8 +5283,6 @@ err: } -#define MEM_STRIP_BUF_SIZE thd->variables.sortbuff_size - /* Fetch all row ids into unique. @@ -3461,9 +5316,9 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() cur_quick_select->init(); - unique= new Unique(refposcmp2, (void *) &head->file->ref_length, + unique= new Unique(refpos_order_cmp, (void *)head->file, head->file->ref_length, - MEM_STRIP_BUF_SIZE); + thd->variables.sortbuff_size); if (!unique) DBUG_RETURN(1); do @@ -3496,8 +5351,7 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() continue; cur_quick_select->file->position(cur_quick_select->record); - result= unique->unique_add((char*)cur_quick_select->file->ref); - + result= unique->unique_add((char*)cur_quick_select->file->ref); if (result) DBUG_RETURN(1); @@ -3550,6 +5404,159 @@ int QUICK_INDEX_MERGE_SELECT::get_next() DBUG_RETURN(result); } + +/* + Retrieve next record. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::get_next() + + NOTES + Invariant on enter/exit: all intersected selects have retrieved all index + records with rowid <= some_rowid_val and no intersected select has + retrieved any index records with rowid > some_rowid_val. + We start fresh and loop until we have retrieved the same rowid in each of + the key scans or we got an error. + + If a Clustered PK scan is present, it is used only to check if row + satisfies its condition (and never used for row retrieval). + + RETURN + 0 - Ok + other - Error code if any error occurred. +*/ + +int QUICK_ROR_INTERSECT_SELECT::get_next() +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + int error, cmp; + uint last_rowid_count=0; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); + + /* Get a rowid for first quick and save it as a 'candidate' */ + quick= quick_it++; + if (cpk_quick) + { + do { + error= quick->get_next(); + }while (!error && !cpk_quick->row_in_ranges()); + } + else + error= quick->get_next(); + + if (error) + DBUG_RETURN(error); + + quick->file->position(quick->record); + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + + while (last_rowid_count < quick_selects.elements) + { + if (!(quick= quick_it++)) + { + quick_it.rewind(); + quick= quick_it++; + } + + do { + if ((error= quick->get_next())) + DBUG_RETURN(error); + quick->file->position(quick->record); + cmp= head->file->cmp_ref(quick->file->ref, last_rowid); + } while (cmp < 0); + + /* Ok, current select 'caught up' and returned ref >= cur_ref */ + if (cmp > 0) + { + /* Found a row with ref > cur_ref. Make it a new 'candidate' */ + if (cpk_quick) + { + while (!cpk_quick->row_in_ranges()) + { + if ((error= quick->get_next())) + DBUG_RETURN(error); + } + } + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + } + else + { + /* current 'candidate' row confirmed by this select */ + last_rowid_count++; + } + } + + /* We get here iff we got the same row ref in all scans. */ + if (need_to_fetch_row) + error= head->file->rnd_pos(head->record[0], last_rowid); + DBUG_RETURN(error); +} + + +/* + Retrieve next record. + SYNOPSIS + QUICK_ROR_UNION_SELECT::get_next() + + NOTES + Enter/exit invariant: + For each quick select in the queue a {key,rowid} tuple has been + retrieved but the corresponding row hasn't been passed to output. + + RETURN + 0 - Ok + other - Error code if any error occurred. +*/ + +int QUICK_ROR_UNION_SELECT::get_next() +{ + int error, dup_row; + QUICK_SELECT_I *quick; + byte *tmp; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next"); + + do + { + if (!queue.elements) + DBUG_RETURN(HA_ERR_END_OF_FILE); + /* Ok, we have a queue with >= 1 scans */ + + quick= (QUICK_SELECT_I*)queue_top(&queue); + memcpy(cur_rowid, quick->last_rowid, rowid_length); + + /* put into queue rowid from the same stream as top element */ + if ((error= quick->get_next())) + { + if (error != HA_ERR_END_OF_FILE) + DBUG_RETURN(error); + queue_remove(&queue, 0); + } + else + { + quick->save_last_pos(); + queue_replaced(&queue); + } + + if (!have_prev_rowid) + { + /* No rows have been returned yet */ + dup_row= false; + have_prev_rowid= true; + } + else + dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid); + }while (dup_row); + + tmp= cur_rowid; + cur_rowid= prev_rowid; + prev_rowid= tmp; + + error= head->file->rnd_pos(quick->record, prev_rowid); + DBUG_RETURN(error); +} + /* get next possible record using quick-struct */ int QUICK_RANGE_SELECT::get_next() @@ -3969,6 +5976,232 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, #endif +void QUICK_RANGE_SELECT::add_info_string(String *str) +{ + KEY *key_info= head->key_info + index; + str->append(key_info->name); +} + +void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str) +{ + QUICK_RANGE_SELECT *quick; + bool first= true; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + str->append("sort_union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + if (pk_quick_select) + { + str->append(','); + pk_quick_select->add_info_string(str); + } + str->append(')'); +} + +void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + str->append("intersect("); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (!first) + str->append(','); + else + first= false; + str->append(key_info->name); + } + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + str->append(','); + str->append(key_info->name); + } + str->append(')'); +} + +void QUICK_ROR_UNION_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + str->append("union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + str->append(')'); +} + + +void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + KEY *key_info= head->key_info + index; + key_names->append(key_info->name); + length= longlong2str(max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); +} + +void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + key_names->append(','); + used_lengths->append(','); + } + + KEY *key_info= head->key_info + quick->index; + key_names->append(key_info->name); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (pk_quick_select) + { + KEY *key_info= head->key_info + pk_quick_select->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (first) + first= false; + else + { + key_names->append(','); + used_lengths->append(','); + } + key_names->append(key_info->name); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + used_lengths->append(','); + key_names->append(','); + } + quick->add_keys_and_lengths(key_names, used_lengths); + } +} + +#ifndef DBUG_OFF + +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg) +{ + SEL_ARG **key,**end; + int idx; + char buff[1024]; + DBUG_ENTER("print_sel_tree"); + if (! _db_on_) + DBUG_VOID_RETURN; + + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (idx= 0,key=tree->keys, end=key+param->keys ; + key != end ; + key++,idx++) + { + if (tree_map->is_set(idx)) + { + uint keynr= param->real_keynr[idx]; + if (tmp.length()) + tmp.append(','); + tmp.append(param->table->key_info[keynr].name); + } + } + if (!tmp.length()) + tmp.append("(empty)"); + + DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr())); + + DBUG_VOID_RETURN; +} + +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end) +{ + DBUG_ENTER("print_ror_scans"); + if (! _db_on_) + DBUG_VOID_RETURN; + + char buff[1024]; + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for(;start != end; start++) + { + if (tmp.length()) + tmp.append(','); + tmp.append(table->key_info[(*start)->keynr].name); + } + if (!tmp.length()) + tmp.append("(empty)"); + DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr())); + DBUG_VOID_RETURN; +} + /***************************************************************************** ** Print a quick range for debugging ** TODO: @@ -3976,8 +6209,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, ** of locking the DEBUG stream ! *****************************************************************************/ -#ifndef DBUG_OFF - static void print_key(KEY_PART *key_part,const char *key,uint used_length) { @@ -4010,72 +6241,122 @@ print_key(KEY_PART *key_part,const char *key,uint used_length) } -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg) +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) { + char buf[MAX_KEY/8+1]; DBUG_ENTER("print_param"); if (! _db_on_ || !quick) DBUG_VOID_RETURN; + DBUG_LOCK_FILE; + + quick->dbug_dump(0, true); + fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf)); - List_iterator_fast<QUICK_RANGE_SELECT> it(quick->quick_selects); - QUICK_RANGE_SELECT* quick_range_sel; - while ((quick_range_sel= it++)) - { - print_quick_sel_range(quick_range_sel, needed_reg); - } - if (quick->pk_quick_select) - print_quick_sel_range(quick->pk_quick_select, needed_reg); - + DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg) +static void print_rowid(byte* val, int len) { - QUICK_RANGE *range; - char buf[MAX_KEY/8+1]; - DBUG_ENTER("print_param"); - if (! _db_on_ || !quick) - DBUG_VOID_RETURN; - + byte *pb; DBUG_LOCK_FILE; - fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n", - quick->index, needed_reg->print(buf)); + fputc('\"', DBUG_FILE); + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%c", *pb); + fprintf(DBUG_FILE, "\", hex: "); + + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%x ", *pb); + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; +} - QUICK_RANGE **pr= (QUICK_RANGE**)quick->ranges.buffer; - QUICK_RANGE **last_range= pr + quick->ranges.elements; - for (; pr!=last_range; ++pr) +void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) +{ + fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n", + indent, "", head->key_info[index].name, max_used_key_length); + + if (verbose) { - range= *pr; - if (!(range->flag & NO_MIN_RANGE)) + QUICK_RANGE *range; + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + QUICK_RANGE **last_range= pr + ranges.elements; + for (; pr!=last_range; ++pr) { - print_key(quick->key_parts,range->min_key,range->min_length); - if (range->flag & NEAR_MIN) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - } - fputs("X",DBUG_FILE); + fprintf(DBUG_FILE, "%*s", indent + 2, ""); + range= *pr; + if (!(range->flag & NO_MIN_RANGE)) + { + print_key(key_parts,range->min_key,range->min_length); + if (range->flag & NEAR_MIN) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + } + fputs("X",DBUG_FILE); - if (!(range->flag & NO_MAX_RANGE)) - { - if (range->flag & NEAR_MAX) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - print_key(quick->key_parts,range->max_key,range->max_length); + if (!(range->flag & NO_MAX_RANGE)) + { + if (range->flag & NEAR_MAX) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + print_key(key_parts,range->max_key,range->max_length); + } + fputs("\n",DBUG_FILE); } - fputs("\n",DBUG_FILE); } - DBUG_UNLOCK_FILE; - DBUG_VOID_RETURN; +} + +void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (pk_quick_select) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + pk_quick_select->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", + indent, "", need_to_fetch_row? "":"non-"); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (cpk_quick) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + cpk_quick->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + QUICK_SELECT_I *quick; + fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + fprintf(DBUG_FILE, "%*s}\n", indent, ""); } #endif /***************************************************************************** -** Instansiate templates +** Instantiate templates *****************************************************************************/ #ifdef __GNUC__ diff --git a/sql/opt_range.h b/sql/opt_range.h index 30e0fcd7be5..22168a63f8e 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -78,25 +78,59 @@ public: ha_rows records; /* estimate of # of records to be retrieved */ double read_time; /* time to perform this retrieval */ TABLE *head; + /* + Index this quick select uses, or MAX_KEY for quick selects + that use several indexes + */ + uint index; /* - the only index this quick select uses, or MAX_KEY for - QUICK_INDEX_MERGE_SELECT + Total length of first used_key_parts parts of the key. + Applicable if index!= MAX_KEY. */ - uint index; - uint max_used_key_length, used_key_parts; + uint max_used_key_length; + + /* + Max. number of (first) key parts this quick select uses for retrieval. + eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2. + Applicable if index!= MAX_KEY. + */ + uint used_key_parts; QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; - /* - Call init() immediately after creation of quick select. if init() call - fails, reset() or get_next() must not be called. + + /* + Do post-constructor initialization. + SYNOPSIS + init() + + init() performs initializations that should have been in constructor if + it was possible to return errors from constructors. The join optimizer may + create and then delete quick selects without retrieving any rows so init() + must not contain any IO or CPU intensive code. + + If init() call fails the only valid action is to delete this quick select, + reset() and get_next() must not be called. + + RETURN + 0 OK + other Error code */ virtual int init() = 0; /* - Call reset() before first get_next call. get_next must not be called if - reset() call fails. + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + reset() should be called when it is certain that row retrieval will be + necessary. This call may do heavyweight initialization like buffering first + N records etc. If reset() call fails get_next() must not be called. + + RETURN + 0 OK + other Error code */ virtual int reset(void) = 0; virtual int get_next() = 0; /* get next record to retrieve */ @@ -107,27 +141,96 @@ public: QS_TYPE_RANGE = 0, QS_TYPE_INDEX_MERGE = 1, QS_TYPE_RANGE_DESC = 2, - QS_TYPE_FULLTEXT = 3 + QS_TYPE_FULLTEXT = 3, + QS_TYPE_ROR_INTERSECT = 4, + QS_TYPE_ROR_UNION = 5, }; - /* Get type of this quick select - one of the QS_* values */ - virtual int get_type() = 0; + /* Get type of this quick select - one of the QS_TYPE_* values */ + virtual int get_type() = 0; + + /* + Initialize this quick select as a merged scan inside a ROR-union or a ROR- + intersection scan. The caller must not additionally call init() if this + function is called. + SYNOPSIS + init_ror_merged_scan() + reuse_handler quick select may use (q: psergey??) + (q: is this natural that we do it this way) + NOTES + psergey? + */ + virtual int init_ror_merged_scan(bool reuse_handler) + { DBUG_ASSERT(0); return 1; } + + /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */ + virtual void save_last_pos(){}; + + /* + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. + This is used by select_describe. + */ + virtual void add_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + /* + Append text representation of quick select structure (what and how is + merged) to str. The result is added to "Extra" field in EXPLAIN output. + This function is implemented only by quick selects that merge other quick + selects output and/or can produce output suitable for merging. + */ + virtual void add_info_string(String *str) {}; + /* + Return 1 if any index used by this quick select + a) uses field that is listed in passed field list or + b) is automatically updated (like a timestamp) + */ + virtual bool check_if_keys_used(List<Item> *fields); + + /* + rowid of last row retrieved by this quick select. This is used only when + doing ROR-index_merge selects + */ + byte *last_rowid; + + /* + Table record buffer used by this quick select. + Currently this is always the same as head->record[0]. psergey: check that! + */ + byte *record; +#ifndef DBUG_OFF + /* + Print quick select information to DBUG_FILE. Caller is responsible + for locking DBUG_FILE before this call and unlocking it afterwards. + */ + virtual void dbug_dump(int indent, bool verbose)= 0; +#endif }; + struct st_qsel_param; class SEL_ARG; -class QUICK_RANGE_SELECT : public QUICK_SELECT_I +/* + Quick select that does a range scan on a single key. The records are + returned in key order. +*/ +class QUICK_RANGE_SELECT : public QUICK_SELECT_I { protected: bool next,dont_free; public: int error; +protected: handler *file; - byte *record; + /* + If true, this quick select has its "own" handler object which should be + closed no later then this quick select is deleted. + */ + bool free_file; + protected: - friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map* needed_reg); friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, struct st_table_ref *ref); @@ -141,17 +244,19 @@ protected: MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; friend class QUICK_INDEX_MERGE_SELECT; + friend class QUICK_ROR_INTERSECT_SELECT; DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */ QUICK_RANGE **cur_range; /* current element in ranges */ QUICK_RANGE *range; - MEM_ROOT alloc; KEY_PART *key_parts; int cmp_next(QUICK_RANGE *range); int cmp_prev(QUICK_RANGE *range); bool row_in_ranges(); public: + MEM_ROOT alloc; + QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0, MEM_ROOT *parent_alloc=NULL); ~QUICK_RANGE_SELECT(); @@ -167,7 +272,17 @@ public: int get_next(); bool reverse_sorted() { return 0; } bool unique_key_range(); + int init_ror_merged_scan(bool reuse_handler); + void save_last_pos() + { + file->position(record); + }; int get_type() { return QS_TYPE_RANGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif }; @@ -253,6 +368,12 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_INDEX_MERGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); @@ -263,9 +384,6 @@ public: List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it; QUICK_RANGE_SELECT* cur_quick_select; - /* last element in quick_selects list */ - QUICK_RANGE_SELECT* last_quick_select; - /* quick select that uses clustered primary key (NULL if none) */ QUICK_RANGE_SELECT* pk_quick_select; @@ -277,12 +395,120 @@ public: THD *thd; int prepare_unique(); - bool reset_called; /* used to get rows collected in Unique */ READ_RECORD read_record; }; + +/* + Rowid-Ordered Retrieval (ROR) index intersection quick select. + This quick select produces intersection of row sequences returned + by several QUICK_RANGE_SELECTs it "merges". + + All merged QUICK_RANGE_SELECTs must return rowids in rowid order. + QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too. + + All merged quick selects retrieve {rowid, covered_fields} tuples (not full + table records). + QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used + by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't + cover needed all fields. + + If one of the merged quick selects is a Clustered PK range scan, it is + used only to filter rowid sequence produced by other merged quick selects. +*/ + +class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + ~QUICK_ROR_INTERSECT_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_INTERSECT; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + int init_ror_merged_scan(bool reuse_handler); + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* + Range quick selects this intersection consists of, not including + cpk_quick. + */ + List<QUICK_RANGE_SELECT> quick_selects; + + /* + Merged quick select that uses Clustered PK, if there is one. This quick + select is not used for row retrieval, it is used for row retrieval. + */ + QUICK_RANGE_SELECT *cpk_quick; + + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + THD *thd; /* current thread */ + bool need_to_fetch_row; /* if true, do retrieve full table records. */ +}; + + +/* + Rowid-Ordered Retrieval index union select. + This quick select produces union of row sequences returned by several + quick select it "merges". + + All merged quick selects must return rowids in rowid order. + QUICK_ROR_UNION_SELECT will return rows in rowid order, too. + + All merged quick selects are set not to retrieve full table records. + ROR-union quick select always retrieves full records. + +*/ + +class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table); + ~QUICK_ROR_UNION_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_UNION; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + List<QUICK_SELECT_I> quick_selects; /* Merged quick selects */ + + QUEUE queue; /* Priority queue for merge operation */ + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + + THD *thd; /* current thread */ + byte *cur_rowid; /* buffer used in get_next() */ + byte *prev_rowid; /* rowid of last row returned by get_next() */ + bool have_prev_rowid; /* true if prev_rowid has valid data */ + uint rowid_length; /* table rowid length */ +private: + static int queue_cmp(void *arg, byte *val1, byte *val2); +}; + + class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT { public: diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index b9769e3a940..440d404056b 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -291,10 +291,10 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) #define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size -extern "C" int refposcmp2(void* arg, const void *a,const void *b) +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) { - /* arg is a pointer to file->ref_length */ - return memcmp(a,b, *(int*) arg); + handler *file= (handler*)arg; + return file->cmp_ref((const byte*)a, (const byte*)b); } multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, @@ -358,8 +358,8 @@ multi_delete::initialize_tables(JOIN *join) for (walk=walk->next ; walk ; walk=walk->next) { TABLE *table=walk->table; - *tempfiles_ptr++= new Unique (refposcmp2, - (void *) &table->file->ref_length, + *tempfiles_ptr++= new Unique (refpos_order_cmp, + (void *) table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9d6a6999591..89a08d5a4ea 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8058,8 +8058,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } else if (select && select->quick) // Range found by opt_range { - /* assume results are not ordered when index merge is used */ - if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) DBUG_RETURN(0); ref_key= select->quick->index; ref_key_parts= select->quick->used_key_parts; @@ -8120,9 +8128,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if (!select->quick->reverse_sorted()) { + int quick_type= select->quick->get_type(); if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST || - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION) DBUG_RETURN(0); // Use filesort // ORDER BY range_key DESC @@ -10102,6 +10112,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, select_result *result=join->result; Item *item_null= new Item_null(); CHARSET_INFO *cs= &my_charset_latin1; + int quick_type; DBUG_ENTER("select_describe"); DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s", (ulong)join->select_lex, join->select_lex->type, @@ -10128,17 +10139,20 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; - char buff[512],*buff_ptr=buff; + char buff[512]; char buff1[512], buff2[512], buff3[512]; char keylen_str_buf[64]; char derived_name[64]; + String extra(buff, sizeof(buff),cs); String tmp1(buff1,sizeof(buff1),cs); String tmp2(buff2,sizeof(buff2),cs); String tmp3(buff3,sizeof(buff3),cs); + extra.length(0); tmp1.length(0); tmp2.length(0); tmp3.length(0); + quick_type= -1; item_list.empty(); item_list.push_back(new Item_int((int32) join->select_lex->select_number)); @@ -10147,8 +10161,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, cs)); if (tab->type == JT_ALL && tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + quick_type= tab->select->quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) tab->type = JT_INDEX_MERGE; else tab->type = JT_RANGE; @@ -10169,6 +10185,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join_type_str[tab->type]), cs)); uint j; + /* Build "possible_keys" value and add it to item_list */ if (!tab->keys.is_clear_all()) { for (j=0 ; j < table->keys ; j++) @@ -10185,6 +10202,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs)); else item_list.push_back(item_null); + + /* Build "key", "key_len", and "ref" values and add them to item_list */ if (tab->ref.key_parts) { KEY *key_info=table->key_info+ tab->ref.key; @@ -10219,48 +10238,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else if (tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - - List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge-> - quick_selects); - while ((quick= it++)) - { - KEY *key_info= table->key_info + quick->index; - register uint length; - if (tmp3.length()) - tmp3.append(','); - - tmp3.append(key_info->name); - - if (tmp2.length()) - tmp2.append(','); - - length= longlong2str(quick->max_used_key_length, keylen_str_buf, - 10) - - keylen_str_buf; - - tmp2.append(keylen_str_buf, length); - } - } - else - { - KEY *key_info= table->key_info + tab->select->quick->index; - register uint length; - tmp3.append(key_info->name); - - length= longlong2str(tab->select->quick->max_used_key_length, - keylen_str_buf, 10) - - keylen_str_buf; - tmp2.append(keylen_str_buf, length); - } - - item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); + tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3); item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs)); + item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); item_list.push_back(item_null); } else @@ -10269,50 +10249,68 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(item_null); item_list.push_back(item_null); } + /* Add "rows" field to item_list. */ item_list.push_back(new Item_int((longlong) (ulonglong) join->best_positions[i]. records_read, 21)); + /* Build "Extra" field and add it to item_list. */ my_bool key_read=table->key_read; if ((tab->type == JT_NEXT || tab->type == JT_CONST) && table->used_keys.is_set(tab->index)) key_read=1; - + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row) + key_read=1; + if (tab->info) item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs)); else { + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + { + extra.append("; Using "); + tab->select->quick->add_info_string(&extra); + } if (tab->select) { if (tab->use_quick == 2) { char buf[MAX_KEY/8+1]; - sprintf(buff_ptr,"; Range checked for each record (index map: 0x%s)", - tab->keys.print(buf)); - buff_ptr=strend(buff_ptr); + extra.append("; Range checked for each record (index map: 0x"); + extra.append(tab->keys.print(buf)); + extra.append(')'); } else - buff_ptr=strmov(buff_ptr,"; Using where"); + extra.append("; Using where"); } if (key_read) - buff_ptr= strmov(buff_ptr,"; Using index"); + extra.append("; Using index"); if (table->reginfo.not_exists_optimize) - buff_ptr= strmov(buff_ptr,"; Not exists"); + extra.append("; Not exists"); if (need_tmp_table) { need_tmp_table=0; - buff_ptr= strmov(buff_ptr,"; Using temporary"); + extra.append("; Using temporary"); } if (need_order) { need_order=0; - buff_ptr= strmov(buff_ptr,"; Using filesort"); + extra.append("; Using filesort"); } if (distinct & test_all_bits(used_tables,thd->used_tables)) - buff_ptr= strmov(buff_ptr,"; Distinct"); - if (buff_ptr == buff) - buff_ptr+= 2; // Skip inital "; " - item_list.push_back(new Item_string(buff+2,(uint) (buff_ptr - buff)-2, - cs)); + extra.append("; Distinct"); + + /* Skip initial "; "*/ + const char *str= extra.ptr(); + uint32 len= extra.length(); + if (len) + { + str += 2; + len -= 2; + } + item_list.push_back(new Item_string(str, len, cs)); } // For next iteration used_tables|=table->map; diff --git a/sql/sql_select.h b/sql/sql_select.h index b8a56fda757..b2864c166ce 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -339,7 +339,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys); int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds); /* from sql_delete.cc, used by opt_range.cc */ -extern "C" int refposcmp2(void* arg, const void *a,const void *b); +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); /* class to copying an field/item to a key struct */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 90a43f82a94..5c467402497 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -182,37 +182,8 @@ TEST_join(JOIN *join) tab->select->quick_keys.print(buf)); else if (tab->select->quick) { - int quick_type= tab->select->quick->get_type(); - if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) || - (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC)) - { - fprintf(DBUG_FILE, - " quick select used on key %s, length: %d\n", - form->key_info[tab->select->quick->index].name, - tab->select->quick->max_used_key_length); - } - else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - fprintf(DBUG_FILE, - " index_merge quick select used\n"); - - List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->quick_selects); - while ((quick = it++)) - { - fprintf(DBUG_FILE, - " range quick select: key %s, length: %d\n", - form->key_info[quick->index].name, - quick->max_used_key_length); - } - } - else - { - fprintf(DBUG_FILE, - " quick select of unknown nature used\n"); - } + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, false); } else VOID(fputs(" select used\n",DBUG_FILE)); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 5c6ed023485..b85ddff84f9 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -156,18 +156,11 @@ int mysql_update(THD *thd, } init_ftfuncs(thd, &thd->lex->select_lex, 1); /* Check if we are modifying a key that we are used to search with */ + if (select && select->quick) { - if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - used_index= select->quick->index; - used_key_is_modified= (!select->quick->unique_key_range() && - check_if_key_used(table,used_index,fields)); - } - else - { - used_key_is_modified= true; - } + used_key_is_modified= (!select->quick->unique_key_range() && + select->quick->check_if_keys_used(&fields)); } else if ((used_index=table->file->key_used_on_scan) < MAX_KEY) used_key_is_modified=check_if_key_used(table, used_index, fields); @@ -226,7 +219,7 @@ int mysql_update(THD *thd, if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME))) goto err; - + /* If quick select is used, initialize it before retrieving rows. */ if (select && select->quick && select->quick->reset()) goto err; @@ -286,6 +279,9 @@ int mysql_update(THD *thd, if (handle_duplicates == DUP_IGNORE) table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -833,26 +829,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields) case JT_ALL: /* If range search on index */ if (join_tab->quick) - { - if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - return !check_if_key_used(table,join_tab->quick->index,*fields); - } - else - { - QUICK_INDEX_MERGE_SELECT *qsel_imerge= - (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick); - List_iterator_fast<QUICK_RANGE_SELECT> it(qsel_imerge->quick_selects); - QUICK_RANGE_SELECT *quick; - while ((quick= it++)) - { - if (check_if_key_used(table, quick->index, *fields)) - return 0; - } - return 1; - } - } - + return !join_tab->quick->check_if_keys_used(fields); /* If scanning in clustered key */ if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && table->primary_key < MAX_KEY) |