From 3600d09ab4323098676fa51c869a787fec9d42cc Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 13 May 2004 01:38:40 +0400 Subject: This is first cset for WL#1394 "Optimize index merge when all involved index ranges include only values with equal keys" The main idea is to exploit the fact that key scans for "key=const" return ordered sequences of rowids. include/my_base.h: Added HA_EXTRA_KEYREAD_PRESERVE_FIELDS flag include/my_bitmap.h: Added a couple of utility functions include/my_sys.h: Added my_conunt_bits_ushort function innobase/include/row0mysql.h: Added support for HA_EXTRA_KEYREAD_PRESERVE_FIELDS innobase/row/row0sel.c: Added support for HA_EXTRA_KEYREAD_PRESERVE_FIELDS mysys/my_bit.c: Added my_count_bits_ushort function mysys/my_bitmap.c: Added a couple of utility functions sql/ha_berkeley.cc: Added cmp_ref rowid comparison function. sql/ha_berkeley.h: Added cmp_ref rowid comparison function. sql/ha_heap.h: Added cmp_ref rowid comparison function. sql/ha_innodb.cc: Added cmp_ref rowid comparison function and support from HA_EXTRA_KEYREAD_PRESERVE_FIELDS sql/ha_innodb.h: Added cmp_ref rowid comparison function. sql/handler.h: Added cmp_ref rowid comparison function. sql/opt_range.cc: Added QUICK_ROR_{INTERSECT,UNION}_SELECT classes and related optimizer code sql/opt_range.h: Added QUICK_ROR_{INTERSECT,UNION}_SELECT classes sql/sql_delete.cc: Changed to use new ROWID comparison function also always call quick->reset() for quick selects sql/sql_select.cc: Account for new quick select types sql/sql_select.h: New, proper rowid ordering/comparison function to be used with Unique class etc. sql/sql_test.cc: Account for new quick select types sql/sql_update.cc: Account for new quick select types --- sql/ha_berkeley.cc | 25 + sql/ha_berkeley.h | 1 + sql/ha_heap.h | 7 +- sql/ha_innodb.cc | 57 ++ sql/ha_innodb.h | 1 + sql/handler.h | 5 + sql/opt_range.cc | 2565 ++++++++++++++++++++++++++++++++++++++++++++-------- sql/opt_range.h | 163 +++- sql/sql_delete.cc | 16 +- sql/sql_select.cc | 76 +- sql/sql_select.h | 2 +- sql/sql_test.cc | 33 +- sql/sql_update.cc | 41 +- 13 files changed, 2484 insertions(+), 508 deletions(-) (limited to 'sql') diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index c4735403267..3daeae160cd 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -2457,4 +2457,29 @@ ha_rows ha_berkeley::estimate_number_of_rows() return share->rows + HA_BERKELEY_EXTRA_ROWS; } +int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) +{ + if (hidden_primary_key) + return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH); + + int result; + Field *field; + KEY *key_info=table->key_info+table->primary_key; + KEY_PART_INFO *key_part=key_info->key_part; + KEY_PART_INFO *end=key_part+key_info->key_parts; + + for (; key_part != end; key_part++) + { + field= key_part->field; + result= field->pack_cmp((const char*)ref1, (const char*)ref2, + key_part->length); + if (result) + return result; + ref1 += field->packed_col_length((const char*)ref1, key_part->length); + ref2 += field->packed_col_length((const char*)ref2, key_part->length); + } + + return 0; +} + #endif /* HAVE_BERKELEY_DB */ diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index f225c24eaf7..f5f6194ee77 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -168,6 +168,7 @@ class ha_berkeley: public handler void print_error(int error, myf errflag); uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool berkeley_skip, berkeley_shared_data; diff --git a/sql/ha_heap.h b/sql/ha_heap.h index c369c7029b4..80cd143eec3 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -93,5 +93,10 @@ class ha_heap: public handler THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); - + int cmp_ref(const byte *ref1, const byte *ref2) + { + HEAP_PTR ptr1=*(HEAP_PTR*)ref1; + HEAP_PTR ptr2=*(HEAP_PTR*)ref2; + return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); + } }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 8bd43ea92cd..da46dc799e9 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -711,6 +711,8 @@ ha_innobase::init_table_handle_for_HANDLER(void) prebuilt->read_just_key = FALSE; prebuilt->used_in_HANDLER = TRUE; + + prebuilt->keep_other_fields_on_keyread = FALSE; } /************************************************************************* @@ -4454,9 +4456,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_RESET_STATE: + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_NO_KEYREAD: @@ -4468,6 +4472,9 @@ ha_innobase::extra( case HA_EXTRA_KEYREAD: prebuilt->read_just_key = 1; break; + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + prebuilt->keep_other_fields_on_keyread = 1; + break; default:/* Do nothing */ ; } @@ -4526,6 +4533,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_no_need_to_fetch_extra_cols = TRUE; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -4604,6 +4612,7 @@ ha_innobase::external_lock( prebuilt->hint_no_need_to_fetch_extra_cols = TRUE; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (lock_type == F_WRLCK) { @@ -5000,4 +5009,52 @@ ha_innobase::get_auto_increment() return(nr); } +int +ha_innobase::cmp_ref( + const mysql_byte *ref1, + const mysql_byte *ref2) +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + enum_field_types mysql_type; + Field* field; + int result; + + if (prebuilt->clust_index_was_generated) + return memcmp(ref1, ref2, DATA_ROW_ID_LEN); + + /* Do type-aware comparison of Primary Key members. PK members + are always NOT NULL, so no checks for NULL are performed */ + KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part; + KEY_PART_INFO *key_part_end= + key_part + table->key_info[table->primary_key].key_parts; + for (; key_part != key_part_end; ++key_part) { + field = key_part->field; + mysql_type = field->type(); + if (mysql_type == FIELD_TYPE_TINY_BLOB + || mysql_type == FIELD_TYPE_MEDIUM_BLOB + || mysql_type == FIELD_TYPE_BLOB + || mysql_type == FIELD_TYPE_LONG_BLOB) { + + ut_a(!ref1[1]); + ut_a(!ref2[1]); + byte len1= *ref1; + byte len2= *ref2; + ref1 += 2; + ref2 += 2; + result = + ((Field_blob*)field)->cmp((const char*)ref1, len1, + (const char*)ref2, len2); + } else { + result = + field->cmp((const char*)ref1, (const char*)ref2); + } + + if (result) + return result; + ref1 += key_part->length; + ref2 += key_part->length; + } + return 0; +} + #endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index c305a019fcd..ff224370df1 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -188,6 +188,7 @@ class ha_innobase: public handler longlong get_auto_increment(); uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool innodb_skip; diff --git a/sql/handler.h b/sql/handler.h index 4a8abf5f131..386b9fa4d7d 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -382,6 +382,11 @@ public: false otherwise */ virtual bool primary_key_is_clustered() { return false; } + + virtual int cmp_ref(const byte *ref1, const byte *ref2) + { + return memcmp(ref1, ref2, ref_length); + } }; /* Some extern variables used with handlers */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index fa1b80f007e..ad48f3050c0 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -23,6 +23,19 @@ */ +/* + Classes in this file are used in the following way: + 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects + is created. #of rows in table and index statistics are ignored at this + step. + 2. Created SEL_TREE and index stats data are used to construct a + TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read + plans may be created. + 3. The least expensive table read plan is used to create a tree of + QUICK_SELECT_I-derived objects which are later used for row retrieval. + QUICK_RANGEs are also created in this step. +*/ + #ifdef __GNUC__ #pragma implementation // gcc: Class implementation #endif @@ -167,6 +180,22 @@ public: min_value=arg->max_value; min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN; } + void store_min(uint length,char **min_key,uint min_key_flag) + { + if ((min_flag & GEOM_FLAG) || + (!(min_flag & NO_MIN_RANGE) && + !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN)))) + { + if (maybe_null && *min_value) + { + **min_key=1; + bzero(*min_key+1,length); + } + else + memcpy(*min_key,min_value,length+(int) maybe_null); + (*min_key)+= length+(int) maybe_null; + } + } void store(uint length,char **min_key,uint min_key_flag, char **max_key, uint max_key_flag) { @@ -280,8 +309,21 @@ public: bzero((char*) keys,sizeof(keys)); } SEL_ARG *keys[MAX_KEY]; - key_map keys_map; /* bitmask of non-NULL elements in keys */ - List merges; /* possible ways to read rows using index_merge */ + key_map keys_map; /* bitmask of non-NULL elements in keys */ + + /* + Possible ways to read rows using index_merge. The list is non-empty only + if type==KEY. Currently can be non empty only if keys_map.is_clear_all(). + */ + List merges; + + /* The members below are filled/used only after get_mm_tree is done */ + key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ + uint n_ror_scans; + + struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ + struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ + /* Note that #records for each key scan is stored in table->quick_rows */ }; @@ -291,22 +333,44 @@ typedef struct st_qsel_param { KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY]; MEM_ROOT *mem_root; table_map prev_tables,read_tables,current_table; - uint baseflag, keys, max_key_part, range_count; - uint real_keynr[MAX_KEY]; + uint baseflag, max_key_part, range_count; + + uint keys; /* number of keys used in the query */ + + /* used_key_no -> table_key_no translation table */ + uint real_keynr[MAX_KEY]; + char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys + uint fields_bitmap_size; + MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ + + key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ + uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ + + bool is_ror_scan; /* true if last checked tree->key can be used for ROR-scan */ } PARAM; +class TABLE_READ_PLAN; + class TRP_RANGE; + class TRP_ROR_INTERSECT; + class TRP_ROR_UNION; + class TRP_ROR_INDEX_MERGE; + +struct st_ror_scan_info; + static SEL_TREE * get_mm_parts(PARAM *param,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); static SEL_ARG *get_mm_leaf(PARAM *param,Field *field,KEY_PART *key_part, Item_func::Functype type,Item *value); static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, char *min_key,uint min_key_flag, @@ -315,25 +379,37 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index, SEL_ARG *key_tree, MEM_ROOT *alloc = NULL); -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_can_be_used, - double *read_time, - ha_rows *records, - SEL_ARG*** key_to_read); +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time); +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering); +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time); +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time); static int get_index_merge_params(PARAM *param, key_map& needed_reg, SEL_IMERGE *imerge, double *read_time, ha_rows* imerge_rows); -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr); #ifndef DBUG_OFF -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg); -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg); - +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg); +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end); +static void print_rowid(byte* val, int len); +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif + static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); @@ -629,7 +705,7 @@ QUICK_SELECT_I::QUICK_SELECT_I() QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc, MEM_ROOT *parent_alloc) - :dont_free(0),error(0),cur_range(NULL),range(0) + :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0) { index= key_nr; head= table; @@ -654,12 +730,22 @@ int QUICK_RANGE_SELECT::init() QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { + DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); if (!dont_free) { file->index_end(); - delete_dynamic(&ranges); /* ranges are allocated in alloc */ - free_root(&alloc,MYF(0)); - } + file->extra(HA_EXTRA_NO_KEYREAD); + delete_dynamic(&ranges); /* ranges are allocated in alloc */ + if (free_file) + { + DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, + free_file)); + file->reset(); + file->close(); + } + free_root(&alloc,MYF(0)); + } + DBUG_VOID_RETURN; } @@ -717,6 +803,257 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() free_root(&alloc,MYF(0)); } + +QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, + TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + : cpk_quick(NULL), thd(thd_param), reset_called(false), + need_to_fetch_row(retrieve_full_rows) +{ + index= MAX_KEY; + head= table; + record= head->record[0]; + if (!parent_alloc) + init_sql_alloc(&alloc,1024,0); + else + bzero(&alloc, sizeof(MEM_ROOT)); + last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, + head->file->ref_length); +} + +int QUICK_ROR_INTERSECT_SELECT::init() +{ + /* Check if last_rowid was allocated in ctor */ + return !last_rowid; +} + + +/* + Init this quick select to be a ROR child scan. + NOTE + QUICK_ROR_INTERSECT_SELECT::reset() may choose not to call this function + but reuse its handler object for doing one of range scans. It duplicates + a part of this function' code. + RETURN + 0 ROR child scan initialized, ok to use. + 1 error +*/ + +int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler) +{ + handler *save_file= file; + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_child_scan"); + + if (reuse_handler) + { + DBUG_PRINT("info", ("Reusing handler %p", file)); + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) | + init() || reset()) + { + DBUG_RETURN(1); + } + else + DBUG_RETURN(0); + } + + /* Create a separate handler object for this quick select */ + if (free_file) + { + /* already have own 'handler' object. */ + DBUG_RETURN(0); + } + + if (!(file= get_new_handler(head, head->db_type))) + goto failure; + DBUG_PRINT("info", ("Allocated new handler %p", file)); + if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + { + /* Caller will free the memory */ + goto failure; + } + + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) || + init() || reset()) + { + file->close(); + goto failure; + } + free_file= true; + last_rowid= file->ref; + DBUG_RETURN(0); + +failure: + file= save_file; + DBUG_RETURN(1); +} + +int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler) +{ + List_iterator_fast quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan"); + + /* Initialize all merged "children" quick selects */ + quick_it.rewind(); + DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler)); + if (!need_to_fetch_row && reuse_handler) + { + quick= quick_it++; + /* + There is no use for this->file. Reuse it for first of merged range + selects. + */ + if (quick->init_ror_child_scan(true)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + } + while((quick= quick_it++)) + { + if (quick->init_ror_child_scan(false)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + /* Share the same record structure in intersect*/ + quick->record= head->record[0]; + } + + if (need_to_fetch_row && head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +int QUICK_ROR_INTERSECT_SELECT::reset() +{ + int result; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); + result= init_ror_child_scan(true); + DBUG_RETURN(result); +} + +bool +QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) +{ + bool res; + if (head->file->primary_key_is_clustered() && + quick->index == head->primary_key) + { + cpk_quick= quick; + res= 0; + } + else + res= quick_selects.push_back(quick); + return res; +} + +QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); + quick_selects.delete_elements(); + delete cpk_quick; + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + +QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, + TABLE *table) + : thd(thd_param), reset_called(false) +{ + index= MAX_KEY; + head= table; + rowid_length= table->file->ref_length; + record= head->record[0]; + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + my_pthread_setspecific_ptr(THR_MALLOC,&alloc); +} + +int QUICK_ROR_UNION_SELECT::init() +{ + if (init_queue(&queue, quick_selects.elements, 0, + false , QUICK_ROR_UNION_SELECT::queue_cmp, + (void*) this)) + { + bzero(&queue, sizeof(QUEUE)); + return 1; + } + + if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length))) + return 1; + prev_rowid= cur_rowid + head->file->ref_length; + return 0; +} + +/* + Comparison function to be used by priority queue. + SYNPOSIS + QUICK_ROR_UNION_SELECT::queue_cmp() + arg Pointer to QUICK_ROR_UNION_SELECT + val1 First merged select + val2 Second merged select +*/ +int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) +{ + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, + ((QUICK_SELECT_I*)val2)->last_rowid); +} + +int QUICK_ROR_UNION_SELECT::reset() +{ + QUICK_SELECT_I* quick; + int error; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); + have_prev_rowid= false; + /* + Initialize scans for merged quick selects and put all merged quick + selects into the queue. + */ + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (quick->init_ror_child_scan(false)) + DBUG_RETURN(1); + if ((error= quick->get_next())) + { + if (error == HA_ERR_END_OF_FILE) + continue; + else + DBUG_RETURN(error); + } + quick->save_last_pos(); + queue_insert(&queue, (byte*)quick); + } + + if (head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +bool +QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) +{ + return quick_selects.push_back(quick_sel_range); +} + +QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); + delete_queue(&queue); + quick_selects.delete_elements(); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + QUICK_RANGE::QUICK_RANGE() :min_key(0),max_key(0),min_length(0),max_length(0), flag(NO_MIN_RANGE | NO_MAX_RANGE) @@ -879,6 +1216,136 @@ SEL_ARG *SEL_ARG::clone_tree() return root; } + +/* + Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived + objects from table read plans. +*/ +class TABLE_READ_PLAN +{ +public: + /* + Plan read cost, with or without cost of full row retrieval, depending + on plan creation parameters. + */ + double read_cost; + ha_rows records; /* estimate of #rows to be examined */ + + bool is_ror; + /* Create quck select for this plan */ + virtual QUICK_SELECT_I *make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc=NULL) = 0; + + /* Table read plans are allocated on MEM_ROOT and must not be deleted */ + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) {} +}; + +class TRP_ROR_INTERSECT; +class TRP_ROR_UNION; +class TRP_INDEX_MERGE; + + +class TRP_RANGE : public TABLE_READ_PLAN +{ +public: + SEL_ARG *key; + uint key_idx; /* key number in param->keys */ + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) + : key(key_arg), key_idx(idx_arg) + {} + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + { + DBUG_ENTER("TRP_RANGE::make_quick"); + QUICK_RANGE_SELECT *quick; + /* ignore retrieve_full_rows there as it is set/used elsewhere */ + if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) + { + quick->records= records; + quick->read_time= read_cost; + } + DBUG_RETURN(quick); + } +}; + +class TRP_ROR_INTERSECT : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + struct st_ror_scan_info **first_scan; + struct st_ror_scan_info **last_scan; + bool is_covering; /* true if no row retrieval phase is necessary */ + double index_scan_costs; +}; + +/* + ROR-union is currently never covering. +*/ +class TRP_ROR_UNION : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TABLE_READ_PLAN **first_ror; + TABLE_READ_PLAN **last_ror; + +}; + +class TRP_INDEX_MERGE : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; + TRP_RANGE **range_scans_end; +}; + + +/* + Fill param->needed_fields with bitmap of fields used in the query + NOTE + Do not put clustered PK members into it as they are implicitly present in + all keys. +*/ + +static int fill_used_fields_bitmap(PARAM *param) +{ + TABLE *table= param->table; + param->fields_bitmap_size= (table->fields/8 + 1); + uchar *tmp; + uint pk; + if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, + false)) + return 1; + + bitmap_clear_all(¶m->needed_fields); + for (uint i= 0; i < table->fields; i++) + { + if (param->thd->query_id == table->field[i]->query_id) + bitmap_set_bit(¶m->needed_fields, i+1); + } + + pk= param->table->primary_key; + if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY) + { + KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[pk].key_parts; + for(;key_part != key_part_end; ++key_part) + { + bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr); + } + } + return 0; +} + + /* Test if a key can be used in different ranges @@ -887,11 +1354,12 @@ SEL_ARG *SEL_ARG::clone_tree() limit, force_quick_range) Updates the following in the select parameter: - needed_reg - Bits for keys with may be used if all prev regs are read - quick - Parameter to use when reading records. + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. + In the table struct the following information is updated: - quick_keys - Which keys can be used - quick_rows - How many rows the key matches + quick_keys - Which keys can be used + quick_rows - How many rows the key matches RETURN VALUES -1 if impossible select @@ -910,7 +1378,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint basflag; uint idx; double scan_time; - QUICK_INDEX_MERGE_SELECT *quick_imerge= NULL; DBUG_ENTER("test_quick_select"); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, @@ -956,13 +1423,17 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.table=head; param.keys=0; param.mem_root= &alloc; + param.needed_reg= &needed_reg; + param.imerge_cost_buff_size= 0; + thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc, sizeof(KEY_PART)* - head->key_parts))) + head->key_parts)) + || fill_used_fields_bitmap(¶m)) { thd->no_errors=0; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -971,7 +1442,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, key_parts= param.key_parts; old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC); my_pthread_setspecific_ptr(THR_MALLOC,&alloc); - + + /* + Make an array with description of all key parts of all table keys. + This is used in get_mm_parts function. + */ for (idx=0 ; idx < head->keys ; idx++) { if (!keys_to_use.is_set(idx)) @@ -1001,55 +1476,74 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { if (tree->type == SEL_TREE::IMPOSSIBLE) { - records=0L; // Return -1 from this function + records=0L; // Return -1 from this function read_time= (double) HA_POS_ERROR; } else if (tree->type == SEL_TREE::KEY || tree->type == SEL_TREE::KEY_SMALLER) { - /* + TABLE_READ_PLAN *best_trp; + /* It is possible to use a quick select (but maybe it would be slower than 'all' table scan). + Btw, tree type SEL_TREE::INDEX_MERGE was not introduced + intentionally. */ - SEL_ARG **best_key= 0; - ha_rows found_records; - double found_read_time= read_time; - - if (!get_quick_select_params(tree, ¶m, needed_reg, false, - &found_read_time, &found_records, - &best_key)) + if (tree->merges.is_empty()) { + double best_read_time= read_time; + TRP_ROR_INTERSECT *new_trp; + bool can_build_covering= false; + + /* Get best 'range' plan and prepare data for making other plans */ + if ((best_trp= get_key_scans_params(¶m, tree, false, + best_read_time))) + best_read_time= best_trp->read_cost; + /* - Ok, quick select is better than 'all' table scan and we have its - parameters, so construct it. + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. */ - read_time= found_read_time; - records= found_records; - - if ((quick= get_quick_select(¶m,(uint) (best_key-tree->keys), - *best_key)) && (!quick->init())) + if (thd->lex->sql_command != SQLCOM_DELETE) { - quick->records= records; - quick->read_time= read_time; + /* + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection + */ + if ((new_trp= get_best_ror_intersect(¶m, tree, false, + best_read_time, + &can_build_covering))) + { + best_trp= new_trp; + best_read_time= best_trp->read_cost; + } + + /* + Try constructing covering ROR-intersect only if it looks possible + and worth doing. + */ + if (new_trp && !new_trp->is_covering && can_build_covering && + (new_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) + best_trp= new_trp; } } - - /* - Btw, tree type SEL_TREE::INDEX_MERGE was not introduced - intentionally. - */ - - /* If no range select could be built, try using index_merge. */ - if (!quick && !tree->merges.is_empty()) + else { + /* Try creating index_merge/ROR-union scan. */ + SEL_IMERGE *imerge; + TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; + LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ DBUG_PRINT("info",("No range reads possible," " trying to construct index_merge")); - SEL_IMERGE *imerge; - SEL_IMERGE *min_imerge= NULL; - double min_imerge_read_time; - ha_rows min_imerge_records; - LINT_INIT(min_imerge_records); // Protected by min_imerge + /* + Calculate cost of 'all'+index_only scan if it is possible. + It is possible that table can be read in two ways: + a) 'all' + index_only + b) index_merge without index_only. + */ if (!head->used_keys.is_clear_all()) { int key_for_use= find_shortest_key(head, &head->used_keys); @@ -1058,104 +1552,39 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time = get_index_only_read_time(¶m, total_table_records, key_for_use); DBUG_PRINT("info", - ("'all' scan will be using key %d, read time %g", - key_for_use, read_time)); + ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, read_time)); } - - min_imerge_read_time=read_time; - /* - Ok, read_time contains best 'all' read time. - Now look for index_merge with cost < read_time - */ + List_iterator_fast it(tree->merges); while ((imerge= it++)) { - if (!get_index_merge_params(¶m, needed_reg, imerge, - &min_imerge_read_time, - &min_imerge_records)) - min_imerge= imerge; - } - - if (!min_imerge) - goto end_free; - - records= min_imerge_records; - - /* Ok, using index_merge is the best option, so construct it. */ - if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head))) - goto end_free; - - quick->records= min_imerge_records; - quick->read_time= min_imerge_read_time; - - my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc); - - QUICK_RANGE_SELECT *new_quick; - for (SEL_TREE **ptree = min_imerge->trees; - ptree != min_imerge->trees_next; - ptree++) - { - SEL_ARG **tree_best_key= - min_imerge->best_keys[ptree - min_imerge->trees]; - if ((new_quick= get_quick_select(¶m, - (uint)(tree_best_key- - (*ptree)->keys), - *tree_best_key, - &quick_imerge->alloc))) - { - new_quick->records= min_imerge_records; - new_quick->read_time= min_imerge_read_time; - /* - QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC - pointing to its allocator, restore it back. - */ - quick_imerge->last_quick_select= new_quick; - - if (quick_imerge->push_quick_back(new_quick)) - { - delete new_quick; - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } - } - else - { - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } + new_conj_trp= get_best_disjunct_quick(¶m, imerge, read_time); + if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < + best_conj_trp->read_cost)) + best_conj_trp= new_conj_trp; } + best_trp= best_conj_trp; + } - free_root(&alloc,MYF(0)); - my_pthread_setspecific_ptr(THR_MALLOC,old_root); - if (quick->init()) + my_pthread_setspecific_ptr(THR_MALLOC, old_root); + if (best_trp) + { + records= best_trp->records; + if (!(quick= best_trp->make_quick(¶m, true)) || quick->init()) { delete quick; - quick= quick_imerge= NULL; - DBUG_PRINT("error", - ("Failed to allocate index merge structures," - "falling back to full scan.")); + quick= NULL; } - goto end; - } + } } } -end_free: + my_pthread_setspecific_ptr(THR_MALLOC, old_root); free_root(&alloc,MYF(0)); // Return memory & allocator - my_pthread_setspecific_ptr(THR_MALLOC,old_root); -end: thd->no_errors=0; } - DBUG_EXECUTE("info", - { - if (quick_imerge) - print_quick_sel_imerge(quick_imerge, &needed_reg); - else - print_quick_sel_range((QUICK_RANGE_SELECT*)quick, &needed_reg); - } - ); + DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); /* Assume that if the user is using 'limit' we will only need to scan @@ -1165,27 +1594,73 @@ end: } -/* - Calculate index merge cost and save parameters for its construction. - - SYNOPSIS - get_index_merge_params() - param in parameter with structure. - needed_reg in/out needed_reg from this SQL_SELECT - imerge in index_merge description structure - read_time in/out in: cost of an existing way to read a table - out: cost of index merge - imerge_rows out pessimistic estimate of # of rows to be retrieved - +/* + Get cost of 'sweep' full row retrieveal of #records rows. RETURN - 0 Cost of this index_merge is less than passed *read_time, - *imerge_rows and *read_time contain new index_merge parameters. - 1 Cost of this index_merge is more than *read_time, - *imerge_rows and *read_time are not modified. - -1 error + 0 - ok + 1 - sweep is more expensive then full table scan. +*/ - NOTES - index_merge_cost = +bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, + double index_reads_cost, double max_cost) +{ + if (param->table->file->primary_key_is_clustered()) + { + *cost= param->table->file->read_time(param->table->primary_key, + records, records); + } + else + { + double n_blocks= + ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + DBUG_PRINT("info",("sweep: nblocks= %g, busy_blocks=%g index_blocks=%g", + n_blocks, busy_blocks, index_reads_cost)); + /* + Disabled: Bail out if # of blocks to read is bigger than # of blocks in + table data file. + if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) + return 1; + */ + JOIN *join= param->thd->lex->select_lex.join; + if (!join || join->tables == 1) + { + /* No join, assume reading is done in one 'sweep' */ + *cost= busy_blocks*(DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + else + { + /* + Possibly this is a join with source table being non-last table, so + assume that disk seeks are random here. + */ + *cost = busy_blocks; + } + } + DBUG_PRINT("info",("returning cost=%g", *cost)); + return 0; +} + + +/* + Get best plan for a SEL_IMERGE disjunctive expression. + SYNOPSIS + get_best_disjunct_quick() + param + imerge + read_time Don't create scans with cost > read_time + RETURN + read plan + NULL - OOM or no read scan could be built. + + NOTES + + index_merge cost is calculated as follows: + index_merge_cost = cost(index_reads) + (see #1) cost(rowid_to_row_scan) + (see #2) cost(unique_use) (see #3) @@ -1231,158 +1706,247 @@ end: (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). 3. Cost of Unique use is calculated in Unique::get_use_cost function. -*/ - -static int get_index_merge_params(PARAM *param, key_map& needed_reg, - SEL_IMERGE *imerge, double *read_time, - ha_rows* imerge_rows) -{ - double imerge_cost= 0.0; /* cost of this index_merge */ - bool imerge_too_expensive= false; - double tree_read_time; - ha_rows tree_records; - bool pk_is_clustered= param->table->file->primary_key_is_clustered(); - bool have_cpk_scan= false; - ha_rows records_for_unique= 0; - ha_rows cpk_records= 0; - DBUG_ENTER("get_index_merge_params"); - - /* allocate structs to save construction info */ - imerge->best_keys= - (SEL_ARG***)alloc_root(param->mem_root, - (imerge->trees_next - imerge->trees)* - sizeof(void*)); - /* - PHASE 1: get the best keys to use for this index_merge - */ + ROR-union cost is calculated in the same way index_merge, but instead of + Unique a priority queue is used. + +*/ +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time) +{ + SEL_TREE **ptree; + TRP_INDEX_MERGE *imerge_trp= NULL; + uint n_child_scans= imerge->trees_next - imerge->trees; + TRP_RANGE **range_scans; + TRP_RANGE **cur_child; + TRP_RANGE **cpk_scan= NULL; + bool imerge_too_expensive= false; + double imerge_cost= 0.0; + ha_rows cpk_scan_records= 0; + ha_rows non_cpk_scan_records= 0; + bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + bool all_scans_ror_able= true; + bool all_scans_rors= true; + uint unique_calc_buff_size; + TABLE_READ_PLAN **roru_read_plans; + TABLE_READ_PLAN **cur_roru_plan; + double roru_index_costs; + double blocks_in_index_read; + ha_rows roru_total_records; + double roru_intersect_part= 1.0; + double sweep_cost; + DBUG_ENTER("get_best_disjunct_quick"); + DBUG_PRINT("info", ("Full table scan cost =%g", read_time)); + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE*)* + n_child_scans))) + DBUG_RETURN(NULL); /* - It may be possible to use different keys for index_merge scans, e.g. for - query like - ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4) - we have to make choice between key1 and key2 for one scan and between - key3, key4 for another. - We assume we'll get the best if we choose the best key read inside each - of the conjuncts. + Collect best 'range' scan for each of disjuncts, and, while doing so, + analyze possibility of ROR scans. Also calculate some values needed by + other parts of the code. */ - for (SEL_TREE **ptree= imerge->trees; + for (ptree= imerge->trees, cur_child= range_scans; ptree != imerge->trees_next; - ptree++) + ptree++, cur_child++) { - SEL_ARG **tree_best_key; - tree_read_time= *read_time; - - if (get_quick_select_params(*ptree, param, needed_reg, true, - &tree_read_time, &tree_records, - &tree_best_key)) + DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, + "tree in SEL_IMERGE");); + if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time))) { /* One of index scans in this index_merge is more expensive than entire - table read for another available option. The entire index_merge will - be more expensive then, too. We continue here only to update - SQL_SELECT members. + table read for another available option. The entire index_merge (and + any possible ROR-union) will be more expensive then, too. We continue + here only to update SQL_SELECT members. */ imerge_too_expensive= true; } - if (imerge_too_expensive) continue; - uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)]; - imerge->best_keys[ptree - imerge->trees]= tree_best_key; - imerge_cost += tree_read_time; - - if (pk_is_clustered && keynr == param->table->primary_key) + imerge_cost += (*cur_child)->read_cost; + all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); + all_scans_rors &= (*cur_child)->is_ror; + if (pk_is_clustered && + param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key) { - have_cpk_scan= true; - cpk_records= tree_records; + cpk_scan= cur_child; + cpk_scan_records= (*cur_child)->records; } else - records_for_unique += tree_records; - } - DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost)); - - if (imerge_too_expensive) - DBUG_RETURN(1); - - if ((imerge_cost > *read_time) || - ((records_for_unique + cpk_records) >= param->table->file->records) && - *read_time != DBL_MAX) - { - /* Bail out if it is obvious that index_merge would be more expensive */ - DBUG_RETURN(1); + non_cpk_scan_records += (*cur_child)->records; } - - if (have_cpk_scan) + + DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost)); + if (imerge_too_expensive || (imerge_cost > read_time) || + (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) && + read_time != DBL_MAX) { /* - Add one ROWID comparison for each row retrieved on non-CPK scan. - (it is done in QUICK_RANGE_SELECT::row_in_ranges) + Bail out if it is obvious that both index_merge and ROR-union will be + more expensive */ - imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID; + DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " + "full table scan, bailing out")); + DBUG_RETURN(NULL); } - - /* PHASE 2: Calculate cost(rowid_to_row_scan) */ - if (pk_is_clustered) + if (all_scans_rors) { - imerge_cost += - param->table->file->read_time(param->table->primary_key, - records_for_unique, - records_for_unique) - + rows2double(records_for_unique) / TIME_FOR_COMPARE; - } - else - { - double n_blocks= - ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); - double busy_blocks= - n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, (double) records_for_unique)); - - JOIN *join= param->thd->lex->select_lex.join; - if (!join || join->tables == 1) - imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST + - DISK_SEEK_PROP_COST*n_blocks/busy_blocks); - else - { - /* - It can be a join with source table being non-last table, so assume - that disk seeks are random here. - (TODO it is possible to determine if this is a last table in 'index - checked for each record' join) - */ - imerge_cost += busy_blocks; - } + roru_read_plans= (TABLE_READ_PLAN**)range_scans; + goto skip_to_ror_scan; } - DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost)); - - /* PHASE 3: Add Unique operations cost */ - register uint unique_calc_buff_size= - Unique::get_cost_calc_buff_size(records_for_unique, + blocks_in_index_read= imerge_cost; + if (cpk_scan) + { + /* + Add one ROWID comparison for each row retrieved on non-CPK scan. (it + is done in QUICK_RANGE_SELECT::row_in_ranges) + */ + imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; + } + + /* Calculate cost(rowid_to_row_scan) */ + if (get_sweep_read_cost(param, non_cpk_scan_records, &sweep_cost, + blocks_in_index_read, read_time)) + goto build_ror_index_merge; + imerge_cost += sweep_cost; + DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", + imerge_cost)); + + /* Add Unique operations cost */ + unique_calc_buff_size= + Unique::get_cost_calc_buff_size(non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); if (param->imerge_cost_buff_size < unique_calc_buff_size) { if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, unique_calc_buff_size))) - DBUG_RETURN(1); + DBUG_RETURN(NULL); param->imerge_cost_buff_size= unique_calc_buff_size; } imerge_cost += - Unique::get_use_cost(param->imerge_cost_buff, records_for_unique, + Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); + DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", + imerge_cost, read_time)); + if (imerge_cost < read_time) + { + if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) + { + imerge_trp->read_cost= imerge_cost; + imerge_trp->records= non_cpk_scan_records + cpk_scan_records; + imerge_trp->records= min(imerge_trp->records, + param->table->file->records); + imerge_trp->range_scans= range_scans; + imerge_trp->range_scans_end= range_scans + n_child_scans; + read_time= imerge_cost; + } + } - DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost)); - if (imerge_cost < *read_time) +build_ror_index_merge: + if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE) + DBUG_RETURN(imerge_trp); + + /* Ok, it is possible to build a ROR-union, try it. */ + bool dummy; + if (!(roru_read_plans= + (TABLE_READ_PLAN**)alloc_root(param->mem_root, + sizeof(TABLE_READ_PLAN*)* + n_child_scans))) + DBUG_RETURN(imerge_trp); +skip_to_ror_scan: + roru_index_costs= 0.0; + roru_total_records= 0; + cur_roru_plan= roru_read_plans; + + /* Find 'best' ROR scan for each of trees in disjunction */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++, cur_roru_plan++) { - *read_time= imerge_cost; - records_for_unique += cpk_records; - *imerge_rows= min(records_for_unique, param->table->file->records); - DBUG_RETURN(0); + /* + Assume the best ROR scan is the one that has cheapest full-row-retrieval + scan cost. + Also accumulate index_only scan costs as we'll need them to calculate + overall index_intersection cost. + */ + double cost; + if ((*cur_child)->is_ror) + { + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= param->table->file-> + read_time(param->real_keynr[(*cur_child)->key_idx], 1, + (*cur_child)->records) + + rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + } + else + cost= read_time; + + TABLE_READ_PLAN *prev_plan= *cur_child; + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost, + &dummy))) + { + if (prev_plan->is_ror) + *cur_roru_plan= prev_plan; + else + DBUG_RETURN(imerge_trp); + roru_index_costs += (*cur_roru_plan)->read_cost; + } + else + roru_index_costs += + ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; + roru_total_records += (*cur_roru_plan)->records; + roru_intersect_part *= (*cur_roru_plan)->records / + param->table->file->records; } - DBUG_RETURN(1); + + /* + rows to retrieve= + SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). + This is valid because index_merge constructuion guarantees that conditions + in disjunction do not share key parts. + */ + roru_total_records -= (ha_rows)(roru_intersect_part* + param->table->file->records); + /* ok, got a ROR read plan for each of the disjuncts + Calculate cost: + cost(index_union_scan(scan_1, ... scan_n)) = + SUM_i(cost_of_index_only_scan(scan_i)) + + queue_use_cost(rowid_len, n) + + cost_of_row_retrieval + See get_merge_buffers_cost function for queue_use_cost formula derivation. + */ + + if (get_sweep_read_cost(param, roru_total_records, &sweep_cost, + roru_index_costs, read_time)) + DBUG_RETURN(NULL); + double roru_total_cost; + roru_total_cost= roru_index_costs + + rows2double(roru_total_records)*log(n_child_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2) + + sweep_cost; + DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, + n_child_scans)); + TRP_ROR_UNION* roru; + if (roru_total_cost < read_time) + { + if ((roru= new (param->mem_root) TRP_ROR_UNION)) + { + roru->first_ror= roru_read_plans; + roru->last_ror= roru_read_plans + n_child_scans; + roru->read_cost= roru_total_cost; + roru->records= roru_total_records; + DBUG_RETURN(roru); + } + } + DBUG_RETURN(imerge_trp); } @@ -1401,7 +1965,7 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg, key blocks are half full (normally things are much better). */ -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr) { double read_time; @@ -1413,39 +1977,693 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records, return read_time; } +typedef struct st_ror_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + ha_rows records; + SEL_ARG *sel_arg; + /* Fields used in the query and covered by this ROR scan */ + MY_BITMAP covered_fields; + uint used_fields_covered; + int key_rec_length; /* length of key record with rowid */ + + /* + Array of + #rows(keypart_1=c1 AND ... AND key_part_i=c_i) / + #rows(keypart_1=c1 AND ... AND key_part_{i+1}=c_{i+1}) values + */ + double *key_part_rows; + double index_read_cost; + uint first_uncovered_field; + uint key_components; +}ROR_SCAN_INFO; + + +/* + Create ROR_SCAN_INFO* structure for condition sel_arg on key idx. + SYNOPSIS + make_ror_scan() + param + idx index of key in param->keys + RETURN + NULL - OOM. +*/ + +static +ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) +{ + ROR_SCAN_INFO *ror_scan; + uchar *bitmap_buf; + uint keynr; + DBUG_ENTER("make_ror_scan"); + if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO)))) + DBUG_RETURN(NULL); + + ror_scan->idx= idx; + ror_scan->keynr= keynr= param->real_keynr[idx]; + ror_scan->key_rec_length= param->table->key_info[keynr].key_length + + param->table->file->ref_length; + ror_scan->sel_arg= sel_arg; + ror_scan->records= param->table->quick_rows[keynr]; + + if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, + param->fields_bitmap_size))) + DBUG_RETURN(NULL); + + if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, + param->fields_bitmap_size*8, false)) + DBUG_RETURN(NULL); + bitmap_clear_all(&ror_scan->covered_fields); + if (!(ror_scan->key_part_rows= + (double*)alloc_root(param->mem_root, sizeof(double)* + param->table->key_info[keynr].key_parts))) + DBUG_RETURN(NULL); + + KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[keynr].key_parts; + uint n_used_covered= 0; + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr)) + { + n_used_covered++; + bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr); + } + } + ror_scan->index_read_cost= + get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], + ror_scan->keynr); + /* + Calculate # rows estimates for + (key_part1=c1) + (key_part1=c1 AND key_part2=c2) + ...and so on + */ + char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* values of current key */ + char *key_ptr= key_val; + ha_rows records; + ha_rows prev_records= param->table->file->records; + double *rows_diff= ror_scan->key_part_rows; + key_part= param->table->key_info[keynr].key_part; + SEL_ARG *arg= ror_scan->sel_arg; + /* + We have #rows estimate already for first key part, so do first loop + iteration separately: + */ + arg->store_min(key_part->length, &key_ptr, 0); + prev_records= param->table->quick_rows[ror_scan->keynr]; + *(rows_diff++)= rows2double(prev_records) / param->table->file->records; + arg=arg->next_key_part; + for(; arg; ++key_part, arg= arg->next_key_part) + { + arg->store_min(key_part->length, &key_ptr, 0); + records= + param->table->file->records_in_range(ror_scan->keynr, + (byte*)key_val, key_ptr - key_val, + HA_READ_KEY_EXACT, + (byte*)key_val, key_ptr - key_val, + HA_READ_AFTER_KEY); + if (records == HA_POS_ERROR) + return NULL; /* shouldn't happen actually */ + *(rows_diff++)= rows2double(records) / rows2double(prev_records); + prev_records= records; + } + DBUG_RETURN(ror_scan); +} + + +/* + Order ROR_SCAN_INFO** by + E(#records_matched) * key_record_length +*/ +int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + double val1= rows2double((*a)->records) * (*a)->key_rec_length; + double val2= rows2double((*b)->records) * (*b)->key_rec_length; + return (val1 < val2)? -1: (val1 == val2)? 0 : 1; +} + +/* + Order ROR_SCAN_INFO** by + (#covered fields in F desc, + #components asc, + number of first not covered component asc) +*/ +int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + if ((*a)->used_fields_covered > (*b)->used_fields_covered) + return -1; + if ((*a)->used_fields_covered < (*b)->used_fields_covered) + return 1; + if ((*a)->key_components < (*b)->key_components) + return -1; + if ((*a)->key_components > (*b)->key_components) + return 1; + if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) + return -1; + if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) + return 1; + return 0; +} + +/* Auxiliary structure for incremental ROR-intersection creation */ +typedef struct +{ + const PARAM *param; + MY_BITMAP covered_fields; /* union of fields covered by all scans */ + + /* true if covered_fields is a superset of needed_fields */ + bool is_covering; + + double index_scan_costs; /* SUM(cost of 'index-only' scans) */ + double total_cost; + /* + Fraction of table records that satisfies conditions of all scans. + This is the number of full records that will be retrieved if a + non-index_only index intersection will be employed. + */ + double records_fract; + ha_rows index_records; /* sum(#records to look in indexes) */ +}ROR_INTERSECT_INFO; + + +/* Allocate a ROR_INTERSECT and initialize it to contain zero scans */ +ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) +{ + ROR_INTERSECT_INFO *info; + uchar* buf; + if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, + sizeof(ROR_INTERSECT_INFO)))) + return NULL; + info->param= param; + info->is_covering= is_index_only; + info->index_scan_costs= 0.0f; + info->records_fract= 1.0f; + + if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + return NULL; + if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, + false)) + return NULL; + bitmap_clear_all(&info->covered_fields); + return info; +} + +/* + Check if it makes sense to add a ROR scan to ROR-intersection, and if yes + update parameters of ROR-intersection, including its cost. + + RETURN + true ROR scan added to ROR-intersection, cost updated. + false It doesn't make sense to add this ROR scan to this ROR-intersection. + + NOTE + Adding a ROR scan to ROR-intersect "makes sense" iff selectivt + + Cost of ROR-intersection is calulated as follows: + cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval + + if (union of indexes used covers all needed fields) + cost_of_full_rows_retrieval= 0; + else + { + cost_of_full_rows_retrieval= + cost_of_sweep_read(E(rows_to_retrive), rows_in_table); + } + + E(rows_to_retrive) is calulated as follows: + Suppose we have a condition on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key + k_21=c_21 AND k_22=c_22 AND ... // parts of second key + ... + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) + + where k_ij may be the same as any k_pq (i.e. keys may have common parts). + + A full row is retrieved iff entire cond holds. + + The recursive procedure for finding P(cond) is as follows: + + First step: + Pick 1st part of 1st key and break conjunction (1) into two parts: + cond= (k_11=c_11 AND R) + + Here R may still contain condition(s) equivalent to k_11=c_11. + Nevertheless, the following holds: + + P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11). + + Mark k_11 as fixed field (and satisfied condition) F, save P(F), + save R to be cond and proceed to recursion step. + + Recursion step: + We have set of fixed fields/satisfied conditions) F, probability P(F), + and remaining conjunction R + Pick next key part on current key and its condition "k_ij=c_ij". + We will add "k_ij=c_ij" into F and update P(F). + Lets denote k_ij as t, R = t AND R1, where i1 may still contain t. Then + + P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R|(t AND F)) (2) + + (where '|' mean conditional probability, not "or") + + Consider the first multiplier in (2). One of the following holds: + a) F contains condition on field used in t (i.e. t AND F = F). + Then P(t|F) = 1 + + b) F doesn't contain condition on field used in t. Then F and t are + considered independent. + + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + + P(t|fields_before_t_in_key)= #distinct_values(fields_before_t_in_key) / + #distinct_values(fields_before_t_in_key, t) + + The second multiplier is calculated by applying this step recusively. + + This function applies recursion steps for all fixed key members of + one key, accumulating sets of covered fields and + The very first step described is done as recursion step with + P(fixed fields)=1 and empty set of fixed fields. +*/ + +bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan) +{ + int i; + SEL_ARG *sel_arg; + KEY_PART_INFO *key_part= + info->param->table->key_info[ror_scan->keynr].key_part; + double selectivity_mult= 1.0; + DBUG_ENTER("ror_intersect_add"); + DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("Adding scan on %s", + info->param->table->key_info[ror_scan->keynr].name)); + for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; + i++, sel_arg= sel_arg->next_key_part) + { + if (!bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr)) + { + /* + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + */ + selectivity_mult *= ror_scan->key_part_rows[i]; + } + } + if (selectivity_mult == 1.0) + { + /* Don't add this scan if it doesn't improve selectivity. */ + DBUG_PRINT("info", ("This scan doesn't improve selectivity.")); + DBUG_RETURN(false); + } + info->records_fract *= selectivity_mult; + + bitmap_union(&info->covered_fields, &ror_scan->covered_fields); + + ha_rows scan_records= info->param->table->quick_rows[ror_scan->keynr]; + info->index_scan_costs += ror_scan->index_read_cost; + + if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, + &info->covered_fields)) + { + DBUG_PRINT("info", ("ROR-intersect is covering now")); + /* ROR-intersect became covering */ + info->is_covering= true; + } + + info->index_records += scan_records; + info->total_cost= info->index_scan_costs; + if (!info->is_covering) + { + ha_rows table_recs= info->param->table->file->records; + double sweep_cost; + get_sweep_read_cost(info->param, + (ha_rows)(table_recs*info->records_fract), + &sweep_cost, info->index_scan_costs, DBL_MAX); + + info->total_cost += sweep_cost; + } + DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, + info->is_covering?"" : "non-")); + DBUG_RETURN(true); +} + +/* + Get best ROR-intersection plan using non-covering ROR-intersection search + algorithm. The returned plan may be covering. + + SYNOPSIS + get_best_ror_intersect() + param + tree + force_index_only If true, don't calculate costs of full rows retrieval. + read_time Do not return read plans with cost > read_time. + are_all_covering [out] set to true if union of all scans covers all + fields needed by the query (and it is possible to build + a covering ROR-intersection) + RETURN + ROR-intersection table read plan + NULL if OOM or no plan found. + + NOTE + get_key_scans_params must be called before for the same SEL_TREE before + this function can be called. + + The approximate best non-covering plan search algorithm is as follows: + + find_min_ror_intersection_scan() + { + R= select all ROR scans; + order R by (E(#records_matched) * key_record_length). + + S= first(R); -- set of scans that will be used for ROR-intersection + R= R-first(S); + min_cost= cost(S); + min_scan= make_scan(S); + while (R is not empty) + { + if (!selectivity(S + first(R) < selectivity(S))) + continue; + + S= S + first(R); + R= R - first(R); + if (cost(S) < min_cost) + { + min_cost= cost(S); + min_scan= make_scan(S); + } + } + return min_scan; + } + + See ror_intersect_add function for ROR intersection costs. + + Special handling for Clustered PK scans + Clustered PK contains all table fields, so using it as a regular scan in + index intersection doesn't make sense: a range scan on CPK will be less + expensive in this case. + Clustered PK scan has special handling in ROR-intersection: it is not used + to retrieve rows, instead its condition is used to filter row references + we get from scans on other keys. +*/ + +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering) +{ + uint idx; + double min_cost= read_time; + DBUG_ENTER("get_best_ror_intersect"); + + if (tree->n_ror_scans < 2) + DBUG_RETURN(NULL); + + /* Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them */ + ROR_SCAN_INFO **cur_ror_scan; + if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + param->keys))) + return NULL; + + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) + { + if (!tree->ror_scans_map.is_set(idx)) + continue; + if (!(*cur_ror_scan= make_ror_scan(param, idx, tree->keys[idx]))) + return NULL; + cur_ror_scan++; + } + + tree->ror_scans_end= cur_ror_scan; + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", + tree->ror_scans, + tree->ror_scans_end);); + /* + Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized + ROR_SCAN_INFOs. + Get a minimal key scan using an approximate algorithm. + */ + qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", + tree->ror_scans, + tree->ror_scans_end);); + + ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ + ROR_SCAN_INFO **intersect_scans_end; + if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + tree->n_ror_scans))) + return NULL; + intersect_scans_end= intersect_scans; + + /* Create and incrementally update ROR intersection. */ + ROR_INTERSECT_INFO *intersect; + if (!(intersect= ror_intersect_init(param, false))) + return NULL; + + /* [intersect_scans, intersect_scans_best) will hold the best combination */ + ROR_SCAN_INFO **intersect_scans_best= NULL; + ha_rows best_rows; + bool is_best_covering; + double best_index_scan_costs; + LINT_INIT(best_rows); /* protected by intersect_scans_best */ + LINT_INIT(is_best_covering); + LINT_INIT(best_index_scan_costs); + + cur_ror_scan= tree->ror_scans; + while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) + { + /* S= S + first(R); */ + if (ror_intersect_add(intersect, *cur_ror_scan)) + *(intersect_scans_end++)= *cur_ror_scan; + /* R= R-first(R); */ + cur_ror_scan++; + + if (intersect->total_cost < min_cost) + { + /* Local minimum found, save it */ + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + intersect_scans_best= intersect_scans_end; + best_index_scan_costs= intersect->index_scan_costs; + } + } + + /* Ok, return ROR-intersect plan if we have found one */ + *are_all_covering= intersect->is_covering; + uint best_num= intersect_scans_best - intersect_scans; + TRP_ROR_INTERSECT *trp= NULL; + if (intersect_scans_best && best_num > 1) + { + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "used for ROR-intersect", + intersect_scans, + intersect_scans_best);); + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + if (!(trp->first_scan= + (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)*best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= is_best_covering; + trp->read_cost= min_cost; + trp->records= best_rows? best_rows : 1; + trp->index_scan_costs= best_index_scan_costs; + } + DBUG_RETURN(trp); +} + /* - Calculate quick range select read time, # of records, and best key to use - without constructing QUICK_RANGE_SELECT object. + Get best covering ROR-intersection. + SYNOPSIS + get_best_covering_ror_intersect() + param + tree SEL_TREE + read_time Dont return table read plans with cost > read_time. + + RETURN + Best covering ROR-intersection plan + NULL if no plan found. + + NOTE + get_best_ror_intersect must be called for a tree before calling this + function for it. + This function invalidates tree->ror_scans member values. + + The following approximate algorithm is used: + I=set of all covering indexes + F=set of all fields to cover + S={} + + do { + Order I by (#covered fields in F desc, + #components asc, + number of first not covered component asc); + F=F-covered by first(I); + S=S+first(I); + I=I-first(I); + } while F is not empty. +*/ + +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time) +{ + ROR_SCAN_INFO **ror_scan_mark; + ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; + DBUG_ENTER("get_best_covering_ror_intersect"); + uint nbits= param->fields_bitmap_size*8; + + for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) + (*scan)->key_components= + param->table->key_info[(*scan)->keynr].key_parts; + + /* + Run covering-ROR-search algorithm. + Assume set I is [ror_scan .. ror_scans_end) + */ + + /*I=set of all covering indexes */ + ror_scan_mark= tree->ror_scans; + + uchar buf[MAX_KEY/8+1]; + MY_BITMAP covered_fields; + if (bitmap_init(&covered_fields, buf, nbits, false)) + DBUG_RETURN(0); + bitmap_clear_all(&covered_fields); + + double total_cost= 0.0f; + ha_rows records=0; + bool all_covered; + + /* Start will all scans and remove one by one until */ + DBUG_PRINT("info", ("Building covering ROR-intersection")); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "building covering ROR-I", + ror_scan_mark, ror_scans_end);); + do { + /* + Update changed sorting info: + #covered fields, + number of first not covered component + Calculate and save these values for each of remaining scans. + */ + for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) + { + bitmap_subtract(&(*scan)->covered_fields, &covered_fields); + (*scan)->used_fields_covered= + bitmap_bits_set(&(*scan)->covered_fields); + (*scan)->first_uncovered_field= + bitmap_get_first(&(*scan)->covered_fields); + } + + qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); + + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "remaining scans", + ror_scan_mark, ror_scans_end);); + + /* I=I-first(I) */ + total_cost += (*ror_scan_mark)->index_read_cost; + records += (*ror_scan_mark)->records; + DBUG_PRINT("info", ("Adding scan on %s", + param->table->key_info[(*ror_scan_mark)->keynr].name)); + if (total_cost > read_time) + DBUG_RETURN(NULL); + /* F=F-covered by first(I) */ + bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); + } while (!all_covered && (++ror_scan_mark < ror_scans_end)); + + if (!all_covered) + DBUG_RETURN(NULL); /* should not happen actually */ + + /* + Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with + cost total_cost. + */ + DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating covering ROR-intersect", + tree->ror_scans, ror_scan_mark);); + + /* Add priority queue use cost. */ + total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2); + DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); + + if (total_cost > read_time) + DBUG_RETURN(NULL); + + TRP_ROR_INTERSECT *trp; + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + uint best_num= (ror_scan_mark - tree->ror_scans); + if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= true; + trp->read_cost= total_cost; + trp->records= records; + + DBUG_RETURN(trp); +} + + +/* + Get best "range" table read plan for given SEL_TREE. + Also update PARAM members and store ROR scans info in the SEL_TREE. SYNOPSIS get_quick_select_params - tree in make range select for this SEL_TREE - param in parameters from test_quick_select - needed_reg in/out other table data needed by this quick_select - index_read_must_be_used if true, assume 'index only' option will be set + param parameters from test_quick_select + tree make range select for this SEL_TREE + index_read_must_be_used if true, assume 'index only' option will be set (except for clustered PK indexes) - read_time out read time estimate - records out # of records estimate - key_to_read out SEL_ARG to be used for creating quick select + read_time don't create read plans with cost > read_time. + RETURN + Best range read plan + NULL if no plan found or error occurred */ -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_must_be_used, - double *read_time, ha_rows *records, - SEL_ARG ***key_to_read) +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time) { int idx; - int result = 1; + SEL_ARG **key,**end, **key_to_read= NULL; + ha_rows best_records; + TRP_RANGE* read_plan= NULL; bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + DBUG_ENTER("get_key_scans_params"); + LINT_INIT(best_records); /* protected by key_to_read */ /* Note that there may be trees that have type SEL_TREE::KEY but contain no key reads at all, e.g. tree for expression "key1 is not null" where key1 is defined as "not null". - */ - SEL_ARG **key,**end; - - for (idx= 0,key=tree->keys, end=key+param->keys ; + */ + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, + "tree scans");); + tree->ror_scans_map.clear_all(); + tree->n_ror_scans= 0; + for (idx= 0,key=tree->keys, end=key+param->keys; key != end ; key++,idx++) { @@ -1456,43 +2674,157 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param, uint keynr= param->real_keynr[idx]; if ((*key)->type == SEL_ARG::MAYBE_KEY || (*key)->maybe_flag) - needed_reg.set_bit(keynr); + param->needed_reg->set_bit(keynr); bool read_index_only= index_read_must_be_used? true : (bool)param->table->used_keys.is_set(keynr); - found_records=check_quick_select(param, idx, *key); - if (found_records != HA_POS_ERROR && found_records > 2 && - read_index_only && - (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) && - !(pk_is_clustered && keynr == param->table->primary_key)) - { - /* We can resolve this by only reading through this key. */ - found_read_time=get_index_only_read_time(param, found_records, keynr); - } - else - { - /* - cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks) - The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function. - */ - found_read_time= (param->table->file->read_time(keynr, - param->range_count, - found_records)+ - (double) found_records / TIME_FOR_COMPARE); - } - if (*read_time > found_read_time && found_records != HA_POS_ERROR) + found_records= check_quick_select(param, idx, *key); + if (param->is_ror_scan) + { + tree->n_ror_scans++; + tree->ror_scans_map.set_bit(idx); + } + if (found_records != HA_POS_ERROR && found_records > 2 && + read_index_only && + (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) && + !(pk_is_clustered && keynr == param->table->primary_key)) + { + /* We can resolve this by only reading through this key. */ + found_read_time= get_index_only_read_time(param,found_records,keynr); + } + else + { + /* + cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks) + The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function. + */ + found_read_time= (param->table->file->read_time(keynr, + param->range_count, + found_records)+ + (double) found_records / TIME_FOR_COMPARE); + } + if (read_time > found_read_time && found_records != HA_POS_ERROR + /*|| read_time == DBL_MAX*/ ) + { + read_time= found_read_time; + best_records= found_records; + key_to_read= key; + } + + } + } + + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, + "ROR scans");); + if (key_to_read) + { + idx= key_to_read - tree->keys; + if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx))) + { + read_plan->records= best_records; + read_plan->is_ror= tree->ror_scans_map.is_set(idx); + read_plan->read_cost= read_time; + DBUG_PRINT("info",("Returning range plan for key %s, cost %g", + param->table->key_info[param->real_keynr[idx]].name, + read_plan->read_cost)); + } + } + else + DBUG_PRINT("info", ("No 'range' table read plan found")); + + DBUG_RETURN(read_plan); +} + + +QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_MERGE_SELECT *quick_imerge; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) + return NULL; + + quick_imerge->records= records; + quick_imerge->read_time= read_cost; + for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))|| + quick_imerge->push_quick_back(quick)) + { + delete quick; + delete quick_imerge; + return NULL; + } + } + return quick_imerge; +} + +QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_INTERSECT_SELECT *quick_intrsect; + QUICK_RANGE_SELECT *quick; + DBUG_ENTER("TRP_ROR_INTERSECT::make_quick"); + MEM_ROOT *alloc; + + if ((quick_intrsect= + new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table, + retrieve_full_rows? (!is_covering):false, + parent_alloc))) + { + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating ROR-intersect", + first_scan, last_scan);); + alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc; + for(; first_scan != last_scan;++first_scan) + { + if (!(quick= get_quick_select(param, (*first_scan)->idx, + (*first_scan)->sel_arg, alloc)) || + quick_intrsect->push_quick_back(quick)) { - *read_time= found_read_time; - *records= found_records; - *key_to_read= key; - result = 0; + delete quick_intrsect; + DBUG_RETURN(NULL); } } + quick_intrsect->records= records; + quick_intrsect->read_time= read_cost; + } + DBUG_RETURN(quick_intrsect); +} + +QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_UNION_SELECT *quick_roru; + TABLE_READ_PLAN **scan; + QUICK_SELECT_I *quick; + DBUG_ENTER("TRP_ROR_UNION::make_quick"); + /* + It is currently impossible to construct a ROR-union that will + not retrieve full rows, ingore retrieve_full_rows. + */ + if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) + { + for(scan= first_ror; scan != last_ror; scan++) + { + if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) || + quick_roru->push_quick_back(quick)) + DBUG_RETURN(NULL); + } + quick_roru->records= records; + quick_roru->read_time= read_cost; } - return result; + DBUG_RETURN(quick_roru); } +/****************************************************************************/ /* make a select tree of all keys in condition */ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) @@ -2975,34 +4307,88 @@ void SEL_ARG::test_use_count(SEL_ARG *root) /***************************************************************************** ** Check how many records we will find by using the found tree +** NOTE +** param->table->quick_* and param->range_count (and maybe others) are +** updated with data of given key scan. *****************************************************************************/ static ha_rows check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) { ha_rows records; + bool cpk_scan; + uint key; DBUG_ENTER("check_quick_select"); if (!tree) DBUG_RETURN(HA_POS_ERROR); // Can't use it param->max_key_part=0; param->range_count=0; + key= param->real_keynr[idx]; + if (tree->type == SEL_ARG::IMPOSSIBLE) DBUG_RETURN(0L); // Impossible select. return if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0) DBUG_RETURN(HA_POS_ERROR); // Don't use tree + + enum ha_key_alg key_alg= param->table->key_info[key].algorithm; + if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) + { + /* Records are not ordered by rowid for other types of indexes. */ + param->is_ror_scan= false; + cpk_scan= false; + } + else + { + /* + Clustered PK scan is a special case, check_quick_keys doesn't recognize + CPK scans as ROR scans (while actually any CPK scan is a ROR scan). + */ + cpk_scan= (param->table->primary_key == param->real_keynr[idx]) && + param->table->file->primary_key_is_clustered(); + param->is_ror_scan= !cpk_scan; + } + records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); if (records != HA_POS_ERROR) - { - uint key=param->real_keynr[idx]; + { param->table->quick_keys.set_bit(key); param->table->quick_rows[key]=records; param->table->quick_key_parts[key]=param->max_key_part+1; + + if (cpk_scan) + param->is_ror_scan= true; } DBUG_RETURN(records); } +/* + SYNOPSIS + check_quick_keys() + param + idx key to use, its number in list of used keys (that is, + param->real_keynr[idx] holds the key number in table) + + key_tree SEL_ARG tree which cost is calculated. + min_key buffer with min key value tuple + min_key_flag + max_key buffer with max key value tuple + max_key_flag + + NOTE + The function does the recursive descent on the tree via left, right, and + next_key_part edges. The #rows estimates are calculated at the leaf nodes. + + param->min_key and param->max_key are used to hold key segment values. + + The side effects are: + param->max_key_part is updated to hold the maximum number of key parts used + in scan minus 1. + param->range_count is updated. + param->is_ror_scan is updated. +*/ + static ha_rows check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, char *min_key,uint min_key_flag, char *max_key, @@ -3013,6 +4399,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, param->max_key_part=max(param->max_key_part,key_tree->part); if (key_tree->left != &null_element) { + param->is_ror_scan= false; records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag, max_key,max_key_flag); if (records == HA_POS_ERROR) // Impossible @@ -3040,6 +4427,9 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp_max_key, max_key_flag | key_tree->max_flag); goto end; // Ugly, but efficient } + else + param->is_ror_scan= false; + tmp_min_flag=key_tree->min_flag; tmp_max_flag=key_tree->max_flag; if (!tmp_min_flag) @@ -3068,6 +4458,15 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp=1; // Max one record else { + if (param->is_ror_scan) + { + if (!(min_key_length == max_key_length && + !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) && + !key_tree->min_flag && !key_tree->max_flag && + is_key_scan_ror(param, keynr, key_tree->part + 1))) + param->is_ror_scan= false; + } + if (tmp_min_flag & GEOM_FLAG) { tmp= param->table->file-> @@ -3098,6 +4497,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, records+=tmp; if (key_tree->right != &null_element) { + param->is_ror_scan= false; tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag, max_key,max_key_flag); if (tmp == HA_POS_ERROR) @@ -3107,12 +4507,50 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, return records; } +/* + Check if key scan on key keynr with first nparts key parts fixed is a + ROR scan. This function doesn't handle clustered PK scans or HASH index + scans. +*/ +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) +{ + KEY *table_key= param->table->key_info + keynr; + KEY_PART_INFO *key_part= table_key->key_part + nparts; + KEY_PART_INFO *key_part_end= table_key->key_part + + table_key->key_parts; + + if (key_part == key_part_end) + return true; + uint pk_number= param->table->primary_key; + if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY) + return false; + + KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part; + KEY_PART_INFO *pk_part_end= pk_part + + param->table->key_info[pk_number].key_parts; + for(;(key_part!=key_part_end) && (pk_part != pk_part_end); + ++key_part, ++pk_part) + { + if (key_part->field != pk_part->field) + return false; + } + return (key_part == key_part_end); +} -/**************************************************************************** -** change a tree to a structure to be used by quick_select -** This uses it's own malloc tree -** The caller should call QUICK_SELCT::init for returned quick select -****************************************************************************/ + +/* + Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. + This uses it's own malloc tree. + SYNOPSIS + get_quick_select() + param + idx index of used key in param->key. + key_tree SEL_ARG tree for the used key + parent_alloc if not NULL, use it to allocate memory for + quick select data. Otherwise use quick->alloc. + + The caller should call QUICK_SELCT::init for returned quick select +*/ QUICK_RANGE_SELECT * get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, MEM_ROOT *parent_alloc) @@ -3292,6 +4730,47 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length) return 0; } +bool QUICK_SELECT_I::check_if_keys_used(List *fields) +{ + return check_if_key_used(head, index, *fields); +} + +bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List *fields) +{ + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (quick->check_if_keys_used(fields)) + return 1; + } + return 0; +} + /**************************************************************************** ** Create a QUICK RANGE based on a key ****************************************************************************/ @@ -3351,8 +4830,6 @@ err: } -#define MEM_STRIP_BUF_SIZE thd->variables.sortbuff_size - /* Fetch all row ids into unique. @@ -3386,9 +4863,9 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() cur_quick_select->init(); - unique= new Unique(refposcmp2, (void *) &head->file->ref_length, + unique= new Unique(refpos_order_cmp, (void *)head->file, head->file->ref_length, - MEM_STRIP_BUF_SIZE); + thd->variables.sortbuff_size); if (!unique) DBUG_RETURN(1); do @@ -3421,8 +4898,7 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() continue; cur_quick_select->file->position(cur_quick_select->record); - result= unique->unique_add((char*)cur_quick_select->file->ref); - + result= unique->unique_add((char*)cur_quick_select->file->ref); if (result) DBUG_RETURN(1); @@ -3475,6 +4951,143 @@ int QUICK_INDEX_MERGE_SELECT::get_next() DBUG_RETURN(result); } + +/* + NOTES + Invariant on enter/exit: all intersected selects have retrieved index + records with rowid <= some_rowid_val and no intersected select has + retrieved any index records with rowid > some_rowid_val. + We start fresh and loop until we have retrieved the same rowid in each of + the key scans or we got an error. + + If a Clustered PK scan is present, it is used only to check if row + satisfies its conditions (and never used for row retrieval). +*/ + +int QUICK_ROR_INTERSECT_SELECT::get_next() +{ + List_iterator_fast quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + int error, cmp; + uint last_rowid_count=0; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); + + /* Get a rowid for first quick and save it as a 'candidate' */ + quick= quick_it++; + if (cpk_quick) + { + do { + error= quick->get_next(); + }while (!error && !cpk_quick->row_in_ranges()); + } + else + error= quick->get_next(); + + if (error) + DBUG_RETURN(error); + + quick->file->position(quick->record); + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + + while (last_rowid_count < quick_selects.elements) + { + if (!(quick= quick_it++)) + { + quick_it.rewind(); + quick= quick_it++; + } + + do { + if ((error= quick->get_next())) + DBUG_RETURN(error); + quick->file->position(quick->record); + cmp= head->file->cmp_ref(quick->file->ref, last_rowid); + } while (cmp < 0); + + /* Ok, current select 'caught up' and returned ref >= cur_ref */ + if (cmp > 0) + { + /* Found a row with ref > cur_ref. Make it a new 'candidate' */ + if (cpk_quick) + { + while (!cpk_quick->row_in_ranges()) + { + if ((error= quick->get_next())) + DBUG_RETURN(error); + } + } + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + } + else + { + /* current 'candidate' row confirmed by this select */ + last_rowid_count++; + } + } + + /* We get here iff we got the same row ref in all scans. */ + if (need_to_fetch_row) + error= head->file->rnd_pos(head->record[0], last_rowid); + DBUG_RETURN(error); +} + + +/* + NOTES + Enter/exit invariant: + For each quick select in the queue a {key,rowid} tuple has been + retrieved but the corresponding row hasn't been passed to output. +*/ + +int QUICK_ROR_UNION_SELECT::get_next() +{ + int error, dup_row; + QUICK_SELECT_I *quick; + byte *tmp; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next"); + + do + { + if (!queue.elements) + DBUG_RETURN(HA_ERR_END_OF_FILE); + /* Ok, we have a queue with > 1 scans */ + + quick= (QUICK_SELECT_I*)queue_top(&queue); + memcpy(cur_rowid, quick->last_rowid, rowid_length); + + /* put into queue rowid from the same stream as top element */ + if ((error= quick->get_next())) + { + if (error != HA_ERR_END_OF_FILE) + DBUG_RETURN(error); + queue_remove(&queue, 0); + } + else + { + quick->save_last_pos(); + queue_replaced(&queue); + } + + if (!have_prev_rowid) + { + /* No rows have been returned yet */ + dup_row= false; + have_prev_rowid= true; + } + else + dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid); + }while (dup_row); + + tmp= cur_rowid; + cur_rowid= prev_rowid; + prev_rowid= tmp; + + error= head->file->rnd_pos(quick->record, prev_rowid); + DBUG_RETURN(error); +} + /* get next possible record using quick-struct */ int QUICK_RANGE_SELECT::get_next() @@ -3883,6 +5496,156 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, #endif +void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + KEY *key_info= head->key_info + index; + key_names->append(key_info->name); + length= longlong2str(max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); +} + +void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (key_names->length()) + key_names->append(','); + key_names->append(key_info->name); + + if (used_lengths->length()) + used_lengths->append(','); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (pk_quick_select) + { + KEY *key_info= head->key_info + pk_quick_select->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (!first) + key_names->append(','); + key_names->append(key_info->name); + + if (first) + first= false; + else + used_lengths->append(','); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + used_lengths->append(','); + key_names->append(','); + } + quick->fill_keys_and_lengths(key_names, used_lengths); + } +} + +#ifndef DBUG_OFF + +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg) +{ + SEL_ARG **key,**end; + int idx; + char buff[1024]; + DBUG_ENTER("print_sel_tree"); + if (! _db_on_) + DBUG_VOID_RETURN; + + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (idx= 0,key=tree->keys, end=key+param->keys ; + key != end ; + key++,idx++) + { + if (tree_map->is_set(idx)) + { + uint keynr= param->real_keynr[idx]; + if (tmp.length()) + tmp.append(','); + tmp.append(param->table->key_info[keynr].name); + } + } + if (!tmp.length()) + tmp.append("(empty)"); + + DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr())); + + DBUG_VOID_RETURN; +} + +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end) +{ + DBUG_ENTER("print_ror_scans"); + if (! _db_on_) + DBUG_VOID_RETURN; + + char buff[1024]; + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for(;start != end; start++) + { + if (tmp.length()) + tmp.append(','); + tmp.append(table->key_info[(*start)->keynr].name); + } + if (!tmp.length()) + tmp.append("(empty)"); + DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr())); + DBUG_VOID_RETURN; +} + /***************************************************************************** ** Print a quick range for debugging ** TODO: @@ -3890,8 +5653,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, ** of locking the DEBUG stream ! *****************************************************************************/ -#ifndef DBUG_OFF - static void print_key(KEY_PART *key_part,const char *key,uint used_length) { @@ -3923,65 +5684,115 @@ print_key(KEY_PART *key_part,const char *key,uint used_length) } } -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg) +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) { + char buf[MAX_KEY/8+1]; DBUG_ENTER("print_param"); if (! _db_on_ || !quick) DBUG_VOID_RETURN; + DBUG_LOCK_FILE; + + quick->dbug_dump(0, true); + fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf)); - List_iterator_fast it(quick->quick_selects); - QUICK_RANGE_SELECT* quick_range_sel; - while ((quick_range_sel= it++)) - { - print_quick_sel_range(quick_range_sel, needed_reg); - } - if (quick->pk_quick_select) - print_quick_sel_range(quick->pk_quick_select, needed_reg); - + DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg) +static void print_rowid(byte* val, int len) { - QUICK_RANGE *range; - char buf[MAX_KEY/8+1]; - DBUG_ENTER("print_param"); - if (! _db_on_ || !quick) - DBUG_VOID_RETURN; - + byte *pb; DBUG_LOCK_FILE; - fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n", - quick->index, needed_reg->print(buf)); + fputc('\"', DBUG_FILE); + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%c", *pb); + fprintf(DBUG_FILE, "\", hex: "); + + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%x ", *pb); + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; +} - QUICK_RANGE **pr= (QUICK_RANGE**)quick->ranges.buffer; - QUICK_RANGE **last_range= pr + quick->ranges.elements; - for (; pr!=last_range; ++pr) +void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) +{ + fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n", + indent, "", head->key_info[index].name, max_used_key_length); + + if (verbose) { - range= *pr; - if (!(range->flag & NO_MIN_RANGE)) + QUICK_RANGE *range; + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + QUICK_RANGE **last_range= pr + ranges.elements; + for (; pr!=last_range; ++pr) { - print_key(quick->key_parts,range->min_key,range->min_length); - if (range->flag & NEAR_MIN) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - } - fputs("X",DBUG_FILE); + fprintf(DBUG_FILE, "%*s", indent + 2, ""); + range= *pr; + if (!(range->flag & NO_MIN_RANGE)) + { + print_key(key_parts,range->min_key,range->min_length); + if (range->flag & NEAR_MIN) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + } + fputs("X",DBUG_FILE); - if (!(range->flag & NO_MAX_RANGE)) - { - if (range->flag & NEAR_MAX) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - print_key(quick->key_parts,range->max_key,range->max_length); + if (!(range->flag & NO_MAX_RANGE)) + { + if (range->flag & NEAR_MAX) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + print_key(key_parts,range->max_key,range->max_length); + } + fputs("\n",DBUG_FILE); } - fputs("\n",DBUG_FILE); } - DBUG_UNLOCK_FILE; - DBUG_VOID_RETURN; +} + +void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (pk_quick_select) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + pk_quick_select->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", + indent, "", need_to_fetch_row? "":"non-"); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (cpk_quick) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + cpk_quick->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast it(quick_selects); + QUICK_SELECT_I *quick; + fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + fprintf(DBUG_FILE, "%*s}\n", indent, ""); } #endif diff --git a/sql/opt_range.h b/sql/opt_range.h index 7c2981795a2..cd9664ba759 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -79,14 +79,18 @@ public: TABLE *head; /* - the only index this quick select uses, or MAX_KEY for - QUICK_INDEX_MERGE_SELECT + index this quick select uses, or MAX_KEY for quick selects + that use several indexes */ - uint index; + uint index; + + /* applicable iff index!= MAX_KEY */ uint max_used_key_length, used_key_parts; QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; + + /**/ virtual int init() = 0; virtual int reset(void) = 0; virtual int get_next() = 0; /* get next record to retrieve */ @@ -97,27 +101,64 @@ public: QS_TYPE_RANGE = 0, QS_TYPE_INDEX_MERGE = 1, QS_TYPE_RANGE_DESC = 2, - QS_TYPE_FULLTEXT = 3 + QS_TYPE_FULLTEXT = 3, + QS_TYPE_ROR_INTERSECT = 4, + QS_TYPE_ROR_UNION = 5, }; - /* Get type of this quick select - one of the QS_* values */ - virtual int get_type() = 0; + /* Get type of this quick select - one of the QS_TYPE_* values */ + virtual int get_type() = 0; + + /* + Initialize this quick select as a child of a index union or intersection + scan. This call replaces init() call. + */ + virtual int init_ror_child_scan(bool reuse_handler) + { DBUG_ASSERT(0); return 1; } + + virtual void cleanup_ror_child_scan() { DBUG_ASSERT(0); } + virtual void save_last_pos(){}; + + /* + Fill key_names with list of keys this quick select used; + fill used_lenghth with correponding used lengths. + This is used by select_describe. + */ + virtual void fill_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + virtual bool check_if_keys_used(List *fields); + + /* + rowid of last row retrieved by this quick select. This is used only + when doing ROR-index_merge selects + */ + byte *last_rowid; + byte *record; +#ifndef DBUG_OFF + /* + Print quick select information to DBUG_FILE. Caller is responsible + for locking DBUG_FILE before this call and unlocking it afterwards. + */ + virtual void dbug_dump(int indent, bool verbose)= 0; +#endif }; + struct st_qsel_param; class SEL_ARG; -class QUICK_RANGE_SELECT : public QUICK_SELECT_I +class QUICK_RANGE_SELECT : public QUICK_SELECT_I { protected: bool next,dont_free; public: int error; +protected: handler *file; - byte *record; + bool free_file; /* if true, this quick select "owns" file and will free it */ + protected: - friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map* needed_reg); friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, struct st_table_ref *ref); @@ -131,17 +172,19 @@ protected: MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; friend class QUICK_INDEX_MERGE_SELECT; + friend class QUICK_ROR_INTERSECT_SELECT; DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */ QUICK_RANGE **cur_range; /* current element in ranges */ QUICK_RANGE *range; - MEM_ROOT alloc; KEY_PART *key_parts; int cmp_next(QUICK_RANGE *range); int cmp_prev(QUICK_RANGE *range); bool row_in_ranges(); public: + MEM_ROOT alloc; + QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0, MEM_ROOT *parent_alloc=NULL); ~QUICK_RANGE_SELECT(); @@ -157,7 +200,16 @@ public: int get_next(); bool reverse_sorted() { return 0; } bool unique_key_range(); + int init_ror_child_scan(bool reuse_handler); + void save_last_pos() + { + file->position(record); + }; int get_type() { return QS_TYPE_RANGE; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif }; @@ -232,6 +284,11 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_INDEX_MERGE; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); @@ -242,9 +299,6 @@ public: List_iterator_fast cur_quick_it; QUICK_RANGE_SELECT* cur_quick_select; - /* last element in quick_selects list */ - QUICK_RANGE_SELECT* last_quick_select; - /* quick select that uses clustered primary key (NULL if none) */ QUICK_RANGE_SELECT* pk_quick_select; @@ -262,6 +316,87 @@ public: READ_RECORD read_record; }; + +/* + Rowid-Ordered Retrieval (ROR) index intersection quick select. + This quick select produces an intersection of records returned by several + QUICK_RANGE_SELECTs that return data ordered by rowid. +*/ +class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + ~QUICK_ROR_INTERSECT_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_INTERSECT; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif + int init_ror_child_scan(bool reuse_handler); + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* range quick selects this intersection consists of */ + List quick_selects; + + QUICK_RANGE_SELECT *cpk_quick; + MEM_ROOT alloc; + THD *thd; + bool reset_called; + bool need_to_fetch_row; +}; + +/* + Rowid-Ordered Retrieval index union select. + +*/ +class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table); + ~QUICK_ROR_UNION_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_UNION; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + /* range quick selects this index_merge read consists of */ + List quick_selects; + + QUEUE queue; + MEM_ROOT alloc; + + THD *thd; + byte *cur_rowid; + byte *prev_rowid; + uint rowid_length; + bool reset_called; + bool have_prev_rowid; + +private: + static int queue_cmp(void *arg, byte *val1, byte *val2); +}; + + + class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT { public: diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 22cb11eed41..72a39e347b1 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -151,6 +151,12 @@ int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, select= 0; } + if (select && select->quick && select->quick->reset()) + { + delete select; + free_underlaid_joins(thd, &thd->lex->select_lex); + DBUG_RETURN(-1); // This will force out message + } init_read_record(&info,thd,table,select,1,1); deleted=0L; init_ftfuncs(thd, &thd->lex->select_lex, 1); @@ -250,10 +256,10 @@ cleanup: #define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size -extern "C" int refposcmp2(void* arg, const void *a,const void *b) +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) { - /* arg is a pointer to file->ref_length */ - return memcmp(a,b, *(int*) arg); + handler *file= (handler*)arg; + return file->cmp_ref((const byte*)a, (const byte*)b); } multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, @@ -317,8 +323,8 @@ multi_delete::initialize_tables(JOIN *join) for (walk=walk->next ; walk ; walk=walk->next) { TABLE *table=walk->table; - *tempfiles_ptr++= new Unique (refposcmp2, - (void *) &table->file->ref_length, + *tempfiles_ptr++= new Unique (refpos_order_cmp, + (void *) table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9f26d7458d0..528fe38ed3f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -6976,8 +6976,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } else if (select && select->quick) // Range found by opt_range { - /* assume results are not ordered when index merge is used */ - if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) DBUG_RETURN(0); ref_key= select->quick->index; ref_key_parts= select->quick->used_key_parts; @@ -7038,9 +7046,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if (!select->quick->reverse_sorted()) { + int quick_type= select->quick->get_type(); if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST || - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION) DBUG_RETURN(0); // Use filesort // ORDER BY range_key DESC @@ -8961,6 +8971,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, select_result *result=join->result; Item *item_null= new Item_null(); CHARSET_INFO *cs= &my_charset_latin1; + int quick_type= -1; DBUG_ENTER("select_describe"); DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s", (ulong)join->select_lex, join->select_lex->type, @@ -9006,8 +9017,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, cs)); if (tab->type == JT_ALL && tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + quick_type= tab->select->quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) tab->type = JT_INDEX_MERGE; else tab->type = JT_RANGE; @@ -9028,6 +9041,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join_type_str[tab->type]), cs)); uint j; + /* Build "possible_keys" value and add it to item_list */ if (!tab->keys.is_clear_all()) { for (j=0 ; j < table->keys ; j++) @@ -9044,6 +9058,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs)); else item_list.push_back(item_null); + + /* Build key,key_len, and ref values and add them to item_list */ if (tab->ref.key_parts) { KEY *key_info=table->key_info+ tab->ref.key; @@ -9078,48 +9094,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else if (tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - - List_iterator_fast it(quick_imerge-> - quick_selects); - while ((quick= it++)) - { - KEY *key_info= table->key_info + quick->index; - register uint length; - if (tmp3.length()) - tmp3.append(','); - - tmp3.append(key_info->name); - - if (tmp2.length()) - tmp2.append(','); - - length= longlong2str(quick->max_used_key_length, keylen_str_buf, - 10) - - keylen_str_buf; - - tmp2.append(keylen_str_buf, length); - } - } - else - { - KEY *key_info= table->key_info + tab->select->quick->index; - register uint length; - tmp3.append(key_info->name); - - length= longlong2str(tab->select->quick->max_used_key_length, - keylen_str_buf, 10) - - keylen_str_buf; - tmp2.append(keylen_str_buf, length); - } - - item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); + tab->select->quick->fill_keys_and_lengths(&tmp2, &tmp3); item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs)); + item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); item_list.push_back(item_null); } else @@ -9134,7 +9111,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, my_bool key_read=table->key_read; if (tab->type == JT_NEXT && table->used_keys.is_set(tab->index)) key_read=1; - + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row) + key_read=1; + if (tab->info) item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs)); else diff --git a/sql/sql_select.h b/sql/sql_select.h index 9bba4e9318e..9d784e585b1 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -329,7 +329,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys); int opt_sum_query(TABLE_LIST *tables, List &all_fields,COND *conds); /* from sql_delete.cc, used by opt_range.cc */ -extern "C" int refposcmp2(void* arg, const void *a,const void *b); +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); /* class to copying an field/item to a key struct */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc index cd493fcac30..9e8b6f551c6 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -182,37 +182,8 @@ TEST_join(JOIN *join) tab->select->quick_keys.print(buf)); else if (tab->select->quick) { - int quick_type= tab->select->quick->get_type(); - if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) || - (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC)) - { - fprintf(DBUG_FILE, - " quick select used on key %s, length: %d\n", - form->key_info[tab->select->quick->index].name, - tab->select->quick->max_used_key_length); - } - else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - fprintf(DBUG_FILE, - " index_merge quick select used\n"); - - List_iterator_fast it(quick_imerge->quick_selects); - while ((quick = it++)) - { - fprintf(DBUG_FILE, - " range quick select: key %s, length: %d\n", - form->key_info[quick->index].name, - quick->max_used_key_length); - } - } - else - { - fprintf(DBUG_FILE, - " quick select of unknown nature used\n"); - } + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, false); } else VOID(fputs(" select used\n",DBUG_FILE)); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 6b4d2f9b659..15d86b63aa1 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -176,18 +176,11 @@ int mysql_update(THD *thd, } init_ftfuncs(thd, &thd->lex->select_lex, 1); /* Check if we are modifying a key that we are used to search with */ + if (select && select->quick) { - if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - used_index= select->quick->index; - used_key_is_modified= (!select->quick->unique_key_range() && - check_if_key_used(table,used_index,fields)); - } - else - { - used_key_is_modified= true; - } + used_key_is_modified= (!select->quick->unique_key_range() && + select->quick->check_if_keys_used(&fields)); } else if ((used_index=table->file->key_used_on_scan) < MAX_KEY) used_key_is_modified=check_if_key_used(table, used_index, fields); @@ -246,7 +239,9 @@ int mysql_update(THD *thd, if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME))) goto err; - + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); thd->proc_info="Searching rows for update"; uint tmp_limit= limit; @@ -300,6 +295,9 @@ int mysql_update(THD *thd, if (handle_duplicates == DUP_IGNORE) table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -732,26 +730,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List *fields) case JT_ALL: /* If range search on index */ if (join_tab->quick) - { - if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - return !check_if_key_used(table,join_tab->quick->index,*fields); - } - else - { - QUICK_INDEX_MERGE_SELECT *qsel_imerge= - (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick); - List_iterator_fast it(qsel_imerge->quick_selects); - QUICK_RANGE_SELECT *quick; - while ((quick= it++)) - { - if (check_if_key_used(table, quick->index, *fields)) - return 0; - } - return 1; - } - } - + return !join_tab->quick->check_if_keys_used(fields); /* If scanning in clustered key */ if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && table->primary_key < MAX_KEY) -- cgit v1.2.1 From 1ce1880204149404e09411505d457c452a29d55b Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 29 May 2004 02:04:01 +0400 Subject: * New index_merge EXPLAIN output format * Fixed a problem with wrong query results for partially covering keys in ROR-index_merge * ROR-intersection retrieval plan choice algorithm now uses less disk IO - and properly processes clustered PK range scans * Fixed several minor range optimizer problems * Added more comments * Code cleanup mysql-test/r/index_merge.result: New index_merge EXPLAIN output format changes mysql-test/r/index_merge_innodb.result: New index_merge EXPLAIN output format changes mysql-test/r/index_merge_ror.result: New index_merge EXPLAIN output format changes Added test for scans on keys that cover fields partially Fixed a minor ROR optimizer problem mysql-test/r/index_merge_ror_cpk.result: More tests added mysql-test/t/index_merge_ror.test: New index_merge EXPLAIN output format changes Added test for scans on keys that cover fields partially Fixed a minor ROR optimizer problem mysql-test/t/index_merge_ror_cpk.test: More tests added mysys/my_bitmap.c: Comments added, code cleanup sql/opt_range.cc: Comments added, code cleanup Numerous fixes, see changeset description sql/opt_range.h: Comments added, code cleanup New index_merge EXPLAIN output format related changes sql/sql_select.cc: Comments added, code cleanup New index_merge EXPLAIN output --- sql/opt_range.cc | 1156 +++++++++++++++++++++++++++++++++++++---------------- sql/opt_range.h | 197 ++++++--- sql/sql_select.cc | 52 ++- 3 files changed, 992 insertions(+), 413 deletions(-) (limited to 'sql') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index eb6eefccf02..ee328dadad6 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -199,19 +199,7 @@ public: void store(uint length,char **min_key,uint min_key_flag, char **max_key, uint max_key_flag) { - if ((min_flag & GEOM_FLAG) || - (!(min_flag & NO_MIN_RANGE) && - !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN)))) - { - if (maybe_null && *min_value) - { - **min_key=1; - bzero(*min_key+1,length); - } - else - memcpy(*min_key,min_value,length+(int) maybe_null); - (*min_key)+= length+(int) maybe_null; - } + store_min(length, min_key, min_key_flag); if (!(max_flag & NO_MAX_RANGE) && !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX))) { @@ -298,6 +286,7 @@ public: class SEL_IMERGE; + class SEL_TREE :public Sql_alloc { public: @@ -319,7 +308,7 @@ public: /* The members below are filled/used only after get_mm_tree is done */ key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ - uint n_ror_scans; + uint n_ror_scans; /* number of set bits in ror_scans_map */ struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ @@ -330,7 +319,8 @@ public: typedef struct st_qsel_param { THD *thd; TABLE *table; - KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY]; + KEY_PART *key_parts,*key_parts_end; + KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ MEM_ROOT *mem_root; table_map prev_tables,read_tables,current_table; uint baseflag, max_key_part, range_count; @@ -353,7 +343,8 @@ typedef struct st_qsel_param { uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ - bool is_ror_scan; /* true if last checked tree->key can be used for ROR-scan */ + /* true if last checked tree->key can be used for ROR-scan */ + bool is_ror_scan; } PARAM; class TABLE_READ_PLAN; @@ -625,7 +616,6 @@ int imerge_list_or_list(PARAM *param, RETURN 0 OK, result is stored in *im1. other Error - */ int imerge_list_or_tree(PARAM *param, @@ -767,11 +757,12 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, :cur_quick_it(quick_selects),pk_quick_select(NULL),unique(NULL), thd(thd_param) { + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT"); index= MAX_KEY; head= table; - reset_called= false; bzero(&read_record, sizeof(read_record)); init_sql_alloc(&alloc,1024,0); + DBUG_VOID_RETURN; } int QUICK_INDEX_MERGE_SELECT::init() @@ -785,11 +776,7 @@ int QUICK_INDEX_MERGE_SELECT::reset() { int result; DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset"); - if (reset_called) - DBUG_RETURN(0); - - reset_called= true; - result = cur_quick_select->reset() || prepare_unique(); + result= cur_quick_select->reset() || prepare_unique(); DBUG_RETURN(result); } @@ -810,10 +797,12 @@ QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() { + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT"); delete unique; quick_selects.delete_elements(); delete pk_quick_select; free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; } @@ -821,8 +810,7 @@ QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, TABLE *table, bool retrieve_full_rows, MEM_ROOT *parent_alloc) - : cpk_quick(NULL), thd(thd_param), reset_called(false), - need_to_fetch_row(retrieve_full_rows) + : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows) { index= MAX_KEY; head= table; @@ -835,28 +823,50 @@ QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, head->file->ref_length); } + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + int QUICK_ROR_INTERSECT_SELECT::init() { - /* Check if last_rowid was allocated in ctor */ + /* Check if last_rowid was successfully allocated in ctor */ return !last_rowid; } /* - Init this quick select to be a ROR child scan. - NOTE - QUICK_ROR_INTERSECT_SELECT::reset() may choose not to call this function - but reuse its handler object for doing one of range scans. It duplicates - a part of this function' code. + Initialize this quick select to be a ROR-merged scan. + + SYNOPSIS + QUICK_RANGE_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create a separate + handler object + + NOTES + This function creates and prepares for subsequent use a separate handler + object if it can't reuse head->file. The reason for this is that during + ROR-merge several key scans are performed simultaneously, and a single + handler is only capable of preserving context of a single key scan. + + In ROR-merge the quick select doing merge does full records retrieval, + merged quick selects read only keys. + RETURN 0 ROR child scan initialized, ok to use. 1 error */ -int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler) +int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) { handler *save_file= file; - DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_child_scan"); + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); if (reuse_handler) { @@ -894,7 +904,7 @@ int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler) file->close(); goto failure; } - free_file= true; + free_file= true; last_rowid= file->ref; DBUG_RETURN(0); @@ -903,32 +913,42 @@ failure: DBUG_RETURN(1); } -int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler) + +/* + Initialize this quick select to be a part of a ROR-merged scan. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create separate + handler object. + RETURN + 0 OK + other error code +*/ +int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler) { List_iterator_fast quick_it(quick_selects); QUICK_RANGE_SELECT* quick; - DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan"); + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan"); /* Initialize all merged "children" quick selects */ - quick_it.rewind(); DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler)); if (!need_to_fetch_row && reuse_handler) { quick= quick_it++; /* - There is no use for this->file. Reuse it for first of merged range + There is no use of this->file. Use it for the first of merged range selects. */ - if (quick->init_ror_child_scan(true)) + if (quick->init_ror_merged_scan(true)) DBUG_RETURN(1); quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); } while((quick= quick_it++)) { - if (quick->init_ror_child_scan(false)) + if (quick->init_ror_merged_scan(false)) DBUG_RETURN(1); quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); - /* Share the same record structure in intersect*/ + /* All merged scans share the same record buffer in intersection. */ quick->record= head->record[0]; } @@ -940,27 +960,44 @@ int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler) DBUG_RETURN(0); } + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + RETURN + 0 OK + other Error code +*/ + int QUICK_ROR_INTERSECT_SELECT::reset() { int result; DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); - result= init_ror_child_scan(true); + result= init_ror_merged_scan(true); DBUG_RETURN(result); } + +/* + Add a merged quick select to this ROR-intersection quick select. + + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::push_quick_back() + quick Quick select to be added. The quick select must return + rows in rowid order. + NOTES + This call can only be made before init() is called. + + RETURN + false OK + true Out of memory. +*/ + bool QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) { - bool res; - if (head->file->primary_key_is_clustered() && - quick->index == head->primary_key) - { - cpk_quick= quick; - res= 0; - } - else - res= quick_selects.push_back(quick); - return res; + return quick_selects.push_back(quick); } QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() @@ -974,7 +1011,7 @@ QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, TABLE *table) - : thd(thd_param), reset_called(false) + : thd(thd_param) { index= MAX_KEY; head= table; @@ -984,6 +1021,17 @@ QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, my_pthread_setspecific_ptr(THR_MALLOC,&alloc); } + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_UNION_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + int QUICK_ROR_UNION_SELECT::init() { if (init_queue(&queue, quick_selects.elements, 0, @@ -1000,8 +1048,11 @@ int QUICK_ROR_UNION_SELECT::init() return 0; } + /* - Comparison function to be used by priority queue. + Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority + queue. + SYNPOSIS QUICK_ROR_UNION_SELECT::queue_cmp() arg Pointer to QUICK_ROR_UNION_SELECT @@ -1010,11 +1061,22 @@ int QUICK_ROR_UNION_SELECT::init() */ int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) { - QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, ((QUICK_SELECT_I*)val2)->last_rowid); } + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + RETURN + 0 OK + other Error code +*/ + int QUICK_ROR_UNION_SELECT::reset() { QUICK_SELECT_I* quick; @@ -1028,7 +1090,7 @@ int QUICK_ROR_UNION_SELECT::reset() List_iterator_fast it(quick_selects); while ((quick= it++)) { - if (quick->init_ror_child_scan(false)) + if (quick->init_ror_merged_scan(false)) DBUG_RETURN(1); if ((error= quick->get_next())) { @@ -1245,13 +1307,33 @@ public: double read_cost; ha_rows records; /* estimate of #rows to be examined */ + /* + If true, the scan returns rows in rowid order. This is used only for + scans that can be both ROR and non-ROR. + */ bool is_ror; - /* Create quck select for this plan */ + + /* + Create quick select for this plan. + SYNOPSIS + make_quick() + param Parameter from test_quick_select + retrieve_full_rows If true, created quick select will do full record + retrieval. + parent_alloc Memory pool to use, if any. + + NOTES + retrieve_full_rows is ignored by some implementations. + + RETURN + created quick select + NULL on any error. + */ virtual QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc=NULL) = 0; - /* Table read plans are allocated on MEM_ROOT and must not be deleted */ + /* Table read plans are allocated on MEM_ROOT and are never deleted */ static void *operator new(size_t size, MEM_ROOT *mem_root) { return (void*) alloc_root(mem_root, (uint) size); } static void operator delete(void *ptr,size_t size) {} @@ -1262,11 +1344,19 @@ class TRP_ROR_UNION; class TRP_INDEX_MERGE; +/* + Plan for a QUICK_RANGE_SELECT scan. + TRP_RANGE::make_quick ignores retrieve_full_rows parameter because + QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full + record retrieval scans. +*/ + class TRP_RANGE : public TABLE_READ_PLAN { public: - SEL_ARG *key; - uint key_idx; /* key number in param->keys */ + SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ + uint key_idx; /* key number in PARAM::key */ + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) : key(key_arg), key_idx(idx_arg) {} @@ -1276,7 +1366,6 @@ public: { DBUG_ENTER("TRP_RANGE::make_quick"); QUICK_RANGE_SELECT *quick; - /* ignore retrieve_full_rows there as it is set/used elsewhere */ if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) { quick->records= records; @@ -1286,45 +1375,68 @@ public: } }; + +/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */ + class TRP_ROR_INTERSECT : public TABLE_READ_PLAN { public: QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc); + + /* Array of pointers to ROR range scans used in this intersection */ struct st_ror_scan_info **first_scan; - struct st_ror_scan_info **last_scan; + struct st_ror_scan_info **last_scan; /* End of the above array */ + struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ bool is_covering; /* true if no row retrieval phase is necessary */ - double index_scan_costs; + double index_scan_costs; /* SUM(cost(index_scan)) */ }; + /* - ROR-union is currently never covering. + Plan for QUICK_ROR_UNION_SELECT scan. + QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. */ + class TRP_ROR_UNION : public TABLE_READ_PLAN { public: QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc); - TABLE_READ_PLAN **first_ror; - TABLE_READ_PLAN **last_ror; - + TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ + TABLE_READ_PLAN **last_ror; /* end of the above array */ }; + +/* + Plan for QUICK_INDEX_MERGE_SELECT scan. + QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + class TRP_INDEX_MERGE : public TABLE_READ_PLAN { public: QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc); - TRP_RANGE **range_scans; - TRP_RANGE **range_scans_end; + TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ + TRP_RANGE **range_scans_end; /* end of the array */ }; /* - Fill param->needed_fields with bitmap of fields used in the query - NOTE - Do not put clustered PK members into it as they are implicitly present in - all keys. + Fill param->needed_fields with bitmap of fields used in the query. + SYNOPSIS + fill_used_fields_bitmap() + param Parameter from test_quick_select function. + + NOTES + Clustered PK members are not put into the bitmap as they are implicitly + present in all keys (and it is impossible to avoid reading them). + RETURN + 0 Ok + 1 Out of memory. */ static int fill_used_fields_bitmap(PARAM *param) @@ -1348,6 +1460,7 @@ static int fill_used_fields_bitmap(PARAM *param) pk= param->table->primary_key; if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY) { + /* The table uses clustered PK and it is not internally generated */ KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; KEY_PART_INFO *key_part_end= key_part + param->table->key_info[pk].key_parts; @@ -1361,28 +1474,39 @@ static int fill_used_fields_bitmap(PARAM *param) /* - Test if a key can be used in different ranges + Test if a key can be used in different ranges SYNOPSIS - SQL_SELECT::test_quick_select(thd,keys_to_use, prev_tables, - limit, force_quick_range) + SQL_SELECT::test_quick_select() + thd Current thread + keys_to_use Keys to use for range retrieval + prev_tables Tables assumed to be already read when the scan is + performed (but not read at the moment of this call) + limit Query limit + force_quick_range Prefer to use range (instead of full table scan) even + if it is more expensive. + + NOTES + Updates the following in the select parameter: + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. + + In the table struct the following information is updated: + quick_keys - Which keys can be used + quick_rows - How many rows the key matches - Updates the following in the select parameter: - needed_reg - Bits for keys with may be used if all prev regs are read - quick - Parameter to use when reading records. + TODO + Check if this function really needs to modify keys_to_use, and change the + code to pass it by reference if it doesn't. + + In addition to force_quick_range other means can be (an usually are) used + to make this function prefer range over full table scan. Figure out if + force_quick_range is really needed. - In the table struct the following information is updated: - quick_keys - Which keys can be used - quick_rows - How many rows the key matches - - RETURN VALUES - -1 if impossible select - 0 if can't use quick_select - 1 if found usable range - - TODO - check if the function really needs to modify keys_to_use, and change the - code to pass it by reference if not + RETURN + -1 if impossible select (i.e. certainly no rows will be selected) + 0 if can't use quick_select + 1 if found usable ranges and quick select has been successfully created. */ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, @@ -1393,6 +1517,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint idx; double scan_time; DBUG_ENTER("test_quick_select"); + printf("\nQUERY: %s\n", thd->query); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, (ulong) const_tables)); @@ -1485,6 +1610,18 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } param.key_parts_end=key_parts; + /* Calculate cost of full index read for the shortest covering index */ + if (!head->used_keys.is_clear_all()) + { + int key_for_use= find_shortest_key(head, &head->used_keys); + double key_read_time= get_index_only_read_time(¶m, records, + key_for_use); + DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, key_read_time)); + if (key_read_time < read_time) + read_time= key_read_time; + } + if ((tree=get_mm_tree(¶m,cond))) { if (tree->type == SEL_TREE::IMPOSSIBLE) @@ -1499,8 +1636,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, /* It is possible to use a quick select (but maybe it would be slower than 'all' table scan). - Btw, tree type SEL_TREE::INDEX_MERGE was not introduced - intentionally. */ if (tree->merges.is_empty()) { @@ -1522,7 +1657,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { /* Get best non-covering ROR-intersection plan and prepare data for - building covering ROR-intersection + building covering ROR-intersection. */ if ((new_trp= get_best_ror_intersect(¶m, tree, false, best_read_time, @@ -1550,25 +1685,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ DBUG_PRINT("info",("No range reads possible," " trying to construct index_merge")); - - /* - Calculate cost of 'all'+index_only scan if it is possible. - It is possible that table can be read in two ways: - a) 'all' + index_only - b) index_merge without index_only. - */ - if (!head->used_keys.is_clear_all()) - { - int key_for_use= find_shortest_key(head, &head->used_keys); - ha_rows total_table_records= (0 == head->file->records)? 1 : - head->file->records; - read_time = get_index_only_read_time(¶m, total_table_records, - key_for_use); - DBUG_PRINT("info", - ("'all'+'using index' scan will be using key %d, " - "read time %g", key_for_use, read_time)); - } - List_iterator_fast it(tree->merges); while ((imerge= it++)) { @@ -1579,8 +1695,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } best_trp= best_conj_trp; } + my_pthread_setspecific_ptr(THR_MALLOC, old_root); - my_pthread_setspecific_ptr(THR_MALLOC, old_root); + /* If we got a read plan, create a quick select from it. */ if (best_trp) { records= best_trp->records; @@ -1589,7 +1706,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, delete quick; quick= NULL; } - } + } } } my_pthread_setspecific_ptr(THR_MALLOC, old_root); @@ -1608,19 +1725,22 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, /* - Get cost of 'sweep' full row retrieveal of #records rows. + Get cost of 'sweep' full records retrieval. + SYNOPSIS + get_sweep_read_cost() + param Parameter from test_quick_select + records # of records to be retrieved RETURN - 0 - ok - 1 - sweep is more expensive then full table scan. + cost of sweep */ -bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, - double index_reads_cost, double max_cost) +double get_sweep_read_cost(const PARAM *param, ha_rows records) { + double result; if (param->table->file->primary_key_is_clustered()) { - *cost= param->table->file->read_time(param->table->primary_key, - records, records); + result= param->table->file->read_time(param->table->primary_key, + records, records); } else { @@ -1630,8 +1750,8 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); if (busy_blocks < 1.0) busy_blocks= 1.0; - DBUG_PRINT("info",("sweep: nblocks= %g, busy_blocks=%g index_blocks=%g", - n_blocks, busy_blocks, index_reads_cost)); + DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, + busy_blocks)); /* Disabled: Bail out if # of blocks to read is bigger than # of blocks in table data file. @@ -1642,7 +1762,7 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, if (!join || join->tables == 1) { /* No join, assume reading is done in one 'sweep' */ - *cost= busy_blocks*(DISK_SEEK_BASE_COST + + result= busy_blocks*(DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); } else @@ -1651,11 +1771,11 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, Possibly this is a join with source table being non-last table, so assume that disk seeks are random here. */ - *cost = busy_blocks; + result= busy_blocks; } } - DBUG_PRINT("info",("returning cost=%g", *cost)); - return 0; + DBUG_PRINT("info",("returning cost=%g", result)); + return result; } @@ -1663,15 +1783,11 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, Get best plan for a SEL_IMERGE disjunctive expression. SYNOPSIS get_best_disjunct_quick() - param - imerge + param Parameter from check_quick_select function + imerge Expression to use read_time Don't create scans with cost > read_time - RETURN - read plan - NULL - OOM or no read scan could be built. NOTES - index_merge cost is calculated as follows: index_merge_cost = cost(index_reads) + (see #1) @@ -1723,11 +1839,14 @@ bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, ROR-union cost is calculated in the same way index_merge, but instead of Unique a priority queue is used. + RETURN + Created read plan + NULL - Out of memory or no read scan could be built. */ static TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, - double read_time) + double read_time) { SEL_TREE **ptree; TRP_INDEX_MERGE *imerge_trp= NULL; @@ -1823,12 +1942,11 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, } /* Calculate cost(rowid_to_row_scan) */ - if (get_sweep_read_cost(param, non_cpk_scan_records, &sweep_cost, - blocks_in_index_read, read_time)) - goto build_ror_index_merge; - imerge_cost += sweep_cost; + imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records); DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost)); + if (imerge_cost > read_time) + goto build_ror_index_merge; /* Add Unique operations cost */ unique_calc_buff_size= @@ -1923,7 +2041,7 @@ skip_to_ror_scan: /* rows to retrieve= SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). - This is valid because index_merge constructuion guarantees that conditions + This is valid because index_merge construction guarantees that conditions in disjunction do not share key parts. */ roru_total_records -= (ha_rows)(roru_intersect_part* @@ -1937,14 +2055,12 @@ skip_to_ror_scan: See get_merge_buffers_cost function for queue_use_cost formula derivation. */ - if (get_sweep_read_cost(param, roru_total_records, &sweep_cost, - roru_index_costs, read_time)) - DBUG_RETURN(NULL); double roru_total_cost; roru_total_cost= roru_index_costs + rows2double(roru_total_records)*log(n_child_scans) / - (TIME_FOR_COMPARE_ROWID * M_LN2) + - sweep_cost; + (TIME_FOR_COMPARE_ROWID * M_LN2) + + get_sweep_read_cost(param, roru_total_records); + DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, n_child_scans)); TRP_ROR_UNION* roru; @@ -1965,10 +2081,9 @@ skip_to_ror_scan: /* Calculate cost of 'index only' scan for given index and number of records. - (We can resolve this by only reading through this key.) SYNOPSIS - get_whole_index_read_time() + get_index_only_read_time() param parameters structure records #of records to read keynr key to read @@ -1992,35 +2107,40 @@ inline double get_index_only_read_time(const PARAM* param, ha_rows records, typedef struct st_ror_scan_info { - uint idx; /* # of used key in param->keys */ - uint keynr; /* # of used key in table */ - ha_rows records; - SEL_ARG *sel_arg; - /* Fields used in the query and covered by this ROR scan */ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + ha_rows records; /* estimate of # records this scan will return */ + + /* Set of intervals over key fields that will be used for row retrieval. */ + SEL_ARG *sel_arg; + + /* Fields used in the query and covered by this ROR scan. */ MY_BITMAP covered_fields; - uint used_fields_covered; - int key_rec_length; /* length of key record with rowid */ + uint used_fields_covered; /* # of set bits in covered_fields */ + int key_rec_length; /* length of key record (including rowid) */ /* - Array of - #rows(keypart_1=c1 AND ... AND key_part_i=c_i) / - #rows(keypart_1=c1 AND ... AND key_part_{i+1}=c_{i+1}) values - */ - double *key_part_rows; - double index_read_cost; - uint first_uncovered_field; - uint key_components; -}ROR_SCAN_INFO; + Cost of reading all index records with values in sel_arg intervals set + (assuming there is no need to access full table records) + */ + double index_read_cost; + uint first_uncovered_field; /* first unused bit in covered_fields */ + uint key_components; /* # of parts in the key */ +} ROR_SCAN_INFO; /* - Create ROR_SCAN_INFO* structure for condition sel_arg on key idx. + Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using + sel_arg set of intervals. + SYNOPSIS make_ror_scan() - param - idx index of key in param->keys + param Parameter from test_quick_select function + idx Index of key in param->keys + sel_arg Set of intervals for a given key RETURN - NULL - OOM. + NULL - out of memory + ROR scan structure containing a scan for {idx, sel_arg} */ static @@ -2049,10 +2169,6 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) param->fields_bitmap_size*8, false)) DBUG_RETURN(NULL); bitmap_clear_all(&ror_scan->covered_fields); - if (!(ror_scan->key_part_rows= - (double*)alloc_root(param->mem_root, sizeof(double)* - param->table->key_info[keynr].key_parts))) - DBUG_RETURN(NULL); KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; KEY_PART_INFO *key_part_end= key_part + @@ -2069,50 +2185,23 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->index_read_cost= get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], ror_scan->keynr); - /* - Calculate # rows estimates for - (key_part1=c1) - (key_part1=c1 AND key_part2=c2) - ...and so on - */ - char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* values of current key */ - char *key_ptr= key_val; - ha_rows records; - ha_rows prev_records= param->table->file->records; - double *rows_diff= ror_scan->key_part_rows; - key_part= param->table->key_info[keynr].key_part; - SEL_ARG *arg= ror_scan->sel_arg; - /* - We have #rows estimate already for first key part, so do first loop - iteration separately: - */ - arg->store_min(key_part->length, &key_ptr, 0); - prev_records= param->table->quick_rows[ror_scan->keynr]; - *(rows_diff++)= rows2double(prev_records) / param->table->file->records; - arg=arg->next_key_part; - for(; arg; ++key_part, arg= arg->next_key_part) - { - arg->store_min(key_part->length, &key_ptr, 0); - records= - param->table->file->records_in_range(ror_scan->keynr, - (byte*)key_val, key_ptr - key_val, - HA_READ_KEY_EXACT, - (byte*)key_val, key_ptr - key_val, - HA_READ_AFTER_KEY); - if (records == HA_POS_ERROR) - return NULL; /* shouldn't happen actually */ - *(rows_diff++)= rows2double(records) / rows2double(prev_records); - prev_records= records; - } DBUG_RETURN(ror_scan); } -/* - Order ROR_SCAN_INFO** by - E(#records_matched) * key_record_length +/* + Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length. + SYNOPSIS + cmp_ror_scan_info() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b */ -int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) { double val1= rows2double((*a)->records) * (*a)->key_rec_length; double val2= rows2double((*b)->records) * (*b)->key_rec_length; @@ -2120,12 +2209,22 @@ int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) } /* - Order ROR_SCAN_INFO** by + Compare two ROR_SCAN_INFO** by (#covered fields in F desc, #components asc, number of first not covered component asc) + + SYNOPSIS + cmp_ror_scan_info_covering() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b */ -int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) { if ((*a)->used_fields_covered > (*b)->used_fields_covered) return -1; @@ -2160,10 +2259,37 @@ typedef struct */ double records_fract; ha_rows index_records; /* sum(#records to look in indexes) */ -}ROR_INTERSECT_INFO; +} ROR_INTERSECT_INFO; -/* Allocate a ROR_INTERSECT and initialize it to contain zero scans */ +/* + Re-initialize an allocated intersect info to contain zero scans. + SYNOPSIS + info Intersection info structure to re-initialize. +*/ + +static void ror_intersect_reinit(ROR_INTERSECT_INFO *info) +{ + info->is_covering= false; + info->index_scan_costs= 0.0f; + info->records_fract= 1.0f; + bitmap_clear_all(&info->covered_fields); +} + +/* + Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans. + + SYNOPSIS + ror_intersect_init() + param Parameter from test_quick_select + is_index_only If true, set ROR_INTERSECT_INFO to be covering + + RETURN + allocated structure + NULL on error +*/ + +static ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) { ROR_INTERSECT_INFO *info; @@ -2172,42 +2298,46 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) sizeof(ROR_INTERSECT_INFO)))) return NULL; info->param= param; - info->is_covering= is_index_only; - info->index_scan_costs= 0.0f; - info->records_fract= 1.0f; - if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) return NULL; if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, false)) return NULL; - bitmap_clear_all(&info->covered_fields); + ror_intersect_reinit(info); return info; } + /* - Check if it makes sense to add a ROR scan to ROR-intersection, and if yes - update parameters of ROR-intersection, including its cost. + Check if adding a ROR scan to a ROR-intersection reduces its cost of + ROR-intersection and if yes, update parameters of ROR-intersection, + including its cost. - RETURN - true ROR scan added to ROR-intersection, cost updated. - false It doesn't make sense to add this ROR scan to this ROR-intersection. + SYNOPSIS + ror_intersect_add() + param Parameter from test_quick_select + info ROR-intersection structure to add the scan to. + ror_scan ROR scan info to add. + is_cpk_scan If true, add the scan as CPK scan (this can be inferred + from other parameters and is passed separately only to + avoid duplicating the inference code) - NOTE - Adding a ROR scan to ROR-intersect "makes sense" iff selectivt + NOTES + Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR- + intersection decreases. The cost of ROR-intersection is caclulated as + follows: - Cost of ROR-intersection is calulated as follows: - cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval + cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval if (union of indexes used covers all needed fields) cost_of_full_rows_retrieval= 0; else { cost_of_full_rows_retrieval= - cost_of_sweep_read(E(rows_to_retrive), rows_in_table); + cost_of_sweep_read(E(rows_to_retrieve), rows_in_table); } - E(rows_to_retrive) is calulated as follows: + E(rows_to_retrieve) is caclulated as follows: Suppose we have a condition on several keys cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key k_21=c_21 AND k_22=c_22 AND ... // parts of second key @@ -2233,13 +2363,13 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) save R to be cond and proceed to recursion step. Recursion step: - We have set of fixed fields/satisfied conditions) F, probability P(F), + We have a set of fixed fields/satisfied conditions) F, probability P(F), and remaining conjunction R Pick next key part on current key and its condition "k_ij=c_ij". We will add "k_ij=c_ij" into F and update P(F). - Lets denote k_ij as t, R = t AND R1, where i1 may still contain t. Then + Lets denote k_ij as t, R = t AND R1, where R1 may still contain t. Then - P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R|(t AND F)) (2) + P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2) (where '|' mean conditional probability, not "or") @@ -2253,72 +2383,153 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = = P(t|fields_before_t_in_key). - P(t|fields_before_t_in_key)= #distinct_values(fields_before_t_in_key) / - #distinct_values(fields_before_t_in_key, t) + P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) / + #records(fields_before_t_in_key, t) - The second multiplier is calculated by applying this step recusively. + The second multiplier is calculated by applying this step recursively. - This function applies recursion steps for all fixed key members of - one key, accumulating sets of covered fields and - The very first step described is done as recursion step with - P(fixed fields)=1 and empty set of fixed fields. + IMPLEMENTATION + This function calculates the result of application of the "recursion step" + described above for all fixed key members of a single key, accumulating set + of covered fields, selectivity, etc. + + The calculation is conducted as follows: + Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate + + n_{k1} n_{k_2} + --------- * --------- * .... (3) + n_{k1-1} n_{k2_1} + + where k1,k2,... are key parts which fields were not yet marked as fixed + ( this is result of application of option b) of the recursion step for + parts of a single key). + Since it is reasonable to expect that most of the fields are not marked + as fixed, we calcualate (3) as + + n_{i1} n_{i_2} + (3) = n_{max_key_part} / ( --------- * --------- * .... ) + n_{i1-1} n_{i2_1} + + where i1,i2, .. are key parts that were already marked as fixed. + + In order to minimize number of expensive records_in_range calls we group + and reduce adjacent fractions. + + RETURN + true ROR scan added to ROR-intersection, cost updated. + false It doesn't make sense to add this ROR scan to this ROR-intersection. */ -bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan) +bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, + ROR_SCAN_INFO* ror_scan, bool is_cpk_scan=false) { int i; SEL_ARG *sel_arg; KEY_PART_INFO *key_part= info->param->table->key_info[ror_scan->keynr].key_part; double selectivity_mult= 1.0; + char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */ + DBUG_ENTER("ror_intersect_add"); DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); DBUG_PRINT("info", ("Adding scan on %s", info->param->table->key_info[ror_scan->keynr].name)); + SEL_ARG *tuple_arg= NULL; + char *key_ptr= key_val; + bool cur_covered, prev_covered= + bitmap_is_set(&info->covered_fields, key_part->fieldnr); + + ha_rows prev_records= param->table->file->records; + for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; i++, sel_arg= sel_arg->next_key_part) { - if (!bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr)) + cur_covered= bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr); + if (cur_covered != prev_covered) { - /* - P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = - = P(t|fields_before_t_in_key). - */ - selectivity_mult *= ror_scan->key_part_rows[i]; + /* create (part1val, ..., part{n-1}val) tuple. */ + { + if (!tuple_arg) + { + tuple_arg= ror_scan->sel_arg; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + while (tuple_arg->next_key_part != sel_arg) + { + tuple_arg= tuple_arg->next_key_part; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + } + ha_rows records; + records= param->table->file-> + records_in_range(ror_scan->keynr, + (byte*)key_val, key_ptr - key_val, + HA_READ_KEY_EXACT, + (byte*)key_val, key_ptr - key_val, + HA_READ_AFTER_KEY); + if (cur_covered) + { + /* uncovered -> covered */ + double tmp= rows2double(records)/rows2double(prev_records); + printf(" Selectivity multiplier: %g\n", tmp); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; + prev_records= HA_POS_ERROR; + } + else + { + /* covered -> uncovered */ + prev_records= records; + } } + else printf("no change, skipping\n"); + prev_covered= cur_covered; + } + if (!prev_covered) + { + double tmp= rows2double(param->table->quick_rows[ror_scan->keynr]) / + rows2double(prev_records); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; } + if (selectivity_mult == 1.0) { /* Don't add this scan if it doesn't improve selectivity. */ - DBUG_PRINT("info", ("This scan doesn't improve selectivity.")); + DBUG_PRINT("info", ("The scan doesn't improve selectivity.")); DBUG_RETURN(false); } - info->records_fract *= selectivity_mult; - - bitmap_union(&info->covered_fields, &ror_scan->covered_fields); - - ha_rows scan_records= info->param->table->quick_rows[ror_scan->keynr]; - info->index_scan_costs += ror_scan->index_read_cost; + info->records_fract *= selectivity_mult; + ha_rows cur_scan_records= info->param->table->quick_rows[ror_scan->keynr]; + if (is_cpk_scan) + { + info->index_scan_costs += rows2double(cur_scan_records)* + TIME_FOR_COMPARE_ROWID; + } + else + { + info->index_records += cur_scan_records; + info->index_scan_costs += ror_scan->index_read_cost; + bitmap_union(&info->covered_fields, &ror_scan->covered_fields); + } + if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, &info->covered_fields)) { DBUG_PRINT("info", ("ROR-intersect is covering now")); - /* ROR-intersect became covering */ info->is_covering= true; } - info->index_records += scan_records; + printf(" records: %g\n", rows2double(param->table->file->records) * + info->records_fract); info->total_cost= info->index_scan_costs; if (!info->is_covering) { ha_rows table_recs= info->param->table->file->records; - double sweep_cost; - get_sweep_read_cost(info->param, - (ha_rows)(table_recs*info->records_fract), - &sweep_cost, info->index_scan_costs, DBL_MAX); - - info->total_cost += sweep_cost; + info->total_cost += + get_sweep_read_cost(info->param, + (ha_rows)(info->records_fract*table_recs)); } DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, @@ -2326,24 +2537,23 @@ bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan) DBUG_RETURN(true); } + /* Get best ROR-intersection plan using non-covering ROR-intersection search algorithm. The returned plan may be covering. SYNOPSIS get_best_ror_intersect() - param - tree + param Parameter from test_quick_select function. + tree Transformed restriction condition to be used to look + for ROR scans. force_index_only If true, don't calculate costs of full rows retrieval. read_time Do not return read plans with cost > read_time. are_all_covering [out] set to true if union of all scans covers all fields needed by the query (and it is possible to build a covering ROR-intersection) - RETURN - ROR-intersection table read plan - NULL if OOM or no plan found. - NOTE + NOTES get_key_scans_params must be called before for the same SEL_TREE before this function can be called. @@ -2383,6 +2593,10 @@ bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan) Clustered PK scan has special handling in ROR-intersection: it is not used to retrieve rows, instead its condition is used to filter row references we get from scans on other keys. + + RETURN + ROR-intersection table read plan + NULL if out of memory or no suitable plan found. */ static @@ -2398,20 +2612,34 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, if (tree->n_ror_scans < 2) DBUG_RETURN(NULL); - /* Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them */ + /* + Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them. + Also find and save clustered PK scan if there is one. + */ ROR_SCAN_INFO **cur_ror_scan; + ROR_SCAN_INFO *cpk_scan= NULL; + bool cpk_scan_used= false; if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, sizeof(ROR_SCAN_INFO*)* param->keys))) return NULL; - + uint cpk_no= (param->table->file->primary_key_is_clustered())? + param->table->primary_key : MAX_KEY; + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) { + ROR_SCAN_INFO *scan; if (!tree->ror_scans_map.is_set(idx)) continue; - if (!(*cur_ror_scan= make_ror_scan(param, idx, tree->keys[idx]))) + if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) return NULL; - cur_ror_scan++; + if (param->real_keynr[idx] == cpk_no) + { + cpk_scan= scan; + tree->n_ror_scans--; + } + else + *(cur_ror_scan++)= scan; } tree->ror_scans_end= cur_ror_scan; @@ -2421,7 +2649,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, /* Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized ROR_SCAN_INFOs. - Get a minimal key scan using an approximate algorithm. + Now, get a minimal key scan using an approximate algorithm. */ qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), (qsort_cmp)cmp_ror_scan_info); @@ -2452,12 +2680,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, LINT_INIT(best_index_scan_costs); cur_ror_scan= tree->ror_scans; + /* Start with one scan */ + intersect_scans_best= intersect_scans; while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) { /* S= S + first(R); */ - if (ror_intersect_add(intersect, *cur_ror_scan)) + if (ror_intersect_add(param, intersect, *cur_ror_scan)) *(intersect_scans_end++)= *cur_ror_scan; - /* R= R-first(R); */ + /* R= R - first(R); */ cur_ror_scan++; if (intersect->total_cost < min_cost) @@ -2471,17 +2701,47 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, best_index_scan_costs= intersect->index_scan_costs; } } - - /* Ok, return ROR-intersect plan if we have found one */ + + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "best ROR-intersection", + intersect_scans, + intersect_scans_best);); + *are_all_covering= intersect->is_covering; uint best_num= intersect_scans_best - intersect_scans; + /* + Ok, found the best ROR-intersection of non-CPK key scans. + Check if we should add a CPK scan. + + If the obtained ROR-intersection is covering, it doesn't make sense + to add CPK scan - Clustered PK contains all fields and if we're doing + CPK scan doing other CPK scans will only add more overhead. + */ + if (cpk_scan && !intersect->is_covering) + { + /* + Handle the special case: ROR-intersect(PRIMARY, key1) is + the best, but cost(range(key1)) > cost(best_non_ror_range_scan) + */ + if (best_num == 0) + { + ror_intersect_reinit(intersect); + if (!ror_intersect_add(param, intersect, cpk_scan)) + DBUG_RETURN(NULL); /* shouldn't happen actually actually */ + *(intersect_scans_end++)= *cur_ror_scan; + best_num++; + } + + if (ror_intersect_add(param, intersect, cpk_scan)) + cpk_scan_used= true; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + } + + /* Ok, return ROR-intersect plan if we have found one */ TRP_ROR_INTERSECT *trp= NULL; - if (intersect_scans_best && best_num > 1) + if (best_num > 1 || cpk_scan_used) { - DBUG_EXECUTE("info",print_ror_scans_arr(param->table, - "used for ROR-intersect", - intersect_scans, - intersect_scans_best);); if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) DBUG_RETURN(trp); if (!(trp->first_scan= @@ -2494,7 +2754,8 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, trp->read_cost= min_cost; trp->records= best_rows? best_rows : 1; trp->index_scan_costs= best_index_scan_costs; - } + trp->cpk_scan= cpk_scan; + } DBUG_RETURN(trp); } @@ -2503,15 +2764,15 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, Get best covering ROR-intersection. SYNOPSIS get_best_covering_ror_intersect() - param - tree SEL_TREE - read_time Dont return table read plans with cost > read_time. + param Parameter from test_quick_select function. + tree SEL_TREE with sets of intervals for different keys. + read_time Don't return table read plans with cost > read_time. RETURN Best covering ROR-intersection plan NULL if no plan found. - NOTE + NOTES get_best_ror_intersect must be called for a tree before calling this function for it. This function invalidates tree->ror_scans member values. @@ -2563,7 +2824,6 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, ha_rows records=0; bool all_covered; - /* Start will all scans and remove one by one until */ DBUG_PRINT("info", ("Building covering ROR-intersection")); DBUG_EXECUTE("info", print_ror_scans_arr(param->table, "building covering ROR-I", @@ -2805,12 +3065,23 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, DBUG_RETURN(NULL); } } + if (cpk_scan) + { + if (!(quick= get_quick_select(param, cpk_scan->idx, + cpk_scan->sel_arg, alloc))) + { + delete quick_intrsect; + DBUG_RETURN(NULL); + } + quick_intrsect->cpk_quick= quick; + } quick_intrsect->records= records; quick_intrsect->read_time= read_cost; } DBUG_RETURN(quick_intrsect); } + QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc) @@ -2820,8 +3091,8 @@ QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, QUICK_SELECT_I *quick; DBUG_ENTER("TRP_ROR_UNION::make_quick"); /* - It is currently impossible to construct a ROR-union that will - not retrieve full rows, ingore retrieve_full_rows. + It is impossible to construct a ROR-union that will not retrieve full + rows, ignore retrieve_full_rows parameter. */ if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) { @@ -4248,7 +4519,7 @@ SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par) } - /* Test that the proporties for a red-black tree holds */ + /* Test that the properties for a red-black tree hold */ #ifdef EXTRA_DEBUG int test_rb_tree(SEL_ARG *element,SEL_ARG *parent) @@ -4336,13 +4607,26 @@ void SEL_ARG::test_use_count(SEL_ARG *root) #endif +/* + Calculate estimate of number records that will be retrieved by a range + scan on given index using given SEL_ARG intervals tree. + SYNOPSIS + check_quick_select + param Parameter from test_quick_select + idx Number of index to use in PARAM::key SEL_TREE::key + tree Transformed selection condition, tree->key[idx] holds intervals + tree to be used for scanning. + NOTES + param->is_ror_scan is set to reflect if the key scan is a ROR (see + is_key_scan_ror function for more info) + param->table->quick_*, param->range_count (and maybe others) are + updated with data of given key scan, see check_quick_keys for details. + + RETURN + Estimate # of records to be retrieved. + HA_POS_ERROR if estimate calculation failed due to table handler problems. -/***************************************************************************** -** Check how many records we will find by using the found tree -** NOTE -** param->table->quick_* and param->range_count (and maybe others) are -** updated with data of given key scan. -*****************************************************************************/ +*/ static ha_rows check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) @@ -4396,30 +4680,40 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) } -/* +/* + Recursively calculate estimate of # rows that will be retrieved by + key scan on key idx. SYNOPSIS check_quick_keys() - param - idx key to use, its number in list of used keys (that is, - param->real_keynr[idx] holds the key number in table) - - key_tree SEL_ARG tree which cost is calculated. - min_key buffer with min key value tuple - min_key_flag - max_key buffer with max key value tuple + param Parameter from test_quick select function. + idx Number of key to use in PARAM::keys in list of used keys + (param->real_keynr[idx] holds the key number in table) + key_tree SEL_ARG tree being examined. + min_key Buffer with partial min key value tuple + min_key_flag + max_key Buffer with partial max key value tuple max_key_flag - NOTE - The function does the recursive descent on the tree via left, right, and - next_key_part edges. The #rows estimates are calculated at the leaf nodes. + NOTES + The function does the recursive descent on the tree via SEL_ARG::left, + SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates + are calculated using records_in_range calls at the leaf nodes and then + summed. - param->min_key and param->max_key are used to hold key segment values. + param->min_key and param->max_key are used to hold prefixes of key value + tuples. The side effects are: + param->max_key_part is updated to hold the maximum number of key parts used in scan minus 1. - param->range_count is updated. - param->is_ror_scan is updated. + + param->range_count is incremented if the function finds a range that + wasn't counted by the caller. + + param->is_ror_scan is cleared if the function detects that the key scan is + not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror + function for description of which key scans are ROR scans) */ static ha_rows @@ -4432,6 +4726,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, param->max_key_part=max(param->max_key_part,key_tree->part); if (key_tree->left != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. + */ param->is_ror_scan= false; records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag, max_key,max_key_flag); @@ -4441,12 +4741,25 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, uint tmp_min_flag,tmp_max_flag,keynr; char *tmp_min_key=min_key,*tmp_max_key=max_key; - + key_tree->store(param->key[idx][key_tree->part].part_length, &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag); uint min_key_length= (uint) (tmp_min_key- param->min_key); uint max_key_length= (uint) (tmp_max_key- param->max_key); + if (param->is_ror_scan) + { + /* + If the index doesn't cover entire key, mark the scan as non-ROR scan. + (TODO sergeyp: investigate if we could do better here) + */ + uint16 fieldnr= param->table->key_info[param->real_keynr[idx]]. + key_part[key_tree->part].fieldnr - 1; + if (param->table->field[fieldnr]->key_length() != + param->key[idx][key_tree->part].part_length) + param->is_ror_scan= false; + } + if (key_tree->next_key_part && key_tree->next_key_part->part == key_tree->part+1 && key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) @@ -4461,7 +4774,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, goto end; // Ugly, but efficient } else + { + /* The interval for current key part is not c1 <= keyXpartY <= c1 */ param->is_ror_scan= false; + } tmp_min_flag=key_tree->min_flag; tmp_max_flag=key_tree->max_flag; @@ -4493,6 +4809,15 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, { if (param->is_ror_scan) { + /* + If we get here, the condition on the key was converted to form + "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND + somecond(keyXpart{key_tree->part})" + Check if + somecond is "keyXpart{key_tree->part} = const" and + uncovered "tail" of KeyX parts is either empty or is identical to + first members of clustered primary key. + */ if (!(min_key_length == max_key_length && !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) && !key_tree->min_flag && !key_tree->max_flag && @@ -4530,6 +4855,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, records+=tmp; if (key_tree->right != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. + */ param->is_ror_scan= false; tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag, max_key,max_key_flag); @@ -4540,11 +4871,46 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, return records; } + /* - Check if key scan on key keynr with first nparts key parts fixed is a - ROR scan. This function doesn't handle clustered PK scans or HASH index - scans. + Check if key scan on given index with equality conditions on first n key + parts is a ROR scan. + + SYNOPSIS + is_key_scan_ror() + param Parameter from test_quick_select + keynr Number of key in the table. The key must not be a clustered + primary key. + nparts Number of first key parts for which equality conditions + are present. + + NOTES + ROR (Rowid Ordered Retrieval) key scan is a key scan that produces + ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function) + + An index scan is a ROR scan if it is done using a condition in form + + "key1_1=c_1 AND ... AND key1_n=c_n" (1) + + where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n]) + + and the table has a clustered Primary Key + + PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being + identical to uncovered parts ot the key being scanned (2) + + Scans on HASH indexes are not ROR scans, + any range scan on clustered primary key is ROR scan (3) + + Check (1) is made in check_quick_keys() + Check (3) is made check_quick_select() + Check (2) is made by this function. + + RETURN + true If the scan is ROR-scan + false otherwise */ + static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) { KEY *table_key= param->table->key_info + keynr; @@ -4564,7 +4930,8 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) for(;(key_part!=key_part_end) && (pk_part != pk_part_end); ++key_part, ++pk_part) { - if (key_part->field != pk_part->field) + if ((key_part->field != pk_part->field) || + (key_part->length != pk_part->length)) return false; } return (key_part == key_part_end); @@ -4573,17 +4940,26 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) /* Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. - This uses it's own malloc tree. + SYNOPSIS get_quick_select() param - idx index of used key in param->key. + idx Index of used key in param->key. key_tree SEL_ARG tree for the used key - parent_alloc if not NULL, use it to allocate memory for + parent_alloc If not NULL, use it to allocate memory for quick select data. Otherwise use quick->alloc. + + RETURN + NULL on error + otherwise created quick select + + NOTES + The caller must call QUICK_SELCT::init for returned quick select - The caller should call QUICK_SELCT::init for returned quick select + CAUTION! This function may change THR_MALLOC to a MEM_ROOT which will be + deallocated when the returned quick select is deleted. */ + QUICK_RANGE_SELECT * get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, MEM_ROOT *parent_alloc) @@ -5008,15 +5384,23 @@ int QUICK_INDEX_MERGE_SELECT::get_next() /* + Retrieve next record. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::get_next() + NOTES - Invariant on enter/exit: all intersected selects have retrieved index - records with rowid <= some_rowid_val and no intersected select has + Invariant on enter/exit: all intersected selects have retrieved all index + records with rowid <= some_rowid_val and no intersected select has retrieved any index records with rowid > some_rowid_val. We start fresh and loop until we have retrieved the same rowid in each of the key scans or we got an error. If a Clustered PK scan is present, it is used only to check if row - satisfies its conditions (and never used for row retrieval). + satisfies its condition (and never used for row retrieval). + + RETURN + 0 - Ok + other - Error code if any error occurred. */ int QUICK_ROR_INTERSECT_SELECT::get_next() @@ -5090,10 +5474,18 @@ int QUICK_ROR_INTERSECT_SELECT::get_next() /* + Retrieve next record. + SYNOPSIS + QUICK_ROR_UNION_SELECT::get_next() + NOTES Enter/exit invariant: For each quick select in the queue a {key,rowid} tuple has been retrieved but the corresponding row hasn't been passed to output. + + RETURN + 0 - Ok + other - Error code if any error occurred. */ int QUICK_ROR_UNION_SELECT::get_next() @@ -5107,7 +5499,7 @@ int QUICK_ROR_UNION_SELECT::get_next() { if (!queue.elements) DBUG_RETURN(HA_ERR_END_OF_FILE); - /* Ok, we have a queue with > 1 scans */ + /* Ok, we have a queue with >= 1 scans */ quick= (QUICK_SELECT_I*)queue_top(&queue); memcpy(cur_rowid, quick->last_rowid, rowid_length); @@ -5548,8 +5940,78 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, #endif -void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, - String *used_lengths) +void QUICK_RANGE_SELECT::add_info_string(String *str) +{ + KEY *key_info= head->key_info + index; + str->append(key_info->name); +} + +void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str) +{ + QUICK_RANGE_SELECT *quick; + bool first= true; + List_iterator_fast it(quick_selects); + str->append("sort_union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + if (pk_quick_select) + { + str->append(','); + pk_quick_select->add_info_string(str); + } + str->append(')'); +} + +void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); + str->append("intersect("); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (!first) + str->append(','); + else + first= false; + str->append(key_info->name); + } + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + str->append(','); + str->append(key_info->name); + } + str->append(')'); +} + +void QUICK_ROR_UNION_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast it(quick_selects); + str->append("union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + str->append(')'); +} + + +void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) { char buf[64]; uint length; @@ -5559,22 +6021,27 @@ void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, used_lengths->append(buf, length); } -void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names, - String *used_lengths) +void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) { char buf[64]; uint length; + bool first= true; QUICK_RANGE_SELECT *quick; + List_iterator_fast it(quick_selects); while ((quick= it++)) { - KEY *key_info= head->key_info + quick->index; - if (key_names->length()) + if (first) + first= false; + else + { key_names->append(','); - key_names->append(key_info->name); - - if (used_lengths->length()) used_lengths->append(','); + } + + KEY *key_info= head->key_info + quick->index; + key_names->append(key_info->name); length= longlong2str(quick->max_used_key_length, buf, 10) - buf; used_lengths->append(buf, length); } @@ -5589,8 +6056,8 @@ void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names, } } -void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names, - String *used_lengths) +void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) { char buf[64]; uint length; @@ -5600,17 +6067,18 @@ void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names, while ((quick= it++)) { KEY *key_info= head->key_info + quick->index; - if (!first) - key_names->append(','); - key_names->append(key_info->name); - if (first) first= false; else + { + key_names->append(','); used_lengths->append(','); + } + key_names->append(key_info->name); length= longlong2str(quick->max_used_key_length, buf, 10) - buf; used_lengths->append(buf, length); } + if (cpk_quick) { KEY *key_info= head->key_info + cpk_quick->index; @@ -5622,8 +6090,8 @@ void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names, } } -void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names, - String *used_lengths) +void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) { bool first= true; QUICK_SELECT_I *quick; @@ -5637,7 +6105,7 @@ void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names, used_lengths->append(','); key_names->append(','); } - quick->fill_keys_and_lengths(key_names, used_lengths); + quick->add_keys_and_lengths(key_names, used_lengths); } } @@ -5850,7 +6318,7 @@ void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) #endif /***************************************************************************** -** Instansiate templates +** Instantiate templates *****************************************************************************/ #ifdef __GNUC__ diff --git a/sql/opt_range.h b/sql/opt_range.h index 1b522421a01..d992bbb2456 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -77,27 +77,59 @@ public: ha_rows records; /* estimate of # of records to be retrieved */ double read_time; /* time to perform this retrieval */ TABLE *head; - /* - index this quick select uses, or MAX_KEY for quick selects + Index this quick select uses, or MAX_KEY for quick selects that use several indexes */ uint index; - /* applicable iff index!= MAX_KEY */ - uint max_used_key_length, used_key_parts; + /* + Total length of first used_key_parts parts of the key. + Applicable if index!= MAX_KEY. + */ + uint max_used_key_length; + + /* + Max. number of (first) key parts this quick select uses for retrieval. + eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2. + Applicable if index!= MAX_KEY. + */ + uint used_key_parts; QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; - /* - Call init() immediately after creation of quick select. if init() call - fails, reset() or get_next() must not be called. + + /* + Do post-constructor initialization. + SYNOPSIS + init() + + init() performs initializations that should have been in constructor if + it was possible to return errors from constructors. The join optimizer may + create and then delete quick selects without retrieving any rows so init() + must not contain any IO or CPU intensive code. + + If init() call fails the only valid action is to delete this quick select, + reset() and get_next() must not be called. + + RETURN + 0 OK + other Error code */ virtual int init() = 0; /* - Call reset() before first get_next call. get_next must not be called if - reset() call fails. + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + reset() should be called when it is certain that row retrieval will be + necessary. This call may do heavyweight initialization like buffering first + N records etc. If reset() call fails get_next() must not be called. + + RETURN + 0 OK + other Error code */ virtual int reset(void) = 0; virtual int get_next() = 0; /* get next record to retrieve */ @@ -117,30 +149,54 @@ public: virtual int get_type() = 0; /* - Initialize this quick select as a child of a index union or intersection - scan. This call replaces init() call. + Initialize this quick select as a merged scan inside a ROR-union or a ROR- + intersection scan. The caller must not additionally call init() if this + function is called. + SYNOPSIS + init_ror_merged_scan() + reuse_handler quick select may use (q: psergey??) + (q: is this natural that we do it this way) + NOTES + psergey? */ - virtual int init_ror_child_scan(bool reuse_handler) + virtual int init_ror_merged_scan(bool reuse_handler) { DBUG_ASSERT(0); return 1; } - virtual void cleanup_ror_child_scan() { DBUG_ASSERT(0); } + /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */ virtual void save_last_pos(){}; /* - Fill key_names with list of keys this quick select used; - fill used_lenghth with correponding used lengths. + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. This is used by select_describe. */ - virtual void fill_keys_and_lengths(String *key_names, - String *used_lengths)=0; - + virtual void add_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + /* + Append text representation of quick select structure (what and how is + merged) to str. The result is added to "Extra" field in EXPLAIN output. + This function is implemented only by quick selects that merge other quick + selects output and/or can produce output suitable for merging. + */ + virtual void add_info_string(String *str) {}; + /* + Return 1 if any index used by this quick select + a) uses field that is listed in passed field list or + b) is automatically updated (like a timestamp) + */ virtual bool check_if_keys_used(List *fields); /* - rowid of last row retrieved by this quick select. This is used only - when doing ROR-index_merge selects + rowid of last row retrieved by this quick select. This is used only when + doing ROR-index_merge selects */ byte *last_rowid; + + /* + Table record buffer used by this quick select. + Currently this is always the same as head->record[0]. psergey: check that! + */ byte *record; #ifndef DBUG_OFF /* @@ -155,6 +211,10 @@ public: struct st_qsel_param; class SEL_ARG; +/* + Quick select that does a range scan on a single key. The records are + returned in key order. +*/ class QUICK_RANGE_SELECT : public QUICK_SELECT_I { protected: @@ -163,7 +223,11 @@ public: int error; protected: handler *file; - bool free_file; /* if true, this quick select "owns" file and will free it */ + /* + If true, this quick select has its "own" handler object which should be + closed no later then this quick select is deleted. + */ + bool free_file; protected: friend @@ -207,15 +271,16 @@ public: int get_next(); bool reverse_sorted() { return 0; } bool unique_key_range(); - int init_ror_child_scan(bool reuse_handler); + int init_ror_merged_scan(bool reuse_handler); void save_last_pos() { file->position(record); }; int get_type() { return QS_TYPE_RANGE; } - void fill_keys_and_lengths(String *key_names, String *used_lengths); + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); #ifndef DBUG_OFF - virtual void dbug_dump(int indent, bool verbose); + void dbug_dump(int indent, bool verbose); #endif }; @@ -291,10 +356,11 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_INDEX_MERGE; } - void fill_keys_and_lengths(String *key_names, String *used_lengths); + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); bool check_if_keys_used(List *fields); #ifndef DBUG_OFF - virtual void dbug_dump(int indent, bool verbose); + void dbug_dump(int indent, bool verbose); #endif bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); @@ -317,7 +383,6 @@ public: THD *thd; int prepare_unique(); - bool reset_called; /* used to get rows collected in Unique */ READ_RECORD read_record; @@ -326,9 +391,22 @@ public: /* Rowid-Ordered Retrieval (ROR) index intersection quick select. - This quick select produces an intersection of records returned by several - QUICK_RANGE_SELECTs that return data ordered by rowid. + This quick select produces intersection of row sequences returned + by several QUICK_RANGE_SELECTs it "merges". + + All merged QUICK_RANGE_SELECTs must return rowids in rowid order. + QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too. + + All merged quick selects retrieve {rowid, covered_fields} tuples (not full + table records). + QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used + by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't + cover needed all fields. + + If one of the merged quick selects is a Clustered PK range scan, it is + used only to filter rowid sequence produced by other merged quick selects. */ + class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I { public: @@ -343,28 +421,46 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_ROR_INTERSECT; } - void fill_keys_and_lengths(String *key_names, String *used_lengths); + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); bool check_if_keys_used(List *fields); #ifndef DBUG_OFF - virtual void dbug_dump(int indent, bool verbose); + void dbug_dump(int indent, bool verbose); #endif - int init_ror_child_scan(bool reuse_handler); + int init_ror_merged_scan(bool reuse_handler); bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); - /* range quick selects this intersection consists of */ + /* + Range quick selects this intersection consists of, not including + cpk_quick. + */ List quick_selects; + /* + Merged quick select that uses Clustered PK, if there is one. This quick + select is not used for row retrieval, it is used for row retrieval. + */ QUICK_RANGE_SELECT *cpk_quick; - MEM_ROOT alloc; - THD *thd; - bool reset_called; - bool need_to_fetch_row; + + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + THD *thd; /* current thread */ + bool need_to_fetch_row; /* if true, do retrieve full table records. */ }; + /* Rowid-Ordered Retrieval index union select. + This quick select produces union of row sequences returned by several + quick select it "merges". + All merged quick selects must return rowids in rowid order. + QUICK_ROR_UNION_SELECT will return rows in rowid order, too. + + All merged quick selects are set not to retrieve full table records. + ROR-union quick select always retrieves full records. + */ + class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I { public: @@ -377,33 +473,30 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_ROR_UNION; } - void fill_keys_and_lengths(String *key_names, String *used_lengths); + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); bool check_if_keys_used(List *fields); #ifndef DBUG_OFF - virtual void dbug_dump(int indent, bool verbose); + void dbug_dump(int indent, bool verbose); #endif bool push_quick_back(QUICK_SELECT_I *quick_sel_range); - /* range quick selects this index_merge read consists of */ - List quick_selects; - - QUEUE queue; - MEM_ROOT alloc; - - THD *thd; - byte *cur_rowid; - byte *prev_rowid; - uint rowid_length; - bool reset_called; - bool have_prev_rowid; + List quick_selects; /* Merged quick selects */ + QUEUE queue; /* Priority queue for merge operation */ + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + + THD *thd; /* current thread */ + byte *cur_rowid; /* buffer used in get_next() */ + byte *prev_rowid; /* rowid of last row returned by get_next() */ + bool have_prev_rowid; /* true if prev_rowid has valid data */ + uint rowid_length; /* table rowid length */ private: static int queue_cmp(void *arg, byte *val1, byte *val2); }; - class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT { public: diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 037aedd5f61..27015dbd04d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -10077,7 +10077,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, select_result *result=join->result; Item *item_null= new Item_null(); CHARSET_INFO *cs= &my_charset_latin1; - int quick_type= -1; + int quick_type; DBUG_ENTER("select_describe"); DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s", (ulong)join->select_lex, join->select_lex->type, @@ -10104,17 +10104,20 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; - char buff[512],*buff_ptr=buff; + char buff[512]; char buff1[512], buff2[512], buff3[512]; char keylen_str_buf[64]; char derived_name[64]; + String extra(buff, sizeof(buff),cs); String tmp1(buff1,sizeof(buff1),cs); String tmp2(buff2,sizeof(buff2),cs); String tmp3(buff3,sizeof(buff3),cs); + extra.length(0); tmp1.length(0); tmp2.length(0); tmp3.length(0); + quick_type= -1; item_list.empty(); item_list.push_back(new Item_int((int32) join->select_lex->select_number)); @@ -10165,7 +10168,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, else item_list.push_back(item_null); - /* Build key,key_len, and ref values and add them to item_list */ + /* Build "key", "key_len", and "ref" values and add them to item_list */ if (tab->ref.key_parts) { KEY *key_info=table->key_info+ tab->ref.key; @@ -10200,7 +10203,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else if (tab->select && tab->select->quick) { - tab->select->quick->fill_keys_and_lengths(&tmp2, &tmp3); + tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3); item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs)); item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); item_list.push_back(item_null); @@ -10211,9 +10214,11 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(item_null); item_list.push_back(item_null); } + /* Add "rows" field to item_list. */ item_list.push_back(new Item_int((longlong) (ulonglong) join->best_positions[i]. records_read, 21)); + /* Build "Extra" field and add it to item_list. */ my_bool key_read=table->key_read; if (tab->type == JT_NEXT && table->used_keys.is_set(tab->index)) key_read=1; @@ -10225,38 +10230,51 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs)); else { + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + { + extra.append("; Using "); + tab->select->quick->add_info_string(&extra); + } if (tab->select) { if (tab->use_quick == 2) { char buf[MAX_KEY/8+1]; - sprintf(buff_ptr,"; Range checked for each record (index map: 0x%s)", - tab->keys.print(buf)); - buff_ptr=strend(buff_ptr); + extra.append("; Range checked for each record (index map: 0x"); + extra.append(tab->keys.print(buf)); + extra.append(')'); } else - buff_ptr=strmov(buff_ptr,"; Using where"); + extra.append("; Using where"); } if (key_read) - buff_ptr= strmov(buff_ptr,"; Using index"); + extra.append("; Using index"); if (table->reginfo.not_exists_optimize) - buff_ptr= strmov(buff_ptr,"; Not exists"); + extra.append("; Not exists"); if (need_tmp_table) { need_tmp_table=0; - buff_ptr= strmov(buff_ptr,"; Using temporary"); + extra.append("; Using temporary"); } if (need_order) { need_order=0; - buff_ptr= strmov(buff_ptr,"; Using filesort"); + extra.append("; Using filesort"); } if (distinct & test_all_bits(used_tables,thd->used_tables)) - buff_ptr= strmov(buff_ptr,"; Distinct"); - if (buff_ptr == buff) - buff_ptr+= 2; // Skip inital "; " - item_list.push_back(new Item_string(buff+2,(uint) (buff_ptr - buff)-2, - cs)); + extra.append("; Distinct"); + + /* Skip initial "; "*/ + const char *str= extra.ptr(); + uint32 len= extra.length(); + if (len) + { + str += 2; + len -= 2; + } + item_list.push_back(new Item_string(str, len, cs)); } // For next iteration used_tables|=table->map; -- cgit v1.2.1 From 2164db3c8c4c086dcc16d73baf7557a344d82129 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 29 May 2004 17:50:05 +0400 Subject: * Undo of range optimizer fix from previous changeset * Fixed test results. mysql-test/r/index_merge_ror.result: Typo fix mysql-test/r/rowid_order_bdb.result: new index_merge EXPLAIN output format changes mysql-test/r/rowid_order_innodb.result: new index_merge EXPLAIN output format changes sql/opt_range.cc: Undo of previous fix: If cost(full_scan_on_covering_index) < cost(best_range_scan) < cost(full_table_scan) use full_scan_on_covering_index, not best_range_scan. The fix affects read plan choice for more queries then initially anticipated, so I'm reverting it for now, will get back to this later --- sql/opt_range.cc | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'sql') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index ee328dadad6..59ded7354f1 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1610,17 +1610,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } param.key_parts_end=key_parts; - /* Calculate cost of full index read for the shortest covering index */ - if (!head->used_keys.is_clear_all()) - { - int key_for_use= find_shortest_key(head, &head->used_keys); - double key_read_time= get_index_only_read_time(¶m, records, - key_for_use); - DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " - "read time %g", key_for_use, key_read_time)); - if (key_read_time < read_time) - read_time= key_read_time; - } if ((tree=get_mm_tree(¶m,cond))) { @@ -1683,6 +1672,19 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, SEL_IMERGE *imerge; TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ + + /* Calculate cost of full index read for the shortest covering index */ + if (!head->used_keys.is_clear_all()) + { + int key_for_use= find_shortest_key(head, &head->used_keys); + double key_read_time= get_index_only_read_time(¶m, records, + key_for_use); + DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, key_read_time)); + if (key_read_time < read_time) + read_time= key_read_time; + } + DBUG_PRINT("info",("No range reads possible," " trying to construct index_merge")); List_iterator_fast it(tree->merges); -- cgit v1.2.1 From 40e65508792422e896dad764f3901c7e5971a132 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 1 Jun 2004 15:31:58 +0400 Subject: More code cleanup, debug prints removed Fixed an incorrect optimizer choice for ror-intersect(key, clustered_primary) Typo bug fixed in multitable update sql/opt_range.cc: More code cleanup, debug prints removed Fixed an incorrect optimizer choice for ror-intersect(key, clustered_primary) sql/opt_range.h: More code cleanup, debug prints removed Fixed an incorrect optimizer choice for ror-intersect(key, clustered_primary) sql/sql_update.cc: Typo bug fixed --- sql/opt_range.cc | 42 +++++++++++++++++++++--------------------- sql/opt_range.h | 14 ++++++++------ sql/sql_update.cc | 3 ++- 3 files changed, 31 insertions(+), 28 deletions(-) (limited to 'sql') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index e86a389ccb9..07cc97ad32d 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -377,7 +377,6 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, double read_time); static TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, - bool force_index_only, double read_time, bool *are_all_covering); static @@ -1517,7 +1516,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint idx; double scan_time; DBUG_ENTER("test_quick_select"); - printf("\nQUERY: %s\n", thd->query); + //printf("\nQUERY: %s\n", thd->query); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, (ulong) const_tables)); @@ -1651,8 +1650,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, Get best non-covering ROR-intersection plan and prepare data for building covering ROR-intersection. */ - if ((new_trp= get_best_ror_intersect(¶m, tree, false, - best_read_time, + if ((new_trp= get_best_ror_intersect(¶m, tree, best_read_time, &can_build_covering))) { best_trp= new_trp; @@ -1750,7 +1748,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records) else { double n_blocks= - ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); + ceil((double)((longlong)param->table->file->data_file_length / IO_SIZE)); double busy_blocks= n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); if (busy_blocks < 1.0) @@ -2026,7 +2024,7 @@ skip_to_ror_scan: cost= read_time; TABLE_READ_PLAN *prev_plan= *cur_child; - if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost, + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost, &dummy))) { if (prev_plan->is_ror) @@ -2476,7 +2474,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, { /* uncovered -> covered */ double tmp= rows2double(records)/rows2double(prev_records); - printf(" Selectivity multiplier: %g\n", tmp); DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); selectivity_mult *= tmp; prev_records= HA_POS_ERROR; @@ -2487,7 +2484,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, prev_records= records; } } - else printf("no change, skipping\n"); prev_covered= cur_covered; } if (!prev_covered) @@ -2526,8 +2522,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, info->is_covering= true; } - printf(" records: %g\n", rows2double(param->table->file->records) * - info->records_fract); info->total_cost= info->index_scan_costs; if (!info->is_covering) { @@ -2552,7 +2546,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, param Parameter from test_quick_select function. tree Transformed restriction condition to be used to look for ROR scans. - force_index_only If true, don't calculate costs of full rows retrieval. read_time Do not return read plans with cost > read_time. are_all_covering [out] set to true if union of all scans covers all fields needed by the query (and it is possible to build @@ -2606,7 +2599,6 @@ bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, static TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, - bool force_index_only, double read_time, bool *are_all_covering) { @@ -2676,7 +2668,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, return NULL; /* [intersect_scans, intersect_scans_best) will hold the best combination */ - ROR_SCAN_INFO **intersect_scans_best= NULL; + ROR_SCAN_INFO **intersect_scans_best; ha_rows best_rows; bool is_best_covering; double best_index_scan_costs; @@ -2730,17 +2722,24 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, */ if (best_num == 0) { + cur_ror_scan= tree->ror_scans; + intersect_scans_end= intersect_scans; ror_intersect_reinit(intersect); - if (!ror_intersect_add(param, intersect, cpk_scan)) + if (!ror_intersect_add(param, intersect, *cur_ror_scan)) DBUG_RETURN(NULL); /* shouldn't happen actually actually */ *(intersect_scans_end++)= *cur_ror_scan; best_num++; } if (ror_intersect_add(param, intersect, cpk_scan)) + { cpk_scan_used= true; - best_rows= (ha_rows)(intersect->records_fract* - rows2double(param->table->file->records)); + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + best_index_scan_costs= intersect->index_scan_costs; + } } /* Ok, return ROR-intersect plan if we have found one */ @@ -2910,7 +2909,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, Get best "range" table read plan for given SEL_TREE. Also update PARAM members and store ROR scans info in the SEL_TREE. SYNOPSIS - get_quick_select_params + get_key_scans_params param parameters from test_quick_select tree make range select for this SEL_TREE index_read_must_be_used if true, assume 'index only' option will be set @@ -4649,7 +4648,9 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) bool cpk_scan; uint key; DBUG_ENTER("check_quick_select"); - + + param->is_ror_scan= false; + if (!tree) DBUG_RETURN(HA_POS_ERROR); // Can't use it param->max_key_part=0; @@ -4665,7 +4666,6 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) { /* Records are not ordered by rowid for other types of indexes. */ - param->is_ror_scan= false; cpk_scan= false; } else @@ -4765,12 +4765,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, { /* If the index doesn't cover entire key, mark the scan as non-ROR scan. - (TODO sergeyp: investigate if we could do better here) + Actually we're cutting off some ROR scans here. */ uint16 fieldnr= param->table->key_info[param->real_keynr[idx]]. key_part[key_tree->part].fieldnr - 1; if (param->table->field[fieldnr]->key_length() != - param->key[idx][key_tree->part].part_length) + param->key[idx][key_tree->part].length) param->is_ror_scan= false; } diff --git a/sql/opt_range.h b/sql/opt_range.h index 22168a63f8e..654a7e3983c 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -155,15 +155,18 @@ public: function is called. SYNOPSIS init_ror_merged_scan() - reuse_handler quick select may use (q: psergey??) - (q: is this natural that we do it this way) - NOTES - psergey? + reuse_handler If true, the quick select may use table->handler, otherwise + it must create and use a separate handler object. + RETURN + 0 Ok + other Error */ virtual int init_ror_merged_scan(bool reuse_handler) { DBUG_ASSERT(0); return 1; } - /* Save ROWID of last retrieved row in file->ref. (psergey: or table->ref?) */ + /* + Save ROWID of last retrieved row in file->ref. This used in ROR-merging. + */ virtual void save_last_pos(){}; /* @@ -196,7 +199,6 @@ public: /* Table record buffer used by this quick select. - Currently this is always the same as head->record[0]. psergey: check that! */ byte *record; #ifndef DBUG_OFF diff --git a/sql/sql_update.cc b/sql/sql_update.cc index b85ddff84f9..0953e63765a 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -159,6 +159,7 @@ int mysql_update(THD *thd, if (select && select->quick) { + used_index= select->quick->index; used_key_is_modified= (!select->quick->unique_key_range() && select->quick->check_if_keys_used(&fields)); } @@ -173,7 +174,7 @@ int mysql_update(THD *thd, matching rows before updating the table! */ table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); - if (old_used_keys.is_set(used_index)) + if ( (used_index != MAX_KEY) && old_used_keys.is_set(used_index)) { table->key_read=1; table->file->extra(HA_EXTRA_KEYREAD); -- cgit v1.2.1