From 1242eb3d32f2863f847aa96a10e2ab983a1a643b Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 13 Mar 2020 15:28:42 +0200 Subject: Removed double records_in_range calls from multi_range_read_info_const This was to remove a performance regression between 10.3 and 10.4 In 10.5 we will have a better implementation of records_in_range that will enable us to get more statistics. This change was not done in 10.4 because the 10.5 will be part of a larger change that is not suitable for the GA 10.4 version Other things: - Changed default handler block_size to 8192 to fix things statistics for engines that doesn't set the block size. - Fixed a bug in spider when using multiple part const ranges (Patch from Kentoku) --- sql/multi_range_read.cc | 95 +++++++++---------------------------------------- 1 file changed, 17 insertions(+), 78 deletions(-) (limited to 'sql/multi_range_read.cc') diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index c000187626f..601268ec2f6 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -21,18 +21,6 @@ #include "sql_statistics.h" #include "rowid_filter.h" -static ulonglong key_block_no(TABLE *table, uint keyno, ha_rows keyentry_pos) -{ - size_t len= table->key_info[keyno].key_length + table->file->ref_length; - if (keyno == table->s->primary_key && - table->file->primary_key_is_clustered()) - len= table->s->stored_rec_length; - uint keys_per_block= (uint) (table->file->stats.block_size/2.0/len+1); - ulonglong block_no= !keyentry_pos ? 0 : - (keyentry_pos - 1) / keys_per_block + 1; - return block_no; -} - /**************************************************************************** * Default MRR implementation (MRR to non-MRR converter) ***************************************************************************/ @@ -60,24 +48,6 @@ static ulonglong key_block_no(TABLE *table, uint keyno, ha_rows keyentry_pos) for a user to be able to interrupt the calculation by killing the connection/query. - @note - Starting from 10.4 the implementation of this method tries to take into - account gaps between range intervals. Before this we had such paradoxical - cases when, for example, the cost of the index scan by range [1..3] was - almost twice as less than the cost of of the index scan by two intervals - [1..1] and [3..3]. - - @note - The current implementation of the method is not efficient for it - requires extra dives for gaps. Although these dives are not expensive - as they touch the index nodes that with very high probability are in - cache this is still not good. We could avoid it if records in range - also returned positions of the ends of range intervals. It's not - hard to implement it now for MyISAM as this engine provides a function - returning an approximation of the relative position of a key tuple - among other index key tuples. Unfortunately InnoDB now does not provide - anything like this function. - @retval HA_POS_ERROR Error or the engine is unable to perform the requested scan. Values of OUT parameters are undefined. @@ -92,24 +62,23 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, uint *bufsz, uint *flags, Cost_estimate *cost) { KEY_MULTI_RANGE range; - key_range prev_start_key; range_seq_t seq_it; - ha_rows min_pos= 0; ha_rows total_rows= 0; uint n_ranges=0; uint n_eq_ranges= 0; ulonglong total_touched_blocks= 0; - key_range *prev_min_endp= 0; - ulonglong prev_max_block_no=0; ha_rows max_rows= stats.records; THD *thd= table->in_use; - StringBuffer<64> key_value; - uint limit= thd->variables.eq_range_index_dive_limit; - bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq, seq_init_param, limit); + uint len= table->key_info[keyno].key_length + table->file->ref_length; + if (keyno == table->s->primary_key && table->file->primary_key_is_clustered()) + len= table->s->stored_rec_length; + /* Assume block is 75 % full */ + uint avg_block_records= ((uint) (table->file->stats.block_size*3/4))/len + 1; + DBUG_ENTER("multi_range_read_info_const"); /* Default MRR implementation doesn't need buffer */ *bufsz= 0; @@ -118,10 +87,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, while (!seq->next(seq_it, &range)) { ha_rows rows; - ulonglong new_touched_blocks= 0; if (unlikely(thd->killed != 0)) - return HA_POS_ERROR; + DBUG_RETURN(HA_POS_ERROR); n_ranges++; if (range.range_flag & EQ_RANGE) @@ -153,8 +121,6 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } else { - ulonglong min_block_no; - ulonglong max_block_no; if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) rows= 1; /* there can be at most one row */ else if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, @@ -164,41 +130,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, total_rows= HA_POS_ERROR; break; } - if (!max_endp && !(prev_min_endp && prev_min_endp->length)) - min_pos+= max_rows - rows; - else - { - key_range *start_endp= prev_min_endp; - if (start_endp && !start_endp->keypart_map) - start_endp= 0; - /* - Get the estimate of rows in the previous gap - and two ranges surrounding this gap - */ - ha_rows r= this->records_in_range(keyno,start_endp,max_endp); - if (r == HA_POS_ERROR) - { - /* Some engine cannot estimate such ranges */ - total_rows += rows; - continue; - } - min_pos+= r - rows; - } - min_block_no= key_block_no(this->table, keyno, min_pos); - max_block_no= key_block_no(this->table, keyno, min_pos + rows); - new_touched_blocks= max_block_no - min_block_no + - MY_TEST(min_block_no != prev_max_block_no); - prev_max_block_no= max_block_no; - if (!prev_min_endp) - prev_min_endp= &prev_start_key; - /* Save range.start_key for the next iteration step */ - prev_start_key= range.start_key; - key_value.copy((const char *) prev_start_key.key, prev_start_key.length, - key_value.charset()); - prev_start_key.key= (const uchar *) key_value.ptr(); } total_rows += rows; - total_touched_blocks+= new_touched_blocks; + total_touched_blocks+= (rows / avg_block_records +1); } if (total_rows != HA_POS_ERROR) @@ -225,13 +159,18 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } else { - cost->io_count= read_time(keyno, - (uint)total_touched_blocks, - (uint) total_rows); + cost->io_count= read_time(keyno, n_ranges, (uint) total_rows); cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; } } - return total_rows; + DBUG_PRINT("statistics", + ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu " + "idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f", + table->s->keynames.type_names[keyno], + (ulonglong) total_rows, cost->total_cost(), + (ulonglong) total_touched_blocks, + cost->idx_io_count, cost->cpu_cost, cost->io_count)); + DBUG_RETURN(total_rows); } -- cgit v1.2.1