diff options
author | Igor Babaev <igor@askmonty.org> | 2019-02-03 14:56:12 -0800 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2019-02-03 14:56:12 -0800 |
commit | 658128af43b4d7c6db445164f8ed25ed4d1e3109 (patch) | |
tree | 7a71580cca55759b8bb2730e117436478948d77f /sql/multi_range_read.cc | |
parent | 5f46670bd09babbee75a24ac82eb4ade0706da66 (diff) | |
download | mariadb-git-658128af43b4d7c6db445164f8ed25ed4d1e3109.tar.gz |
MDEV-16188 Use in-memory PK filters built from range index scans
This patch contains a full implementation of the optimization
that allows to use in-memory rowid / primary filters built for range
conditions over indexes. In many cases usage of such filters reduce
the number of disk seeks spent for fetching table rows.
In this implementation the choice of what possible filter to be applied
(if any) is made purely on cost-based considerations.
This implementation re-achitectured the partial implementation of
the feature pushed by Galina Shalygina in the commit
8d5a11122c32f4d9eb87536886c6e893377bdd07.
Besides this patch contains a better implementation of the generic
handler function handler::multi_range_read_info_const() that
takes into account gaps between ranges when calculating the cost of
range index scans. It also contains some corrections of the
implementation of the handler function records_in_range() for MyISAM.
This patch supports the feature for InnoDB and MyISAM.
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r-- | sql/multi_range_read.cc | 141 |
1 files changed, 123 insertions, 18 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index d6952e71899..cab278f5859 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -20,6 +20,12 @@ #include "key.h" #include "sql_statistics.h" +static ulonglong key_block_no(handler *h, uint keyno, uint keyentry_pos) +{ + return (ulonglong) (h->keyread_time(keyno, 1, keyentry_pos + 1) - + h->keyread_time(keyno, 0, keyentry_pos + 1) + 0.5) + 1; +} + /**************************************************************************** * Default MRR implementation (MRR to non-MRR converter) ***************************************************************************/ @@ -47,6 +53,24 @@ for a user to be able to interrupt the calculation by killing the connection/query. + @note + Starting from 10.4 the implementation of this method tries to take into + account gaps between range intervals. Before this we had such paradoxical + cases when, for example, the cost of the index scan by range [1..3] was + almost twice as less than the cost of of the index scan by two intervals + [1..1] and [3..3]. + + @note + The current implementation of the method is not efficient for it + requires extra dives for gaps. Although these dives are not expensive + as they touch the index nodes that with very high probability are in + cache this is still not good. We could avoid it if records in range + also returned positions of the ends of range intervals. It's not + hard to implement it now for MyISAM as this engine provides a function + returning an approximation of the relative position of a key tuple + among other index key tuples. Unfortunately InnoDB now does not provide + anything like this function. + @retval HA_POS_ERROR Error or the engine is unable to perform the requested scan. Values of OUT parameters are undefined. @@ -61,12 +85,21 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, uint *bufsz, uint *flags, Cost_estimate *cost) { KEY_MULTI_RANGE range; + key_range prev_start_key; range_seq_t seq_it; - ha_rows rows, total_rows= 0; + ha_rows min_pos= 0; + ha_rows total_rows= 0; uint n_ranges=0; + uint n_eq_ranges= 0; + ulonglong total_touched_blocks= 0; + key_range *prev_min_endp= 0; + ulonglong prev_max_block_no=0; + ha_rows max_rows= stats.records; THD *thd= table->in_use; - uint limit= thd->variables.eq_range_index_dive_limit; + StringBuffer<64> key_value; + uint limit= thd->variables.eq_range_index_dive_limit; + bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq, seq_init_param, limit); @@ -77,10 +110,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, seq_it= seq->init(seq_init_param, n_ranges, *flags); while (!seq->next(seq_it, &range)) { + ha_rows rows; + ulonglong new_touched_blocks= 0; + if (unlikely(thd->killed != 0)) return HA_POS_ERROR; n_ranges++; + if (range.range_flag & EQ_RANGE) + n_eq_ranges++; key_range *min_endp, *max_endp; if (range.range_flag & GEOM_FLAG) { @@ -95,38 +133,93 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, max_endp= range.end_key.length? &range.end_key : NULL; } int keyparts_used= my_count_bits(range.start_key.keypart_map); - if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) - rows= 1; /* there can be at most one row */ - else if (use_statistics_for_eq_range && - !(range.range_flag & NULL_RANGE) && - (range.range_flag & EQ_RANGE) && - table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5) - rows= - (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1); + if (use_statistics_for_eq_range && + !(range.range_flag & NULL_RANGE) && + (range.range_flag & EQ_RANGE) && + table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5) + { + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else + rows= + (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used-1); + } else { - if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, + ulonglong min_block_no; + ulonglong max_block_no; + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, max_endp))) { /* Can't scan one range => can't do MRR scan at all */ total_rows= HA_POS_ERROR; break; } + if (!max_endp && !(prev_min_endp && prev_min_endp->length)) + min_pos+= max_rows - rows; + else + { + key_range *start_endp= prev_min_endp; + if (start_endp && !start_endp->keypart_map) + start_endp= 0; + /* + Get the estimate of rows in the previous gap + and two ranges surrounding this gap + */ + ha_rows r= this->records_in_range(keyno,start_endp,max_endp); + if (r == HA_POS_ERROR) + { + /* Some engine cannot estimate such ranges */ + total_rows += rows; + continue; + } + min_pos+= r - rows; + } + min_block_no= key_block_no(this, keyno, min_pos); + max_block_no= key_block_no(this, keyno, min_pos + rows); + new_touched_blocks= max_block_no - min_block_no + + MY_TEST(min_block_no != prev_max_block_no); + prev_max_block_no= max_block_no; + if (!prev_min_endp) + prev_min_endp= &prev_start_key; + /* Save range.start_key for the next iteration step */ + prev_start_key= range.start_key; + key_value.copy((const char *) prev_start_key.key, prev_start_key.length, + key_value.charset()); + prev_start_key.key= (const uchar *) key_value.ptr(); } total_rows += rows; + total_touched_blocks+= new_touched_blocks; } if (total_rows != HA_POS_ERROR) { + set_if_smaller(total_rows, max_rows); /* The following calculation is the same as in multi_range_read_info(): */ *flags |= HA_MRR_USE_DEFAULT_IMPL; cost->reset(); cost->avg_io_cost= 1; /* assume random seeks */ - if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) - cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows); + cost->idx_avg_io_cost= 1; + if (!(keyno == table->s->primary_key && primary_key_is_clustered())) + { + cost->idx_io_count= total_touched_blocks + + keyread_time(keyno, 0, total_rows); + cost->cpu_cost= cost->idx_cpu_cost= + (double) total_rows / TIME_FOR_COMPARE_IDX + + (2 * n_ranges - n_eq_ranges) * IDX_LOOKUP_COST; + if (!(*flags & HA_MRR_INDEX_ONLY)) + { + cost->io_count= read_time(keyno, 0, total_rows); + cost->cpu_cost+= (double) total_rows / TIME_FOR_COMPARE; + } + } else - cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + { + cost->io_count= read_time(keyno, total_touched_blocks, (uint) total_rows); + cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + } } return total_rows; } @@ -183,10 +276,22 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, cost->avg_io_cost= 1; /* assume random seeks */ /* Produce the same cost as non-MRR code does */ - if (*flags & HA_MRR_INDEX_ONLY) - cost->io_count= keyread_time(keyno, n_ranges, n_rows); + if (!(keyno == table->s->primary_key && primary_key_is_clustered())) + { + cost->idx_io_count= n_ranges + keyread_time(keyno, 0, n_rows); + cost->cpu_cost= cost->idx_cpu_cost= + (double) n_rows / TIME_FOR_COMPARE_IDX + n_ranges * IDX_LOOKUP_COST; + if (!(*flags & HA_MRR_INDEX_ONLY)) + { + cost->io_count= read_time(keyno, 0, n_rows); + cost->cpu_cost+= (double) n_rows / TIME_FOR_COMPARE; + } + } else - cost->io_count= read_time(keyno, n_ranges, n_rows); + { + cost->io_count= read_time(keyno, n_ranges, (uint)n_rows); + cost->cpu_cost= (double) n_rows / TIME_FOR_COMPARE + 0.01; + } return 0; } |