summaryrefslogtreecommitdiff
path: root/sql/multi_range_read.h
diff options
context:
space:
mode:
authorSergey Petrunya <psergey@askmonty.org>2010-10-26 15:35:13 +0400
committerSergey Petrunya <psergey@askmonty.org>2010-10-26 15:35:13 +0400
commitd8efc3b155dc7791b00d5e87ccb8b34d1a12156b (patch)
tree061337f0951a71a7bd83d3bc98ed3bfe60ee85ec /sql/multi_range_read.h
parentac8a79b944546ffcbaf366f789873b28e05896f8 (diff)
downloadmariadb-git-d8efc3b155dc7791b00d5e87ccb8b34d1a12156b.tar.gz
DS-MRR improvements: address review feedback for R3 version of the patch
Diffstat (limited to 'sql/multi_range_read.h')
-rw-r--r--sql/multi_range_read.h346
1 files changed, 276 insertions, 70 deletions
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
index 3c92dcd2950..fb2a67b6af1 100644
--- a/sql/multi_range_read.h
+++ b/sql/multi_range_read.h
@@ -50,6 +50,26 @@
class DsMrr_impl;
+class Key_parameters
+{
+public:
+ /* TRUE <=> We can get at most one index tuple for a lookup key */
+ bool index_ranges_unique;
+
+ uint key_tuple_length; /* Length of index lookup tuple, in bytes */
+ key_part_map key_tuple_map; /* keyparts used in index lookup tuples */
+
+ /*
+ This is
+ = key_tuple_length if we copy keys to buffer
+ = sizeof(void*) if we're using pointers to materialized keys.
+ */
+ uint key_size_in_keybuf;
+
+ /* TRUE <=> don't copy key values, use pointers to them instead. */
+ bool use_key_pointers;
+};
+
/**
Iterator over (record, range_id) pairs that match given key value.
@@ -57,16 +77,23 @@ class DsMrr_impl;
key value. A key value may have multiple matching records, so we'll need to
produce a cross-product of sets of matching records and range_id-s.
*/
-
+class Mrr_ordered_index_reader;
class Key_value_records_iterator
{
/* Scan parameters */
- DsMrr_impl *dsmrr;
+ Key_parameters *param;
Lifo_buffer_iterator identical_key_it;
uchar *last_identical_key_ptr;
bool get_next_row;
+ //handler *h;
+ /* TRUE <=> We can get at most one index tuple for a lookup key */
+ //bool index_ranges_unique;
+
+ Mrr_ordered_index_reader *owner;
+ /* key_buffer.read() reads to here */
+ uchar *cur_index_tuple;
public:
- bool init(DsMrr_impl *dsmrr);
+ bool init(Mrr_ordered_index_reader *owner_arg);
/*
Get next (key_val, range_id) pair.
@@ -74,10 +101,185 @@ public:
int get_next();
void close();
+ friend class Mrr_ordered_index_reader;
};
/*
+ Something that will manage buffers for those that call it
+*/
+class Buffer_manager
+{
+public:
+ virtual void reset_buffer_sizes()= 0;
+ virtual void setup_buffer_sizes(uint key_size_in_keybuf,
+ key_part_map key_tuple_map)=0;
+ virtual Lifo_buffer* get_key_buffer()= 0;
+ virtual ~Buffer_manager(){}
+};
+
+
+/*
+ Abstract MRR execution strategy
+
+ An object of this class produces (R, range_info) pairs where R can be an
+ index tuple or a table record.
+
+ Getting HA_ERR_END_OF_FILE from get_next() means that the source should be
+ re-filled. if eof() returns true after refill attempt, then end of stream has
+ been reached and get_next() must not be called anymore.
+*/
+
+class Mrr_strategy
+{
+public:
+ virtual int get_next(char **range_info) = 0;
+ virtual int refill_buffer()=0;
+
+ virtual ~Mrr_strategy() {};
+};
+
+
+/* A common base for strategies that do index scans and produce index tuples */
+class Mrr_index_reader : public Mrr_strategy
+{
+public:
+ handler *h;
+
+ virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges,
+ uint mode, Buffer_manager *buf_manager_arg) = 0;
+ virtual bool eof() = 0;
+ virtual uchar *get_rowid_ptr()= 0;
+ virtual bool skip_record(char *range_id, uchar *rowid)=0;
+};
+
+
+/*
+ A "bypass" strategy that uses default MRR implementation (i.e.
+ handler::multi_range_read_XXX() calls) to produce rows.
+*/
+
+class Mrr_simple_index_reader : public Mrr_index_reader
+{
+ int res;
+public:
+ int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges,
+ uint mode, Buffer_manager *buf_manager_arg);
+ int get_next(char **range_info);
+ int refill_buffer() { return 0; }
+ bool eof() { return test(res); }
+ uchar *get_rowid_ptr() { return h->ref; }
+ bool skip_record(char *range_id, uchar *rowid)
+ {
+ return (h->mrr_funcs.skip_record &&
+ h->mrr_funcs.skip_record(h->mrr_iter, range_id, rowid));
+ }
+};
+
+
+
+/*
+ A strategy that sorts index lookup keys before scanning the index
+*/
+
+class Mrr_ordered_index_reader : public Mrr_index_reader
+{
+public:
+ int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges,
+ uint mode, Buffer_manager *buf_manager_arg);
+ int get_next(char **range_info);
+ int refill_buffer();
+ bool eof() { return index_scan_eof; }
+ uchar *get_rowid_ptr() { return h->ref; }
+
+ bool skip_record(char *range_info, uchar *rowid)
+ {
+ return (mrr_funcs.skip_record &&
+ mrr_funcs.skip_record(mrr_iter, range_info, rowid));
+ }
+private:
+ Key_value_records_iterator kv_it;
+
+ bool scanning_key_val_iter;
+
+ char *cur_range_info;
+
+ /* Buffer to store (key, range_id) pairs */
+ Lifo_buffer *key_buffer;
+
+ Buffer_manager *buf_manager;
+
+ /* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */
+ bool know_key_tuple_params;
+
+ // bool use_key_pointers;
+
+ Key_parameters keypar;
+ /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+ bool is_mrr_assoc;
+
+ bool no_more_keys;
+ RANGE_SEQ_IF mrr_funcs;
+ range_seq_t mrr_iter;
+
+ bool auto_refill;
+
+ bool index_scan_eof;
+
+ static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
+ static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2);
+ //void cleanup();
+
+ friend class Key_value_records_iterator;
+ friend class DsMrr_impl;
+ friend class Mrr_ordered_rndpos_reader;
+};
+
+
+/* MRR strategy that fetches rowids */
+
+class Mrr_ordered_rndpos_reader : public Mrr_strategy
+{
+public:
+ int init(handler *h, Mrr_index_reader *index_reader, uint mode,
+ Lifo_buffer *buf);
+ int get_next(char **range_info);
+ int refill_buffer();
+ void cleanup();
+private:
+ handler *h;
+
+ DsMrr_impl *dsmrr;
+ /* This what we get (rowid, range_info) pairs from */
+ Mrr_index_reader *index_reader;
+ uchar *index_rowid;
+
+ /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+ bool is_mrr_assoc;
+
+ uchar *last_identical_rowid;
+ Lifo_buffer *rowid_buffer;
+
+ /* = h->ref_length [ + sizeof(range_assoc_info) ] */
+ //uint rowid_buff_elem_size;
+
+ /* rowid_buffer.read() will set the following: */
+ uchar *rowid;
+ uchar *rowids_range_id;
+};
+
+class Mrr_strategy_factory
+{
+public:
+ Mrr_ordered_rndpos_reader ordered_rndpos_reader;
+ Mrr_ordered_index_reader ordered_index_reader;
+ Mrr_simple_index_reader simple_index_reader;
+};
+
+/*
DS-MRR implementation for one table. Create/use one object of this class for
each ha_{myisam/innobase/etc} object. That object will be further referred to
as "the handler"
@@ -154,9 +356,58 @@ public:
get record by rowid and return the {record, range_id} pair
4. Repeat the above steps until we've exhausted the list of ranges we're
scanning.
+
+ Buffer space management considerations
+ --------------------------------------
+ With regards to buffer/memory management, MRR interface specifies that
+ - SQL layer provides multi_range_read_init() with buffer of certain size.
+ - MRR implementation may use (i.e. have at its disposal till the end of
+ the MRR scan) all of the buffer, or return the unused end of the buffer
+ to SQL layer.
+
+ DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
+ we need to accumulate/sort only keys (or only rowids), it is fairly trivial.
+
+ When we need to accumulate/sort both keys and rowids, efficient buffer use
+ gets complicated. We need to:
+ - First, accumulate keys and sort them
+ - Then use the keys (smaller values go first) to obtain rowids. A key is not
+ needed after we've got matching rowids for it.
+ - Make sure that rowids are accumulated at the front of the buffer, so that we
+ can return the end part of the buffer to SQL layer, should there be too
+ few rowid values to occupy the buffer.
+
+ All of these goals are achieved by using the following scheme:
+
+ | | We get an empty buffer from SQL layer.
+
+ | *-|
+ | *----| First, we fill the buffer with keys. Key_buffer
+ | *-------| part grows from end of the buffer space to start
+ | *----------| (In this picture, the buffer is big enough to
+ | *-------------| accomodate all keys and even have some space left)
+
+ | *=============| We want to do key-ordered index scan, so we sort
+ the keys
+
+ |-x *===========| Then we use the keys get rowids. Rowids are
+ |----x *========| stored from start of buffer space towards the end.
+ |--------x *=====| The part of the buffer occupied with keys
+ |------------x *===| gradually frees up space for rowids. In this
+ |--------------x *=| picture we run out of keys before we've ran out
+ |----------------x | of buffer space (it can be other way as well).
+
+ |================x | Then we sort the rowids.
+
+ | |~~~| The unused part of the buffer is at the end, so
+ we can return it to the SQL layer.
+
+ |================* Sorted rowids are then used to read table records
+ in disk order
+
*/
-class DsMrr_impl
+class DsMrr_impl : public Buffer_manager
{
public:
typedef void (handler::*range_check_toggle_func_t)(bool on);
@@ -181,6 +432,9 @@ public:
void *seq_init_param, uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
private:
+ /* Buffer to store (key, range_id) pairs */
+ Lifo_buffer *key_buffer;
+
/*
The "owner" handler object (the one that is expected to "own" this object
and call its functions).
@@ -197,20 +451,16 @@ private:
/** Properties of current MRR scan **/
uint keyno; /* index we're running the scan on */
- bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
bool is_mrr_assoc;
/* TRUE <=> sort the keys before making index lookups */
- bool do_sort_keys;
+ //bool do_sort_keys;
/* TRUE <=> sort rowids and use rnd_pos() to get and return full records */
- bool do_rndpos_scan;
-
- /*
- (if do_sort_keys==TRUE) don't copy key values, use pointers to them
- instead.
- */
- bool use_key_pointers;
+ //bool do_rndpos_scan;
+ Mrr_strategy_factory strategy_factory;
+ Mrr_strategy *strategy;
+ Mrr_index_reader *index_strategy;
/* The whole buffer space that we're using */
uchar *full_buf;
@@ -226,12 +476,6 @@ private:
/** Index scaning and key buffer-related members **/
- /* TRUE <=> We can get at most one index tuple for a lookup key */
- bool index_ranges_unique;
-
- /* TRUE<=> we're in a middle of enumerating records for a key range */
- //bool in_index_range;
-
/*
One of the following two is used for key buffer: forward is used when
we only need key buffer, backward is used when we need both key and rowid
@@ -240,39 +484,10 @@ private:
Forward_lifo_buffer forward_key_buf;
Backward_lifo_buffer backward_key_buf;
- /* Buffer to store (key, range_id) pairs */
- Lifo_buffer *key_buffer;
-
- /* Index scan state */
- bool scanning_key_val_iter;
- /*
- TRUE <=> we've got index tuples/rowids for all keys (need this flag because
- we may have a situation where we've read everything from the key buffer but
- haven't finished with getting index tuples for the last key)
- */
- bool index_scan_eof;
- Key_value_records_iterator kv_it;
-
- /* key_buffer.read() reads to here */
- uchar *cur_index_tuple;
-
- /* if in_index_range==TRUE: range_id of the range we're enumerating */
- char *cur_range_info;
-
- /* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */
- bool know_key_tuple_params;
- uint key_tuple_length; /* Length of index lookup tuple, in bytes */
- key_part_map key_tuple_map; /* keyparts used in index lookup tuples */
-
- /*
- This is
- = key_tuple_length if we copy keys to buffer
- = sizeof(void*) if we're using pointers to materialized keys.
- */
- uint key_size_in_keybuf;
+ Forward_lifo_buffer rowid_buffer;
/* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */
- uint key_buff_elem_size;
+ //uint key_buff_elem_size_;
/** rnd_pos() scan and rowid buffer-related members **/
@@ -280,36 +495,27 @@ private:
Buffer to store (rowid, range_id) pairs, or just rowids if
is_mrr_assoc==FALSE
*/
- Forward_lifo_buffer rowid_buffer;
-
- /* rowid_buffer.read() will set the following: */
- uchar *rowid;
- uchar *rowids_range_id;
-
- uchar *last_identical_rowid;
-
- bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
-
- /* = h->ref_length [ + sizeof(range_assoc_info) ] */
- uint rowid_buff_elem_size;
+ //Forward_lifo_buffer rowid_buffer;
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
uint *buffer_size, COST_VECT *cost);
bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags);
- static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
- static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2);
- int dsmrr_fill_rowid_buffer();
- void dsmrr_fill_key_buffer();
- int dsmrr_next_from_index(char **range_info);
- void setup_buffer_sizes(key_range *sample_key);
void reallocate_buffer_space();
- static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags);
- static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+ /* Buffer_manager implementation */
+ void setup_buffer_sizes(uint key_size_in_keybuf, key_part_map key_tuple_map);
+ void reset_buffer_sizes();
+ Lifo_buffer* get_key_buffer() { return key_buffer; }
+
friend class Key_value_records_iterator;
+ friend class Mrr_ordered_index_reader;
+ friend class Mrr_ordered_rndpos_reader;
+
+ int setup_two_handlers();
+ void close_second_handler();
};
/**