diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2010-10-26 15:35:13 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2010-10-26 15:35:13 +0400 |
commit | d8efc3b155dc7791b00d5e87ccb8b34d1a12156b (patch) | |
tree | 061337f0951a71a7bd83d3bc98ed3bfe60ee85ec /sql/multi_range_read.h | |
parent | ac8a79b944546ffcbaf366f789873b28e05896f8 (diff) | |
download | mariadb-git-d8efc3b155dc7791b00d5e87ccb8b34d1a12156b.tar.gz |
DS-MRR improvements: address review feedback for R3 version of the patch
Diffstat (limited to 'sql/multi_range_read.h')
-rw-r--r-- | sql/multi_range_read.h | 346 |
1 files changed, 276 insertions, 70 deletions
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h index 3c92dcd2950..fb2a67b6af1 100644 --- a/sql/multi_range_read.h +++ b/sql/multi_range_read.h @@ -50,6 +50,26 @@ class DsMrr_impl; +class Key_parameters +{ +public: + /* TRUE <=> We can get at most one index tuple for a lookup key */ + bool index_ranges_unique; + + uint key_tuple_length; /* Length of index lookup tuple, in bytes */ + key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ + + /* + This is + = key_tuple_length if we copy keys to buffer + = sizeof(void*) if we're using pointers to materialized keys. + */ + uint key_size_in_keybuf; + + /* TRUE <=> don't copy key values, use pointers to them instead. */ + bool use_key_pointers; +}; + /** Iterator over (record, range_id) pairs that match given key value. @@ -57,16 +77,23 @@ class DsMrr_impl; key value. A key value may have multiple matching records, so we'll need to produce a cross-product of sets of matching records and range_id-s. */ - +class Mrr_ordered_index_reader; class Key_value_records_iterator { /* Scan parameters */ - DsMrr_impl *dsmrr; + Key_parameters *param; Lifo_buffer_iterator identical_key_it; uchar *last_identical_key_ptr; bool get_next_row; + //handler *h; + /* TRUE <=> We can get at most one index tuple for a lookup key */ + //bool index_ranges_unique; + + Mrr_ordered_index_reader *owner; + /* key_buffer.read() reads to here */ + uchar *cur_index_tuple; public: - bool init(DsMrr_impl *dsmrr); + bool init(Mrr_ordered_index_reader *owner_arg); /* Get next (key_val, range_id) pair. @@ -74,10 +101,185 @@ public: int get_next(); void close(); + friend class Mrr_ordered_index_reader; }; /* + Something that will manage buffers for those that call it +*/ +class Buffer_manager +{ +public: + virtual void reset_buffer_sizes()= 0; + virtual void setup_buffer_sizes(uint key_size_in_keybuf, + key_part_map key_tuple_map)=0; + virtual Lifo_buffer* get_key_buffer()= 0; + virtual ~Buffer_manager(){} +}; + + +/* + Abstract MRR execution strategy + + An object of this class produces (R, range_info) pairs where R can be an + index tuple or a table record. + + Getting HA_ERR_END_OF_FILE from get_next() means that the source should be + re-filled. if eof() returns true after refill attempt, then end of stream has + been reached and get_next() must not be called anymore. +*/ + +class Mrr_strategy +{ +public: + virtual int get_next(char **range_info) = 0; + virtual int refill_buffer()=0; + + virtual ~Mrr_strategy() {}; +}; + + +/* A common base for strategies that do index scans and produce index tuples */ +class Mrr_index_reader : public Mrr_strategy +{ +public: + handler *h; + + virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Buffer_manager *buf_manager_arg) = 0; + virtual bool eof() = 0; + virtual uchar *get_rowid_ptr()= 0; + virtual bool skip_record(char *range_id, uchar *rowid)=0; +}; + + +/* + A "bypass" strategy that uses default MRR implementation (i.e. + handler::multi_range_read_XXX() calls) to produce rows. +*/ + +class Mrr_simple_index_reader : public Mrr_index_reader +{ + int res; +public: + int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Buffer_manager *buf_manager_arg); + int get_next(char **range_info); + int refill_buffer() { return 0; } + bool eof() { return test(res); } + uchar *get_rowid_ptr() { return h->ref; } + bool skip_record(char *range_id, uchar *rowid) + { + return (h->mrr_funcs.skip_record && + h->mrr_funcs.skip_record(h->mrr_iter, range_id, rowid)); + } +}; + + + +/* + A strategy that sorts index lookup keys before scanning the index +*/ + +class Mrr_ordered_index_reader : public Mrr_index_reader +{ +public: + int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Buffer_manager *buf_manager_arg); + int get_next(char **range_info); + int refill_buffer(); + bool eof() { return index_scan_eof; } + uchar *get_rowid_ptr() { return h->ref; } + + bool skip_record(char *range_info, uchar *rowid) + { + return (mrr_funcs.skip_record && + mrr_funcs.skip_record(mrr_iter, range_info, rowid)); + } +private: + Key_value_records_iterator kv_it; + + bool scanning_key_val_iter; + + char *cur_range_info; + + /* Buffer to store (key, range_id) pairs */ + Lifo_buffer *key_buffer; + + Buffer_manager *buf_manager; + + /* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */ + bool know_key_tuple_params; + + // bool use_key_pointers; + + Key_parameters keypar; + /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ + bool is_mrr_assoc; + + bool no_more_keys; + RANGE_SEQ_IF mrr_funcs; + range_seq_t mrr_iter; + + bool auto_refill; + + bool index_scan_eof; + + static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); + static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2); + //void cleanup(); + + friend class Key_value_records_iterator; + friend class DsMrr_impl; + friend class Mrr_ordered_rndpos_reader; +}; + + +/* MRR strategy that fetches rowids */ + +class Mrr_ordered_rndpos_reader : public Mrr_strategy +{ +public: + int init(handler *h, Mrr_index_reader *index_reader, uint mode, + Lifo_buffer *buf); + int get_next(char **range_info); + int refill_buffer(); + void cleanup(); +private: + handler *h; + + DsMrr_impl *dsmrr; + /* This what we get (rowid, range_info) pairs from */ + Mrr_index_reader *index_reader; + uchar *index_rowid; + + /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ + bool is_mrr_assoc; + + uchar *last_identical_rowid; + Lifo_buffer *rowid_buffer; + + /* = h->ref_length [ + sizeof(range_assoc_info) ] */ + //uint rowid_buff_elem_size; + + /* rowid_buffer.read() will set the following: */ + uchar *rowid; + uchar *rowids_range_id; +}; + +class Mrr_strategy_factory +{ +public: + Mrr_ordered_rndpos_reader ordered_rndpos_reader; + Mrr_ordered_index_reader ordered_index_reader; + Mrr_simple_index_reader simple_index_reader; +}; + +/* DS-MRR implementation for one table. Create/use one object of this class for each ha_{myisam/innobase/etc} object. That object will be further referred to as "the handler" @@ -154,9 +356,58 @@ public: get record by rowid and return the {record, range_id} pair 4. Repeat the above steps until we've exhausted the list of ranges we're scanning. + + Buffer space management considerations + -------------------------------------- + With regards to buffer/memory management, MRR interface specifies that + - SQL layer provides multi_range_read_init() with buffer of certain size. + - MRR implementation may use (i.e. have at its disposal till the end of + the MRR scan) all of the buffer, or return the unused end of the buffer + to SQL layer. + + DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When + we need to accumulate/sort only keys (or only rowids), it is fairly trivial. + + When we need to accumulate/sort both keys and rowids, efficient buffer use + gets complicated. We need to: + - First, accumulate keys and sort them + - Then use the keys (smaller values go first) to obtain rowids. A key is not + needed after we've got matching rowids for it. + - Make sure that rowids are accumulated at the front of the buffer, so that we + can return the end part of the buffer to SQL layer, should there be too + few rowid values to occupy the buffer. + + All of these goals are achieved by using the following scheme: + + | | We get an empty buffer from SQL layer. + + | *-| + | *----| First, we fill the buffer with keys. Key_buffer + | *-------| part grows from end of the buffer space to start + | *----------| (In this picture, the buffer is big enough to + | *-------------| accomodate all keys and even have some space left) + + | *=============| We want to do key-ordered index scan, so we sort + the keys + + |-x *===========| Then we use the keys get rowids. Rowids are + |----x *========| stored from start of buffer space towards the end. + |--------x *=====| The part of the buffer occupied with keys + |------------x *===| gradually frees up space for rowids. In this + |--------------x *=| picture we run out of keys before we've ran out + |----------------x | of buffer space (it can be other way as well). + + |================x | Then we sort the rowids. + + | |~~~| The unused part of the buffer is at the end, so + we can return it to the SQL layer. + + |================* Sorted rowids are then used to read table records + in disk order + */ -class DsMrr_impl +class DsMrr_impl : public Buffer_manager { public: typedef void (handler::*range_check_toggle_func_t)(bool on); @@ -181,6 +432,9 @@ public: void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); private: + /* Buffer to store (key, range_id) pairs */ + Lifo_buffer *key_buffer; + /* The "owner" handler object (the one that is expected to "own" this object and call its functions). @@ -197,20 +451,16 @@ private: /** Properties of current MRR scan **/ uint keyno; /* index we're running the scan on */ - bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ bool is_mrr_assoc; /* TRUE <=> sort the keys before making index lookups */ - bool do_sort_keys; + //bool do_sort_keys; /* TRUE <=> sort rowids and use rnd_pos() to get and return full records */ - bool do_rndpos_scan; - - /* - (if do_sort_keys==TRUE) don't copy key values, use pointers to them - instead. - */ - bool use_key_pointers; + //bool do_rndpos_scan; + Mrr_strategy_factory strategy_factory; + Mrr_strategy *strategy; + Mrr_index_reader *index_strategy; /* The whole buffer space that we're using */ uchar *full_buf; @@ -226,12 +476,6 @@ private: /** Index scaning and key buffer-related members **/ - /* TRUE <=> We can get at most one index tuple for a lookup key */ - bool index_ranges_unique; - - /* TRUE<=> we're in a middle of enumerating records for a key range */ - //bool in_index_range; - /* One of the following two is used for key buffer: forward is used when we only need key buffer, backward is used when we need both key and rowid @@ -240,39 +484,10 @@ private: Forward_lifo_buffer forward_key_buf; Backward_lifo_buffer backward_key_buf; - /* Buffer to store (key, range_id) pairs */ - Lifo_buffer *key_buffer; - - /* Index scan state */ - bool scanning_key_val_iter; - /* - TRUE <=> we've got index tuples/rowids for all keys (need this flag because - we may have a situation where we've read everything from the key buffer but - haven't finished with getting index tuples for the last key) - */ - bool index_scan_eof; - Key_value_records_iterator kv_it; - - /* key_buffer.read() reads to here */ - uchar *cur_index_tuple; - - /* if in_index_range==TRUE: range_id of the range we're enumerating */ - char *cur_range_info; - - /* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */ - bool know_key_tuple_params; - uint key_tuple_length; /* Length of index lookup tuple, in bytes */ - key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ - - /* - This is - = key_tuple_length if we copy keys to buffer - = sizeof(void*) if we're using pointers to materialized keys. - */ - uint key_size_in_keybuf; + Forward_lifo_buffer rowid_buffer; /* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */ - uint key_buff_elem_size; + //uint key_buff_elem_size_; /** rnd_pos() scan and rowid buffer-related members **/ @@ -280,36 +495,27 @@ private: Buffer to store (rowid, range_id) pairs, or just rowids if is_mrr_assoc==FALSE */ - Forward_lifo_buffer rowid_buffer; - - /* rowid_buffer.read() will set the following: */ - uchar *rowid; - uchar *rowids_range_id; - - uchar *last_identical_rowid; - - bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ - - /* = h->ref_length [ + sizeof(range_assoc_info) ] */ - uint rowid_buff_elem_size; + //Forward_lifo_buffer rowid_buffer; bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, COST_VECT *cost); bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, uint *buffer_size, COST_VECT *cost); bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags); - static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); - static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2); - int dsmrr_fill_rowid_buffer(); - void dsmrr_fill_key_buffer(); - int dsmrr_next_from_index(char **range_info); - void setup_buffer_sizes(key_range *sample_key); void reallocate_buffer_space(); - static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags); - static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); + /* Buffer_manager implementation */ + void setup_buffer_sizes(uint key_size_in_keybuf, key_part_map key_tuple_map); + void reset_buffer_sizes(); + Lifo_buffer* get_key_buffer() { return key_buffer; } + friend class Key_value_records_iterator; + friend class Mrr_ordered_index_reader; + friend class Mrr_ordered_rndpos_reader; + + int setup_two_handlers(); + void close_second_handler(); }; /** |