summaryrefslogtreecommitdiff
path: root/sql/multi_range_read.h
diff options
context:
space:
mode:
authorSergey Petrunya <psergey@askmonty.org>2010-09-19 01:05:47 +0400
committerSergey Petrunya <psergey@askmonty.org>2010-09-19 01:05:47 +0400
commit18a348503a3124f3fcf73fc236948a10d4c3c29d (patch)
tree6750abbc5d872d763054d3b93510cc9c9bd093f7 /sql/multi_range_read.h
parent7b9df6aab2d5cfbb899b8d17860f3e3de95e5154 (diff)
downloadmariadb-git-18a348503a3124f3fcf73fc236948a10d4c3c29d.tar.gz
DS-MRR improvements: better comments, use symbolic name instead of +1/-1 constants.
Diffstat (limited to 'sql/multi_range_read.h')
-rw-r--r--sql/multi_range_read.h183
1 files changed, 119 insertions, 64 deletions
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
index 9cd1503596f..21cc0a49d74 100644
--- a/sql/multi_range_read.h
+++ b/sql/multi_range_read.h
@@ -6,9 +6,14 @@
/**
A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
- This is a "plugin"(*) for storage engines that allows make index scans
- read table rows in rowid order. For disk-based storage engines, this is
- faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
+ This is a "plugin"(*) for storage engines that allows to
+ 1. When doing index scans, read table rows in rowid order;
+ 2. when making many index lookups, do them in key order and don't
+ lookup the same key value multiple times;
+ 3. Do both #1 and #2, when applicable.
+ These changes are expected to speed up query execution for disk-based
+ storage engines running io-bound loads and "big" queries (ie. queries that
+ do joins and enumerate lots of records).
(*) - only conceptually. No dynamic loading or binary compatibility of any
kind.
@@ -17,20 +22,25 @@
SQL Layer code
| | |
- -v---v---v---- handler->multi_range_read_XXX() function calls
+ v v v
+ -|---|---|---- handler->multi_range_read_XXX() function calls
| | |
- ____________________________________
- / DS-MRR module \
- | (scan indexes, order rowids, do |
- | full record reads in rowid order) |
- \____________________________________/
+ _____________________________________
+ / DS-MRR module \
+ | (order/de-duplicate lookup keys, |
+ | scan indexes in key order, |
+ | order/de-duplicate rowids, |
+ | retrieve full record reads in rowid |
+ | order) |
+ \_____________________________________/
| | |
-|---|---|----- handler->read_range_first()/read_range_next(),
| | | handler->index_read(), handler->rnd_pos() calls.
| | |
v v v
Storage engine internals
-
+
+
Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
Potentially it can be used with any table handler that has disk-based data
storage and has better performance when reading data in rowid order.
@@ -38,77 +48,104 @@
/*
- A simple memory buffer for reading and writing.
-
- when writing, there is no user-visible "current" position, although
- internally 'pos' points to just after the end of used area (or at the
- start of it for reverse buffer).
+ An in-memory buffer used by DS-MRR implementation.
+ - The buffer contains fixed-size elements. The elements are either atomic
+ byte sequences or pairs.
+ - The buffer resides in memory provided by the user. It is possible to
+ = dynamically (ie. between write operations) add ajacent memory space to
+ the buffer
+ = dynamically remove unused space from the buffer.
+ - Buffer can be set to be either "forward" or "backward".
+
+ The intent of the last two properties is to allow to have two buffers on
+ adjacent memory space, one is being read from (and so its space shrinks)
+ while the other is being written to (and so it needs more and more space).
+
+ Illustration of forward buffer operation:
+
+ +-- next read will read from here
+ |
+ | +-- next write will write to here
+ v v
+ *--------------*===============*----------------*
+ | ^ | ^ | |
+ | | read_pos | write_pos |
+ start | | end
+ | |
+ usused space user data
- When reading, there is current position pointing at start (for reverse
- buffer, end) of the element that will be read next.
- ^^ why end for reverse? it's more logical to point at start
*/
class SimpleBuffer
{
- uchar *start;
- uchar *end;
+public:
+
+ enum enum_direction {
+ BACKWARD=-1, /* buffer is filled/read from bigger to smaller memory addresses */
+ FORWARD=1 /* buffer is filled/read from smaller to bigger memory addresses */
+ };
+
+private:
+ enum_direction direction;
+
+ uchar *start; /* points to start of buffer space */
+ uchar *end; /* points to just beyond the end of buffer space */
+ /*
+ Forward buffer: points to the start of the data that will be read next
+ Backward buffer: points to just beyond the end of the data that will be
+ read next.
+ */
uchar *read_pos;
- uchar *write_pos;
-
/*
- 1 <=> buffer grows/is filled/is read from start to end
- -1 <=> everthing is done from end to start instead.
+ Forward buffer: points to just after the end of the used area.
+ Backward buffer: points to the start of used area.
+ */
+ uchar *write_pos;
+
+ /*
+ Data to be written. write() call will assume that (*write_ptr1) points to
+ write_size1 bytes of data to be written.
+ If write_ptr2!=NULL then the buffer stores pairs, and (*write_ptr2) points
+ to write_size2 bytes of data that form the second component.
*/
- int direction;
-
- /* Pointers to read data from */
uchar **write_ptr1;
size_t write_size1;
- /* Same as above, but may be NULL */
uchar **write_ptr2;
size_t write_size2;
- /* Pointers to write data to */
+ /*
+ read() will do reading by storing pointer to read data into *read_ptr1 (if
+ the buffer stores atomic elements), or into {*read_ptr1, *read_ptr2} (if
+ the buffer stores pairs).
+ */
+ //TODO if write_size1 == read_size1 why have two variables??
uchar **read_ptr1;
size_t read_size1;
- /* Same as above, but may be NULL */
uchar **read_ptr2;
size_t read_size2;
- bool have_space_for(size_t bytes);
- uchar *used_area() { return (direction == 1)? read_pos : write_pos; }
- size_t used_size();
-
- void write(const uchar *data, size_t bytes);
- uchar *read(size_t bytes);
-
public:
- /* Set up writing*/
+ /* Write-mode functions */
void setup_writing(uchar **data1, size_t len1,
uchar **data2, size_t len2);
-
- void sort(qsort2_cmp cmp_func, void *cmp_func_arg);
-
- /* Write-mode functions */
void reset_for_writing();
- void write();
bool can_write();
-
- bool is_empty() { return used_size() == 0; }
+ void write();
/* Read-mode functions */
+ bool is_empty() { return used_size() == 0; }
void reset_for_reading();
- // todo: join with setup-writing? (but what for?)
void setup_reading(uchar **data1, size_t len1,
uchar **data2, size_t len2);
bool read();
- bool have_data(size_t bytes);
+ /* Misc functions */
+ void sort(qsort2_cmp cmp_func, void *cmp_func_arg);
+ bool is_reverse() { return direction == BACKWARD; }
uchar *end_of_space();
- /* Control functions */
- void set_buffer_space(uchar *start_arg, uchar *end_arg, int direction_arg)
+ /* Buffer space control functions */
+ void set_buffer_space(uchar *start_arg, uchar *end_arg, enum_direction direction_arg)
{
start= start_arg;
end= end_arg;
@@ -116,10 +153,10 @@ public:
TRASH(start, end - start);
reset_for_writing();
}
-
+
/*
- Stop/return the unneded space (the one that we have wrote to and have read
- from.
+ Stop using/return the unneded space (the one that we have already wrote
+ to read from).
*/
void remove_unused_space(uchar **unused_start, uchar **unused_end)
{
@@ -142,9 +179,8 @@ public:
uchar *tmp= read_pos;
read_pos= write_pos;
write_pos= tmp;
- direction= -direction;
+ direction= (direction == FORWARD)? BACKWARD: FORWARD;
}
- bool is_reverse() { return direction == -1; }
void grow(uchar *unused_start, uchar *unused_end)
{
@@ -173,10 +209,13 @@ public:
*/
class PeekIterator
{
- // if direction==1 : pointer to what to return next
- // if direction==-1: pointer to the end of what is to be returned next
+ /*
+ if sb->direction==1 : pointer to what to return next
+ if sb->direction==-1: pointer to the end of what is to be returned next
+ */
uchar *pos;
SimpleBuffer *sb;
+
public:
void init(SimpleBuffer *sb_arg)
{
@@ -190,8 +229,10 @@ public:
*/
bool read_next()
{
- // Always read the first component first? (because we do inverted-writes
- // if needed, so no measures need to be taken here).
+ /*
+ Always read the first component first (if the buffer is backwards, we
+ have written the second component first).
+ */
uchar *res;
if ((res= get_next(sb->read_size1)))
{
@@ -223,6 +264,16 @@ public:
}
}
};
+
+private:
+ bool have_space_for(size_t bytes);
+ /* Return pointer to start of the memory area that is occupied by the data */
+ uchar *used_area() { return (direction == FORWARD)? read_pos : write_pos; }
+ size_t used_size();
+
+ void write(const uchar *data, size_t bytes);
+ uchar *read(size_t bytes);
+ bool have_data(size_t bytes);
};
@@ -231,11 +282,11 @@ public:
each ha_{myisam/innobase/etc} object. That object will be further referred to
as "the handler"
- There are actually three strategies
- S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
+ DsMrr_impl has the following execution strategies:
+ S1. Bypass DS-MRR, pass all calls to default MRR implementation (i.e. to
MRR-to-non-MRR calls converter)
- S2. Regular DS-MRR
- S3. DS-MRR/CPK for doing scans on clustered primary keys.
+ S2. Sort Keys
+ S3. Sort Rowids
S1 is used for cases which DS-MRR is unable to handle for some reason.
@@ -294,9 +345,13 @@ private:
handler *h;
TABLE *table; /* Always equal to h->table */
- /* Secondary handler object. It is used for scanning the index */
+ /*
+ Secondary handler object, if needed (we need it when we need to both scan
+ the index and return rows).
+ */
handler *h2;
-
+
+ /* Full buffer that we're using (the buffer is obtained from SQL layer) */
uchar *full_buf;
uchar *full_buf_end;