summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--storage/innodb_plugin/ChangeLog7
-rw-r--r--storage/innodb_plugin/buf/buf0buf.c6
-rw-r--r--storage/innodb_plugin/buf/buf0rea.c170
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc8
-rw-r--r--storage/innodb_plugin/include/buf0buf.h14
-rw-r--r--storage/innodb_plugin/include/buf0buf.ic28
-rw-r--r--storage/innodb_plugin/include/srv0srv.h2
-rw-r--r--storage/innodb_plugin/srv/srv0srv.c4
8 files changed, 232 insertions, 7 deletions
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
index 853ae782a44..617df0a61fe 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innodb_plugin/ChangeLog
@@ -1,3 +1,10 @@
+2011-07-19 The InnoDB Team
+
+ * buf/buf0buf.c, buf/buf0rea.c, handler/ha_innodb.cc,
+ include/buf0buf.h, include/buf0buf.ic, include/srv0srv.h,
+ srv/srv0srv.c:
+ Fix bug#Bug 12356373 by reintroducing random readahead
+
2011-06-30 The InnoDB Team
* row/row0row.c:
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
index c0da7dbdbe5..cd1461d22b7 100644
--- a/storage/innodb_plugin/buf/buf0buf.c
+++ b/storage/innodb_plugin/buf/buf0buf.c
@@ -3590,12 +3590,16 @@ buf_print_io(
/* Statistics about read ahead algorithm */
fprintf(file, "Pages read ahead %.2f/s,"
- " evicted without access %.2f/s\n",
+ " evicted without access %.2f/s,"
+ " Random read ahead %.2f/s\n",
(buf_pool->stat.n_ra_pages_read
- buf_pool->old_stat.n_ra_pages_read)
/ time_elapsed,
(buf_pool->stat.n_ra_pages_evicted
- buf_pool->old_stat.n_ra_pages_evicted)
+ / time_elapsed,
+ (buf_pool->stat.n_ra_pages_read_rnd
+ - buf_pool->old_stat.n_ra_pages_read_rnd)
/ time_elapsed);
/* Print some values to help us with visualizing what is
diff --git a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
index 81f788baac2..0b36eeb5cd3 100644
--- a/storage/innodb_plugin/buf/buf0rea.c
+++ b/storage/innodb_plugin/buf/buf0rea.c
@@ -38,6 +38,14 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0start.h"
#include "srv0srv.h"
+/** The size in blocks of the area where the random read-ahead algorithm counts
+the accessed pages when deciding whether to read-ahead */
+#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
+
+/** There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+
/** The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
@@ -158,6 +166,165 @@ buf_read_page_low(
}
/********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value! */
+static
+ulint
+buf_read_ahead_random(
+/*==================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset) /*!< in: page number of a page which the current thread
+ wants to access */
+{
+ ib_int64_t tablespace_version;
+ ulint recent_blocks = 0;
+ ulint count;
+ ulint ibuf_mode;
+ ulint low, high;
+ ulint err;
+ ulint i;
+ ulint buf_read_ahead_random_area;
+
+ if (!srv_random_read_ahead) {
+ /* Disabled by user */
+ return(0);
+ }
+
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+ }
+
+ if (ibuf_bitmap_page(zip_size, offset)
+ || trx_sys_hdr_page(space, offset)) {
+
+ /* If it is an ibuf bitmap page or trx sys hdr, we do
+ no read-ahead, as that could break the ibuf page access
+ order */
+
+ return(0);
+ }
+
+ /* Remember the tablespace version before we ask the tablespace size
+ below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+ do not try to read outside the bounds of the tablespace! */
+
+ tablespace_version = fil_space_get_version(space);
+
+ buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
+
+ low = (offset / buf_read_ahead_random_area)
+ * buf_read_ahead_random_area;
+ high = (offset / buf_read_ahead_random_area + 1)
+ * buf_read_ahead_random_area;
+ if (high > fil_space_get_size(space)) {
+
+ high = fil_space_get_size(space);
+ }
+
+ buf_pool_mutex_enter();
+
+ if (buf_pool->n_pend_reads
+ > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+ buf_pool_mutex_exit();
+
+ return(0);
+ }
+
+ /* Count how many blocks in the area have been recently accessed,
+ that is, reside near the start of the LRU list. */
+
+ for (i = low; i < high; i++) {
+ const buf_page_t* bpage = buf_page_hash_get(space, i);
+
+ if (bpage
+ && buf_page_is_accessed(bpage)
+ && buf_page_peek_if_young(bpage)) {
+
+ recent_blocks++;
+
+ if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+
+ buf_pool_mutex_exit();
+ goto read_ahead;
+ }
+ }
+ }
+
+ buf_pool_mutex_exit();
+ /* Do nothing */
+ return(0);
+
+read_ahead:
+ /* Read all the suitable blocks within the area */
+
+ if (ibuf_inside()) {
+ ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+ } else {
+ ibuf_mode = BUF_READ_ANY_PAGE;
+ }
+
+ count = 0;
+
+ for (i = low; i < high; i++) {
+ /* It is only sensible to do read-ahead in the non-sync aio
+ mode: hence FALSE as the first parameter */
+
+ if (!ibuf_bitmap_page(zip_size, i)) {
+ count += buf_read_page_low(
+ &err, FALSE,
+ ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+ space, zip_size, FALSE,
+ tablespace_version, i);
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Warning: in random"
+ " readahead trying to access\n"
+ "InnoDB: tablespace %lu page %lu,\n"
+ "InnoDB: but the tablespace does not"
+ " exist or is just being dropped.\n",
+ (ulong) space, (ulong) i);
+ }
+ }
+ }
+
+ /* In simulated aio we wake the aio handler threads only after
+ queuing all aio requests, in native aio the following call does
+ nothing: */
+
+ os_aio_simulated_wake_handler_threads();
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints && (count > 0)) {
+ fprintf(stderr,
+ "Random read-ahead space %lu offset %lu pages %lu\n",
+ (ulong) space, (ulong) offset,
+ (ulong) count);
+ }
+#endif /* UNIV_DEBUG */
+
+ /* Read ahead is considered one I/O operation for the purpose of
+ LRU policy decision. */
+ buf_LRU_stat_inc_io();
+
+ buf_pool->stat.n_ra_pages_read_rnd += count;
+ return(count);
+}
+
+
+/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
@@ -175,6 +342,9 @@ buf_read_page(
ulint count;
ulint err;
+ count = buf_read_ahead_random(space, zip_size, offset);
+ srv_buf_pool_reads += count;
+
tablespace_version = fil_space_get_version(space);
/* We do the i/o in the synchronous aio mode to save thread
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
index a9c9f379528..6f88ea3fde1 100644
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -501,6 +501,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
{"buffer_pool_pages_total",
(char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
+ {"buffer_pool_read_ahead_rnd",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
{"buffer_pool_read_ahead",
(char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
{"buffer_pool_read_ahead_evicted",
@@ -11027,6 +11029,11 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
+ PLUGIN_VAR_NOCMDARG,
+ "Whether to use read ahead for random access within an extent.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
PLUGIN_VAR_RQCMDARG,
"Number of pages that must be accessed sequentially for InnoDB to "
@@ -11091,6 +11098,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+ MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(io_capacity),
NULL
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
index 86c47a6edba..9856bfce409 100644
--- a/storage/innodb_plugin/include/buf0buf.h
+++ b/storage/innodb_plugin/include/buf0buf.h
@@ -427,6 +427,18 @@ buf_block_get_freed_page_clock(
__attribute__((pure));
/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+ const buf_page_t* bpage); /*!< in: block */
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
@@ -1334,6 +1346,8 @@ struct buf_pool_stat_struct{
ulint n_pages_written;/*!< number write operations */
ulint n_pages_created;/*!< number of pages created
in the pool with no read */
+ ulint n_ra_pages_read_rnd;/*!< number of pages read in
+ as part of random read ahead */
ulint n_ra_pages_read;/*!< number of pages read in
as part of read ahead */
ulint n_ra_pages_evicted;/*!< number of read ahead
diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
index 35c63f034f5..2f3d65e80bd 100644
--- a/storage/innodb_plugin/include/buf0buf.ic
+++ b/storage/innodb_plugin/include/buf0buf.ic
@@ -62,6 +62,27 @@ buf_block_get_freed_page_clock(
}
/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+ const buf_page_t* bpage) /*!< in: block */
+{
+ /* FIXME: bpage->freed_page_clock is 31 bits */
+ return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+ < ((ulint) bpage->freed_page_clock
+ + (buf_pool->curr_size
+ * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
+ / (BUF_LRU_OLD_RATIO_DIV * 4))));
+}
+
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
@@ -89,12 +110,7 @@ buf_page_peek_if_too_old(
buf_pool->stat.n_pages_not_made_young++;
return(FALSE);
} else {
- /* FIXME: bpage->freed_page_clock is 31 bits */
- return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
- > ((ulint) bpage->freed_page_clock
- + (buf_pool->curr_size
- * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
- / (BUF_LRU_OLD_RATIO_DIV * 4))));
+ return(!buf_page_peek_if_young(bpage));
}
}
diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
index 91ae895040c..7c63a5f0d45 100644
--- a/storage/innodb_plugin/include/srv0srv.h
+++ b/storage/innodb_plugin/include/srv0srv.h
@@ -143,6 +143,7 @@ extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
+extern my_bool srv_random_read_ahead;
extern ulong srv_read_ahead_threshold;
extern ulint srv_n_read_io_threads;
extern ulint srv_n_write_io_threads;
@@ -618,6 +619,7 @@ struct export_var_struct{
ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+ ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
index b1fc1ac67fd..8ad4c02e322 100644
--- a/storage/innodb_plugin/srv/srv0srv.c
+++ b/storage/innodb_plugin/srv/srv0srv.c
@@ -203,6 +203,8 @@ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
+/* Switch to enable random read ahead. */
+UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
/* User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
@@ -1906,6 +1908,8 @@ srv_export_innodb_status(void)
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+ export_vars.innodb_buffer_pool_read_ahead_rnd
+ = buf_pool->stat.n_ra_pages_read_rnd;
export_vars.innodb_buffer_pool_read_ahead
= buf_pool->stat.n_ra_pages_read;
export_vars.innodb_buffer_pool_read_ahead_evicted