summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/innodb/r/innodb_bug44571.result9
-rw-r--r--mysql-test/suite/innodb/t/innodb_bug44571.test18
-rw-r--r--storage/innodb_plugin/ChangeLog34
-rw-r--r--storage/innodb_plugin/btr/btr0sea.c2
-rw-r--r--storage/innodb_plugin/buf/buf0buf.c220
-rw-r--r--storage/innodb_plugin/buf/buf0lru.c227
-rw-r--r--storage/innodb_plugin/buf/buf0rea.c207
-rw-r--r--storage/innodb_plugin/dict/dict0dict.c23
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc51
-rw-r--r--storage/innodb_plugin/include/buf0buf.h115
-rw-r--r--storage/innodb_plugin/include/buf0buf.ic70
-rw-r--r--storage/innodb_plugin/include/buf0lru.h52
-rw-r--r--storage/innodb_plugin/include/buf0rea.h8
-rw-r--r--storage/innodb_plugin/include/buf0types.h2
-rw-r--r--storage/innodb_plugin/include/dict0dict.h2
-rw-r--r--storage/innodb_plugin/include/os0sync.h4
-rw-r--r--storage/innodb_plugin/include/srv0srv.h16
-rw-r--r--storage/innodb_plugin/include/univ.i4
-rw-r--r--storage/innodb_plugin/include/ut0ut.h9
-rw-r--r--storage/innodb_plugin/plug.in51
-rw-r--r--storage/innodb_plugin/row/row0merge.c208
-rw-r--r--storage/innodb_plugin/srv/srv0srv.c53
-rw-r--r--storage/innodb_plugin/srv/srv0start.c6
-rw-r--r--storage/innodb_plugin/ut/ut0ut.c17
24 files changed, 810 insertions, 598 deletions
diff --git a/mysql-test/suite/innodb/r/innodb_bug44571.result b/mysql-test/suite/innodb/r/innodb_bug44571.result
new file mode 100644
index 00000000000..36374edcb3e
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug44571.result
@@ -0,0 +1,9 @@
+CREATE TABLE bug44571 (foo INT) ENGINE=InnoDB;
+ALTER TABLE bug44571 CHANGE foo bar INT;
+ALTER TABLE bug44571 ADD INDEX bug44571b (foo);
+ERROR 42000: Key column 'foo' doesn't exist in table
+ALTER TABLE bug44571 ADD INDEX bug44571b (bar);
+ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it
+CREATE INDEX bug44571b ON bug44571 (bar);
+ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it
+DROP TABLE bug44571;
diff --git a/mysql-test/suite/innodb/t/innodb_bug44571.test b/mysql-test/suite/innodb/t/innodb_bug44571.test
new file mode 100644
index 00000000000..43f290cde84
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug44571.test
@@ -0,0 +1,18 @@
+#
+# Bug#44571 InnoDB Plugin crashes on ADD INDEX
+# http://bugs.mysql.com/44571
+#
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+CREATE TABLE bug44571 (foo INT) ENGINE=InnoDB;
+ALTER TABLE bug44571 CHANGE foo bar INT;
+-- error ER_KEY_COLUMN_DOES_NOT_EXITS
+ALTER TABLE bug44571 ADD INDEX bug44571b (foo);
+# The following will fail, because the CHANGE foo bar was
+# not communicated to InnoDB.
+--error ER_NOT_KEYFILE
+ALTER TABLE bug44571 ADD INDEX bug44571b (bar);
+--error ER_NOT_KEYFILE
+CREATE INDEX bug44571b ON bug44571 (bar);
+DROP TABLE bug44571;
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
index b488b21c768..df71a1c25a5 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innodb_plugin/ChangeLog
@@ -1,3 +1,37 @@
+2009-08-27 The InnoDB Team
+
+ * dict/dict0dict.c, include/dict0dict.h,
+ mysql-test/innodb_bug44571.result, mysql-test/innodb_bug44571.test:
+ Fix Bug#44571 InnoDB Plugin crashes on ADD INDEX
+
+2009-08-27 The InnoDB Team
+
+ * row/row0merge.c:
+ Fix a bug in the merge sort that can corrupt indexes in fast index
+ creation. Add some consistency checks. Check that the number of
+ records remains constant in every merge sort pass.
+
+2009-08-27 The InnoDB Team
+
+ * buf/buf0buf.c, buf/buf0lru.c, buf/buf0rea.c,
+ handler/ha_innodb.cc, include/buf0buf.h, include/buf0buf.ic,
+ include/buf0lru.h, include/ut0ut.h, ut/ut0ut.c:
+ Make it possible to tune the buffer pool LRU eviction policy to be
+ more resistant against index scans. Introduce the settable global
+ variables innodb_old_blocks_pct and innodb_old_blocks_time for
+ controlling the buffer pool eviction policy. The parameter
+ innodb_old_blocks_pct (5..95) controls the desired amount of "old"
+ blocks in the LRU list. The default is 37, corresponding to the
+ old fixed ratio of 3/8. Each time a block is accessed, it will be
+ moved to the "new" blocks if its first access was at least
+ innodb_old_blocks_time milliseconds ago (default 0, meaning every
+ block). The idea is that in index scans, blocks will be accessed
+ a few times within innodb_old_blocks_time, and they will remain in
+ the "old" section of the LRU list. Thus, when
+ innodb_old_blocks_time is nonzero, blocks retrieved for one-time
+ index scans will be more likely candidates for eviction than
+ blocks that are accessed in random patterns.
+
2009-08-26 The InnoDB Team
* handler/ha_innodb.cc, os/os0file.c:
diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c
index faa1c13897e..0a80c61a58d 100644
--- a/storage/innodb_plugin/btr/btr0sea.c
+++ b/storage/innodb_plugin/btr/btr0sea.c
@@ -957,7 +957,7 @@ btr_search_guess_on_hash(
/* Increment the page get statistics though we did not really
fix the page: for user info only */
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
index 0008fcb1271..6af3dff460a 100644
--- a/storage/innodb_plugin/buf/buf0buf.c
+++ b/storage/innodb_plugin/buf/buf0buf.c
@@ -837,16 +837,35 @@ buf_chunk_not_freed(
block = chunk->blocks;
for (i = chunk->size; i--; block++) {
- mutex_enter(&block->mutex);
-
- if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
- && !buf_flush_ready_for_replace(&block->page)) {
+ ibool ready;
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* The uncompressed buffer pool should never
+ contain compressed block descriptors. */
+ ut_error;
+ break;
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ /* Skip blocks that are not being used for
+ file pages. */
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ mutex_enter(&block->mutex);
+ ready = buf_flush_ready_for_replace(&block->page);
mutex_exit(&block->mutex);
- return(block);
- }
- mutex_exit(&block->mutex);
+ if (!ready) {
+
+ return(block);
+ }
+
+ break;
+ }
}
return(NULL);
@@ -966,8 +985,6 @@ buf_pool_init(void)
buf_pool->no_flush[i] = os_event_create(NULL);
}
- buf_pool->ulint_clock = 1;
-
/* 3. Initialize LRU fields
--------------------------- */
/* All fields are initialized by mem_zalloc(). */
@@ -1471,31 +1488,6 @@ buf_pool_resize(void)
}
/********************************************************************//**
-Moves the block to the start of the LRU list if there is a danger
-that the block would drift out of the buffer pool. */
-UNIV_INLINE
-void
-buf_block_make_young(
-/*=================*/
- buf_page_t* bpage) /*!< in: block to make younger */
-{
- ut_ad(!buf_pool_mutex_own());
-
- /* Note that we read freed_page_clock's without holding any mutex:
- this is allowed since the result is used only in heuristics */
-
- if (buf_page_peek_if_too_old(bpage)) {
-
- buf_pool_mutex_enter();
- /* There has been freeing activity in the LRU list:
- best to move to the head of the LRU list */
-
- buf_LRU_make_block_young(bpage);
- buf_pool_mutex_exit();
- }
-}
-
-/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from from slipping out of
the buffer pool. */
@@ -1649,7 +1641,7 @@ buf_page_get_zip(
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
for (;;) {
buf_pool_mutex_enter();
@@ -1713,13 +1705,15 @@ err_exit:
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- buf_pool_mutex_exit();
+ if (buf_page_peek_if_too_old(bpage)) {
+ buf_LRU_make_block_young(bpage);
+ }
- buf_page_set_accessed(bpage, TRUE);
+ buf_page_set_accessed(bpage);
- mutex_exit(block_mutex);
+ buf_pool_mutex_exit();
- buf_block_make_young(bpage);
+ mutex_exit(block_mutex);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
@@ -2000,7 +1994,7 @@ buf_page_get_gen(
mtr_t* mtr) /*!< in: mini-transaction */
{
buf_block_t* block;
- ibool accessed;
+ unsigned access_time;
ulint fix_type;
ibool must_read;
@@ -2016,7 +2010,7 @@ buf_page_get_gen(
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
loop:
block = guess;
buf_pool_mutex_enter();
@@ -2243,17 +2237,20 @@ wait_until_unfixed:
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
buf_block_buf_fix_inc(block, file, line);
- buf_pool_mutex_exit();
+
+ mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
- accessed = buf_page_is_accessed(&block->page);
+ access_time = buf_page_is_accessed(&block->page);
- buf_page_set_accessed(&block->page, TRUE);
+ if (buf_page_peek_if_too_old(&block->page)) {
+ buf_LRU_make_block_young(&block->page);
+ }
- mutex_exit(&block->mutex);
+ buf_page_set_accessed(&block->page);
- buf_block_make_young(&block->page);
+ buf_pool_mutex_exit();
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
@@ -2306,7 +2303,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type);
- if (!accessed) {
+ if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -2336,7 +2333,7 @@ buf_page_optimistic_get_func(
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
{
- ibool accessed;
+ unsigned access_time;
ibool success;
ulint fix_type;
@@ -2353,14 +2350,21 @@ buf_page_optimistic_get_func(
}
buf_block_buf_fix_inc(block, file, line);
- accessed = buf_page_is_accessed(&block->page);
- buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
- buf_block_make_young(&block->page);
+ buf_pool_mutex_enter();
/* Check if this is the first access to the page */
+ access_time = buf_page_is_accessed(&block->page);
+
+ if (buf_page_peek_if_too_old(&block->page)) {
+ buf_LRU_make_block_young(&block->page);
+ }
+
+ buf_page_set_accessed(&block->page);
+
+ buf_pool_mutex_exit();
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
@@ -2412,7 +2416,7 @@ buf_page_optimistic_get_func(
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif
- if (UNIV_UNLIKELY(!accessed)) {
+ if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -2425,7 +2429,7 @@ buf_page_optimistic_get_func(
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
}
@@ -2473,10 +2477,16 @@ buf_page_get_known_nowait(
mutex_exit(&block->mutex);
- if (mode == BUF_MAKE_YOUNG) {
- buf_block_make_young(&block->page);
+ buf_pool_mutex_enter();
+
+ if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+ buf_LRU_make_block_young(&block->page);
}
+ buf_page_set_accessed(&block->page);
+
+ buf_pool_mutex_exit();
+
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
@@ -2513,7 +2523,7 @@ buf_page_get_known_nowait(
|| (ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0));
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
}
@@ -2589,7 +2599,7 @@ buf_page_try_get_func(
#endif /* UNIV_DEBUG_FILE_ACCESSES */
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
@@ -2608,10 +2618,10 @@ buf_page_init_low(
buf_page_t* bpage) /*!< in: block to init */
{
bpage->flush_type = BUF_FLUSH_LRU;
- bpage->accessed = FALSE;
bpage->io_fix = BUF_IO_NONE;
bpage->buf_fix_count = 0;
bpage->freed_page_clock = 0;
+ bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
HASH_INVALIDATE(bpage, hash);
@@ -2953,7 +2963,7 @@ buf_page_create(
buf_LRU_add_block(&block->page, FALSE);
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- buf_pool->n_pages_created++;
+ buf_pool->stat.n_pages_created++;
if (zip_size) {
void* data;
@@ -2990,12 +3000,12 @@ buf_page_create(
rw_lock_x_unlock(&block->lock);
}
+ buf_page_set_accessed(&block->page);
+
buf_pool_mutex_exit();
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
- buf_page_set_accessed(&block->page, TRUE);
-
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
@@ -3201,7 +3211,7 @@ corrupt:
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- buf_pool->n_pages_read++;
+ buf_pool->stat.n_pages_read++;
if (uncompressed) {
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
@@ -3221,7 +3231,7 @@ corrupt:
BUF_IO_WRITE);
}
- buf_pool->n_pages_written++;
+ buf_pool->stat.n_pages_written++;
break;
@@ -3528,6 +3538,7 @@ buf_print(void)
"n pending decompressions %lu\n"
"n pending reads %lu\n"
"n pending flush LRU %lu list %lu single page %lu\n"
+ "pages made young %lu, not young %lu\n"
"pages read %lu, created %lu, written %lu\n",
(ulong) size,
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
@@ -3538,8 +3549,11 @@ buf_print(void)
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
- (ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
- (ulong) buf_pool->n_pages_written);
+ (ulong) buf_pool->stat.n_pages_made_young,
+ (ulong) buf_pool->stat.n_pages_not_made_young,
+ (ulong) buf_pool->stat.n_pages_read,
+ (ulong) buf_pool->stat.n_pages_created,
+ (ulong) buf_pool->stat.n_pages_written);
/* Count the number of blocks belonging to each index in the buffer */
@@ -3744,10 +3758,9 @@ buf_print_io(
{
time_t current_time;
double time_elapsed;
- ulint size;
+ ulint n_gets_diff;
ut_ad(buf_pool);
- size = buf_pool->curr_size;
buf_pool_mutex_enter();
@@ -3755,12 +3768,14 @@ buf_print_io(
"Buffer pool size %lu\n"
"Free buffers %lu\n"
"Database pages %lu\n"
+ "Old database pages %lu\n"
"Modified db pages %lu\n"
"Pending reads %lu\n"
"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
- (ulong) size,
+ (ulong) buf_pool->curr_size,
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+ (ulong) buf_pool->LRU_old_len,
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
(ulong) buf_pool->n_pend_reads,
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
@@ -3772,37 +3787,66 @@ buf_print_io(
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
buf_pool->last_printout_time);
- buf_pool->last_printout_time = current_time;
fprintf(file,
+ "Pages made young %lu, not young %lu\n"
+ "%.2f youngs/s, %.2f non-youngs/s\n"
"Pages read %lu, created %lu, written %lu\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
- (ulong) buf_pool->n_pages_read,
- (ulong) buf_pool->n_pages_created,
- (ulong) buf_pool->n_pages_written,
- (buf_pool->n_pages_read - buf_pool->n_pages_read_old)
+ (ulong) buf_pool->stat.n_pages_made_young,
+ (ulong) buf_pool->stat.n_pages_not_made_young,
+ (buf_pool->stat.n_pages_made_young
+ - buf_pool->old_stat.n_pages_made_young)
+ / time_elapsed,
+ (buf_pool->stat.n_pages_not_made_young
+ - buf_pool->old_stat.n_pages_not_made_young)
/ time_elapsed,
- (buf_pool->n_pages_created - buf_pool->n_pages_created_old)
+ (ulong) buf_pool->stat.n_pages_read,
+ (ulong) buf_pool->stat.n_pages_created,
+ (ulong) buf_pool->stat.n_pages_written,
+ (buf_pool->stat.n_pages_read
+ - buf_pool->old_stat.n_pages_read)
/ time_elapsed,
- (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
+ (buf_pool->stat.n_pages_created
+ - buf_pool->old_stat.n_pages_created)
+ / time_elapsed,
+ (buf_pool->stat.n_pages_written
+ - buf_pool->old_stat.n_pages_written)
/ time_elapsed);
- if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
- fprintf(file, "Buffer pool hit rate %lu / 1000\n",
+ n_gets_diff = buf_pool->stat.n_page_gets - buf_pool->old_stat.n_page_gets;
+
+ if (n_gets_diff) {
+ fprintf(file,
+ "Buffer pool hit rate %lu / 1000,"
+ " young-making rate %lu / 1000 not %lu / 1000\n",
(ulong)
- (1000 - ((1000 * (buf_pool->n_pages_read
- - buf_pool->n_pages_read_old))
- / (buf_pool->n_page_gets
- - buf_pool->n_page_gets_old))));
+ (1000 - ((1000 * (buf_pool->stat.n_pages_read
+ - buf_pool->old_stat.n_pages_read))
+ / (buf_pool->stat.n_page_gets
+ - buf_pool->old_stat.n_page_gets))),
+ (ulong)
+ (1000 * (buf_pool->stat.n_pages_made_young
+ - buf_pool->old_stat.n_pages_made_young)
+ / n_gets_diff),
+ (ulong)
+ (1000 * (buf_pool->stat.n_pages_not_made_young
+ - buf_pool->old_stat.n_pages_not_made_young)
+ / n_gets_diff));
} else {
fputs("No buffer pool page gets since the last printout\n",
file);
}
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+ /* Statistics about read ahead algorithm */
+ fprintf(file, "Pages read ahead %.2f/s,"
+ " evicted without access %.2f/s\n",
+ (buf_pool->stat.n_ra_pages_read
+ - buf_pool->old_stat.n_ra_pages_read)
+ / time_elapsed,
+ (buf_pool->stat.n_ra_pages_evicted
+ - buf_pool->old_stat.n_ra_pages_evicted)
+ / time_elapsed);
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
@@ -3814,6 +3858,7 @@ buf_print_io(
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+ buf_refresh_io_stats();
buf_pool_mutex_exit();
}
@@ -3825,10 +3870,7 @@ buf_refresh_io_stats(void)
/*======================*/
{
buf_pool->last_printout_time = time(NULL);
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+ buf_pool->old_stat = buf_pool->stat;
}
/*********************************************************************//**
diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
index be53a5f5d9d..223746e0d01 100644
--- a/storage/innodb_plugin/buf/buf0lru.c
+++ b/storage/innodb_plugin/buf/buf0lru.c
@@ -49,14 +49,23 @@ Created 11/5/1995 Heikki Tuuri
#include "log0recv.h"
#include "srv0srv.h"
-/** The number of blocks from the LRU_old pointer onward, including the block
-pointed to, must be 3/8 of the whole LRU list length, except that the
-tolerance defined below is allowed. Note that the tolerance must be small
-enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
-LRU_old pointer is not allowed to point to either end of the LRU list. */
+/** The number of blocks from the LRU_old pointer onward, including
+the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+of the whole LRU list length, except that the tolerance defined below
+is allowed. Note that the tolerance must be small enough such that for
+even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
+allowed to point to either end of the LRU list. */
#define BUF_LRU_OLD_TOLERANCE 20
+/** The minimum amount of non-old blocks when the LRU_old list exists
+(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
+@see buf_LRU_old_adjust_len */
+#define BUF_LRU_NON_OLD_MIN_LEN 5
+#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
+# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
+#endif
+
/** The whole LRU list length is divided by this number to determine an
initial segment in buf_LRU_get_recent_limit */
@@ -107,6 +116,15 @@ UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
/* @} */
+/** @name Heuristics for detecting index scan @{ */
+/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
+"old" blocks. Protected by buf_pool_mutex. */
+UNIV_INTERN uint buf_LRU_old_ratio;
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+UNIV_INTERN uint buf_LRU_old_threshold_ms;
+/* @} */
+
/******************************************************************//**
Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
@@ -428,42 +446,6 @@ next_page:
}
}
-/******************************************************************//**
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around.
-@return the limit; zero if could not determine it */
-UNIV_INTERN
-ulint
-buf_LRU_get_recent_limit(void)
-/*==========================*/
-{
- const buf_page_t* bpage;
- ulint len;
- ulint limit;
-
- buf_pool_mutex_enter();
-
- len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- if (len < BUF_LRU_OLD_MIN_LEN) {
- /* The LRU list is too short to do read-ahead */
-
- buf_pool_mutex_exit();
-
- return(0);
- }
-
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- limit = buf_page_get_LRU_position(bpage);
- len /= BUF_LRU_INITIAL_RATIO;
-
- buf_pool_mutex_exit();
-
- return(limit > len ? (limit - len) : 0);
-}
-
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
@@ -594,6 +576,7 @@ buf_LRU_free_from_common_LRU_list(
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed;
+ unsigned accessed;
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
@@ -601,11 +584,18 @@ buf_LRU_free_from_common_LRU_list(
ut_ad(bpage->in_LRU_list);
mutex_enter(block_mutex);
+ accessed = buf_page_is_accessed(bpage);
freed = buf_LRU_free_block(bpage, TRUE, NULL);
mutex_exit(block_mutex);
switch (freed) {
case BUF_LRU_FREED:
+ /* Keep track of pages that are evicted without
+ ever being accessed. This gives us a measure of
+ the effectiveness of readahead */
+ if (!accessed) {
+ ++buf_pool->stat.n_ra_pages_evicted;
+ }
return(TRUE);
case BUF_LRU_NOT_FREED:
@@ -953,8 +943,10 @@ buf_LRU_old_adjust_len(void)
ut_a(buf_pool->LRU_old);
ut_ad(buf_pool_mutex_own());
-#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
-# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
+ ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+ ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
#endif
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
@@ -966,34 +958,39 @@ buf_LRU_old_adjust_len(void)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
+ old_len = buf_pool->LRU_old_len;
+ new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+ * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+ UT_LIST_GET_LEN(buf_pool->LRU)
+ - (BUF_LRU_OLD_TOLERANCE
+ + BUF_LRU_NON_OLD_MIN_LEN));
+
for (;;) {
- old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+ buf_page_t* LRU_old = buf_pool->LRU_old;
- ut_ad(buf_pool->LRU_old->in_LRU_list);
- ut_a(buf_pool->LRU_old);
+ ut_a(LRU_old);
+ ut_ad(LRU_old->in_LRU_list);
#ifdef UNIV_LRU_DEBUG
- ut_a(buf_pool->LRU_old->old);
+ ut_a(LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
/* Update the LRU_old pointer if necessary */
- if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+ if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
- buf_pool->LRU_old = UT_LIST_GET_PREV(
- LRU, buf_pool->LRU_old);
+ buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
+ LRU, LRU_old);
#ifdef UNIV_LRU_DEBUG
- ut_a(!buf_pool->LRU_old->old);
+ ut_a(!LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
- buf_page_set_old(buf_pool->LRU_old, TRUE);
- buf_pool->LRU_old_len++;
+ buf_page_set_old(LRU_old, TRUE);
+ old_len = ++buf_pool->LRU_old_len;
} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
- buf_page_set_old(buf_pool->LRU_old, FALSE);
- buf_pool->LRU_old = UT_LIST_GET_NEXT(
- LRU, buf_pool->LRU_old);
- buf_pool->LRU_old_len--;
+ buf_page_set_old(LRU_old, FALSE);
+ buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
+ old_len = --buf_pool->LRU_old_len;
} else {
return;
}
@@ -1021,6 +1018,7 @@ buf_LRU_old_init(void)
while (bpage != NULL) {
ut_ad(bpage->in_LRU_list);
+ ut_ad(buf_page_in_file(bpage));
buf_page_set_old(bpage, TRUE);
bpage = UT_LIST_GET_NEXT(LRU, bpage);
}
@@ -1075,16 +1073,19 @@ buf_LRU_remove_block(
if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
- /* Below: the previous block is guaranteed to exist, because
- the LRU_old pointer is only allowed to differ by the
- tolerance value from strict 3/8 of the LRU list length. */
+ /* Below: the previous block is guaranteed to exist,
+ because the LRU_old pointer is only allowed to differ
+ by BUF_LRU_OLD_TOLERANCE from strict
+ buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
+ list length. */
+ buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage);
- ut_a(buf_pool->LRU_old);
+ ut_a(prev_bpage);
#ifdef UNIV_LRU_DEBUG
- ut_a(!buf_pool->LRU_old->old);
+ ut_a(!prev_bpage->old);
#endif /* UNIV_LRU_DEBUG */
- buf_page_set_old(buf_pool->LRU_old, TRUE);
+ buf_pool->LRU_old = prev_bpage;
+ buf_page_set_old(prev_bpage, TRUE);
buf_pool->LRU_old_len++;
}
@@ -1149,39 +1150,25 @@ buf_LRU_add_block_to_end_low(
/*=========================*/
buf_page_t* bpage) /*!< in: control block */
{
- buf_page_t* last_bpage;
-
ut_ad(buf_pool);
ut_ad(bpage);
ut_ad(buf_pool_mutex_own());
ut_a(buf_page_in_file(bpage));
- last_bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- if (last_bpage) {
- bpage->LRU_position = last_bpage->LRU_position;
- } else {
- bpage->LRU_position = buf_pool_clock_tic();
- }
-
ut_ad(!bpage->in_LRU_list);
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = TRUE);
buf_page_set_old(bpage, TRUE);
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
- buf_pool->LRU_old_len++;
- }
-
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
/* Adjust the length of the old block list if necessary */
+ buf_pool->LRU_old_len++;
buf_LRU_old_adjust_len();
} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
@@ -1189,6 +1176,7 @@ buf_LRU_add_block_to_end_low(
/* The LRU list is now long enough for LRU_old to become
defined: init it */
+ buf_pool->LRU_old_len++;
buf_LRU_old_init();
}
@@ -1222,7 +1210,6 @@ buf_LRU_add_block_low(
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
- bpage->LRU_position = buf_pool_clock_tic();
bpage->freed_page_clock = buf_pool->freed_page_clock;
} else {
#ifdef UNIV_LRU_DEBUG
@@ -1237,11 +1224,6 @@ buf_LRU_add_block_low(
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
bpage);
buf_pool->LRU_old_len++;
-
- /* We copy the LRU position field of the previous block
- to the new block */
-
- bpage->LRU_position = (buf_pool->LRU_old)->LRU_position;
}
ut_d(bpage->in_LRU_list = TRUE);
@@ -1295,6 +1277,9 @@ buf_LRU_make_block_young(
/*=====================*/
buf_page_t* bpage) /*!< in: control block */
{
+ ut_ad(buf_pool_mutex_own());
+ buf_pool->stat.n_pages_made_young++;
+
buf_LRU_remove_block(bpage);
buf_LRU_add_block_low(bpage, FALSE);
}
@@ -1847,6 +1832,48 @@ buf_LRU_block_free_hashed_page(
buf_LRU_block_free_non_file_page(block);
}
+/**********************************************************************//**
+Updates buf_LRU_old_ratio.
+@return updated old_pct */
+UNIV_INTERN
+uint
+buf_LRU_old_ratio_update(
+/*=====================*/
+ uint old_pct,/*!< in: Reserve this percentage of
+ the buffer pool for "old" blocks. */
+ ibool adjust) /*!< in: TRUE=adjust the LRU list;
+ FALSE=just assign buf_LRU_old_ratio
+ during the initialization of InnoDB */
+{
+ uint ratio;
+
+ ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
+ if (ratio < BUF_LRU_OLD_RATIO_MIN) {
+ ratio = BUF_LRU_OLD_RATIO_MIN;
+ } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
+ ratio = BUF_LRU_OLD_RATIO_MAX;
+ }
+
+ if (adjust) {
+ buf_pool_mutex_enter();
+
+ if (ratio != buf_LRU_old_ratio) {
+ buf_LRU_old_ratio = ratio;
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU)
+ >= BUF_LRU_OLD_MIN_LEN) {
+ buf_LRU_old_adjust_len();
+ }
+ }
+
+ buf_pool_mutex_exit();
+ } else {
+ buf_LRU_old_ratio = ratio;
+ }
+
+ return(ratio * 100 / BUF_LRU_OLD_RATIO_DIV);
+}
+
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
@@ -1896,7 +1923,6 @@ buf_LRU_validate(void)
buf_block_t* block;
ulint old_len;
ulint new_len;
- ulint LRU_pos;
ut_ad(buf_pool);
buf_pool_mutex_enter();
@@ -1905,7 +1931,11 @@ buf_LRU_validate(void)
ut_a(buf_pool->LRU_old);
old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+ new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+ * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+ UT_LIST_GET_LEN(buf_pool->LRU)
+ - (BUF_LRU_OLD_TOLERANCE
+ + BUF_LRU_NON_OLD_MIN_LEN));
ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
}
@@ -1943,16 +1973,7 @@ buf_LRU_validate(void)
ut_a(buf_pool->LRU_old == bpage);
}
- LRU_pos = buf_page_get_LRU_position(bpage);
-
bpage = UT_LIST_GET_NEXT(LRU, bpage);
-
- if (bpage) {
- /* If the following assert fails, it may
- not be an error: just the buf_pool clock
- has wrapped around */
- ut_a(LRU_pos >= buf_page_get_LRU_position(bpage));
- }
}
if (buf_pool->LRU_old) {
@@ -2000,9 +2021,6 @@ buf_LRU_print(void)
ut_ad(buf_pool);
buf_pool_mutex_enter();
- fprintf(stderr, "Pool ulint clock %lu\n",
- (ulong) buf_pool->ulint_clock);
-
bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
while (bpage != NULL) {
@@ -2033,18 +2051,16 @@ buf_LRU_print(void)
const byte* frame;
case BUF_BLOCK_FILE_PAGE:
frame = buf_block_get_frame((buf_block_t*) bpage);
- fprintf(stderr, "\nLRU pos %lu type %lu"
+ fprintf(stderr, "\ntype %lu"
" index id %lu\n",
- (ulong) buf_page_get_LRU_position(bpage),
(ulong) fil_page_get_type(frame),
(ulong) ut_dulint_get_low(
btr_page_get_index_id(frame)));
break;
case BUF_BLOCK_ZIP_PAGE:
frame = bpage->zip.data;
- fprintf(stderr, "\nLRU pos %lu type %lu size %lu"
+ fprintf(stderr, "\ntype %lu size %lu"
" index id %lu\n",
- (ulong) buf_page_get_LRU_position(bpage),
(ulong) fil_page_get_type(frame),
(ulong) buf_page_get_zip_size(bpage),
(ulong) ut_dulint_get_low(
@@ -2052,8 +2068,7 @@ buf_LRU_print(void)
break;
default:
- fprintf(stderr, "\nLRU pos %lu !state %lu!\n",
- (ulong) buf_page_get_LRU_position(bpage),
+ fprintf(stderr, "\n!state %lu!\n",
(ulong) buf_page_get_state(bpage));
break;
}
diff --git a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
index 4910f82ccde..dd98ea17eb5 100644
--- a/storage/innodb_plugin/buf/buf0rea.c
+++ b/storage/innodb_plugin/buf/buf0rea.c
@@ -38,14 +38,6 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0start.h"
#include "srv0srv.h"
-/** The size in blocks of the area where the random read-ahead algorithm counts
-the accessed pages when deciding whether to read-ahead */
-#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
-
-/** There must be at least this many pages in buf_pool in the area to start
-a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD (1 + BUF_READ_AHEAD_RANDOM_AREA / 2)
-
/** The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
@@ -62,7 +54,8 @@ flag is cleared and the x-lock released by an i/o-handler thread.
@return 1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped */
+not exist or is being dropped
+@return 1 if read request is issued. 0 if it is not */
static
ulint
buf_read_page_low(
@@ -165,174 +158,13 @@ buf_read_page_low(
}
/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value! */
-static
-ulint
-buf_read_ahead_random(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset) /*!< in: page number of a page which the current thread
- wants to access */
-{
- ib_int64_t tablespace_version;
- ulint recent_blocks = 0;
- ulint count;
- ulint LRU_recent_limit;
- ulint ibuf_mode;
- ulint low, high;
- ulint err;
- ulint i;
- ulint buf_read_ahead_random_area;
-
- /* We have currently disabled random readahead */
- return(0);
-
- if (srv_startup_is_before_trx_rollback_phase) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask te tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
-
- low = (offset / buf_read_ahead_random_area)
- * buf_read_ahead_random_area;
- high = (offset / buf_read_ahead_random_area + 1)
- * buf_read_ahead_random_area;
- if (high > fil_space_get_size(space)) {
-
- high = fil_space_get_size(space);
- }
-
- /* Get the minimum LRU_position field value for an initial segment
- of the LRU list, to determine which blocks have recently been added
- to the start of the list. */
-
- LRU_recent_limit = buf_LRU_get_recent_limit();
-
- buf_pool_mutex_enter();
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- buf_pool_mutex_exit();
-
- return(0);
- }
-
- /* Count how many blocks in the area have been recently accessed,
- that is, reside near the start of the LRU list. */
-
- for (i = low; i < high; i++) {
- const buf_page_t* bpage = buf_page_hash_get(space, i);
-
- if (bpage
- && buf_page_is_accessed(bpage)
- && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
-
- recent_blocks++;
-
- if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
-
- buf_pool_mutex_exit();
- goto read_ahead;
- }
- }
- }
-
- buf_pool_mutex_exit();
- /* Do nothing */
- return(0);
-
-read_ahead:
- /* Read all the suitable blocks within the area */
-
- if (ibuf_inside()) {
- ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
- } else {
- ibuf_mode = BUF_READ_ANY_PAGE;
- }
-
- count = 0;
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync aio
- mode: hence FALSE as the first parameter */
-
- if (!ibuf_bitmap_page(zip_size, i)) {
- count += buf_read_page_low(
- &err, FALSE,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, FALSE,
- tablespace_version, i);
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: in random"
- " readahead trying to access\n"
- "InnoDB: tablespace %lu page %lu,\n"
- "InnoDB: but the tablespace does not"
- " exist or is just being dropped.\n",
- (ulong) space, (ulong) i);
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "Random read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset,
- (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- ++srv_read_ahead_rnd;
- return(count);
-}
-
-/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible.
-@return number of page read requests issued: this can be greater than
-1 if read-ahead occurred */
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
-ulint
+ibool
buf_read_page(
/*==========*/
ulint space, /*!< in: space id */
@@ -341,20 +173,17 @@ buf_read_page(
{
ib_int64_t tablespace_version;
ulint count;
- ulint count2;
ulint err;
tablespace_version = fil_space_get_version(space);
- count = buf_read_ahead_random(space, zip_size, offset);
-
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
- count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
- zip_size, FALSE,
- tablespace_version, offset);
- srv_buf_pool_reads+= count2;
+ count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ zip_size, FALSE,
+ tablespace_version, offset);
+ srv_buf_pool_reads += count;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -371,7 +200,7 @@ buf_read_page(
/* Increment number of I/O operations used for LRU policy. */
buf_LRU_stat_inc_io();
- return(count + count2);
+ return(count > 0);
}
/********************************************************************//**
@@ -498,9 +327,17 @@ buf_read_ahead_linear(
fail_count++;
} else if (pred_bpage) {
- int res = (ut_ulint_cmp(
- buf_page_get_LRU_position(bpage),
- buf_page_get_LRU_position(pred_bpage)));
+ /* Note that buf_page_is_accessed() returns
+ the time of the first access. If some blocks
+ of the extent existed in the buffer pool at
+ the time of a linear access pattern, the first
+ access times may be nonmonotonic, even though
+ the latest access times were linear. The
+ threshold (srv_read_ahead_factor) should help
+ a little against this. */
+ int res = ut_ulint_cmp(
+ buf_page_is_accessed(bpage),
+ buf_page_is_accessed(pred_bpage));
/* Accesses not in the right order */
if (res != 0 && res != asc_or_desc) {
fail_count++;
@@ -643,7 +480,7 @@ buf_read_ahead_linear(
LRU policy decision. */
buf_LRU_stat_inc_io();
- ++srv_read_ahead_seq;
+ buf_pool->stat.n_ra_pages_read += count;
return(count);
}
diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c
index d1f0e0ffc19..a8ee593e35c 100644
--- a/storage/innodb_plugin/dict/dict0dict.c
+++ b/storage/innodb_plugin/dict/dict0dict.c
@@ -82,9 +82,10 @@ static char dict_ibfk[] = "_ibfk_";
/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
-index. */
+index.
+@return TRUE if the column names were found */
static
-void
+ibool
dict_index_find_cols(
/*=================*/
dict_table_t* table, /*!< in: table */
@@ -1431,7 +1432,7 @@ add_field_size:
/**********************************************************************//**
Adds an index to the dictionary cache.
-@return DB_SUCCESS or DB_TOO_BIG_RECORD */
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
ulint
dict_index_add_to_cache(
@@ -1457,7 +1458,10 @@ dict_index_add_to_cache(
ut_a(!dict_index_is_clust(index)
|| UT_LIST_GET_LEN(table->indexes) == 0);
- dict_index_find_cols(table, index);
+ if (!dict_index_find_cols(table, index)) {
+
+ return(DB_CORRUPTION);
+ }
/* Build the cache internal representation of the index,
containing also the added system fields */
@@ -1665,9 +1669,10 @@ dict_index_remove_from_cache(
/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
-index. */
+index.
+@return TRUE if the column names were found */
static
-void
+ibool
dict_index_find_cols(
/*=================*/
dict_table_t* table, /*!< in: table */
@@ -1692,17 +1697,21 @@ dict_index_find_cols(
}
}
+#ifdef UNIV_DEBUG
/* It is an error not to find a matching column. */
fputs("InnoDB: Error: no matching column for ", stderr);
ut_print_name(stderr, NULL, FALSE, field->name);
fputs(" in ", stderr);
dict_index_name_print(stderr, NULL, index);
fputs("!\n", stderr);
- ut_error;
+#endif /* UNIV_DEBUG */
+ return(FALSE);
found:
;
}
+
+ return(TRUE);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
index 616c227ae0c..d279f0ba4f3 100644
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -72,6 +72,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
/* Include necessary InnoDB headers */
extern "C" {
#include "univ.i"
+#include "buf0lru.h"
#include "btr0sea.h"
#include "os0file.h"
#include "os0thread.h"
@@ -152,6 +153,10 @@ static ulong innobase_write_io_threads;
static long long innobase_buffer_pool_size, innobase_log_file_size;
+/** Percentage of the buffer pool to reserve for 'old' blocks.
+Connected to buf_LRU_old_ratio. */
+static uint innobase_old_blocks_pct;
+
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
@@ -490,10 +495,10 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
{"buffer_pool_pages_total",
(char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
- {"buffer_pool_read_ahead_rnd",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
- {"buffer_pool_read_ahead_seq",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
+ {"buffer_pool_read_ahead",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
+ {"buffer_pool_read_ahead_evicted",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
{"buffer_pool_read_requests",
(char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
{"buffer_pool_reads",
@@ -2204,6 +2209,9 @@ innobase_change_buffering_inited_ok:
ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
srv_latin1_ordering = my_charset_latin1.sort_order;
+ innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+ innobase_old_blocks_pct, FALSE);
+
innobase_commit_concurrency_init_default();
/* Since we in this module access directly the fields of a trx
@@ -9610,6 +9618,25 @@ innodb_adaptive_hash_index_update(
}
}
+/****************************************************************//**
+Update the system variable innodb_old_blocks_pct using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_old_blocks_pct_update(
+/*=========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+ *static_cast<const uint*>(save), TRUE);
+}
+
/*************************************************************//**
Check if it is a valid value of innodb_change_buffering. This function is
registered as a callback with MySQL.
@@ -9847,7 +9874,7 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
NULL, NULL, 500L, 1L, ~0L, 0);
static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
"Number of file I/O threads in InnoDB.",
NULL, NULL, 4, 4, 64, 0);
@@ -9886,6 +9913,18 @@ static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
NULL, NULL, 1, 1, 10, 0);
+static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of the buffer pool to reserve for 'old' blocks.",
+ NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);
+
+static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
+ PLUGIN_VAR_RQCMDARG,
+ "Move blocks to the 'new' end of the buffer pool if the first access"
+ " was at least this many milliseconds ago."
+ " The timeout is disabled if 0 (the default).",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
@@ -9984,6 +10023,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(adaptive_flushing),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(mirrored_log_groups),
+ MYSQL_SYSVAR(old_blocks_pct),
+ MYSQL_SYSVAR(old_blocks_time),
MYSQL_SYSVAR(open_files),
MYSQL_SYSVAR(rollback_on_timeout),
MYSQL_SYSVAR(stats_on_metadata),
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
index a24fc4cdbbb..299312e5d10 100644
--- a/storage/innodb_plugin/include/buf0buf.h
+++ b/storage/innodb_plugin/include/buf0buf.h
@@ -707,15 +707,6 @@ buf_page_belongs_to_unzip_LRU(
/*==========================*/
const buf_page_t* bpage) /*!< in: pointer to control block */
__attribute__((pure));
-/*********************************************************************//**
-Determine the approximate LRU list position of a block.
-@return LRU list position */
-UNIV_INLINE
-ulint
-buf_page_get_LRU_position(
-/*======================*/
- const buf_page_t* bpage) /*!< in: control block */
- __attribute__((pure));
/*********************************************************************//**
Gets the mutex of a block.
@@ -816,22 +807,22 @@ buf_page_set_old(
buf_page_t* bpage, /*!< in/out: control block */
ibool old); /*!< in: old */
/*********************************************************************//**
-Determine if a block has been accessed in the buffer pool.
-@return TRUE if accessed */
+Determine the time of last access a block in the buffer pool.
+@return ut_time_ms() at the time of last access, 0 if not accessed */
UNIV_INLINE
-ibool
+unsigned
buf_page_is_accessed(
/*=================*/
const buf_page_t* bpage) /*!< in: control block */
- __attribute__((pure));
+ __attribute__((nonnull, pure));
/*********************************************************************//**
Flag a block accessed. */
UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool accessed); /*!< in: accessed */
+ buf_page_t* bpage) /*!< in/out: control block */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
page frame exists, or NULL.
@@ -1017,14 +1008,6 @@ buf_block_hash_get(
/*===============*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: offset of the page within space */
-/*******************************************************************//**
-Increments the pool clock by one and returns its new value. Remember that
-in the 32 bit version the clock wraps around at 4 billion!
-@return new clock value */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void);
-/*====================*/
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
@@ -1064,16 +1047,10 @@ struct buf_page_struct{
flushed to disk, this tells the
flush_type.
@see enum buf_flush */
- unsigned accessed:1; /*!< TRUE if the page has been accessed
- while in the buffer pool: read-ahead
- may read in pages which have not been
- accessed yet; a thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
unsigned io_fix:2; /*!< type of pending I/O operation;
also protected by buf_pool_mutex
@see enum buf_io_fix */
- unsigned buf_fix_count:24;/*!< count of how manyfold this block
+ unsigned buf_fix_count:25;/*!< count of how manyfold this block
is currently bufferfixed */
/* @} */
#endif /* !UNIV_HOTBACKUP */
@@ -1152,17 +1129,7 @@ struct buf_page_struct{
#endif /* UNIV_DEBUG */
unsigned old:1; /*!< TRUE if the block is in the old
blocks in the LRU list */
- unsigned LRU_position:31;/*!< value which monotonically
- decreases (or may stay
- constant if old==TRUE) toward
- the end of the LRU list, if
- buf_pool->ulint_clock has not
- wrapped around: NOTE that this
- value can only be used in
- heuristic algorithms, because
- of the possibility of a
- wrap-around! */
- unsigned freed_page_clock:32;/*!< the value of
+ unsigned freed_page_clock:31;/*!< the value of
buf_pool->freed_page_clock
when this block was the last
time put to the head of the
@@ -1170,6 +1137,9 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
+ unsigned access_time:32; /*!< time of first access, or
+ 0 if the block was never accessed
+ in the buffer pool */
/* @} */
# ifdef UNIV_DEBUG_FILE_ACCESSES
ibool file_page_was_freed;
@@ -1314,6 +1284,31 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
+/** @brief The buffer pool statistics structure. */
+struct buf_pool_stat_struct{
+ ulint n_page_gets; /*!< number of page gets performed;
+ also successful searches through
+ the adaptive hash index are
+ counted as page gets; this field
+ is NOT protected by the buffer
+ pool mutex */
+ ulint n_pages_read; /*!< number read operations */
+ ulint n_pages_written;/*!< number write operations */
+ ulint n_pages_created;/*!< number of pages created
+ in the pool with no read */
+ ulint n_ra_pages_read;/*!< number of pages read in
+ as part of read ahead */
+ ulint n_ra_pages_evicted;/*!< number of read ahead
+ pages that are evicted without
+ being accessed */
+ ulint n_pages_made_young; /*!< number of pages made young, in
+ calls to buf_LRU_make_block_young() */
+ ulint n_pages_not_made_young; /*!< number of pages not made
+ young because the first access
+ was not long enough ago, in
+ buf_page_peek_if_too_old() */
+};
+
/** @brief The buffer pool structure.
NOTE! The definition appears here only for other modules of this
@@ -1338,28 +1333,16 @@ struct buf_pool_struct{
ulint n_pend_reads; /*!< number of pending read operations */
ulint n_pend_unzip; /*!< number of pending decompressions */
- time_t last_printout_time; /*!< when buf_print was last time
+ time_t last_printout_time;
+ /*!< when buf_print_io was last time
called */
- ulint n_pages_read; /*!< number read operations */
- ulint n_pages_written;/*!< number write operations */
- ulint n_pages_created;/*!< number of pages created
- in the pool with no read */
- ulint n_page_gets; /*!< number of page gets performed;
- also successful searches through
- the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
- ulint n_page_gets_old;/*!< n_page_gets when buf_print was
- last time called: used to calculate
- hit rate */
- ulint n_pages_read_old;/*!< n_pages_read when buf_print was
- last time called */
- ulint n_pages_written_old;/*!< number write operations */
- ulint n_pages_created_old;/*!< number of pages created in
- the pool with no read */
+ buf_pool_stat_t stat; /*!< current statistics */
+ buf_pool_stat_t old_stat; /*!< old statistics */
+
/* @} */
+
/** @name Page flushing algorithm fields */
+
/* @{ */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
@@ -1375,10 +1358,6 @@ struct buf_pool_struct{
/*!< this is in the set state
when there is no flush batch
of the given type running */
- ulint ulint_clock; /*!< a sequence number used to count
- time. NOTE! This counter wraps
- around at 4 billion (if ulint ==
- 32 bits)! */
ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@@ -1402,9 +1381,11 @@ struct buf_pool_struct{
block list */
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
/*!< base node of the LRU list */
- buf_page_t* LRU_old; /*!< pointer to the about 3/8 oldest
- blocks in the LRU list; NULL if LRU
- length less than BUF_LRU_OLD_MIN_LEN;
+ buf_page_t* LRU_old; /*!< pointer to the about
+ buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+ oldest blocks in the LRU list;
+ NULL if LRU length less than
+ BUF_LRU_OLD_MIN_LEN;
NOTE: when LRU_old != NULL, its length
should always equal LRU_old_len */
ulint LRU_old_len; /*!< length of the LRU list from
diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
index 17064342116..409d339aa16 100644
--- a/storage/innodb_plugin/include/buf0buf.ic
+++ b/storage/innodb_plugin/include/buf0buf.ic
@@ -72,9 +72,24 @@ buf_page_peek_if_too_old(
/*=====================*/
const buf_page_t* bpage) /*!< in: block to make younger */
{
- return(buf_pool->freed_page_clock
- >= buf_page_get_freed_page_clock(bpage)
- + 1 + (buf_pool->curr_size / 4));
+ if (buf_LRU_old_threshold_ms && bpage->old) {
+ unsigned access_time = buf_page_is_accessed(bpage);
+
+ if (access_time && ut_time_ms() - access_time
+ >= buf_LRU_old_threshold_ms) {
+ return(TRUE);
+ }
+
+ buf_pool->stat.n_pages_not_made_young++;
+ return(FALSE);
+ } else {
+ /* FIXME: bpage->freed_page_clock is 31 bits */
+ return((buf_pool->freed_page_clock & ~(1 << 31))
+ > bpage->freed_page_clock
+ + (buf_pool->curr_size
+ * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
+ / (BUF_LRU_OLD_RATIO_DIV * 4)));
+ }
}
/*********************************************************************//**
@@ -118,22 +133,6 @@ buf_pool_get_oldest_modification(void)
return(lsn);
}
-
-/*******************************************************************//**
-Increments the buf_pool clock by one and returns its new value. Remember
-that in the 32 bit version the clock wraps around at 4 billion!
-@return new clock value */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void)
-/*====================*/
-{
- ut_ad(buf_pool_mutex_own());
-
- buf_pool->ulint_clock++;
-
- return(buf_pool->ulint_clock);
-}
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
@@ -280,21 +279,6 @@ buf_page_belongs_to_unzip_LRU(
}
/*********************************************************************//**
-Determine the approximate LRU list position of a block.
-@return LRU list position */
-UNIV_INLINE
-ulint
-buf_page_get_LRU_position(
-/*======================*/
- const buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
-
- return(bpage->LRU_position);
-}
-
-/*********************************************************************//**
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
@@ -487,17 +471,17 @@ buf_page_set_old(
}
/*********************************************************************//**
-Determine if a block has been accessed in the buffer pool.
-@return TRUE if accessed */
+Determine the time of last access a block in the buffer pool.
+@return ut_time_ms() at the time of last access, 0 if not accessed */
UNIV_INLINE
-ibool
+unsigned
buf_page_is_accessed(
/*=================*/
const buf_page_t* bpage) /*!< in: control block */
{
ut_ad(buf_page_in_file(bpage));
- return(bpage->accessed);
+ return(bpage->access_time);
}
/*********************************************************************//**
@@ -506,13 +490,15 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool accessed) /*!< in: accessed */
+ buf_page_t* bpage) /*!< in/out: control block */
{
ut_a(buf_page_in_file(bpage));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_pool_mutex_own());
- bpage->accessed = accessed;
+ if (!bpage->access_time) {
+ /* Make this the time of the first access. */
+ bpage->access_time = ut_time_ms();
+ }
}
/*********************************************************************//**
diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h
index 463aca0982c..009430af35b 100644
--- a/storage/innodb_plugin/include/buf0lru.h
+++ b/storage/innodb_plugin/include/buf0lru.h
@@ -69,7 +69,7 @@ These are low-level functions
#########################################################################*/
/** Minimum LRU list length for which the LRU_old pointer is defined */
-#define BUF_LRU_OLD_MIN_LEN 80
+#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
@@ -84,15 +84,6 @@ void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /*!< in: space id */
-/******************************************************************//**
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around.
-@return the limit; zero if could not determine it */
-UNIV_INTERN
-ulint
-buf_LRU_get_recent_limit(void);
-/*==========================*/
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
@@ -201,6 +192,18 @@ void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /*!< in: control block */
+/**********************************************************************//**
+Updates buf_LRU_old_ratio.
+@return updated old_pct */
+UNIV_INTERN
+uint
+buf_LRU_old_ratio_update(
+/*=====================*/
+ uint old_pct,/*!< in: Reserve this percentage of
+ the buffer pool for "old" blocks. */
+ ibool adjust);/*!< in: TRUE=adjust the LRU list;
+ FALSE=just assign buf_LRU_old_ratio
+ during the initialization of InnoDB */
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
@@ -227,6 +230,35 @@ buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+/** @name Heuristics for detecting index scan @{ */
+/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
+"old" blocks. Protected by buf_pool_mutex. */
+extern uint buf_LRU_old_ratio;
+/** The denominator of buf_LRU_old_ratio. */
+#define BUF_LRU_OLD_RATIO_DIV 1024
+/** Maximum value of buf_LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update */
+#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
+/** Minimum value of buf_LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update
+The minimum must exceed
+(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
+#define BUF_LRU_OLD_RATIO_MIN 51
+
+#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
+# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
+#endif
+#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
+# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
+#endif
+
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+extern uint buf_LRU_old_threshold_ms;
+/* @} */
+
/** @brief Statistics for selecting the LRU list for eviction.
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
diff --git a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h
index a3b5685bd91..093750623d6 100644
--- a/storage/innodb_plugin/include/buf0rea.h
+++ b/storage/innodb_plugin/include/buf0rea.h
@@ -33,12 +33,10 @@ Created 11/5/1995 Heikki Tuuri
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible.
-@return number of page read requests issued: this can be greater than
-1 if read-ahead occurred */
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
-ulint
+ibool
buf_read_page(
/*==========*/
ulint space, /*!< in: space id */
diff --git a/storage/innodb_plugin/include/buf0types.h b/storage/innodb_plugin/include/buf0types.h
index e7167d716a0..bfae6477135 100644
--- a/storage/innodb_plugin/include/buf0types.h
+++ b/storage/innodb_plugin/include/buf0types.h
@@ -34,6 +34,8 @@ typedef struct buf_block_struct buf_block_t;
typedef struct buf_chunk_struct buf_chunk_t;
/** Buffer pool comprising buf_chunk_t */
typedef struct buf_pool_struct buf_pool_t;
+/** Buffer pool statistics struct */
+typedef struct buf_pool_stat_struct buf_pool_stat_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
diff --git a/storage/innodb_plugin/include/dict0dict.h b/storage/innodb_plugin/include/dict0dict.h
index b2029699e51..d425241a3a2 100644
--- a/storage/innodb_plugin/include/dict0dict.h
+++ b/storage/innodb_plugin/include/dict0dict.h
@@ -712,7 +712,7 @@ dict_index_find_on_id_low(
dulint id); /*!< in: index id */
/**********************************************************************//**
Adds an index to the dictionary cache.
-@return DB_SUCCESS or error code */
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
ulint
dict_index_add_to_cache(
diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h
index 0e0b32e7036..6e52fa1350e 100644
--- a/storage/innodb_plugin/include/os0sync.h
+++ b/storage/innodb_plugin/include/os0sync.h
@@ -285,7 +285,7 @@ os_fast_mutex_free(
/**********************************************************//**
Atomic compare-and-swap and increment for InnoDB. */
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
+#ifdef HAVE_IB_GCC_ATOMIC_BUILTINS
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
compare to, new_val is the value to swap in. */
@@ -377,7 +377,7 @@ InterlockedExchange() operates on LONG, and the LONG will be
clobbered */
# define os_atomic_test_and_set_byte(ptr, new_val) \
((byte) InterlockedExchange(ptr, new_val))
-#endif /* HAVE_GCC_ATOMIC_BUILTINS */
+#endif /* HAVE_IB_GCC_ATOMIC_BUILTINS */
#ifndef UNIV_NONINL
#include "os0sync.ic"
diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
index 499bccfe2b8..23472bd100e 100644
--- a/storage/innodb_plugin/include/srv0srv.h
+++ b/storage/innodb_plugin/include/srv0srv.h
@@ -315,10 +315,6 @@ extern ulint srv_buf_pool_flushed;
/** Number of buffer pool reads that led to the
reading of a disk page */
extern ulint srv_buf_pool_reads;
-/** Number of sequential read-aheads */
-extern ulint srv_read_ahead_seq;
-/** Number of random read-aheads */
-extern ulint srv_read_ahead_rnd;
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
@@ -605,13 +601,13 @@ struct export_var_struct{
#ifdef UNIV_DEBUG
ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_read_requests; /*!< buf_pool->n_page_gets */
+ ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
- ulint innodb_buffer_pool_read_ahead_seq;/*!< srv_read_ahead_seq */
- ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
+ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
+ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
@@ -623,9 +619,9 @@ struct export_var_struct{
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
- ulint innodb_pages_created; /*!< buf_pool->n_pages_created */
- ulint innodb_pages_read; /*!< buf_pool->n_pages_read */
- ulint innodb_pages_written; /*!< buf_pool->n_pages_written */
+ ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
+ ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */
+ ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i
index aa01f072b1d..66240d96a11 100644
--- a/storage/innodb_plugin/include/univ.i
+++ b/storage/innodb_plugin/include/univ.i
@@ -125,11 +125,11 @@ if we are compiling on Windows. */
# include <sched.h>
# endif
-# if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \
+# if defined(HAVE_IB_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \
|| defined(HAVE_WINDOWS_ATOMICS)
/* If atomics are defined we use them in InnoDB mutex implementation */
# define HAVE_ATOMIC_BUILTINS
-# endif /* (HAVE_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS)
+# endif /* (HAVE_IB_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS)
|| (HAVE_WINDOWS_ATOMICS) */
/* For InnoDB rw_locks to work with atomics we need the thread_id
diff --git a/storage/innodb_plugin/include/ut0ut.h b/storage/innodb_plugin/include/ut0ut.h
index 80094321041..12c1cd59166 100644
--- a/storage/innodb_plugin/include/ut0ut.h
+++ b/storage/innodb_plugin/include/ut0ut.h
@@ -239,6 +239,15 @@ ullint
ut_time_us(
/*=======*/
ullint* tloc); /*!< out: us since epoch, if non-NULL */
+/**********************************************************//**
+Returns the number of milliseconds since some epoch. The
+value may wrap around. It should only be used for heuristic
+purposes.
+@return ms since epoch */
+UNIV_INTERN
+uint
+ut_time_ms(void);
+/*============*/
/**********************************************************//**
Returns the difference of two times in seconds.
diff --git a/storage/innodb_plugin/plug.in b/storage/innodb_plugin/plug.in
index a620f10a0da..230c6ed34df 100644
--- a/storage/innodb_plugin/plug.in
+++ b/storage/innodb_plugin/plug.in
@@ -63,6 +63,57 @@ MYSQL_PLUGIN_ACTIONS(innodb_plugin, [
;;
esac
AC_SUBST(INNODB_DYNAMIC_CFLAGS)
+
+ AC_MSG_CHECKING(whether GCC atomic builtins are available)
+ AC_TRY_RUN(
+ [
+ int main()
+ {
+ long x;
+ long y;
+ long res;
+ char c;
+
+ x = 10;
+ y = 123;
+ res = __sync_bool_compare_and_swap(&x, x, y);
+ if (!res || x != y) {
+ return(1);
+ }
+
+ x = 10;
+ y = 123;
+ res = __sync_bool_compare_and_swap(&x, x + 1, y);
+ if (res || x != 10) {
+ return(1);
+ }
+
+ x = 10;
+ y = 123;
+ res = __sync_add_and_fetch(&x, y);
+ if (res != 123 + 10 || x != 123 + 10) {
+ return(1);
+ }
+
+ c = 10;
+ res = __sync_lock_test_and_set(&c, 123);
+ if (res != 10 || c != 123) {
+ return(1);
+ }
+
+ return(0);
+ }
+ ],
+ [
+ AC_DEFINE([HAVE_IB_GCC_ATOMIC_BUILTINS], [1],
+ [GCC atomic builtins are available])
+ AC_MSG_RESULT(yes)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ]
+ )
+
AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins)
AC_TRY_RUN(
[
diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c
index 924b7ffba58..9a51221732b 100644
--- a/storage/innodb_plugin/row/row0merge.c
+++ b/storage/innodb_plugin/row/row0merge.c
@@ -60,9 +60,19 @@ Completed by Sunny Bains and Marko Makela
#ifdef UNIV_DEBUG
/** Set these in order ot enable debug printout. */
/* @{ */
+/** Log the outcome of each row_merge_cmp() call, comparing records. */
static ibool row_merge_print_cmp;
+/** Log each record read from temporary file. */
static ibool row_merge_print_read;
+/** Log each record write to temporary file. */
static ibool row_merge_print_write;
+/** Log each row_merge_blocks() call, merging two blocks of records to
+a bigger one. */
+static ibool row_merge_print_block;
+/** Log each block read from temporary file. */
+static ibool row_merge_print_block_read;
+/** Log each block read from temporary file. */
+static ibool row_merge_print_block_write;
/* @} */
#endif /* UNIV_DEBUG */
@@ -109,8 +119,9 @@ typedef struct row_merge_buf_struct row_merge_buf_t;
/** Information about temporary files used in merge sort */
struct merge_file_struct {
- int fd; /*!< file descriptor */
- ulint offset; /*!< file offset */
+ int fd; /*!< file descriptor */
+ ulint offset; /*!< file offset (end of file) */
+ ib_uint64_t n_rec; /*!< number of records in the file */
};
/** Information about temporary files used in merge sort */
@@ -682,6 +693,13 @@ row_merge_read(
ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof *buf;
ibool success;
+#ifdef UNIV_DEBUG
+ if (row_merge_print_block_read) {
+ fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
+ fd, (ulong) offset);
+ }
+#endif /* UNIV_DEBUG */
+
success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
(ulint) (ofs & 0xFFFFFFFF),
(ulint) (ofs >> 32),
@@ -709,6 +727,13 @@ row_merge_write(
ib_uint64_t ofs = ((ib_uint64_t) offset)
* sizeof(row_merge_block_t);
+#ifdef UNIV_DEBUG
+ if (row_merge_print_block_write) {
+ fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
+ fd, (ulong) offset);
+ }
+#endif /* UNIV_DEBUG */
+
return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
(ulint) (ofs & 0xFFFFFFFF),
(ulint) (ofs >> 32),
@@ -718,7 +743,7 @@ row_merge_write(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-static
+static __attribute__((nonnull))
const byte*
row_merge_read_rec(
/*===============*/
@@ -1070,7 +1095,7 @@ row_merge_cmp(
Reads clustered index of the table and create temporary files
containing the index entries for the indexes to be built.
@return DB_SUCCESS or error */
-static
+static __attribute__((nonnull))
ulint
row_merge_read_clustered_index(
/*===========================*/
@@ -1233,6 +1258,7 @@ row_merge_read_clustered_index(
if (UNIV_LIKELY
(row && row_merge_buf_add(buf, row, ext))) {
+ file->n_rec++;
continue;
}
@@ -1274,14 +1300,19 @@ err_exit:
UNIV_MEM_INVALID(block[0], sizeof block[0]);
merge_buf[i] = row_merge_buf_empty(buf);
- /* Try writing the record again, now that
- the buffer has been written out and emptied. */
+ if (UNIV_LIKELY(row != NULL)) {
+ /* Try writing the record again, now
+ that the buffer has been written out
+ and emptied. */
- if (UNIV_UNLIKELY
- (row && !row_merge_buf_add(buf, row, ext))) {
- /* An empty buffer should have enough
- room for at least one record. */
- ut_error;
+ if (UNIV_UNLIKELY
+ (!row_merge_buf_add(buf, row, ext))) {
+ /* An empty buffer should have enough
+ room for at least one record. */
+ ut_error;
+ }
+
+ file->n_rec++;
}
}
@@ -1320,7 +1351,7 @@ func_exit:
b2 = row_merge_write_rec(&block[2], &buf[2], b2, \
of->fd, &of->offset, \
mrec##N, offsets##N); \
- if (UNIV_UNLIKELY(!b2)) { \
+ if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \
goto corrupt; \
} \
b##N = row_merge_read_rec(&block[N], &buf[N], \
@@ -1336,14 +1367,14 @@ func_exit:
} while (0)
/*************************************************************//**
-Merge two blocks of linked lists on disk and write a bigger block.
+Merge two blocks of records on disk and write a bigger block.
@return DB_SUCCESS or error code */
static
ulint
row_merge_blocks(
/*=============*/
const dict_index_t* index, /*!< in: index being created */
- merge_file_t* file, /*!< in/out: file containing
+ const merge_file_t* file, /*!< in: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
ulint* foffs0, /*!< in/out: offset of first
@@ -1366,6 +1397,17 @@ row_merge_blocks(
ulint* offsets0;/* offsets of mrec0 */
ulint* offsets1;/* offsets of mrec1 */
+#ifdef UNIV_DEBUG
+ if (row_merge_print_block) {
+ fprintf(stderr,
+ "row_merge_blocks fd=%d ofs=%lu + fd=%d ofs=%lu"
+ " = fd=%d ofs=%lu\n",
+ file->fd, (ulong) *foffs0,
+ file->fd, (ulong) *foffs1,
+ of->fd, (ulong) of->offset);
+ }
+#endif /* UNIV_DEBUG */
+
heap = row_merge_heap_create(index, &offsets0, &offsets1);
/* Write a record and read the next record. Split the output
@@ -1438,16 +1480,87 @@ done1:
}
/*************************************************************//**
+Copy a block of index entries.
+@return TRUE on success, FALSE on failure */
+static __attribute__((nonnull))
+ibool
+row_merge_blocks_copy(
+/*==================*/
+ const dict_index_t* index, /*!< in: index being created */
+ const merge_file_t* file, /*!< in: input file */
+ row_merge_block_t* block, /*!< in/out: 3 buffers */
+ ulint* foffs0, /*!< in/out: input file offset */
+ merge_file_t* of) /*!< in/out: output file */
+{
+ mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
+
+ mrec_buf_t buf[3]; /*!< buffer for handling
+ split mrec in block[] */
+ const byte* b0; /*!< pointer to block[0] */
+ byte* b2; /*!< pointer to block[2] */
+ const mrec_t* mrec0; /*!< merge rec, points to block[0] */
+ ulint* offsets0;/* offsets of mrec0 */
+ ulint* offsets1;/* dummy offsets */
+
+#ifdef UNIV_DEBUG
+ if (row_merge_print_block) {
+ fprintf(stderr,
+ "row_merge_blocks_copy fd=%d ofs=%lu"
+ " = fd=%d ofs=%lu\n",
+ file->fd, (ulong) foffs0,
+ of->fd, (ulong) of->offset);
+ }
+#endif /* UNIV_DEBUG */
+
+ heap = row_merge_heap_create(index, &offsets0, &offsets1);
+
+ /* Write a record and read the next record. Split the output
+ file in two halves, which can be merged on the following pass. */
+
+ if (!row_merge_read(file->fd, *foffs0, &block[0])) {
+corrupt:
+ mem_heap_free(heap);
+ return(FALSE);
+ }
+
+ b0 = block[0];
+ b2 = block[2];
+
+ b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
+ foffs0, &mrec0, offsets0);
+ if (UNIV_UNLIKELY(!b0 && mrec0)) {
+
+ goto corrupt;
+ }
+
+ if (mrec0) {
+ /* append all mrec0 to output */
+ for (;;) {
+ ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ }
+ }
+done0:
+
+ /* The file offset points to the beginning of the last page
+ that has been read. Update it to point to the next block. */
+ (*foffs0)++;
+
+ mem_heap_free(heap);
+ return(row_merge_write_eof(&block[2], b2, of->fd, &of->offset)
+ != NULL);
+}
+
+/*************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
-static
+static __attribute__((nonnull))
ulint
row_merge(
/*======*/
const dict_index_t* index, /*!< in: index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
- ulint half, /*!< in: half the file */
+ ulint* half, /*!< in/out: half the file */
row_merge_block_t* block, /*!< in/out: 3 buffers */
int* tmpfd, /*!< in/out: temporary file handle */
TABLE* table) /*!< in/out: MySQL table, for
@@ -1458,43 +1571,76 @@ row_merge(
ulint foffs1; /*!< second input offset */
ulint error; /*!< error code */
merge_file_t of; /*!< output file */
+ const ulint ihalf = *half;
+ /*!< half the input file */
+ ulint ohalf; /*!< half the output file */
UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
- ut_ad(half > 0);
+ ut_ad(ihalf > 0);
+ ut_ad(ihalf < file->offset);
of.fd = *tmpfd;
of.offset = 0;
+ of.n_rec = 0;
/* Merge blocks to the output file. */
+ ohalf = 0;
foffs0 = 0;
- foffs1 = half;
+ foffs1 = ihalf;
+
+ for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
+ ulint ahalf; /*!< arithmetic half the input file */
- for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
error = row_merge_blocks(index, file, block,
&foffs0, &foffs1, &of, table);
if (error != DB_SUCCESS) {
return(error);
}
+
+ /* Record the offset of the output file when
+ approximately half the output has been generated. In
+ this way, the next invocation of row_merge() will
+ spend most of the time in this loop. The initial
+ estimate is ohalf==0. */
+ ahalf = file->offset / 2;
+ ut_ad(ohalf <= of.offset);
+
+ /* Improve the estimate until reaching half the input
+ file size, or we can not get any closer to it. All
+ comparands should be non-negative when !(ohalf < ahalf)
+ because ohalf <= of.offset. */
+ if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) {
+ ohalf = of.offset;
+ }
}
- /* Copy the last block, if there is one. */
- while (foffs0 < half) {
- if (!row_merge_read(file->fd, foffs0++, block)
- || !row_merge_write(of.fd, of.offset++, block)) {
+ /* Copy the last blocks, if there are any. */
+
+ while (foffs0 < ihalf) {
+ if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
return(DB_CORRUPTION);
}
}
+
+ ut_ad(foffs0 == ihalf);
+
while (foffs1 < file->offset) {
- if (!row_merge_read(file->fd, foffs1++, block)
- || !row_merge_write(of.fd, of.offset++, block)) {
+ if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
return(DB_CORRUPTION);
}
}
+ ut_ad(foffs1 == file->offset);
+
+ if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) {
+ return(DB_CORRUPTION);
+ }
+
/* Swap file descriptors for the next pass. */
*tmpfd = file->fd;
*file = of;
+ *half = ohalf;
UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
@@ -1517,20 +1663,17 @@ row_merge_sort(
reporting erroneous key value
if applicable */
{
- ulint blksz; /*!< block size */
+ ulint half = file->offset / 2;
- for (blksz = 1; blksz < file->offset; blksz *= 2) {
- ulint half;
+ do {
ulint error;
- ut_ad(ut_is_2pow(blksz));
- half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
- error = row_merge(index, file, half, block, tmpfd, table);
+ error = row_merge(index, file, &half, block, tmpfd, table);
if (error != DB_SUCCESS) {
return(error);
}
- }
+ } while (half < file->offset && half > 0);
return(DB_SUCCESS);
}
@@ -1909,6 +2052,7 @@ row_merge_file_create(
{
merge_file->fd = innobase_mysql_tmpfile();
merge_file->offset = 0;
+ merge_file->n_rec = 0;
}
/*********************************************************************//**
diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
index b41c8acfab4..2f54e9b3734 100644
--- a/storage/innodb_plugin/srv/srv0srv.c
+++ b/storage/innodb_plugin/srv/srv0srv.c
@@ -292,12 +292,6 @@ UNIV_INTERN ulint srv_buf_pool_flushed = 0;
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads = 0;
-/** Number of sequential read-aheads */
-UNIV_INTERN ulint srv_read_ahead_seq = 0;
-
-/** Number of random read-aheads */
-UNIV_INTERN ulint srv_read_ahead_rnd = 0;
-
/* structure to pass status variables to MySQL */
UNIV_INTERN export_struc export_vars;
@@ -464,8 +458,6 @@ static ulint srv_main_background_loops = 0;
static ulint srv_main_flush_loops = 0;
/* Log writes involving flush. */
static ulint srv_log_writes_and_flush = 0;
-/* Log writes not including flush. */
-static ulint srv_log_buffer_writes = 0;
/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
@@ -714,9 +706,8 @@ srv_print_master_thread_info(
srv_main_1_second_loops, srv_main_sleeps,
srv_main_10_second_loops, srv_main_background_loops,
srv_main_flush_loops);
- fprintf(file, "srv_master_thread log flush and writes: %lu "
- " log writes only: %lu\n",
- srv_log_writes_and_flush, srv_log_buffer_writes);
+ fprintf(file, "srv_master_thread log flush and writes: %lu\n",
+ srv_log_writes_and_flush);
}
/*********************************************************************//**
@@ -1877,14 +1868,16 @@ srv_export_innodb_status(void)
export_vars.innodb_data_reads = os_n_file_reads;
export_vars.innodb_data_writes = os_n_file_writes;
export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
+ export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets;
export_vars.innodb_buffer_pool_write_requests
= srv_buf_pool_write_requests;
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
- export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
+ export_vars.innodb_buffer_pool_read_ahead
+ = buf_pool->stat.n_ra_pages_read;
+ export_vars.innodb_buffer_pool_read_ahead_evicted
+ = buf_pool->stat.n_ra_pages_evicted;
export_vars.innodb_buffer_pool_pages_data
= UT_LIST_GET_LEN(buf_pool->LRU);
export_vars.innodb_buffer_pool_pages_dirty
@@ -1915,9 +1908,9 @@ srv_export_innodb_status(void)
export_vars.innodb_log_writes = srv_log_writes;
export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = buf_pool->n_pages_created;
- export_vars.innodb_pages_read = buf_pool->n_pages_read;
- export_vars.innodb_pages_written = buf_pool->n_pages_written;
+ export_vars.innodb_pages_created = buf_pool->stat.n_pages_created;
+ export_vars.innodb_pages_read = buf_pool->stat.n_pages_read;
+ export_vars.innodb_pages_written = buf_pool->stat.n_pages_written;
export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
export_vars.innodb_row_lock_current_waits
= srv_n_lock_wait_current_count;
@@ -2284,12 +2277,6 @@ srv_sync_log_buffer_in_background(void)
log_buffer_sync_in_background(TRUE);
srv_last_log_flush_time = current_time;
srv_log_writes_and_flush++;
- } else {
- /* Actually we don't need to write logs here.
- We are just being extra safe here by forcing
- the log buffer to log file. */
- log_buffer_sync_in_background(FALSE);
- srv_log_buffer_writes++;
}
}
@@ -2340,8 +2327,8 @@ loop:
srv_main_thread_op_info = "reserving kernel mutex";
- n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
mutex_enter(&kernel_mutex);
/* Store the user activity counter at the start of this loop */
@@ -2361,8 +2348,8 @@ loop:
skip_sleep = FALSE;
for (i = 0; i < 10; i++) {
- n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
srv_main_thread_op_info = "sleeping";
srv_main_1_second_loops++;
@@ -2401,8 +2388,8 @@ loop:
n_pend_ios = buf_get_n_pending_ios()
+ log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
if (n_pend_ios < SRV_PEND_IO_THRESHOLD
&& (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
srv_main_thread_op_info = "doing insert buffer merge";
@@ -2418,6 +2405,8 @@ loop:
/* Try to keep the number of modified pages in the
buffer pool under the limit wished by the user */
+ srv_main_thread_op_info =
+ "flushing buffer pool pages";
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
PCT_IO(100),
IB_ULONGLONG_MAX);
@@ -2436,6 +2425,8 @@ loop:
ulint n_flush = buf_flush_get_desired_flush_rate();
if (n_flush) {
+ srv_main_thread_op_info =
+ "flushing buffer pool pages";
n_flush = ut_min(PCT_IO(100), n_flush);
n_pages_flushed =
buf_flush_batch(
@@ -2473,8 +2464,8 @@ loop:
are not required, and may be disabled. */
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
srv_main_10_second_loops++;
if (n_pend_ios < SRV_PEND_IO_THRESHOLD
diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
index f7447499b3b..e4278f6b5a8 100644
--- a/storage/innodb_plugin/srv/srv0start.c
+++ b/storage/innodb_plugin/srv/srv0start.c
@@ -1106,7 +1106,7 @@ innobase_start_or_create_for_mysql(void)
"InnoDB: The InnoDB memory heap is disabled\n");
}
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
+#ifdef HAVE_IB_GCC_ATOMIC_BUILTINS
# ifdef INNODB_RW_LOCKS_USE_ATOMICS
fprintf(stderr,
"InnoDB: Mutexes and rw_locks use GCC atomic builtins.\n");
@@ -1130,10 +1130,10 @@ innobase_start_or_create_for_mysql(void)
fprintf(stderr,
"InnoDB: Mutexes use Windows interlocked functions.\n");
# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-#else /* HAVE_GCC_ATOMIC_BUILTINS */
+#else /* HAVE_IB_GCC_ATOMIC_BUILTINS */
fprintf(stderr,
"InnoDB: Neither mutexes nor rw_locks use GCC atomic builtins.\n");
-#endif /* HAVE_GCC_ATOMIC_BUILTINS */
+#endif /* HAVE_IB_GCC_ATOMIC_BUILTINS */
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
diff --git a/storage/innodb_plugin/ut/ut0ut.c b/storage/innodb_plugin/ut/ut0ut.c
index e4cc226fbad..a01a0470938 100644
--- a/storage/innodb_plugin/ut/ut0ut.c
+++ b/storage/innodb_plugin/ut/ut0ut.c
@@ -200,6 +200,23 @@ ut_time_us(
}
/**********************************************************//**
+Returns the number of milliseconds since some epoch. The
+value may wrap around. It should only be used for heuristic
+purposes.
+@return ms since epoch */
+UNIV_INTERN
+uint
+ut_time_ms(void)
+/*============*/
+{
+ struct timeval tv;
+
+ ut_gettimeofday(&tv, NULL);
+
+ return((uint) tv.tv_sec * 1000 + tv.tv_usec / 1000);
+}
+
+/**********************************************************//**
Returns the difference of two times in seconds.
@return time2 - time1 expressed in seconds */
UNIV_INTERN