summaryrefslogtreecommitdiff
path: root/storage/xtradb/buf
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/buf')
-rw-r--r--storage/xtradb/buf/buf0buddy.c9
-rw-r--r--storage/xtradb/buf/buf0buf.c101
-rw-r--r--storage/xtradb/buf/buf0flu.c284
-rw-r--r--storage/xtradb/buf/buf0lru.c141
-rw-r--r--storage/xtradb/buf/buf0rea.c54
5 files changed, 478 insertions, 111 deletions
diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c
index d5e45745757..e4a79026d3a 100644
--- a/storage/xtradb/buf/buf0buddy.c
+++ b/storage/xtradb/buf/buf0buddy.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -430,6 +430,8 @@ buf_buddy_relocate_block(
}
mutex_exit(&flush_list_mutex);
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
mutex_exit(&buf_pool_zip_mutex);
mutex_exit(&zip_free_mutex);
return(TRUE);
@@ -567,7 +569,12 @@ success:
}
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
/* This must be a buf_page_t object. */
+#if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in
+ buf_page_t. On other systems, Valgrind could complain
+ about uninitialized pad bytes. */
UNIV_MEM_ASSERT_RW(src, size);
+#endif
mutex_exit(&zip_free_mutex);
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index 79a9488e339..a5a9e1d9004 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -277,6 +277,8 @@ the read requests for the whole area.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
+/** Number of attemtps made to read in a page in the buffer pool */
+static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
/** The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t* buf_pool = NULL;
@@ -826,7 +828,7 @@ buf_chunk_init(
buf_block_init(block, frame);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
/* Wipe contents of frame to eliminate a Purify warning */
memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
@@ -1150,7 +1152,9 @@ buf_pool_drop_hash_index(void)
when we have an x-latch on btr_search_latch;
see the comment in buf0buf.h */
- if (!block->is_hashed) {
+ if (buf_block_get_state(block)
+ != BUF_BLOCK_FILE_PAGE
+ || !block->is_hashed) {
continue;
}
@@ -1283,8 +1287,6 @@ buf_relocate(
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
}
/********************************************************************//**
@@ -1980,14 +1982,14 @@ buf_zip_decompress(
buf_block_t* block, /*!< in/out: block */
ibool check) /*!< in: TRUE=verify the page checksum */
{
- const byte* frame = block->page.zip.data;
+ const byte* frame = block->page.zip.data;
+ ulint stamp_checksum = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
ut_ad(buf_block_get_zip_size(block));
ut_a(buf_block_get_space(block) != 0);
- if (UNIV_LIKELY(check)) {
- ulint stamp_checksum = mach_read_from_4(
- frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
ulint calc_checksum = page_zip_calc_checksum(
frame, page_zip_get_size(&block->page.zip));
@@ -2196,6 +2198,7 @@ buf_page_get_gen(
unsigned access_time;
ulint fix_type;
ibool must_read;
+ ulint retries = 0;
mutex_t* block_mutex;
trx_t* trx = NULL;
ulint sec;
@@ -2204,6 +2207,7 @@ buf_page_get_gen(
ib_uint64_t finish_time;
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
@@ -2271,7 +2275,29 @@ loop2:
return(NULL);
}
- buf_read_page(space, zip_size, offset, trx);
+ if (buf_read_page(space, zip_size, offset, trx)) {
+ retries = 0;
+ } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+ ++retries;
+ } else {
+ fprintf(stderr, "InnoDB: Error: Unable"
+ " to read tablespace %lu page no"
+ " %lu into the buffer pool after"
+ " %lu attempts\n"
+ "InnoDB: The most probable cause"
+ " of this error may be that the"
+ " table has been corrupted.\n"
+ "InnoDB: You can try to fix this"
+ " problem by using"
+ " innodb_force_recovery.\n"
+ "InnoDB: Please see reference manual"
+ " for more details.\n"
+ "InnoDB: Aborting...\n",
+ space, offset,
+ BUF_PAGE_READ_MAX_RETRIES);
+
+ ut_error;
+ }
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
@@ -2414,22 +2440,8 @@ wait_until_unfixed:
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
- buf_page_t* b;
-
- b = UT_LIST_GET_PREV(flush_list, &block->page);
- ut_ad(block->page.in_flush_list);
- UT_LIST_REMOVE(flush_list, buf_pool->flush_list,
- &block->page);
-
- if (b) {
- UT_LIST_INSERT_AFTER(
- flush_list, buf_pool->flush_list, b,
- &block->page);
- } else {
- UT_LIST_ADD_FIRST(
- flush_list, buf_pool->flush_list,
- &block->page);
- }
+ buf_flush_relocate_on_flush_list(bpage,
+ &block->page);
}
mutex_exit(&flush_list_mutex);
@@ -2447,6 +2459,9 @@ wait_until_unfixed:
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
mutex_exit(block_mutex);
mutex_exit(&buf_pool_zip_mutex);
@@ -2461,8 +2476,9 @@ wait_until_unfixed:
/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
+ ut_a(success);
- if (UNIV_LIKELY(success)) {
+ if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
}
@@ -2478,14 +2494,6 @@ wait_until_unfixed:
buf_pool->n_pend_unzip--;
mutex_exit(&buf_pool_mutex);
rw_lock_x_unlock(&block->lock);
-
- if (UNIV_UNLIKELY(!success)) {
-
- //buf_pool_mutex_exit();
- mutex_exit(block_mutex);
- return(NULL);
- }
-
break;
case BUF_BLOCK_ZIP_FREE:
@@ -2500,7 +2508,12 @@ wait_until_unfixed:
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
//mutex_enter(&block->mutex);
+#if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+ other systems, Valgrind could complain about uninitialized pad
+ bytes. */
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
+#endif
buf_block_buf_fix_inc(block, file, line);
@@ -2603,8 +2616,8 @@ page.
@return TRUE if success */
UNIV_INTERN
ibool
-buf_page_optimistic_get_func(
-/*=========================*/
+buf_page_optimistic_get(
+/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
@@ -2618,7 +2631,9 @@ buf_page_optimistic_get_func(
ulint fix_type;
trx_t* trx = NULL;
- ut_ad(mtr && block);
+ ut_ad(block);
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@@ -2738,6 +2753,7 @@ buf_page_get_known_nowait(
trx_t* trx = NULL;
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@@ -2846,6 +2862,9 @@ buf_page_try_get_func(
ibool success;
ulint fix_type;
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
//buf_pool_mutex_enter();
rw_lock_s_lock(&page_hash_latch);
block = buf_block_hash_get(space_id, page_no);
@@ -3249,6 +3268,7 @@ buf_page_create(
ulint time_ms = ut_time_ms();
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(0);
@@ -3431,7 +3451,8 @@ buf_page_io_complete(
read_space_id = mach_read_from_4(
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- if (bpage->space == TRX_SYS_SPACE
+ if ((bpage->space == TRX_SYS_SPACE
+ || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
&& trx_doublewrite_page_inside(bpage->offset)) {
ut_print_timestamp(stderr);
@@ -3503,7 +3524,7 @@ corrupt:
REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
- if (srv_pass_corrupt_table && bpage->space > 0
+ if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
&& bpage->space < SRV_LOG_SPACE_FIRST_ID) {
fprintf(stderr,
"InnoDB: space %u will be treated as corrupt.\n",
diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c
index 1735f6ac3cb..17588475bbf 100644
--- a/storage/xtradb/buf/buf0flu.c
+++ b/storage/xtradb/buf/buf0flu.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -88,6 +88,146 @@ buf_flush_validate_low(void);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
+Insert a block in the flush_rbt and returns a pointer to its
+predecessor or NULL if no predecessor. The ordering is maintained
+on the basis of the <oldest_modification, space, offset> key.
+@return pointer to the predecessor or NULL if no predecessor. */
+static
+buf_page_t*
+buf_flush_insert_in_flush_rbt(
+/*==========================*/
+ buf_page_t* bpage) /*!< in: bpage to be inserted. */
+{
+ buf_page_t* prev = NULL;
+ const ib_rbt_node_t* c_node;
+ const ib_rbt_node_t* p_node;
+
+ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&flush_list_mutex));
+
+ /* Insert this buffer into the rbt. */
+ c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
+ ut_a(c_node != NULL);
+
+ /* Get the predecessor. */
+ p_node = rbt_prev(buf_pool->flush_rbt, c_node);
+
+ if (p_node != NULL) {
+ prev = *rbt_value(buf_page_t*, p_node);
+ ut_a(prev != NULL);
+ }
+
+ return(prev);
+}
+
+/********************************************************************//**
+Delete a bpage from the flush_rbt. */
+static
+void
+buf_flush_delete_from_flush_rbt(
+/*============================*/
+ buf_page_t* bpage) /*!< in: bpage to be removed. */
+{
+
+ ibool ret = FALSE;
+
+ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&flush_list_mutex));
+ ret = rbt_delete(buf_pool->flush_rbt, &bpage);
+ ut_ad(ret);
+}
+
+/********************************************************************//**
+Compare two modified blocks in the buffer pool. The key for comparison
+is:
+key = <oldest_modification, space, offset>
+This comparison is used to maintian ordering of blocks in the
+buf_pool->flush_rbt.
+Note that for the purpose of flush_rbt, we only need to order blocks
+on the oldest_modification. The other two fields are used to uniquely
+identify the blocks.
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+static
+int
+buf_flush_block_cmp(
+/*================*/
+ const void* p1, /*!< in: block1 */
+ const void* p2) /*!< in: block2 */
+{
+ int ret;
+ const buf_page_t* b1;
+ const buf_page_t* b2;
+
+ ut_ad(p1 != NULL);
+ ut_ad(p2 != NULL);
+
+ b1 = *(const buf_page_t**) p1;
+ b2 = *(const buf_page_t**) p2;
+
+ ut_ad(b1 != NULL);
+ ut_ad(b2 != NULL);
+
+ ut_ad(b1->in_flush_list);
+ ut_ad(b2->in_flush_list);
+
+ if (b2->oldest_modification
+ > b1->oldest_modification) {
+ return(1);
+ }
+
+ if (b2->oldest_modification
+ < b1->oldest_modification) {
+ return(-1);
+ }
+
+ /* If oldest_modification is same then decide on the space. */
+ ret = (int)(b2->space - b1->space);
+
+ /* Or else decide ordering on the offset field. */
+ return(ret ? ret : (int)(b2->offset - b1->offset));
+}
+
+/********************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written. */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void)
+/*==========================*/
+{
+ //buf_pool_mutex_enter();
+ mutex_enter(&flush_list_mutex);
+
+ /* Create red black tree for speedy insertions in flush list. */
+ buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
+ buf_flush_block_cmp);
+ //buf_pool_mutex_exit();
+ mutex_exit(&flush_list_mutex);
+}
+
+/********************************************************************//**
+Frees up the red-black tree. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void)
+/*==========================*/
+{
+ //buf_pool_mutex_enter();
+ mutex_enter(&flush_list_mutex);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ rbt_free(buf_pool->flush_rbt);
+ buf_pool->flush_rbt = NULL;
+
+ //buf_pool_mutex_exit();
+ mutex_exit(&flush_list_mutex);
+}
+
+/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
@@ -102,6 +242,13 @@ buf_flush_insert_into_flush_list(
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= block->page.oldest_modification));
+ /* If we are in the recovery then we need to update the flush
+ red-black tree as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_insert_sorted_into_flush_list(block);
+ return;
+ }
+
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
@@ -140,26 +287,27 @@ buf_flush_insert_sorted_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
prev_b = NULL;
- b = UT_LIST_GET_FIRST(buf_pool->flush_list);
- if (srv_fast_recovery) {
- /* speed hack */
- if (b == NULL || b->oldest_modification < block->page.oldest_modification) {
- UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+ /* For the most part when this function is called the flush_rbt
+ should not be NULL. In a very rare boundary case it is possible
+ that the flush_rbt has already been freed by the recovery thread
+ before the last page was hooked up in the flush_list by the
+ io-handler thread. In that case we'll just do a simple
+ linear search in the else block. */
+ if (buf_pool->flush_rbt) {
+
+ prev_b = buf_flush_insert_in_flush_rbt(&block->page);
+
} else {
- b = UT_LIST_GET_LAST(buf_pool->flush_list);
- if (b->oldest_modification < block->page.oldest_modification) {
- /* align oldest_modification not to sort */
- block->page.oldest_modification = b->oldest_modification;
+
+ b = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+ while (b && b->oldest_modification
+ > block->page.oldest_modification) {
+ ut_ad(b->in_flush_list);
+ prev_b = b;
+ b = UT_LIST_GET_NEXT(flush_list, b);
}
- UT_LIST_ADD_LAST(flush_list, buf_pool->flush_list, &block->page);
- }
- } else {
- /* normal */
- while (b && b->oldest_modification > block->page.oldest_modification) {
- ut_ad(b->in_flush_list);
- prev_b = b;
- b = UT_LIST_GET_NEXT(flush_list, b);
}
if (prev_b == NULL) {
@@ -168,7 +316,6 @@ buf_flush_insert_sorted_into_flush_list(
UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
prev_b, &block->page);
}
- }
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
@@ -262,7 +409,6 @@ buf_flush_remove(
mutex_enter(&flush_list_mutex);
ut_ad(bpage->in_flush_list);
- ut_d(bpage->in_flush_list = FALSE);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
@@ -285,6 +431,15 @@ buf_flush_remove(
break;
}
+ /* If the flush_rbt is active then delete from it as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_delete_from_flush_rbt(bpage);
+ }
+
+ /* Must be done after we have removed it from the flush_rbt
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
bpage->oldest_modification = 0;
ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
@@ -293,6 +448,64 @@ buf_flush_remove(
}
/********************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage has already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+ buf_page_t* bpage, /*!< in/out: control block being moved */
+ buf_page_t* dpage) /*!< in/out: destination block */
+{
+ buf_page_t* prev;
+ buf_page_t* prev_b = NULL;
+
+ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&flush_list_mutex));
+
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ ut_ad(bpage->in_flush_list);
+ ut_ad(dpage->in_flush_list);
+
+ /* If recovery is active we must swap the control blocks in
+ the flush_rbt as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_delete_from_flush_rbt(bpage);
+ prev_b = buf_flush_insert_in_flush_rbt(dpage);
+ }
+
+ /* Must be done after we have removed it from the flush_rbt
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
+ prev = UT_LIST_GET_PREV(flush_list, bpage);
+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+
+ if (prev) {
+ ut_ad(prev->in_flush_list);
+ UT_LIST_INSERT_AFTER(
+ flush_list,
+ buf_pool->flush_list,
+ prev, dpage);
+ } else {
+ UT_LIST_ADD_FIRST(
+ flush_list,
+ buf_pool->flush_list,
+ dpage);
+ }
+
+ /* Just an extra check. Previous in flush_list
+ should be the same control block as in flush_rbt. */
+ ut_a(!buf_pool->flush_rbt || prev_b == prev);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+}
+
+/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
@@ -452,7 +665,8 @@ corrupted_page:
write_buf = trx_doublewrite->write_buf;
i = 0;
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, TRUE,
+ (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
trx_doublewrite->block1, 0, len,
(void*) write_buf, NULL);
@@ -489,7 +703,8 @@ corrupted_page:
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, TRUE,
+ (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
trx_doublewrite->block2, 0, len,
(void*) write_buf, NULL);
@@ -519,7 +734,7 @@ corrupted_page:
flush:
/* Now flush the doublewrite buffer data to disk */
- fil_flush(TRX_SYS_SPACE);
+ fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer
@@ -1473,24 +1688,45 @@ ibool
buf_flush_validate_low(void)
/*========================*/
{
- buf_page_t* bpage;
+ buf_page_t* bpage;
+ const ib_rbt_node_t* rnode = NULL;
UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list));
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+ /* If we are in recovery mode i.e.: flush_rbt != NULL
+ then each block in the flush_list must also be present
+ in the flush_rbt. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ rnode = rbt_first(buf_pool->flush_rbt);
+ }
+
while (bpage != NULL) {
const ib_uint64_t om = bpage->oldest_modification;
ut_ad(bpage->in_flush_list);
//ut_a(buf_page_in_file(bpage)); /* optimistic */
ut_a(om > 0);
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ ut_a(rnode);
+ buf_page_t* rpage = *rbt_value(buf_page_t*,
+ rnode);
+ ut_a(rpage);
+ ut_a(rpage == bpage);
+ rnode = rbt_next(buf_pool->flush_rbt, rnode);
+ }
+
bpage = UT_LIST_GET_NEXT(flush_list, bpage);
ut_a(!bpage || om >= bpage->oldest_modification);
}
+ /* By this time we must have exhausted the traversal of
+ flush_rbt (if active) as well. */
+ ut_a(rnode == NULL);
+
return(TRUE);
}
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index 58e2c23275b..65fdf342e4f 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -371,21 +371,39 @@ scan_again:
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while (bpage != NULL) {
- mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
buf_page_t* prev_bpage;
+ ibool prev_bpage_buf_fix = FALSE;
ut_a(buf_page_in_file(bpage));
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- if (!block_mutex) {
- bpage = prev_bpage;
- continue;
- }
+ /* bpage->space and bpage->io_fix are protected by
+ buf_pool_mutex and block_mutex. It is safe to check
+ them while holding buf_pool_mutex only. */
+
+ if (buf_page_get_space(bpage) != id) {
+ /* Skip this block, as it does not belong to
+ the space that is being invalidated. */
+ } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* We cannot remove this page during this scan
+ yet; maybe the system is currently reading it
+ in, or flushing the modifications to the file */
+
+ all_freed = FALSE;
+ } else {
+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
- if (buf_page_get_space(bpage) == id) {
- if (bpage->buf_fix_count > 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ if (!block_mutex) {
+ /* It may be impossible case...
+ Something wrong, so will be scan_again */
+
+ all_freed = FALSE;
+
+ goto next_page_no_mutex;
+ }
+
+ if (bpage->buf_fix_count > 0) {
/* We cannot remove this page during
this scan yet; maybe the system is
@@ -405,8 +423,40 @@ scan_again:
(ulong) buf_page_get_page_no(bpage));
}
#endif
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
- && ((buf_block_t*) bpage)->is_hashed) {
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ /* This is a compressed-only block
+ descriptor. Ensure that prev_bpage
+ cannot be relocated when bpage is freed. */
+ if (UNIV_LIKELY(prev_bpage != NULL)) {
+ switch (buf_page_get_state(
+ prev_bpage)) {
+ case BUF_BLOCK_FILE_PAGE:
+ /* Descriptors of uncompressed
+ blocks will not be relocated,
+ because we are holding the
+ buf_pool_mutex. */
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* Descriptors of compressed-
+ only blocks can be relocated,
+ unless they are buffer-fixed.
+ Because both bpage and
+ prev_bpage are protected by
+ buf_pool_zip_mutex, it is
+ not necessary to acquire
+ further mutexes. */
+ ut_ad(&buf_pool_zip_mutex
+ == block_mutex);
+ ut_ad(mutex_own(block_mutex));
+ prev_bpage_buf_fix = TRUE;
+ prev_bpage->buf_fix_count++;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ } else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
@@ -432,7 +482,8 @@ scan_again:
buf_flush_remove(bpage);
}
- /* Remove from the LRU list */
+ /* Remove from the LRU list. */
+
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
@@ -444,18 +495,27 @@ scan_again:
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
- /* The compressed block descriptor
- (bpage) has been deallocated and
- block_mutex released. Also,
- buf_buddy_free() may have relocated
- prev_bpage. Rescan the LRU list. */
+ if (prev_bpage_buf_fix) {
+ /* We temporarily buffer-fixed
+ prev_bpage, so that
+ buf_buddy_free() could not
+ relocate it, in case it was a
+ compressed-only block
+ descriptor. */
+
+ mutex_enter(block_mutex);
+ ut_ad(prev_bpage->buf_fix_count > 0);
+ prev_bpage->buf_fix_count--;
+ mutex_exit(block_mutex);
+ }
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- continue;
+ goto next_page_no_mutex;
}
- }
next_page:
- mutex_exit(block_mutex);
+ mutex_exit(block_mutex);
+ }
+
+next_page_no_mutex:
bpage = prev_bpage;
}
@@ -1425,7 +1485,12 @@ buf_LRU_free_block(
ut_ad(buf_page_in_file(bpage));
//ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+#if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+ other systems, Valgrind could complain about uninitialized pad
+ bytes. */
UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+#endif
if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
@@ -1558,8 +1623,13 @@ not_freed:
ut_ad(prev_b->in_LRU_list);
ut_ad(buf_page_in_file(prev_b));
+#if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no
+ padding in buf_page_t. On other
+ systems, Valgrind could complain about
+ uninitialized pad bytes. */
UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
-
+#endif
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
prev_b, b);
@@ -1600,26 +1670,8 @@ not_freed:
if (b->state == BUF_BLOCK_ZIP_PAGE) {
buf_LRU_insert_zip_clean(b);
} else {
- buf_page_t* prev;
-
- ut_ad(b->in_flush_list);
- ut_d(bpage->in_flush_list = FALSE);
-
- prev = UT_LIST_GET_PREV(flush_list, b);
- UT_LIST_REMOVE(flush_list, buf_pool->flush_list, b);
-
- if (prev) {
- ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
- flush_list,
- buf_pool->flush_list,
- prev, b);
- } else {
- UT_LIST_ADD_FIRST(
- flush_list,
- buf_pool->flush_list,
- b);
- }
+ /* Relocate on buf_pool->flush_list. */
+ buf_flush_relocate_on_flush_list(bpage, b);
}
mutex_exit(&flush_list_mutex);
@@ -1792,7 +1844,12 @@ buf_LRU_block_remove_hashed_page(
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
ut_a(bpage->buf_fix_count == 0);
+#if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in
+ buf_page_t. On other systems, Valgrind could complain
+ about uninitialized pad bytes. */
UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+#endif
buf_LRU_remove_block(bpage);
diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c
index e5d04df797f..59de70d9a8a 100644
--- a/storage/xtradb/buf/buf0rea.c
+++ b/storage/xtradb/buf/buf0rea.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -86,7 +86,9 @@ buf_read_page_low(
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
- if (trx_doublewrite && space == TRX_SYS_SPACE
+ if (trx_doublewrite
+ && (space == TRX_SYS_SPACE
+ || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
&& ( (offset >= trx_doublewrite->block1
&& offset < trx_doublewrite->block1
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
@@ -656,6 +658,50 @@ buf_read_recv_pages(
/* It is a single table tablespace and the .ibd file is
missing: do nothing */
+ /* the log records should be treated here same reason
+ for http://bugs.mysql.com/bug.php?id=43948 */
+
+ if (recv_recovery_is_on()) {
+ recv_addr_t* recv_addr;
+
+ mutex_enter(&(recv_sys->mutex));
+
+ if (recv_sys->apply_log_recs == FALSE) {
+ mutex_exit(&(recv_sys->mutex));
+ goto not_to_recover;
+ }
+
+ for (i = 0; i < n_stored; i++) {
+ /* recv_get_fil_addr_struct() */
+ recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
+ hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]),
+ recv_sys->addr_hash));
+ while (recv_addr) {
+ if ((recv_addr->space == space)
+ && (recv_addr->page_no == page_nos[i])) {
+ break;
+ }
+ recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
+ }
+
+ if ((recv_addr == NULL)
+ || (recv_addr->state == RECV_BEING_PROCESSED)
+ || (recv_addr->state == RECV_PROCESSED)) {
+ continue;
+ }
+
+ recv_addr->state = RECV_PROCESSED;
+
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+ }
+
+ mutex_exit(&(recv_sys->mutex));
+
+ fprintf(stderr, " (cannot find space: %lu)", space);
+ }
+not_to_recover:
+
return;
}
@@ -674,10 +720,10 @@ buf_read_recv_pages(
count++;
- if (count > 5000) {
+ if (count > 1000) {
fprintf(stderr,
"InnoDB: Error: InnoDB has waited for"
- " 50 seconds for pending\n"
+ " 10 seconds for pending\n"
"InnoDB: reads to the buffer pool to"
" be finished.\n"
"InnoDB: Number of pending reads %lu,"