path: root/storage/innobase/buf/buf0flu.c
author    Vasil Dimov <vasil.dimov@oracle.com>  2010-04-19 20:53:16 +0300
committer Vasil Dimov <vasil.dimov@oracle.com>  2010-04-19 20:53:16 +0300
commit    c7525a01306e49897c52ea7e9844697f8f8379a1 (patch)
tree      0c1af6979d3c01e38c801d71a2fc74150cc30667 /storage/innobase/buf/buf0flu.c
parent    49dc3a7b325135c8cd2f93c213a48497d1b57dae (diff)
parent    ed30b504d2fd9c14af243248f5a94ae5488ad026 (diff)
download  mariadb-git-c7525a01306e49897c52ea7e9844697f8f8379a1.tar.gz
Merge from innodb-branches-innodb+ (2)
Diffstat (limited to 'storage/innobase/buf/buf0flu.c')
-rw-r--r--  storage/innobase/buf/buf0flu.c  607
1 files changed, 430 insertions, 177 deletions
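
This merge threads an explicit buf_pool_t* through the flush code so that each buffer pool instance owns its own flush list, flush-list mutex and recovery-time flush_rbt. The pattern that recurs throughout the diff below (buf_flush_init_flush_rbt(), buf_flush_free_flush_rbt(), buf_flush_free_margins(), buf_flush_list()) is a loop over srv_buf_pool_instances; a condensed sketch, not part of the patch, using only identifiers that appear in the diff:

	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		/* Per-instance state (flush_list, flush_rbt, the
		flush counters) is protected by that instance's
		own mutexes. */
		buf_flush_list_mutex_enter(buf_pool);
		/* ... operate on buf_pool->flush_list ... */
		buf_flush_list_mutex_exit(buf_pool);
	}
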
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
index 847f8dd9452..bb126a35867 100644
--- a/storage/innobase/buf/buf0flu.c
+++ b/storage/innobase/buf/buf0flu.c
@@ -83,8 +83,9 @@ Validates the flush list.
@return TRUE if ok */
static
ibool
-buf_flush_validate_low(void);
-/*========================*/
+buf_flush_validate_low(
+/*===================*/
+ buf_pool_t* buf_pool); /*!< in: Buffer pool instance */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
@@ -98,11 +99,12 @@ buf_flush_insert_in_flush_rbt(
/*==========================*/
buf_page_t* bpage) /*!< in: bpage to be inserted. */
{
- buf_page_t* prev = NULL;
const ib_rbt_node_t* c_node;
const ib_rbt_node_t* p_node;
+ buf_page_t* prev = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_flush_list_mutex_own());
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
/* Insert this buffer into the rbt. */
c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
@@ -127,10 +129,10 @@ buf_flush_delete_from_flush_rbt(
/*============================*/
buf_page_t* bpage) /*!< in: bpage to be removed. */
{
+ ibool ret = FALSE;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ibool ret = FALSE;
-
- ut_ad(buf_flush_list_mutex_own());
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
ret = rbt_delete(buf_pool->flush_rbt, &bpage);
ut_ad(ret);
@@ -156,22 +158,21 @@ buf_flush_block_cmp(
int ret;
const buf_page_t* b1 = *(const buf_page_t**) p1;
const buf_page_t* b2 = *(const buf_page_t**) p2;
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
+#endif /* UNIV_DEBUG */
ut_ad(b1 != NULL);
ut_ad(b2 != NULL);
- ut_ad(buf_flush_list_mutex_own());
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
ut_ad(b1->in_flush_list);
ut_ad(b2->in_flush_list);
- if (b2->oldest_modification
- > b1->oldest_modification) {
+ if (b2->oldest_modification > b1->oldest_modification) {
return(1);
- }
-
- if (b2->oldest_modification
- < b1->oldest_modification) {
+ } else if (b2->oldest_modification < b1->oldest_modification) {
return(-1);
}
@@ -191,12 +192,21 @@ void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
- buf_flush_list_mutex_enter();
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
- /* Create red black tree for speedy insertions in flush list. */
- buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
- buf_flush_block_cmp);
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_enter(buf_pool);
+
+ /* Create red black tree for speedy insertions in flush list. */
+ buf_pool->flush_rbt = rbt_create(
+ sizeof(buf_page_t*), buf_flush_block_cmp);
+
+ buf_flush_list_mutex_exit(buf_pool);
+ }
}
/********************************************************************//**
@@ -206,16 +216,24 @@ void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
- buf_flush_list_mutex_enter();
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+
+ buf_flush_list_mutex_enter(buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
+ ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- rbt_free(buf_pool->flush_rbt);
- buf_pool->flush_rbt = NULL;
+ rbt_free(buf_pool->flush_rbt);
+ buf_pool->flush_rbt = NULL;
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
+ }
}
/********************************************************************//**
@@ -224,14 +242,15 @@ UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
- buf_block_t* block, /*!< in/out: block which is modified */
- ib_uint64_t lsn) /*!< in: oldest modification */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_block_t* block, /*!< in/out: block which is modified */
+ ib_uint64_t lsn) /*!< in: oldest modification */
{
- ut_ad(!buf_pool_mutex_own());
- ut_ad(buf_flush_order_mutex_own());
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(log_flush_order_mutex_own());
ut_ad(mutex_own(&block->mutex));
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
@@ -240,8 +259,8 @@ buf_flush_insert_into_flush_list(
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_flush_list_mutex_exit();
- buf_flush_insert_sorted_into_flush_list(block, lsn);
+ buf_flush_list_mutex_exit(buf_pool);
+ buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
return;
}
@@ -253,10 +272,10 @@ buf_flush_insert_into_flush_list(
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
+ ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
@@ -267,18 +286,19 @@ UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
- buf_block_t* block, /*!< in/out: block which is modified */
- ib_uint64_t lsn) /*!< in: oldest modification */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_block_t* block, /*!< in/out: block which is modified */
+ ib_uint64_t lsn) /*!< in: oldest modification */
{
buf_page_t* prev_b;
buf_page_t* b;
- ut_ad(!buf_pool_mutex_own());
- ut_ad(buf_flush_order_mutex_own());
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(log_flush_order_mutex_own());
ut_ad(mutex_own(&block->mutex));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
/* The field in_LRU_list is protected by buf_pool_mutex, which
we are not holding. However, while a block is in the flush
@@ -332,10 +352,10 @@ buf_flush_insert_sorted_into_flush_list(
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
+ ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
@@ -349,7 +369,10 @@ buf_flush_ready_for_replace(
buf_page_t* bpage) /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
{
- ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_LRU_list);
@@ -382,8 +405,11 @@ buf_flush_ready_for_flush(
buf_page_in_file(bpage) */
enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_a(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
@@ -416,15 +442,17 @@ buf_flush_remove(
/*=============*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
- ut_ad(buf_pool_mutex_own());
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_flush_list);
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
- /* clean compressed pages should not be on the flush list */
+ /* Clean compressed pages should not be on the flush list */
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@@ -442,7 +470,7 @@ buf_flush_remove(
break;
}
- /* If the flush_rbt is active then delete from it as well. */
+ /* If the flush_rbt is active then delete from there as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}
@@ -454,18 +482,18 @@ buf_flush_remove(
bpage->oldest_modification = 0;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
+ ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage has already been
+Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
-exact copy of each other. For example, they both will have different
+exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
@@ -478,17 +506,20 @@ buf_flush_relocate_on_flush_list(
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage) /*!< in/out: destination block */
{
- buf_page_t* prev;
- buf_page_t* prev_b = NULL;
+ buf_page_t* prev;
+ buf_page_t* prev_b = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ /* Must reside in the same buffer pool. */
+ ut_ad(buf_pool == buf_pool_from_bpage(dpage));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
/* FIXME: At this point we have both buf_pool and flush_list
- mutexes. Theoratically removal of a block from flush list is
+ mutexes. Theoretically removal of a block from flush list is
only covered by flush_list mutex but currently we do
have buf_pool mutex in buf_flush_remove() therefore this block
is guaranteed to be in the flush list. We need to check if
@@ -529,10 +560,10 @@ buf_flush_relocate_on_flush_list(
ut_a(!buf_pool->flush_rbt || prev_b == prev);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
+ ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
@@ -544,6 +575,7 @@ buf_flush_write_complete(
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
enum buf_flush flush_type;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(bpage);
@@ -564,8 +596,8 @@ buf_flush_write_complete(
/* fprintf(stderr, "n pending flush %lu\n",
buf_pool->n_flush[flush_type]); */
- if ((buf_pool->n_flush[flush_type] == 0)
- && (buf_pool->init_flush[flush_type] == FALSE)) {
+ if (buf_pool->n_flush[flush_type] == 0
+ && buf_pool->init_flush[flush_type] == FALSE) {
/* The running flush batch has ended */
@@ -979,6 +1011,10 @@ buf_flush_write_block_low(
/*======================*/
buf_page_t* bpage) /*!< in: buffer block to write */
{
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif
ulint zip_size = buf_page_get_zip_size(bpage);
page_t* frame = NULL;
#ifdef UNIV_LOG_DEBUG
@@ -992,8 +1028,8 @@ buf_flush_write_block_low(
io_fixed and oldest_modification != 0. Thus, it cannot be
relocated in the buffer pool or removed from flush_list or
LRU_list. */
- ut_ad(!buf_pool_mutex_own());
- ut_ad(!buf_flush_list_mutex_own());
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification != 0);
@@ -1062,13 +1098,14 @@ buf_flush_write_block_low(
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: in simulated aio we must call
os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function. */
static
void
buf_flush_page(
/*===========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_page_t* bpage, /*!< in: buffer control block */
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
@@ -1077,7 +1114,7 @@ buf_flush_page(
ibool is_uncompressed;
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
block_mutex = buf_page_get_mutex(bpage);
@@ -1097,7 +1134,7 @@ buf_flush_page(
buf_pool->n_flush[flush_type]++;
is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
+ ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
switch (flush_type) {
ibool is_s_latched;
@@ -1113,7 +1150,7 @@ buf_flush_page(
}
mutex_exit(block_mutex);
- buf_pool_mutex_exit();
+ buf_pool_mutex_exit(buf_pool);
/* Even though bpage is not protected by any mutex at
this point, it is safe to access bpage, because it is
@@ -1150,7 +1187,7 @@ buf_flush_page(
immediately. */
mutex_exit(block_mutex);
- buf_pool_mutex_exit();
+ buf_pool_mutex_exit(buf_pool);
break;
default:
@@ -1184,13 +1221,13 @@ buf_flush_try_neighbors(
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST */
{
- buf_page_t* bpage;
- ulint low, high;
- ulint count = 0;
ulint i;
+ ulint low;
+ ulint high;
+ ulint count = 0;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ut_ad(flush_type == BUF_FLUSH_LRU
- || flush_type == BUF_FLUSH_LIST);
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
/* If there is little space, it is better not to flush
@@ -1203,8 +1240,11 @@ buf_flush_try_neighbors(
neighborhoods of this size, and flushed along with the
original page. */
- ulint buf_flush_area = ut_min(BUF_READ_AHEAD_AREA,
- buf_pool->curr_size / 16);
+ ulint buf_flush_area;
+
+ buf_flush_area = ut_min(
+ BUF_READ_AHEAD_AREA(buf_pool),
+ buf_pool->curr_size / 16);
low = (offset / buf_flush_area) * buf_flush_area;
high = (offset / buf_flush_area + 1) * buf_flush_area;
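
The only change above is that the flush area is now derived per instance via BUF_READ_AHEAD_AREA(buf_pool). As a worked example with made-up numbers (editor's illustration, not from the patch): if buf_flush_area is 64 pages and offset is 1000, then

	low  = (1000 / 64) * 64;	/* = 960  */
	high = (1000 / 64 + 1) * 64;	/* = 1024 */

so pages 960..1023 of the same tablespace become neighbor candidates; the next hunk additionally clamps high to fil_space_get_size(space).
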
@@ -1216,14 +1256,20 @@ buf_flush_try_neighbors(
high = fil_space_get_size(space);
}
- buf_pool_mutex_enter();
-
for (i = low; i < high; i++) {
- bpage = buf_page_hash_get(space, i);
+ buf_page_t* bpage;
+
+ buf_pool = buf_pool_get(space, i);
+
+ buf_pool_mutex_enter(buf_pool);
+
+ /* We only want to flush pages from this buffer pool. */
+ bpage = buf_page_hash_get(buf_pool, space, i);
if (!bpage) {
+ buf_pool_mutex_exit(buf_pool);
continue;
}
@@ -1250,19 +1296,18 @@ buf_flush_try_neighbors(
doublewrite buffer before we start
waiting. */
- buf_flush_page(bpage, flush_type);
+ buf_flush_page(buf_pool, bpage, flush_type);
ut_ad(!mutex_own(block_mutex));
+ ut_ad(!buf_pool_mutex_own(buf_pool));
count++;
-
- buf_pool_mutex_enter();
+ continue;
} else {
mutex_exit(block_mutex);
}
}
+ buf_pool_mutex_exit(buf_pool);
}
- buf_pool_mutex_exit();
-
return(count);
}
@@ -1285,10 +1330,13 @@ buf_flush_page_and_try_neighbors(
ulint* count) /*!< in/out: number of pages
flushed */
{
- ibool flushed = FALSE;
mutex_t* block_mutex;
+ ibool flushed = FALSE;
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+#endif /* UNIV_DEBUG */
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
@@ -1296,10 +1344,13 @@ buf_flush_page_and_try_neighbors(
ut_a(buf_page_in_file(bpage));
if (buf_flush_ready_for_flush(bpage, flush_type)) {
- ulint space;
- ulint offset;
+ ulint space;
+ ulint offset;
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_bpage(bpage);
- buf_pool_mutex_exit();
+ buf_pool_mutex_exit(buf_pool);
/* These fields are protected by both the
buffer pool mutex and block mutex. */
@@ -1309,16 +1360,15 @@ buf_flush_page_and_try_neighbors(
mutex_exit(block_mutex);
/* Try to flush also all the neighbors */
- *count += buf_flush_try_neighbors(space, offset,
- flush_type);
+ *count += buf_flush_try_neighbors(space, offset, flush_type);
- buf_pool_mutex_enter();
+ buf_pool_mutex_enter(buf_pool);
flushed = TRUE;
} else {
mutex_exit(block_mutex);
}
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
return(flushed);
}
@@ -1333,12 +1383,13 @@ static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
- ulint max) /*!< in: max of blocks to flush */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint max) /*!< in: max of blocks to flush */
{
buf_page_t* bpage;
ulint count = 0;
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
do {
/* Start from the end of the list looking for a
@@ -1360,7 +1411,7 @@ buf_flush_LRU_list_batch(
should be flushed, we factor in this value. */
buf_lru_flush_page_count += count;
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
return(count);
}
@@ -1375,6 +1426,7 @@ static
ulint
buf_flush_flush_list_batch(
/*=======================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint min_n, /*!< in: wished minimum number
of blocks flushed (it is not
guaranteed that the actual
@@ -1389,16 +1441,16 @@ buf_flush_flush_list_batch(
buf_page_t* bpage;
ulint count = 0;
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
/* If we have flushed enough, leave the loop */
do {
/* Start from the end of the list looking for a suitable
block to be flushed. */
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
- /* We use len here because theoratically insertions can
+ /* We use len here because theoretically insertions can
happen in the flush_list below while we are traversing
it for a suitable candidate for flushing. We'd like to
set a limit on how far we are willing to traverse
@@ -1410,11 +1462,10 @@ buf_flush_flush_list_batch(
ut_a(bpage->oldest_modification > 0);
}
-
if (!bpage || bpage->oldest_modification >= lsn_limit) {
/* We have flushed enough */
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
break;
}
@@ -1422,7 +1473,7 @@ buf_flush_flush_list_batch(
ut_ad(bpage->in_flush_list);
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
/* The list may change during the flushing and we cannot
safely preserve within this function a pointer to a
@@ -1432,12 +1483,11 @@ buf_flush_flush_list_batch(
&& !buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LIST, &count)) {
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
- /* If we are here that means that buf_pool
- mutex was not released in
- buf_flush_page_and_try_neighbors() above and
- this guarantees that bpage didn't get
+ /* If we are here that means that buf_pool->mutex
+ was not released in buf_flush_page_and_try_neighbors()
+ above and this guarantees that bpage didn't get
relocated since we released the flush_list
mutex above. There is a chance, however, that
the bpage got removed from flush_list (not
@@ -1447,21 +1497,22 @@ buf_flush_flush_list_batch(
the oldest_modification and if it is zero
we start all over again. */
if (bpage->oldest_modification == 0) {
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
break;
}
+
bpage = UT_LIST_GET_PREV(list, bpage);
ut_ad(!bpage || bpage->in_flush_list);
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
--len;
}
} while (count < min_n && bpage != NULL && len > 0);
- ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_pool_mutex_own(buf_pool));
return(count);
}
@@ -1474,10 +1525,11 @@ end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
+static
ulint
buf_flush_batch(
/*============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
@@ -1485,59 +1537,36 @@ buf_flush_batch(
ulint min_n, /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- ib_uint64_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
+ ib_uint64_t lsn_limit) /*!< in: in the case of BUF_FLUSH_LIST
+ all blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
{
ulint count = 0;
- ut_ad(flush_type == BUF_FLUSH_LRU
- || flush_type == BUF_FLUSH_LIST);
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
ut_ad((flush_type != BUF_FLUSH_LIST)
|| sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
- buf_pool_mutex_enter();
- if (buf_pool->n_flush[flush_type] > 0
- || buf_pool->init_flush[flush_type] == TRUE) {
-
- /* There is already a flush batch of the same type running */
-
- buf_pool_mutex_exit();
-
- return(ULINT_UNDEFINED);
- }
-
- buf_pool->init_flush[flush_type] = TRUE;
+ buf_pool_mutex_enter(buf_pool);
/* Note: The buffer pool mutex is released and reacquired within
the flush functions. */
switch(flush_type) {
case BUF_FLUSH_LRU:
- count = buf_flush_LRU_list_batch(min_n);
+ count = buf_flush_LRU_list_batch(buf_pool, min_n);
break;
case BUF_FLUSH_LIST:
- count = buf_flush_flush_list_batch(min_n, lsn_limit);
+ count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
break;
default:
ut_error;
}
- ut_ad(buf_pool_mutex_own());
-
- buf_pool->init_flush[flush_type] = FALSE;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- /* The running flush batch has ended */
-
- os_event_set(buf_pool->no_flush[flush_type]);
- }
-
- buf_pool_mutex_exit();
+ buf_pool_mutex_exit(buf_pool);
buf_flush_buffered_writes();
@@ -1556,18 +1585,207 @@ buf_flush_batch(
}
/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+static
+void
+buf_flush_common(
+/*=============*/
+ enum buf_flush flush_type, /*!< in: type of flush */
+ ulint page_count) /*!< in: number of pages flushed */
+{
+ buf_flush_buffered_writes();
+
+ ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints && page_count > 0) {
+ fprintf(stderr, flush_type == BUF_FLUSH_LRU
+ ? "Flushed %lu pages in LRU flush\n"
+ : "Flushed %lu pages in flush list flush\n",
+ (ulong) page_count);
+ }
+#endif /* UNIV_DEBUG */
+
+ srv_buf_pool_flushed += page_count;
+
+ if (flush_type == BUF_FLUSH_LRU) {
+ /* We keep track of all flushes happening as part of LRU
+ flush. When estimating the desired rate at which flush_list
+ should be flushed we factor in this value. */
+ buf_lru_flush_page_count += page_count;
+ }
+}
+
+/******************************************************************//**
+Start a buffer flush batch for LRU or flush list */
+static
+ibool
+buf_flush_start(
+/*============*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+{
+ buf_pool_mutex_enter(buf_pool);
+
+ if (buf_pool->n_flush[flush_type] > 0
+ || buf_pool->init_flush[flush_type] == TRUE) {
+
+ /* There is already a flush batch of the same type running */
+
+ buf_pool_mutex_exit(buf_pool);
+
+ return(FALSE);
+ }
+
+ buf_pool->init_flush[flush_type] = TRUE;
+
+ buf_pool_mutex_exit(buf_pool);
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+End a buffer flush batch for LRU or flush list */
+static
+void
+buf_flush_end(
+/*==========*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+{
+ buf_pool_mutex_enter(buf_pool);
+
+ buf_pool->init_flush[flush_type] = FALSE;
+
+ if (buf_pool->n_flush[flush_type] == 0) {
+
+ /* The running flush batch has ended */
+
+ os_event_set(buf_pool->no_flush[flush_type]);
+ }
+
+ buf_pool_mutex_exit(buf_pool);
+}
+
+/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- enum buf_flush type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ enum buf_flush type) /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
{
- ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
+ ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
+
+ if (buf_pool == NULL) {
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
- os_event_wait(buf_pool->no_flush[type]);
+ os_event_wait(buf_pool->no_flush[type]);
+ }
+ } else {
+ os_event_wait(buf_pool->no_flush[type]);
+ }
}
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list.
+NOTE: The calling thread may own latches to pages: to avoid deadlocks,
+this function must be written so that it cannot end up waiting for these
+latches!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_LRU(
+/*==========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint min_n) /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+{
+ ulint page_count;
+
+ if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
+ return(ULINT_UNDEFINED);
+ }
+
+ page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
+
+ buf_flush_end(buf_pool, BUF_FLUSH_LRU);
+
+ buf_flush_common(BUF_FLUSH_LRU, page_count);
+
+ return(page_count);
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush list of
+all buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_list(
+/*===========*/
+ ulint min_n, /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ ib_uint64_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
+ blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+{
+ ulint i;
+ ulint total_page_count = 0;
+
+ if (min_n != ULINT_MAX) {
+ /* Ensure that flushing is spread evenly amongst the
+ buffer pool instances. When min_n is ULINT_MAX
+ we need to flush everything up to the lsn limit
+ so no limit here. */
+ min_n = (min_n + srv_buf_pool_instances - 1)
+ / srv_buf_pool_instances;
+ }
+
+ /* We use buffer pool instance 0 to control start and end of
+ flushing of the flush list since we always flush all instances
+ at once in this case. */
+
+ /* Flush to lsn_limit in all buffer pool instances */
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+ ulint page_count = 0;
+
+ buf_pool = buf_pool_from_array(i);
+
+ if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
+ continue;
+ }
+
+ page_count = buf_flush_batch(
+ buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
+
+ buf_flush_end(buf_pool, BUF_FLUSH_LIST);
+
+ buf_flush_common(BUF_FLUSH_LIST, page_count);
+
+ total_page_count += page_count;
+ }
+
+ return(total_page_count);
+}
+
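+
Two usage details of the new interface are easy to miss: buf_flush_list() spreads min_n across the instances with a ceiling division, and buf_flush_wait_batch_end() now treats a NULL buf_pool as "wait on every instance". A caller-side sketch (editor's example with made-up numbers, not part of the patch):

	ib_uint64_t	lsn = log_get_lsn();
	ulint		n_flushed;

	/* Request ~100 page writes of blocks older than lsn.  With
	3 buffer pool instances each one is asked for
	(100 + 3 - 1) / 3 = 34 pages, so the total queued may
	slightly exceed the request.  Instances that already have a
	list flush running are skipped. */
	n_flushed = buf_flush_list(100, lsn);

	/* Block until the list-flush batches of all instances have
	ended; NULL makes the function loop over
	srv_buf_pool_instances. */
	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
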
/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
@@ -1576,23 +1794,24 @@ and in the free list.
LRU list */
static
ulint
-buf_flush_LRU_recommendation(void)
-/*==============================*/
+buf_flush_LRU_recommendation(
+/*=========================*/
+ buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
{
buf_page_t* bpage;
ulint n_replaceable;
ulint distance = 0;
- buf_pool_mutex_enter();
+ buf_pool_mutex_enter(buf_pool);
n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while ((bpage != NULL)
- && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
- + BUF_FLUSH_EXTRA_MARGIN)
- && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+ && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
+ + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
+ && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
@@ -1609,14 +1828,15 @@ buf_flush_LRU_recommendation(void)
bpage = UT_LIST_GET_PREV(LRU, bpage);
}
- buf_pool_mutex_exit();
+ buf_pool_mutex_exit(buf_pool);
- if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
return(0);
}
- return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
+ return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
+ + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
- n_replaceable);
}
@@ -1628,25 +1848,46 @@ flush only pages such that the s-lock required for flushing can be acquired
immediately, without waiting. */
UNIV_INTERN
void
-buf_flush_free_margin(void)
-/*=======================*/
+buf_flush_free_margin(
+/*==================*/
+ buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
{
ulint n_to_flush;
- ulint n_flushed;
- n_to_flush = buf_flush_LRU_recommendation();
+ n_to_flush = buf_flush_LRU_recommendation(buf_pool);
if (n_to_flush > 0) {
- n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
+ ulint n_flushed;
+
+ n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
+
if (n_flushed == ULINT_UNDEFINED) {
/* There was an LRU type flush batch already running;
let us wait for it to end */
- buf_flush_wait_batch_end(BUF_FLUSH_LRU);
+ buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
}
}
}
+/*********************************************************************//**
+Flushes pages from the end of all the LRU lists. */
+UNIV_INTERN
+void
+buf_flush_free_margins(void)
+/*========================*/
+{
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+
+ buf_flush_free_margin(buf_pool);
+ }
+}
+
/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval.
@@ -1707,22 +1948,28 @@ ulint
buf_flush_get_desired_flush_rate(void)
/*==================================*/
{
- ulint redo_avg;
- ulint lru_flush_avg;
- ulint n_dirty;
- ulint n_flush_req;
- lint rate;
- ib_uint64_t lsn = log_get_lsn();
- ulint log_capacity = log_get_capacity();
+ ulint i;
+ lint rate;
+ ulint redo_avg;
+ ulint n_dirty = 0;
+ ulint n_flush_req;
+ ulint lru_flush_avg;
+ ib_uint64_t lsn = log_get_lsn();
+ ulint log_capacity = log_get_capacity();
/* log_capacity should never be zero after the initialization
of log subsystem. */
ut_ad(log_capacity != 0);
/* Get total number of dirty pages. It is OK to access
- flush_list without holding any mtex as we are using this
+ flush_list without holding any mutex as we are using this
only for heuristics. */
- n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
+ }
/* An overflow can happen if we generate more than 2^32 bytes
of redo in this interval i.e.: 4G of redo in 1 second. We can
@@ -1764,13 +2011,14 @@ Validates the flush list.
@return TRUE if ok */
static
ibool
-buf_flush_validate_low(void)
-/*========================*/
+buf_flush_validate_low(
+/*===================*/
+ buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
{
buf_page_t* bpage;
const ib_rbt_node_t* rnode = NULL;
- ut_ad(buf_flush_list_mutex_own());
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list));
@@ -1786,6 +2034,9 @@ buf_flush_validate_low(void)
while (bpage != NULL) {
const ib_uint64_t om = bpage->oldest_modification;
+
+ ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
+
ut_ad(bpage->in_flush_list);
/* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
@@ -1795,14 +2046,15 @@ buf_flush_validate_low(void)
waiting to acquire the flush_list_mutex to complete
the relocation. */
ut_a(buf_page_in_file(bpage)
- || buf_page_get_state(bpage)
- == BUF_BLOCK_REMOVE_HASH);
+ || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_page_t* rpage;
+
ut_a(rnode);
- buf_page_t* rpage = *rbt_value(buf_page_t*,
- rnode);
+ rpage = *rbt_value(buf_page_t*, rnode);
+
ut_a(rpage);
ut_a(rpage == bpage);
rnode = rbt_next(buf_pool->flush_rbt, rnode);
@@ -1825,16 +2077,17 @@ Validates the flush list.
@return TRUE if ok */
UNIV_INTERN
ibool
-buf_flush_validate(void)
-/*====================*/
+buf_flush_validate(
+/*===============*/
+ buf_pool_t* buf_pool) /*!< buffer pool instance */
{
ibool ret;
- buf_flush_list_mutex_enter();
+ buf_flush_list_mutex_enter(buf_pool);
- ret = buf_flush_validate_low();
+ ret = buf_flush_validate_low(buf_pool);
- buf_flush_list_mutex_exit();
+ buf_flush_list_mutex_exit(buf_pool);
return(ret);
}
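
buf_flush_validate() now checks a single instance, so a debug-build caller that wants to validate every pool has to iterate itself; a minimal sketch (editor's example, not part of the patch):

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		ut_a(buf_flush_validate(buf_pool_from_array(i)));
	}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */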