Diffstat (limited to 'storage/innobase/buf/buf0flu.cc')
 -rw-r--r--  storage/innobase/buf/buf0flu.cc | 1576
 1 file changed, 643 insertions(+), 933 deletions(-)
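
Before the patch body, a minimal standalone sketch (not part of the patch) of the flush-list ordering that the reworked buf_flush_block_cmp() below encodes: entries are compared first by their oldest_modification LSN and, on a tie, by page identifier. The lsn_t, page_id_t and dirty_page types here are simplified stand-ins for the InnoDB definitions, assumed only for illustration.

// Illustrative sketch only; not part of the patch below.
#include <cstdint>
#include <tuple>

typedef uint64_t lsn_t;

// Simplified stand-in for InnoDB's page_id_t (tablespace id, page number).
struct page_id_t
{
  uint32_t space;
  uint32_t page_no;
  bool operator<(const page_id_t &o) const
  { return std::tie(space, page_no) < std::tie(o.space, o.page_no); }
  bool operator>(const page_id_t &o) const { return o < *this; }
};

// Simplified stand-in for the two fields the comparator actually reads.
struct dirty_page
{
  lsn_t oldest_modification; // nonzero for every page on the flush list
  page_id_t id;
};

// Same contract as the patched buf_flush_block_cmp(): positive when b2 has
// the larger oldest_modification, negative when b1 does; ties fall back to
// comparing the page identifiers.
int flush_block_cmp(const dirty_page &b1, const dirty_page &b2)
{
  if (b2.oldest_modification > b1.oldest_modification) return 1;
  if (b2.oldest_modification < b1.oldest_modification) return -1;
  if (b2.id > b1.id) return 1;
  if (b2.id < b1.id) return -1;
  return 0;
}
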
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 6f2e685a441..04340ed79f8 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -209,7 +209,7 @@ static inline void incr_flush_list_size_in_bytes(const buf_block_t* block)
ut_ad(buf_pool.stat.flush_list_bytes <= buf_pool.curr_pool_size);
}
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+#ifdef UNIV_DEBUG
/** Validate the flush list. */
static void buf_flush_validate_low();
@@ -234,7 +234,7 @@ static void buf_flush_validate_skip()
buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
buf_flush_validate_low();
}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* UNIV_DEBUG */
/******************************************************************//**
Insert a block in the flush_rbt and returns a pointer to its
@@ -306,29 +306,33 @@ buf_flush_block_cmp(
const void* p1, /*!< in: block1 */
const void* p2) /*!< in: block2 */
{
- int ret;
- const buf_page_t* b1 = *(const buf_page_t**) p1;
- const buf_page_t* b2 = *(const buf_page_t**) p2;
+ const buf_page_t* b1 = *static_cast<const buf_page_t*const*>(p1);
+ const buf_page_t* b2 = *static_cast<const buf_page_t*const*>(p2);
ut_ad(b1 != NULL);
ut_ad(b2 != NULL);
ut_ad(mutex_own(&buf_pool.flush_list_mutex));
- ut_ad(b1->in_flush_list);
- ut_ad(b2->in_flush_list);
+ const lsn_t m1 = b1->oldest_modification(),
+ m2 = b2->oldest_modification();
- if (b2->oldest_modification > b1->oldest_modification) {
+ ut_ad(m1);
+ ut_ad(m2);
+
+ if (m2 > m1) {
return(1);
- } else if (b2->oldest_modification < b1->oldest_modification) {
+ } else if (m2 < m1) {
return(-1);
}
- /* If oldest_modification is same then decide on the space. */
- ret = (int)(b2->id.space() - b1->id.space());
-
- /* Or else decide ordering on the page number. */
- return(ret ? ret : (int) (b2->id.page_no() - b1->id.page_no()));
+ if (b2->id() > b1->id()) {
+ return 1;
+ }
+ if (b2->id() < b1->id()) {
+ return -1;
+ }
+ return 0;
}
/********************************************************************//**
@@ -354,9 +358,7 @@ buf_flush_free_flush_rbt(void)
/*==========================*/
{
mutex_enter(&buf_pool.flush_list_mutex);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_flush_validate_low();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ ut_d(buf_flush_validate_low());
rbt_free(buf_pool.flush_rbt);
buf_pool.flush_rbt = NULL;
mutex_exit(&buf_pool.flush_list_mutex);
@@ -369,14 +371,10 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
{
ut_ad(!mutex_own(&buf_pool.mutex));
ut_ad(log_flush_order_mutex_own());
- ut_ad(buf_page_mutex_own(block));
ut_ad(lsn);
mutex_enter(&buf_pool.flush_list_mutex);
- ut_ad(!block->page.in_flush_list);
- ut_d(block->page.in_flush_list = TRUE);
- ut_ad(!block->page.oldest_modification);
- block->page.oldest_modification = lsn;
+ block->page.set_oldest_modification(lsn);
UNIV_MEM_ASSERT_RW(block->page.zip.data
? block->page.zip.data : block->frame,
block->physical_size());
@@ -391,11 +389,10 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
- transitions hold block->mutex and the flush list mutex (via
+ transitions hold buf_pool.flush_list_mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
- ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);
@@ -409,76 +406,10 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page);
func_exit:
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_flush_validate_skip();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
+ ut_d(buf_flush_validate_skip());
mutex_exit(&buf_pool.flush_list_mutex);
}
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage) /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
-{
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(bpage->in_LRU_list);
-
- if (buf_page_in_file(bpage)) {
-
- return(bpage->oldest_modification == 0
- && bpage->buf_fix_count == 0
- && buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- }
-
- ib::fatal() << "Buffer block " << bpage << " state " << bpage->state
- << " in the LRU list!";
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */
-bool
-buf_flush_ready_for_flush(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer control block, must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type)/*!< in: type of flush */
-{
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_a(buf_page_in_file(bpage));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(flush_type < BUF_FLUSH_N_TYPES);
-
- if (bpage->oldest_modification == 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- return(false);
- }
-
- ut_ad(bpage->in_flush_list);
-
- switch (flush_type) {
- case BUF_FLUSH_LIST:
- case BUF_FLUSH_LRU:
- case BUF_FLUSH_SINGLE_PAGE:
- return(true);
-
- case BUF_FLUSH_N_TYPES:
- break;
- }
-
- ut_error;
- return(false);
-}
-
/** Remove a block from the flush list of modified blocks.
@param[in] bpage block to be removed from the flush list */
void buf_flush_remove(buf_page_t* bpage)
@@ -493,36 +424,12 @@ void buf_flush_remove(buf_page_t* bpage)
}
#endif
ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(bpage->in_flush_list);
-
mutex_enter(&buf_pool.flush_list_mutex);
/* Important that we adjust the hazard pointer before removing
the bpage from flush list. */
buf_pool.flush_hp.adjust(bpage);
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- /* Clean compressed pages should not be on the flush list */
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- return;
- case BUF_BLOCK_ZIP_DIRTY:
- buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
- UT_LIST_REMOVE(buf_pool.flush_list, bpage);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_LRU_insert_zip_clean(bpage);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- break;
- case BUF_BLOCK_FILE_PAGE:
- UT_LIST_REMOVE(buf_pool.flush_list, bpage);
- break;
- }
+ UT_LIST_REMOVE(buf_pool.flush_list, bpage);
/* If the flush_rbt is active then delete from there as well. */
if (UNIV_LIKELY_NULL(buf_pool.flush_rbt)) {
@@ -530,16 +437,20 @@ void buf_flush_remove(buf_page_t* bpage)
}
/* Must be done after we have removed it from the flush_rbt
- because we assert on in_flush_list in comparison function. */
- ut_d(bpage->in_flush_list = FALSE);
+ because we assert on it in buf_flush_block_cmp(). */
+ bpage->clear_oldest_modification();
- buf_pool.stat.flush_list_bytes -= bpage->physical_size();
+#ifdef UNIV_DEBUG
+ if (bpage->state() == BUF_BLOCK_ZIP_PAGE) {
+ buf_LRU_insert_zip_clean(bpage);
+ }
+#endif /* UNIV_DEBUG */
- bpage->oldest_modification = 0;
+ buf_pool.stat.flush_list_bytes -= bpage->physical_size();
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+#ifdef UNIV_DEBUG
buf_flush_validate_skip();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* UNIV_DEBUG */
mutex_exit(&buf_pool.flush_list_mutex);
}
@@ -565,8 +476,6 @@ buf_flush_relocate_on_flush_list(
buf_page_t* prev_b = NULL;
ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
mutex_enter(&buf_pool.flush_list_mutex);
/* FIXME: At this point we have both buf_pool and flush_list
@@ -576,8 +485,7 @@ buf_flush_relocate_on_flush_list(
is guaranteed to be in the flush list. We need to check if
this will work without the assumption of block removing code
having the buf_pool mutex. */
- ut_ad(bpage->in_flush_list);
- ut_ad(dpage->in_flush_list);
+ ut_ad(dpage->oldest_modification());
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
@@ -591,14 +499,14 @@ buf_flush_relocate_on_flush_list(
buf_pool.flush_hp.adjust(bpage);
/* Must be done after we have removed it from the flush_rbt
- because we assert on in_flush_list in comparison function. */
- ut_d(bpage->in_flush_list = FALSE);
+ because we assert on it in buf_flush_block_cmp(). */
+ bpage->clear_oldest_modification();
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
if (prev) {
- ut_ad(prev->in_flush_list);
+ ut_ad(prev->oldest_modification());
UT_LIST_INSERT_AFTER( buf_pool.flush_list, prev, dpage);
} else {
UT_LIST_ADD_FIRST(buf_pool.flush_list, dpage);
@@ -606,41 +514,81 @@ buf_flush_relocate_on_flush_list(
/* Just an extra check. Previous in flush_list
should be the same control block as in flush_rbt. */
- ut_a(buf_pool.flush_rbt == NULL || prev_b == prev);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_flush_validate_low();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
+ ut_a(!buf_pool.flush_rbt || prev_b == prev);
+ ut_d(buf_flush_validate_low());
mutex_exit(&buf_pool.flush_list_mutex);
}
-/** Update the flush system data structures when a write is completed.
-@param[in,out] bpage flushed page
-@param[in] dblwr whether the doublewrite buffer was used */
-void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
+/** Update the buf_pool data structures on write completion.
+@param[in,out] bpage written page
+@param[in] flush_type write request type
+@param[in] dblwr whether the doublewrite buffer was used */
+static void buf_flush_write_complete(buf_page_t *bpage,
+ IORequest::flush_t flush_type, bool dblwr)
{
- ut_ad(bpage);
+ ut_ad(mutex_own(&buf_pool.mutex));
+ buf_flush_remove(bpage);
- buf_flush_remove(bpage);
+ switch (--buf_pool.n_flush[flush_type]) {
+#ifdef UNIV_DEBUG
+ case ULINT_UNDEFINED:
+ ut_error;
+ break;
+#endif
+ case 0:
+ if (!buf_pool.init_flush[flush_type])
+ os_event_set(buf_pool.no_flush[flush_type]);
+ }
- const buf_flush_t flush_type = buf_page_get_flush_type(bpage);
- buf_pool.n_flush[flush_type]--;
- ut_ad(buf_pool.n_flush[flush_type] != ULINT_MAX);
+ if (dblwr)
+ buf_dblwr_update(*bpage, flush_type == IORequest::SINGLE_PAGE);
+}
- ut_ad(mutex_own(&buf_pool.mutex));
+/** Complete write of a file page from buf_pool.
+@param bpage written page
+@param request write request
+@param dblwr whether the doublewrite buffer was used
+@param evict whether or not to evict the page from LRU list */
+void buf_page_write_complete(buf_page_t *bpage, const IORequest &request,
+ bool dblwr, bool evict)
+{
+ ut_ad(request.is_write());
+ ut_ad(bpage->in_file());
+ ut_ad(bpage->io_fix() == BUF_IO_WRITE);
+ ut_ad(bpage->id().space() != TRX_SYS_SPACE ||
+ !buf_dblwr_page_inside(bpage->id().page_no()));
+
+ /* We do not need to protect io_fix here by a mutex to read it because
+ this and buf_page_write_complete() are the only functions where we can
+ change the value from BUF_IO_READ or BUF_IO_WRITE to some other
+ value, and our code ensures that this is the only thread that handles
+ the i/o for this block. */
+ if (bpage->slot)
+ {
+ bpage->slot->release();
+ bpage->slot= nullptr;
+ }
- if (buf_pool.n_flush[flush_type] == 0
- && buf_pool.init_flush[flush_type] == FALSE) {
+ if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)))
+ buf_page_monitor(bpage, BUF_IO_WRITE);
+ DBUG_PRINT("ib_buf", ("write page %u:%u",
+ bpage->id().space(), bpage->id().page_no()));
+ mutex_enter(&buf_pool.mutex);
+ bpage->set_io_fix(BUF_IO_NONE);
+ buf_flush_write_complete(bpage, request.flush_type(), dblwr);
- /* The running flush batch has ended */
+ /* Because this thread which does the unlocking might not be the same that
+ did the locking, we use a pass value != 0 in unlock, which simply
+ removes the newest lock debug record, without checking the thread id. */
+ if (bpage->state() == BUF_BLOCK_FILE_PAGE)
+ rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock, BUF_IO_WRITE);
- os_event_set(buf_pool.no_flush[flush_type]);
- }
+ buf_pool.stat.n_pages_written++;
- if (dblwr) {
- buf_dblwr_update(bpage, flush_type);
- }
+ if (evict)
+ buf_LRU_free_page(bpage, true);
+
+ mutex_exit(&buf_pool.mutex);
}
/** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page.
@@ -754,9 +702,9 @@ buf_flush_init_for_writing(
ulint page_type = fil_page_get_type(page);
ulint reset_type = page_type;
- switch (block->page.id.page_no() % 16384) {
+ switch (block->page.id().page_no() % 16384) {
case 0:
- reset_type = block->page.id.page_no() == 0
+ reset_type = block->page.id().page_no() == 0
? FIL_PAGE_TYPE_FSP_HDR
: FIL_PAGE_TYPE_XDES;
break;
@@ -764,10 +712,8 @@ buf_flush_init_for_writing(
reset_type = FIL_PAGE_IBUF_BITMAP;
break;
case FSP_TRX_SYS_PAGE_NO:
- if (block->page.id.page_no()
- == TRX_SYS_PAGE_NO
- && block->page.id.space()
- == TRX_SYS_SPACE) {
+ if (block->page.id()
+ == page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO)) {
reset_type = FIL_PAGE_TYPE_TRX_SYS;
break;
}
@@ -802,7 +748,7 @@ buf_flush_init_for_writing(
if (UNIV_UNLIKELY(page_type != reset_type)) {
ib::info()
<< "Resetting invalid page "
- << block->page.id << " type "
+ << block->page.id() << " type "
<< page_type << " to "
<< reset_type << " when flushing.";
fil_page_set_type(page, reset_type);
@@ -894,22 +840,21 @@ a page is written to disk.
@param[in,out] space tablespace
@param[in,out] bpage buffer page
@param[in] s physical page frame that is being encrypted
+@param[in,out] size payload size in bytes
@return page frame to be written to file
(may be src_frame or an encrypted/compressed copy of it) */
-static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
+static byte *buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s,
+ size_t *size)
{
- if (bpage->status == buf_page_t::FREED) {
- return s;
- }
-
- ut_ad(space->id == bpage->id.space());
- bpage->real_size = srv_page_size;
+ ut_ad(bpage->status != buf_page_t::FREED);
+ ut_ad(space->id == bpage->id().space());
ut_d(fil_page_type_validate(space, s));
+ const uint32_t page_no= bpage->id().page_no();
- switch (bpage->id.page_no()) {
+ switch (page_no) {
case TRX_SYS_PAGE_NO:
- if (bpage->id.space() != TRX_SYS_SPACE)
+ if (bpage->id().space() != TRX_SYS_SPACE)
break;
/* The TRX_SYS page is neither encrypted nor compressed, because
it contains the address of the doublewrite buffer. */
@@ -970,8 +915,8 @@ static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
{
not_compressed:
byte *tmp= space->purpose == FIL_TYPE_TEMPORARY
- ? buf_tmp_page_encrypt(bpage->id.page_no(), s, d)
- : fil_space_encrypt(space, bpage->id.page_no(), s, d);
+ ? buf_tmp_page_encrypt(page_no, s, d)
+ : fil_space_encrypt(space, page_no, s, d);
slot->out_buf= d= tmp;
@@ -984,14 +929,13 @@ not_compressed:
buf_tmp_reserve_compression_buf(slot);
byte *tmp= slot->comp_buf;
ulint len= fil_page_compress(s, tmp, space->flags,
- fil_space_get_block_size(space,
- bpage->id.page_no()),
+ fil_space_get_block_size(space, page_no),
encrypted);
if (!len)
goto not_compressed;
- bpage->real_size= len;
+ *size= len;
if (full_crc32)
{
@@ -1011,7 +955,7 @@ not_compressed:
ut_d(fil_page_type_validate(space, tmp));
if (encrypted)
- tmp = fil_space_encrypt(space, bpage->id.page_no(), tmp, d);
+ tmp = fil_space_encrypt(space, page_no, tmp, d);
if (full_crc32)
{
@@ -1034,369 +978,353 @@ not_compressed:
This function also resets the IO_FIX to IO_NONE and making the
page status as NORMAL. It initiates the write to the file only after
releasing the page from flush list and its associated mutex.
-@param[in,out] bpage freed buffer page
-@param[in] space tablespace object of the freed page */
-static void buf_flush_freed_page(buf_page_t *bpage, fil_space_t *space)
+@param[in,out] bpage freed buffer page
+@param[in] space tablespace object of the freed page */
+static void buf_flush_freed_page(buf_page_t *bpage, const fil_space_t &space)
{
- ut_ad(buf_page_in_file(bpage));
- const bool uncompressed= buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE;
- BPageMutex *block_mutex= uncompressed
- ? &reinterpret_cast<buf_block_t*>(bpage)->mutex
- : &buf_pool.zip_mutex;
-
+ ut_ad(bpage->in_file());
+ const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE;
+ const page_id_t page_id(bpage->id());
+ const auto zip_size= bpage->zip_size();
mutex_enter(&buf_pool.mutex);
- mutex_enter(block_mutex);
-
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL;
- buf_flush_write_complete(bpage, false);
+ buf_flush_remove(bpage);
+ buf_pool.stat.n_pages_written++;
+ mutex_exit(&buf_pool.mutex);
if (uncompressed)
rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock,
- BUF_IO_WRITE);
-
- buf_pool.stat.n_pages_written++;
- mutex_exit(&buf_pool.mutex);
- const page_id_t page_id(bpage->id);
- const auto zip_size= bpage->zip_size();
- mutex_exit(block_mutex);
+ BUF_IO_WRITE);
const bool punch_hole=
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
- space->is_compressed() ||
+ space.is_compressed() ||
#endif
false;
- ut_ad(space->id == page_id.space());
- ut_ad(space->zip_size() == zip_size);
+ ut_ad(space.id == page_id.space());
+ ut_ad(space.zip_size() == zip_size);
if (punch_hole || srv_immediate_scrub_data_uncompressed)
- fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0,
- zip_size ? zip_size : srv_page_size,
- const_cast<byte*>(field_ref_zero), nullptr, false, punch_hole);
-
- space->release_for_io();
+ {
+ fil_io_t fio= fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0,
+ zip_size ? zip_size : srv_page_size,
+ const_cast<byte*>(field_ref_zero), nullptr, false,
+ punch_hole);
+ if (punch_hole && fio.node)
+ fio.node->space->release_for_io();
+ }
}
-/********************************************************************//**
-Does an asynchronous write of a buffer page. NOTE: when the
-doublewrite buffer is used, we must call
-buf_dblwr_flush_buffered_writes after we have posted a batch of
-writes! */
-static
-void
-buf_flush_write_block_low(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer block to write */
- buf_flush_t flush_type, /*!< in: type of flush */
- bool sync) /*!< in: true if sync IO request */
+/** Write a flushable page from buf_pool to a file.
+buf_pool.mutex must be held.
+@param bpage buffer control block
+@param flush_type type of flush
+@param space tablespace (or nullptr if not known)
+@param sync whether this is a synchronous request
+ (only for flush_type=SINGLE_PAGE)
+@return whether the page was flushed and buf_pool.mutex was released */
+bool buf_flush_page(buf_page_t *bpage, IORequest::flush_t flush_type,
+ fil_space_t *space, bool sync)
{
- fil_space_t* space = fil_space_acquire_for_io(bpage->id.space());
- if (!space) {
- return;
- }
- ut_ad(space->purpose == FIL_TYPE_TEMPORARY
- || space->purpose == FIL_TYPE_IMPORT
- || space->purpose == FIL_TYPE_TABLESPACE);
- ut_ad((space->purpose == FIL_TYPE_TEMPORARY)
- == (space == fil_system.temp_space));
-
- page_t* frame = NULL;
- const bool full_crc32 = space->full_crc32();
-
- DBUG_PRINT("ib_buf", ("flush %s %u page %u:%u",
- sync ? "sync" : "async", (unsigned) flush_type,
- bpage->id.space(), bpage->id.page_no()));
-
- ut_ad(buf_page_in_file(bpage));
-
- /* We are not holding buf_pool.mutex or block_mutex here.
- Nevertheless, it is safe to access bpage, because it is
- io_fixed and oldest_modification != 0. Thus, it cannot be
- relocated in the buffer pool or removed from flush_list or
- LRU_list. */
- ut_ad(!mutex_own(&buf_pool.mutex));
- ut_ad(!mutex_own(&buf_pool.flush_list_mutex));
- ut_ad(!buf_page_get_mutex(bpage)->is_owned());
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
- ut_ad(bpage->oldest_modification != 0);
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_DIRTY:
- frame = bpage->zip.data;
- buf_flush_update_zip_checksum(frame, bpage->zip_size());
- break;
- case BUF_BLOCK_FILE_PAGE:
- frame = bpage->zip.data;
- if (!frame) {
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- /* Skip the encryption and compression for the
- freed page */
- if (bpage->status == buf_page_t::FREED) {
- break;
- }
-
- byte* page = reinterpret_cast<const buf_block_t*>(bpage)->frame;
-
- if (full_crc32) {
- page = buf_page_encrypt(space, bpage, page);
- frame = page;
- }
-
- buf_flush_init_for_writing(
- reinterpret_cast<const buf_block_t*>(bpage), page,
- bpage->zip.data ? &bpage->zip : NULL, full_crc32);
- break;
- }
-
- if (!full_crc32) {
- frame = buf_page_encrypt(space, bpage, frame);
- }
-
- if (UNIV_LIKELY(space->purpose == FIL_TYPE_TABLESPACE)) {
- const lsn_t lsn = mach_read_from_8(frame + FIL_PAGE_LSN);
- ut_ad(lsn);
- ut_ad(lsn >= bpage->oldest_modification);
- ut_ad(!srv_read_only_mode);
- log_write_up_to(lsn, true);
- } else {
- ut_ad(space->atomic_write_supported);
- }
+ ut_ad(bpage->in_file());
+ ut_ad(bpage->ready_for_flush());
+ ut_ad(!sync || flush_type == IORequest::SINGLE_PAGE);
+ ut_ad(mutex_own(&buf_pool.mutex));
+
+ rw_lock_t *rw_lock;
+ bool no_fix_count= bpage->buf_fix_count() == 0;
+
+ if (bpage->state() != BUF_BLOCK_FILE_PAGE)
+ rw_lock= nullptr;
+ else if (!(no_fix_count || flush_type == IORequest::FLUSH_LIST) ||
+ (!no_fix_count && srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP &&
+ fsp_is_system_temporary(bpage->id().space())))
+ /* This is a heuristic, to avoid expensive SX attempts. */
+ /* For table residing in temporary tablespace sync is done
+ using IO_FIX and so before scheduling for flush ensure that
+ page is not fixed. */
+ return false;
+ else
+ {
+ rw_lock= &reinterpret_cast<buf_block_t*>(bpage)->lock;
+ if (flush_type != IORequest::FLUSH_LIST &&
+ !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE))
+ return false;
+ }
- if (bpage->status == buf_page_t::FREED) {
- buf_flush_freed_page(bpage, space);
- return;
- }
+ /* We are committed to flushing by the time we get here */
+ bpage->set_io_fix(BUF_IO_WRITE);
+ mutex_exit(&buf_pool.mutex);
- const bool use_doublewrite = bpage->status != buf_page_t::INIT_ON_FLUSH
- && space->use_doublewrite();
+ if (flush_type == IORequest::FLUSH_LIST && rw_lock &&
+ !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE))
+ {
+ if (!fsp_is_system_temporary(bpage->id().space()))
+ /* Avoid a potential deadlock with the doublewrite buffer,
+ which might be holding another buf_block_t::lock. */
+ buf_dblwr_flush_buffered_writes();
+ else
+ os_aio_wait_until_no_pending_writes();
- if (!use_doublewrite) {
- ulint type = IORequest::WRITE;
+ rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
+ }
- IORequest request(type, bpage);
+ /* We are holding rw_lock = buf_block_t::lock in SX mode except if
+ this is a ROW_FORMAT=COMPRESSED page whose uncompressed page frame
+ has been evicted from the buffer pool.
- /* TODO: pass the tablespace to fil_io() */
- fil_io(request,
- sync, bpage->id, bpage->zip_size(), 0,
- bpage->physical_size(),
- frame, bpage);
- } else {
- ut_ad(!srv_read_only_mode);
+ Apart from possible rw_lock protection, bpage is also protected by
+ io_fix and oldest_modification()!=0. Thus, it cannot be relocated in
+ the buffer pool or removed from flush_list or LRU_list. */
+#if 0 /* rw_lock_own() does not hold because we passed BUF_IO_WRITE above. */
+ ut_ad(!rw_lock || rw_lock_own(rw_lock, RW_LOCK_SX));
+#endif
- if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
- buf_dblwr_write_single_page(bpage, sync);
- } else {
- ut_ad(!sync);
- buf_dblwr_add_to_batch(bpage);
- }
- }
+ const fil_space_t * const provided_space= space;
+ if (!space)
+ {
+ space= fil_space_acquire_for_io(bpage->id().space());
+ if (UNIV_UNLIKELY(!space))
+ {
+ mutex_enter(&buf_pool.mutex);
+ bpage->status= buf_page_t::NORMAL;
+ bpage->set_io_fix(BUF_IO_NONE);
+ if (rw_lock)
+ rw_lock_sx_unlock_gen(rw_lock, BUF_IO_WRITE);
+ return false;
+ }
+ }
+ ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
+ (space == fil_system.temp_space));
- /* When doing single page flushing the IO is done synchronously
- and we flush the changes to disk only for the tablespace we
- are working on. */
- if (sync) {
- ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
- if (space->purpose != FIL_TYPE_TEMPORARY) {
- fil_flush(space);
- }
+ const bool full_crc32= space->full_crc32();
- /* The tablespace could already have been dropped,
- because fil_io(request, sync) would already have
- decremented the node->n_pending. However,
- buf_page_io_complete() only needs to look up the
- tablespace during read requests, not during writes. */
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+ DBUG_PRINT("ib_buf", ("flush %s %u page %u:%u",
+ sync ? "sync" : "async", (unsigned) flush_type,
+ bpage->id().space(), bpage->id().page_no()));
+ ut_ad(!mutex_own(&buf_pool.mutex));
+ ut_ad(!mutex_own(&buf_pool.flush_list_mutex));
+ ut_ad(bpage->io_fix() == BUF_IO_WRITE);
+ ut_ad(bpage->oldest_modification());
+ ut_ad(bpage->state() ==
+ (rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE));
+
+ /* Because bpage->status can only be changed while buf_block_t
+ exists, it cannot be modified for ROW_FORMAT=COMPRESSED pages
+ without first allocating the uncompressed page frame. Such
+ allocation cannot be completed due to our io_fix. So, bpage->status
+ is protected even if !rw_lock. */
+ const auto status= bpage->status;
+
+ if (status != buf_page_t::FREED)
+ {
+ switch (buf_pool.n_flush[flush_type]++) {
+ case 0:
+ os_event_reset(buf_pool.no_flush[flush_type]);
+ break;
#ifdef UNIV_DEBUG
- dberr_t err =
+ case ULINT_UNDEFINED:
+ ut_error;
+ break;
#endif
- /* true means we want to evict this page from the
- LRU list as well. */
- buf_page_io_complete(bpage, use_doublewrite, true);
-
- ut_ad(err == DB_SUCCESS);
- }
-
- space->release_for_io();
-
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
-}
-
-/** Write a flushable page asynchronously from the buffer pool to a file.
-NOTE: 1. in simulated aio we must call os_aio_simulated_wake_handler_threads
-after we have posted a batch of writes! 2. buf_page_get_mutex(bpage) must be
-held upon entering this function. The LRU list mutex must be held if flush_type
-== BUF_FLUSH_SINGLE_PAGE. Both mutexes will be released by this function if it
-returns true.
-@param[in] bpage buffer control block
-@param[in] flush_type type of flush
-@param[in] sync true if sync IO request
-@return whether the page was flushed */
-bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
-{
- BPageMutex* block_mutex;
-
- ut_ad(flush_type < BUF_FLUSH_N_TYPES);
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
-
- block_mutex = buf_page_get_mutex(bpage);
- ut_ad(mutex_own(block_mutex));
-
- ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
-
- bool is_uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
- ut_ad(is_uncompressed == (block_mutex != &buf_pool.zip_mutex));
-
- rw_lock_t* rw_lock;
- bool no_fix_count = bpage->buf_fix_count == 0;
-
- if (!is_uncompressed) {
- rw_lock = NULL;
- } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)
- || (!no_fix_count
- && srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP
- && fsp_is_system_temporary(bpage->id.space()))) {
- /* This is a heuristic, to avoid expensive SX attempts. */
- /* For table residing in temporary tablespace sync is done
- using IO_FIX and so before scheduling for flush ensure that
- page is not fixed. */
- return false;
- } else {
- rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
- if (flush_type != BUF_FLUSH_LIST
- && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
- return false;
- }
- }
-
- /* We are committed to flushing by the time we get here */
-
- buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+ }
+ }
- buf_page_set_flush_type(bpage, flush_type);
+ page_t *frame= bpage->zip.data;
+ size_t size, orig_size;
- if (buf_pool.n_flush[flush_type] == 0) {
- os_event_reset(buf_pool.no_flush[flush_type]);
- }
+ if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */
+ {
+ ut_ad(!space->full_crc32());
+ ut_ad(!space->is_compressed()); /* not page_compressed */
+ orig_size= size= bpage->zip_size();
+ if (status != buf_page_t::FREED)
+ {
+ buf_flush_update_zip_checksum(frame, orig_size);
+ frame= buf_page_encrypt(space, bpage, frame, &size);
+ }
+ ut_ad(size == bpage->zip_size());
+ }
+ else
+ {
+ buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
+ byte *page= block->frame;
+ orig_size= size= block->physical_size();
- ++buf_pool.n_flush[flush_type];
- ut_ad(buf_pool.n_flush[flush_type] != 0);
+ if (status != buf_page_t::FREED)
+ {
+ if (full_crc32)
+ {
+ /* innodb_checksum_algorithm=full_crc32 is not implemented for
+ ROW_FORMAT=COMPRESSED pages. */
+ ut_ad(!frame);
+ page= buf_page_encrypt(space, bpage, page, &size);
+ }
+
+ buf_flush_init_for_writing(block, page, frame ? &bpage->zip : nullptr,
+ full_crc32);
+
+ if (!full_crc32)
+ page= buf_page_encrypt(space, bpage, frame ? frame : page, &size);
+ }
- mutex_exit(block_mutex);
+ frame= page;
+ }
- mutex_exit(&buf_pool.mutex);
+ if (UNIV_LIKELY(space->purpose == FIL_TYPE_TABLESPACE))
+ {
+ const lsn_t lsn= mach_read_from_8(frame + FIL_PAGE_LSN);
+ ut_ad(lsn);
+ ut_ad(lsn >= bpage->oldest_modification());
+ ut_ad(!srv_read_only_mode);
+ log_write_up_to(lsn, true);
+ }
+ else
+ ut_ad(space->atomic_write_supported);
- if (flush_type == BUF_FLUSH_LIST
- && is_uncompressed
- && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+ bool use_doublewrite;
+ IORequest request(IORequest::WRITE, bpage, flush_type);
- if (!fsp_is_system_temporary(bpage->id.space())) {
- /* avoiding deadlock possibility involves
- doublewrite buffer, should flush it, because
- it might hold the another block->lock. */
- buf_dblwr_flush_buffered_writes();
- } else {
- buf_dblwr_sync_datafiles();
- }
+ ut_ad(status == bpage->status);
- rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
- }
+ switch (status) {
+ default:
+ ut_ad(status == buf_page_t::FREED);
+ buf_flush_freed_page(bpage, *space);
+ goto done;
+ case buf_page_t::NORMAL:
+ use_doublewrite= space->use_doublewrite();
- /* Even though bpage is not protected by any mutex at this
- point, it is safe to access bpage, because it is io_fixed and
- oldest_modification != 0. Thus, it cannot be relocated in the
- buffer pool or removed from flush_list or LRU_list. */
+ if (use_doublewrite)
+ {
+ ut_ad(!srv_read_only_mode);
+ if (flush_type == IORequest::SINGLE_PAGE)
+ buf_dblwr->write_single_page(bpage, sync, size);
+ else
+ buf_dblwr->add_to_batch(bpage, flush_type, size);
+ break;
+ }
+ /* fall through */
+ case buf_page_t::INIT_ON_FLUSH:
+ use_doublewrite= false;
+ if (size != orig_size)
+ request.set_punch_hole();
+ /* FIXME: pass space to fil_io() */
+ fil_io_t fio= fil_io(request, sync, bpage->id(), bpage->zip_size(), 0,
+ bpage->physical_size(), frame, bpage);
+ ut_ad(!fio.node || fio.node->space == space);
+ if (fio.node && sync)
+ fio.node->space->release_for_io();
+ }
- buf_flush_write_block_low(bpage, flush_type, sync);
- return true;
-}
+ if (sync)
+ {
+ ut_ad(bpage->io_fix() == BUF_IO_WRITE);
-# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: block and LRU list mutexes must be held upon entering this function, and
-they will be released by this function after flushing. This is loosely based on
-buf_flush_batch() and buf_flush_page().
-@param[in,out] block buffer control block
-@return whether the page was flushed and the mutex released */
-bool buf_flush_page_try(buf_block_t* block)
-{
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(buf_page_mutex_own(block));
+ /* When flushing single page synchronously, we flush the changes
+ only for the tablespace we are working on. */
+ if (space->purpose != FIL_TYPE_TEMPORARY)
+ fil_flush(space);
- if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) {
- return false;
- }
+ if (size != orig_size && space->punch_hole)
+ request.set_punch_hole();
+ buf_page_write_complete(bpage, request, use_doublewrite, true/*evict*/);
+ }
- /* The following call will release the buf_pool and block mutex. */
- return buf_flush_page(&block->page, BUF_FLUSH_SINGLE_PAGE, true);
+done:
+ if (!provided_space)
+ space->release_for_io();
+ /* Increment the I/O operation count used for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
+ return true;
}
-# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/** Check the page is in buffer pool and can be flushed.
-@param[in] page_id page id
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
-@return true if the page can be flushed. */
-static
-bool
-buf_flush_check_neighbor(
- const page_id_t page_id,
- buf_flush_t flush_type)
+/** Check whether a page can be flushed from the buf_pool.
+@param id page identifier
+@param flush LRU or FLUSH_LIST
+@return whether the page can be flushed */
+static bool buf_flush_check_neighbor(const page_id_t id,
+ IORequest::flush_t flush)
{
- buf_page_t* bpage;
- bool ret;
+ ut_ad(flush == IORequest::LRU || flush == IORequest::FLUSH_LIST);
+ ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(flush_type == BUF_FLUSH_LRU
- || flush_type == BUF_FLUSH_LIST);
+ buf_page_t *bpage= buf_pool.page_hash_get_low(id);
- mutex_enter(&buf_pool.mutex);
-
- bpage = buf_page_hash_get(page_id);
+ if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ return false;
- if (!bpage) {
+ /* We avoid flushing 'non-old' blocks in an LRU flush, because the
+ flushed blocks are soon freed */
- mutex_exit(&buf_pool.mutex);
- return(false);
- }
+ return (flush != IORequest::LRU || bpage->is_old()) &&
+ bpage->ready_for_flush();
+}
- ut_a(buf_page_in_file(bpage));
+/** Check which neighbors of a page can be flushed from the buf_pool.
+@param space tablespace
+@param id page identifier of a dirty page
+@param flush LRU or FLUSH_LIST
+@return last page number that can be flushed */
+static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
+ page_id_t &id,
+ IORequest::flush_t flush)
+{
+ ut_ad(id.page_no() < space.size);
+ ut_ad(flush == IORequest::LRU || flush == IORequest::FLUSH_LIST);
+ /* When flushed, dirty blocks are searched in neighborhoods of this
+ size, and flushed along with the original page. */
+ const ulint s= buf_pool.curr_size / 16;
+ const uint32_t read_ahead= buf_pool.read_ahead_area;
+ const uint32_t buf_flush_area= read_ahead > s
+ ? static_cast<uint32_t>(s) : read_ahead;
+ page_id_t low= id - (id.page_no() % buf_flush_area);
+ page_id_t high= low + buf_flush_area;
+ high.set_page_no(std::min(high.page_no(),
+ static_cast<uint32_t>(space.size - 1)));
+
+ /* Determine the contiguous dirty area around id. */
+ const ulint id_fold= id.fold();
- /* We avoid flushing 'non-old' blocks in an LRU flush,
- because the flushed blocks are soon freed */
+ mutex_enter(&buf_pool.mutex);
- ret = false;
- if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+ if (id > low)
+ {
+ ulint fold= id_fold;
+ for (page_id_t i= id - 1;; --i)
+ {
+ fold--;
+ ut_ad(i.fold() == fold);
+ if (!buf_flush_check_neighbor(i, flush))
+ {
+ low= i + 1;
+ break;
+ }
+ if (i == low)
+ break;
+ }
+ }
- mutex_enter(block_mutex);
- if (buf_flush_ready_for_flush(bpage, flush_type)) {
- ret = true;
- }
- mutex_exit(block_mutex);
- }
- mutex_exit(&buf_pool.mutex);
+ page_id_t i= id;
+ id= low;
+ ulint fold= id_fold;
+ while (++i < high)
+ {
+ ++fold;
+ ut_ad(i.fold() == fold);
+ if (!buf_flush_check_neighbor(i, flush))
+ break;
+ }
- return(ret);
+ mutex_exit(&buf_pool.mutex);
+ return i;
}
/** Flushes to disk all flushable pages within the flush area.
@param[in] page_id page id
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in] flush LRU or FLUSH_LIST
@param[in] n_flushed number of pages flushed so far in this batch
@param[in] n_to_flush maximum number of pages we are allowed to flush
@return number of pages flushed */
@@ -1404,86 +1332,27 @@ static
ulint
buf_flush_try_neighbors(
const page_id_t page_id,
- buf_flush_t flush_type,
+ IORequest::flush_t flush,
ulint n_flushed,
ulint n_to_flush)
{
- ulint i;
- ulint low;
- ulint high;
ulint count = 0;
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+ ut_ad(flush == IORequest::LRU || flush == IORequest::FLUSH_LIST);
fil_space_t* space = fil_space_acquire_for_io(page_id.space());
if (!space) {
return 0;
}
- if (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
- || !srv_flush_neighbors || !space->is_rotational()) {
- /* If there is little space or neighbor flushing is
- not enabled then just flush the victim. */
- low = page_id.page_no();
- high = page_id.page_no() + 1;
- } else {
- /* When flushed, dirty blocks are searched in
- neighborhoods of this size, and flushed along with the
- original page. */
-
- ulint buf_flush_area;
-
- buf_flush_area = ut_min(
- buf_pool.read_ahead_area,
- buf_pool.curr_size / 16);
+ page_id_t id = page_id;
+ page_id_t high = (srv_flush_neighbors != 1
+ || UT_LIST_GET_LEN(buf_pool.LRU)
+ < BUF_LRU_OLD_MIN_LEN
+ || !space->is_rotational())
+ ? id + 1 /* Flush the minimum. */
+ : buf_flush_check_neighbors(*space, id, flush);
- low = (page_id.page_no() / buf_flush_area) * buf_flush_area;
- high = (page_id.page_no() / buf_flush_area + 1) * buf_flush_area;
-
- if (srv_flush_neighbors == 1) {
- /* adjust 'low' and 'high' to limit
- for contiguous dirty area */
- if (page_id.page_no() > low) {
- for (i = page_id.page_no() - 1; i >= low; i--) {
- if (!buf_flush_check_neighbor(
- page_id_t(page_id.space(), i),
- flush_type)) {
-
- break;
- }
-
- if (i == low) {
- /* Avoid overwrap when low == 0
- and calling
- buf_flush_check_neighbor() with
- i == (ulint) -1 */
- i--;
- break;
- }
- }
- low = i + 1;
- }
-
- for (i = page_id.page_no() + 1;
- i < high
- && buf_flush_check_neighbor(
- page_id_t(page_id.space(), i),
- flush_type);
- i++) {
- /* do nothing */
- }
- high = i;
- }
- }
-
- if (high > space->size) {
- high = space->size;
- }
-
- DBUG_PRINT("ib_buf", ("flush %u:%u..%u",
- page_id.space(),
- (unsigned) low, (unsigned) high));
-
- for (ulint i = low; i < high; i++) {
+ for (; id < high; ++id) {
buf_page_t* bpage;
if ((count + n_flushed) >= n_to_flush) {
@@ -1494,54 +1363,34 @@ buf_flush_try_neighbors(
are flushing has not been flushed yet then
we'll try to flush the victim that we
selected originally. */
- if (i <= page_id.page_no()) {
- i = page_id.page_no();
+ if (id <= page_id) {
+ id = page_id;
} else {
break;
}
}
- const page_id_t cur_page_id(page_id.space(), i);
-
mutex_enter(&buf_pool.mutex);
- bpage = buf_page_hash_get(cur_page_id);
+ bpage = buf_page_hash_get(id);
if (bpage == NULL) {
mutex_exit(&buf_pool.mutex);
continue;
}
- ut_a(buf_page_in_file(bpage));
+ ut_a(bpage->in_file());
/* We avoid flushing 'non-old' blocks in an LRU flush,
because the flushed blocks are soon freed */
- if (flush_type != BUF_FLUSH_LRU
- || i == page_id.page_no()
- || buf_page_is_old(bpage)) {
-
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_flush_ready_for_flush(bpage, flush_type)
- && (i == page_id.page_no()
- || bpage->buf_fix_count == 0)) {
-
- /* We also try to flush those
- neighbors != offset */
-
- if (buf_flush_page(bpage, flush_type, false)) {
- ++count;
- } else {
- mutex_exit(block_mutex);
- mutex_exit(&buf_pool.mutex);
- }
-
- continue;
- } else {
- mutex_exit(block_mutex);
+ if (flush != IORequest::LRU
+ || id == page_id || bpage->is_old()) {
+ if (bpage->ready_for_flush()
+ && (id == page_id || bpage->buf_fix_count() == 0)
+ && buf_flush_page(bpage, flush, space, false)) {
+ ++count;
+ continue;
}
}
mutex_exit(&buf_pool.mutex);
@@ -1560,56 +1409,6 @@ buf_flush_try_neighbors(
return(count);
}
-/** Check if the block is modified and ready for flushing.
-If the the block is ready to flush then flush the page and try o flush
-its neighbors.
-@param[in] bpage buffer control block,
-must be buf_page_in_file(bpage)
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
-@param[in] n_to_flush number of pages to flush
-@param[in,out] count number of pages flushed
-@return TRUE if buf_pool mutex was released during this function.
-This does not guarantee that some pages were written as well.
-Number of pages written are incremented to the count. */
-static
-bool
-buf_flush_page_and_try_neighbors(
- buf_page_t* bpage,
- buf_flush_t flush_type,
- ulint n_to_flush,
- ulint* count)
-{
- ut_ad(mutex_own(&buf_pool.mutex));
-
- bool flushed;
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- ut_a(buf_page_in_file(bpage));
-
- if (buf_flush_ready_for_flush(bpage, flush_type)) {
- const page_id_t page_id = bpage->id;
-
- mutex_exit(block_mutex);
- mutex_exit(&buf_pool.mutex);
-
- /* Try to flush also all the neighbors */
- *count += buf_flush_try_neighbors(
- page_id, flush_type, *count, n_to_flush);
-
- mutex_enter(&buf_pool.mutex);
- flushed = true;
- } else {
- mutex_exit(block_mutex);
- flushed = false;
- }
-
- ut_ad(mutex_own(&buf_pool.mutex));
-
- return(flushed);
-}
-
/*******************************************************************//**
This utility moves the uncompressed frames of pages to the free list.
Note that this function does not actually flush any data to disk. It
@@ -1642,14 +1441,11 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
released and reacquired */
++count;
block = UT_LIST_GET_LAST(buf_pool.unzip_LRU);
-
+ free_len = UT_LIST_GET_LEN(buf_pool.free);
+ lru_len = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
} else {
-
block = UT_LIST_GET_PREV(unzip_LRU, block);
}
-
- free_len = UT_LIST_GET_LEN(buf_pool.free);
- lru_len = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
}
ut_ad(mutex_own(&buf_pool.mutex));
@@ -1671,90 +1467,67 @@ The calling thread is not allowed to own any latches on pages!
@param[in] max desired number of blocks to make available
in the free list (best effort; not guaranteed)
@param[out] n counts of flushed and evicted pages */
-static void buf_flush_LRU_list_batch(ulint max, flush_counters_t* n)
+static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
{
- buf_page_t* bpage;
- ulint scanned = 0;
- ulint free_len = UT_LIST_GET_LEN(buf_pool.free);
- ulint lru_len = UT_LIST_GET_LEN(buf_pool.LRU);
- ulint withdraw_depth = 0;
-
- n->flushed = 0;
- n->evicted = 0;
- n->unzip_LRU_evicted = 0;
- ut_ad(mutex_own(&buf_pool.mutex));
- if (buf_pool.curr_size < buf_pool.old_size
- && buf_pool.withdraw_target > 0) {
- withdraw_depth = buf_pool.withdraw_target
- - UT_LIST_GET_LEN(buf_pool.withdraw);
- }
-
- for (bpage = UT_LIST_GET_LAST(buf_pool.LRU);
- bpage != NULL && n->flushed + n->evicted < max
- && free_len < srv_LRU_scan_depth + withdraw_depth
- && lru_len > BUF_LRU_MIN_LEN;
- ++scanned,
- bpage = buf_pool.lru_hp.get()) {
-
- buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
- buf_pool.lru_hp.set(prev);
-
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_flush_ready_for_replace(bpage)) {
- /* block is ready for eviction i.e., it is
- clean and is not IO-fixed or buffer fixed. */
- mutex_exit(block_mutex);
- if (buf_LRU_free_page(bpage, true)) {
- ++n->evicted;
- }
- } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) {
- /* Block is ready for flush. Dispatch an IO
- request. The IO helper thread will put it on
- free list in IO completion routine. */
- mutex_exit(block_mutex);
- buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LRU, max, &n->flushed);
- } else {
- /* Can't evict or dispatch this block. Go to
- previous. */
- ut_ad(buf_pool.lru_hp.is_hp(prev));
- mutex_exit(block_mutex);
- }
-
- ut_ad(!mutex_own(block_mutex));
- ut_ad(mutex_own(&buf_pool.mutex));
-
- free_len = UT_LIST_GET_LEN(buf_pool.free);
- lru_len = UT_LIST_GET_LEN(buf_pool.LRU);
- }
-
- buf_pool.lru_hp.set(NULL);
-
- /* We keep track of all flushes happening as part of LRU
- flush. When estimating the desired rate at which flush_list
- should be flushed, we factor in this value. */
- buf_lru_flush_page_count += n->flushed;
-
- ut_ad(mutex_own(&buf_pool.mutex));
+ ulint scanned= 0;
+ ulint free_limit= srv_LRU_scan_depth;
+ n->flushed = 0;
+ n->evicted = 0;
+ n->unzip_LRU_evicted = 0;
+ ut_ad(mutex_own(&buf_pool.mutex));
+ if (buf_pool.withdraw_target && buf_pool.curr_size < buf_pool.old_size)
+ free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw);
+
+ for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
+ bpage && n->flushed + n->evicted < max &&
+ UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_MIN_LEN &&
+ UT_LIST_GET_LEN(buf_pool.free) < free_limit;
+ ++scanned, bpage= buf_pool.lru_hp.get())
+ {
+ buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
+ buf_pool.lru_hp.set(prev);
- if (n->evicted) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- n->evicted);
- }
+ if (bpage->ready_for_replace())
+ {
+ /* block is ready for eviction i.e., it is clean and is not
+ IO-fixed or buffer fixed. */
+ if (buf_LRU_free_page(bpage, true))
+ ++n->evicted;
+ }
+ else if (bpage->ready_for_flush())
+ {
+ /* Block is ready for flush. Dispatch an IO request. The IO
+ helper thread will put it on free list in IO completion routine. */
+ const page_id_t page_id(bpage->id());
+ mutex_exit(&buf_pool.mutex);
+ n->flushed+= buf_flush_try_neighbors(page_id, IORequest::LRU, n->flushed,
+ max);
+ mutex_enter(&buf_pool.mutex);
+ }
+ else
+ /* Can't evict or dispatch this block. Go to previous. */
+ ut_ad(buf_pool.lru_hp.is_hp(prev));
+ }
- if (scanned) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_SCANNED,
- MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
- MONITOR_LRU_BATCH_SCANNED_PER_CALL,
- scanned);
- }
+ buf_pool.lru_hp.set(nullptr);
+
+ /* We keep track of all flushes happening as part of LRU flush. When
+ estimating the desired rate at which flush_list should be flushed,
+ we factor in this value. */
+ buf_lru_flush_page_count+= n->flushed;
+
+ ut_ad(mutex_own(&buf_pool.mutex));
+
+ if (n->evicted)
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_EVICT_COUNT,
+ MONITOR_LRU_BATCH_EVICT_PAGES,
+ n->evicted);
+ if (scanned)
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_BATCH_SCANNED_PER_CALL,
+ scanned);
}
/** Flush and move pages from LRU or unzip_LRU list to the free list.
@@ -1785,76 +1558,68 @@ The calling thread is not allowed to own any latches on pages!
not guaranteed that the actual number is that big, though)
@param[in] lsn_limit all blocks whose oldest_modification is smaller
than this should be flushed (if their number does not exceed min_n)
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already
-running */
+@return number of blocks for which the write request was queued */
static ulint buf_do_flush_list_batch(ulint min_n, lsn_t lsn_limit)
{
- ulint count = 0;
- ulint scanned = 0;
-
- ut_ad(mutex_own(&buf_pool.mutex));
-
- /* Start from the end of the list looking for a suitable
- block to be flushed. */
- mutex_enter(&buf_pool.flush_list_mutex);
- ulint len = UT_LIST_GET_LEN(buf_pool.flush_list);
-
- /* In order not to degenerate this scan to O(n*n) we attempt
- to preserve pointer of previous block in the flush list. To do
- so we declare it a hazard pointer. Any thread working on the
- flush list must check the hazard pointer and if it is removing
- the same block then it must reset it. */
- for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
- count < min_n && bpage != NULL && len > 0
- && bpage->oldest_modification < lsn_limit;
- bpage = buf_pool.flush_hp.get(),
- ++scanned) {
-
- buf_page_t* prev;
-
- ut_a(bpage->oldest_modification > 0);
- ut_ad(bpage->in_flush_list);
-
- prev = UT_LIST_GET_PREV(list, bpage);
- buf_pool.flush_hp.set(prev);
- mutex_exit(&buf_pool.flush_list_mutex);
-
-#ifdef UNIV_DEBUG
- bool flushed =
-#endif /* UNIV_DEBUG */
- buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LIST, min_n, &count);
-
- mutex_enter(&buf_pool.flush_list_mutex);
-
- ut_ad(flushed || buf_pool.flush_hp.is_hp(prev));
-
- --len;
- }
+ ulint count= 0;
+ ulint scanned= 0;
+
+ ut_ad(mutex_own(&buf_pool.mutex));
+
+ /* Start from the end of the list looking for a suitable block to be
+ flushed. */
+ mutex_enter(&buf_pool.flush_list_mutex);
+ ulint len = UT_LIST_GET_LEN(buf_pool.flush_list);
+
+ /* In order not to degenerate this scan to O(n*n) we attempt to
+ preserve pointer of previous block in the flush list. To do so we
+ declare it a hazard pointer. Any thread working on the flush list
+ must check the hazard pointer and if it is removing the same block
+ then it must reset it. */
+ for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list);
+ bpage && len && count < min_n;
+ bpage= buf_pool.flush_hp.get(), ++scanned, len--)
+ {
+ const lsn_t oldest_modification= bpage->oldest_modification();
+ if (oldest_modification >= lsn_limit)
+ break;
+ ut_a(oldest_modification);
- buf_pool.flush_hp.set(NULL);
- mutex_exit(&buf_pool.flush_list_mutex);
+ buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
+ buf_pool.flush_hp.set(prev);
+ mutex_exit(&buf_pool.flush_list_mutex);
- if (scanned) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
- MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
- scanned);
- }
+ ut_ad(bpage->in_file());
+ const bool flushed= bpage->ready_for_flush();
- if (count) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_PAGES,
- count);
- }
+ if (flushed)
+ {
+ const page_id_t page_id(bpage->id());
+ mutex_exit(&buf_pool.mutex);
+ count+= buf_flush_try_neighbors(page_id, IORequest::FLUSH_LIST,
+ count, min_n);
+ mutex_enter(&buf_pool.mutex);
+ }
- ut_ad(mutex_own(&buf_pool.mutex));
+ mutex_enter(&buf_pool.flush_list_mutex);
+ ut_ad(flushed || buf_pool.flush_hp.is_hp(prev));
+ }
- return(count);
+ buf_pool.flush_hp.set(nullptr);
+ mutex_exit(&buf_pool.flush_list_mutex);
+
+ if (scanned)
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+ MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+ scanned);
+ if (count)
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_COUNT,
+ MONITOR_FLUSH_BATCH_PAGES,
+ count);
+ ut_ad(mutex_own(&buf_pool.mutex));
+ return count;
}
/** This utility flushes dirty blocks from the end of the LRU list or
@@ -1863,45 +1628,39 @@ NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
-BUF_FLUSH_LIST, then the caller must not own any latches on pages
+@param[in] lru true=LRU; false=FLUSH_LIST;
+if !lru, then the caller must not own any latches on pages
@param[in] min_n wished minimum mumber of blocks flushed (it is
not guaranteed that the actual number is that big, though)
-@param[in] lsn_limit in the case of BUF_FLUSH_LIST all blocks whose
+@param[in] lsn_limit in the case of !lru all blocks whose
@param[out] n counts of flushed and evicted pages
oldest_modification is smaller than this should be flushed (if their number
does not exceed min_n), otherwise ignored */
static
void
buf_flush_batch(
- buf_flush_t flush_type,
+ bool lru,
ulint min_n,
lsn_t lsn_limit,
flush_counters_t* n)
{
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- ut_ad(flush_type == BUF_FLUSH_LRU
- || !sync_check_iterate(dict_sync_check()));
+ ut_ad(lru || !sync_check_iterate(dict_sync_check()));
mutex_enter(&buf_pool.mutex);
/* Note: The buffer pool mutex is released and reacquired within
the flush functions. */
- switch (flush_type) {
- case BUF_FLUSH_LRU:
+ if (lru) {
buf_do_LRU_batch(min_n, n);
- break;
- case BUF_FLUSH_LIST:
+ } else {
n->flushed = buf_do_flush_list_batch(min_n, lsn_limit);
n->evicted = 0;
- break;
- default:
- ut_error;
}
mutex_exit(&buf_pool.mutex);
- DBUG_LOG("ib_buf", "flush " << flush_type << " completed");
+ DBUG_PRINT("ib_buf",
+ (lru ? "LRU flush completed" : "flush_list completed"));
}
/******************************************************************//**
@@ -1925,101 +1684,84 @@ buf_flush_stats(
}
/** Start a buffer flush batch for LRU or flush list
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in] lru true=buf_pool.LRU; false=buf_pool.flush_list
@return whether the flush batch was started (was not already running) */
-static bool buf_flush_start(buf_flush_t flush_type)
+static bool buf_flush_start(bool lru)
{
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
- mutex_enter(&buf_pool.mutex);
-
- if (buf_pool.n_flush[flush_type] > 0
- || buf_pool.init_flush[flush_type] == TRUE) {
-
- /* There is already a flush batch of the same type running */
-
- mutex_exit(&buf_pool.mutex);
-
- return(false);
- }
-
- buf_pool.init_flush[flush_type] = TRUE;
-
- os_event_reset(buf_pool.no_flush[flush_type]);
+ IORequest::flush_t flush_type= lru ? IORequest::LRU : IORequest::FLUSH_LIST;
+ mutex_enter(&buf_pool.mutex);
- mutex_exit(&buf_pool.mutex);
+ if (buf_pool.n_flush[flush_type] > 0 || buf_pool.init_flush[flush_type])
+ {
+ /* There is already a flush batch of the same type running */
+ mutex_exit(&buf_pool.mutex);
+ return false;
+ }
- return(true);
+ buf_pool.init_flush[flush_type]= true;
+ os_event_reset(buf_pool.no_flush[flush_type]);
+ mutex_exit(&buf_pool.mutex);
+ return true;
}
/** End a buffer flush batch.
-@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-static void buf_flush_end(buf_flush_t flush_type)
+@param[in] lru true=buf_pool.LRU; false=buf_pool.flush_list */
+static void buf_flush_end(bool lru)
{
- mutex_enter(&buf_pool.mutex);
+ IORequest::flush_t flush_type= lru ? IORequest::LRU : IORequest::FLUSH_LIST;
- buf_pool.init_flush[flush_type] = FALSE;
-
- buf_pool.try_LRU_scan = TRUE;
-
- if (buf_pool.n_flush[flush_type] == 0) {
+ mutex_enter(&buf_pool.mutex);
- /* The running flush batch has ended */
+ buf_pool.init_flush[flush_type]= false;
+ buf_pool.try_LRU_scan= true;
- os_event_set(buf_pool.no_flush[flush_type]);
- }
+ if (!buf_pool.n_flush[flush_type])
+ /* The running flush batch has ended */
+ os_event_set(buf_pool.no_flush[flush_type]);
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.mutex);
- if (!srv_read_only_mode) {
- buf_dblwr_flush_buffered_writes();
- }
+ if (!srv_read_only_mode)
+ buf_dblwr_flush_buffered_writes();
}
/** Wait until a flush batch ends.
-@param[in] type BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-void buf_flush_wait_batch_end(buf_flush_t type)
+@param[in] lru true=buf_pool.LRU; false=buf_pool.flush_list */
+void buf_flush_wait_batch_end(bool lru)
{
- ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
- thd_wait_begin(NULL, THD_WAIT_DISKIO);
- os_event_wait(buf_pool.no_flush[type]);
- thd_wait_end(NULL);
+ thd_wait_begin(nullptr, THD_WAIT_DISKIO);
+ os_event_wait(buf_pool.no_flush[lru
+ ? IORequest::LRU : IORequest::FLUSH_LIST]);
+ thd_wait_end(nullptr);
}
/** Do flushing batch of a given type.
NOTE: The calling thread is not allowed to own any latches on pages!
-@param[in] type flush type
+@param[in] lru true=buf_pool.LRU; false=buf_pool.flush_list
@param[in] min_n wished minimum mumber of blocks flushed
(it is not guaranteed that the actual number is that big, though)
-@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+@param[in] lsn_limit if !lru, all blocks whose
oldest_modification is smaller than this should be flushed (if their number
does not exceed min_n), otherwise ignored
@param[out] n_processed the number of pages which were processed is
passed back to caller. Ignored if NULL
@retval true if a batch was queued successfully.
@retval false if another batch of same type was already running. */
-bool
-buf_flush_do_batch(
- buf_flush_t type,
- ulint min_n,
- lsn_t lsn_limit,
- flush_counters_t* n)
+bool buf_flush_do_batch(bool lru, ulint min_n, lsn_t lsn_limit,
+ flush_counters_t *n)
{
- ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
-
- if (n != NULL) {
- n->flushed = 0;
- }
+ if (n)
+ n->flushed= 0;
- if (!buf_flush_start(type)) {
- return(false);
- }
+ if (!buf_flush_start(lru))
+ return false;
- buf_flush_batch(type, min_n, lsn_limit, n);
- buf_flush_end(type);
+ buf_flush_batch(lru, min_n, lsn_limit, n);
+ buf_flush_end(lru);
- return(true);
+ return true;
}
+
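
buf_flush_do_batch() is now a thin bracket: reset the caller's counter, try to start, run the batch, end it, and report whether a batch could be queued at all. The same shape in a tiny generic sketch (run_batch_once and its std::function parameters are illustrative only, not part of the server):

#include <functional>

// Returns false without doing any work when a batch of the same kind is
// already in progress, mirroring the return-value contract documented above.
bool run_batch_once(const std::function<bool()> &start,
                    const std::function<void()> &work,
                    const std::function<void()> &end)
{
  if (!start())
    return false;
  work();
  end();
  return true;
}

buf_flush_lists() in the hunk further below is exactly this call chain with lru=false.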
/** Wait until a flush batch of the given lsn ends
@param[in] new_oldest target oldest_modified_lsn to wait for */
void buf_flush_wait_flushed(lsn_t new_oldest)
@@ -2032,18 +1774,17 @@ void buf_flush_wait_flushed(lsn_t new_oldest)
mutex_enter(&buf_pool.flush_list_mutex);
buf_page_t* bpage;
-
/* FIXME: Keep temporary tablespace pages in a separate flush
list. We would only need to write out temporary pages if the
page is about to be evicted from the buffer pool, and the page
contents is still needed (the page has not been freed). */
for (bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
- bpage && fsp_is_system_temporary(bpage->id.space());
+ bpage && fsp_is_system_temporary(bpage->id().space());
bpage = UT_LIST_GET_PREV(list, bpage)) {
- ut_ad(bpage->in_flush_list);
+ ut_ad(bpage->oldest_modification());
}
- lsn_t oldest = bpage ? bpage->oldest_modification : 0;
+ lsn_t oldest = bpage ? bpage->oldest_modification() : 0;
mutex_exit(&buf_pool.flush_list_mutex);
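
The scan above relies on the flush list being ordered by oldest_modification, so walking from the tail and skipping temporary-tablespace pages yields the oldest LSN that still has to be written for recoverability. A standalone model of that lookup (page_model and oldest_to_flush are invented; a std::list stands in for UT_LIST):

#include <cstdint>
#include <list>

struct page_model {
  std::uint64_t oldest_modification;  // 0 would mean "clean"
  bool is_temporary;                  // temp-tablespace pages need no redo-driven flush
};

// Returns 0 when the list is empty or every remaining dirty page belongs to
// the temporary tablespace, i.e. nothing has to be flushed for this wait.
std::uint64_t oldest_to_flush(const std::list<page_model> &flush_list)
{
  for (auto it = flush_list.rbegin(); it != flush_list.rend(); ++it)
    if (!it->is_temporary)
      return it->oldest_modification;
  return 0;
}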
@@ -2062,7 +1803,7 @@ void buf_flush_wait_flushed(lsn_t new_oldest)
NOTE: The calling thread is not allowed to own any latches on pages!
@param[in]	min_n		wished minimum number of blocks flushed (it is
not guaranteed that the actual number is that big, though)
-@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+@param[in] lsn_limit all blocks whose
oldest_modification is smaller than this should be flushed (if their number
does not exceed min_n), otherwise ignored
@param[out] n_processed the number of pages which were processed is
@@ -2073,8 +1814,7 @@ bool buf_flush_lists(ulint min_n, lsn_t lsn_limit, ulint *n_processed)
{
flush_counters_t n;
- bool success = buf_flush_do_batch(
- BUF_FLUSH_LIST, min_n, lsn_limit, &n);
+ bool success = buf_flush_do_batch(false, min_n, lsn_limit, &n);
if (n.flushed) {
buf_flush_stats(n.flushed, 0);
@@ -2097,68 +1837,49 @@ is not fast enough to keep pace with the workload.
@return true if success. */
bool buf_flush_single_page_from_LRU()
{
- ulint scanned;
- buf_page_t* bpage;
- ibool freed;
+ ulint scanned = 0;
+ bool freed = false;
mutex_enter(&buf_pool.mutex);
- for (bpage = buf_pool.single_scan_itr.start(), scanned = 0,
- freed = false;
- bpage != NULL;
+ for (buf_page_t* bpage = buf_pool.single_scan_itr.start(); bpage;
++scanned, bpage = buf_pool.single_scan_itr.get()) {
ut_ad(mutex_own(&buf_pool.mutex));
buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
buf_pool.single_scan_itr.set(prev);
- BPageMutex* block_mutex;
-
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
+ if (!bpage->ready_for_flush()) { // FIXME: ready_for_replace()
+ continue;
+ }
- if (buf_flush_ready_for_replace(bpage)) {
+ if (!bpage->buf_fix_count()
+ && buf_LRU_free_page(bpage, true)) {
/* block is ready for eviction i.e., it is
clean and is not IO-fixed or buffer fixed. */
- mutex_exit(block_mutex);
-
- if (buf_LRU_free_page(bpage, true)) {
- mutex_exit(&buf_pool.mutex);
- freed = true;
- break;
- }
-
- } else if (buf_flush_ready_for_flush(
- bpage, BUF_FLUSH_SINGLE_PAGE)) {
-
+ freed = true;
+ break;
+ } else {
/* Block is ready for flush. Try and dispatch an IO
request. We'll put it on free list in IO completion
routine if it is not buffer fixed. The following call
- will release the buffer pool and block mutex.
+ will release the buf_pool.mutex.
Note: There is no guarantee that this page has actually
been freed, only that it has been flushed to disk */
- freed = buf_flush_page(bpage, BUF_FLUSH_SINGLE_PAGE,
- true);
+ freed = buf_flush_page(bpage, IORequest::SINGLE_PAGE,
+ nullptr, true);
if (freed) {
- break;
+ goto found;
}
-
- mutex_exit(block_mutex);
- } else {
- mutex_exit(block_mutex);
}
- ut_ad(!mutex_own(block_mutex));
- }
- if (!freed) {
- /* Can't find a single flushable page. */
- ut_ad(!bpage);
- mutex_exit(&buf_pool.mutex);
}
+ mutex_exit(&buf_pool.mutex);
+found:
if (scanned) {
MONITOR_INC_VALUE_CUMULATIVE(
MONITOR_LRU_SINGLE_FLUSH_SCANNED,
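
The rewritten loop keeps the old priorities: walk the LRU list from its tail, evict the first page that is clean and unpinned, and otherwise fall back to writing out a flushable page. A much-reduced standalone model of that decision order (lru_page, free_one_page and scan_result are invented; the write is faked by clearing the dirty flag):

#include <iterator>
#include <list>

struct lru_page {
  bool dirty;
  unsigned fix_count;   // pin count; only unpinned pages may be touched here
};

enum class scan_result { none, evicted, flushed };

scan_result free_one_page(std::list<lru_page> &lru)
{
  for (auto it = lru.rbegin(); it != lru.rend(); ++it) {
    if (it->fix_count)
      continue;                         // pinned: skip it
    if (!it->dirty) {
      lru.erase(std::next(it).base());  // clean and unpinned: evict immediately
      return scan_result::evicted;
    }
    it->dirty = false;                  // dirty: pretend we dispatched the write
    return scan_result::flushed;
  }
  return scan_result::none;             // nothing evictable or flushable found
}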
@@ -2207,7 +1928,7 @@ static ulint buf_flush_LRU_list()
that can trigger an LRU flush at the same time.
So, it is not possible that a batch triggered during
last iteration is still running, */
- buf_flush_do_batch(BUF_FLUSH_LRU, scan_depth, 0, &n);
+ buf_flush_do_batch(true, scan_depth, 0, &n);
return(n.flushed);
}
@@ -2215,13 +1936,12 @@ static ulint buf_flush_LRU_list()
/** Wait for any possible LRU flushes to complete. */
void buf_flush_wait_LRU_batch_end()
{
- mutex_enter(&buf_pool.mutex);
- bool wait = buf_pool.n_flush[BUF_FLUSH_LRU]
- || buf_pool.init_flush[BUF_FLUSH_LRU];
- mutex_exit(&buf_pool.mutex);
- if (wait) {
- buf_flush_wait_batch_end(BUF_FLUSH_LRU);
- }
+ mutex_enter(&buf_pool.mutex);
+ bool wait= buf_pool.n_flush[IORequest::LRU] ||
+ buf_pool.init_flush[IORequest::LRU];
+ mutex_exit(&buf_pool.mutex);
+ if (wait)
+ buf_flush_wait_batch_end(true);
}
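
The reformatted function keeps the "peek, then wait" idiom: read the LRU counters under buf_pool.mutex and only block when a batch is actually initiating or still has writes in flight. A hedged sketch of the same idiom with standard primitives (lru_batch_state is invented; with a predicate wait the explicit early return is only an optimization, as in the original):

#include <condition_variable>
#include <mutex>

struct lru_batch_state {
  std::mutex mtx;
  bool initiating = false;
  unsigned writes_outstanding = 0;
  std::condition_variable done;

  void wait_if_active() {
    std::unique_lock<std::mutex> g(mtx);
    if (!initiating && writes_outstanding == 0)
      return;                          // nothing to wait for
    done.wait(g, [&] { return !initiating && writes_outstanding == 0; });
  }
};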
/*********************************************************************//**
@@ -2461,7 +2181,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
for (buf_page_t* b = UT_LIST_GET_LAST(buf_pool.flush_list);
b != NULL;
b = UT_LIST_GET_PREV(list, b)) {
- if (b->oldest_modification > target_lsn) {
+ if (b->oldest_modification() > target_lsn) {
break;
}
++pages_for_lsn;
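
Because the flush list is ordered by oldest_modification, the counting loop above can stop at the first page whose LSN exceeds the target; everything counted so far is exactly what a flush up to that LSN would have to write. A standalone sketch of the same computation (pages_below is invented; the vector holds the LSNs as seen when walking the list from its tail):

#include <cstddef>
#include <cstdint>
#include <vector>

std::size_t pages_below(const std::vector<std::uint64_t> &oldest_first,
                        std::uint64_t target_lsn)
{
  std::size_t n = 0;
  for (std::uint64_t om : oldest_first) {
    if (om > target_lsn)
      break;                 // every later entry is newer still
    ++n;
  }
  return n;
}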
@@ -2559,7 +2279,7 @@ pc_sleep_if_needed(
Requests for all slots to flush.
@param min_n wished minimum number of blocks flushed
(it is not guaranteed that the actual number is that big)
-@param lsn_limit in the case BUF_FLUSH_LIST all blocks whose
+@param lsn_limit in the case of buf_pool.flush_list all blocks whose
oldest_modification is smaller than this should be flushed
(if their number does not exceed min_n), otherwise ignored
*/
@@ -2638,7 +2358,7 @@ static ulint pc_flush_slot()
list_tm = ut_time_ms();
page_cleaner.slot.succeeded_list = buf_flush_do_batch(
- BUF_FLUSH_LIST,
+ false,
page_cleaner.slot.n_pages_requested,
page_cleaner.lsn_limit,
&n);
@@ -2790,25 +2510,18 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
break;
}
- switch (recv_sys.flush_type) {
- case BUF_FLUSH_LRU:
+ if (recv_sys.flush_lru) {
/* Flush pages from end of LRU if required */
pc_request(0, LSN_MAX);
while (pc_flush_slot() > 0) {}
pc_wait_finished(&n_flushed_lru, &n_flushed_list);
- break;
-
- case BUF_FLUSH_LIST:
+ } else {
/* Flush all pages */
do {
pc_request(ULINT_MAX, LSN_MAX);
while (pc_flush_slot() > 0) {}
} while (!pc_wait_finished(&n_flushed_lru,
&n_flushed_list));
- break;
-
- default:
- ut_ad(0);
}
os_event_reset(recv_sys.flush_start);
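
During recovery the page cleaner now branches on a plain boolean: an LRU request is served by one bounded pass, while a flush-list request keeps issuing passes until a pass reports that nothing was left behind. A rough standalone sketch of that control flow (flush_pass and recovery_flush are invented stubs for the pc_request()/pc_flush_slot()/pc_wait_finished() sequence):

// Stub worker: pretend a single pass completed all requested work.
static bool flush_pass(bool /*lru*/, unsigned long /*limit*/) { return true; }

static void recovery_flush(bool flush_lru)
{
  if (flush_lru)
    flush_pass(true, 0);                 // one bounded pass from the LRU tail
  else
    while (!flush_pass(false, ~0UL)) {}  // repeat until a pass reports "done"
}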
@@ -3064,7 +2777,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
considering end of that batch as a finish of our final
sweep and we'll come out of the loop leaving behind dirty pages
in the flush_list */
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+ buf_flush_wait_batch_end(false);
buf_flush_wait_LRU_batch_end();
bool success;
@@ -3080,7 +2793,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
n_flushed = n_flushed_lru + n_flushed_list;
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+ buf_flush_wait_batch_end(false);
buf_flush_wait_LRU_batch_end();
} while (!success || n_flushed > 0);
@@ -3136,7 +2849,7 @@ void buf_flush_sync()
bool success;
do {
success = buf_flush_lists(ULINT_MAX, LSN_MAX, NULL);
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+ buf_flush_wait_batch_end(false);
} while (!success);
}
@@ -3155,13 +2868,13 @@ void buf_flush_request_force(lsn_t lsn_limit)
os_event_set(buf_flush_event);
}
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+#ifdef UNIV_DEBUG
/** Functor to validate the flush list. */
struct Check {
void operator()(const buf_page_t* elem) const
{
- ut_a(elem->in_flush_list);
+ ut_a(elem->oldest_modification());
}
};
@@ -3185,17 +2898,15 @@ static void buf_flush_validate_low()
}
while (bpage != NULL) {
- const lsn_t om = bpage->oldest_modification;
- ut_ad(bpage->in_flush_list);
-
+ const lsn_t om = bpage->oldest_modification();
/* A page in buf_pool.flush_list can be in
BUF_BLOCK_REMOVE_HASH state. This happens when a page
is in the middle of being relocated. In that case the
original descriptor can have this state and still be
in the flush list waiting to acquire the
buf_pool.flush_list_mutex to complete the relocation. */
- ut_a(buf_page_in_file(bpage)
- || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
+ ut_a(bpage->in_file()
+ || bpage->state() == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
if (UNIV_LIKELY_NULL(buf_pool.flush_rbt)) {
@@ -3211,7 +2922,7 @@ static void buf_flush_validate_low()
bpage = UT_LIST_GET_NEXT(list, bpage);
- ut_a(bpage == NULL || om >= bpage->oldest_modification);
+ ut_a(!bpage || om >= bpage->oldest_modification());
}
/* By this time we must have exhausted the traversal of
@@ -3226,5 +2937,4 @@ void buf_flush_validate()
buf_flush_validate_low();
mutex_exit(&buf_pool.flush_list_mutex);
}
-
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* UNIV_DEBUG */
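
The invariant that buf_flush_validate_low() walks above can be stated compactly: every flush-list entry carries a nonzero oldest_modification, and the values never increase from the head of the list towards its tail. A standalone sketch of that check (validate_flush_order is invented and operates on a plain vector of LSNs in head-to-tail order):

#include <cassert>
#include <cstdint>
#include <vector>

void validate_flush_order(const std::vector<std::uint64_t> &oldest_mods)
{
  std::uint64_t prev = UINT64_MAX;
  for (std::uint64_t om : oldest_mods) {
    assert(om > 0);        // a clean page must never be on the flush list
    assert(prev >= om);    // newest modifications stay at the head
    prev = om;
  }
}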