author     Marko Mäkelä <marko.makela@mariadb.com>  2021-06-23 13:13:11 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>  2021-06-23 19:06:52 +0300
commit     22b62edaedddb1cabd5b855cdd39a5e90a5695a2 (patch)
tree       63d48ea70a63e6a54895b967eebec0db81182250 /storage/innobase/buf
parent     8af538979bc9e320b0d7015dc36332e442376bbd (diff)
MDEV-25113: Make page flushing faster
buf_page_write_complete(): Reduce the buf_pool.mutex hold time, and do not
acquire buf_pool.flush_list_mutex at all. Instead, mark blocks clean by
setting oldest_modification to 1. Dirty pages of temporary tables will be
identified by the special value 2 instead of the previous special value 1.
(By design of the ib_logfile0 format, actual LSN values smaller than 2048
are not possible.)

buf_LRU_free_page(), buf_pool_t::get_oldest_modification() and many other
functions will remove the garbage (clean blocks) from buf_pool.flush_list
while holding buf_pool.flush_list_mutex.

buf_pool_t::n_flush_LRU, buf_pool_t::n_flush_list: Replaced with non-atomic
variables, protected by buf_pool.mutex, to avoid unnecessary synchronization
when modifying the counts.

export_vars: Remove unnecessary indirection for innodb_pages_created,
innodb_pages_read, innodb_pages_written.
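The lazy flush-list cleanup described above can be pictured with a small
standalone C++ sketch. This is only an illustrative model, not the patched
InnoDB code: the names (FlushList, Page, SENTINEL_CLEAN, on_write_completed,
mark_temporary_dirty) are made up, and the real implementation additionally
coordinates with buf_pool.mutex, the flush-list hazard pointer and the
doublewrite buffer. The point it shows is that the write-completion path only
stores the sentinel value 1 without taking the list mutex, while callers that
already hold the list mutex sweep those entries out later. Real LSN values
passed to insert() are greater than 2, because the ib_logfile0 format makes
LSNs below 2048 impossible, so the sentinels 1 and 2 can never collide with a
genuine oldest_modification.

#include <atomic>
#include <cstdint>
#include <list>
#include <mutex>

using lsn_t = std::uint64_t;

static constexpr lsn_t CLEAN          = 0; /* not dirty, not in the flush list */
static constexpr lsn_t SENTINEL_CLEAN = 1; /* written back; removal deferred */
static constexpr lsn_t SENTINEL_TEMP  = 2; /* dirty page of a temporary table */

struct Page
{
  std::atomic<lsn_t> oldest_modification{CLEAN};
};

class FlushList
{
public:
  /* In InnoDB this runs under log_sys.flush_order_mutex; the sketch only
  guards the list itself. A stale SENTINEL_CLEAN entry left behind by
  on_write_completed() is dropped before the page is re-inserted. */
  void insert(Page *page, lsn_t lsn)
  {
    std::lock_guard<std::mutex> g(list_mutex_);
    if (page->oldest_modification.load() == SENTINEL_CLEAN)
      list_.remove(page);
    page->oldest_modification.store(lsn);
    list_.push_front(page);            /* newest modification at the front */
  }

  /* Dirty pages of temporary tables never enter the flush list; they are
  only tagged with the special value 2. */
  static void mark_temporary_dirty(Page *page)
  {
    page->oldest_modification.store(SENTINEL_TEMP);
  }

  /* Write completion: no flush-list mutex at all. Persistent pages are
  merely tagged as garbage (1); temporary-table pages become clean (0). */
  static void on_write_completed(Page *page, bool temporary_tablespace)
  {
    page->oldest_modification.store(temporary_tablespace
                                    ? CLEAN : SENTINEL_CLEAN);
  }

  /* A caller that already holds the list mutex sweeps the garbage out, in
  the spirit of buf_pool_t::get_oldest_modification() in the patch.
  Returns 0 when every page is clean. */
  lsn_t get_oldest_modification()
  {
    std::lock_guard<std::mutex> g(list_mutex_);
    while (!list_.empty())
    {
      Page *oldest= list_.back();
      const lsn_t lsn= oldest->oldest_modification.load();
      if (lsn != SENTINEL_CLEAN)
        return lsn;                    /* a genuinely dirty page remains */
      oldest->oldest_modification.store(CLEAN);
      list_.pop_back();                /* the deferred removal happens here */
    }
    return 0;
  }

private:
  std::mutex list_mutex_;
  std::list<Page*> list_;
};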
Diffstat (limited to 'storage/innobase/buf')
-rw-r--r--  storage/innobase/buf/buf0buf.cc |  27
-rw-r--r--  storage/innobase/buf/buf0flu.cc | 331
-rw-r--r--  storage/innobase/buf/buf0lru.cc |  25
3 files changed, 209 insertions, 174 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 0ba56c346aa..b0997c43847 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1346,13 +1346,15 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
break;
}
+ const lsn_t lsn= block->page.oldest_modification();
+
if (fsp_is_system_temporary(block->page.id().space()))
{
- ut_ad(block->page.oldest_modification() <= 1);
+ ut_ad(lsn == 0 || lsn == 2);
break;
}
- if (!block->page.ready_for_replace())
+ if (lsn > 1 || !block->page.can_relocate())
return block;
break;
@@ -1509,9 +1511,9 @@ void buf_pool_t::close()
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
we may discard changes. */
ut_d(const lsn_t oldest= bpage->oldest_modification();)
- ut_ad(!oldest || srv_is_being_started ||
- srv_fast_shutdown == 2 ||
- (oldest == 1 && fsp_is_system_temporary(bpage->id().space())));
+ ut_ad(fsp_is_system_temporary(bpage->id().space())
+ ? (oldest == 0 || oldest == 2)
+ : oldest <= 1 || srv_is_being_started || srv_fast_shutdown == 2);
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_page_free_descriptor(bpage);
@@ -3323,6 +3325,8 @@ re_evict:
mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_list();
buf_flush_wait_batch_end_acquiring_mutex(false);
+ while (buf_flush_list_space(space));
+ os_aio_wait_until_no_pending_writes();
if (fix_block->page.buf_fix_count() == 1
&& !fix_block->page.oldest_modification()) {
@@ -4438,8 +4442,8 @@ void buf_pool_t::print()
<< UT_LIST_GET_LEN(flush_list)
<< ", n pending decompressions=" << n_pend_unzip
<< ", n pending reads=" << n_pend_reads
- << ", n pending flush LRU=" << n_flush_LRU
- << " list=" << n_flush_list
+ << ", n pending flush LRU=" << n_flush_LRU_
+ << " list=" << n_flush_list_
<< ", pages made young=" << stat.n_pages_made_young
<< ", not young=" << stat.n_pages_not_made_young
<< ", pages read=" << stat.n_pages_read
@@ -4538,7 +4542,6 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
double time_elapsed;
mysql_mutex_lock(&buf_pool.mutex);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
pool_info->pool_size = buf_pool.curr_size;
@@ -4548,17 +4551,17 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool.free);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list);
pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
pool_info->n_pend_reads = buf_pool.n_pend_reads;
- pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU;
-
- pool_info->n_pending_flush_list = buf_pool.n_flush_list;
+ pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU_;
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ pool_info->n_pending_flush_list = buf_pool.n_flush_list_;
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3978f624e11..aa92ddfcec8 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -128,7 +128,7 @@ inline void buf_pool_t::page_cleaner_wakeup()
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
double pct_lwm= srv_max_dirty_pages_pct_lwm;
- /* if pct_lwm != 0.0 means adpative flushing is enabled.
+ /* if pct_lwm != 0.0, adaptive flushing is enabled.
signal buf page cleaner thread
- if pct_lwm <= dirty_pct then it will invoke apdative flushing flow
- if pct_lwm > dirty_pct then it will invoke idle flushing flow.
@@ -162,53 +162,58 @@ inline void buf_pool_t::page_cleaner_wakeup()
}
}
+inline void buf_pool_t::delete_from_flush_list_low(buf_page_t *bpage)
+{
+ ut_ad(!fsp_is_system_temporary(bpage->id().space()));
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ flush_hp.adjust(bpage);
+ UT_LIST_REMOVE(flush_list, bpage);
+}
+
/** Insert a modified block into the flush list.
-@param[in,out] block modified block
-@param[in] lsn oldest modification */
-void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
+@param block modified block
+@param lsn start LSN of the mini-transaction that modified the block */
+void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn)
{
- mysql_mutex_assert_not_owner(&buf_pool.mutex);
- mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
- ut_ad(lsn);
- ut_ad(!fsp_is_system_temporary(block->page.id().space()));
+ mysql_mutex_assert_not_owner(&mutex);
+ mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
+ ut_ad(lsn > 2);
+ ut_ad(!fsp_is_system_temporary(block->page.id().space()));
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- block->page.set_oldest_modification(lsn);
- MEM_CHECK_DEFINED(block->page.zip.data
- ? block->page.zip.data : block->frame,
- block->physical_size());
- buf_pool.stat.flush_list_bytes += block->physical_size();
- ut_ad(buf_pool.stat.flush_list_bytes <= buf_pool.curr_pool_size);
-
- UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page);
- ut_d(buf_flush_validate_skip());
- buf_pool.page_cleaner_wakeup();
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ mysql_mutex_lock(&flush_list_mutex);
+ if (ut_d(const lsn_t old=) block->page.oldest_modification())
+ {
+ ut_ad(old == 1);
+ delete_from_flush_list_low(&block->page);
+ }
+ else
+ stat.flush_list_bytes+= block->physical_size();
+ ut_ad(stat.flush_list_bytes <= curr_pool_size);
+
+ block->page.set_oldest_modification(lsn);
+ MEM_CHECK_DEFINED(block->page.zip.data
+ ? block->page.zip.data : block->frame,
+ block->physical_size());
+ UT_LIST_ADD_FIRST(flush_list, &block->page);
+ ut_d(buf_flush_validate_skip());
+ page_cleaner_wakeup();
+ mysql_mutex_unlock(&flush_list_mutex);
}
-/** Remove a block from buf_pool.flush_list */
-static void buf_flush_remove_low(buf_page_t *bpage)
+/** Remove a block from flush_list.
+@param bpage buffer pool page
+@param clear whether to invoke buf_page_t::clear_oldest_modification() */
+void buf_pool_t::delete_from_flush_list(buf_page_t *bpage, bool clear)
{
- ut_ad(!fsp_is_system_temporary(bpage->id().space()));
- mysql_mutex_assert_owner(&buf_pool.mutex);
- mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
- ut_ad(!bpage->oldest_modification());
- buf_pool.flush_hp.adjust(bpage);
- UT_LIST_REMOVE(buf_pool.flush_list, bpage);
- buf_pool.stat.flush_list_bytes -= bpage->physical_size();
+ if (clear)
+ bpage->clear_oldest_modification();
+ delete_from_flush_list_low(bpage);
+ stat.flush_list_bytes-= bpage->physical_size();
#ifdef UNIV_DEBUG
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
}
-/** Remove a block from the flush list of modified blocks.
-@param[in,out] bpage block to be removed from the flush list */
-static void buf_flush_remove(buf_page_t *bpage)
-{
- bpage->clear_oldest_modification();
- buf_flush_remove_low(bpage);
-}
-
/** Remove all dirty pages belonging to a given tablespace when we are
deleting the data file of that tablespace.
The pages still remain a part of LRU and are evicted from
@@ -239,7 +244,7 @@ void buf_flush_remove_pages(ulint id)
else if (bpage->io_fix() != BUF_IO_NONE)
deferred= true;
else
- buf_flush_remove(bpage);
+ buf_pool.delete_from_flush_list(bpage);
bpage= prev;
}
@@ -281,31 +286,43 @@ buf_flush_relocate_on_flush_list(
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
- if (!bpage->oldest_modification()) {
+ const lsn_t lsn = bpage->oldest_modification();
+
+ if (!lsn) {
return;
}
+ ut_ad(lsn == 1 || lsn > 2);
+
mysql_mutex_lock(&buf_pool.flush_list_mutex);
- /* FIXME: At this point we have both buf_pool and flush_list
- mutexes. Theoretically removal of a block from flush list is
- only covered by flush_list mutex but currently we do
- have buf_pool mutex in buf_flush_remove() therefore this block
- is guaranteed to be in the flush list. We need to check if
- this will work without the assumption of block removing code
- having the buf_pool mutex. */
- ut_ad(dpage->oldest_modification());
+ /* FIXME: Can we avoid holding buf_pool.mutex here? */
+ ut_ad(dpage->oldest_modification() == lsn);
- /* Important that we adjust the hazard pointer before removing
- the bpage from the flush list. */
- buf_pool.flush_hp.adjust(bpage);
+ if (const lsn_t o_lsn = bpage->oldest_modification()) {
+ ut_ad(o_lsn == lsn);
- bpage->clear_oldest_modification();
+ /* Important that we adjust the hazard pointer before removing
+ the bpage from the flush list. */
+ buf_pool.flush_hp.adjust(bpage);
- prev = UT_LIST_GET_PREV(list, bpage);
- UT_LIST_REMOVE(buf_pool.flush_list, bpage);
+ bpage->clear_oldest_modification();
- if (prev) {
+ prev = UT_LIST_GET_PREV(list, bpage);
+ UT_LIST_REMOVE(buf_pool.flush_list, bpage);
+ } else {
+ /* bpage was removed from buf_pool.flush_list
+ since we last checked, and before we acquired
+ buf_pool.flush_list_mutex. */
+ dpage->list.prev = nullptr;
+ dpage->list.next = nullptr;
+ goto was_clean;
+ }
+
+ if (lsn == 1) {
+was_clean:
+ dpage->clear_oldest_modification();
+ } else if (prev) {
ut_ad(prev->oldest_modification());
UT_LIST_INSERT_AFTER(buf_pool.flush_list, prev, dpage);
} else {
@@ -326,25 +343,24 @@ void buf_page_write_complete(const IORequest &request)
buf_page_t *bpage= request.bpage;
ut_ad(bpage);
ut_ad(bpage->in_file());
+ /* bpage->io_fix() can only be changed by buf_page_write_complete()
+ and buf_page_read_complete() from BUF_IO_READ or BUF_IO_WRITE */
ut_ad(bpage->io_fix() == BUF_IO_WRITE);
ut_ad(!buf_dblwr.is_inside(bpage->id()));
- bool dblwr;
+ ut_ad(request.node->space->id == bpage->id().space());
+
if (bpage->status == buf_page_t::INIT_ON_FLUSH)
- {
bpage->status= buf_page_t::NORMAL;
- dblwr= false;
- }
else
{
ut_ad(bpage->status == buf_page_t::NORMAL);
- dblwr= request.node->space->use_doublewrite();
+ if (request.node->space->use_doublewrite())
+ {
+ ut_ad(request.node->space != fil_system.temp_space);
+ buf_dblwr.write_completed();
+ }
}
- /* We do not need protect io_fix here by mutex to read it because
- this and buf_page_read_complete() are the only functions where we can
- change the value from BUF_IO_READ or BUF_IO_WRITE to some other
- value, and our code ensures that this is the only thread that handles
- the i/o for this block. */
if (bpage->slot)
{
bpage->slot->release();
@@ -355,27 +371,16 @@ void buf_page_write_complete(const IORequest &request)
buf_page_monitor(bpage, BUF_IO_WRITE);
DBUG_PRINT("ib_buf", ("write page %u:%u",
bpage->id().space(), bpage->id().page_no()));
- ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
const bool temp= fsp_is_system_temporary(bpage->id().space());
mysql_mutex_lock(&buf_pool.mutex);
+ buf_pool.stat.n_pages_written++;
+ /* While we do not need any mutex for clearing oldest_modification
+ here, we hope that it will be in the same cache line with io_fix,
+ whose changes must be protected by buf_pool.mutex. */
+ bpage->clear_oldest_modification(temp);
+ ut_ad(bpage->io_fix() == BUF_IO_WRITE);
bpage->set_io_fix(BUF_IO_NONE);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- ut_ad(!temp || bpage->oldest_modification() == 1);
- bpage->clear_oldest_modification();
-
- if (!temp)
- buf_flush_remove_low(bpage);
- else
- ut_ad(request.is_LRU());
-
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
-
- if (dblwr)
- {
- ut_ad(!fsp_is_system_temporary(bpage->id().space()));
- buf_dblwr.write_completed();
- }
/* Because this thread which does the unlocking might not be the same that
did the locking, we use a pass value != 0 in unlock, which simply
@@ -383,17 +388,21 @@ void buf_page_write_complete(const IORequest &request)
if (bpage->state() == BUF_BLOCK_FILE_PAGE)
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock, BUF_IO_WRITE);
- buf_pool.stat.n_pages_written++;
+ if (request.is_LRU())
+ buf_LRU_free_page(bpage, true);
+ else
+ ut_ad(!temp);
if (request.is_LRU())
{
- buf_LRU_free_page(bpage, true);
- if (!--buf_pool.n_flush_LRU)
+ ut_ad(buf_pool.n_flush_LRU_);
+ if (!--buf_pool.n_flush_LRU_)
pthread_cond_broadcast(&buf_pool.done_flush_LRU);
}
else
{
- if (!--buf_pool.n_flush_list)
+ ut_ad(buf_pool.n_flush_list_);
+ if (!--buf_pool.n_flush_list_)
pthread_cond_broadcast(&buf_pool.done_flush_list);
}
@@ -780,36 +789,34 @@ not_compressed:
return d;
}
-/** The following function deals with freed page during flushing.
- i) Writing zeros to the file asynchronously if scrubbing is enabled
- ii) Punch the hole to the file synchoronously if page_compressed is
- enabled for the tablespace
-This function also resets the IO_FIX to IO_NONE and making the
-page status as NORMAL. It initiates the write to the file only after
-releasing the page from flush list and its associated mutex.
-@param[in,out] bpage freed buffer page */
-static void buf_release_freed_page(buf_page_t *bpage)
+/** Free a page whose underlying file page has been freed. */
+inline void buf_pool_t::release_freed_page(buf_page_t *bpage)
{
ut_ad(bpage->in_file());
const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE;
- mysql_mutex_lock(&buf_pool.mutex);
+ mysql_mutex_lock(&mutex);
bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL;
- const bool temp= fsp_is_system_temporary(bpage->id().space());
- ut_ad(!temp || uncompressed);
- ut_ad(!temp || bpage->oldest_modification() == 1);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ mysql_mutex_lock(&flush_list_mutex);
+ if (fsp_is_system_temporary(bpage->id().space()))
+ {
+ ut_ad(uncompressed);
+ ut_ad(bpage->oldest_modification() == 2);
+ }
+ else
+ {
+ ut_ad(bpage->oldest_modification() > 2);
+ delete_from_flush_list(bpage, false);
+ }
bpage->clear_oldest_modification();
- if (!temp)
- buf_flush_remove_low(bpage);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ mysql_mutex_unlock(&flush_list_mutex);
if (uncompressed)
rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock,
BUF_IO_WRITE);
buf_LRU_free_page(bpage, true);
- mysql_mutex_unlock(&buf_pool.mutex);
+ mysql_mutex_unlock(&mutex);
}
/** Write a flushable page from buf_pool to a file.
@@ -840,8 +847,22 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
}
bpage->set_io_fix(BUF_IO_WRITE);
- buf_flush_page_count++;
- mysql_mutex_unlock(&buf_pool.mutex);
+ /* Because bpage->status can only be changed while buf_block_t
+ exists, it cannot be modified for ROW_FORMAT=COMPRESSED pages
+ without first allocating the uncompressed page frame. Such
+ allocation cannot be completed due to our io_fix. So, bpage->status
+ is protected even if !rw_lock. */
+ const auto status= bpage->status;
+
+ if (status != buf_page_t::FREED)
+ {
+ if (lru)
+ buf_pool.n_flush_LRU_++;
+ else
+ buf_pool.n_flush_list_++;
+ buf_flush_page_count++;
+ }
+
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
/* We are holding rw_lock = buf_block_t::lock in SX mode except if
@@ -860,20 +881,14 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
ut_ad(bpage->state() ==
(rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE));
ut_ad(ULINT_UNDEFINED >
- (lru ? buf_pool.n_flush_LRU : buf_pool.n_flush_list));
-
- /* Because bpage->status can only be changed while buf_block_t
- exists, it cannot be modified for ROW_FORMAT=COMPRESSED pages
- without first allocating the uncompressed page frame. Such
- allocation cannot be completed due to our io_fix. So, bpage->status
- is protected even if !rw_lock. */
- const auto status= bpage->status;
+ (lru ? buf_pool.n_flush_LRU_ : buf_pool.n_flush_list_));
+ mysql_mutex_unlock(&buf_pool.mutex);
buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
page_t *frame= bpage->zip.data;
if (status == buf_page_t::FREED)
- buf_release_freed_page(&block->page);
+ buf_pool.release_freed_page(&block->page);
else
{
space->reacquire();
@@ -909,8 +924,8 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
/* innodb_checksum_algorithm=full_crc32 is not implemented for
ROW_FORMAT=COMPRESSED pages. */
ut_ad(!frame);
- page= buf_page_encrypt(space, bpage, page, &size);
- buf_flush_init_for_writing(block, page, nullptr, true);
+ page= buf_page_encrypt(space, bpage, page, &size);
+ buf_flush_init_for_writing(block, page, nullptr, true);
}
else
{
@@ -928,11 +943,6 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
ut_ad(status == bpage->status);
- if (lru)
- buf_pool.n_flush_LRU++;
- else
- buf_pool.n_flush_list++;
-
if (status != buf_page_t::NORMAL || !space->use_doublewrite())
{
if (UNIV_LIKELY(space->purpose == FIL_TYPE_TABLESPACE))
@@ -974,8 +984,10 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
/* We avoid flushing 'non-old' blocks in an LRU flush, because the
flushed blocks are soon freed */
+ if (lru && !bpage->is_old())
+ return false;
- return (!lru || bpage->is_old()) && bpage->ready_for_flush();
+ return bpage->oldest_modification() > 1 && bpage->ready_for_flush();
}
/** Check which neighbors of a page can be flushed from the buf_pool.
@@ -1135,6 +1147,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
if (!lru || id == page_id || bpage->is_old())
{
if (!buf_pool.watch_is_sentinel(*bpage) &&
+ bpage->oldest_modification() > 1 &&
bpage->ready_for_flush() && buf_flush_page(bpage, lru, space))
{
++count;
@@ -1247,7 +1260,7 @@ static void buf_flush_discard_page(buf_page_t *bpage)
bpage->status= buf_page_t::NORMAL;
mysql_mutex_lock(&buf_pool.flush_list_mutex);
- buf_flush_remove(bpage);
+ buf_pool.delete_from_flush_list(bpage);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (rw_lock)
@@ -1278,20 +1291,20 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
bpage && n->flushed + n->evicted < max &&
UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_MIN_LEN &&
- UT_LIST_GET_LEN(buf_pool.free) < free_limit;
- ++scanned, bpage= buf_pool.lru_hp.get())
+ UT_LIST_GET_LEN(buf_pool.free) < free_limit; ++scanned)
{
buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
+ const lsn_t oldest_modification= bpage->oldest_modification();
buf_pool.lru_hp.set(prev);
- if (bpage->ready_for_replace())
+ if (oldest_modification <= 1 && bpage->can_relocate())
{
/* block is ready for eviction i.e., it is clean and is not
IO-fixed or buffer fixed. */
if (buf_LRU_free_page(bpage, true))
++n->evicted;
}
- else if (bpage->ready_for_flush())
+ else if (oldest_modification > 1 && bpage->ready_for_flush())
{
/* Block is ready for flush. Dispatch an IO request. The IO
helper thread will put it on free list in IO completion routine. */
@@ -1334,6 +1347,7 @@ reacquire_mutex:
else
/* Can't evict or dispatch this block. Go to previous. */
ut_ad(buf_pool.lru_hp.is_hp(prev));
+ bpage= buf_pool.lru_hp.get();
}
buf_pool.lru_hp.set(nullptr);
@@ -1405,23 +1419,29 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
ulint len= UT_LIST_GET_LEN(buf_pool.flush_list);
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list);
- bpage && len && count < max_n;
- bpage= buf_pool.flush_hp.get(), ++scanned, len--)
+ bpage && len && count < max_n; ++scanned, len--)
{
const lsn_t oldest_modification= bpage->oldest_modification();
if (oldest_modification >= lsn)
break;
- ut_ad(oldest_modification);
ut_ad(bpage->in_file());
buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
- if (!bpage->ready_for_flush())
+ if (oldest_modification == 1)
{
+ buf_pool.delete_from_flush_list(bpage);
+ skip:
bpage= prev;
continue;
}
+ ut_ad(oldest_modification > 2);
+ ut_ad(bpage->in_file());
+
+ if (!bpage->ready_for_flush())
+ goto skip;
+
/* In order not to degenerate this scan to O(n*n) we attempt to
preserve the pointer position. Any thread that would remove 'prev'
from buf_pool.flush_list must adjust the hazard pointer.
@@ -1498,7 +1518,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
@param lru true=buf_pool.LRU; false=buf_pool.flush_list */
void buf_flush_wait_batch_end(bool lru)
{
- const auto &n_flush= lru ? buf_pool.n_flush_LRU : buf_pool.n_flush_list;
+ const auto &n_flush= lru ? buf_pool.n_flush_LRU_ : buf_pool.n_flush_list_;
if (n_flush)
{
@@ -1523,11 +1543,11 @@ ulint buf_flush_list(ulint max_n, lsn_t lsn)
{
ut_ad(lsn);
- if (buf_pool.n_flush_list)
+ if (buf_pool.n_flush_list())
return 0;
mysql_mutex_lock(&buf_pool.mutex);
- const bool running= buf_pool.n_flush_list != 0;
+ const bool running= buf_pool.n_flush_list_ != 0;
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
while not holding buf_pool.flush_list_mutex */
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
@@ -1537,10 +1557,10 @@ ulint buf_flush_list(ulint max_n, lsn_t lsn)
mysql_mutex_unlock(&buf_pool.mutex);
return 0;
}
- buf_pool.n_flush_list++;
- ulint n_flushed= buf_do_flush_list_batch(max_n, lsn);
- const auto n_flushing= --buf_pool.n_flush_list;
+ buf_pool.n_flush_list_++;
+ const ulint n_flushed= buf_do_flush_list_batch(max_n, lsn);
+ const ulint n_flushing= --buf_pool.n_flush_list_;
buf_pool.try_LRU_scan= true;
@@ -1558,7 +1578,7 @@ ulint buf_flush_list(ulint max_n, lsn_t lsn)
/** Try to flush all the dirty pages that belong to a given tablespace.
@param space tablespace
@param n_flushed number of pages written
-@return whether any pages might not have been flushed */
+@return whether the flush for some pages might not have been initiated */
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
{
const auto space_id= space->id;
@@ -1583,6 +1603,8 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
if (bpage->id().space() != space_id);
+ else if (bpage->oldest_modification() == 1)
+ buf_pool.delete_from_flush_list(bpage);
else if (!bpage->ready_for_flush())
may_have_skipped= true;
else
@@ -1632,7 +1654,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (!buf_pool.flush_hp.is_hp(prev))
may_have_skipped= true;
-next_after_skip:
+ next_after_skip:
bpage= buf_pool.flush_hp.get();
continue;
}
@@ -1670,22 +1692,22 @@ next_after_skip:
@retval 0 if a buf_pool.LRU batch is already running */
ulint buf_flush_LRU(ulint max_n)
{
- if (buf_pool.n_flush_LRU)
+ if (buf_pool.n_flush_LRU())
return 0;
log_buffer_flush_to_disk(true);
mysql_mutex_lock(&buf_pool.mutex);
- if (buf_pool.n_flush_LRU)
+ if (buf_pool.n_flush_LRU_)
{
mysql_mutex_unlock(&buf_pool.mutex);
return 0;
}
- buf_pool.n_flush_LRU++;
+ buf_pool.n_flush_LRU_++;
ulint n_flushed= buf_do_LRU_batch(max_n);
- const auto n_flushing= --buf_pool.n_flush_LRU;
+ const ulint n_flushing= --buf_pool.n_flush_LRU_;
buf_pool.try_LRU_scan= true;
@@ -1908,7 +1930,7 @@ void buf_flush_ahead(lsn_t lsn)
/** Wait for pending flushes to complete. */
void buf_flush_wait_batch_end_acquiring_mutex(bool lru)
{
- if (lru ? buf_pool.n_flush_LRU : buf_pool.n_flush_list)
+ if (lru ? buf_pool.n_flush_LRU() : buf_pool.n_flush_list())
{
mysql_mutex_lock(&buf_pool.mutex);
buf_flush_wait_batch_end(lru);
@@ -2205,7 +2227,6 @@ furious_flush:
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
-
/* If buf pager cleaner is idle and there is no work
(either dirty pages are all flushed or adaptive flushing
is not enabled) then opt for non-timed wait */
@@ -2229,9 +2250,9 @@ furious_flush:
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
- const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
+ const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
- if (!dirty_blocks)
+ if (!oldest_lsn)
{
if (UNIV_UNLIKELY(lsn_limit != 0))
{
@@ -2244,16 +2265,14 @@ unemployed:
continue;
}
+ const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
+ ut_ad(dirty_blocks);
/* We perform dirty reads of the LRU+free list lengths here.
Division by zero is not possible, because buf_pool.flush_list is
guaranteed to be nonempty, and it is a subset of buf_pool.LRU. */
const double dirty_pct= double(dirty_blocks) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
- const lsn_t oldest_lsn= buf_pool.get_oldest_modified()
- ->oldest_modification();
- ut_ad(oldest_lsn);
-
bool idle_flush= false;
if (lsn_limit);
@@ -2414,19 +2433,19 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush the buffer pool");
- while (buf_pool.n_flush_list || buf_flush_list_length())
+ while (buf_pool.n_flush_list() || buf_flush_list_length())
{
buf_flush_list(srv_max_io_capacity);
timespec abstime;
- if (buf_pool.n_flush_list)
+ if (buf_pool.n_flush_list())
{
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush " ULINTPF " pages",
buf_flush_list_length());
set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2);
mysql_mutex_lock(&buf_pool.mutex);
- while (buf_pool.n_flush_list)
+ while (buf_pool.n_flush_list_)
my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex,
&abstime);
mysql_mutex_unlock(&buf_pool.mutex);
@@ -2483,10 +2502,10 @@ static void buf_flush_validate_low()
ut_d(const auto s= bpage->state());
ut_ad(s == BUF_BLOCK_ZIP_PAGE || s == BUF_BLOCK_FILE_PAGE
|| s == BUF_BLOCK_REMOVE_HASH);
- ut_ad(om > 0);
+ ut_ad(om == 1 || om > 2);
bpage = UT_LIST_GET_NEXT(list, bpage);
- ut_ad(!bpage || recv_recovery_is_on()
+ ut_ad(om == 1 || !bpage || recv_recovery_is_on()
|| om >= bpage->oldest_modification());
}
}
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 38394237519..a776ae75949 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -108,7 +108,7 @@ uint buf_LRU_old_threshold_ms;
/** Remove bpage from buf_pool.LRU and buf_pool.page_hash.
-If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(),
+If bpage->state() == BUF_BLOCK_ZIP_PAGE && bpage->oldest_modification() <= 1,
the object will be freed.
@param bpage buffer block
@@ -242,8 +242,8 @@ static bool buf_LRU_free_from_common_LRU_list(ulint limit)
buf_pool.lru_scan_itr.set(prev);
const auto accessed = bpage->is_accessed();
- if (!bpage->oldest_modification()
- && buf_LRU_free_page(bpage, true)) {
+
+ if (buf_LRU_free_page(bpage, true)) {
if (!accessed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
@@ -449,8 +449,8 @@ retry:
#ifndef DBUG_OFF
not_found:
#endif
+ buf_flush_wait_batch_end(true);
mysql_mutex_unlock(&buf_pool.mutex);
- buf_flush_wait_batch_end_acquiring_mutex(true);
if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
&& srv_buf_pool_old_size == srv_buf_pool_size) {
@@ -801,20 +801,33 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
const ulint fold = id.fold();
page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
+ lsn_t oldest_modification = bpage->oldest_modification();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
+ if (oldest_modification == 1) {
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ oldest_modification = bpage->oldest_modification();
+ if (oldest_modification) {
+ ut_ad(oldest_modification == 1);
+ buf_pool.delete_from_flush_list(bpage);
+ }
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ ut_ad(!bpage->oldest_modification());
+ oldest_modification = 0;
+ }
+
if (zip || !bpage->zip.data) {
/* This would completely free the block. */
/* Do not completely free dirty blocks. */
- if (bpage->oldest_modification()) {
+ if (oldest_modification) {
goto func_exit;
}
- } else if (bpage->oldest_modification()
+ } else if (oldest_modification
&& bpage->state() != BUF_BLOCK_FILE_PAGE) {
func_exit:
hash_lock->write_unlock();