summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2021-06-23 13:13:16 +0300
committer: Marko Mäkelä <marko.makela@mariadb.com> 2021-06-23 19:06:52 +0300
commit: 6441bc614a99f5cd6357c8a23b9f583c56d0a90c (patch)
tree: 20fd8149ff622d93176c487560a32b2d2a9249aa
parent: 22b62edaedddb1cabd5b855cdd39a5e90a5695a2 (diff)
download: mariadb-git-6441bc614a99f5cd6357c8a23b9f583c56d0a90c.tar.gz
MDEV-25113: Introduce a page cleaner mode before 'furious flush'
MDEV-23855 changed how the page cleaner is signaled by user threads. If a threshold is exceeded, a mini-transaction commit would invoke buf_flush_ahead() in order to initiate page flushing before all writers would eventually grind to a halt in log_free_check(), waiting for the checkpoint age to reduce. However, buf_flush_ahead() would always initiate 'furious flushing', making the buf_flush_page_cleaner thread write innodb_io_capacity_max pages per batch, without sleeping between batches, until the limit LSN is reached. Because this could saturate the I/O subsystem, system throughput could be significantly reduced during these 'furious flushing' spikes. With this change, we introduce a gentler version of flush-ahead, which would write innodb_io_capacity_max pages per second until the 'soft limit' is reached. buf_flush_ahead(): Add a parameter to specify whether furious flushing is requested. buf_flush_async_lsn: Similar to buf_flush_sync_lsn, a limit for the less intrusive flushing. buf_flush_page_cleaner(): Keep working until buf_flush_async_lsn has been reached. log_close(): Suppress a warning message in the event that a new log is being created during startup, when old logs did not exist. Return what type of page cleaning will be needed. mtr_t::finish_write(): Also when m_log.is_small(), invoke log_close(). Return what type of page cleaning will be needed. mtr_t::commit(): Invoke buf_flush_ahead() based on the return value of mtr_t::finish_write().
-rw-r--r-- storage/innobase/buf/buf0flu.cc 45
-rw-r--r-- storage/innobase/include/buf0flu.h 7
-rw-r--r-- storage/innobase/include/mtr0mtr.h 13
-rw-r--r-- storage/innobase/mtr/mtr0mtr.cc 50
4 files changed, 73 insertions, 42 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index aa92ddfcec8..2e5d1f2332d 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -63,8 +63,11 @@ static constexpr ulint buf_flush_lsn_scan_factor = 3;
/** Average redo generation rate */
static lsn_t lsn_avg_rate = 0;
-/** Target oldest_modification for the page cleaner; writes are protected by
-buf_pool.flush_list_mutex */
+/** Target oldest_modification for the page cleaner background flushing;
+writes are protected by buf_pool.flush_list_mutex */
+static Atomic_relaxed<lsn_t> buf_flush_async_lsn;
+/** Target oldest_modification for the page cleaner furious flushing;
+writes are protected by buf_pool.flush_list_mutex */
static Atomic_relaxed<lsn_t> buf_flush_sync_lsn;
#ifdef UNIV_PFS_THREAD
@@ -1905,9 +1908,10 @@ try_checkpoint:
}
}
-/** If innodb_flush_sync=ON, initiate a furious flush.
-@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
-void buf_flush_ahead(lsn_t lsn)
+/** Initiate more eager page flushing if the log checkpoint age is too old.
+@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
+@param furious true=furious flushing, false=limit to innodb_io_capacity */
+ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
{
mysql_mutex_assert_not_owner(&log_sys.mutex);
ut_ad(!srv_read_only_mode);
@@ -1915,14 +1919,15 @@ void buf_flush_ahead(lsn_t lsn)
if (recv_recovery_is_on())
recv_sys.apply(true);
- if (buf_flush_sync_lsn < lsn)
+ Atomic_relaxed<lsn_t> &limit= furious
+ ? buf_flush_sync_lsn : buf_flush_async_lsn;
+
+ if (limit < lsn)
{
mysql_mutex_lock(&buf_pool.flush_list_mutex);
- if (buf_flush_sync_lsn < lsn)
- {
- buf_flush_sync_lsn= lsn;
- pthread_cond_signal(&buf_pool.do_flush_list);
- }
+ if (limit < lsn)
+ limit= lsn;
+ pthread_cond_signal(&buf_pool.do_flush_list);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
}
@@ -1997,6 +2002,8 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
if (measure >= target)
buf_flush_sync_lsn= 0;
+ else if (measure >= buf_flush_async_lsn)
+ buf_flush_async_lsn= 0;
/* wake up buf_flush_wait_flushed() */
pthread_cond_broadcast(&buf_pool.done_flush_list);
@@ -2016,7 +2023,7 @@ static bool af_needed_for_redo(lsn_t oldest_lsn)
{
lsn_t age= (log_sys.get_lsn() - oldest_lsn);
lsn_t af_lwm= static_cast<lsn_t>(srv_adaptive_flushing_lwm *
- static_cast<double>(log_sys.log_capacity) / 100);
+ static_cast<double>(log_sys.log_capacity) / 100);
/* if age > af_lwm adaptive flushing is recommended */
return (age > af_lwm);
@@ -2240,6 +2247,7 @@ furious_flush:
set_timespec(abstime, 1);
+ lsn_t soft_lsn_limit= buf_flush_async_lsn;
lsn_limit= buf_flush_sync_lsn;
if (UNIV_UNLIKELY(lsn_limit != 0))
@@ -2261,6 +2269,7 @@ furious_flush:
pthread_cond_broadcast(&buf_pool.done_flush_list);
}
unemployed:
+ buf_flush_async_lsn= 0;
buf_pool.page_cleaner_set_idle(true);
continue;
}
@@ -2275,7 +2284,7 @@ unemployed:
bool idle_flush= false;
- if (lsn_limit);
+ if (lsn_limit || soft_lsn_limit);
else if (af_needed_for_redo(oldest_lsn));
else if (srv_max_dirty_pages_pct_lwm != 0.0)
{
@@ -2300,11 +2309,16 @@ unemployed:
goto unemployed;
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
- buf_flush_sync_lsn= 0;
+ lsn_limit= buf_flush_sync_lsn= 0;
+ if (UNIV_UNLIKELY(soft_lsn_limit != 0) && oldest_lsn >= soft_lsn_limit)
+ soft_lsn_limit= buf_flush_async_lsn= 0;
buf_pool.page_cleaner_set_idle(false);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ if (!lsn_limit)
+ lsn_limit= soft_lsn_limit;
+
ulint n_flushed;
if (UNIV_UNLIKELY(lsn_limit != 0))
@@ -2355,7 +2369,7 @@ do_checkpoint:
goto do_checkpoint;
}
}
- else
+ else if (buf_flush_async_lsn <= oldest_lsn)
{
mysql_mutex_lock(&buf_pool.flush_list_mutex);
goto unemployed;
@@ -2410,6 +2424,7 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
ut_ad(srv_operation == SRV_OPERATION_NORMAL ||
srv_operation == SRV_OPERATION_RESTORE ||
srv_operation == SRV_OPERATION_RESTORE_EXPORT);
+ buf_flush_async_lsn= 0;
buf_flush_sync_lsn= 0;
buf_page_cleaner_is_active= true;
os_thread_create(buf_flush_page_cleaner);
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 76a4ecff950..8d45cf2be38 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -111,9 +111,10 @@ void buf_flush_wait_batch_end(bool lru);
/** Wait until all persistent pages are flushed up to a limit.
@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
-/** If innodb_flush_sync=ON, initiate a furious flush.
-@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
-void buf_flush_ahead(lsn_t lsn);
+/** Initiate more eager page flushing if the log checkpoint age is too old.
+@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
+@param furious true=furious flushing, false=limit to innodb_io_capacity */
+ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious);
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 97bfa2e53cb..a9539000602 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -588,6 +588,17 @@ public:
@return number of buffer count added by this mtr */
uint32_t get_fix_count(const buf_block_t *block) const;
+ /** type of page flushing is needed during commit() */
+ enum page_flush_ahead
+ {
+ /** no need to trigger page cleaner */
+ PAGE_FLUSH_NO= 0,
+ /** asynchronous flushing is needed */
+ PAGE_FLUSH_ASYNC,
+ /** furious flushing is needed */
+ PAGE_FLUSH_SYNC
+ };
+
private:
/** Log a write of a byte string to a page.
@param block buffer page
@@ -621,7 +632,7 @@ private:
/** Append the redo log records to the redo log buffer.
@param len number of bytes to write
@return {start_lsn,flush_ahead} */
- inline std::pair<lsn_t,bool> finish_write(ulint len);
+ inline std::pair<lsn_t,page_flush_ahead> finish_write(ulint len);
/** Release the resources */
inline void release_resources();
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index f7bde06544f..f6747686eb5 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -402,12 +402,12 @@ void mtr_t::commit()
{
ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO);
- std::pair<lsn_t,bool> lsns;
+ std::pair<lsn_t,page_flush_ahead> lsns;
if (const ulint len= prepare_write())
lsns= finish_write(len);
else
- lsns= { m_commit_lsn, false };
+ lsns= { m_commit_lsn, PAGE_FLUSH_NO };
if (m_made_dirty)
mysql_mutex_lock(&log_sys.flush_order_mutex);
@@ -447,8 +447,8 @@ void mtr_t::commit()
m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
- if (lsns.second)
- buf_flush_ahead(m_commit_lsn);
+ if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
+ buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);
if (m_made_dirty)
srv_stats.log_write_requests.inc();
@@ -754,7 +754,7 @@ static void log_write_low(const void *str, size_t size)
/** Close the log at mini-transaction commit.
@return whether buffer pool flushing is needed */
-static bool log_close(lsn_t lsn)
+static mtr_t::page_flush_ahead log_close(lsn_t lsn)
{
mysql_mutex_assert_owner(&log_sys.mutex);
ut_ad(lsn == log_sys.get_lsn());
@@ -777,7 +777,9 @@ static bool log_close(lsn_t lsn)
const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn;
- if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity))
+ if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity) &&
+ /* silence message on create_log_file() after the log had been deleted */
+ checkpoint_age != lsn)
{
time_t t= time(nullptr);
if (!log_close_warned || difftime(t, log_close_warn_time) > 15)
@@ -786,15 +788,17 @@ static bool log_close(lsn_t lsn)
log_close_warn_time= t;
ib::error() << "The age of the last checkpoint is " << checkpoint_age
- << ", which exceeds the log capacity "
- << log_sys.log_capacity << ".";
+ << ", which exceeds the log capacity "
+ << log_sys.log_capacity << ".";
}
}
+ else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_modified_age_async))
+ return mtr_t::PAGE_FLUSH_NO;
else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_checkpoint_age))
- return false;
+ return mtr_t::PAGE_FLUSH_ASYNC;
log_sys.set_check_flush_or_checkpoint();
- return true;
+ return mtr_t::PAGE_FLUSH_SYNC;
}
/** Write the block contents to the REDO log */
@@ -858,8 +862,8 @@ inline ulint mtr_t::prepare_write()
/** Append the redo log records to the redo log buffer.
@param len number of bytes to write
-@return {start_lsn,flush_ahead_lsn} */
-inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
+@return {start_lsn,flush_ahead} */
+inline std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::finish_write(ulint len)
{
ut_ad(m_log_mode == MTR_LOG_ALL);
mysql_mutex_assert_owner(&log_sys.mutex);
@@ -875,19 +879,19 @@ inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
m_commit_lsn = log_reserve_and_write_fast(front->begin(), len,
&start_lsn);
- if (m_commit_lsn) {
- return std::make_pair(start_lsn, false);
+ if (!m_commit_lsn) {
+ goto piecewise;
}
+ } else {
+piecewise:
+ /* Open the database log for log_write_low */
+ start_lsn = log_reserve_and_open(len);
+ mtr_write_log write_log;
+ m_log.for_each_block(write_log);
+ m_commit_lsn = log_sys.get_lsn();
}
-
- /* Open the database log for log_write_low */
- start_lsn = log_reserve_and_open(len);
-
- mtr_write_log write_log;
- m_log.for_each_block(write_log);
- m_commit_lsn = log_sys.get_lsn();
- bool flush = log_close(m_commit_lsn);
- DBUG_EXECUTE_IF("ib_log_flush_ahead", flush=true;);
+ page_flush_ahead flush= log_close(m_commit_lsn);
+ DBUG_EXECUTE_IF("ib_log_flush_ahead", flush = PAGE_FLUSH_SYNC;);
return std::make_pair(start_lsn, flush);
}