summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2022-03-15 14:44:22 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2022-03-15 14:44:22 +0200
commit73fee39ea62037780c59161507e89dd76c10b7a3 (patch)
tree072bc0e46bafc59b0ae8c3a7082684aa2568a451
parent00896db1c513ca8d71cbe0e8b56b699b43df1467 (diff)
downloadmariadb-git-73fee39ea62037780c59161507e89dd76c10b7a3.tar.gz
MDEV-27985 buf_flush_freed_pages() causes InnoDB to hang
buf_flush_freed_pages(): Assert that neither buf_pool.mutex nor buf_pool.flush_list_mutex are held. Simplify the loops. Return the tablespace and the number of pages written or punched. buf_flush_LRU_list_batch(), buf_do_flush_list_batch(): Release buf_pool.mutex before invoking buf_flush_space(). buf_flush_list_space(): Acquire the mutexes only after invoking buf_flush_freed_pages(). Reviewed by: Thirunarayanan Balathandayuthapani
-rw-r--r--storage/innobase/buf/buf0flu.cc103
1 files changed, 72 insertions, 31 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 60108af6cf6..81183d42f7c 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1046,42 +1046,52 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
return i;
}
-MY_ATTRIBUTE((nonnull))
+MY_ATTRIBUTE((nonnull, warn_unused_result))
/** Write punch-hole or zeroes of the freed ranges when
innodb_immediate_scrub_data_uncompressed from the freed ranges.
-@param space tablespace which may contain ranges of freed pages */
-static void buf_flush_freed_pages(fil_space_t *space)
+@param space tablespace which may contain ranges of freed pages
+@param writable whether the tablespace is writable
+@return number of pages written or hole-punched */
+static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable)
{
const bool punch_hole= space->punch_hole;
- if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
- return;
- lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn();
+ if (!punch_hole && !srv_immediate_scrub_data_uncompressed)
+ return 0;
+
+ mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
+ mysql_mutex_assert_not_owner(&buf_pool.mutex);
- std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex);
- if (space->freed_ranges.empty()
- || flush_to_disk_lsn < space->get_last_freed_lsn())
+ space->freed_range_mutex.lock();
+ if (space->freed_ranges.empty() ||
+ log_sys.get_flushed_lsn() < space->get_last_freed_lsn())
{
- freed_lock.unlock();
- return;
+ space->freed_range_mutex.unlock();
+ return 0;
}
+ const unsigned physical_size{space->physical_size()};
+
range_set freed_ranges= std::move(space->freed_ranges);
- freed_lock.unlock();
+ uint32_t written= 0;
- for (const auto &range : freed_ranges)
+ if (!writable);
+ else if (punch_hole)
{
- const ulint physical_size= space->physical_size();
-
- if (punch_hole)
+ for (const auto &range : freed_ranges)
{
+ written+= range.last - range.first + 1;
space->reacquire();
space->io(IORequest(IORequest::PUNCH_RANGE),
os_offset_t{range.first} * physical_size,
(range.last - range.first + 1) * physical_size,
nullptr);
}
- else if (srv_immediate_scrub_data_uncompressed)
+ }
+ else
+ {
+ for (const auto &range : freed_ranges)
{
+ written+= range.last - range.first + 1;
for (os_offset_t i= range.first; i <= range.last; i++)
{
space->reacquire();
@@ -1090,8 +1100,10 @@ static void buf_flush_freed_pages(fil_space_t *space)
const_cast<byte*>(field_ref_zero));
}
}
- buf_pool.stat.n_pages_written+= (range.last - range.first + 1);
}
+
+ space->freed_range_mutex.unlock();
+ return written;
}
/** Flushes to disk all flushable pages within the flush area
@@ -1213,14 +1225,12 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
/** Start writing out pages for a tablespace.
@param id tablespace identifier
-@return tablespace
-@retval nullptr if the pages for this tablespace should be discarded */
-static fil_space_t *buf_flush_space(const uint32_t id)
+@return tablespace and number of pages written */
+static std::pair<fil_space_t*, uint32_t> buf_flush_space(const uint32_t id)
{
- fil_space_t *space= fil_space_t::get(id);
- if (space)
- buf_flush_freed_pages(space);
- return space;
+ if (fil_space_t *space= fil_space_t::get(id))
+ return {space, buf_flush_freed_pages(space, true)};
+ return {nullptr, 0};
}
struct flush_counters_t
@@ -1288,6 +1298,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
n->flushed + n->evicted < max) ||
recv_recovery_is_on()); ++scanned)
{
+ retry:
buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
const lsn_t oldest_modification= bpage->oldest_modification();
buf_pool.lru_hp.set(prev);
@@ -1309,10 +1320,18 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
{
if (last_space_id != space_id)
{
+ buf_pool.lru_hp.set(bpage);
+ mysql_mutex_unlock(&buf_pool.mutex);
if (space)
space->release();
- space= buf_flush_space(space_id);
+ auto p= buf_flush_space(space_id);
+ space= p.first;
last_space_id= space_id;
+ mysql_mutex_lock(&buf_pool.mutex);
+ if (p.second)
+ buf_pool.stat.n_pages_written+= p.second;
+ bpage= buf_pool.lru_hp.get();
+ goto retry;
}
else
ut_ad(!space);
@@ -1455,10 +1474,28 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
{
if (last_space_id != space_id)
{
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ buf_pool.flush_hp.set(bpage);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ mysql_mutex_unlock(&buf_pool.mutex);
if (space)
space->release();
- space= buf_flush_space(space_id);
+ auto p= buf_flush_space(space_id);
+ space= p.first;
last_space_id= space_id;
+ mysql_mutex_lock(&buf_pool.mutex);
+ if (p.second)
+ buf_pool.stat.n_pages_written+= p.second;
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ bpage= buf_pool.flush_hp.get();
+ if (!bpage)
+ break;
+ if (bpage->id() != page_id)
+ continue;
+ buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage));
+ if (bpage->oldest_modification() <= 1 || !bpage->ready_for_flush())
+ goto next;
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
else
ut_ad(!space);
@@ -1486,6 +1523,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ next:
bpage= buf_pool.flush_hp.get();
}
@@ -1582,11 +1620,14 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
bool may_have_skipped= false;
ulint max_n_flush= srv_io_capacity;
- mysql_mutex_lock(&buf_pool.mutex);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
-
bool acquired= space->acquire();
- buf_flush_freed_pages(space);
+ {
+ const uint32_t written{buf_flush_freed_pages(space, acquired)};
+ mysql_mutex_lock(&buf_pool.mutex);
+ if (written)
+ buf_pool.stat.n_pages_written+= written;
+ }
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
{