diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-15 14:44:22 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-15 14:44:22 +0200 |
commit | 73fee39ea62037780c59161507e89dd76c10b7a3 (patch) | |
tree | 072bc0e46bafc59b0ae8c3a7082684aa2568a451 | |
parent | 00896db1c513ca8d71cbe0e8b56b699b43df1467 (diff) | |
download | mariadb-git-73fee39ea62037780c59161507e89dd76c10b7a3.tar.gz |
MDEV-27985 buf_flush_freed_pages() causes InnoDB to hang
buf_flush_freed_pages(): Assert that neither buf_pool.mutex
nor buf_pool.flush_list_mutex are held. Simplify the loops.
Return the tablespace and the number of pages written or punched.
buf_flush_LRU_list_batch(), buf_do_flush_list_batch():
Release buf_pool.mutex before invoking buf_flush_space().
buf_flush_list_space(): Acquire the mutexes only after invoking
buf_flush_freed_pages().
Reviewed by: Thirunarayanan Balathandayuthapani
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 103 |
1 files changed, 72 insertions, 31 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 60108af6cf6..81183d42f7c 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1046,42 +1046,52 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, return i; } -MY_ATTRIBUTE((nonnull)) +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Write punch-hole or zeroes of the freed ranges when innodb_immediate_scrub_data_uncompressed from the freed ranges. -@param space tablespace which may contain ranges of freed pages */ -static void buf_flush_freed_pages(fil_space_t *space) +@param space tablespace which may contain ranges of freed pages +@param writable whether the tablespace is writable +@return number of pages written or hole-punched */ +static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable) { const bool punch_hole= space->punch_hole; - if (!srv_immediate_scrub_data_uncompressed && !punch_hole) - return; - lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn(); + if (!punch_hole && !srv_immediate_scrub_data_uncompressed) + return 0; + + mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); + mysql_mutex_assert_not_owner(&buf_pool.mutex); - std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex); - if (space->freed_ranges.empty() - || flush_to_disk_lsn < space->get_last_freed_lsn()) + space->freed_range_mutex.lock(); + if (space->freed_ranges.empty() || + log_sys.get_flushed_lsn() < space->get_last_freed_lsn()) { - freed_lock.unlock(); - return; + space->freed_range_mutex.unlock(); + return 0; } + const unsigned physical_size{space->physical_size()}; + range_set freed_ranges= std::move(space->freed_ranges); - freed_lock.unlock(); + uint32_t written= 0; - for (const auto &range : freed_ranges) + if (!writable); + else if (punch_hole) { - const ulint physical_size= space->physical_size(); - - if (punch_hole) + for (const auto &range : freed_ranges) { + written+= range.last - range.first + 1; space->reacquire(); space->io(IORequest(IORequest::PUNCH_RANGE), os_offset_t{range.first} * physical_size, (range.last - range.first + 1) * physical_size, nullptr); } - else if (srv_immediate_scrub_data_uncompressed) + } + else + { + for (const auto &range : freed_ranges) { + written+= range.last - range.first + 1; for (os_offset_t i= range.first; i <= range.last; i++) { space->reacquire(); @@ -1090,8 +1100,10 @@ static void buf_flush_freed_pages(fil_space_t *space) const_cast<byte*>(field_ref_zero)); } } - buf_pool.stat.n_pages_written+= (range.last - range.first + 1); } + + space->freed_range_mutex.unlock(); + return written; } /** Flushes to disk all flushable pages within the flush area @@ -1213,14 +1225,12 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max) /** Start writing out pages for a tablespace. @param id tablespace identifier -@return tablespace -@retval nullptr if the pages for this tablespace should be discarded */ -static fil_space_t *buf_flush_space(const uint32_t id) +@return tablespace and number of pages written */ +static std::pair<fil_space_t*, uint32_t> buf_flush_space(const uint32_t id) { - fil_space_t *space= fil_space_t::get(id); - if (space) - buf_flush_freed_pages(space); - return space; + if (fil_space_t *space= fil_space_t::get(id)) + return {space, buf_flush_freed_pages(space, true)}; + return {nullptr, 0}; } struct flush_counters_t @@ -1288,6 +1298,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) n->flushed + n->evicted < max) || recv_recovery_is_on()); ++scanned) { + retry: buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage); const lsn_t oldest_modification= bpage->oldest_modification(); buf_pool.lru_hp.set(prev); @@ -1309,10 +1320,18 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) { if (last_space_id != space_id) { + buf_pool.lru_hp.set(bpage); + mysql_mutex_unlock(&buf_pool.mutex); if (space) space->release(); - space= buf_flush_space(space_id); + auto p= buf_flush_space(space_id); + space= p.first; last_space_id= space_id; + mysql_mutex_lock(&buf_pool.mutex); + if (p.second) + buf_pool.stat.n_pages_written+= p.second; + bpage= buf_pool.lru_hp.get(); + goto retry; } else ut_ad(!space); @@ -1455,10 +1474,28 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) { if (last_space_id != space_id) { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + buf_pool.flush_hp.set(bpage); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + mysql_mutex_unlock(&buf_pool.mutex); if (space) space->release(); - space= buf_flush_space(space_id); + auto p= buf_flush_space(space_id); + space= p.first; last_space_id= space_id; + mysql_mutex_lock(&buf_pool.mutex); + if (p.second) + buf_pool.stat.n_pages_written+= p.second; + mysql_mutex_lock(&buf_pool.flush_list_mutex); + bpage= buf_pool.flush_hp.get(); + if (!bpage) + break; + if (bpage->id() != page_id) + continue; + buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage)); + if (bpage->oldest_modification() <= 1 || !bpage->ready_for_flush()) + goto next; + mysql_mutex_unlock(&buf_pool.flush_list_mutex); } else ut_ad(!space); @@ -1486,6 +1523,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) } mysql_mutex_lock(&buf_pool.flush_list_mutex); + next: bpage= buf_pool.flush_hp.get(); } @@ -1582,11 +1620,14 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) bool may_have_skipped= false; ulint max_n_flush= srv_io_capacity; - mysql_mutex_lock(&buf_pool.mutex); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - bool acquired= space->acquire(); - buf_flush_freed_pages(space); + { + const uint32_t written{buf_flush_freed_pages(space, acquired)}; + mysql_mutex_lock(&buf_pool.mutex); + if (written) + buf_pool.stat.n_pages_written+= written; + } + mysql_mutex_lock(&buf_pool.flush_list_mutex); for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) { |