author     Marko Mäkelä <marko.makela@mariadb.com>   2021-07-03 13:58:38 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>   2021-07-03 13:58:38 +0300
commit     bd5a6403cace36c6ed428cde62e35adcd3f7e7d0 (patch)
tree       dd286590d561909ea3d835d544e2f46a0babc12b
parent     779262842edf86c989c099c6930ae4683cbde609 (diff)
download   mariadb-git-bd5a6403cace36c6ed428cde62e35adcd3f7e7d0.tar.gz
MDEV-26033: Race condition between buf_pool.page_hash and resize()
The replacement of buf_pool.page_hash with a different type of hash table in commit 5155a300fab85e97217c75e3ba3c3ce78082dd8a (MDEV-22871) introduced a race condition with buffer pool resizing.

We have an execution trace where buf_pool.page_hash.array is changed to point to something else while page_hash_latch::read_lock() is executing. The same should also affect page_hash_latch::write_lock().

We fix the race condition by never resizing (and reallocating) the buf_pool.page_hash. We assume that resizing the buffer pool is a rare operation. Yes, there might be a performance regression if a server is first started up with a tiny buffer pool, which is later enlarged. In that case, the tiny buf_pool.page_hash.array could cause increased use of the hash bucket lists. That problem can be worked around by initially starting up the server with a larger buffer pool and then shrinking that, until changing to a larger size again.

buf_pool_t::resize_hash(): Remove.

buf_pool_t::page_hash_table::lock(): Do not attempt to deal with hash table resizing. If we really wanted that in a safe manner, we would probably have to introduce a global rw-lock around the operation, or at the very least, poll buf_pool.resizing, both of which would be detrimental to performance.
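For illustration, a minimal stand-alone sketch of the race and of the simplified locking contract follows. The types and names below (toy_page_hash, lock_with_retry) are hypothetical stand-ins, not the actual buf_pool_t code; they only model the retry pattern that the patch removes and the single-acquire pattern it keeps.

// Toy model of a hash table with one latch per group of cells, loosely
// following the structure described above. Hypothetical code, not the
// InnoDB implementation.
#include <atomic>
#include <cstddef>
#include <shared_mutex>

struct toy_page_hash
{
  struct cell { std::shared_mutex latch; };

  std::atomic<std::size_t> n_cells{1024};
  std::atomic<cell*> array{new cell[1024]};
  ~toy_page_hash() { delete[] array.load(); }

  // Pre-fix pattern (simplified): acquire the latch, then re-check n_cells.
  // The re-check cannot detect that `array` itself was swapped to a new
  // allocation by a concurrent resize, so the latch being held may belong
  // to the old array; this is the kind of race the message describes.
  std::shared_mutex *lock_with_retry(std::size_t fold)
  {
    for (;;)
    {
      const std::size_t n= n_cells.load(std::memory_order_acquire);
      cell *a= array.load(std::memory_order_acquire);
      std::shared_mutex *latch= &a[fold % n].latch;
      latch->lock_shared();
      if (n == n_cells.load(std::memory_order_acquire))
        return latch;          // n_cells unchanged, but `array` may not be
      latch->unlock_shared();  // apparent resize: retry
    }
  }

  // Post-fix contract: the table is created once and never reallocated,
  // so a single lookup and a single acquire are always valid.
  std::shared_mutex *lock(std::size_t fold)
  {
    cell *a= array.load(std::memory_order_relaxed);
    std::shared_mutex *latch=
      &a[fold % n_cells.load(std::memory_order_relaxed)].latch;
    latch->lock_shared();
    return latch;
  }
};

int main()
{
  toy_page_hash h;
  std::shared_mutex *latch= h.lock(12345);  // single acquire, no retry loop
  latch->unlock_shared();
}

The design choice in the patch follows from this: making the retry loop actually safe would require either a global rw-lock around resizing or polling buf_pool.resizing on every lookup, both of which the commit message rejects on performance grounds; never reallocating page_hash removes the problem entirely.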
-rw-r--r--   storage/innobase/buf/buf0buf.cc      90
-rw-r--r--   storage/innobase/include/buf0buf.h   29
2 files changed, 6 insertions, 113 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index bbbb67a69df..dcf95fdd5f2 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1536,13 +1536,6 @@ void buf_pool_t::close()
ut_free(chunks);
chunks= nullptr;
page_hash.free();
- while (page_hash_table *old_page_hash= freed_page_hash)
- {
- freed_page_hash= static_cast<page_hash_table*>
- (old_page_hash->array[1].node);
- old_page_hash->free();
- UT_DELETE(old_page_hash);
- }
zip_hash.free();
io_buf.close();
@@ -1829,57 +1822,6 @@ inline bool buf_pool_t::withdraw_blocks()
return(false);
}
-/** resize page_hash and zip_hash */
-inline void buf_pool_t::resize_hash()
-{
- page_hash_table *new_page_hash= UT_NEW_NOKEY(page_hash_table());
- new_page_hash->create(2 * buf_pool.curr_size);
- new_page_hash->write_lock_all();
-
- for (auto i= page_hash.pad(page_hash.n_cells); i--; )
- {
- static_assert(!((page_hash_table::ELEMENTS_PER_LATCH + 1) &
- page_hash_table::ELEMENTS_PER_LATCH),
- "must be one less than a power of 2");
- if (!(i & page_hash_table::ELEMENTS_PER_LATCH))
- {
- ut_ad(reinterpret_cast<page_hash_latch*>
- (&page_hash.array[i])->is_write_locked());
- continue;
- }
- while (buf_page_t *bpage= static_cast<buf_page_t*>
- (page_hash.array[i].node))
- {
- ut_ad(bpage->in_page_hash);
- const ulint fold= bpage->id().fold();
- HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
- HASH_INSERT(buf_page_t, hash, new_page_hash, fold, bpage);
- }
- }
-
- buf_pool.page_hash.array[1].node= freed_page_hash;
- std::swap(buf_pool.page_hash, *new_page_hash);
- freed_page_hash= new_page_hash;
-
- /* recreate zip_hash */
- hash_table_t new_hash;
- new_hash.create(2 * buf_pool.curr_size);
-
- for (ulint i= 0; i < buf_pool.zip_hash.n_cells; i++)
- {
- while (buf_page_t *bpage= static_cast<buf_page_t*>
- (HASH_GET_FIRST(&buf_pool.zip_hash, i)))
- {
- const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(bpage);
- HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage);
- HASH_INSERT(buf_page_t, hash, &new_hash, fold, bpage);
- }
- }
-
- std::swap(buf_pool.zip_hash.array, new_hash.array);
- buf_pool.zip_hash.n_cells= new_hash.n_cells;
- new_hash.free();
-}
inline void buf_pool_t::page_hash_table::write_lock_all()
@@ -1904,26 +1846,6 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
}
-inline void buf_pool_t::write_lock_all_page_hash()
-{
- mysql_mutex_assert_owner(&mutex);
- page_hash.write_lock_all();
- for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
- old_page_hash= static_cast<page_hash_table*>
- (old_page_hash->array[1].node))
- old_page_hash->write_lock_all();
-}
-
-
-inline void buf_pool_t::write_unlock_all_page_hash()
-{
- page_hash.write_unlock_all();
- for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
- old_page_hash= static_cast<page_hash_table*>
- (old_page_hash->array[1].node))
- old_page_hash->write_unlock_all();
-}
-
namespace
{
@@ -2097,7 +2019,7 @@ withdraw_retry:
resizing.store(true, std::memory_order_relaxed);
mysql_mutex_lock(&mutex);
- write_lock_all_page_hash();
+ page_hash.write_lock_all();
chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
@@ -2252,16 +2174,8 @@ calc_buf_pool_size:
= srv_buf_pool_base_size > srv_buf_pool_size * 2
|| srv_buf_pool_base_size * 2 < srv_buf_pool_size;
- /* Normalize page_hash and zip_hash,
- if the new size is too different */
- if (!warning && new_size_too_diff) {
- buf_resize_status("Resizing hash table");
- resize_hash();
- ib::info() << "hash tables were resized";
- }
-
mysql_mutex_unlock(&mutex);
- write_unlock_all_page_hash();
+ page_hash.write_unlock_all();
UT_DELETE(chunk_map_old);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index e0e6c581442..5a118df48e1 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1895,22 +1895,14 @@ public:
page_hash_latch *lock_get(ulint fold) const
{ return lock_get(fold, n_cells); }
- /** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
+ /** Acquire an array latch.
@tparam exclusive whether the latch is to be acquired exclusively
@param fold hash bucket key */
template<bool exclusive> page_hash_latch *lock(ulint fold)
{
- for (;;)
- {
- auto n= n_cells;
- page_hash_latch *latch= lock_get(fold, n);
- latch->acquire<exclusive>();
- /* Our latch prevents n_cells from changing. */
- if (UNIV_LIKELY(n == n_cells))
- return latch;
- /* Retry, because buf_pool_t::resize_hash() affected us. */
- latch->release<exclusive>();
- }
+ page_hash_latch *latch= lock_get(fold, n_cells);
+ latch->acquire<exclusive>();
+ return latch;
}
/** Exclusively aqcuire all latches */
@@ -1920,19 +1912,6 @@ public:
inline void write_unlock_all();
};
-private:
- /** Former page_hash that has been deleted during resize();
- singly-linked list via freed_page_hash->array[1] */
- page_hash_table *freed_page_hash;
-
- /** Lock all page_hash, also freed_page_hash. */
- inline void write_lock_all_page_hash();
- /** Release all page_hash, also freed_page_hash. */
- inline void write_unlock_all_page_hash();
- /** Resize page_hash and zip_hash. */
- inline void resize_hash();
-
-public:
/** Hash table of file pages (buf_page_t::in_file() holds),
indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
page_hash_table page_hash;