author     Marko Mäkelä <marko.makela@mariadb.com>    2021-07-03 13:58:38 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>    2021-07-03 13:58:38 +0300
commit     bd5a6403cace36c6ed428cde62e35adcd3f7e7d0 (patch)
tree       dd286590d561909ea3d835d544e2f46a0babc12b
parent     779262842edf86c989c099c6930ae4683cbde609 (diff)
download   mariadb-git-bd5a6403cace36c6ed428cde62e35adcd3f7e7d0.tar.gz
MDEV-26033: Race condition between buf_pool.page_hash and resize()
The replacement of buf_pool.page_hash with a different type of
hash table in commit 5155a300fab85e97217c75e3ba3c3ce78082dd8a (MDEV-22871)
introduced a race condition with buffer pool resizing.
We have an execution trace where buf_pool.page_hash.array is changed
to point to something else while page_hash_latch::read_lock() is
executing. The same should also affect page_hash_latch::write_lock().
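To make the race concrete, below is a minimal, self-contained C++ model of the old retry-based lookup; the names (PageHashModel, resize_once) and the cell counts are invented for this sketch, and it is not the InnoDB code. It shows that `array` and `n_cells` are read as two separate loads, so a lookup can end up holding a latch in the retired array, or the wrong latch of the new one, while the `n == n_cells` re-check still passes.

```cpp
// Hypothetical model of the race; not MariaDB code.
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

struct Cell { std::mutex latch; };          // stands in for page_hash_latch

struct PageHashModel {
  std::atomic<std::vector<Cell>*> array{new std::vector<Cell>(64)};
  std::atomic<std::size_t> n_cells{64};

  // Mimics the removed retry loop: read n_cells, resolve the cell, acquire,
  // then re-check n_cells.  The re-check cannot tell that `array` itself was
  // replaced between the two loads.
  Cell *lock(std::size_t fold)
  {
    for (;;)
    {
      std::size_t n= n_cells.load();
      Cell *cell= &(*array.load())[fold % n];  // may mix the old size with the new array
      cell->latch.lock();
      if (n == n_cells.load())
        return cell;    // can be a cell of the retired array, or cell
                        // fold % 64 of the new array instead of fold % 128
      cell->latch.unlock();
    }
  }

  void resize_once()
  {
    auto *bigger= new std::vector<Cell>(128);
    array.store(bigger);     // publish the new array ...
    n_cells.store(128);      // ... and only then the new size
    // The retired 64-cell array is deliberately leaked (much as freed_page_hash
    // kept it alive) so that this sketch stays memory-safe.
  }
};

int main()
{
  PageHashModel h;
  std::thread reader([&] {
    for (int i= 0; i < 100000; i++)
    {
      Cell *c= h.lock(100);  // 100 % 64 == 36, 100 % 128 == 100
      c->latch.unlock();
    }
  });
  std::thread resizer([&] { h.resize_once(); });
  reader.join();
  resizer.join();
  std::puts("done");
}
```

Either way the interleaving falls, the caller ends up holding a latch that no longer guards the bucket a concurrent user of the current table would pick, matching the observation that page_hash.array changed underneath read_lock().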
We fix the race condition by never resizing (and reallocating) the
buf_pool.page_hash. We assume that resizing the buffer pool is
a rare operation. Yes, there might be a performance regression if a
server is first started up with a tiny buffer pool, which is later
enlarged. In that case, the tiny buf_pool.page_hash.array could cause
increased use of the hash bucket lists. That problem can be worked
around by initially starting up the server with a larger buffer pool
and then shrinking it; because buf_pool.page_hash is never reallocated, it
keeps its generous size when the pool is later enlarged again.
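A rough, hypothetical calculation of that effect follows (the sizes are invented; the `2 * curr_size` factor only mirrors the create() call visible in the removed resize_hash(), and the real sizing may differ): with the cell count frozen at startup, the average bucket-list length is simply pages divided by cells.

```cpp
// Back-of-the-envelope illustration with made-up sizes; not InnoDB code.
#include <cstdio>

int main()
{
  const double page_size= 16.0 * 1024;                    // default 16 KiB pages
  const double initial_pool= 128.0 * 1024 * 1024;         // tiny 128 MiB at startup
  const double enlarged_pool= 32.0 * 1024 * 1024 * 1024;  // grown to 32 GiB later

  // Cell count chosen once, from the startup size (mirroring 2 * curr_size).
  const double cells= 2 * (initial_pool / page_size);
  const double pages_after_grow= enlarged_pool / page_size;

  std::printf("cells fixed at startup:       %.0f\n", cells);
  std::printf("pages after growing the pool: %.0f\n", pages_after_grow);
  std::printf("average bucket-list length:   %.1f\n", pages_after_grow / cells);
  // Starting with the larger size (and shrinking later) would keep the
  // average bucket-list length below one page per cell.
  return 0;
}
```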
buf_pool_t::resize_hash(): Remove.
buf_pool_t::page_hash_table::lock(): Do not attempt to deal with
hash table resizing. If we really wanted that in a safe manner,
we would probably have to introduce a global rw-lock around the
operation, or at the very least, poll buf_pool.resizing, both of
which would be detrimental to performance.
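For contrast, here is a sketch of roughly what the "global rw-lock around the operation" alternative could look like; GatedPageHash and its members are invented for this illustration and use std::shared_mutex rather than any actual MariaDB primitive. It is safe against reallocation, but every page_hash access pays for one extra shared-lock acquisition, which is the overhead the commit message declines to accept.

```cpp
// Hypothetical sketch of the rejected alternative; not proposed MariaDB code.
#include <cstddef>
#include <mutex>
#include <shared_mutex>
#include <vector>

class GatedPageHash
{
  std::shared_mutex gate;             // global rw-lock around the whole table
  std::vector<std::mutex> latches;    // stands in for the per-bucket latches

public:
  explicit GatedPageHash(std::size_t cells) : latches(cells) {}

  // A lookup holds the gate in shared mode for as long as it holds the
  // bucket latch, so the latch it resolved stays valid.
  struct Locked
  {
    std::shared_lock<std::shared_mutex> gate;
    std::unique_lock<std::mutex> bucket;
  };

  Locked lock(std::size_t fold)
  {
    std::shared_lock<std::shared_mutex> g(gate);  // extra cost on every access
    std::unique_lock<std::mutex> b(latches[fold % latches.size()]);
    return {std::move(g), std::move(b)};
  }

  // Resizing takes the gate exclusively: it waits until no lookup is in
  // flight, after which the latch array can be replaced safely.
  void resize(std::size_t new_cells)
  {
    std::unique_lock<std::shared_mutex> g(gate);
    std::vector<std::mutex> bigger(new_cells);
    latches.swap(bigger);             // old latches destroyed, none are held
  }
};

int main()
{
  GatedPageHash h(1024);
  { auto l= h.lock(42); }             // latch released when l goes out of scope
  h.resize(2048);
}
```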
-rw-r--r--  storage/innobase/buf/buf0buf.cc     90
-rw-r--r--  storage/innobase/include/buf0buf.h  29
2 files changed, 6 insertions, 113 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index bbbb67a69df..dcf95fdd5f2 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1536,13 +1536,6 @@ void buf_pool_t::close()
   ut_free(chunks);
   chunks= nullptr;
   page_hash.free();
-  while (page_hash_table *old_page_hash= freed_page_hash)
-  {
-    freed_page_hash= static_cast<page_hash_table*>
-      (old_page_hash->array[1].node);
-    old_page_hash->free();
-    UT_DELETE(old_page_hash);
-  }
   zip_hash.free();
 
   io_buf.close();
@@ -1829,57 +1822,6 @@ inline bool buf_pool_t::withdraw_blocks()
 	return(false);
 }
 
-/** resize page_hash and zip_hash */
-inline void buf_pool_t::resize_hash()
-{
-  page_hash_table *new_page_hash= UT_NEW_NOKEY(page_hash_table());
-  new_page_hash->create(2 * buf_pool.curr_size);
-  new_page_hash->write_lock_all();
-
-  for (auto i= page_hash.pad(page_hash.n_cells); i--; )
-  {
-    static_assert(!((page_hash_table::ELEMENTS_PER_LATCH + 1) &
-                    page_hash_table::ELEMENTS_PER_LATCH),
-                  "must be one less than a power of 2");
-    if (!(i & page_hash_table::ELEMENTS_PER_LATCH))
-    {
-      ut_ad(reinterpret_cast<page_hash_latch*>
-            (&page_hash.array[i])->is_write_locked());
-      continue;
-    }
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (page_hash.array[i].node))
-    {
-      ut_ad(bpage->in_page_hash);
-      const ulint fold= bpage->id().fold();
-      HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, new_page_hash, fold, bpage);
-    }
-  }
-
-  buf_pool.page_hash.array[1].node= freed_page_hash;
-  std::swap(buf_pool.page_hash, *new_page_hash);
-  freed_page_hash= new_page_hash;
-
-  /* recreate zip_hash */
-  hash_table_t new_hash;
-  new_hash.create(2 * buf_pool.curr_size);
-
-  for (ulint i= 0; i < buf_pool.zip_hash.n_cells; i++)
-  {
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (HASH_GET_FIRST(&buf_pool.zip_hash, i)))
-    {
-      const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(bpage);
-      HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, &new_hash, fold, bpage);
-    }
-  }
-
-  std::swap(buf_pool.zip_hash.array, new_hash.array);
-  buf_pool.zip_hash.n_cells= new_hash.n_cells;
-  new_hash.free();
-}
 
 inline void buf_pool_t::page_hash_table::write_lock_all()
@@ -1904,26 +1846,6 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
 }
 
-inline void buf_pool_t::write_lock_all_page_hash()
-{
-  mysql_mutex_assert_owner(&mutex);
-  page_hash.write_lock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_lock_all();
-}
-
-
-inline void buf_pool_t::write_unlock_all_page_hash()
-{
-  page_hash.write_unlock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_unlock_all();
-}
-
 namespace
 {
@@ -2097,7 +2019,7 @@ withdraw_retry:
   resizing.store(true, std::memory_order_relaxed);
 
   mysql_mutex_lock(&mutex);
-  write_lock_all_page_hash();
+  page_hash.write_lock_all();
 
   chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
@@ -2252,16 +2174,8 @@ calc_buf_pool_size:
 	  = srv_buf_pool_base_size > srv_buf_pool_size * 2
 	    || srv_buf_pool_base_size * 2 < srv_buf_pool_size;
 
-	/* Normalize page_hash and zip_hash,
-	if the new size is too different */
-	if (!warning && new_size_too_diff) {
-		buf_resize_status("Resizing hash table");
-		resize_hash();
-		ib::info() << "hash tables were resized";
-	}
-
 	mysql_mutex_unlock(&mutex);
-	write_unlock_all_page_hash();
+	page_hash.write_unlock_all();
 
 	UT_DELETE(chunk_map_old);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index e0e6c581442..5a118df48e1 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1895,22 +1895,14 @@ public:
   page_hash_latch *lock_get(ulint fold) const
   { return lock_get(fold, n_cells); }
 
-  /** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
+  /** Acquire an array latch.
   @tparam exclusive  whether the latch is to be acquired exclusively
   @param fold    hash bucket key */
   template<bool exclusive> page_hash_latch *lock(ulint fold)
   {
-    for (;;)
-    {
-      auto n= n_cells;
-      page_hash_latch *latch= lock_get(fold, n);
-      latch->acquire<exclusive>();
-      /* Our latch prevents n_cells from changing. */
-      if (UNIV_LIKELY(n == n_cells))
-        return latch;
-      /* Retry, because buf_pool_t::resize_hash() affected us. */
-      latch->release<exclusive>();
-    }
+    page_hash_latch *latch= lock_get(fold, n_cells);
+    latch->acquire<exclusive>();
+    return latch;
   }
 
   /** Exclusively aqcuire all latches */
@@ -1920,19 +1912,6 @@ public:
   inline void write_unlock_all();
 };
 
-private:
-  /** Former page_hash that has been deleted during resize();
-  singly-linked list via freed_page_hash->array[1] */
-  page_hash_table *freed_page_hash;
-
-  /** Lock all page_hash, also freed_page_hash. */
-  inline void write_lock_all_page_hash();
-  /** Release all page_hash, also freed_page_hash. */
-  inline void write_unlock_all_page_hash();
-  /** Resize page_hash and zip_hash. */
-  inline void resize_hash();
-
-public:
   /** Hash table of file pages (buf_page_t::in_file() holds),
   indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
   page_hash_table page_hash;
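With the fix in place, a lookup conceptually reduces to the following simplified model; FixedPageHash and its members are invented for illustration, and the real code uses page_hash_latch, lock_get() and the HASH_ macros shown in the diff above. Because the cell count can no longer change after startup, one read of the latch address suffices and the removed retry loop is unnecessary.

```cpp
// Simplified, hypothetical model of the post-fix lookup path; not InnoDB code.
#include <cstddef>
#include <list>
#include <mutex>
#include <shared_mutex>
#include <vector>

struct Page { std::size_t id; };

struct FixedPageHash
{
  // One latch per cell here; the real page_hash shares one latch among
  // ELEMENTS_PER_LATCH neighbouring cells.
  std::vector<std::shared_mutex> latches;
  std::vector<std::list<Page*>> cells;   // the "hash bucket lists"

  explicit FixedPageHash(std::size_t n) : latches(n), cells(n) {}

  // The table is never reallocated, so resolving the latch once is safe:
  // this mirrors the simplified page_hash_table::lock() in the diff above.
  bool contains(std::size_t fold)
  {
    std::shared_lock<std::shared_mutex> s(latches[fold % latches.size()]);
    for (const Page *p : cells[fold % cells.size()])
      if (p->id == fold)
        return true;                     // found while holding the read latch
    return false;
  }
};

int main()
{
  FixedPageHash h(1024);
  Page p{42};
  {
    // Insertions take the latch exclusively, like write_lock().
    std::unique_lock<std::shared_mutex> x(h.latches[42 % h.latches.size()]);
    h.cells[42 % h.cells.size()].push_back(&p);
  }
  return h.contains(42) ? 0 : 1;
}
```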