diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-24 16:09:04 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-24 16:09:04 +0200 |
commit | 8684af76e34641331695860fc13eb9fc2ff94215 (patch) | |
tree | fcedb69048f9462ddfb03dc7300566966b63f017 | |
parent | 5ccd845d51d80a2f94dc796037f6d2aef106f75d (diff) | |
download | mariadb-git-8684af76e34641331695860fc13eb9fc2ff94215.tar.gz |
MDEV-28137 Some memory transactions are unnecessarily complex
buf_page_get_zip(): Do not perform a system call inside a
memory transaction. Instead, if the page latch is unavailable,
abort the memory transaction and let the fall-back code path
wait for the page latch.
buf_pool_t::watch_remove(): Return the previous state of the block.
buf_page_init_for_read(): Use regular stores for moving the
buffer fix count of watch_remove() to the new block descriptor.
A more extensive version of this was reviewed by Daniel Black
and tested with Intel TSX-NI by Axel Schwenke and Matthias Leich.
My assumption that regular loads and stores would execute faster
in a memory transaction than operations like std::atomic::fetch_add()
turned out to be incorrect.
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 41 | ||||
-rw-r--r-- | storage/innobase/buf/buf0rea.cc | 57 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 9 |
3 files changed, 71 insertions, 36 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e456389149a..3f4f7888315 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2269,29 +2269,66 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size) lookup: for (bool discard_attempted= false;;) { +#ifndef NO_ELISION + if (xbegin()) { - transactional_shared_lock_guard<page_hash_latch> g{hash_lock}; + if (hash_lock.is_locked()) + xabort(); bpage= buf_pool.page_hash.get(page_id, chain); if (!bpage || buf_pool.watch_is_sentinel(*bpage)) + { + xend(); goto must_read_page; + } + if (!bpage->zip.data) + { + /* There is no ROW_FORMAT=COMPRESSED page. */ + xend(); + return nullptr; + } + if (discard_attempted || !bpage->frame) + { + if (!bpage->lock.s_lock_try()) + xabort(); + xend(); + break; + } + xend(); + } + else +#endif + { + hash_lock.lock_shared(); + bpage= buf_pool.page_hash.get(page_id, chain); + if (!bpage || buf_pool.watch_is_sentinel(*bpage)) + { + hash_lock.unlock_shared(); + goto must_read_page; + } ut_ad(bpage->in_file()); ut_ad(page_id == bpage->id()); if (!bpage->zip.data) + { /* There is no ROW_FORMAT=COMPRESSED page. */ + hash_lock.unlock_shared(); return nullptr; + } if (discard_attempted || !bpage->frame) { - /* Even when we are holding a page_hash latch, it should be + /* Even when we are holding a hash_lock, it should be acceptable to wait for a page S-latch here, because buf_page_t::read_complete() will not wait for buf_pool.mutex, and because S-latch would not conflict with a U-latch that would be protecting buf_page_t::write_complete(). */ bpage->lock.s_lock(); + hash_lock.unlock_shared(); break; } + + hash_lock.unlock_shared(); } discard_attempted= true; diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index f47c983c650..436d2bd0aa3 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2021, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,19 +50,30 @@ i/o-fixed buffer blocks */ /** Remove the sentinel block for the watch before replacing it with a real block. watch_unset() or watch_occurred() will notice that the block has been replaced with the real block. -@param watch sentinel -@param chain locked hash table chain */ -inline void buf_pool_t::watch_remove(buf_page_t *watch, - buf_pool_t::hash_chain &chain) +@param w sentinel +@param chain locked hash table chain +@return w->state() */ +inline uint32_t buf_pool_t::watch_remove(buf_page_t *w, + buf_pool_t::hash_chain &chain) { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(page_hash.lock_get(chain).is_write_locked()); - ut_a(watch_is_sentinel(*watch)); - if (watch->buf_fix_count()) - page_hash.remove(chain, watch); - ut_ad(!watch->in_page_hash); - watch->set_state(buf_page_t::NOT_USED); - watch->id_= page_id_t(~0ULL); + ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked()); + ut_ad(w >= &watch[0]); + ut_ad(w < &watch[array_elements(watch)]); + ut_ad(!w->in_zip_hash); + ut_ad(!w->zip.data); + + uint32_t s{w->state()}; + w->set_state(buf_page_t::NOT_USED); + ut_ad(s >= buf_page_t::UNFIXED); + ut_ad(s < buf_page_t::READ_FIX); + + if (~buf_page_t::LRU_MASK & s) + page_hash.remove(chain, w); + + ut_ad(!w->in_page_hash); + w->id_= page_id_t(~0ULL); + return s; } /** Initialize a page for read to the buffer buf_pool. If the page is @@ -139,14 +150,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, {buf_pool.page_hash.lock_get(chain)}; if (hash_page) - { - /* Preserve the reference count. */ - uint32_t buf_fix_count= hash_page->state(); - ut_a(buf_fix_count >= buf_page_t::UNFIXED); - ut_a(buf_fix_count < buf_page_t::READ_FIX); - buf_pool.watch_remove(hash_page, chain); - block->page.fix(buf_fix_count - buf_page_t::UNFIXED); - } + bpage->set_state(buf_pool.watch_remove(hash_page, chain) + + (buf_page_t::READ_FIX - buf_page_t::UNFIXED)); buf_pool.page_hash.append(chain, &block->page); } @@ -209,16 +214,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, {buf_pool.page_hash.lock_get(chain)}; if (hash_page) - { - /* Preserve the reference count. It can be 0 if - buf_pool_t::watch_unset() is executing concurrently, - waiting for buf_pool.mutex, which we are holding. */ - uint32_t buf_fix_count= hash_page->state(); - ut_a(buf_fix_count >= buf_page_t::UNFIXED); - ut_a(buf_fix_count < buf_page_t::READ_FIX); - bpage->fix(buf_fix_count - buf_page_t::UNFIXED); - buf_pool.watch_remove(hash_page, chain); - } + bpage->set_state(buf_pool.watch_remove(hash_page, chain) + + (buf_page_t::READ_FIX - buf_page_t::UNFIXED)); buf_pool.page_hash.append(chain, bpage); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index a7ecc888137..7c372379ad3 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1535,9 +1535,10 @@ public: /** Remove the sentinel block for the watch before replacing it with a real block. watch_unset() or watch_occurred() will notice that the block has been replaced with the real block. - @param watch sentinel - @param chain locked hash table chain */ - inline void watch_remove(buf_page_t *watch, hash_chain &chain); + @param w sentinel + @param chain locked hash table chain + @return w->state() */ + inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain); /** @return whether less than 1/4 of the buffer pool is available */ TPOOL_SUPPRESS_TSAN |