summary refs log tree commit diff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2022-03-24 16:09:04 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2022-03-24 16:09:04 +0200
commit 8684af76e34641331695860fc13eb9fc2ff94215 (patch)
tree fcedb69048f9462ddfb03dc7300566966b63f017
parent 5ccd845d51d80a2f94dc796037f6d2aef106f75d (diff)
download mariadb-git-8684af76e34641331695860fc13eb9fc2ff94215.tar.gz
MDEV-28137 Some memory transactions are unnecessarily complex
buf_page_get_zip(): Do not perform a system call inside a memory transaction. Instead, if the page latch is unavailable, abort the memory transaction and let the fall-back code path wait for the page latch. buf_pool_t::watch_remove(): Return the previous state of the block. buf_page_init_for_read(): Use regular stores for moving the buffer fix count of watch_remove() to the new block descriptor. A more extensive version of this was reviewed by Daniel Black and tested with Intel TSX-NI by Axel Schwenke and Matthias Leich. My assumption that regular loads and stores would execute faster in a memory transaction than operations like std::atomic::fetch_add() turned out to be incorrect.
-rw-r--r-- storage/innobase/buf/buf0buf.cc 41
-rw-r--r-- storage/innobase/buf/buf0rea.cc 57
-rw-r--r-- storage/innobase/include/buf0buf.h 9
3 files changed, 71 insertions, 36 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index e456389149a..3f4f7888315 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2269,29 +2269,66 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
lookup:
for (bool discard_attempted= false;;)
{
+#ifndef NO_ELISION
+ if (xbegin())
{
- transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
+ if (hash_lock.is_locked())
+ xabort();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ {
+ xend();
goto must_read_page;
+ }
+ if (!bpage->zip.data)
+ {
+ /* There is no ROW_FORMAT=COMPRESSED page. */
+ xend();
+ return nullptr;
+ }
+ if (discard_attempted || !bpage->frame)
+ {
+ if (!bpage->lock.s_lock_try())
+ xabort();
+ xend();
+ break;
+ }
+ xend();
+ }
+ else
+#endif
+ {
+ hash_lock.lock_shared();
+ bpage= buf_pool.page_hash.get(page_id, chain);
+ if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ {
+ hash_lock.unlock_shared();
+ goto must_read_page;
+ }
ut_ad(bpage->in_file());
ut_ad(page_id == bpage->id());
if (!bpage->zip.data)
+ {
/* There is no ROW_FORMAT=COMPRESSED page. */
+ hash_lock.unlock_shared();
return nullptr;
+ }
if (discard_attempted || !bpage->frame)
{
- /* Even when we are holding a page_hash latch, it should be
+ /* Even when we are holding a hash_lock, it should be
acceptable to wait for a page S-latch here, because
buf_page_t::read_complete() will not wait for buf_pool.mutex,
and because S-latch would not conflict with a U-latch
that would be protecting buf_page_t::write_complete(). */
bpage->lock.s_lock();
+ hash_lock.unlock_shared();
break;
}
+
+ hash_lock.unlock_shared();
}
discard_attempted= true;
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index f47c983c650..436d2bd0aa3 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,19 +50,30 @@ i/o-fixed buffer blocks */
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
-@param watch sentinel
-@param chain locked hash table chain */
-inline void buf_pool_t::watch_remove(buf_page_t *watch,
- buf_pool_t::hash_chain &chain)
+@param w sentinel
+@param chain locked hash table chain
+@return w->state() */
+inline uint32_t buf_pool_t::watch_remove(buf_page_t *w,
+ buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(page_hash.lock_get(chain).is_write_locked());
- ut_a(watch_is_sentinel(*watch));
- if (watch->buf_fix_count())
- page_hash.remove(chain, watch);
- ut_ad(!watch->in_page_hash);
- watch->set_state(buf_page_t::NOT_USED);
- watch->id_= page_id_t(~0ULL);
+ ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked());
+ ut_ad(w >= &watch[0]);
+ ut_ad(w < &watch[array_elements(watch)]);
+ ut_ad(!w->in_zip_hash);
+ ut_ad(!w->zip.data);
+
+ uint32_t s{w->state()};
+ w->set_state(buf_page_t::NOT_USED);
+ ut_ad(s >= buf_page_t::UNFIXED);
+ ut_ad(s < buf_page_t::READ_FIX);
+
+ if (~buf_page_t::LRU_MASK & s)
+ page_hash.remove(chain, w);
+
+ ut_ad(!w->in_page_hash);
+ w->id_= page_id_t(~0ULL);
+ return s;
}
/** Initialize a page for read to the buffer buf_pool. If the page is
@@ -139,14 +150,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
- {
- /* Preserve the reference count. */
- uint32_t buf_fix_count= hash_page->state();
- ut_a(buf_fix_count >= buf_page_t::UNFIXED);
- ut_a(buf_fix_count < buf_page_t::READ_FIX);
- buf_pool.watch_remove(hash_page, chain);
- block->page.fix(buf_fix_count - buf_page_t::UNFIXED);
- }
+ block->page.set_state(buf_pool.watch_remove(hash_page, chain) +
+ (buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, &block->page);
}
@@ -209,16 +214,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
- {
- /* Preserve the reference count. It can be 0 if
- buf_pool_t::watch_unset() is executing concurrently,
- waiting for buf_pool.mutex, which we are holding. */
- uint32_t buf_fix_count= hash_page->state();
- ut_a(buf_fix_count >= buf_page_t::UNFIXED);
- ut_a(buf_fix_count < buf_page_t::READ_FIX);
- bpage->fix(buf_fix_count - buf_page_t::UNFIXED);
- buf_pool.watch_remove(hash_page, chain);
- }
+ bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
+ (buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, bpage);
}
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index a7ecc888137..7c372379ad3 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2021, MariaDB Corporation.
+Copyright (c) 2013, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1535,9 +1535,10 @@ public:
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
- @param watch sentinel
- @param chain locked hash table chain */
- inline void watch_remove(buf_page_t *watch, hash_chain &chain);
+ @param w sentinel
+ @param chain locked hash table chain
+ @return w->state() */
+ inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
TPOOL_SUPPRESS_TSAN