author	Marko Mäkelä <marko.makela@mariadb.com>	2021-10-22 12:38:45 +0300
committer	Marko Mäkelä <marko.makela@mariadb.com>	2021-10-22 12:38:45 +0300
commit	1f02280904fcfbb2bd86404d1c85c025634f8c9d (patch)
tree	d4a7f9ea0a2265ee629fccbcc04be92926694686
parent	c091a0bc8da87045f10bfc96618ed7194768fa2d (diff)
download	mariadb-git-1f02280904fcfbb2bd86404d1c85c025634f8c9d.tar.gz
MDEV-26769 InnoDB does not support hardware lock elision
This implements memory transaction support for:

* Intel Restricted Transactional Memory (RTM), also known as TSX-NI
  (Transactional Synchronization Extensions New Instructions)
* POWER v2.09 Hardware Transactional Memory (HTM) on GNU/Linux

transactional_lock_guard, transactional_shared_lock_guard:
RAII lock guards that try to elide the lock acquisition
when transactional memory is available.

buf_pool.page_hash: Try to elide latches whenever feasible.
Related to the InnoDB change buffer and ROW_FORMAT=COMPRESSED tables,
this is not always possible.
In buf_page_get_low(), memory transactions only work reasonably
well for validating a guessed block address.

TMLockGuard, TMLockTrxGuard, TMLockMutexGuard:
RAII lock guards that try to elide lock_sys.latch and related latches.
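To illustrate the elision idea that transactional_lock_guard encapsulates, here is a
minimal, self-contained C++ sketch. It is not the code in this commit; the names
spin_lock and elided_lock_guard are invented for illustration, and it assumes GCC or
Clang on x86-64 compiled with -mrtm. The guard first tries to run the critical section
as an RTM memory transaction that merely reads the lock word (aborting if the lock is
held), and falls back to a real lock acquisition when RTM is unavailable or the
transaction aborts.

#include <immintrin.h>
#include <atomic>

/* Trivial test-and-set spinlock exposing a read-only is_locked() check,
which the elision path needs so that it can subscribe to the lock word
without writing it. */
class spin_lock
{
  std::atomic<bool> locked{false};
public:
  void lock() { while (locked.exchange(true, std::memory_order_acquire)) {} }
  void unlock() { locked.store(false, std::memory_order_release); }
  bool is_locked() const { return locked.load(std::memory_order_relaxed); }
};

/* RAII guard: elide the lock inside an RTM memory transaction if possible,
otherwise acquire it for real. */
class elided_lock_guard
{
  spin_lock &l;
  bool elided;
public:
  explicit elided_lock_guard(spin_lock &lock) : l(lock), elided(false)
  {
    if (__builtin_cpu_supports("rtm") && _xbegin() == _XBEGIN_STARTED)
    {
      if (!l.is_locked())
      {
        /* The lock word is now in the transaction's read set: if another
        thread acquires the lock before we commit, the hardware aborts the
        transaction and execution resumes on the fallback path below. */
        elided = true;
        return;
      }
      _xabort(0xff);   /* the lock is held: do not run speculatively */
    }
    l.lock();          /* fallback: acquire the lock normally */
  }
  ~elided_lock_guard()
  {
    if (elided)
      _xend();         /* commit the memory transaction */
    else
      l.unlock();
  }
};

Usage mirrors std::lock_guard: construct an elided_lock_guard on the latch for the
duration of the critical section. The approach only pays off for short, read-mostly
critical sections; copying an entire page frame (as in buf_buddy_relocate() or
buf_pool_t::realloc()) or blocking inside the section would exceed the transaction
capacity or force an abort, which is why several call sites in this patch deliberately
keep taking the latch the ordinary way.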
-rw-r--r--  storage/innobase/btr/btr0btr.cc              |   1
-rw-r--r--  storage/innobase/btr/btr0cur.cc              |  16
-rw-r--r--  storage/innobase/btr/btr0sea.cc              |  42
-rw-r--r--  storage/innobase/buf/buf0block_hint.cc       |   8
-rw-r--r--  storage/innobase/buf/buf0buddy.cc            |   9
-rw-r--r--  storage/innobase/buf/buf0buf.cc              | 583
-rw-r--r--  storage/innobase/buf/buf0lru.cc              |  17
-rw-r--r--  storage/innobase/buf/buf0rea.cc              | 109
-rw-r--r--  storage/innobase/dict/dict0crea.cc           |   2
-rw-r--r--  storage/innobase/dict/dict0dict.cc           |   2
-rw-r--r--  storage/innobase/gis/gis0sea.cc              |   3
-rw-r--r--  storage/innobase/handler/ha_innodb.cc        |  16
-rw-r--r--  storage/innobase/ibuf/ibuf0ibuf.cc           |   3
-rw-r--r--  storage/innobase/include/btr0sea.h           |  12
-rw-r--r--  storage/innobase/include/buf0buf.h           |  53
-rw-r--r--  storage/innobase/include/buf0types.h         |  49
-rw-r--r--  storage/innobase/include/dict0mem.h          |   8
-rw-r--r--  storage/innobase/include/lock0lock.h         | 188
-rw-r--r--  storage/innobase/include/lock0priv.h         |   7
-rw-r--r--  storage/innobase/include/lock0priv.ic        |   7
-rw-r--r--  storage/innobase/include/rw_lock.h           |  12
-rw-r--r--  storage/innobase/include/srw_lock.h          |  55
-rw-r--r--  storage/innobase/include/transactional_lock_guard.h | 167
-rw-r--r--  storage/innobase/include/trx0trx.h           |   3
-rw-r--r--  storage/innobase/lock/lock0lock.cc           | 280
-rw-r--r--  storage/innobase/log/log0recv.cc             |   1
-rw-r--r--  storage/innobase/row/row0ins.cc              |   3
-rw-r--r--  storage/innobase/srv/srv0srv.cc              |  17
-rw-r--r--  storage/innobase/sync/srw_lock.cc            |  71
-rw-r--r--  storage/innobase/trx/trx0purge.cc            |  54
-rw-r--r--  storage/innobase/trx/trx0rec.cc              |  22
-rw-r--r--  storage/innobase/trx/trx0trx.cc              |  23
32 files changed, 1225 insertions(+), 618 deletions(-)
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 02aa89361c5..e02df95b641 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1069,6 +1069,7 @@ top_loop:
/** Clear the index tree and reinitialize the root page, in the
rollback of TRX_UNDO_EMPTY. The BTR_SEG_LEAF is freed and reinitialized.
@param thr query thread */
+TRANSACTIONAL_TARGET
void dict_index_t::clear(que_thr_t *thr)
{
mtr_t mtr;
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index c61799b8b12..435b62a7493 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1214,6 +1214,7 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
+TRANSACTIONAL_TARGET
dberr_t
btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
@@ -1994,16 +1995,15 @@ retry_page_get:
&& mode != PAGE_CUR_RTREE_INSERT
&& mode != PAGE_CUR_RTREE_LOCATE
&& mode >= PAGE_CUR_CONTAIN) {
- trx_t* trx = thr_get_trx(cursor->thr);
lock_prdt_t prdt;
- lock_sys.rd_lock(SRW_LOCK_CALL);
- trx->mutex_lock();
- lock_init_prdt_from_mbr(
- &prdt, &cursor->rtr_info->mbr, mode,
- trx->lock.lock_heap);
- lock_sys.rd_unlock();
- trx->mutex_unlock();
+ {
+ trx_t* trx = thr_get_trx(cursor->thr);
+ TMLockTrxGuard g{TMLockTrxArgs(*trx)};
+ lock_init_prdt_from_mbr(
+ &prdt, &cursor->rtr_info->mbr, mode,
+ trx->lock.lock_heap);
+ }
if (rw_latch == RW_NO_LATCH && height != 0) {
block->lock.s_lock();
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index c7a13181590..a59a54676ed 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -1007,6 +1007,7 @@ both have sensible values.
or NULL
@param[in] mtr mini transaction
@return whether the search succeeded */
+TRANSACTIONAL_TARGET
bool
btr_search_guess_on_hash(
dict_index_t* index,
@@ -1092,25 +1093,32 @@ fail:
if (!ahi_latch) {
buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(
block->page.id().fold());
- page_hash_latch&hash_lock = buf_pool.page_hash.lock_get(chain);
- hash_lock.read_lock();
-
- if (block->page.state() == BUF_BLOCK_REMOVE_HASH) {
- /* Another thread is just freeing the block
- from the LRU list of the buffer pool: do not
- try to access this page. */
- hash_lock.read_unlock();
- goto fail;
+ bool fail;
+ {
+ transactional_shared_lock_guard<page_hash_latch> g{
+ buf_pool.page_hash.lock_get(chain)};
+
+ switch (block->page.state()) {
+ case BUF_BLOCK_REMOVE_HASH:
+ /* Another thread is just freeing the block
+ from the LRU list of the buffer pool: do not
+ try to access this page. */
+ goto fail;
+ case BUF_BLOCK_FILE_PAGE:
+ break;
+ default:
+#ifndef NO_ELISION
+ xend();
+#endif
+ ut_error;
+ }
+
+ block->fix();
+ fail = index != block->index
+ && index_id == block->index->id;
}
- const bool fail = index != block->index
- && index_id == block->index->id;
ut_a(!fail || block->index->freed());
- ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
- DBUG_ASSERT(fail || block->page.status != buf_page_t::FREED);
-
- buf_block_buf_fix_inc(block);
- hash_lock.read_unlock();
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
@@ -1137,6 +1145,8 @@ got_no_latch:
if (UNIV_UNLIKELY(fail)) {
goto fail_and_release_page;
}
+
+ DBUG_ASSERT(block->page.status != buf_page_t::FREED);
} else if (UNIV_UNLIKELY(index != block->index
&& index_id == block->index->id)) {
ut_a(block->index->freed());
diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc
index 9fac76b77a5..00c968511b3 100644
--- a/storage/innobase/buf/buf0block_hint.cc
+++ b/storage/innobase/buf/buf0block_hint.cc
@@ -28,6 +28,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "buf0block_hint.h"
namespace buf {
+TRANSACTIONAL_TARGET
void Block_hint::buffer_fix_block_if_still_valid()
{
/* To check if m_block belongs to the current buf_pool, we must
@@ -47,14 +48,13 @@ void Block_hint::buffer_fix_block_if_still_valid()
if (m_block)
{
auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold());
- page_hash_latch &latch= buf_pool.page_hash.lock_get(cell);
- latch.read_lock();
+ transactional_shared_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(cell)};
if (buf_pool.is_uncompressed(m_block) && m_page_id == m_block->page.id() &&
m_block->page.state() == BUF_BLOCK_FILE_PAGE)
- buf_block_buf_fix_inc(m_block);
+ m_block->fix();
else
clear();
- latch.read_unlock();
}
}
} // namespace buf
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index 1e5cff4959f..6f4b4554518 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -548,7 +548,10 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
}
page_hash_latch &hash_lock = buf_pool.page_hash.lock_get(cell);
- hash_lock.write_lock();
+ /* It does not make sense to use transactional_lock_guard here,
+ because the memcpy() of 1024 to 16384 bytes would likely make the
+ memory transaction too large. */
+ hash_lock.lock();
if (bpage->can_relocate()) {
/* Relocate the compressed page. */
@@ -559,7 +562,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
memcpy(dst, src, size);
bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
- hash_lock.write_unlock();
+ hash_lock.unlock();
buf_buddy_mem_invalid(
reinterpret_cast<buf_buddy_free_t*>(src), i);
@@ -570,7 +573,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
return(true);
}
- hash_lock.write_unlock();
+ hash_lock.unlock();
return(false);
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 64aa6f873cc..d03a92cc7a5 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1339,7 +1339,11 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
const page_id_t id{block->page.id()};
hash_chain& chain = page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = page_hash.lock_get(chain);
- hash_lock.write_lock();
+ /* It does not make sense to use transactional_lock_guard
+ here, because copying innodb_page_size (4096 to 65536) bytes
+ as well as other changes would likely make the memory
+ transaction too large. */
+ hash_lock.lock();
if (block->page.can_relocate()) {
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
@@ -1421,7 +1425,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
new_block = block;
}
- hash_lock.write_unlock();
+ hash_lock.unlock();
buf_LRU_block_free_non_file_page(new_block);
return(true); /* free_list was enough */
}
@@ -1596,7 +1600,7 @@ inline void buf_pool_t::page_hash_table::write_lock_all()
{
for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
{
- reinterpret_cast<page_hash_latch&>(array[n]).write_lock();
+ reinterpret_cast<page_hash_latch&>(array[n]).lock();
if (!n)
break;
}
@@ -1607,7 +1611,7 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
{
for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
{
- reinterpret_cast<page_hash_latch&>(array[n]).write_unlock();
+ reinterpret_cast<page_hash_latch&>(array[n]).unlock();
if (!n)
break;
}
@@ -1742,6 +1746,8 @@ withdraw_retry:
{found, withdraw_started, my_hrtime_coarse()};
withdraw_started = current_time;
+ /* This is going to exceed the maximum size of a
+ memory transaction. */
LockMutexGuard g{SRW_LOCK_CALL};
trx_sys.trx_list.for_each(f);
}
@@ -2115,7 +2121,7 @@ retry:
return nullptr;
}
- page_hash.lock_get(chain).write_unlock();
+ page_hash.lock_get(chain).unlock();
/* Allocate a watch[] and then try to insert it into the page_hash. */
mysql_mutex_lock(&mutex);
@@ -2143,12 +2149,12 @@ retry:
if (UNIV_LIKELY_NULL(bpage))
{
w->set_state(BUF_BLOCK_NOT_USED);
- page_hash.lock_get(chain).write_lock();
+ page_hash.lock_get(chain).lock();
mysql_mutex_unlock(&mutex);
goto retry;
}
- page_hash.lock_get(chain).write_lock();
+ page_hash.lock_get(chain).lock();
ut_ad(!w->buf_fix_count_);
w->buf_fix_count_= 1;
buf_pool.page_hash.append(chain, w);
@@ -2165,45 +2171,55 @@ retry:
watch_set(id) must have returned nullptr before.
@param id page identifier
@param chain unlocked hash table chain */
+TRANSACTIONAL_TARGET
void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_not_owner(&mutex);
- page_hash_latch &hash_lock= page_hash.lock_get(chain);
- hash_lock.write_lock();
- /* The page must exist because watch_set() increments buf_fix_count. */
- buf_page_t *w= page_hash.get(id, chain);
- const auto buf_fix_count= w->buf_fix_count();
- ut_ad(buf_fix_count);
- const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w);
- if (!must_remove)
- w->unfix();
- hash_lock.write_unlock();
-
- if (must_remove)
+ buf_page_t *w;
{
- const auto old= w;
- /* The following is based on buf_pool_t::watch_remove(). */
- mysql_mutex_lock(&mutex);
+ transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)};
+ /* The page must exist because watch_set() increments buf_fix_count. */
w= page_hash.get(id, chain);
- hash_lock.write_lock();
+ const auto buf_fix_count= w->buf_fix_count();
+ ut_ad(buf_fix_count);
+ ut_ad(w->in_page_hash);
+ if (buf_fix_count != 1 || !watch_is_sentinel(*w))
+ {
+ w->unfix();
+ w= nullptr;
+ }
+ }
+
+ if (!w)
+ return;
+
+ const auto old= w;
+ /* The following is based on buf_pool_t::watch_remove(). */
+ mysql_mutex_lock(&mutex);
+ w= page_hash.get(id, chain);
+
+ {
+ transactional_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
if (w->unfix() == 0 && w == old)
{
page_hash.remove(chain, w);
- // Now that the watch is detached from page_hash, release it to watch[].
+ // Now that w is detached from page_hash, release it to watch[].
ut_ad(w->id_ == id);
ut_ad(!w->buf_fix_count());
ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE);
w->set_state(BUF_BLOCK_NOT_USED);
}
- mysql_mutex_unlock(&mutex);
- hash_lock.write_unlock();
}
+
+ mysql_mutex_unlock(&mutex);
}
/** Mark the page status as FREED for the given tablespace and page number.
@param[in,out] space tablespace
@param[in] page page number
@param[in,out] mtr mini-transaction */
+TRANSACTIONAL_TARGET
void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
{
ut_ad(mtr);
@@ -2219,28 +2235,22 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
++buf_pool.stat.n_page_gets;
const page_id_t page_id(space->id, page);
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
- page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
- hash_lock.read_lock();
- if (buf_block_t *block= reinterpret_cast<buf_block_t*>
- (buf_pool.page_hash.get(page_id, chain)))
+ buf_block_t *block;
{
- if (block->page.state() != BUF_BLOCK_FILE_PAGE)
- /* FIXME: convert, but avoid buf_zip_decompress() */;
- else
- {
- buf_block_buf_fix_inc(block);
- ut_ad(block->page.buf_fix_count());
- hash_lock.read_unlock();
-
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
- block->lock.x_lock();
-
- block->page.status= buf_page_t::FREED;
+ transactional_shared_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
+ block= reinterpret_cast<buf_block_t*>
+ (buf_pool.page_hash.get(page_id, chain));
+ if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
+ /* FIXME: convert ROW_FORMAT=COMPRESSED, without buf_zip_decompress() */
return;
- }
+ block->fix();
}
+ ut_ad(block->page.buf_fix_count());
- hash_lock.read_unlock();
+ mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
+ block->lock.x_lock();
+ block->page.status= buf_page_t::FREED;
}
/** Get read access to a compressed page (usually of type
@@ -2253,80 +2263,48 @@ the same set of mutexes or latches.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size
@return pointer to the block */
+TRANSACTIONAL_TARGET
buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
{
ut_ad(zip_size);
ut_ad(ut_is_2pow(zip_size));
++buf_pool.stat.n_page_gets;
- bool discard_attempted= false;
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
buf_page_t *bpage;
- for (;;)
- {
lookup:
- hash_lock.read_lock();
- bpage= buf_pool.page_hash.get(page_id, chain);
- if (bpage)
- break;
- hash_lock.read_unlock();
-
- dberr_t err= buf_read_page(page_id, zip_size);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS))
+ for (bool discard_attempted= false;;)
+ {
{
- ib::error() << "Reading compressed page " << page_id
- << " failed with error: " << err;
- goto err_exit;
- }
+ transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
+ bpage= buf_pool.page_hash.get(page_id, chain);
+ if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ goto must_read_page;
-#ifdef UNIV_DEBUG
- if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
-#endif /* UNIV_DEBUG */
- }
-
- if (!bpage->zip.data)
- {
- /* There is no compressed page. */
-err_exit:
- hash_lock.read_unlock();
- return nullptr;
- }
+ ut_ad(bpage->in_file());
+ ut_ad(page_id == bpage->id());
- ut_ad(!buf_pool.watch_is_sentinel(*bpage));
+ if (!bpage->zip.data)
+ /* There is no ROW_FORMAT=COMPRESSED page. */
+ return nullptr;
- switch (bpage->state()) {
- case BUF_BLOCK_FILE_PAGE:
- /* Discard the uncompressed page frame if possible. */
- if (!discard_attempted)
- {
- discard_attempted= true;
- hash_lock.read_unlock();
- mysql_mutex_lock(&buf_pool.mutex);
- if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
- buf_LRU_free_page(bpage, false);
- mysql_mutex_unlock(&buf_pool.mutex);
- goto lookup;
+ if (discard_attempted || bpage->state() == BUF_BLOCK_ZIP_PAGE)
+ {
+ bpage->fix();
+ break;
+ }
}
- /* fall through */
- case BUF_BLOCK_ZIP_PAGE:
- bpage->fix();
- goto got_block;
- default:
- break;
- }
- ut_error;
- goto err_exit;
-
-got_block:
- bool must_read= bpage->io_fix() == BUF_IO_READ;
- hash_lock.read_unlock();
+ discard_attempted= true;
+ mysql_mutex_lock(&buf_pool.mutex);
+ if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
+ buf_LRU_free_page(bpage, false);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ }
DBUG_ASSERT(bpage->status != buf_page_t::FREED);
-
bpage->set_accessed();
buf_page_make_young_if_needed(bpage);
@@ -2336,12 +2314,19 @@ got_block:
ut_ad(bpage->buf_fix_count());
ut_ad(bpage->in_file());
- if (must_read)
- /* Let us wait until the read operation completes */
- while (bpage->io_fix() == BUF_IO_READ)
- std::this_thread::sleep_for(WAIT_FOR_READ);
-
+ /* Let us wait until the read operation completes */
+ while (bpage->io_fix() == BUF_IO_READ)
+ std::this_thread::sleep_for(WAIT_FOR_READ);
return bpage;
+
+must_read_page:
+ if (dberr_t err= buf_read_page(page_id, zip_size))
+ {
+ ib::error() << "Reading compressed page " << page_id
+ << " failed with error: " << err;
+ return nullptr;
+ }
+ goto lookup;
}
/********************************************************************//**
@@ -2493,6 +2478,7 @@ while reading the page from file
then it makes sure that it does merging of change buffer changes while
reading the page from file.
@return pointer to the block or NULL */
+TRANSACTIONAL_TARGET
buf_block_t*
buf_page_get_low(
const page_id_t page_id,
@@ -2504,7 +2490,6 @@ buf_page_get_low(
dberr_t* err,
bool allow_ibuf_merge)
{
- buf_block_t* block;
unsigned access_time;
ulint retries = 0;
@@ -2561,146 +2546,137 @@ buf_page_get_low(
auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
loop:
- buf_block_t* fix_block;
- hash_lock.read_lock();
-
- /* If the guess is a compressed page descriptor that
- has been allocated by buf_page_alloc_descriptor(),
- it may have been freed by buf_relocate(). */
-
- if (guess && buf_pool.is_uncompressed(guess)
- && page_id == guess->page.id()
- && guess->page.state() == BUF_BLOCK_FILE_PAGE) {
- ut_ad(!guess->page.in_zip_hash);
- block = guess;
- goto have_block;
+ buf_block_t* block = guess;
+
+ if (block) {
+ transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
+ if (buf_pool.is_uncompressed(block)
+ && page_id == block->page.id()
+ && block->page.state() == BUF_BLOCK_FILE_PAGE) {
+ ut_ad(!block->page.in_zip_hash);
+ block->fix();
+ goto got_block;
+ }
}
guess = nullptr;
+
+ /* A memory transaction would frequently be aborted here. */
+ hash_lock.lock_shared();
block = reinterpret_cast<buf_block_t*>(
buf_pool.page_hash.get(page_id, chain));
+ if (UNIV_LIKELY(block
+ && !buf_pool.watch_is_sentinel(block->page))) {
+ block->fix();
+ hash_lock.unlock_shared();
+ goto got_block;
+ }
+ hash_lock.unlock_shared();
- if (block && !buf_pool.watch_is_sentinel(block->page)) {
-have_block:
- fix_block = block;
- } else {
- hash_lock.read_unlock();
- fix_block = block = nullptr;
-
- /* Page not in buf_pool: needs to be read from file */
- if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
- hash_lock.write_lock();
-
- if (buf_page_t *bpage= buf_pool.watch_set(page_id,
- chain)) {
- /* We can release hash_lock after we
- increment the fix count to make
- sure that no state change takes place. */
- bpage->fix();
- hash_lock.write_unlock();
- block = reinterpret_cast<buf_block_t*>(bpage);
- fix_block = block;
- goto got_block;
- }
-
- hash_lock.write_unlock();
+ /* Page not in buf_pool: needs to be read from file */
+ switch (mode) {
+ case BUF_GET_IF_IN_POOL:
+ case BUF_PEEK_IF_IN_POOL:
+ case BUF_EVICT_IF_IN_POOL:
+ return nullptr;
+ case BUF_GET_IF_IN_POOL_OR_WATCH:
+ /* We cannot easily use a memory transaction here. */
+ hash_lock.lock();
+ block = reinterpret_cast<buf_block_t*>
+ (buf_pool.watch_set(page_id, chain));
+ if (block) {
+ /* buffer-fixing prevents block->page.state()
+ changes */
+ block->fix();
}
+ hash_lock.unlock();
- switch (mode) {
- case BUF_GET_IF_IN_POOL:
- case BUF_GET_IF_IN_POOL_OR_WATCH:
- case BUF_PEEK_IF_IN_POOL:
- case BUF_EVICT_IF_IN_POOL:
- return(NULL);
+ if (block) {
+ goto got_block;
}
- /* The call path is buf_read_page() ->
- buf_read_page_low() (fil_space_t::io()) ->
- buf_page_read_complete() ->
- buf_decrypt_after_read(). Here fil_space_t* is used
- and we decrypt -> buf_page_check_corrupt() where page
- checksums are compared. Decryption, decompression as
- well as error handling takes place at a lower level.
- Here we only need to know whether the page really is
- corrupted, or if an encrypted page with a valid
- checksum cannot be decypted. */
-
- dberr_t local_err = buf_read_page(page_id, zip_size);
-
- if (local_err == DB_SUCCESS) {
- buf_read_ahead_random(page_id, zip_size,
- ibuf_inside(mtr));
-
- retries = 0;
- } else if (mode == BUF_GET_POSSIBLY_FREED) {
+ return nullptr;
+ }
+
+ /* The call path is buf_read_page() ->
+ buf_read_page_low() (fil_space_t::io()) ->
+ buf_page_read_complete() ->
+ buf_decrypt_after_read(). Here fil_space_t* is used
+ and we decrypt -> buf_page_check_corrupt() where page
+ checksums are compared. Decryption, decompression as
+ well as error handling takes place at a lower level.
+ Here we only need to know whether the page really is
+ corrupted, or if an encrypted page with a valid
+ checksum cannot be decypted. */
+
+ if (dberr_t local_err = buf_read_page(page_id, zip_size)) {
+ if (mode == BUF_GET_POSSIBLY_FREED) {
if (err) {
*err = local_err;
}
- return NULL;
+ return nullptr;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
-
- DBUG_EXECUTE_IF(
- "innodb_page_corruption_retries",
- retries = BUF_PAGE_READ_MAX_RETRIES;
- );
+ DBUG_EXECUTE_IF("innodb_page_corruption_retries",
+ retries = BUF_PAGE_READ_MAX_RETRIES;);
} else {
if (err) {
*err = local_err;
}
-
- /* Pages whose encryption key is unavailable or used
- key, encryption algorithm or encryption method is
- incorrect are marked as encrypted in
+ /* Pages whose encryption key is unavailable or the
+ configured key, encryption algorithm or encryption
+ method are incorrect are marked as encrypted in
buf_page_check_corrupt(). Unencrypted page could be
corrupted in a way where the key_id field is
nonzero. There is no checksum on field
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */
- if (local_err == DB_DECRYPTION_FAILED) {
- return (NULL);
- }
-
- if (local_err == DB_PAGE_CORRUPTED
- && srv_force_recovery) {
- return NULL;
+ switch (local_err) {
+ case DB_PAGE_CORRUPTED:
+ if (!srv_force_recovery) {
+ break;
+ }
+ /* fall through */
+ case DB_DECRYPTION_FAILED:
+ return nullptr;
+ default:
+ break;
}
/* Try to set table as corrupted instead of
asserting. */
if (page_id.space() == TRX_SYS_SPACE) {
} else if (page_id.space() == SRV_TMP_SPACE_ID) {
- } else if (fil_space_t* space= fil_space_t::get(
- page_id.space())) {
+ } else if (fil_space_t* space
+ = fil_space_t::get(page_id.space())) {
bool set = dict_set_corrupted_by_space(space);
space->release();
if (set) {
- return NULL;
+ return nullptr;
}
}
if (local_err == DB_IO_ERROR) {
- return NULL;
+ return nullptr;
}
ib::fatal() << "Unable to read page " << page_id
- << " into the buffer pool after "
- << BUF_PAGE_READ_MAX_RETRIES
- << ". The most probable cause"
+ << " into the buffer pool after "
+ << BUF_PAGE_READ_MAX_RETRIES
+ << ". The most probable cause"
" of this error may be that the"
" table has been corrupted."
" See https://mariadb.com/kb/en/library/innodb-recovery-modes/";
}
-
-#ifdef UNIV_DEBUG
- if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
-#endif /* UNIV_DEBUG */
- goto loop;
+ } else {
+ buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
+ retries = 0;
}
- fix_block->fix();
- hash_lock.read_unlock();
+ ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
+ goto loop;
got_block:
+ ut_ad(!block->page.in_zip_hash);
switch (mode) {
default:
ut_ad(block->zip_size() == zip_size);
@@ -2708,23 +2684,23 @@ got_block:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
- if (fix_block->page.io_fix() == BUF_IO_READ) {
+ if (block->page.io_fix() == BUF_IO_READ) {
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
- fix_block->unfix();
+ block->unfix();
return(NULL);
}
}
- switch (UNIV_EXPECT(fix_block->page.state(), BUF_BLOCK_FILE_PAGE)) {
+ switch (UNIV_EXPECT(block->page.state(), BUF_BLOCK_FILE_PAGE)) {
case BUF_BLOCK_FILE_PAGE:
if (fsp_is_system_temporary(page_id.space())
&& block->page.io_fix() != BUF_IO_NONE) {
/* This suggests that the page is being flushed.
Avoid returning reference to this page.
Instead wait for the flush action to complete. */
- fix_block->unfix();
+ block->unfix();
std::this_thread::sleep_for(
std::chrono::microseconds(WAIT_FOR_WRITE));
goto loop;
@@ -2732,11 +2708,11 @@ got_block:
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
evict_from_pool:
- ut_ad(!fix_block->page.oldest_modification());
+ ut_ad(!block->page.oldest_modification());
mysql_mutex_lock(&buf_pool.mutex);
- fix_block->unfix();
+ block->unfix();
- if (!buf_LRU_free_page(&fix_block->page, true)) {
+ if (!buf_LRU_free_page(&block->page, true)) {
ut_ad(0);
}
@@ -2759,7 +2735,7 @@ evict_from_pool:
adaptive hash index. There cannot be an
adaptive hash index for a compressed-only
page, so do not bother decompressing the page. */
- fix_block->unfix();
+ block->unfix();
return(NULL);
}
@@ -2773,7 +2749,7 @@ evict_from_pool:
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
- fix_block->unfix();
+ block->unfix();
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
@@ -2786,16 +2762,21 @@ evict_from_pool:
or relocated while we are attempting to allocate an
uncompressed page. */
- block = buf_LRU_get_free_block(false);
- buf_block_init_low(block);
+ buf_block_t *new_block = buf_LRU_get_free_block(false);
+ buf_block_init_low(new_block);
mysql_mutex_lock(&buf_pool.mutex);
- hash_lock.write_lock();
+ page_hash_latch& hash_lock=buf_pool.page_hash.lock_get(chain);
+
+ /* It does not make sense to use
+ transactional_lock_guard here, because buf_relocate()
+ would likely make a memory transaction too large. */
+ hash_lock.lock();
/* Buffer-fixing prevents the page_hash from changing. */
ut_ad(bpage == buf_pool.page_hash.get(page_id, chain));
- fix_block->unfix(); /* hash_lock protects us after this */
+ block->unfix(); /* hash_lock protects us after this */
if (bpage->buf_fix_count() || bpage->io_fix() != BUF_IO_NONE) {
/* The block was buffer-fixed or I/O-fixed while
@@ -2804,15 +2785,15 @@ evict_from_pool:
This should be extremely unlikely, for example,
if buf_page_get_zip() was invoked. */
- hash_lock.write_unlock();
- buf_LRU_block_free_non_file_page(block);
+ hash_lock.unlock();
+ buf_LRU_block_free_non_file_page(new_block);
mysql_mutex_unlock(&buf_pool.mutex);
/* Try again */
goto loop;
}
- fix_block = block;
+ block = new_block;
/* Move the compressed page from bpage to block,
and uncompress it. */
@@ -2843,7 +2824,7 @@ evict_from_pool:
MEM_UNDEFINED(bpage, sizeof *bpage);
mysql_mutex_unlock(&buf_pool.mutex);
- hash_lock.write_unlock();
+ hash_lock.unlock();
buf_pool.n_pend_unzip++;
access_time = block->page.is_accessed();
@@ -2859,9 +2840,9 @@ evict_from_pool:
buf_pool.mutex. */
if (!buf_zip_decompress(block, false)) {
- fix_block->lock.x_unlock();
- fix_block->page.io_unfix();
- fix_block->unfix();
+ block->lock.x_unlock();
+ block->page.io_unfix();
+ block->unfix();
--buf_pool.n_pend_unzip;
if (err) {
@@ -2870,16 +2851,14 @@ evict_from_pool:
return NULL;
}
+ block->page.io_unfix();
block->lock.x_unlock();
- fix_block->page.io_unfix();
--buf_pool.n_pend_unzip;
- break;
}
- ut_ad(block == fix_block);
- ut_ad(fix_block->page.buf_fix_count());
+ ut_ad(block->page.buf_fix_count());
- ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
re_evict:
@@ -2892,15 +2871,17 @@ re_evict:
mysql_mutex_lock(&buf_pool.mutex);
- fix_block->unfix();
+ block->unfix();
/* Blocks cannot be relocated or enter or exit the
buf_pool while we are holding the buf_pool.mutex. */
- const bool evicted = buf_LRU_free_page(&fix_block->page, true);
+ const bool evicted = buf_LRU_free_page(&block->page, true);
space->release();
if (evicted) {
- hash_lock.write_lock();
+ page_hash_latch& hash_lock
+ = buf_pool.page_hash.lock_get(chain);
+ hash_lock.lock();
mysql_mutex_unlock(&buf_pool.mutex);
/* We may set the watch, as it would have
been set if the page were not in the
@@ -2909,31 +2890,19 @@ re_evict:
mode == BUF_GET_IF_IN_POOL_OR_WATCH
? buf_pool.watch_set(page_id, chain)
: buf_pool.page_hash.get(page_id, chain));
- hash_lock.write_unlock();
-
- if (block != NULL) {
- /* Either the page has been read in or
- a watch was set on that in the window
- where we released the buf_pool.mutex
- and before we acquire the hash_lock
- above. Try again. */
- guess = block;
-
- goto loop;
- }
-
+ hash_lock.unlock();
return(NULL);
}
- fix_block->fix();
+ block->fix();
mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_list();
buf_flush_wait_batch_end_acquiring_mutex(false);
while (buf_flush_list_space(space));
os_aio_wait_until_no_pending_writes();
- if (fix_block->page.buf_fix_count() == 1
- && !fix_block->page.oldest_modification()) {
+ if (block->page.buf_fix_count() == 1
+ && !block->page.oldest_modification()) {
goto re_evict;
}
@@ -2941,7 +2910,7 @@ re_evict:
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
- ut_ad(fix_block->page.buf_fix_count());
+ ut_ad(block->page.buf_fix_count());
/* While tablespace is reinited the indexes are already freed but the
blocks related to it still resides in buffer pool. Trying to remove
@@ -2952,25 +2921,25 @@ re_evict:
"btr_search_drop_page_hash_when_freed". */
ut_ad(mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_PEEK_IF_IN_POOL
- || fix_block->page.status != buf_page_t::FREED);
+ || block->page.status != buf_page_t::FREED);
- const bool not_first_access = fix_block->page.set_accessed();
+ const bool not_first_access = block->page.set_accessed();
if (mode != BUF_PEEK_IF_IN_POOL) {
- buf_page_make_young_if_needed(&fix_block->page);
+ buf_page_make_young_if_needed(&block->page);
}
#ifdef UNIV_DEBUG
if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
#endif /* UNIV_DEBUG */
- ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
/* We have to wait here because the IO_READ state was set
under the protection of the hash_lock and not block->lock. */
- buf_wait_for_read(fix_block);
+ buf_wait_for_read(block);
- if (fix_block->page.id() != page_id) {
- buf_block_buf_fix_dec(fix_block);
+ if (block->page.id() != page_id) {
+ buf_block_buf_fix_dec(block);
if (err) {
*err = DB_PAGE_CORRUPTED;
@@ -2979,27 +2948,27 @@ re_evict:
return NULL;
}
- if (fix_block->page.status != buf_page_t::FREED
+ if (block->page.status != buf_page_t::FREED
&& allow_ibuf_merge
- && fil_page_get_type(fix_block->frame) == FIL_PAGE_INDEX
- && page_is_leaf(fix_block->frame)) {
- fix_block->lock.x_lock();
+ && fil_page_get_type(block->frame) == FIL_PAGE_INDEX
+ && page_is_leaf(block->frame)) {
+ block->lock.x_lock();
- if (fix_block->page.ibuf_exist) {
- fix_block->page.ibuf_exist = false;
- ibuf_merge_or_delete_for_page(fix_block, page_id,
+ if (block->page.ibuf_exist) {
+ block->page.ibuf_exist = false;
+ ibuf_merge_or_delete_for_page(block, page_id,
zip_size);
}
if (rw_latch == RW_X_LATCH) {
- mtr->memo_push(fix_block, MTR_MEMO_PAGE_X_FIX);
+ mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
} else {
- fix_block->lock.x_unlock();
+ block->lock.x_unlock();
goto get_latch;
}
} else {
get_latch:
- mtr->page_lock(fix_block, rw_latch);
+ mtr->page_lock(block, rw_latch);
}
if (!not_first_access && mode != BUF_PEEK_IF_IN_POOL) {
@@ -3009,7 +2978,7 @@ get_latch:
buf_read_ahead_linear(page_id, zip_size, ibuf_inside(mtr));
}
- return(fix_block);
+ return block;
}
/** Get access to a database page. Buffered redo log may be applied.
@@ -3070,6 +3039,7 @@ buf_page_get_gen(
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
+TRANSACTIONAL_TARGET
ibool
buf_page_optimistic_get(
/*====================*/
@@ -3085,26 +3055,26 @@ buf_page_optimistic_get(
ut_ad(mtr->is_active());
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
- if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE
- || block->page.io_fix() != BUF_IO_NONE)) {
+ if (have_transactional_memory) {
+ } else if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE
+ || block->page.io_fix() != BUF_IO_NONE)) {
return FALSE;
}
const page_id_t id{block->page.id()};
buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold());
- page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
- hash_lock.read_lock();
- if (UNIV_UNLIKELY(id != block->page.id()
- || block->page.state() != BUF_BLOCK_FILE_PAGE
- || block->page.io_fix() != BUF_IO_NONE)) {
- hash_lock.read_unlock();
- return(FALSE);
+ {
+ transactional_shared_lock_guard<page_hash_latch> g{
+ buf_pool.page_hash.lock_get(chain)};
+ if (UNIV_UNLIKELY(id != block->page.id()
+ || block->page.state() != BUF_BLOCK_FILE_PAGE
+ || block->page.io_fix() != BUF_IO_NONE)) {
+ return FALSE;
+ }
+ block->fix();
}
- buf_block_buf_fix_inc(block);
- hash_lock.read_unlock();
-
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
@@ -3165,28 +3135,27 @@ Suitable for using when holding the lock_sys latches (as it avoids deadlock).
@param[in,out] mtr mini-transaction
@return the block
@retval nullptr if an S-latch cannot be granted immediately */
+TRANSACTIONAL_TARGET
buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr)
{
ut_ad(mtr);
ut_ad(mtr->is_active());
-
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
- page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
- hash_lock.read_lock();
- buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain);
- if (!bpage || bpage->state() != BUF_BLOCK_FILE_PAGE)
+ buf_block_t *block;
+
{
- hash_lock.read_unlock();
- return nullptr;
+ transactional_shared_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
+ block= reinterpret_cast<buf_block_t*>
+ (buf_pool.page_hash.get(page_id, chain));
+ if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
+ return nullptr;
+ block->fix();
}
- buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
- buf_block_buf_fix_inc(block);
- hash_lock.read_unlock();
-
if (!block->lock.s_lock_try())
{
- buf_block_buf_fix_dec(block);
+ block->unfix();
return nullptr;
}
@@ -3195,9 +3164,9 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr)
#ifdef UNIV_DEBUG
if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
#endif /* UNIV_DEBUG */
- ut_ad(bpage->buf_fix_count());
- ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE);
- ut_ad(bpage->id() == page_id);
+ ut_ad(block->page.buf_fix_count());
+ ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.id() == page_id);
++buf_pool.stat.n_page_gets;
return block;
@@ -3216,6 +3185,7 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
page_zip_set_size(&page.zip, zip_size);
}
+TRANSACTIONAL_TARGET
static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
mtr_t *mtr, buf_block_t *free_block)
{
@@ -3269,10 +3239,13 @@ loop:
break;
case BUF_BLOCK_ZIP_PAGE:
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
- hash_lock.write_lock();
+ /* It does not make sense to use transactional_lock_guard here,
+ because buf_relocate() would likely make the memory transaction
+ too large. */
+ hash_lock.lock();
if (block->page.io_fix() != BUF_IO_NONE)
{
- hash_lock.write_unlock();
+ hash_lock.unlock();
/* Wait for buf_page_write_complete() to release the I/O fix. */
timespec abstime;
set_timespec_nsec(abstime, 1000000);
@@ -3289,7 +3262,7 @@ loop:
free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_unzip_LRU_add_block(free_block, FALSE);
- hash_lock.write_unlock();
+ hash_lock.unlock();
buf_page_free_descriptor(&block->page);
block= free_block;
buf_block_buf_fix_inc(block);
@@ -3325,24 +3298,20 @@ loop:
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, false);
- page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
- hash_lock.write_lock();
- block->page.set_state(BUF_BLOCK_FILE_PAGE);
- buf_pool.page_hash.append(chain, &block->page);
+ {
+ transactional_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
+ block->page.set_state(BUF_BLOCK_FILE_PAGE);
+ buf_pool.page_hash.append(chain, &block->page);
+ block->lock.x_lock();
+ if (UNIV_UNLIKELY(zip_size))
+ /* Prevent race conditions during buf_buddy_alloc(), which may
+ release and reacquire buf_pool.mutex, by IO-fixing and X-latching. */
+ block->page.set_io_fix(BUF_IO_READ);
+ }
- block->lock.x_lock();
if (UNIV_UNLIKELY(zip_size))
{
- /* Prevent race conditions during buf_buddy_alloc(), which may
- release and reacquire buf_pool.mutex, by IO-fixing and X-latching
- the block. */
- block->page.set_io_fix(BUF_IO_READ);
- hash_lock.write_unlock();
-
- /* buf_pool.mutex may be released and reacquired by
- buf_buddy_alloc(). We must defer this operation until
- after the block descriptor has been added to
- buf_pool.LRU and buf_pool.page_hash. */
block->page.zip.data= buf_buddy_alloc(zip_size);
/* To maintain the invariant block->in_unzip_LRU_list ==
@@ -3353,8 +3322,6 @@ loop:
block->page.set_io_fix(BUF_IO_NONE);
}
- else
- hash_lock.write_unlock();
mysql_mutex_unlock(&buf_pool.mutex);
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 2b02633cbb9..77d367074b0 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -810,7 +810,9 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
execution of buf_page_get_low(). */
buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
- hash_lock.write_lock();
+ /* We cannot use transactional_lock_guard here,
+ because buf_buddy_relocate() in buf_buddy_free() could get stuck. */
+ hash_lock.lock();
lsn_t oldest_modification = bpage->oldest_modification_acquire();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
@@ -840,7 +842,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
} else if (oldest_modification
&& bpage->state() != BUF_BLOCK_FILE_PAGE) {
func_exit:
- hash_lock.write_unlock();
+ hash_lock.unlock();
return(false);
} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
@@ -954,9 +956,10 @@ func_exit:
decompressing the block while we release
hash_lock. */
b->set_io_fix(BUF_IO_PIN);
- hash_lock.write_unlock();
+ goto release;
} else if (!zip) {
- hash_lock.write_unlock();
+release:
+ hash_lock.unlock();
}
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
@@ -1170,7 +1173,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
ut_a(bpage->zip.ssize);
ut_ad(!bpage->oldest_modification());
- hash_lock.write_unlock();
+ hash_lock.unlock();
buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, bpage->zip_size());
@@ -1214,7 +1217,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
and by the time we'll release it in the caller we'd
have inserted the compressed only descriptor in the
page_hash. */
- hash_lock.write_unlock();
+ hash_lock.unlock();
if (bpage->zip.data) {
/* Free the compressed page. */
@@ -1254,7 +1257,7 @@ ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage)
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
mysql_mutex_lock(&mutex);
- hash_lock.write_lock();
+ hash_lock.lock();
ut_ad(bpage->io_fix() == BUF_IO_READ);
ut_ad(!bpage->oldest_modification());
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 55e5e4afba1..080f87adb0f 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -83,6 +83,7 @@ and the lock released later.
requested (for ROW_FORMAT=COMPRESSED)
@return pointer to the block
@retval NULL in case of an error */
+TRANSACTIONAL_TARGET
static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
ulint zip_size, bool unzip)
{
@@ -115,7 +116,6 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
}
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
- page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
mysql_mutex_lock(&buf_pool.mutex);
@@ -136,21 +136,23 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
bpage= &block->page;
/* Insert into the hash table of file pages */
- hash_lock.write_lock();
-
- if (hash_page)
{
- /* Preserve the reference count. */
- auto buf_fix_count= hash_page->buf_fix_count();
- ut_a(buf_fix_count > 0);
- block->page.add_buf_fix_count(buf_fix_count);
- buf_pool.watch_remove(hash_page, chain);
- }
+ transactional_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
- block->page.set_io_fix(BUF_IO_READ);
- block->page.set_state(BUF_BLOCK_FILE_PAGE);
- buf_pool.page_hash.append(chain, &block->page);
- hash_lock.write_unlock();
+ if (hash_page)
+ {
+ /* Preserve the reference count. */
+ auto buf_fix_count= hash_page->buf_fix_count();
+ ut_a(buf_fix_count > 0);
+ block->page.add_buf_fix_count(buf_fix_count);
+ buf_pool.watch_remove(hash_page, chain);
+ }
+
+ block->page.set_io_fix(BUF_IO_READ);
+ block->page.set_state(BUF_BLOCK_FILE_PAGE);
+ buf_pool.page_hash.append(chain, &block->page);
+ }
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(&block->page, true/* to old blocks */);
@@ -204,20 +206,22 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
bpage->init(BUF_BLOCK_ZIP_PAGE, page_id);
- hash_lock.write_lock();
-
- if (hash_page)
{
- /* Preserve the reference count. It can be 0 if
- buf_pool_t::watch_unset() is executing concurrently,
- waiting for buf_pool.mutex, which we are holding. */
- bpage->add_buf_fix_count(hash_page->buf_fix_count());
- buf_pool.watch_remove(hash_page, chain);
- }
+ transactional_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
+
+ if (hash_page)
+ {
+ /* Preserve the reference count. It can be 0 if
+ buf_pool_t::watch_unset() is executing concurrently,
+ waiting for buf_pool.mutex, which we are holding. */
+ bpage->add_buf_fix_count(hash_page->buf_fix_count());
+ buf_pool.watch_remove(hash_page, chain);
+ }
- buf_pool.page_hash.append(chain, bpage);
- bpage->set_io_fix(BUF_IO_READ);
- hash_lock.write_unlock();
+ buf_pool.page_hash.append(chain, bpage);
+ bpage->set_io_fix(BUF_IO_READ);
+ }
/* The block must be put to the LRU list, to the old blocks.
The zip size is already set into the page zip */
@@ -370,6 +374,7 @@ wants to access
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
+TRANSACTIONAL_TARGET
ulint
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
{
@@ -404,13 +409,11 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
for (page_id_t i= low; i < high; ++i)
{
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
- page_hash_latch &latch= buf_pool.page_hash.lock_get(chain);
- latch.read_lock();
- const buf_page_t *bpage= buf_pool.page_hash.get(i, chain);
- bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage);
- latch.read_unlock();
- if (found && !--count)
- goto read_ahead;
+ transactional_shared_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
+ if (const buf_page_t *bpage= buf_pool.page_hash.get(i, chain))
+ if (bpage->is_accessed() && buf_page_peek_if_young(bpage) && !--count)
+ goto read_ahead;
}
no_read_ahead:
@@ -552,6 +555,7 @@ which could result in a deadlock if the OS does not support asynchronous io.
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */
+TRANSACTIONAL_TARGET
ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
{
@@ -605,9 +609,18 @@ fail:
for (page_id_t i= low; i != high_1; ++i)
{
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
- page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
- hash_lock.read_lock();
+ transactional_shared_lock_guard<page_hash_latch> g
+ {buf_pool.page_hash.lock_get(chain)};
const buf_page_t* bpage= buf_pool.page_hash.get(i, chain);
+ if (!bpage)
+ {
+ if (i == page_id)
+ goto fail;
+failed:
+ if (--count)
+ continue;
+ goto fail;
+ }
if (i == page_id)
{
/* Read the natural predecessor and successor page addresses from
@@ -615,12 +628,6 @@ fail:
on the page, we do not acquire an s-latch on the page, this is to
prevent deadlocks. The hash_lock is only protecting the
buf_pool.page_hash for page i, not the bpage contents itself. */
- if (!bpage)
- {
-hard_fail:
- hash_lock.read_unlock();
- goto fail;
- }
const byte *f;
switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) {
case BUF_BLOCK_FILE_PAGE:
@@ -630,38 +637,31 @@ hard_fail:
f= bpage->zip.data;
break;
default:
- goto hard_fail;
+ ut_ad("invalid state" == 0);
+ goto fail;
}
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_PREV));
uint32_t next= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_NEXT));
if (prev == FIL_NULL || next == FIL_NULL)
- goto hard_fail;
+ goto fail;
page_id_t id= page_id;
if (descending && next - 1 == page_id.page_no())
id.set_page_no(prev);
else if (!descending && prev + 1 == page_id.page_no())
id.set_page_no(next);
else
- goto hard_fail; /* Successor or predecessor not in the right order */
+ goto fail; /* Successor or predecessor not in the right order */
new_low= id - (id.page_no() % buf_read_ahead_area);
new_high_1= new_low + (buf_read_ahead_area - 1);
if (id != new_low && id != new_high_1)
/* This is not a border page of the area: return */
- goto hard_fail;
+ goto fail;
if (new_high_1.page_no() > space->last_page_number())
/* The area is not whole */
- goto hard_fail;
- }
- else if (!bpage)
- {
-failed:
- hash_lock.read_unlock();
- if (--count)
- continue;
- goto fail;
+ goto fail;
}
const unsigned accessed= bpage->is_accessed();
@@ -678,7 +678,6 @@ failed:
prev_accessed= accessed;
if (fail)
goto failed;
- hash_lock.read_unlock();
}
/* If we got this far, read-ahead can be sensible: do it */
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 667a64f907a..c88227dbade 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1381,6 +1381,8 @@ dberr_t dict_sys_t::create_or_check_sys_tables()
trx_start_for_ddl(trx);
{
+ /* Do not bother with transactional memory; this is only
+ executed at startup, with no conflicts present. */
LockMutexGuard g{SRW_LOCK_CALL};
trx->mutex_lock();
lock_table_create(dict_sys.sys_tables, LOCK_X, trx);
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 6074398afd3..34080b22095 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1181,6 +1181,7 @@ inline void dict_sys_t::add(dict_table_t* table)
/** Test whether a table can be evicted from dict_sys.table_LRU.
@param table table to be considered for eviction
@return whether the table can be evicted */
+TRANSACTIONAL_TARGET
static bool dict_table_can_be_evicted(dict_table_t *table)
{
ut_ad(dict_sys.locked());
@@ -2064,6 +2065,7 @@ dict_index_add_to_cache(
/**********************************************************************//**
Removes an index from the dictionary cache. */
+TRANSACTIONAL_TARGET
static
void
dict_index_remove_from_cache_low(
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
index f067d43d6a3..41f32cf7240 100644
--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -78,6 +78,7 @@ rtr_adjust_parent_path(
Find the next matching record. This function is used by search
or record locating during index delete/update.
@return true if there is suitable record found, otherwise false */
+TRANSACTIONAL_TARGET
static
bool
rtr_pcur_getnext_from_path(
@@ -387,7 +388,7 @@ rtr_pcur_getnext_from_path(
trx_t* trx = thr_get_trx(
btr_cur->rtr_info->thr);
{
- LockMutexGuard g{SRW_LOCK_CALL};
+ TMLockTrxGuard g{TMLockTrxArgs(*trx)};
lock_init_prdt_from_mbr(
&prdt, &btr_cur->rtr_info->mbr,
mode, trx->lock.lock_heap);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 0b0c3b045ff..6729e7e1747 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3185,6 +3185,7 @@ the query cache.
@param[in] table table object
@param[in] trx transaction object
@return whether the storing or retrieving from the query cache is permitted */
+TRANSACTIONAL_TARGET
static bool innobase_query_caching_table_check_low(
dict_table_t* table, trx_t* trx)
{
@@ -3211,6 +3212,16 @@ static bool innobase_query_caching_table_check_low(
return false;
}
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin()) {
+ if (table->lock_mutex_is_locked())
+ xabort();
+ auto len = UT_LIST_GET_LEN(table->locks);
+ xend();
+ return len == 0;
+ }
+#endif
+
table->lock_mutex_lock();
auto len= UT_LIST_GET_LEN(table->locks);
table->lock_mutex_unlock();
@@ -18562,7 +18573,9 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id)
trx_t *vtrx= thd_to_trx(vthd);
if (vtrx)
{
- lock_sys.wr_lock(SRW_LOCK_CALL);
+ /* Do not bother with lock elision using transactional memory here;
+ this is rather complex code */
+ LockMutexGuard g{SRW_LOCK_CALL};
mysql_mutex_lock(&lock_sys.wait_mutex);
vtrx->mutex_lock();
/* victim transaction is either active or prepared, if it has already
@@ -18607,7 +18620,6 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id)
WSREP_DEBUG("kill transaction skipped due to wsrep_aborter set");
}
}
- lock_sys.wr_unlock();
mysql_mutex_unlock(&lock_sys.wait_mutex);
vtrx->mutex_unlock();
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 4177215cca7..fbec3bee38c 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -3145,7 +3145,7 @@ or clustered
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] thr query thread
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static MY_ATTRIBUTE((warn_unused_result))
+static TRANSACTIONAL_TARGET MY_ATTRIBUTE((warn_unused_result))
dberr_t
ibuf_insert_low(
ulint mode,
@@ -3470,6 +3470,7 @@ is clustered or unique.
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] thr query thread
@return true if success */
+TRANSACTIONAL_TARGET
bool
ibuf_insert(
ibuf_op_t op,
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 4339c895400..b45183a6428 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -347,11 +347,21 @@ struct btr_search_sys_t
extern btr_search_sys_t btr_search_sys;
/** @return number of leaf pages pointed to by the adaptive hash index */
-inline ulint dict_index_t::n_ahi_pages() const
+TRANSACTIONAL_INLINE inline ulint dict_index_t::n_ahi_pages() const
{
if (!btr_search_enabled)
return 0;
srw_spin_lock *latch= &btr_search_sys.get_part(*this)->latch;
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (latch->is_locked())
+ xabort();
+ ulint ref_count= search_info->ref_count;
+ xend();
+ return ref_count;
+ }
+#endif
latch->rd_lock(SRW_LOCK_CALL);
ulint ref_count= search_info->ref_count;
latch->rd_unlock();
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 30729063069..ad06b17466d 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -40,6 +40,7 @@ Created 11/5/1995 Heikki Tuuri
#include "page0types.h"
#include "log0log.h"
#include "srv0srv.h"
+#include "transactional_lock_guard.h"
#include <ostream>
// Forward declaration
@@ -1478,25 +1479,29 @@ public:
public:
/** @return whether the buffer pool contains a page
- @tparam watch whether to allow watch_is_sentinel()
- @param page_id page identifier
- @param chain hash table chain for page_id.fold() */
- template<bool watch= false>
+ @tparam allow_watch whether to allow watch_is_sentinel()
+ @param page_id page identifier
+ @param chain hash table chain for page_id.fold() */
+ template<bool allow_watch= false>
+ TRANSACTIONAL_INLINE
bool page_hash_contains(const page_id_t page_id, hash_chain &chain)
{
- page_hash_latch &latch= page_hash.lock_get(chain);
- latch.read_lock();
+ transactional_shared_lock_guard<page_hash_latch> g
+ {page_hash.lock_get(chain)};
buf_page_t *bpage= page_hash.get(page_id, chain);
- if (!bpage || watch_is_sentinel(*bpage))
+ if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)])
{
- latch.read_unlock();
- return watch ? bpage : nullptr;
+ ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
+ ut_ad(!bpage->in_zip_hash);
+ ut_ad(!bpage->zip.data);
+ if (!allow_watch)
+ bpage= nullptr;
+ }
+ else if (bpage)
+ {
+ ut_ad(page_id == bpage->id());
+ ut_ad(bpage->in_file());
}
-
- ut_ad(bpage->in_file());
- ut_ad(page_id == bpage->id());
-
- latch.read_unlock();
return bpage;
}
@@ -1510,11 +1515,11 @@ public:
page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
is_locked());
#endif /* SAFE_MUTEX */
- ut_ad(bpage.in_file());
-
if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
{
- ut_ad(bpage.state() != BUF_BLOCK_ZIP_PAGE || bpage.zip.data);
+ ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE
+ ? !!bpage.zip.data
+ : bpage.state() == BUF_BLOCK_FILE_PAGE);
return false;
}
@@ -1528,16 +1533,14 @@ public:
This may only be called after !watch_set() and before invoking watch_unset().
@param id page identifier
@return whether the page was read to the buffer pool */
+ TRANSACTIONAL_INLINE
bool watch_occurred(const page_id_t id)
{
hash_chain &chain= page_hash.cell_get(id.fold());
- page_hash_latch &latch= page_hash.lock_get(chain);
- latch.read_lock();
+ transactional_shared_lock_guard<page_hash_latch> g
+ {page_hash.lock_get(chain)};
/* The page must exist because watch_set() increments buf_fix_count. */
- buf_page_t *bpage= page_hash.get(id, chain);
- const bool is_sentinel= watch_is_sentinel(*bpage);
- latch.read_unlock();
- return !is_sentinel;
+ return !watch_is_sentinel(*page_hash.get(id, chain));
}
/** Register a watch for a page identifier. The caller must hold an
@@ -2000,14 +2003,14 @@ inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id,
}
#ifdef SUX_LOCK_GENERIC
-inline void page_hash_latch::read_lock()
+inline void page_hash_latch::lock_shared()
{
mysql_mutex_assert_not_owner(&buf_pool.mutex);
if (!read_trylock())
read_lock_wait();
}
-inline void page_hash_latch::write_lock()
+inline void page_hash_latch::lock()
{
if (!write_trylock())
write_lock_wait();
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 2f08220fe98..2cb92a5f1df 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -179,7 +179,7 @@ enum rw_lock_type_t
#include "sux_lock.h"
#ifdef SUX_LOCK_GENERIC
-class page_hash_latch : public rw_lock
+class page_hash_latch : private rw_lock
{
/** Wait for a shared lock */
void read_lock_wait();
@@ -187,33 +187,50 @@ class page_hash_latch : public rw_lock
void write_lock_wait();
public:
/** Acquire a shared lock */
- inline void read_lock();
+ inline void lock_shared();
/** Acquire an exclusive lock */
- inline void write_lock();
+ inline void lock();
+
+#ifdef UNIV_DEBUG
+ /** @return whether an exclusive lock is being held by any thread */
+ bool is_write_locked() const { return rw_lock::is_write_locked(); }
+#endif
+
+ /** @return whether any lock is being held by any thread */
+ bool is_locked() const { return rw_lock::is_locked(); }
+ /** @return whether any lock is being held or waited for by any thread */
+ bool is_locked_or_waiting() const { return rw_lock::is_locked_or_waiting(); }
+
+ /** Release a shared lock */
+ void unlock_shared() { read_unlock(); }
+ /** Release an exclusive lock */
+ void unlock() { write_unlock(); }
};
#elif defined _WIN32 || SIZEOF_SIZE_T >= 8
class page_hash_latch
{
- srw_spin_lock_low lock;
+ srw_spin_lock_low lk;
public:
- void read_lock() { lock.rd_lock(); }
- void read_unlock() { lock.rd_unlock(); }
- void write_lock() { lock.wr_lock(); }
- void write_unlock() { lock.wr_unlock(); }
- bool is_locked() const { return lock.is_locked(); }
- bool is_write_locked() const { return lock.is_write_locked(); }
+ void lock_shared() { lk.rd_lock(); }
+ void unlock_shared() { lk.rd_unlock(); }
+ void lock() { lk.wr_lock(); }
+ void unlock() { lk.wr_unlock(); }
+ bool is_write_locked() const { return lk.is_write_locked(); }
+ bool is_locked() const { return lk.is_locked(); }
+ bool is_locked_or_waiting() const { return lk.is_locked_or_waiting(); }
};
#else
class page_hash_latch
{
- srw_spin_mutex lock;
+ srw_spin_mutex lk;
public:
- void read_lock() { write_lock(); }
- void read_unlock() { write_unlock(); }
- void write_lock() { lock.wr_lock(); }
- void write_unlock() { lock.wr_unlock(); }
- bool is_locked() const { return lock.is_locked(); }
+ void lock_shared() { lock(); }
+ void unlock_shared() { unlock(); }
+ void lock() { lk.wr_lock(); }
+ void unlock() { lk.wr_unlock(); }
+ bool is_locked() const { return lk.is_locked(); }
bool is_write_locked() const { return is_locked(); }
+ bool is_locked_or_waiting() const { return is_locked(); }
};
#endif
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 2a7b38f345c..986c767ff49 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1979,11 +1979,15 @@ struct dict_table_t {
ut_ad(lock_mutex_owner.exchange(0) == os_thread_get_curr_id());
lock_mutex.wr_unlock();
}
+#ifndef SUX_LOCK_GENERIC
+ /** @return whether the lock mutex is held by some thread */
+ bool lock_mutex_is_locked() const noexcept { return lock_mutex.is_locked(); }
+#endif
/* stats mutex lock currently defaults to lock_mutex but in the future,
there could be a use-case to have separate mutex for stats.
-  extra indirection (through inline so no performance hit) should
-  help simplify code and increase long-term maintainability */
+ extra indirection (through inline so no performance hit) should
+ help simplify code and increase long-term maintainability */
void stats_mutex_init() { lock_mutex_init(); }
void stats_mutex_destroy() { lock_mutex_destroy(); }
void stats_mutex_lock() { lock_mutex_lock(); }
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 5f051b8ffbe..50b9792cf2b 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -28,16 +28,15 @@ Created 5/7/1996 Heikki Tuuri
#define lock0lock_h
#include "buf0types.h"
-#include "trx0types.h"
+#include "trx0trx.h"
#include "mtr0types.h"
#include "rem0types.h"
-#include "que0types.h"
-#include "lock0types.h"
#include "hash0hash.h"
#include "srv0srv.h"
#include "ut0vec.h"
#include "gis0rtree.h"
#include "lock0prdt.h"
+#include "transactional_lock_guard.h"
// Forward declaration
class ReadView;
@@ -62,8 +61,10 @@ lock_get_min_heap_no(
/*=================*/
const buf_block_t* block); /*!< in: buffer block */
-/** Discard locks for an index */
-void lock_discard_for_index(const dict_index_t &index);
+/** Discard locks for an index when purging DELETE FROM SYS_INDEXES
+after an aborted CREATE INDEX operation.
+@param index a stale index on which ADD INDEX operation was aborted */
+ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index);
/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we copy
@@ -571,6 +572,9 @@ class lock_sys_t
{
friend struct LockGuard;
friend struct LockMultiGuard;
+ friend struct TMLockGuard;
+ friend struct TMLockMutexGuard;
+ friend struct TMLockTrxGuard;
/** Hash table latch */
struct hash_latch
@@ -585,6 +589,11 @@ class lock_sys_t
void acquire() { if (!try_acquire()) wait(); }
/** Release a lock */
void release();
+ /** @return whether any lock is being held or waited for by any thread */
+ bool is_locked_or_waiting() const
+ { return rw_lock::is_locked_or_waiting(); }
+ /** @return whether this latch is possibly held by any thread */
+ bool is_locked() const { return rw_lock::is_locked(); }
#else
{
private:
@@ -596,11 +605,11 @@ class lock_sys_t
void acquire() { lock.wr_lock(); }
/** Release a lock */
void release() { lock.wr_unlock(); }
-#endif
-#ifdef UNIV_DEBUG
+ /** @return whether any lock may be held by any thread */
+ bool is_locked_or_waiting() const noexcept
+ { return lock.is_locked_or_waiting(); }
/** @return whether this latch is possibly held by any thread */
- bool is_locked() const
- { return memcmp(this, field_ref_zero, sizeof *this); }
+ bool is_locked() const noexcept { return lock.is_locked(); }
#endif
};
@@ -799,7 +808,14 @@ public:
#ifdef UNIV_DEBUG
/** @return whether the current thread is the lock_sys.latch writer */
bool is_writer() const
- { return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id(); }
+ {
+# ifdef SUX_LOCK_GENERIC
+ return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id();
+# else
+ return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id() ||
+ (xtest() && !latch.is_locked_or_waiting());
+# endif
+ }
/** Assert that a lock shard is exclusively latched (by some thread) */
void assert_locked(const lock_t &lock) const;
/** Assert that a table lock shard is exclusively latched by this thread */
@@ -836,13 +852,14 @@ public:
void deadlock_check();
/** Cancel a waiting lock request.
- @param lock waiting lock request
- @param trx active transaction
- @param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim
+ @tparam check_victim whether to check for DB_DEADLOCK
+ @param lock waiting lock request
+ @param trx active transaction
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */
- static dberr_t cancel(trx_t *trx, lock_t *lock, bool check_victim);
+ template<bool check_victim>
+ static dberr_t cancel(trx_t *trx, lock_t *lock);
/** Cancel a waiting lock request (if any) when killing a transaction */
static void cancel(trx_t *trx);
@@ -981,6 +998,149 @@ private:
hash_cell_t *cell2_;
};
+/** lock_sys.latch exclusive guard using transactional memory */
+struct TMLockMutexGuard
+{
+ TRANSACTIONAL_INLINE
+ TMLockMutexGuard(SRW_LOCK_ARGS(const char *file, unsigned line))
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (was_elided())
+ return;
+ xabort();
+ }
+#endif
+ lock_sys.wr_lock(SRW_LOCK_ARGS(file, line));
+ }
+ TRANSACTIONAL_INLINE
+ ~TMLockMutexGuard()
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (was_elided()) xend(); else
+#endif
+ lock_sys.wr_unlock();
+ }
+
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ bool was_elided() const noexcept
+ { return !lock_sys.latch.is_locked_or_waiting(); }
+#else
+ bool was_elided() const noexcept { return false; }
+#endif
+};
+
+/** lock_sys latch guard for 1 page_id_t, using transactional memory */
+struct TMLockGuard
+{
+ TRANSACTIONAL_TARGET
+ TMLockGuard(lock_sys_t::hash_table &hash, const page_id_t id);
+ TRANSACTIONAL_INLINE ~TMLockGuard()
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (elided)
+ {
+ xend();
+ return;
+ }
+#endif
+ lock_sys_t::hash_table::latch(cell_)->release();
+ /* Must be last, to avoid a race with lock_sys_t::hash_table::resize() */
+ lock_sys.rd_unlock();
+ }
+ /** @return the hash array cell */
+ hash_cell_t &cell() const { return *cell_; }
+private:
+ /** The hash array cell */
+ hash_cell_t *cell_;
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ /** whether the latches were elided */
+ bool elided;
+#endif
+};
+
+/** guard for shared lock_sys.latch and trx_t::mutex using
+transactional memory */
+struct TMLockTrxGuard
+{
+ trx_t &trx;
+
+ TRANSACTIONAL_INLINE
+#ifndef UNIV_PFS_RWLOCK
+ TMLockTrxGuard(trx_t &trx) : trx(trx)
+# define TMLockTrxArgs(trx) trx
+#else
+ TMLockTrxGuard(const char *file, unsigned line, trx_t &trx) : trx(trx)
+# define TMLockTrxArgs(trx) SRW_LOCK_CALL, trx
+#endif
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (!lock_sys.latch.is_write_locked() && was_elided())
+ return;
+ xabort();
+ }
+#endif
+ lock_sys.rd_lock(SRW_LOCK_ARGS(file, line));
+ trx.mutex_lock();
+ }
+ TRANSACTIONAL_INLINE
+ ~TMLockTrxGuard()
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (was_elided())
+ {
+ xend();
+ return;
+ }
+#endif
+ lock_sys.rd_unlock();
+ trx.mutex_unlock();
+ }
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ bool was_elided() const noexcept { return !trx.mutex_is_locked(); }
+#else
+ bool was_elided() const noexcept { return false; }
+#endif
+};
+
+/** guard for trx_t::mutex using transactional memory */
+struct TMTrxGuard
+{
+ trx_t &trx;
+
+ TRANSACTIONAL_INLINE TMTrxGuard(trx_t &trx) : trx(trx)
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (was_elided())
+ return;
+ xabort();
+ }
+#endif
+ trx.mutex_lock();
+ }
+ TRANSACTIONAL_INLINE ~TMTrxGuard()
+ {
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (was_elided())
+ {
+ xend();
+ return;
+ }
+#endif
+ trx.mutex_unlock();
+ }
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ bool was_elided() const noexcept { return !trx.mutex_is_locked(); }
+#else
+ bool was_elided() const noexcept { return false; }
+#endif
+};
+
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
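A usage sketch for the guards above (hypothetical caller, not part of this patch; it mirrors the trx_print() and row_ins_foreign_trx_print() hunks further below): TMLockMutexGuard either elides lock_sys.latch inside a memory transaction or falls back to an exclusive wr_lock(), so the critical section itself needs no changes.

TRANSACTIONAL_TARGET
static void snapshot_lock_counts(const trx_t *trx, ulint &n_rec, ulint &n_trx)
{
  TMLockMutexGuard g{SRW_LOCK_CALL};
  n_rec= trx->lock.n_rec_locks;
  n_trx= UT_LIST_GET_LEN(trx->lock.trx_locks);
}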
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index f0595598838..b5ec7a0d29e 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -459,7 +459,7 @@ lock_rec_get_n_bits(
/**********************************************************************//**
Sets the nth bit of a record lock to TRUE. */
-UNIV_INLINE
+inline
void
lock_rec_set_nth_bit(
/*=================*/
@@ -473,7 +473,12 @@ lock_rec_set_nth_bit(
inline byte lock_rec_reset_nth_bit(lock_t* lock, ulint i)
{
ut_ad(!lock->is_table());
+#ifdef SUX_LOCK_GENERIC
ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner());
+#else
+ ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner()
+ || (xtest() && !lock->trx->mutex_is_locked()));
+#endif
ut_ad(i < lock->un_member.rec_lock.n_bits);
byte* b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
index c51304cd7ed..21e7c7c95dc 100644
--- a/storage/innobase/include/lock0priv.ic
+++ b/storage/innobase/include/lock0priv.ic
@@ -67,7 +67,7 @@ lock_rec_get_n_bits(
/**********************************************************************//**
Sets the nth bit of a record lock to TRUE. */
-UNIV_INLINE
+inline
void
lock_rec_set_nth_bit(
/*=================*/
@@ -91,7 +91,12 @@ lock_rec_set_nth_bit(
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
# pragma GCC diagnostic pop
#endif
+#ifdef SUX_LOCK_GENERIC
ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner());
+#else
+ ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner()
+ || (xtest() && !lock->trx->mutex_is_locked()));
+#endif
lock->trx->lock.n_rec_locks++;
}
diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h
index 6447eea18ae..0ae052fabe2 100644
--- a/storage/innobase/include/rw_lock.h
+++ b/storage/innobase/include/rw_lock.h
@@ -222,17 +222,13 @@ public:
}
/** @return whether an exclusive lock is being held by any thread */
- bool is_write_locked() const
- { return !!(lock.load(std::memory_order_relaxed) & WRITER); }
+ bool is_write_locked() const { return !!(value() & WRITER); }
#ifdef SUX_LOCK_GENERIC
/** @return whether an update lock is being held by any thread */
- bool is_update_locked() const
- { return !!(lock.load(std::memory_order_relaxed) & UPDATER); }
+ bool is_update_locked() const { return !!(value() & UPDATER); }
#endif /* SUX_LOCK_GENERIC */
/** @return whether any lock is being held or waited for by any thread */
- bool is_locked_or_waiting() const
- { return lock.load(std::memory_order_relaxed) != 0; }
+ bool is_locked_or_waiting() const { return value() != 0; }
/** @return whether any lock is being held by any thread */
- bool is_locked() const
- { return (lock.load(std::memory_order_relaxed) & ~WRITER_WAITING) != 0; }
+ bool is_locked() const { return (value() & ~WRITER_WAITING) != 0; }
};
diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h
index d69c9de268f..54d042419ca 100644
--- a/storage/innobase/include/srw_lock.h
+++ b/storage/innobase/include/srw_lock.h
@@ -60,10 +60,10 @@ class srw_mutex_impl final
public:
/** @return whether the mutex is being held or waited for */
bool is_locked_or_waiting() const
- { return lock.load(std::memory_order_relaxed) != 0; }
+ { return lock.load(std::memory_order_acquire) != 0; }
/** @return whether the mutex is being held by any thread */
bool is_locked() const
- { return (lock.load(std::memory_order_relaxed) & HOLDER) != 0; }
+ { return (lock.load(std::memory_order_acquire) & HOLDER) != 0; }
void init() { DBUG_ASSERT(!is_locked_or_waiting()); }
void destroy() { DBUG_ASSERT(!is_locked_or_waiting()); }
@@ -174,8 +174,7 @@ public:
{ return (readers.load(std::memory_order_relaxed) & WRITER) != 0; }
# ifndef DBUG_OFF
/** @return whether the lock is being held or waited for */
- bool is_vacant() const
- { return !is_locked() && !writer.is_locked_or_waiting(); }
+ bool is_vacant() const { return !is_locked_or_waiting(); }
# endif /* !DBUG_OFF */
bool rd_lock_try()
@@ -275,10 +274,18 @@ public:
}
/** @return whether an exclusive lock may be held by any thread */
bool is_write_locked() const noexcept
- { return readers.load(std::memory_order_relaxed) == WRITER; }
+ { return readers.load(std::memory_order_acquire) == WRITER; }
/** @return whether any lock may be held by any thread */
bool is_locked() const noexcept
- { return readers.load(std::memory_order_relaxed) != 0; }
+ { return readers.load(std::memory_order_acquire) != 0; }
+ /** @return whether any lock may be held by any thread */
+ bool is_locked_or_waiting() const noexcept
+ { return is_locked() || writer.is_locked_or_waiting(); }
+
+ void lock_shared() { rd_lock(); }
+ void unlock_shared() { rd_unlock(); }
+ void lock() { wr_lock(); }
+ void unlock() { wr_unlock(); }
#endif
};
@@ -291,29 +298,29 @@ class srw_lock_
friend srw_lock_impl<spinloop>;
# endif
# ifdef _WIN32
- SRWLOCK lock;
+ SRWLOCK lk;
# else
- rw_lock_t lock;
+ rw_lock_t lk;
# endif
void rd_wait();
void wr_wait();
public:
- void init() { IF_WIN(,my_rwlock_init(&lock, nullptr)); }
- void destroy() { IF_WIN(,rwlock_destroy(&lock)); }
+ void init() { IF_WIN(,my_rwlock_init(&lk, nullptr)); }
+ void destroy() { IF_WIN(,rwlock_destroy(&lk)); }
inline void rd_lock();
inline void wr_lock();
bool rd_lock_try()
- { return IF_WIN(TryAcquireSRWLockShared(&lock), !rw_tryrdlock(&lock)); }
+ { return IF_WIN(TryAcquireSRWLockShared(&lk), !rw_tryrdlock(&lk)); }
void rd_unlock()
- { IF_WIN(ReleaseSRWLockShared(&lock), rw_unlock(&lock)); }
+ { IF_WIN(ReleaseSRWLockShared(&lk), rw_unlock(&lk)); }
bool wr_lock_try()
- { return IF_WIN(TryAcquireSRWLockExclusive(&lock), !rw_trywrlock(&lock)); }
+ { return IF_WIN(TryAcquireSRWLockExclusive(&lk), !rw_trywrlock(&lk)); }
void wr_unlock()
- { IF_WIN(ReleaseSRWLockExclusive(&lock), rw_unlock(&lock)); }
+ { IF_WIN(ReleaseSRWLockExclusive(&lk), rw_unlock(&lk)); }
#ifdef _WIN32
/** @return whether any lock may be held by any thread */
- bool is_locked_or_waiting() const noexcept { return (size_t&)(lock) != 0; }
+ bool is_locked_or_waiting() const noexcept { return (size_t&)(lk) != 0; }
/** @return whether any lock may be held by any thread */
bool is_locked() const noexcept { return is_locked_or_waiting(); }
/** @return whether an exclusive lock may be held by any thread */
@@ -322,6 +329,11 @@ public:
// FIXME: this returns false positives for shared locks
return is_locked();
}
+
+ void lock_shared() { rd_lock(); }
+ void unlock_shared() { rd_unlock(); }
+ void lock() { wr_lock(); }
+ void unlock() { wr_unlock(); }
#endif
};
@@ -330,10 +342,10 @@ template<> void srw_lock_<true>::wr_wait();
template<>
inline void srw_lock_<false>::rd_lock()
-{ IF_WIN(AcquireSRWLockShared(&lock), rw_rdlock(&lock)); }
+{ IF_WIN(AcquireSRWLockShared(&lk), rw_rdlock(&lk)); }
template<>
inline void srw_lock_<false>::wr_lock()
-{ IF_WIN(AcquireSRWLockExclusive(&lock), rw_wrlock(&lock)); }
+{ IF_WIN(AcquireSRWLockExclusive(&lk), rw_wrlock(&lk)); }
template<>
inline void srw_lock_<true>::rd_lock() { if (!rd_lock_try()) rd_wait(); }
@@ -491,6 +503,15 @@ public:
}
bool rd_lock_try() { return lock.rd_lock_try(); }
bool wr_lock_try() { return lock.wr_lock_try(); }
+#ifndef SUX_LOCK_GENERIC
+ /** @return whether any lock may be held by any thread */
+ bool is_locked_or_waiting() const noexcept
+ { return lock.is_locked_or_waiting(); }
+ /** @return whether an exclusive lock may be held by any thread */
+ bool is_locked() const noexcept { return lock.is_locked(); }
+ /** @return whether an exclusive lock may be held by any thread */
+ bool is_write_locked() const noexcept { return lock.is_write_locked(); }
+#endif
};
typedef srw_lock_impl<false> srw_lock;
diff --git a/storage/innobase/include/transactional_lock_guard.h b/storage/innobase/include/transactional_lock_guard.h
new file mode 100644
index 00000000000..7ece27638fc
--- /dev/null
+++ b/storage/innobase/include/transactional_lock_guard.h
@@ -0,0 +1,167 @@
+/*****************************************************************************
+
+Copyright (c) 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+#pragma once
+
+#if defined __powerpc64__ && defined __clang__ && defined __linux__
+#elif defined __powerpc64__&&defined __GNUC__&&defined __linux__&&__GNUC__ > 4
+#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+# if __GNUC__ >= 8
+# elif defined __clang_major__ && __clang_major__ > 6
+# else
+# define NO_ELISION
+# endif
+#else /* Transactional memory has not been implemented for this ISA */
+# define NO_ELISION
+#endif
+
+#ifdef NO_ELISION
+constexpr bool have_transactional_memory= false;
+# ifdef UNIV_DEBUG
+static inline bool xtest() { return false; }
+# endif
+# define TRANSACTIONAL_TARGET /* nothing */
+# define TRANSACTIONAL_INLINE /* nothing */
+#else
+# if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64
+extern bool have_transactional_memory;
+bool transactional_lock_enabled();
+
+# include <immintrin.h>
+# if defined __GNUC__ && !defined __INTEL_COMPILER
+# define TRANSACTIONAL_TARGET __attribute__((target("rtm")))
+# define TRANSACTIONAL_INLINE __attribute__((target("rtm"),always_inline))
+# else
+# define TRANSACTIONAL_TARGET /* nothing */
+# define TRANSACTIONAL_INLINE /* nothing */
+# endif
+
+TRANSACTIONAL_INLINE static inline bool xbegin()
+{
+ return have_transactional_memory && _xbegin() == _XBEGIN_STARTED;
+}
+
+# ifdef UNIV_DEBUG
+# ifdef __GNUC__
+/** @return whether a memory transaction is active */
+bool xtest();
+# else
+static inline bool xtest() { return have_transactional_memory && _xtest(); }
+# endif
+# endif
+
+TRANSACTIONAL_INLINE static inline void xabort() { _xabort(0); }
+
+TRANSACTIONAL_INLINE static inline void xend() { _xend(); }
+# elif defined __powerpc64__
+# include <htmxlintrin.h>
+extern bool have_transactional_memory;
+bool transactional_lock_enabled();
+# define TRANSACTIONAL_TARGET __attribute__((target("htm")))
+# define TRANSACTIONAL_INLINE __attribute__((target("htm"),always_inline))
+
+TRANSACTIONAL_INLINE static inline bool xbegin()
+{
+ return have_transactional_memory &&
+ __TM_simple_begin() == _HTM_TBEGIN_STARTED;
+}
+
+# ifdef UNIV_DEBUG
+bool xtest();
+# endif
+
+TRANSACTIONAL_INLINE static inline void xabort() { __TM_abort(); }
+
+TRANSACTIONAL_INLINE static inline void xend() { __TM_end(); }
+# endif
+#endif
+
+template<class mutex>
+class transactional_lock_guard
+{
+ mutex &m;
+
+public:
+ TRANSACTIONAL_INLINE transactional_lock_guard(mutex &m) : m(m)
+ {
+#ifndef NO_ELISION
+ if (xbegin())
+ {
+ if (was_elided())
+ return;
+ xabort();
+ }
+#endif
+ m.lock();
+ }
+ transactional_lock_guard(const transactional_lock_guard &)= delete;
+ TRANSACTIONAL_INLINE ~transactional_lock_guard()
+ {
+#ifndef NO_ELISION
+ if (was_elided()) xend(); else
+#endif
+ m.unlock();
+ }
+
+#ifndef NO_ELISION
+ bool was_elided() const noexcept { return !m.is_locked_or_waiting(); }
+#else
+ bool was_elided() const noexcept { return false; }
+#endif
+};
+
+template<class mutex>
+class transactional_shared_lock_guard
+{
+ mutex &m;
+#ifndef NO_ELISION
+ bool elided;
+#else
+ static constexpr bool elided= false;
+#endif
+
+public:
+ TRANSACTIONAL_INLINE transactional_shared_lock_guard(mutex &m) : m(m)
+ {
+#ifndef NO_ELISION
+ if (xbegin())
+ {
+ if (!m.is_write_locked())
+ {
+ elided= true;
+ return;
+ }
+ xabort();
+ }
+ elided= false;
+#endif
+ m.lock_shared();
+ }
+ transactional_shared_lock_guard(const transactional_shared_lock_guard &)=
+ delete;
+ TRANSACTIONAL_INLINE ~transactional_shared_lock_guard()
+ {
+#ifndef NO_ELISION
+ if (was_elided()) xend(); else
+#endif
+ m.unlock_shared();
+ }
+
+ bool was_elided() const noexcept { return elided; }
+};
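A minimal sketch of the latch interface that the two guard templates above expect (toy_latch is purely illustrative, not InnoDB code; in a NO_ELISION build the guards degenerate to plain lock()/lock_shared() calls):

#include <atomic>

struct toy_latch
{
  std::atomic<int> word{0};  /* 0: free, >0: number of readers, -1: writer */
  void lock() { int e= 0; while (!word.compare_exchange_weak(e, -1)) e= 0; }
  void unlock() { word.store(0, std::memory_order_release); }
  void lock_shared()
  {
    int e;
    do { e= word.load(std::memory_order_relaxed); }
    while (e < 0 || !word.compare_exchange_weak(e, e + 1));
  }
  void unlock_shared() { word.fetch_sub(1); }
  /* queried inside a memory transaction to decide whether elision is safe */
  bool is_write_locked() const
  { return word.load(std::memory_order_acquire) < 0; }
  bool is_locked_or_waiting() const
  { return word.load(std::memory_order_acquire) != 0; }
};

static toy_latch sketch_latch;
static int sketch_value;

TRANSACTIONAL_INLINE static int read_sketch_value()
{
  /* either runs elided in a memory transaction (aborting if a writer
  holds sketch_latch) or falls back to lock_shared()/unlock_shared() */
  transactional_shared_lock_guard<toy_latch> g{sketch_latch};
  return sketch_value;
}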
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 86e8b534f54..d2bf7075594 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -621,6 +621,9 @@ public:
== os_thread_get_curr_id());
mutex.wr_unlock();
}
+#ifndef SUX_LOCK_GENERIC
+ bool mutex_is_locked() const noexcept { return mutex.is_locked(); }
+#endif
#ifdef UNIV_DEBUG
/** @return whether the current thread holds the mutex */
bool mutex_is_owner() const
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 33c827235be..234d215d1df 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -166,9 +166,7 @@ void lock_sys_t::assert_locked(const lock_t &lock) const
void lock_sys_t::assert_locked(const dict_table_t &table) const
{
ut_ad(!table.is_temporary());
-
- const os_thread_id_t current_thread= os_thread_get_curr_id();
- if (writer.load(std::memory_order_relaxed) == current_thread)
+ if (is_writer())
return;
ut_ad(readers);
ut_ad(table.lock_mutex_is_owner());
@@ -186,7 +184,7 @@ void lock_sys_t::hash_table::assert_locked(const page_id_t id) const
/** Assert that a hash table cell is exclusively latched (by some thread) */
void lock_sys_t::assert_locked(const hash_cell_t &cell) const
{
- if (lock_sys.is_writer())
+ if (is_writer())
return;
ut_ad(lock_sys.readers);
ut_ad(hash_table::latch(const_cast<hash_cell_t*>(&cell))->is_locked());
@@ -229,6 +227,28 @@ LockMultiGuard::~LockMultiGuard()
lock_sys.rd_unlock();
}
+TRANSACTIONAL_TARGET
+TMLockGuard::TMLockGuard(lock_sys_t::hash_table &hash, page_id_t id)
+{
+ const auto id_fold= id.fold();
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (lock_sys.latch.is_write_locked())
+ xabort();
+ cell_= hash.cell_get(id_fold);
+ if (hash.latch(cell_)->is_locked())
+ xabort();
+ elided= true;
+ return;
+ }
+ elided= false;
+#endif
+ lock_sys.rd_lock(SRW_LOCK_CALL);
+ cell_= hash.cell_get(id_fold);
+ hash.latch(cell_)->acquire();
+}
+
/** Pretty-print a table lock.
@param[in,out] file output stream
@param[in] lock table lock */
@@ -430,6 +450,8 @@ void lock_sys_t::rd_unlock()
void lock_sys_t::resize(ulint n_cells)
{
ut_ad(this == &lock_sys);
+ /* Buffer pool resizing is rarely initiated by the user, and this
+ would exceed the maximum size of a memory transaction. */
LockMutexGuard g{SRW_LOCK_CALL};
rec_hash.resize(n_cells);
prdt_hash.resize(n_cells);
@@ -893,7 +915,8 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id);
/** Kill the holders of conflicting locks.
@param trx brute-force applier transaction running in the current thread */
-ATTRIBUTE_COLD ATTRIBUTE_NOINLINE static void lock_wait_wsrep(trx_t *trx)
+ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
+static void lock_wait_wsrep(trx_t *trx)
{
DBUG_ASSERT(wsrep_on(trx->mysql_thd));
if (!wsrep_thd_is_BF(trx->mysql_thd, false))
@@ -1136,7 +1159,7 @@ lock_rec_create_low(
ulint n_bytes;
ut_d(lock_sys.hash_get(type_mode).assert_locked(page_id));
- ut_ad(holds_trx_mutex == trx->mutex_is_owner());
+ ut_ad(xtest() || holds_trx_mutex == trx->mutex_is_owner());
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
ut_ad(!(type_mode & LOCK_TABLE));
ut_ad(trx->state != TRX_STATE_NOT_STARTED);
@@ -1263,7 +1286,7 @@ lock_rec_enqueue_waiting(
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx_t* trx = thr_get_trx(thr);
- ut_ad(trx->mutex_is_owner());
+ ut_ad(xtest() || trx->mutex_is_owner());
ut_ad(!trx->dict_operation_lock_mode);
if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
@@ -1331,6 +1354,7 @@ can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility!
@return lock where the bit was set */
+TRANSACTIONAL_TARGET
static
void
lock_rec_add_to_queue(
@@ -1349,7 +1373,7 @@ lock_rec_add_to_queue(
transaction mutex */
{
ut_d(lock_sys.hash_get(type_mode).assert_locked(id));
- ut_ad(caller_owns_trx_mutex == trx->mutex_is_owner());
+ ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner());
ut_ad(index->is_primary()
|| dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
ut_ad(!(type_mode & LOCK_TABLE));
@@ -1422,9 +1446,11 @@ lock_rec_add_to_queue(
if (caller_owns_trx_mutex) {
trx->mutex_unlock();
}
- lock_trx->mutex_lock();
- lock_rec_set_nth_bit(lock, heap_no);
- lock_trx->mutex_unlock();
+ {
+ TMTrxGuard tg{*lock_trx};
+ lock_rec_set_nth_bit(lock, heap_no);
+ }
+
if (caller_owns_trx_mutex) {
trx->mutex_lock();
}
@@ -1637,6 +1663,8 @@ static void lock_wait_rpl_report(trx_t *trx)
if (!wait_lock)
return;
ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC));
+  /* This would likely be too large for a memory transaction,
+  even for wait_lock->is_table(). */
if (!lock_sys.wr_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
@@ -1833,13 +1861,13 @@ dberr_t lock_wait(que_thr_t *thr)
if (row_lock_wait)
lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse());
-end_wait:
if (lock_t *lock= trx->lock.wait_lock)
{
- lock_sys_t::cancel(trx, lock, false);
+ lock_sys_t::cancel<false>(trx, lock);
lock_sys.deadlock_check();
}
+end_wait:
mysql_mutex_unlock(&lock_sys.wait_mutex);
thd_wait_end(trx->mysql_thd);
@@ -1993,6 +2021,7 @@ static void lock_rec_dequeue_from_page(lock_t *in_lock, bool owns_wait_mutex)
/** Remove a record lock request, waiting or granted, on a discarded page
@param hash hash table
@param in_lock lock object */
+TRANSACTIONAL_TARGET
void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock)
{
ut_ad(!in_lock->is_table());
@@ -2000,13 +2029,15 @@ void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock)
HASH_DELETE(lock_t, hash, &lock_hash,
in_lock->un_member.rec_lock.page_id.fold(), in_lock);
- trx_t *trx= in_lock->trx;
- trx->mutex_lock();
- ut_d(auto old_locks=)
- in_lock->index->table->n_rec_locks--;
+ ut_d(uint32_t old_locks);
+ {
+ trx_t *trx= in_lock->trx;
+ TMTrxGuard tg{*trx};
+ ut_d(old_locks=)
+ in_lock->index->table->n_rec_locks--;
+ UT_LIST_REMOVE(trx->lock.trx_locks, in_lock);
+ }
ut_ad(old_locks);
- UT_LIST_REMOVE(trx->lock.trx_locks, in_lock);
- trx->mutex_unlock();
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);
}
@@ -2030,11 +2061,15 @@ lock_rec_free_all_from_discard_page(page_id_t id, const hash_cell_t &cell,
}
}
-/** Discard locks for an index */
-void lock_discard_for_index(const dict_index_t &index)
+/** Discard locks for an index when purging DELETE FROM SYS_INDEXES
+after an aborted CREATE INDEX operation.
+@param index a stale index on which ADD INDEX operation was aborted */
+ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index)
{
ut_ad(!index.is_committed());
- lock_sys.wr_lock(SRW_LOCK_CALL);
+ /* This is very rarely executed code, and the size of the hash array
+ would exceed the maximum size of a memory transaction. */
+ LockMutexGuard g{SRW_LOCK_CALL};
const ulint n= lock_sys.rec_hash.pad(lock_sys.rec_hash.n_cells);
for (ulint i= 0; i < n; i++)
{
@@ -2052,7 +2087,6 @@ void lock_discard_for_index(const dict_index_t &index)
lock= lock->hash;
}
}
- lock_sys.wr_unlock();
}
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
@@ -2060,6 +2094,7 @@ void lock_discard_for_index(const dict_index_t &index)
/*************************************************************//**
Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
+TRANSACTIONAL_TARGET
static
void
lock_rec_reset_and_release_wait(const hash_cell_t &cell, const page_id_t id,
@@ -2072,10 +2107,8 @@ lock_rec_reset_and_release_wait(const hash_cell_t &cell, const page_id_t id,
lock_rec_cancel(lock);
else
{
- trx_t *lock_trx= lock->trx;
- lock_trx->mutex_lock();
+ TMTrxGuard tg{*lock->trx};
lock_rec_reset_nth_bit(lock, heap_no);
- lock_trx->mutex_unlock();
}
}
}
@@ -2157,6 +2190,7 @@ lock_rec_inherit_to_gap_if_gap_lock(
/*************************************************************//**
Moves the locks of a record to another record and resets the lock bits of
the donating record. */
+TRANSACTIONAL_TARGET
static
void
lock_rec_move(
@@ -2243,6 +2277,7 @@ Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
+TRANSACTIONAL_TARGET
void
lock_move_reorganize_page(
/*======================*/
@@ -2260,12 +2295,14 @@ lock_move_reorganize_page(
const page_id_t id{block->page.id()};
const auto id_fold= id.fold();
{
- LockGuard g{lock_sys.rec_hash, id};
+ TMLockGuard g{lock_sys.rec_hash, id};
if (!lock_sys_t::get_first(g.cell(), id))
return;
}
- /* We will modify arbitrary trx->lock.trx_locks. */
+ /* We will modify arbitrary trx->lock.trx_locks.
+ Do not bother with a memory transaction; we are going
+ to allocate memory and copy a lot of data. */
LockMutexGuard g{SRW_LOCK_CALL};
hash_cell_t &cell= *lock_sys.rec_hash.cell_get(id_fold);
@@ -2348,10 +2385,10 @@ lock_move_reorganize_page(
}
trx_t *lock_trx= lock->trx;
- lock_trx->mutex_lock();
+ lock_trx->mutex_lock();
- /* Clear the bit in old_lock. */
- if (old_heap_no < lock->un_member.rec_lock.n_bits &&
+ /* Clear the bit in old_lock. */
+ if (old_heap_no < lock->un_member.rec_lock.n_bits &&
lock_rec_reset_nth_bit(lock, old_heap_no))
{
ut_ad(!page_rec_is_metadata(orec));
@@ -2362,7 +2399,7 @@ lock_move_reorganize_page(
new_heap_no, lock->index, lock_trx, true);
}
- lock_trx->mutex_unlock();
+ lock_trx->mutex_unlock();
if (new_heap_no == PAGE_HEAP_NO_SUPREMUM)
{
@@ -2389,6 +2426,7 @@ lock_move_reorganize_page(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
+TRANSACTIONAL_TARGET
void
lock_move_rec_list_end(
/*===================*/
@@ -2405,6 +2443,7 @@ lock_move_rec_list_end(
const page_id_t id{block->page.id()};
const page_id_t new_id{new_block->page.id()};
{
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, id, new_id};
/* Note: when we move locks from record to record, waiting locks
@@ -2504,6 +2543,7 @@ lock_move_rec_list_end(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
+TRANSACTIONAL_TARGET
void
lock_move_rec_list_start(
/*=====================*/
@@ -2529,6 +2569,7 @@ lock_move_rec_list_start(
const page_id_t new_id{new_block->page.id()};
{
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, id, new_id};
for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock;
@@ -2616,6 +2657,7 @@ lock_move_rec_list_start(
/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
+TRANSACTIONAL_TARGET
void
lock_rtr_move_rec_list(
/*===================*/
@@ -2638,6 +2680,7 @@ lock_rtr_move_rec_list(
const page_id_t new_id{new_block->page.id()};
{
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, id, new_id};
for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock;
@@ -2712,6 +2755,7 @@ lock_update_split_right(
const page_id_t l{left_block->page.id()};
const page_id_t r{right_block->page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, l, r};
/* Move the locks on the supremum of the left page to the supremum
@@ -2764,6 +2808,7 @@ lock_update_merge_right(
const page_id_t l{left_block->page.id()};
const page_id_t r{right_block->page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, l, r};
/* Inherit the locks from the supremum of the left page to the
@@ -2790,6 +2835,7 @@ to be updated. */
void lock_update_root_raise(const buf_block_t &block, const page_id_t root)
{
const page_id_t id{block.page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, id, root};
/* Move the locks on the supremum of the root to the supremum of block */
lock_rec_move(g.cell1(), block, id, g.cell2(), root,
@@ -2802,6 +2848,7 @@ void lock_update_root_raise(const buf_block_t &block, const page_id_t root)
void lock_update_copy_and_discard(const buf_block_t &new_block, page_id_t old)
{
const page_id_t id{new_block.page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, id, old};
/* Move the locks on the supremum of the old page to the supremum of new */
lock_rec_move(g.cell1(), new_block, id, g.cell2(), old,
@@ -2838,6 +2885,7 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred,
const page_id_t l{left.page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, l, right};
const rec_t *left_next_rec= page_rec_get_next_const(orig_pred);
@@ -2883,6 +2931,7 @@ lock_rec_reset_and_inherit_gap_locks(
donating record */
{
const page_id_t heir{heir_block.page.id()};
+ /* This is a rare operation and likely too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, heir, donor};
lock_rec_reset_and_release_wait(g.cell1(), heir, heir_heap_no);
lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), donor, heir_block.frame,
@@ -2906,6 +2955,7 @@ lock_update_discard(
ulint heap_no;
const page_id_t heir(heir_block->page.id());
const page_id_t page_id(block->page.id());
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, heir, page_id};
if (lock_sys_t::get_first(g.cell2(), page_id)) {
@@ -3379,6 +3429,37 @@ lock_table_other_has_incompatible(
return(NULL);
}
+/** Acquire or enqueue a table lock */
+static dberr_t lock_table_low(dict_table_t *table, lock_mode mode,
+ que_thr_t *thr, trx_t *trx)
+{
+ lock_t *wait_for=
+ lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode);
+ dberr_t err= DB_SUCCESS;
+
+ trx->mutex_lock();
+
+ if (wait_for)
+ err= lock_table_enqueue_waiting(mode, table, thr, wait_for);
+ else
+ lock_table_create(table, mode, trx, nullptr);
+
+ trx->mutex_unlock();
+
+ return err;
+}
+
+#ifdef WITH_WSREP
+/** Acquire or enqueue a table lock in Galera replication mode. */
+ATTRIBUTE_NOINLINE
+static dberr_t lock_table_wsrep(dict_table_t *table, lock_mode mode,
+ que_thr_t *thr, trx_t *trx)
+{
+ LockMutexGuard g{SRW_LOCK_CALL};
+ return lock_table_low(table, mode, thr, trx);
+}
+#endif
+
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@@ -3392,8 +3473,6 @@ lock_table(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- dberr_t err;
- lock_t* wait_for;
if (table->is_temporary()) {
return DB_SUCCESS;
@@ -3403,7 +3482,7 @@ lock_table(
/* Look for equal or stronger locks the same trx already
has on the table. No need to acquire LockMutexGuard here
- because only this transacton can add/access table locks
+ because only this transaction can add/access table locks
to/from trx_t::table_locks. */
if (lock_table_has(trx, table, mode) || srv_read_only_mode) {
@@ -3422,46 +3501,18 @@ lock_table(
trx_set_rw_mode(trx);
}
- err = DB_SUCCESS;
-
#ifdef WITH_WSREP
if (trx->is_wsrep()) {
- lock_sys.wr_lock(SRW_LOCK_CALL);
- } else {
- lock_sys.rd_lock(SRW_LOCK_CALL);
- table->lock_mutex_lock();
+ return lock_table_wsrep(table, mode, thr, trx);
}
-#else
+#endif
lock_sys.rd_lock(SRW_LOCK_CALL);
table->lock_mutex_lock();
-#endif
-
- /* We have to check if the new lock is compatible with any locks
- other transactions have in the table lock queue. */
-
- wait_for = lock_table_other_has_incompatible(
- trx, LOCK_WAIT, table, mode);
-
- trx->mutex_lock();
-
- if (wait_for) {
- err = lock_table_enqueue_waiting(mode, table, thr, wait_for);
- } else {
- lock_table_create(table, mode, trx, wait_for);
- }
-
-#ifdef WITH_WSREP
- if (trx->is_wsrep()) {
- lock_sys.wr_unlock();
- trx->mutex_unlock();
- return err;
- }
-#endif
+ dberr_t err = lock_table_low(table, mode, thr, trx);
table->lock_mutex_unlock();
lock_sys.rd_unlock();
- trx->mutex_unlock();
- return(err);
+ return err;
}
/** Create a table lock object for a resurrected transaction.
@@ -3477,6 +3528,8 @@ void lock_table_resurrect(dict_table_t *table, trx_t *trx, lock_mode mode)
return;
{
+ /* This is executed at server startup while no connections
+    are allowed. Do not bother with lock elision. */
LockMutexGuard g{SRW_LOCK_CALL};
ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode));
@@ -3650,6 +3703,7 @@ dberr_t lock_sys_tables(trx_t *trx)
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
+TRANSACTIONAL_TARGET
void
lock_rec_unlock(
/*============*/
@@ -3700,9 +3754,10 @@ lock_rec_unlock(
released:
ut_a(!lock->is_waiting());
- trx->mutex_lock();
- lock_rec_reset_nth_bit(lock, heap_no);
- trx->mutex_unlock();
+ {
+ TMTrxGuard tg{*trx};
+ lock_rec_reset_nth_bit(lock, heap_no);
+ }
/* Check if we can now grant waiting lock requests */
@@ -3730,7 +3785,7 @@ released:
/** Release the explicit locks of a committing transaction,
and release possible other transactions waiting because of these locks.
@return whether the operation succeeded */
-static bool lock_release_try(trx_t *trx)
+TRANSACTIONAL_TARGET static bool lock_release_try(trx_t *trx)
{
/* At this point, trx->lock.trx_locks cannot be modified by other
threads, because our transaction has been committed.
@@ -3746,6 +3801,10 @@ static bool lock_release_try(trx_t *trx)
bool all_released= true;
restart:
ulint count= 1000;
+ /* We will not attempt hardware lock elision (memory transaction)
+ here. Both lock_rec_dequeue_from_page() and lock_table_dequeue()
+  would likely lead to a memory transaction abort due to a system call, to
+ wake up a waiting transaction. */
lock_sys.rd_lock(SRW_LOCK_CALL);
trx->mutex_lock();
@@ -3824,6 +3883,8 @@ void lock_release(trx_t *trx)
/* Fall back to acquiring lock_sys.latch in exclusive mode */
restart:
count= 1000;
+  /* There is probably no point in trying lock elision here;
+  in lock_release_try() it is different. */
lock_sys.wr_lock(SRW_LOCK_CALL);
trx->mutex_lock();
@@ -4002,10 +4063,14 @@ void lock_release_on_prepare(trx_t *trx)
}
/** Release locks on a table whose creation is being rolled back */
-ATTRIBUTE_COLD void lock_release_on_rollback(trx_t *trx, dict_table_t *table)
+ATTRIBUTE_COLD
+void lock_release_on_rollback(trx_t *trx, dict_table_t *table)
{
trx->mod_tables.erase(table);
+  /* This is very rarely executed code, for the case that a
+  CREATE TABLE operation is being rolled back. Theoretically,
+ we might try to remove the locks in multiple memory transactions. */
lock_sys.wr_lock(SRW_LOCK_CALL);
trx->mutex_lock();
@@ -4211,6 +4276,7 @@ http://bugs.mysql.com/36942 */
/*********************************************************************//**
Calculates the number of record lock structs in the record lock hash table.
@return number of record locks */
+TRANSACTIONAL_TARGET
static ulint lock_get_n_rec_locks()
{
ulint n_locks = 0;
@@ -4244,6 +4310,9 @@ lock_print_info_summary(
FILE* file, /*!< in: file where to print */
ibool nowait) /*!< in: whether to wait for lock_sys.latch */
{
+ /* Here, lock elision does not make sense, because
+ for the output we are going to invoke system calls,
+  which would abort a memory transaction. */
if (!nowait) {
lock_sys.wr_lock(SRW_LOCK_CALL);
} else if (!lock_sys.wr_lock_try()) {
@@ -4799,7 +4868,6 @@ static void lock_rec_block_validate(const page_id_t page_id)
}
}
-
static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*)
{
lock_sys.assert_locked();
@@ -4853,6 +4921,7 @@ be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+TRANSACTIONAL_TARGET
dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
@@ -5043,7 +5112,6 @@ static my_bool lock_rec_other_trx_holds_expl_callback(
@param[in] rec user record
@param[in] id page identifier
*/
-
static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx,
const rec_t *rec,
const page_id_t id)
@@ -5572,6 +5640,7 @@ static void lock_cancel_waiting_and_release(lock_t *lock)
trx->mutex_unlock();
}
#ifdef WITH_WSREP
+TRANSACTIONAL_TARGET
void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx)
{
lock_sys.wr_lock(SRW_LOCK_CALL);
@@ -5588,19 +5657,21 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx)
#endif /* WITH_WSREP */
/** Cancel a waiting lock request.
-@param lock waiting lock request
-@param trx active transaction
-@param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim
+@tparam check_victim whether to check for DB_DEADLOCK
+@param lock waiting lock request
+@param trx active transaction
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */
-dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock, bool check_victim)
+template<bool check_victim>
+dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
{
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->lock.wait_lock == lock);
ut_ad(trx->state == TRX_STATE_ACTIVE);
dberr_t err= DB_SUCCESS;
-
+ /* This would be too large for a memory transaction, except in the
+ DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */
if (lock->is_table())
{
if (!lock_sys.rd_lock_try())
@@ -5697,7 +5768,7 @@ void lock_sys_t::cancel(trx_t *trx)
if (!trx->dict_operation)
{
trx->error_state= DB_INTERRUPTED;
- cancel(trx, lock, false);
+ cancel<false>(trx, lock);
}
}
lock_sys.deadlock_check();
@@ -5750,7 +5821,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
if (trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else if (lock_t *wait_lock= trx->lock.wait_lock)
- err= lock_sys_t::cancel(trx, wait_lock, true);
+ err= lock_sys_t::cancel<true>(trx, wait_lock);
lock_sys.deadlock_check();
mysql_mutex_unlock(&lock_sys.wait_mutex);
return err;
@@ -5799,13 +5870,27 @@ static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element,
/** Check if there are any locks on a table.
@return true if table has either table or record locks. */
+TRANSACTIONAL_TARGET
bool lock_table_has_locks(dict_table_t *table)
{
if (table->n_rec_locks)
return true;
- table->lock_mutex_lock();
- auto len= UT_LIST_GET_LEN(table->locks);
- table->lock_mutex_unlock();
+ ulint len;
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (table->lock_mutex_is_locked())
+ xabort();
+ len= UT_LIST_GET_LEN(table->locks);
+ xend();
+ }
+ else
+#endif
+ {
+ table->lock_mutex_lock();
+ len= UT_LIST_GET_LEN(table->locks);
+ table->lock_mutex_unlock();
+ }
if (len)
return true;
#ifdef UNIV_DEBUG
@@ -5985,7 +6070,7 @@ namespace Deadlock
static trx_t *report(trx_t *const trx, bool current_trx)
{
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
- ut_ad(lock_sys.is_writer() == !current_trx);
+ ut_ad(xtest() || lock_sys.is_writer() == !current_trx);
/* Normally, trx should be a direct part of the deadlock
cycle. However, if innodb_deadlock_detect had been OFF in the
@@ -6018,6 +6103,9 @@ namespace Deadlock
undo_no_t victim_weight= ~0ULL;
unsigned victim_pos= 0, trx_pos= 0;
+ /* Here, lock elision does not make sense, because
+ for the output we are going to invoke system calls,
+  which would abort a memory transaction. */
if (current_trx && !lock_sys.wr_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
@@ -6166,18 +6254,22 @@ static bool Deadlock::check_and_resolve(trx_t *trx)
return false;
if (lock_t *wait_lock= trx->lock.wait_lock)
- lock_sys_t::cancel(trx, wait_lock, false);
+ lock_sys_t::cancel<false>(trx, wait_lock);
lock_sys.deadlock_check();
return true;
}
/** Check for deadlocks while holding only lock_sys.wait_mutex. */
+TRANSACTIONAL_TARGET
void lock_sys_t::deadlock_check()
{
ut_ad(!is_writer());
mysql_mutex_assert_owner(&wait_mutex);
bool acquired= false;
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ bool elided= false;
+#endif
if (Deadlock::to_be_checked)
{
@@ -6186,7 +6278,16 @@ void lock_sys_t::deadlock_check()
auto i= Deadlock::to_check.begin();
if (i == Deadlock::to_check.end())
break;
- if (!acquired)
+ if (acquired);
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ else if (xbegin())
+ {
+ if (latch.is_locked_or_waiting())
+ xabort();
+ acquired= elided= true;
+ }
+#endif
+ else
{
acquired= wr_lock_try();
if (!acquired)
@@ -6206,6 +6307,10 @@ void lock_sys_t::deadlock_check()
Deadlock::to_be_checked= false;
}
ut_ad(Deadlock::to_check.empty());
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (elided)
+ return;
+#endif
if (acquired)
wr_unlock();
}
@@ -6225,6 +6330,7 @@ void lock_update_split_and_merge(
const page_id_t l{left_block->page.id()};
const page_id_t r{right_block->page.id()};
+ /* This would likely be too large for a memory transaction. */
LockMultiGuard g{lock_sys.rec_hash, l, r};
const rec_t *left_next_rec= page_rec_get_next_const(orig_pred);
ut_ad(!page_rec_is_metadata(left_next_rec));
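A sketch of the raw elision idiom used in lock_table_has_locks() above and in trx_has_lock_x() further below (hypothetical helper, not part of this patch): a short read either runs inside a memory transaction, aborting if the table lock mutex is held, or takes dict_table_t::lock_mutex conventionally.

TRANSACTIONAL_TARGET
static ulint table_lock_queue_length(dict_table_t *table)
{
  ulint len;
#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
  if (xbegin())
  {
    /* xabort() rolls the transaction back; control re-enters at xbegin(),
    which then returns false and the locked path below is taken */
    if (table->lock_mutex_is_locked())
      xabort();
    len= UT_LIST_GET_LEN(table->locks);
    xend();
  }
  else
#endif
  {
    table->lock_mutex_lock();
    len= UT_LIST_GET_LEN(table->locks);
    table->lock_mutex_unlock();
  }
  return len;
}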
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index d2e29cddadf..5822524bda9 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -2848,6 +2848,7 @@ func_exit:
/** Reads in pages which have hashed log records, from an area around a given
page number.
@param[in] page_id page id */
+TRANSACTIONAL_TARGET
static void recv_read_in_area(page_id_t page_id)
{
uint32_t page_nos[RECV_READ_AHEAD_AREA];
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index b67c1212271..9b2ea9db542 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -695,6 +695,7 @@ row_ins_set_detailed(
Acquires dict_foreign_err_mutex, rewinds dict_foreign_err_file
and displays information about the given transaction.
The caller must release dict_foreign_err_mutex. */
+TRANSACTIONAL_TARGET
static
void
row_ins_foreign_trx_print(
@@ -708,7 +709,7 @@ row_ins_foreign_trx_print(
ut_ad(!srv_read_only_mode);
{
- LockMutexGuard g{SRW_LOCK_CALL};
+ TMLockMutexGuard g{SRW_LOCK_CALL};
n_rec_locks = trx->lock.n_rec_locks;
n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size = mem_heap_get_size(trx->lock.lock_heap);
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index a26a862e1ab..d1fbd6c3cc2 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -69,6 +69,9 @@ Created 10/8/1995 Heikki Tuuri
#include "fil0pagecompress.h"
#include "trx0types.h"
#include <list>
+#include "log.h"
+
+#include "transactional_lock_guard.h"
#include <my_service_manager.h>
/* The following is the maximum allowed duration of a lock wait. */
@@ -693,13 +696,15 @@ srv_free(void)
/*********************************************************************//**
Boots the InnoDB server. */
-void
-srv_boot(void)
-/*==========*/
+void srv_boot()
{
- srv_thread_pool_init();
- trx_pool_init();
- srv_init();
+#ifndef NO_ELISION
+ if (transactional_lock_enabled())
+ sql_print_information("InnoDB: Using transactional memory");
+#endif
+ srv_thread_pool_init();
+ trx_pool_init();
+ srv_init();
}
/******************************************************************//**
diff --git a/storage/innobase/sync/srw_lock.cc b/storage/innobase/sync/srw_lock.cc
index 82f8d615477..b54191d91b0 100644
--- a/storage/innobase/sync/srw_lock.cc
+++ b/storage/innobase/sync/srw_lock.cc
@@ -19,6 +19,73 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "srw_lock.h"
#include "srv0srv.h"
#include "my_cpu.h"
+#include "transactional_lock_guard.h"
+
+#ifdef NO_ELISION
+#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+# include <intrin.h>
+bool have_transactional_memory;
+bool transactional_lock_enabled()
+{
+ int regs[4];
+ __cpuid(regs, 0);
+ if (regs[0] < 7)
+ return false;
+ __cpuidex(regs, 7, 0);
+ /* Restricted Transactional Memory (RTM) */
+ have_transactional_memory= regs[1] & 1U << 11;
+ return have_transactional_memory;
+}
+#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+# include <cpuid.h>
+bool have_transactional_memory;
+bool transactional_lock_enabled()
+{
+ if (__get_cpuid_max(0, nullptr) < 7)
+ return false;
+ unsigned eax, ebx, ecx, edx;
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);
+ /* Restricted Transactional Memory (RTM) */
+ have_transactional_memory= ebx & 1U << 11;
+ return have_transactional_memory;
+}
+
+# ifdef UNIV_DEBUG
+TRANSACTIONAL_TARGET
+bool xtest() { return have_transactional_memory && _xtest(); }
+# endif
+#elif defined __powerpc64__
+# ifdef __linux__
+# include <sys/auxv.h>
+
+# ifndef PPC_FEATURE2_HTM_NOSC
+# define PPC_FEATURE2_HTM_NOSC 0x01000000
+# endif
+# ifndef PPC_FEATURE2_HTM_NO_SUSPEND
+# define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000
+# endif
+
+# ifndef AT_HWCAP2
+# define AT_HWCAP2 26
+# endif
+# endif
+bool have_transactional_memory;
+bool transactional_lock_enabled()
+{
+# ifdef __linux__
+ return getauxval(AT_HWCAP2) &
+ (PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM_NO_SUSPEND);
+# endif
+}
+
+# ifdef UNIV_DEBUG
+TRANSACTIONAL_TARGET bool xtest()
+{
+ return have_transactional_memory &&
+ _HTM_STATE (__builtin_ttest ()) == _HTM_TRANSACTIONAL;
+}
+# endif
+#endif
/** @return the parameter for srw_pause() */
static inline unsigned srw_pause_delay()
@@ -477,7 +544,7 @@ template<> void srw_lock_<true>::rd_wait()
return;
}
- IF_WIN(AcquireSRWLockShared(&lock), rw_rdlock(&lock));
+ IF_WIN(AcquireSRWLockShared(&lk), rw_rdlock(&lk));
}
template<> void srw_lock_<true>::wr_wait()
@@ -491,7 +558,7 @@ template<> void srw_lock_<true>::wr_wait()
return;
}
- IF_WIN(AcquireSRWLockExclusive(&lock), rw_wrlock(&lock));
+ IF_WIN(AcquireSRWLockExclusive(&lk), rw_wrlock(&lk));
}
#endif
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 3dd1b093cf6..9d9f9057628 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -77,7 +77,7 @@ TrxUndoRsegsIterator::TrxUndoRsegsIterator()
/** Sets the next rseg to purge in purge_sys.
Executed in the purge coordinator thread.
@return whether anything is to be purged */
-inline bool TrxUndoRsegsIterator::set_next()
+TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
{
mysql_mutex_lock(&purge_sys.pq_mutex);
@@ -110,23 +110,38 @@ inline bool TrxUndoRsegsIterator::set_next()
purge_sys.rseg = *m_iter++;
mysql_mutex_unlock(&purge_sys.pq_mutex);
- purge_sys.rseg->latch.rd_lock();
- ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
- ut_ad(purge_sys.rseg->last_trx_no() == m_rsegs.trx_no);
-
- /* We assume in purge of externally stored fields that space id is
- in the range of UNDO tablespace space ids */
+ /* We assume in purge of externally stored fields that space
+ id is in the range of UNDO tablespace space ids */
ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
|| srv_is_undo_tablespace(purge_sys.rseg->space->id));
- ut_a(purge_sys.tail.trx_no <= purge_sys.rseg->last_trx_no());
+ trx_id_t last_trx_no, tail_trx_no;
+ {
+#ifdef SUX_LOCK_GENERIC
+ purge_sys.rseg->latch.rd_lock();
+#else
+ transactional_shared_lock_guard<srw_spin_lock_low> rg
+ {purge_sys.rseg->latch};
+#endif
+ last_trx_no = purge_sys.rseg->last_trx_no();
+ tail_trx_no = purge_sys.tail.trx_no;
- purge_sys.tail.trx_no = purge_sys.rseg->last_trx_no();
- purge_sys.hdr_offset = purge_sys.rseg->last_offset();
- purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
+ purge_sys.tail.trx_no = last_trx_no;
+ purge_sys.hdr_offset = purge_sys.rseg->last_offset();
+ purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
+
+#ifdef SUX_LOCK_GENERIC
+ purge_sys.rseg->latch.rd_unlock();
+#endif
+ }
+
+ /* Only the purge coordinator task will access
+ purge_sys.rseg_iter or purge_sys.hdr_page_no. */
+ ut_ad(last_trx_no == m_rsegs.trx_no);
+ ut_a(purge_sys.hdr_page_no != FIL_NULL);
+ ut_a(tail_trx_no <= last_trx_no);
- purge_sys.rseg->latch.rd_unlock();
return(true);
}
@@ -550,7 +565,7 @@ __attribute__((optimize(0)))
Removes unnecessary history data from rollback segments. NOTE that when this
function is called, the caller must not have any latches on undo log pages!
*/
-static void trx_purge_truncate_history()
+TRANSACTIONAL_TARGET static void trx_purge_truncate_history()
{
ut_ad(purge_sys.head <= purge_sys.tail);
purge_sys_t::iterator &head= purge_sys.head.trx_no
@@ -617,12 +632,18 @@ static void trx_purge_truncate_history()
{
if (rseg.space != &space)
continue;
+#ifdef SUX_LOCK_GENERIC
rseg.latch.rd_lock();
+#else
+ transactional_shared_lock_guard<srw_spin_lock_low> g{rseg.latch};
+#endif
ut_ad(rseg.skip_allocation());
if (rseg.is_referenced())
{
not_free:
+#ifdef SUX_LOCK_GENERIC
rseg.latch.rd_unlock();
+#endif
return;
}
@@ -645,7 +666,9 @@ not_free:
goto not_free;
}
+#ifdef SUX_LOCK_GENERIC
rseg.latch.rd_unlock();
+#endif
}
ib::info() << "Truncating " << file->name;
@@ -938,10 +961,7 @@ Chooses the next undo log to purge and updates the info in purge_sys. This
function is used to initialize purge_sys when the next record to purge is
not known, and also to update the purge system info on the next record when
purge has handled the whole undo log for a transaction. */
-static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
+TRANSACTIONAL_TARGET static void trx_purge_choose_next_log()
{
ut_ad(!purge_sys.next_stored);
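A sketch of the conditional-elision idiom used above for purge_sys.rseg->latch (hypothetical helper; trx_rseg_t, srw_spin_lock_low and the guard are assumed to be as in this patch): under SUX_LOCK_GENERIC the explicit rd_lock()/rd_unlock() pair is kept, otherwise the read is first attempted inside a memory transaction via transactional_shared_lock_guard.

TRANSACTIONAL_INLINE
static trx_id_t rseg_last_trx_no(trx_rseg_t &rseg)
{
#ifdef SUX_LOCK_GENERIC
  rseg.latch.rd_lock();
  const trx_id_t no= rseg.last_trx_no();
  rseg.latch.rd_unlock();
  return no;
#else
  transactional_shared_lock_guard<srw_spin_lock_low> g{rseg.latch};
  return rseg.last_trx_no();
#endif
}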
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index 08e05edb896..45bd36d9669 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -1948,16 +1948,30 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
return err;
}
-ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
+TRANSACTIONAL_TARGET ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
/** @return whether the transaction holds an exclusive lock on a table */
static bool trx_has_lock_x(const trx_t &trx, dict_table_t& table)
{
if (table.is_temporary())
return true;
- table.lock_mutex_lock();
- const auto n= table.n_lock_x_or_s;
- table.lock_mutex_unlock();
+ uint32_t n;
+
+#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
+ if (xbegin())
+ {
+ if (table.lock_mutex_is_locked())
+ xabort();
+ n= table.n_lock_x_or_s;
+ xend();
+ }
+ else
+#endif
+ {
+ table.lock_mutex_lock();
+ n= table.n_lock_x_or_s;
+ table.lock_mutex_unlock();
+ }
/* This thread is executing trx. No other thread can modify our table locks
(only record locks might be created, in an implicit-to-explicit conversion).
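
trx_has_lock_x() above uses the same idea in open-coded form: start a memory transaction, abort it if the table's lock mutex is currently held, otherwise read n_lock_x_or_s and commit, and fall back to the mutex whenever the transaction cannot run. On an x86-64 build the xbegin()/xabort()/xend() helpers could be little more than wrappers over the RTM intrinsics, roughly as sketched here; this is an assumption-laden sketch that omits the runtime CPU detection and the POWER HTM variant the patch also provides:

#include <immintrin.h>   /* RTM intrinsics; needs -mrtm or target("rtm") */

static inline bool xbegin()
{
  /* True only when a transaction has actually started.  After an abort,
  execution resumes inside _xbegin() with an abort status, this returns
  false, and the caller takes its locking fallback path. */
  return _xbegin() == _XBEGIN_STARTED;
}

static inline void xabort() { _xabort(0); }  /* explicitly abort */
static inline void xend()   { _xend(); }     /* commit the transaction */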
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 61e1fd50cd5..18c93d5a8cc 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -456,7 +456,7 @@ void trx_t::free()
}
/** Transition to committed state, to release implicit locks. */
-inline void trx_t::commit_state()
+TRANSACTIONAL_INLINE inline void trx_t::commit_state()
{
ut_ad(state == TRX_STATE_PREPARED
|| state == TRX_STATE_PREPARED_RECOVERED
@@ -473,9 +473,8 @@ inline void trx_t::commit_state()
makes modifications to the database, will get an lsn larger than the
committing transaction T. In the case where the log flush fails, and
T never gets committed, also T2 will never get committed. */
- mutex.wr_lock();
+ TMTrxGuard tg{*this};
state= TRX_STATE_COMMITTED_IN_MEMORY;
- mutex.wr_unlock();
ut_ad(id || !is_referenced());
}
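
TMTrxGuard applies the same elide-or-fallback idea to an exclusive section: the single store to state either commits as part of a memory transaction or happens under a real lock/unlock pair on trx->mutex. A hypothetical exclusive counterpart of the guard sketched earlier might look like this; mutex_is_locked() is an assumed probe, and the class is an illustration rather than the actual TMTrxGuard:

class elided_trx_guard               /* hypothetical, for illustration only */
{
  trx_t &trx;
  bool elided;
public:
  explicit elided_trx_guard(trx_t &t) : trx(t), elided(false)
  {
    if (xbegin())
    {
      if (t.mutex_is_locked())       /* someone holds trx->mutex: give up */
        xabort();
      elided= true;
      return;
    }
    trx.mutex_lock();                /* fallback: take the mutex for real */
  }
  ~elided_trx_guard() { if (elided) xend(); else trx.mutex_unlock(); }
};

As in the shared case, probing the mutex word inside the transaction is what makes elision safe: a concurrent real acquisition conflicts with the transaction and forces the fallback path.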
@@ -498,8 +497,7 @@ inline void trx_t::release_locks()
}
/** At shutdown, frees a transaction object. */
-void
-trx_free_at_shutdown(trx_t *trx)
+TRANSACTIONAL_TARGET void trx_free_at_shutdown(trx_t *trx)
{
ut_ad(trx->is_recovered);
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
@@ -1228,7 +1226,7 @@ void trx_t::evict_table(table_id_t table_id, bool reset_only)
}
/** Mark a transaction committed in the main memory data structures. */
-inline void trx_t::commit_in_memory(const mtr_t *mtr)
+TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr)
{
must_flush_log_later= false;
read_view.close();
@@ -1395,7 +1393,7 @@ void trx_t::commit_cleanup()
/** Commit the transaction in a mini-transaction.
@param mtr mini-transaction (if there are any persistent modifications) */
-void trx_t::commit_low(mtr_t *mtr)
+TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr)
{
ut_ad(!mtr || mtr->is_active());
ut_d(bool aborted = in_rollback && error_state == DB_DEADLOCK);
@@ -1771,6 +1769,7 @@ trx_print_latched(
/**********************************************************************//**
Prints info about a transaction.
Acquires and releases lock_sys.latch. */
+TRANSACTIONAL_TARGET
void
trx_print(
/*======*/
@@ -1781,7 +1780,7 @@ trx_print(
{
ulint n_rec_locks, n_trx_locks, heap_size;
{
- LockMutexGuard g{SRW_LOCK_CALL};
+ TMLockMutexGuard g{SRW_LOCK_CALL};
n_rec_locks= trx->lock.n_rec_locks;
n_trx_locks= UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size= mem_heap_get_size(trx->lock.lock_heap);
@@ -1833,6 +1832,7 @@ static lsn_t trx_prepare_low(trx_t *trx)
/****************************************************************//**
Prepares a transaction. */
+TRANSACTIONAL_TARGET
static
void
trx_prepare(
@@ -1848,9 +1848,10 @@ trx_prepare(
DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
ut_a(trx->state == TRX_STATE_ACTIVE);
- trx->mutex_lock();
- trx->state = TRX_STATE_PREPARED;
- trx->mutex_unlock();
+ {
+ TMTrxGuard tg{*trx};
+ trx->state = TRX_STATE_PREPARED;
+ }
if (lsn) {
/* Depending on the my.cnf options, we may now write the log