diff options
author:    Marko Mäkelä <marko.makela@mariadb.com>  2022-08-26 11:41:43 +0300
committer: Marko Mäkelä <marko.makela@mariadb.com>  2022-08-26 11:41:43 +0300
commit:    0fbcb0a2b87d8807b85fec85507074bcda3d4da9 (patch)
tree:      50482c753fcae9d17bff9f3c67a0ce887af0bf30
parent:    76bb671e422d958f7252f428b39e109369e2679d (diff)
download:  mariadb-git-0fbcb0a2b87d8807b85fec85507074bcda3d4da9.tar.gz
MDEV-29383 Assertion mysql_mutex_assert_owner(&log_sys.flush_order_mutex) failed in mtr_t::commit()
In commit 0b47c126e31cddda1e94588799599e138400bcf8 (MDEV-13542)
a few calls to mtr_t::memo_push() were moved before a write latch
on the page was acquired. This introduced a race condition:
1. is_block_dirtied() returned false to mtr_t::memo_push()
2. buf_page_t::write_complete() was executed, the block was marked clean,
and the page latch was released
3. The page latch was acquired by the caller of mtr_t::memo_push(),
and mtr_t::m_made_dirty was not set even though the block is in
a clean state.
The impact of this race condition is that crash recovery and backups
may fail.
btr_cur_latch_leaves(), btr_store_big_rec_extern_fields(),
btr_free_externally_stored_field(), trx_purge_free_segment():
Acquire the page latch before invoking mtr_t::memo_push().
This fixes the regression caused by MDEV-13542.
Side note: It would suffice to set mtr_t::m_made_dirty at the time
we set the MTR_MEMO_MODIFY flag for a block. Currently that flag is
unnecessarily set if a mini-transaction acquires a page latch on
a page that is in a clean state, and will not actually modify the block.
This may cause unnecessary acquisitions of log_sys.flush_order_mutex
on mtr_t::commit().
mtr_t::free(): If the block had been exclusively latched in this
mini-transaction, set the m_made_dirty flag so that the flush order mutex
will be acquired during mtr_t::commit(). This should have been part of
commit 4179f93d28035ea2798cb1c16feeaaef87ab4775 (MDEV-18976).
It was necessary to change mtr_t::free() so that
WriteOPT_PAGE_CHECKSUM::operator() would be able to avoid writing
checksums for freed pages.
storage/innobase/btr/btr0cur.cc   | 10
storage/innobase/mtr/mtr0mtr.cc   |  6
storage/innobase/trx/trx0purge.cc |  4
3 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 82430f7e7ab..e9b4e2937b9 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -278,10 +278,10 @@ latch_block:
 			latch_leaves->blocks[1] = block;
 		}
 
-		mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
-
 		block->page.fix();
 		block->page.lock.x_lock();
+
+		mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
 #ifdef BTR_CUR_HASH_ADAPT
 		ut_ad(!btr_search_check_marked_free_index(block));
 #endif
@@ -7019,10 +7019,11 @@ btr_store_big_rec_extern_fields(
 	mtr.start();
 	index->set_modified(mtr);
 	mtr.set_log_mode_sub(*btr_mtr);
-	mtr.memo_push(rec_block, MTR_MEMO_PAGE_X_FIX);
 
 	rec_block->page.fix();
 	rec_block->page.lock.x_lock();
+
+	mtr.memo_push(rec_block, MTR_MEMO_PAGE_X_FIX);
 #ifdef BTR_CUR_HASH_ADAPT
 	ut_ad(!btr_search_check_marked_free_index(rec_block));
 #endif
@@ -7397,9 +7398,10 @@ skip_free:
 	/* The buffer pool block containing the BLOB pointer is
 	exclusively latched by local_mtr. To satisfy some design
 	constraints, we must recursively latch it in mtr as well. */
-	mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
 	block->fix();
 	block->page.lock.x_lock();
+
+	mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
 #ifdef BTR_CUR_HASH_ADAPT
 	ut_ad(!btr_search_check_marked_free_index(block));
 #endif
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 062eb650871..02965821ced 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -1599,8 +1599,10 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset)
 
   if (is_logged())
   {
-    m_memo.for_each_block_in_reverse
-      (CIterate<MarkFreed>((MarkFreed{{space.id, offset}})));
+    CIterate<MarkFreed> mf{MarkFreed{{space.id, offset}}};
+    m_memo.for_each_block_in_reverse(mf);
+    if (mf.functor.freed && !m_made_dirty)
+      m_made_dirty= is_block_dirtied(mf.functor.freed);
     m_log.close(log_write<FREE_PAGE>({space.id, offset}, nullptr));
   }
 }
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 31153c8e966..c0fafe1ec6b 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -399,11 +399,11 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
     mtr.commit();
     mtr.start();
     mtr.flag_modified();
-    mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_FIX);
-    mtr.memo_push(block, MTR_MEMO_PAGE_X_MODIFY);
     rseg->latch.wr_lock(SRW_LOCK_CALL);
     rseg_hdr->page.lock.x_lock();
     block->page.lock.x_lock();
+    mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_FIX);
+    mtr.memo_push(block, MTR_MEMO_PAGE_X_MODIFY);
   }
 
   /* The page list may now be inconsistent, but the length field