diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-09-03 12:31:37 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-09-04 09:42:38 +0300 |
commit | b07beff8943d083cf900c8f2ea0a386a84d9f8db (patch) | |
tree | 388665f25f2c818a4efa057c9d9048558ddfe27d /storage/innobase/lock/lock0lock.cc | |
parent | 7c79c127842133d1f85ac2273a229d84007075c9 (diff) | |
download | mariadb-git-b07beff8943d083cf900c8f2ea0a386a84d9f8db.tar.gz |
MDEV-15326: InnoDB: Failing assertion: !other_lock
MySQL 5.7.9 (and MariaDB 10.2.2) introduced a race condition
between InnoDB transaction commit and the conversion of implicit
locks into explicit ones.
The assertion failure can be triggered with a test that runs
3 concurrent single-statement transactions in a loop on a simple
table:
CREATE TABLE t (a INT PRIMARY KEY) ENGINE=InnoDB;
thread1: INSERT INTO t SET a=1;
thread2: DELETE FROM t;
thread3: SELECT * FROM t FOR UPDATE; -- or DELETE FROM t;
The failure scenarios are like the following:
(1) The INSERT statement is being committed, waiting for lock_sys->mutex.
(2) At the time of the failure, both the DELETE and SELECT transactions
are active but have not logged any changes yet.
(3) The transaction where the !other_lock assertion fails started
lock_rec_convert_impl_to_expl().
(4) After this point, the commit of the INSERT removed the transaction from
trx_sys->rw_trx_set, in trx_erase_lists().
(5) The other transaction consulted trx_sys->rw_trx_set and determined
that there is no implicit lock. Hence, it grabbed the lock.
(6) The !other_lock assertion fails in lock_rec_add_to_queue()
for the lock_rec_convert_impl_to_expl(), because the lock was 'stolen'.
This assertion failure looks genuine, because the INSERT transaction
is still active (trx->state=TRX_STATE_ACTIVE).
The problematic step (4) was introduced in
mysql/mysql-server@e27e0e0bb75b4d35e87059816f1cc370c09890ad
which fixed something related to MVCC (covered by the test
innodb.innodb-read-view). Basically, it reintroduced an error
that had been mentioned in an earlier commit
mysql/mysql-server@a17be6963fc0d9210fa0642d3985b7219cdaf0c5:
"The active transaction was removed from trx_sys->rw_trx_set prematurely."
Our fix goes along the following lines:
(a) Implicit locks will released by assigning
trx->state=TRX_STATE_COMMITTED_IN_MEMORY as the first step.
This transition will no longer be protected by lock_sys_t::mutex,
only by trx->mutex. This idea is by Sergey Vojtovich.
(b) We detach the transaction from trx_sys before starting to release
explicit locks.
(c) All callers of trx_rw_is_active() and trx_rw_is_active_low() must
recheck trx->state after acquiring trx->mutex.
(d) Before releasing any explicit locks, we will ensure that any activity
by other threads to convert implicit locks into explicit will have ceased,
by checking !trx_is_referenced(trx). There was a glitch
in this check when it was part of lock_trx_release_locks(); at the end
we would release trx->mutex and acquire lock_sys->mutex and trx->mutex,
and fail to recheck (trx_is_referenced() is protected by trx_t::mutex).
(e) Explicit locks can be released in batches (LOCK_RELEASE_INTERVAL=1000)
just like we did before.
trx_t::state: Document that the transition to COMMITTED is only
protected by trx_t::mutex, no longer by lock_sys_t::mutex.
trx_rw_is_active_low(), trx_rw_is_active(): Document that the transaction
state should be rechecked after acquiring trx_t::mutex.
trx_t::commit_state(): New function to change a transaction to committed
state, to release implicit locks.
trx_t::release_locks(): New function to release the explicit locks
after commit_state().
lock_trx_release_locks(): Move much of the logic to the caller
(which must invoke trx_t::commit_state() and trx_t::release_locks()
as needed), and assert that the transaction will have locks.
trx_get_trx_by_xid(): Make the parameter a pointer to const.
lock_rec_other_trx_holds_expl(): Recheck trx->state after acquiring
trx->mutex, and avoid a redundant lookup of the transaction.
lock_rec_queue_validate(): Recheck impl_trx->state while holding
impl_trx->mutex.
row_vers_impl_x_locked(), row_vers_impl_x_locked_low():
Document that the transaction state must be rechecked after
trx_mutex_enter().
trx_free_prepared(): Adjust for the changes to lock_trx_release_locks().
Diffstat (limited to 'storage/innobase/lock/lock0lock.cc')
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 229 |
1 files changed, 63 insertions, 166 deletions
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 8634f7e73c5..49c0e1da743 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1273,30 +1273,31 @@ lock_rec_other_trx_holds_expl( trx_t* holds = NULL; lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); + trx_mutex_enter(trx); - if (trx_t* impl_trx = trx_rw_is_active(trx->id, NULL, false)) { - ulint heap_no = page_rec_get_heap_no(rec); - mutex_enter(&trx_sys->mutex); + ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) { + const ulint heap_no = page_rec_get_heap_no(rec); for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); t != NULL; t = UT_LIST_GET_NEXT(trx_list, t)) { lock_t* expl_lock = lock_rec_has_expl( precise_mode, block, heap_no, t); - - if (expl_lock && expl_lock->trx != impl_trx) { + if (expl_lock && expl_lock->trx != trx) { /* An explicit lock is held by trx other than the trx holding the implicit lock. */ holds = expl_lock->trx; break; } } - - mutex_exit(&trx_sys->mutex); } lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + trx_mutex_exit(trx); return(holds); } @@ -5107,7 +5108,7 @@ lock_trx_table_locks_find( { bool found = false; - trx_mutex_enter(trx); + ut_ad(trx_mutex_own(trx)); for (lock_list::const_iterator it = trx->lock.table_locks.begin(), end = trx->lock.table_locks.end(); it != end; ++it) { @@ -5130,8 +5131,6 @@ lock_trx_table_locks_find( ut_a(lock->un_member.tab_lock.table != NULL); } - trx_mutex_exit(trx); - return(found); } @@ -5155,21 +5154,20 @@ lock_table_queue_validate( /* lock->trx->state cannot change from or to NOT_STARTED while we are holding the trx_sys->mutex. It may change - from ACTIVE to PREPARED, but it may not change to - COMMITTED, because we are holding the lock_sys->mutex. */ + from ACTIVE or PREPARED to PREPARED or COMMITTED. */ + trx_mutex_enter(lock->trx); ut_ad(trx_assert_started(lock->trx)); - - if (!lock_get_wait(lock)) { - + if (trx_state_eq(lock->trx, TRX_STATE_COMMITTED_IN_MEMORY)) { + } else if (!lock_get_wait(lock)) { ut_a(!lock_table_other_has_incompatible( lock->trx, 0, table, lock_get_mode(lock))); } else { - ut_a(lock_table_has_to_wait_in_queue(lock)); } ut_a(lock_trx_table_locks_find(lock->trx, lock)); + trx_mutex_exit(lock->trx); } return(TRUE); @@ -5191,7 +5189,6 @@ lock_rec_queue_validate( const dict_index_t* index, /*!< in: index, or NULL if not known */ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ { - const trx_t* impl_trx; const lock_t* lock; ulint heap_no; @@ -5217,40 +5214,34 @@ lock_rec_queue_validate( lock != NULL; lock = lock_rec_get_next_const(heap_no, lock)) { - ut_ad(!trx_is_ac_nl_ro(lock->trx)); + ut_ad(!index || lock->index == index); - if (lock_get_wait(lock)) { - ut_a(lock_rec_has_to_wait_in_queue(lock)); - } - - if (index != NULL) { - ut_a(lock->index == index); - } + trx_mutex_enter(lock->trx); + ut_ad(!trx_is_ac_nl_ro(lock->trx)); + ut_ad(trx_state_eq(lock->trx, + TRX_STATE_COMMITTED_IN_MEMORY) + || !lock_get_wait(lock) + || lock_rec_has_to_wait_in_queue(lock)); + trx_mutex_exit(lock->trx); } goto func_exit; } ut_ad(page_rec_is_leaf(rec)); + ut_ad(lock_mutex_own()); - if (index == NULL) { - + if (!index || !index->is_primary()) { /* Nothing we can do */ - - } else if (dict_index_is_clust(index)) { - trx_id_t trx_id; - - /* Unlike the non-debug code, this invariant can only succeed - if the check and assertion are covered by the lock mutex. */ - - trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); - impl_trx = trx_rw_is_active_low(trx_id, NULL); - - ut_ad(lock_mutex_own()); - /* impl_trx cannot be committed until lock_mutex_exit() - because lock_trx_release_locks() acquires lock_sys->mutex */ - - if (!impl_trx) { + } else if (trx_t* impl_trx = trx_rw_is_active_low( + lock_clust_rec_some_has_impl(rec, index, offsets), + NULL)) { + /* impl_trx could have been committed before we + acquire its mutex, but not thereafter. */ + + mutex_enter(&impl_trx->mutex); + ut_ad(impl_trx->state != TRX_STATE_NOT_STARTED); + if (impl_trx->state == TRX_STATE_COMMITTED_IN_MEMORY) { } else if (const lock_t* other_lock = lock_rec_other_has_expl_req( LOCK_S, block, true, heap_no, @@ -5292,6 +5283,8 @@ lock_rec_queue_validate( ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)); } + + mutex_exit(&impl_trx->mutex); } for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no); @@ -5779,29 +5772,23 @@ lock_rec_convert_impl_to_expl_for_trx( trx_t* trx, /*!< in/out: active transaction */ ulint heap_no)/*!< in: rec heap number to lock */ { - ut_ad(trx_is_referenced(trx)); - DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); lock_mutex_enter(); - + trx_mutex_enter(trx); ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx)) { - - ulint type_mode; - - type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP); - - lock_rec_add_to_queue( - type_mode, block, heap_no, index, trx, FALSE); + lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, trx, true); } lock_mutex_exit(); - - trx_release_reference(trx); + ut_ad(trx->n_ref > 0); + --trx->n_ref; + trx_mutex_exit(trx); DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx"); } @@ -6541,118 +6528,24 @@ lock_unlock_table_autoinc( } } -/*********************************************************************//** -Releases a transaction's locks, and releases possible other transactions -waiting because of these locks. Change the state of the transaction to -TRX_STATE_COMMITTED_IN_MEMORY. */ -void -lock_trx_release_locks( -/*===================*/ - trx_t* trx) /*!< in/out: transaction */ +/** Release the explicit locks of a committing transaction, +and release possible other transactions waiting because of these locks. */ +void lock_trx_release_locks(trx_t* trx) { - check_trx_state(trx); - - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) - || trx_state_eq(trx, TRX_STATE_PREPARED) - || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED) - || (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) - && trx->is_recovered - && !UT_LIST_GET_LEN(trx->lock.trx_locks))); - - bool release_lock; - - release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0); - - /* Don't take lock_sys mutex if trx didn't acquire any lock. */ - if (release_lock) { - - /* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY - is protected by both the lock_sys->mutex and the trx->mutex. */ - lock_mutex_enter(); - } - - trx_mutex_enter(trx); - - /* The following assignment makes the transaction committed in memory - and makes its changes to data visible to other transactions. - NOTE that there is a small discrepancy from the strict formal - visibility rules here: a human user of the database can see - modifications made by another transaction T even before the necessary - log segment has been flushed to the disk. If the database happens to - crash before the flush, the user has seen modifications from T which - will never be a committed transaction. However, any transaction T2 - which sees the modifications of the committing transaction T, and - which also itself makes modifications to the database, will get an lsn - larger than the committing transaction T. In the case where the log - flush fails, and T never gets committed, also T2 will never get - committed. */ - - /*--------------------------------------*/ - trx->state = TRX_STATE_COMMITTED_IN_MEMORY; - /*--------------------------------------*/ - - if (trx_is_referenced(trx)) { - - ut_a(release_lock); - - lock_mutex_exit(); - - while (trx_is_referenced(trx)) { - - trx_mutex_exit(trx); - - DEBUG_SYNC_C("waiting_trx_is_not_referenced"); - - /** Doing an implicit to explicit conversion - should not be expensive. */ - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - - trx_mutex_enter(trx); - } - - trx_mutex_exit(trx); - - lock_mutex_enter(); - - trx_mutex_enter(trx); - } - - ut_ad(!trx_is_referenced(trx)); - - /* If the background thread trx_rollback_or_clean_recovered() - is still active then there is a chance that the rollback - thread may see this trx as COMMITTED_IN_MEMORY and goes ahead - to clean it up calling trx_cleanup_at_db_startup(). This can - happen in the case we are committing a trx here that is left - in PREPARED state during the crash. Note that commit of the - rollback of a PREPARED trx happens in the recovery thread - while the rollback of other transactions happen in the - background thread. To avoid this race we unconditionally unset - the is_recovered flag. */ - - trx->is_recovered = false; - - trx_mutex_exit(trx); - - if (release_lock) { - - lock_release(trx); - - lock_mutex_exit(); - } + ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks)); + lock_mutex_enter(); + lock_release(trx); trx->lock.n_rec_locks = 0; - /* We don't remove the locks one by one from the vector for efficiency reasons. We simply reset it because we would have released all the locks anyway. */ trx->lock.table_locks.clear(); - ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - ut_a(ib_vector_is_empty(trx->autoinc_locks)); - ut_a(trx->lock.table_locks.empty()); - + ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); + ut_ad(ib_vector_is_empty(trx->autoinc_locks)); + lock_mutex_exit(); mem_heap_empty(trx->lock.lock_heap); } @@ -6724,18 +6617,16 @@ lock_table_locks_lookup( itself */ const trx_ut_list_t* trx_list) /*!< in: trx list to check */ { - trx_t* trx; - ut_a(table != NULL); ut_ad(lock_mutex_own()); ut_ad(trx_sys_mutex_own()); - for (trx = UT_LIST_GET_FIRST(*trx_list); + for (trx_t* trx = UT_LIST_GET_FIRST(*trx_list); trx != NULL; trx = UT_LIST_GET_NEXT(trx_list, trx)) { + const lock_t* lock; - const lock_t* lock; - + trx_mutex_enter(trx); check_trx_state(trx); for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); @@ -6748,15 +6639,21 @@ lock_table_locks_lookup( ut_ad(!dict_index_is_online_ddl(lock->index) || dict_index_is_clust(lock->index)); if (lock->index->table == table) { - return(lock); + break; } } else if (lock->un_member.tab_lock.table == table) { - return(lock); + break; } } + + trx_mutex_exit(trx); + + if (lock) { + return lock; + } } - return(NULL); + return NULL; } #endif /* UNIV_DEBUG */ @@ -6921,7 +6818,7 @@ DeadlockChecker::start_print() if (srv_print_all_deadlocks) { ib::info() << "Transactions deadlock detected, dumping" - << " detailed information."; + " detailed information."; } } |