diff options
author | Vlad Lesin <vlad_lesin@mail.ru> | 2022-10-26 11:58:22 +0300 |
---|---|---|
committer | Vlad Lesin <vlad_lesin@mail.ru> | 2022-10-26 12:15:40 +0300 |
commit | 78a04a4c22d54dc4f67f067fc9b7a0bc717ebfdd (patch) | |
tree | 182088869b8c42c25b0eb1c743cf585dd80a637d | |
parent | 5027cb2b74a0b37cbdd3ad190cb8b2bf738c0cde (diff) | |
download | mariadb-git-78a04a4c22d54dc4f67f067fc9b7a0bc717ebfdd.tar.gz |
MDEV-29869 mtr failure: innodb.deadlock_wait_thr_race
1. The merge aeccbbd926e759a5c3b9818d9948a35918404478 has overwritten
lock0lock.cc, and the changes of MDEV-29622 and MDEV-29635 were
partially lost. This commit restores those changes.
2. innodb.deadlock_wait_thr_race test:
The following hang was found during testing.
There is a deadlock_report_before_lock_releasing sync point in
Deadlock::report(), which waits for the sel_cont signal while holding the
lock_sys_t lock. The signal must be issued after the rollback of
"UPDATE t SET b = 100", and that rollback is executing an undo record, which
is blocked on a dict_sys latch request. dict_sys is latched by the statistics
update thread (dict_stats_save()), and during that update the lock_sys lock is
requested, which can't be acquired because Deadlock::report() holds it. We
have to disable statistics updates to make the test stable.
But even if statistics updates are disabled, and a transaction with a
consistent snapshot is started at the very beginning of the test to prevent
purging, purge can still be invoked for system tables, and it tries to open a
system table by id, which causes a dict_sys.freeze() call and dict_sys
latching. This, in combination with lock_sys::xx_lock(), causes the same
deadlock as described above. We need to disable purging globally for the
test as well (the tests' .opt files set --innodb-force-recovery=2).
All of the above also applies to the innodb.deadlock_wait_lock_race test.
8 files changed, 58 insertions, 42 deletions
diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def deleted file mode 100644 index 588ede3e886..00000000000 --- a/mysql-test/suite/innodb/disabled.def +++ /dev/null @@ -1,2 +0,0 @@ -deadlock_wait_lock_race : MDEV-29869: often hangs in lock wait timeout -deadlock_wait_thr_race : MDEV-29869: often hangs in lock wait timeout diff --git a/mysql-test/suite/innodb/r/deadlock_wait_lock_race.result b/mysql-test/suite/innodb/r/deadlock_wait_lock_race.result index 8a7878b3078..874f5af47d0 100644 --- a/mysql-test/suite/innodb/r/deadlock_wait_lock_race.result +++ b/mysql-test/suite/innodb/r/deadlock_wait_lock_race.result @@ -1,8 +1,5 @@ -connect suspend_purge,localhost,root,,; -START TRANSACTION WITH CONSISTENT SNAPSHOT; -connection default; -CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; -CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t2 VALUES (10), (20), (30); BEGIN; @@ -28,4 +25,3 @@ a b SET DEBUG_SYNC = 'RESET'; DROP TABLE t; DROP TABLE t2; -disconnect suspend_purge; diff --git a/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result b/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result index cea74b0b1cb..6992a447c07 100644 --- a/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result +++ b/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result @@ -1,8 +1,5 @@ -connect suspend_purge,localhost,root,,; -START TRANSACTION WITH CONSISTENT SNAPSHOT; -connection default; -CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; -CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t2 VALUES 
(10), (20), (30); BEGIN; @@ -34,4 +31,3 @@ a b SET DEBUG_SYNC = 'RESET'; DROP TABLE t; DROP TABLE t2; -disconnect suspend_purge; diff --git a/mysql-test/suite/innodb/t/deadlock_wait_lock_race.opt b/mysql-test/suite/innodb/t/deadlock_wait_lock_race.opt new file mode 100644 index 00000000000..65470e63e44 --- /dev/null +++ b/mysql-test/suite/innodb/t/deadlock_wait_lock_race.opt @@ -0,0 +1 @@ +--innodb-force-recovery=2 diff --git a/mysql-test/suite/innodb/t/deadlock_wait_lock_race.test b/mysql-test/suite/innodb/t/deadlock_wait_lock_race.test index 2b3c9763a8c..79a62b098c9 100644 --- a/mysql-test/suite/innodb/t/deadlock_wait_lock_race.test +++ b/mysql-test/suite/innodb/t/deadlock_wait_lock_race.test @@ -2,17 +2,23 @@ --source include/have_debug_sync.inc --source include/count_sessions.inc ---connect(suspend_purge,localhost,root,,) # Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx # ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point # lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be # emitted after the last SELECT in this test. The last SELECT will hang waiting # for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. -START TRANSACTION WITH CONSISTENT SNAPSHOT; ---connection default -CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; -CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; +# There is deadlock_report_before_lock_releasing sync point in +# Deadlock::report(), which is waiting for sel_cont signal under +# lock_sys_t lock. The signal must be issued after "UPDATE t SET b = 100" +# rollback, and that rollback is executing undo record, which is blocked on +# dict_sys latch request. dict_sys is locked by the thread of statistics +# update(dict_stats_save()), and during that update lock_sys lock is requested, +# and can't be acquired as Deadlock::report() holds it. We have to disable +# statistics update to make the test stable. 
+ +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t2 VALUES (10), (20), (30); @@ -58,5 +64,4 @@ SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL upd_cont"; SET DEBUG_SYNC = 'RESET'; DROP TABLE t; DROP TABLE t2; ---disconnect suspend_purge --source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb/t/deadlock_wait_thr_race.opt b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.opt new file mode 100644 index 00000000000..65470e63e44 --- /dev/null +++ b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.opt @@ -0,0 +1 @@ +--innodb-force-recovery=2 diff --git a/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test index 2027b45cbae..42576f35baf 100644 --- a/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test +++ b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test @@ -2,17 +2,23 @@ --source include/have_debug_sync.inc --source include/count_sessions.inc ---connect(suspend_purge,localhost,root,,) # Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx # ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point # lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be # emitted after the last SELECT in this test. The last SELECT will hang waiting # for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. -START TRANSACTION WITH CONSISTENT SNAPSHOT; ---connection default -CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; -CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; +# There is deadlock_report_before_lock_releasing sync point in +# Deadlock::report(), which is waiting for sel_cont signal under +# lock_sys_t lock. 
The signal must be issued after "UPDATE t SET b = 100" +# rollback, and that rollback is executing undo record, which is blocked on +# dict_sys latch request. dict_sys is locked by the thread of statistics +# update(dict_stats_save()), and during that update lock_sys lock is requested, +# and can't be acquired as Deadlock::report() holds it. We have to disable +# statistics update to make the test stable. + +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t2 VALUES (10), (20), (30); @@ -62,5 +68,4 @@ SET DEBUG_SYNC="now SIGNAL upd_cont_2"; SET DEBUG_SYNC = 'RESET'; DROP TABLE t; DROP TABLE t2; ---disconnect suspend_purge --source include/wait_until_count_sessions.inc diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 6b64d4fd1f4..15a87ce8469 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1796,8 +1796,8 @@ dberr_t lock_wait(que_thr_t *thr) wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID); thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE) ? 
THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK); - trx->error_state= DB_SUCCESS; + int err= 0; mysql_mutex_lock(&lock_sys.wait_mutex); if (trx->lock.wait_lock) { @@ -1819,25 +1819,24 @@ dberr_t lock_wait(que_thr_t *thr) if (row_lock_wait) lock_sys.wait_start(); - trx->error_state= DB_SUCCESS; - #ifdef HAVE_REPLICATION if (rpl) lock_wait_rpl_report(trx); #endif + if (trx->error_state != DB_SUCCESS) + goto check_trx_error; + while (trx->lock.wait_lock) { - int err; + DEBUG_SYNC_C("lock_wait_before_suspend"); if (no_timeout) - { my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex); - err= 0; - } else err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex, &abstime); +check_trx_error: switch (trx->error_state) { case DB_DEADLOCK: case DB_INTERRUPTED: @@ -1883,17 +1882,19 @@ end_wait: /** Resume a lock wait */ -static void lock_wait_end(trx_t *trx) +template <bool from_deadlock= false> +void lock_wait_end(trx_t *trx) { mysql_mutex_assert_owner(&lock_sys.wait_mutex); ut_ad(trx->mutex_is_owner()); ut_d(const auto state= trx->state); - ut_ad(state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED); - ut_ad(trx->lock.wait_thr); + ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE || + state == TRX_STATE_PREPARED); + ut_ad(from_deadlock || trx->lock.wait_thr); if (trx->lock.was_chosen_as_deadlock_victim) { - ut_ad(state == TRX_STATE_ACTIVE); + ut_ad(from_deadlock || state == TRX_STATE_ACTIVE); trx->error_state= DB_DEADLOCK; } @@ -5705,13 +5706,16 @@ static void lock_release_autoinc_locks(trx_t *trx) } /** Cancel a waiting lock request and release possibly waiting transactions */ -static void lock_cancel_waiting_and_release(lock_t *lock) +template <bool from_deadlock= false> +void lock_cancel_waiting_and_release(lock_t *lock) { lock_sys.assert_locked(*lock); mysql_mutex_assert_owner(&lock_sys.wait_mutex); trx_t *trx= lock->trx; trx->mutex_lock(); - ut_ad(trx->state == TRX_STATE_ACTIVE); + ut_d(const auto trx_state= trx->state); + 
ut_ad(trx_state == TRX_STATE_COMMITTED_IN_MEMORY || + trx_state == TRX_STATE_ACTIVE); if (!lock->is_table()) lock_rec_dequeue_from_page(lock, true); @@ -5730,7 +5734,8 @@ static void lock_cancel_waiting_and_release(lock_t *lock) /* Reset the wait flag and the back pointer to lock in trx. */ lock_reset_lock_and_trx_wait(lock); - lock_wait_end(trx); + lock_wait_end<from_deadlock>(trx); + trx->mutex_unlock(); } @@ -5901,6 +5906,7 @@ lock_unlock_table_autoinc( /** Handle a pending lock wait (DB_LOCK_WAIT) in a semi-consistent read while holding a clustered index leaf page latch. + @param trx transaction that is or was waiting for a lock @retval DB_SUCCESS if the lock was granted @retval DB_DEADLOCK if the transaction must be aborted due to a deadlock @@ -5911,8 +5917,13 @@ dberr_t lock_trx_handle_wait(trx_t *trx) DEBUG_SYNC_C("lock_trx_handle_wait_enter"); if (trx->lock.was_chosen_as_deadlock_victim) return DB_DEADLOCK; + DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check"); + /* trx->lock.was_chosen_as_deadlock_victim must always be set before + trx->lock.wait_lock if the transaction was chosen as deadlock victim, + the function must not return DB_SUCCESS if + trx->lock.was_chosen_as_deadlock_victim is set. */ if (!trx->lock.wait_lock) - return DB_SUCCESS; + return trx->lock.was_chosen_as_deadlock_victim ? 
DB_DEADLOCK : DB_SUCCESS; dberr_t err= DB_SUCCESS; mysql_mutex_lock(&lock_sys.wait_mutex); if (trx->lock.was_chosen_as_deadlock_victim) @@ -6315,8 +6326,11 @@ namespace Deadlock ut_ad(victim->state == TRX_STATE_ACTIVE); + /* victim->lock.was_chosen_as_deadlock_victim must always be set before + releasing waiting locks and reseting trx->lock.wait_lock */ victim->lock.was_chosen_as_deadlock_victim= true; - lock_cancel_waiting_and_release(victim->lock.wait_lock); + DEBUG_SYNC_C("deadlock_report_before_lock_releasing"); + lock_cancel_waiting_and_release<true>(victim->lock.wait_lock); #ifdef WITH_WSREP if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd)) wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd); |