diff options
author | sjaakola <seppo.jaakola@iki.fi> | 2020-05-25 14:23:42 +0300 |
---|---|---|
committer | sjaakola <seppo.jaakola@iki.fi> | 2020-05-25 19:30:23 +0300 |
commit | 1af6e92f0b106c0588f89c51b749c573262e82d1 (patch) | |
tree | 50d183a9a65c112799b0de89929fdbd27abb386e | |
parent | dc22acfdb62745017226a5c698c1bc3ee3e3563e (diff) | |
download | mariadb-git-1af6e92f0b106c0588f89c51b749c573262e82d1.tar.gz |
MDEV-22666 galera.MW-328A hang
The hang can happen between a lock connection issuing KILL CONNECTION for a victim,
which is in committing phase.
A two-resource deadlock occurs, where the killer is holding the victim's
LOCK_thd_data and requires the trx mutex of the victim.
The victim, on the other hand, holds its own trx mutex, but requires LOCK_thd_data
in wsrep_commit_ordered(). Hence a classic two-thread deadlock happens.
The fix in this commit changes innodb commit so that wsrep_commit_ordered()
is not called while holding trx mutex. With this, wsrep patch commit time mutex
locking does not violate the locking protocol of KILL command
(i.e. LOCK_thd_data -> trx mutex)
Also, a new test case has been added in galera.galera_bf_kill.test for a scenario
where a client connection is killed in the committing phase.
-rw-r--r-- | mysql-test/suite/galera/r/galera_bf_kill.result | 17 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_bf_kill.test | 44 | ||||
-rw-r--r-- | sql/service_wsrep.cc | 1 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.cc | 11 |
4 files changed, 67 insertions, 6 deletions
diff --git a/mysql-test/suite/galera/r/galera_bf_kill.result b/mysql-test/suite/galera/r/galera_bf_kill.result index 8b620323e35..2a7bc9eac29 100644 --- a/mysql-test/suite/galera/r/galera_bf_kill.result +++ b/mysql-test/suite/galera/r/galera_bf_kill.result @@ -70,3 +70,20 @@ a b 2 1 disconnect node_2a; drop table t1; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +CREATE TABLE t1 (i int primary key); +SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; +INSERT INTO t1 VALUES (1); +connection node_2; +SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; +SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; +SET DEBUG_SYNC='RESET'; +connection node_2a; +connection node_2; +select * from t1; +i +1 +disconnect node_2a; +connection node_2; +drop table t1; diff --git a/mysql-test/suite/galera/t/galera_bf_kill.test b/mysql-test/suite/galera/t/galera_bf_kill.test index 0748b732ead..ce8d27c281b 100644 --- a/mysql-test/suite/galera/t/galera_bf_kill.test +++ b/mysql-test/suite/galera/t/galera_bf_kill.test @@ -140,4 +140,48 @@ select * from t1; drop table t1; +# +# Test case 7: +# run a transaction in node 2, and set a sync point to pause the transaction +# in commit phase. 
+# Through another connection to node 2, kill the committing transaction by +# KILL QUERY command +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +--let $connection_id = `SELECT CONNECTION_ID()` + +CREATE TABLE t1 (i int primary key); + +# Set up sync point +SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; + +# Send insert which will block in the sync point above +--send INSERT INTO t1 VALUES (1) + +--connection node_2 +SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; + +--disable_query_log +--disable_result_log +# victim has passed the point of no return, kill is not possible anymore +--eval KILL QUERY $connection_id +--enable_result_log +--enable_query_log + +SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; +SET DEBUG_SYNC='RESET'; +--connection node_2a +--error 0,1213 +--reap + +--connection node_2 +# victim was able to complete the INSERT +select * from t1; + +--disconnect node_2a + +--connection node_2 +drop table t1; diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 7cac2bf741b..ada0bde803f 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -299,6 +299,7 @@ extern "C" void wsrep_commit_ordered(THD *thd) thd->wsrep_trx().state() == wsrep::transaction::s_committing && !wsrep_commit_will_write_binlog(thd)) { + DEBUG_SYNC(thd, "before_wsrep_ordered_commit"); thd->wsrep_cs().ordered_commit(); } } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 276a78d00bf..f926e661be4 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1493,12 +1493,6 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr) if (fts_trx) trx_finalize_for_fts(this, undo_no != 0); - trx_mutex_enter(this); - dict_operation= TRX_DICT_OP_NONE; - - DBUG_LOG("trx", "Commit in memory: " << this); - state= TRX_STATE_NOT_STARTED; - #ifdef WITH_WSREP /* Serialization history has been written and the transaction is committed in memory, which 
makes this commit ordered. Release commit @@ -1510,6 +1504,11 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr) } lock.was_chosen_as_wsrep_victim= false; #endif /* WITH_WSREP */ + trx_mutex_enter(this); + dict_operation= TRX_DICT_OP_NONE; + + DBUG_LOG("trx", "Commit in memory: " << this); + state= TRX_STATE_NOT_STARTED; assert_freed(); trx_init(this); |