summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsjaakola <seppo.jaakola@iki.fi>2020-05-25 14:23:42 +0300
committersjaakola <seppo.jaakola@iki.fi>2020-05-25 19:30:23 +0300
commit1af6e92f0b106c0588f89c51b749c573262e82d1 (patch)
tree50d183a9a65c112799b0de89929fdbd27abb386e
parentdc22acfdb62745017226a5c698c1bc3ee3e3563e (diff)
downloadmariadb-git-1af6e92f0b106c0588f89c51b749c573262e82d1.tar.gz
MDEV-22666 galera.MW-328A hang
The hang can happen when a local connection issues KILL CONNECTION for a victim that is in its committing phase. A two-resource deadlock occurs: the killer holds the victim's LOCK_thd_data and requires the victim's trx mutex. The victim, on the other hand, holds its own trx mutex but requires LOCK_thd_data in wsrep_commit_ordered(). Hence a classic two-thread deadlock happens. The fix in this commit changes the InnoDB commit so that wsrep_commit_ordered() is not called while holding the trx mutex. With this, the wsrep patch's commit-time mutex locking does not violate the locking protocol of the KILL command (i.e. LOCK_thd_data -> trx mutex). Also, a new test case has been added to galera.galera_bf_kill.test for the scenario where a client connection is killed in the committing phase.
-rw-r--r--mysql-test/suite/galera/r/galera_bf_kill.result17
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill.test44
-rw-r--r--sql/service_wsrep.cc1
-rw-r--r--storage/innobase/trx/trx0trx.cc11
4 files changed, 67 insertions, 6 deletions
diff --git a/mysql-test/suite/galera/r/galera_bf_kill.result b/mysql-test/suite/galera/r/galera_bf_kill.result
index 8b620323e35..2a7bc9eac29 100644
--- a/mysql-test/suite/galera/r/galera_bf_kill.result
+++ b/mysql-test/suite/galera/r/galera_bf_kill.result
@@ -70,3 +70,20 @@ a b
2 1
disconnect node_2a;
drop table t1;
+connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
+connection node_2a;
+CREATE TABLE t1 (i int primary key);
+SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
+INSERT INTO t1 VALUES (1);
+connection node_2;
+SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
+SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
+SET DEBUG_SYNC='RESET';
+connection node_2a;
+connection node_2;
+select * from t1;
+i
+1
+disconnect node_2a;
+connection node_2;
+drop table t1;
diff --git a/mysql-test/suite/galera/t/galera_bf_kill.test b/mysql-test/suite/galera/t/galera_bf_kill.test
index 0748b732ead..ce8d27c281b 100644
--- a/mysql-test/suite/galera/t/galera_bf_kill.test
+++ b/mysql-test/suite/galera/t/galera_bf_kill.test
@@ -140,4 +140,48 @@ select * from t1;
drop table t1;
+#
+# Test case 7:
+# run a transaction in node 2, and set a sync point to pause the transaction
+# in commit phase.
+# Through another connection to node 2, kill the committing transaction by
+# KILL QUERY command
+#
+
+--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
+--connection node_2a
+--let $connection_id = `SELECT CONNECTION_ID()`
+
+CREATE TABLE t1 (i int primary key);
+
+# Set up sync point
+SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
+
+# Send insert which will block in the sync point above
+--send INSERT INTO t1 VALUES (1)
+
+--connection node_2
+SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
+
+--disable_query_log
+--disable_result_log
+# victim has passed the point of no return, kill is not possible anymore
+--eval KILL QUERY $connection_id
+--enable_result_log
+--enable_query_log
+
+SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
+SET DEBUG_SYNC='RESET';
+--connection node_2a
+--error 0,1213
+--reap
+
+--connection node_2
+# victim was able to complete the INSERT
+select * from t1;
+
+--disconnect node_2a
+
+--connection node_2
+drop table t1;
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index 7cac2bf741b..ada0bde803f 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -299,6 +299,7 @@ extern "C" void wsrep_commit_ordered(THD *thd)
thd->wsrep_trx().state() == wsrep::transaction::s_committing &&
!wsrep_commit_will_write_binlog(thd))
{
+ DEBUG_SYNC(thd, "before_wsrep_ordered_commit");
thd->wsrep_cs().ordered_commit();
}
}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 276a78d00bf..f926e661be4 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1493,12 +1493,6 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
if (fts_trx)
trx_finalize_for_fts(this, undo_no != 0);
- trx_mutex_enter(this);
- dict_operation= TRX_DICT_OP_NONE;
-
- DBUG_LOG("trx", "Commit in memory: " << this);
- state= TRX_STATE_NOT_STARTED;
-
#ifdef WITH_WSREP
/* Serialization history has been written and the transaction is
committed in memory, which makes this commit ordered. Release commit
@@ -1510,6 +1504,11 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
}
lock.was_chosen_as_wsrep_victim= false;
#endif /* WITH_WSREP */
+ trx_mutex_enter(this);
+ dict_operation= TRX_DICT_OP_NONE;
+
+ DBUG_LOG("trx", "Commit in memory: " << this);
+ state= TRX_STATE_NOT_STARTED;
assert_freed();
trx_init(this);