summaryrefslogtreecommitdiff
path: root/mysql-test/suite
diff options
context:
space:
mode:
authorsjaakola <seppo.jaakola@iki.fi>2021-10-21 14:49:51 +0300
committerOleksandr Byelkin <sanja@mariadb.com>2021-10-29 20:40:35 +0200
commitef2dbb8dbc3ee42b59adcd2ee4b9967ff55867a1 (patch)
tree86553cbede59c91d2308bc452afa86d5c2a0f254 /mysql-test/suite
parentd5bc05798f18af4b6c51075fd6b92c595ebf066f (diff)
downloadmariadb-git-ef2dbb8dbc3ee42b59adcd2ee4b9967ff55867a1.tar.gz
MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx, the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events() and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread() and mutexes are victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex This patch is the plan D variant for fixing potetial mutex locking order exercised by BF aborting and KILL command execution. In this approach, KILL command is replicated as TOI operation. This guarantees total isolation for the KILL command execution in the first node: there is no concurrent replication applying and no concurrent DDL executing. Therefore there is no risk of BF aborting to happen in parallel with KILL command execution either. Potential mutex deadlocks between the different mutex access paths with KILL command execution and BF aborting cannot therefore happen. TOI replication is used, in this approach, purely as means to provide isolated KILL command execution in the first node. KILL command should not (and must not) be applied in secondary nodes. In this patch, we make this sure by skipping KILL execution in secondary nodes, in applying phase, where we bail out if applier thread is trying to execute KILL command. This is effective, but skipping the applying of KILL command could happen much earlier as well. This also fixed unprotected calls to wsrep_thd_abort that will use wsrep_abort_transaction. This is fixed by holding THD::LOCK_thd_data while we abort transaction. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
Diffstat (limited to 'mysql-test/suite')
-rw-r--r--mysql-test/suite/galera/r/galera_UK_conflict.result13
-rw-r--r--mysql-test/suite/galera/r/galera_bf_kill_debug.result54
-rw-r--r--mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result16
-rw-r--r--mysql-test/suite/galera/t/galera_UK_conflict.test23
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.cnf7
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.test140
-rw-r--r--mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test14
-rw-r--r--mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test2
8 files changed, 34 insertions, 235 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result
index 44bb64c9d63..cc7e17d7c58 100644
--- a/mysql-test/suite/galera/r/galera_UK_conflict.result
+++ b/mysql-test/suite/galera/r/galera_UK_conflict.result
@@ -68,9 +68,9 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
-DROP TABLE t1;
-test scenario 2
-connection node_1;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
@@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
-COMMIT;
-connection node_1a;
-SET SESSION wsrep_on = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
@@ -125,6 +125,7 @@ f1 f2 f3
3 3 1
4 4 2
5 5 2
+8 8 8
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result
deleted file mode 100644
index c3eae243f47..00000000000
--- a/mysql-test/suite/galera/r/galera_bf_kill_debug.result
+++ /dev/null
@@ -1,54 +0,0 @@
-connection node_2;
-connection node_1;
-connection node_2;
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-truncate t1;
-insert into t1 values (1,0);
-begin;
-update t1 set b=2 where a=1;
-connection node_2;
-set session wsrep_sync_wait=0;
-connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2b;
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
-connection node_1;
-select * from t1;
-a b
-1 0
-update t1 set b= 1 where a=1;
-connection node_2b;
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
-connection node_2;
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
-connection node_2b;
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
-connection node_2;
-connection node_2a;
-select * from t1;
-connection node_2;
-SET DEBUG_SYNC = "RESET";
-drop table t1;
-disconnect node_2a;
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-CREATE TABLE t1 (i int primary key);
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
-INSERT INTO t1 VALUES (1);
-connection node_2;
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
-connection node_2a;
-connection node_2;
-select * from t1;
-i
-1
-disconnect node_2a;
-connection node_1;
-drop table t1;
diff --git a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
index 6e55c59ad15..2493075b635 100644
--- a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
+++ b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
@@ -23,22 +23,6 @@ connection node_1a;
connection node_1b;
connection node_2;
connection node_2a;
-connection node_1;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
-connection node_2;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
DROP TABLE child;
DROP TABLE parent;
DROP TABLE ten;
diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test
index 9978ba9b8bf..25f414a5764 100644
--- a/mysql-test/suite/galera/t/galera_UK_conflict.test
+++ b/mysql-test/suite/galera/t/galera_UK_conflict.test
@@ -140,6 +140,14 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
+--connection node_1
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
DROP TABLE t1;
@@ -199,9 +207,9 @@ INSERT INTO t1 VALUES (5, 5, 2);
--source include/galera_set_sync_point.inc
--connection node_1
---send COMMIT
-
---connection node_1a
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
@@ -273,4 +281,13 @@ SELECT * FROM t1;
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
+--connection node_1
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
DROP TABLE t1;
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
deleted file mode 100644
index e68f891792c..00000000000
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
+++ /dev/null
@@ -1,7 +0,0 @@
-!include ../galera_2nodes.cnf
-
-[mysqld.1]
-wsrep-debug=SERVER
-
-[mysqld.2]
-wsrep-debug=SERVER
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
deleted file mode 100644
index b687a5a6a67..00000000000
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test
+++ /dev/null
@@ -1,140 +0,0 @@
---source include/galera_cluster.inc
---source include/have_innodb.inc
---source include/have_debug.inc
---source include/have_debug_sync.inc
-
-#
-# Test case 7:
-# 1. Start a transaction on node_2,
-# and leave it pending while holding a row locked
-# 2. set sync point pause applier
-# 3. send a conflicting write on node_1, it will pause
-# at the sync point
-# 4. though another connection to node_2, kill the local
-# transaction
-#
-
---connection node_2
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
-
-#
-# connection node_2a runs a local transaction, that is victim of BF abort
-# and victim of KILL command by connection node_2
-#
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
-truncate t1;
-insert into t1 values (1,0);
-
-# start a transaction that will conflict with later applier
-begin;
-update t1 set b=2 where a=1;
-
---connection node_2
-set session wsrep_sync_wait=0;
---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
---source include/wait_condition.inc
-
---let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
-
-# connection node_2b is for controlling debug syn points
-# first set a sync point for applier, to pause during BF aborting
-# and before THD::awake would be called
-#
---connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2b
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
-
-#
-# replicate an update, which will BF abort the victim node_2a
-# however, while applier in node 2 is handling the abort,
-# it will pause in sync point set by node_2b
-#
---connection node_1
-select * from t1;
-update t1 set b= 1 where a=1;
-
-#
-# wait until the applying of above update has reached the sync point
-# in node 2
-#
---connection node_2b
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
-
---connection node_2
-#
-# pause KILL execution before awake
-#
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
---disable_query_log
---send_eval KILL $k_thread
---enable_query_log
-
-
---connection node_2b
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
-
-# release applier and KILL operator
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
-
---connection node_2
---reap
-
---connection node_2a
---error 0,1213
-select * from t1;
-
---connection node_2
-SET DEBUG_SYNC = "RESET";
-
-drop table t1;
-
---disconnect node_2a
-#
-# Test case 7:
-# run a transaction in node 2, and set a sync point to pause the transaction
-# in commit phase.
-# Through another connection to node 2, kill the committing transaction by
-# KILL QUERY command
-#
-
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
---let $connection_id = `SELECT CONNECTION_ID()`
-
-CREATE TABLE t1 (i int primary key);
-
-# Set up sync point
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
-
-# Send insert which will block in the sync point above
---send INSERT INTO t1 VALUES (1)
-
---connection node_2
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
-
---disable_query_log
---disable_result_log
-# victim has passed the point of no return, kill is not possible anymore
---eval KILL QUERY $connection_id
---enable_result_log
---enable_query_log
-
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
---connection node_2a
---error 0,1213
---reap
-
---connection node_2
-# victim was able to complete the INSERT
-select * from t1;
-
---disconnect node_2a
-
---connection node_1
-drop table t1;
-
diff --git a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
index fadc94d78ff..3b4b427f551 100644
--- a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
+++ b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
@@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0);
--connection node_2a
--reap
---connection node_1
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
-
---connection node_2
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
+#
+# ALTER TABLE could bf kill one or more of INSERTs to parent, so
+# the actual number of rows in PARENT depends on whether
+# the INSERT is committed before ALTER TABLE is executed
+#
DROP TABLE child;
DROP TABLE parent;
diff --git a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
index c0bbe5af8cf..241b62dbf8c 100644
--- a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
+++ b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
@@ -94,11 +94,13 @@ SELECT * FROM t1;
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1
+--disconnect node_1a
--connection node_2
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2
+--disconnect node_2a
--enable_query_log