diff options
author | sjaakola <seppo.jaakola@iki.fi> | 2021-10-21 14:49:51 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2021-10-29 07:57:18 +0300 |
commit | db50ea3ad324c98e5adf144842304f2095954ab3 (patch) | |
tree | b100bf0faf642477be6f756d24136406538b43c9 | |
parent | c8b39f7ee2a9910fae2f03563b3ea337c45132c2 (diff) | |
download | mariadb-git-db50ea3ad324c98e5adf144842304f2095954ab3.tar.gz |
MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx,
the stack is
wsrep_thd_LOCK()
wsrep_kill_victim()
lock_rec_other_has_conflicting()
lock_clust_rec_read_check_and_lock()
row_search_mvcc()
ha_innobase::index_read()
ha_innobase::rnd_pos()
handler::ha_rnd_pos()
handler::rnd_pos_by_record()
handler::ha_rnd_pos_by_record()
Rows_log_event::find_row()
Update_rows_log_event::do_exec_row()
Rows_log_event::do_apply_event()
Log_event::apply_event()
wsrep_apply_events()
and mutexes are taken in the order
lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data
When a normal KILL statement is executed, the stack is
innobase_kill_query()
kill_handlerton()
plugin_foreach_with_mask()
ha_kill_query()
THD::awake()
kill_one_thread()
and mutexes are
victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex
This patch is the plan D variant for fixing potetial mutex locking
order exercised by BF aborting and KILL command execution.
In this approach, KILL command is replicated as TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting to happen in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths with KILL command execution and BF aborting cannot
therefore happen.
TOI replication is used, in this approach, purely as means
to provide isolated KILL command execution in the first node.
KILL command should not (and must not) be applied in secondary
nodes. In this patch, we make this sure by skipping KILL
execution in secondary nodes, in applying phase, where we
bail out if applier thread is trying to execute KILL command.
This is effective, but skipping the applying of KILL command
could happen much earlier as well.
This also fixed unprotected calls to wsrep_thd_abort
that will use wsrep_abort_transaction. This is fixed
by holding THD::LOCK_thd_data while we abort transaction.
Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
-rw-r--r-- | mysql-test/suite/galera/r/galera_UK_conflict.result | 7 | ||||
-rw-r--r-- | mysql-test/suite/galera/r/galera_to_error.result | 30 | ||||
-rw-r--r-- | mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result | 16 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_UK_conflict.test | 4 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_to_error.test | 71 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test | 14 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test | 2 | ||||
-rw-r--r-- | mysql-test/suite/wsrep/t/variables.test | 6 | ||||
-rw-r--r-- | sql/sql_class.cc | 22 | ||||
-rw-r--r-- | sql/sql_parse.cc | 36 | ||||
-rw-r--r-- | sql/wsrep_mysqld.cc | 56 | ||||
-rw-r--r-- | sql/wsrep_thd.cc | 10 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 325 | ||||
-rw-r--r-- | storage/innobase/include/ha_prototypes.h | 3 | ||||
-rw-r--r-- | storage/innobase/lock/lock0wait.cc | 18 |
15 files changed, 417 insertions, 203 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result index 76649f1b268..2795a86d6a6 100644 --- a/mysql-test/suite/galera/r/galera_UK_conflict.result +++ b/mysql-test/suite/galera/r/galera_UK_conflict.result @@ -68,6 +68,9 @@ f1 f2 f3 10 10 0 INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); +SELECT COUNT(*) FROM t1; +COUNT(*) +7 SELECT * FROM t1; f1 f2 f3 1 1 0 @@ -78,6 +81,9 @@ f1 f2 f3 8 8 8 10 10 0 connection node_1; +SELECT COUNT(*) FROM t1; +COUNT(*) +7 SELECT * FROM t1; f1 f2 f3 1 1 0 @@ -85,5 +91,6 @@ f1 f2 f3 4 4 2 5 5 2 7 7 7 +8 8 8 10 10 0 DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_to_error.result b/mysql-test/suite/galera/r/galera_to_error.result new file mode 100644 index 00000000000..4f985593466 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_to_error.result @@ -0,0 +1,30 @@ +connection node_1; +connection node_2; +connection node_1; +call mtr.add_suppression("WSREP: TO isolation failed for: "); +CREATE TABLE t2(a int not null auto_increment primary key, b int, key(b)) engine=innodb; +INSERT INTO t2 values (NULL,1),(NULL,2),(NULL,3),(NULL,4),(NULL,5),(NULL,6); +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +BEGIN; +UPDATE t2 set b = b + 20 where b BETWEEN 2 and 5;; +connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1b; +connection node_2; +Killing server ... +connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1c; +CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB; +ERROR 40001: WSREP replication failed. Check your wsrep connection state and retry the query. +ERROR 40001: WSREP replication failed. Check your wsrep connection state and retry the query. +CREATE UNIQUE INDEX b2 ON t2(b); +ERROR 08S01: WSREP has not yet prepared node for application use +connection node_1; +disconnect node_1a; +disconnect node_1b; +disconnect node_1c; +connection node_2; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +DROP TABLE t2; +disconnect node_2; +disconnect node_1; diff --git a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result index 94752ed7c76..a972394f87c 100644 --- a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result +++ b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result @@ -21,22 +21,6 @@ connection node_1a; connection node_1b; connection node_2; connection node_2a; -connection node_1; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -COUNT(*) -20001 -SELECT COUNT(*) FROM child; -COUNT(*) -10000 -connection node_2; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -COUNT(*) -20001 -SELECT COUNT(*) FROM child; -COUNT(*) -10000 DROP TABLE child; DROP TABLE parent; DROP TABLE ten; diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test index 57bafbf8ae0..83d0e47dc3d 100644 --- a/mysql-test/suite/galera/t/galera_UK_conflict.test +++ b/mysql-test/suite/galera/t/galera_UK_conflict.test @@ -140,9 +140,13 @@ SELECT * FROM t1; # original state in node 1 INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); +SELECT COUNT(*) FROM t1; SELECT * FROM t1; --connection node_1 +--let $wait_condition = SELECT COUNT(*) = 7 FROM t1 +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; SELECT * FROM t1; DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_to_error.test b/mysql-test/suite/galera/t/galera_to_error.test new file mode 100644 index 00000000000..71bfe64c02d --- /dev/null +++ b/mysql-test/suite/galera/t/galera_to_error.test @@ -0,0 +1,71 @@ +# +# Confirm that with two nodes, killing one causes the other to stop accepting connections +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +--connection node_1 +call mtr.add_suppression("WSREP: TO isolation failed for: "); +--let $wsrep_cluster_address_orig = `SELECT @@wsrep_cluster_address` + +CREATE TABLE t2(a int not null auto_increment primary key, b int, key(b)) engine=innodb; +INSERT INTO t2 values (NULL,1),(NULL,2),(NULL,3),(NULL,4),(NULL,5),(NULL,6); + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +BEGIN; +--send UPDATE t2 set b = b + 20 where b BETWEEN 2 and 5; + +# +# Take thread id for above query +# +--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1b +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--connection node_2 +--source include/kill_galera.inc + +--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1c +--error ER_LOCK_DEADLOCK +CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB; + +--disable_query_log +--error ER_LOCK_DEADLOCK +--eval KILL QUERY $k_thread; +--enable_query_log + +# Reset the master and restart the slave so that post-test checks can run +--error 1047 +CREATE UNIQUE INDEX b2 ON t2(b); + +--connection node_1 +--disconnect node_1a +--disconnect node_1b +--disconnect node_1c + +--connection node_2 +--source include/start_mysqld.inc +--source include/wait_until_connected_again.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--source include/wait_until_connected_again.inc + +DROP TABLE t2; + +# Restore original auto_increment_offset values. +--let $node_2=node_2a +--source include/auto_increment_offset_restore.inc + +--source include/galera_end.inc + diff --git a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test index fadc94d78ff..3b4b427f551 100644 --- a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test +++ b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test @@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0); --connection node_2a --reap ---connection node_1 -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -SELECT COUNT(*) FROM child; - ---connection node_2 -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -SELECT COUNT(*) FROM child; +# +# ALTER TABLE could bf kill one or more of INSERTs to parent, so +# the actual number of rows in PARENT depends on whether +# the INSERT is committed before ALTER TABLE is executed +# DROP TABLE child; DROP TABLE parent; diff --git a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test index c0bbe5af8cf..241b62dbf8c 100644 --- a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test +++ b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test @@ -94,11 +94,13 @@ SELECT * FROM t1; --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1 +--disconnect node_1a --connection node_2 --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2 +--disconnect node_2a --enable_query_log diff --git a/mysql-test/suite/wsrep/t/variables.test b/mysql-test/suite/wsrep/t/variables.test index 0cf13650ce0..875315c0e7c 100644 --- a/mysql-test/suite/wsrep/t/variables.test +++ b/mysql-test/suite/wsrep/t/variables.test @@ -66,7 +66,7 @@ call mtr.add_suppression("WSREP: Failed to get provider options"); #evalp SET GLOBAL wsrep_provider= '$WSREP_PROVIDER'; ---replace_regex /.*libgalera_smm.*/libgalera_smm.so/ +--replace_regex /.*libgalera.*/libgalera_smm.so/ SELECT @@global.wsrep_provider; SELECT @@global.wsrep_slave_threads; SELECT @@global.wsrep_cluster_address; @@ -77,7 +77,7 @@ SHOW STATUS LIKE 'wsrep_thread_count'; #evalp SET GLOBAL wsrep_provider= '$WSREP_PROVIDER'; ---replace_regex /.*libgalera_smm.*/libgalera_smm.so/ +--replace_regex /.*libgalera.*/libgalera_smm.so/ SELECT @@global.wsrep_provider; SELECT @@global.wsrep_cluster_address; SELECT @@global.wsrep_on; @@ -101,7 +101,7 @@ SELECT VARIABLE_VALUE AS EXPECT_1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VA SELECT VARIABLE_VALUE AS EXPECT_1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_rollbacker_thread_count'; SELECT VARIABLE_VALUE AS EXPECT_2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_thread_count'; ---replace_regex /.*libgalera_smm.*/libgalera_smm.so/ +--replace_regex /.*libgalera.*/libgalera_smm.so/ SELECT @@global.wsrep_provider; SELECT @@global.wsrep_cluster_address; SELECT @@global.wsrep_on; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 5ada018e540..2eec056ec9d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1863,6 +1863,7 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, if (needs_thr_lock_abort) { + bool mutex_released= false; mysql_mutex_lock(&in_use->LOCK_thd_data); /* If not already dying */ if (in_use->killed != KILL_CONNECTION_HARD) @@ -1879,18 +1880,21 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, thread can see those instances (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) - { signalled|= mysql_lock_abort_for_thread(this, thd_table); - if (WSREP(this) && wsrep_thd_is_BF(this, FALSE)) - { - WSREP_DEBUG("remove_table_from_cache: %llu", - (unsigned long long) this->real_id); - wsrep_abort_thd((void *)this, (void *)in_use, FALSE); - } - } } +#ifdef WITH_WSREP + if (WSREP(this) && wsrep_thd_is_BF(this, false)) + { + WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s" + " victim %llu query %s", + this->real_id, wsrep_thd_query(this), + in_use->real_id, wsrep_thd_query(in_use)); + wsrep_abort_thd((void *)this, (void *)in_use, false); + mutex_released= true; + } +#endif /* WITH_WSREP */ } - mysql_mutex_unlock(&in_use->LOCK_thd_data); + if (!mutex_released) mysql_mutex_unlock(&in_use->LOCK_thd_data); } DBUG_RETURN(signalled); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 3e1f248b082..2bec9c6b6cd 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. - Copyright (c) 2008, 2020, MariaDB + Copyright (c) 2008, 2021, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -9069,6 +9069,18 @@ static void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) { uint error; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (!(error= kill_one_thread(thd, id, state, type))) { if (!thd->killed) @@ -9078,6 +9090,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) } else my_error(error, MYF(0), id); +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd)); +#endif /* WITH_WSREP */ } @@ -9086,6 +9103,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) { uint error; ha_rows rows; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill_user called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (!(error= kill_threads_for_user(thd, user, state, &rows))) my_ok(thd, rows); else @@ -9096,6 +9125,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) */ my_error(error, MYF(0), user->host.str, user->user.str); } +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), user->user.str); +#endif /* WITH_WSREP */ } diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index e60100e2e90..4a99f781fdd 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2015 Codership Oy <http://www.codership.com> +/* Copyright 2008-2021 Codership Oy <http://www.codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -835,13 +835,25 @@ void wsrep_thr_init() DBUG_VOID_RETURN; } +/* This is wrapper for wsrep_break_lock in thr_lock.c */ +static int wsrep_thr_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD* victim_thd= (THD *) victim_thd_ptr; + /* We need to lock THD::LOCK_thd_data to protect victim + from concurrent usage or disconnect or delete. */ + mysql_mutex_lock(&victim_thd->LOCK_thd_data); + int res= wsrep_abort_thd(bf_thd_ptr, victim_thd_ptr, signal); + return res; +} + + void wsrep_init_startup (bool first) { if (wsrep_init()) unireg_abort(1); wsrep_thr_lock_init( (wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF, - (wsrep_abort_thd_fun)wsrep_abort_thd, + (wsrep_abort_thd_fun)wsrep_thr_abort_thd, wsrep_debug, wsrep_convert_LOCK_to_trx, (wsrep_on_fun)wsrep_on); @@ -1694,6 +1706,11 @@ static int wsrep_TOI_begin(THD *thd, char *db_, char *table_, case SQLCOM_DROP_TABLE: buf_err= wsrep_drop_table_query(thd, &buf, &buf_len); break; + case SQLCOM_KILL: + WSREP_DEBUG("KILL as TOI: %s", thd->query()); + buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), + &buf, &buf_len); + break; case SQLCOM_CREATE_ROLE: if (sp_process_definer(thd)) { @@ -2058,8 +2075,13 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, ticket->wsrep_report(true); } - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1); + /* This will call wsrep_abort_transaction so we should hold + THD::LOCK_thd_data to protect victim from concurrent usage + or disconnect or delete. */ + if (request_thd->wsrep_exec_mode == REPL_RECV) + DEBUG_SYNC(request_thd, "wsrep_after_granted_lock"); + + wsrep_abort_thd((void *) request_thd, (void *) granted_thd, true); ret= false; } } @@ -2241,6 +2263,7 @@ error: static bool abort_replicated(THD *thd) { bool ret_code= false; + mysql_mutex_lock(&thd->LOCK_thd_data); if (thd->wsrep_query_state== QUERY_COMMITTING) { WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); @@ -2248,6 +2271,8 @@ static bool abort_replicated(THD *thd) (void)wsrep_abort_thd(thd, thd, TRUE); ret_code= true; } + else + mysql_mutex_unlock(&thd->LOCK_thd_data); return ret_code; } @@ -2294,6 +2319,8 @@ static bool have_client_connections() (longlong) tmp->thread_id)); if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) { + WSREP_DEBUG("Informing thread %lld that it's time to die", + (longlong)tmp->thread_id); (void)abort_replicated(tmp); return true; } @@ -2378,6 +2405,8 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) { DBUG_PRINT("quit",("Informing thread %lld that it's time to die", (longlong) tmp->thread_id)); + WSREP_DEBUG("Informing thread %lld that it's time to die", + (longlong)tmp->thread_id); /* We skip slave threads & scheduler on this first loop through. */ if (!is_client_connection(tmp)) continue; @@ -2394,15 +2423,18 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) continue; } - /* replicated transactions must be skipped */ + /* replicated transactions must be skipped and aborted + with wsrep_abort_thd. */ if (abort_replicated(tmp)) continue; WSREP_DEBUG("closing connection %lld", (longlong) tmp->thread_id); /* - instead of wsrep_close_thread() we do now soft kill by THD::awake - */ + instead of wsrep_close_thread() we do now soft kill by + THD::awake(). Here also victim needs to be protected from + concurrent usage or disconnect or delete. + */ mysql_mutex_lock(&tmp->LOCK_thd_data); tmp->awake(KILL_CONNECTION); @@ -2423,7 +2455,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) I_List_iterator<THD> it2(threads); while ((tmp=it2++)) { -#ifndef __bsdi__ // Bug in BSDI kernel if (is_client_connection(tmp) && !abort_replicated(tmp) && !is_replaying_connection(tmp) && @@ -2432,7 +2463,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) WSREP_INFO("killing local connection: %lld", (longlong) tmp->thread_id); close_connection(tmp,0); } -#endif } DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); @@ -2621,7 +2651,8 @@ extern "C" void wsrep_thd_set_query_state( void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state) { - if (WSREP(thd)) thd->wsrep_conflict_state= state; + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + thd->wsrep_conflict_state= state; } @@ -2762,9 +2793,10 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) { if (signal) { - mysql_mutex_lock(&thd->LOCK_thd_data); + /* Here we should hold THD::LOCK_thd_data to + protect from concurrent usage. */ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); thd->awake(KILL_QUERY); - mysql_mutex_unlock(&thd->LOCK_thd_data); } else { diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index ef8c0e132f7..ae797c4c712 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Codership Oy <info@codership.com> +/* Copyright (C) 2013-2021 Codership Oy <info@codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -804,10 +804,12 @@ my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) { - THD *victim_thd = (THD *) victim_thd_ptr; - THD *bf_thd = (THD *) bf_thd_ptr; + THD *victim_thd= (THD *) victim_thd_ptr; + THD *bf_thd= (THD *) bf_thd_ptr; DBUG_ENTER("wsrep_abort_thd"); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + if ( (WSREP(bf_thd) || ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && @@ -821,6 +823,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) "aborted. Ignoring.", (bf_thd) ? (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); DBUG_RETURN(1); } @@ -831,6 +834,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) else { WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); } DBUG_RETURN(1); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 57e7ec236c0..8fb52b211c6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -5233,17 +5233,18 @@ UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock); @sa THD::awake() @sa ha_kill_query() */ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels) { - DBUG_ENTER("innobase_kill_query"); + DBUG_ENTER("innobase_kill_query"); #ifdef WITH_WSREP - if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT) { - /* if victim has been signaled by BF thread and/or aborting - is already progressing, following query aborting is not necessary - any more. - Also, BF thread should own trx mutex for the victim, which would - conflict with trx_mutex_enter() below - */ - DBUG_VOID_RETURN; - } + if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT) + { + /* if victim has been signaled by BF thread and/or aborting + is already progressing, following query aborting is not necessary + any more. */ + WSREP_DEBUG("Victim thread %ld bail out conflict_state %s query %s", + thd_get_thread_id(thd), + wsrep_thd_conflict_state_str(thd), wsrep_thd_query(thd)); + DBUG_VOID_RETURN; + } #endif /* WITH_WSREP */ if (trx_t* trx= thd_to_trx(thd)) @@ -19497,70 +19498,66 @@ static struct st_mysql_storage_engine innobase_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; #ifdef WITH_WSREP +static void wsrep_abort_slave_trx( -/*==================*/ - wsrep_seqno_t bf_seqno, - wsrep_seqno_t victim_seqno) -{ - WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " - "caused by:\n\t" - "1) unsupported configuration options combination, please check documentation.\n\t" - "2) a bug in the code.\n\t" - "3) a database corruption.\n Node consistency compromized, " - "need to abort. Restart the node to resync with cluster.", - (long long)bf_seqno, (long long)victim_seqno); - abort(); -} -/*******************************************************************//** -This function is used to kill one transaction in BF. */ -UNIV_INTERN + THD* bf_thd, + THD* victim_thd) +{ + wsrep_seqno_t bf_seqno= wsrep_thd_trx_seqno(bf_thd); + wsrep_seqno_t victim_seqno= wsrep_thd_trx_seqno(victim_thd); + + WSREP_ERROR("wsrep_abort_slave_trx: BF Aborter %s thread: %ld " + "seqno: %lld query_state: %s conflict_state: %s " + "exec mode %s query: %s", + wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", + thd_get_thread_id(bf_thd), + bf_seqno, + wsrep_thd_query_state_str(bf_thd), + wsrep_thd_conflict_state_str(bf_thd), + wsrep_thd_exec_mode_str(bf_thd), + wsrep_thd_query(bf_thd)); + + WSREP_ERROR("wsrep_abort_slave_trx: Victim %s thread: %ld " + "seqno: %lld query_state: %s conflict_state: %s " + "exec mode %s query: %s", + wsrep_thd_is_BF(victim_thd, false) ? "BF" : "normal", + thd_get_thread_id(victim_thd), + wsrep_thd_trx_seqno(victim_thd), + wsrep_thd_query_state_str(victim_thd), + wsrep_thd_conflict_state_str(victim_thd), + wsrep_thd_exec_mode_str(victim_thd), + wsrep_thd_query(victim_thd)); + + WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " + "caused by:\n\t" + "1) unsupported configuration options combination, please check documentation.\n\t" + "2) a bug in the code.\n\t" + "3) a database corruption.\n Node consistency compromized, " + "need to abort. Restart the node to resync with cluster.", + (long long)bf_seqno, (long long)victim_seqno); + abort(); +} + +/** This function is used to kill one transaction in BF. */ +static void -wsrep_innobase_kill_one_trx( -/*========================*/ +wsrep_kill_victim( MYSQL_THD const bf_thd, - const trx_t * const bf_trx, - trx_t *victim_trx, - ibool signal) + const trx_t* const bf_trx, + MYSQL_THD thd, + trx_t* victim_trx, + my_bool signal) { - ut_ad(bf_thd); - ut_ad(victim_trx); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); - - DBUG_ENTER("wsrep_innobase_kill_one_trx"); - THD *thd = (THD *) victim_trx->mysql_thd; - int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd); - - if (!thd) { - DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); - DBUG_VOID_RETURN; - } - - WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); - - WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF - "), victim: (%lu) trx: " TRX_ID_FMT, - signal, bf_seqno, - thd_get_thread_id(thd), - victim_trx->id); - - WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64, - (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", - wsrep_thd_conflict_state(thd, FALSE), - wsrep_thd_ws_handle(thd)->trx_id); + ut_ad(bf_thd); + ut_ad(thd); + ut_ad(victim_trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); - wsrep_thd_LOCK(thd); - DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", - { - const char act[]= - "now " - "wait_for signal.wsrep_after_BF_victim_lock"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); + DBUG_ENTER("wsrep_kill_victim"); + const int64_t bf_seqno= wsrep_thd_trx_seqno(bf_thd); if (wsrep_thd_query_state(thd) == QUERY_EXITING) { WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, @@ -19570,27 +19567,32 @@ wsrep_innobase_kill_one_trx( } if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { - WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d", + WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT + ", state: %s exec %s", victim_trx->id, - wsrep_thd_get_conflict_state(thd)); + wsrep_thd_conflict_state_str(thd), + wsrep_thd_exec_mode_str(thd)); } switch (wsrep_thd_get_conflict_state(thd)) { case NO_CONFLICT: + /* This will cause any call to innobase_kill_query() + for this thd to bail out. */ wsrep_thd_set_conflict_state(thd, MUST_ABORT); break; case MUST_ABORT: WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", victim_trx->id); - wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); DBUG_VOID_RETURN; break; case ABORTED: case ABORTING: // fall through default: - WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", - victim_trx->id, wsrep_thd_get_conflict_state(thd)); + WSREP_DEBUG("victim " TRX_ID_FMT " in state %s", + victim_trx->id, + wsrep_thd_conflict_state_str(thd)); wsrep_thd_UNLOCK(thd); DBUG_VOID_RETURN; break; @@ -19598,6 +19600,7 @@ wsrep_innobase_kill_one_trx( switch (wsrep_thd_query_state(thd)) { case QUERY_COMMITTING: + { enum wsrep_status rcode; WSREP_DEBUG("kill query for: %ld", @@ -19606,8 +19609,7 @@ wsrep_innobase_kill_one_trx( victim_trx->id); if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); + wsrep_abort_slave_trx(bf_thd, thd); } else { wsrep_t *wsrep= get_wsrep(); rcode = wsrep->abort_pre_commit( @@ -19620,8 +19622,8 @@ wsrep_innobase_kill_one_trx( WSREP_DEBUG("cancel commit warning: " TRX_ID_FMT, victim_trx->id); - wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); DBUG_VOID_RETURN; break; case WSREP_OK: @@ -19639,21 +19641,21 @@ wsrep_innobase_kill_one_trx( break; } } - wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); break; + } case QUERY_EXEC: + { /* it is possible that victim trx is itself waiting for some * other lock. We need to cancel this waiting */ WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT, victim_trx->id); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - if (victim_trx->lock.wait_lock) { WSREP_DEBUG("victim has wait flag: %ld", - thd_get_thread_id(thd)); + thd_get_thread_id(thd)); lock_t* wait_lock = victim_trx->lock.wait_lock; if (wait_lock) { @@ -19662,107 +19664,166 @@ wsrep_innobase_kill_one_trx( lock_cancel_waiting_and_release(wait_lock); } - wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); } else { /* abort currently executing query */ - DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", - thd_get_thread_id(thd))); WSREP_DEBUG("kill query for: %ld", - thd_get_thread_id(thd)); - /* Note that innobase_kill_query will take lock_mutex - and trx_mutex */ - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); + thd_get_thread_id(thd)); /* for BF thd, we need to prevent him from committing */ if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); + wsrep_abort_slave_trx(bf_thd, thd); } + + /* Note that innobase_kill_query will take lock_mutex + and trx_mutex */ + wsrep_thd_awake(thd, signal); + wsrep_thd_UNLOCK(thd); } break; + } case QUERY_IDLE: { WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); if (wsrep_thd_exec_mode(thd) == REPL_RECV) { WSREP_DEBUG("kill BF IDLE, seqno: %lld", - (long long)wsrep_thd_trx_seqno(thd)); - wsrep_thd_UNLOCK(thd); - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); - DBUG_VOID_RETURN; + wsrep_thd_trx_seqno(thd)); + wsrep_abort_slave_trx(bf_thd, thd); } - /* This will lock thd from proceeding after net_read() */ - wsrep_thd_set_conflict_state(thd, ABORTING); + /* This will lock thd from proceeding after net_read() and + will cause any call to innobase_kill_query() for this + thd to bail out. */ + wsrep_thd_set_conflict_state(thd, ABORTING); wsrep_lock_rollback(); if (wsrep_aborting_thd_contains(thd)) { WSREP_WARN("duplicate thd aborter %lu", - (ulong) thd_get_thread_id(thd)); + thd_get_thread_id(thd)); } else { wsrep_aborting_thd_enqueue(thd); - DBUG_PRINT("wsrep",("enqueuing trx abort for %lu", - thd_get_thread_id(thd))); WSREP_DEBUG("enqueuing trx abort for (%lu)", - thd_get_thread_id(thd)); + thd_get_thread_id(thd)); } - DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); WSREP_DEBUG("signaling aborter"); wsrep_unlock_rollback(); wsrep_thd_UNLOCK(thd); - break; } default: WSREP_WARN("bad wsrep query state: %d", wsrep_thd_query_state(thd)); - wsrep_thd_UNLOCK(thd); - break; + ut_error; } - DBUG_VOID_RETURN; } +/******************************************************************* +This function is used to kill one transaction in BF. */ +void +wsrep_innobase_kill_one_trx( + MYSQL_THD const bf_thd, + const trx_t * const bf_trx, + trx_t *victim_trx, + my_bool signal) +{ + ut_ad(bf_thd); + ut_ad(victim_trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); + + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + THD *thd= (THD *) victim_trx->mysql_thd; + + /* Here we need to lock THD::LOCK_thd_data to protect from + concurrent usage or disconnect or delete. */ + DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock"); + wsrep_thd_LOCK(thd); + DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock"); + + WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); + + WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s " + "trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld query_state: %s conflict_state: %s " + "exec mode %s query: %s", + wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", + bf_trx ? bf_trx->id : TRX_ID_MAX, + thd_get_thread_id(bf_thd), + wsrep_thd_trx_seqno(bf_thd), + wsrep_thd_query_state_str(bf_thd), + wsrep_thd_conflict_state_str(bf_thd), + wsrep_thd_exec_mode_str(bf_thd), + wsrep_thd_query(bf_thd)); + + WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s " + "trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld query_state: %s conflict_state: %s " + "exec mode %s query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + victim_trx->id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_query_state_str(thd), + wsrep_thd_conflict_state_str(thd), + wsrep_thd_exec_mode_str(thd), + wsrep_thd_query(thd)); + + wsrep_kill_victim(bf_thd, bf_trx, thd, victim_trx, signal); + DBUG_VOID_RETURN; +} + static void wsrep_abort_transaction( -/*====================*/ handlerton* hton, THD *bf_thd, THD *victim_thd, my_bool signal) { - DBUG_ENTER("wsrep_abort_transaction"); - - trx_t* victim_trx = thd_to_trx(victim_thd); - trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; - - WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %d", - wsrep_thd_query(bf_thd), - wsrep_thd_query(victim_thd), - wsrep_thd_conflict_state(victim_thd, FALSE)); - - if (victim_trx) { - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - wsrep_innobase_kill_one_trx(bf_thd, bf_trx, victim_trx, signal); - lock_mutex_exit(); - trx_mutex_exit(victim_trx); - wsrep_srv_conc_cancel_wait(victim_trx); - DBUG_VOID_RETURN; - } else { - WSREP_DEBUG("victim does not have transaction"); - wsrep_thd_LOCK(victim_thd); - wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT); - wsrep_thd_UNLOCK(victim_thd); - wsrep_thd_awake(victim_thd, signal); - } + DBUG_ENTER("wsrep_abort_transaction"); + /* Note that victim thd is protected with + THD::LOCK_thd_data here. */ + trx_t* victim_trx= thd_to_trx(victim_thd); + trx_t* bf_trx= thd_to_trx(bf_thd); + + WSREP_DEBUG("wsrep_abort_transaction: BF:" + " thread %ld query_state %s conflict_state %s" + " exec %s query %s trx " TRX_ID_FMT, + thd_get_thread_id(bf_thd), + wsrep_thd_query_state_str(bf_thd), + wsrep_thd_conflict_state_str(bf_thd), + wsrep_thd_exec_mode_str(bf_thd), + wsrep_thd_query(bf_thd), + bf_trx ? bf_trx->id : 0); + + WSREP_DEBUG("wsrep_abort_transaction: victim:" + " thread %ld query_state %s conflict_state %s" + " exec %s query %s trx " TRX_ID_FMT, + thd_get_thread_id(victim_thd), + wsrep_thd_query_state_str(victim_thd), + wsrep_thd_conflict_state_str(victim_thd), + wsrep_thd_exec_mode_str(victim_thd), + wsrep_thd_query(victim_thd), + victim_trx ? victim_trx->id : 0); + + if (victim_trx) { + lock_mutex_enter(); + trx_mutex_enter(victim_trx); + wsrep_kill_victim(bf_thd, bf_trx, victim_thd, victim_trx, signal); + lock_mutex_exit(); + trx_mutex_exit(victim_trx); + wsrep_srv_conc_cancel_wait(victim_trx); + } else { + wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT); + wsrep_thd_awake(victim_thd, signal); + wsrep_thd_UNLOCK(victim_thd); + } - DBUG_VOID_RETURN; + DBUG_VOID_RETURN; } static diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 3eab2135969..427e57f09d2 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -233,12 +233,11 @@ innobase_casedn_str( char* a); /*!< in/out: string to put in lower case */ #ifdef WITH_WSREP -UNIV_INTERN void wsrep_innobase_kill_one_trx(MYSQL_THD const thd_ptr, const trx_t * const bf_trx, trx_t *victim_trx, - ibool signal); + my_bool signal); int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length, unsigned int buf_length); diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index df1488b9df3..7da5e16a762 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -184,13 +184,11 @@ lock_wait_table_reserve_slot( check if lock timeout was for priority thread, as a side effect trigger lock monitor @param[in] trx transaction owning the lock -@param[in] locked true if trx and lock_sys_mutex is ownd @return false for regular lock timeout */ static bool wsrep_is_BF_lock_timeout( - const trx_t* trx, - bool locked = true) + const trx_t* trx) { bool long_wait= (trx->error_state != DB_DEADLOCK && trx->is_wsrep() && @@ -204,18 +202,6 @@ wsrep_is_BF_lock_timeout( ib::info() << "WSREP: BF lock wait long for trx:" << trx->id << " query: " << wsrep_thd_query(trx->mysql_thd); - if (!locked) - lock_mutex_enter(); - - ut_ad(lock_mutex_own()); - - wsrep_trx_print_locking(stderr, trx, 3000); - /* Note this will release lock_sys mutex */ - lock_print_info_all_transactions(stderr); - - if (locked) - lock_mutex_enter(); - return was_wait; } else return false; @@ -407,7 +393,7 @@ lock_wait_suspend_thread( && wait_time > (double) lock_wait_timeout #ifdef WITH_WSREP && (!trx->is_wsrep() - || (!wsrep_is_BF_lock_timeout(trx, false) + || (!wsrep_is_BF_lock_timeout(trx) && trx->error_state != DB_DEADLOCK)) #endif /* WITH_WSREP */ ) { |