diff options
23 files changed, 334 insertions, 427 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result index 44bb64c9d63..cc7e17d7c58 100644 --- a/mysql-test/suite/galera/r/galera_UK_conflict.result +++ b/mysql-test/suite/galera/r/galera_UK_conflict.result @@ -68,9 +68,9 @@ f1 f2 f3 10 10 0 INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); -DROP TABLE t1; -test scenario 2 -connection node_1; +SELECT COUNT(*) FROM t1; +COUNT(*) +7 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); INSERT INTO t1 VALUES (1, 1, 0); INSERT INTO t1 VALUES (3, 3, 0); @@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1; SET GLOBAL wsrep_provider_options = 'dbug='; SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; connection node_1; -COMMIT; -connection node_1a; -SET SESSION wsrep_on = 0; +SELECT COUNT(*) FROM t1; +COUNT(*) +7 SET SESSION wsrep_on = 1; SET GLOBAL wsrep_provider_options = 'dbug='; SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb"; @@ -125,6 +125,7 @@ f1 f2 f3 3 3 1 4 4 2 5 5 2 +8 8 8 10 10 0 INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result deleted file mode 100644 index c3eae243f47..00000000000 --- a/mysql-test/suite/galera/r/galera_bf_kill_debug.result +++ /dev/null @@ -1,54 +0,0 @@ -connection node_2; -connection node_1; -connection node_2; -CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; -insert into t1 values (NULL,1); -connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2a; -truncate t1; -insert into t1 values (1,0); -begin; -update t1 set b=2 where a=1; -connection node_2; -set session wsrep_sync_wait=0; -connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2b; -SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; -connection node_1; -select * from t1; -a b -1 0 -update t1 set b= 
1 where a=1; -connection node_2b; -SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; -connection node_2; -SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; -connection node_2b; -SET DEBUG_SYNC='now WAIT_FOR awake_reached'; -SET GLOBAL debug_dbug = ""; -SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; -SET DEBUG_SYNC = "now SIGNAL continue_kill"; -connection node_2; -connection node_2a; -select * from t1; -connection node_2; -SET DEBUG_SYNC = "RESET"; -drop table t1; -disconnect node_2a; -connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2a; -CREATE TABLE t1 (i int primary key); -SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; -INSERT INTO t1 VALUES (1); -connection node_2; -SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; -SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; -SET DEBUG_SYNC='RESET'; -connection node_2a; -connection node_2; -select * from t1; -i -1 -disconnect node_2a; -connection node_1; -drop table t1; diff --git a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result index 6e55c59ad15..2493075b635 100644 --- a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result +++ b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result @@ -23,22 +23,6 @@ connection node_1a; connection node_1b; connection node_2; connection node_2a; -connection node_1; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -COUNT(*) -20001 -SELECT COUNT(*) FROM child; -COUNT(*) -10000 -connection node_2; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -COUNT(*) -20001 -SELECT COUNT(*) FROM child; -COUNT(*) -10000 DROP TABLE child; DROP TABLE parent; DROP TABLE ten; diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test index 9978ba9b8bf..25f414a5764 100644 --- 
a/mysql-test/suite/galera/t/galera_UK_conflict.test +++ b/mysql-test/suite/galera/t/galera_UK_conflict.test @@ -140,6 +140,14 @@ SELECT * FROM t1; # original state in node 1 INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); +SELECT COUNT(*) FROM t1; +SELECT * FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 7 FROM t1 +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; +SELECT * FROM t1; DROP TABLE t1; @@ -199,9 +207,9 @@ INSERT INTO t1 VALUES (5, 5, 2); --source include/galera_set_sync_point.inc --connection node_1 ---send COMMIT - ---connection node_1a +--let $wait_condition = SELECT COUNT(*) = 7 FROM t1 +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; # wait for the local commit to enter in commit monitor wait state --let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync --source include/galera_wait_sync_point.inc @@ -273,4 +281,13 @@ SELECT * FROM t1; INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (8,8,8); +SELECT COUNT(*) FROM t1; +SELECT * FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 7 FROM t1 +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; +SELECT * FROM t1; + DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf deleted file mode 100644 index e68f891792c..00000000000 --- a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf +++ /dev/null @@ -1,7 +0,0 @@ -!include ../galera_2nodes.cnf - -[mysqld.1] -wsrep-debug=SERVER - -[mysqld.2] -wsrep-debug=SERVER diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test deleted file mode 100644 index b687a5a6a67..00000000000 --- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test +++ /dev/null @@ -1,140 +0,0 @@ ---source include/galera_cluster.inc ---source include/have_innodb.inc ---source include/have_debug.inc ---source 
include/have_debug_sync.inc - -# -# Test case 7: -# 1. Start a transaction on node_2, -# and leave it pending while holding a row locked -# 2. set sync point pause applier -# 3. send a conflicting write on node_1, it will pause -# at the sync point -# 4. though another connection to node_2, kill the local -# transaction -# - ---connection node_2 -CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; -insert into t1 values (NULL,1); - -# -# connection node_2a runs a local transaction, that is victim of BF abort -# and victim of KILL command by connection node_2 -# ---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 ---connection node_2a -truncate t1; -insert into t1 values (1,0); - -# start a transaction that will conflict with later applier -begin; -update t1 set b=2 where a=1; - ---connection node_2 -set session wsrep_sync_wait=0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 ---source include/wait_condition.inc - ---let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` - -# connection node_2b is for controlling debug syn points -# first set a sync point for applier, to pause during BF aborting -# and before THD::awake would be called -# ---connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 ---connection node_2b -SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; - -# -# replicate an update, which will BF abort the victim node_2a -# however, while applier in node 2 is handling the abort, -# it will pause in sync point set by node_2b -# ---connection node_1 -select * from t1; -update t1 set b= 1 where a=1; - -# -# wait until the applying of above update has reached the sync point -# in node 2 -# ---connection node_2b -SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; - ---connection node_2 -# -# pause KILL execution before awake -# -SET DEBUG_SYNC= 
'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; ---disable_query_log ---send_eval KILL $k_thread ---enable_query_log - - ---connection node_2b -SET DEBUG_SYNC='now WAIT_FOR awake_reached'; - -# release applier and KILL operator -SET GLOBAL debug_dbug = ""; -SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; -SET DEBUG_SYNC = "now SIGNAL continue_kill"; - ---connection node_2 ---reap - ---connection node_2a ---error 0,1213 -select * from t1; - ---connection node_2 -SET DEBUG_SYNC = "RESET"; - -drop table t1; - ---disconnect node_2a -# -# Test case 7: -# run a transaction in node 2, and set a sync point to pause the transaction -# in commit phase. -# Through another connection to node 2, kill the committing transaction by -# KILL QUERY command -# - ---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 ---connection node_2a ---let $connection_id = `SELECT CONNECTION_ID()` - -CREATE TABLE t1 (i int primary key); - -# Set up sync point -SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; - -# Send insert which will block in the sync point above ---send INSERT INTO t1 VALUES (1) - ---connection node_2 -SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; - ---disable_query_log ---disable_result_log -# victim has passed the point of no return, kill is not possible anymore ---eval KILL QUERY $connection_id ---enable_result_log ---enable_query_log - -SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; -SET DEBUG_SYNC='RESET'; ---connection node_2a ---error 0,1213 ---reap - ---connection node_2 -# victim was able to complete the INSERT -select * from t1; - ---disconnect node_2a - ---connection node_1 -drop table t1; - diff --git a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test index fadc94d78ff..3b4b427f551 100644 --- a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test +++ b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test @@ -54,15 
+54,11 @@ INSERT INTO parent VALUES (1, 0); --connection node_2a --reap ---connection node_1 -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -SELECT COUNT(*) FROM child; - ---connection node_2 -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) FROM parent; -SELECT COUNT(*) FROM child; +# +# ALTER TABLE could bf kill one or more of INSERTs to parent, so +# the actual number of rows in PARENT depends on whether +# the INSERT is committed before ALTER TABLE is executed +# DROP TABLE child; DROP TABLE parent; diff --git a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test index c0bbe5af8cf..241b62dbf8c 100644 --- a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test +++ b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test @@ -94,11 +94,13 @@ SELECT * FROM t1; --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1 +--disconnect node_1a --connection node_2 --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2 +--disconnect node_2a --enable_query_log diff --git a/sql/handler.cc b/sql/handler.cc index 4e891f5d640..f7fb0ea4854 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -859,7 +859,7 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin, { handlerton *hton= plugin_hton(plugin); - mysql_mutex_assert_owner(&thd->LOCK_thd_data); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); if (hton->state == SHOW_OPTION_YES && hton->kill_query && thd_get_ha_data(thd, hton)) hton->kill_query(hton, thd, *(enum thd_kill_levels *) level); diff --git a/sql/log_event.cc b/sql/log_event.cc index c1c753d69fd..a1562016aa4 100644 --- 
a/sql/log_event.cc +++ b/sql/log_event.cc @@ -8987,6 +8987,18 @@ err: #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +static bool wsrep_must_replay(THD *thd) +{ +#ifdef WITH_WSREP + mysql_mutex_lock(&thd->LOCK_thd_data); + bool res= WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay; + mysql_mutex_unlock(&thd->LOCK_thd_data); + return res; +#else + return false; +#endif +} + int Xid_log_event::do_apply_event(rpl_group_info *rgi) { bool res; @@ -9051,14 +9063,8 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) res= trans_commit(thd); /* Automatically rolls back on error. */ thd->release_transactional_locks(); - mysql_mutex_lock(&thd->LOCK_thd_data); -#ifdef WITH_WSREP - if (sub_id && (!res || (WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay))) -#else - if (sub_id && !res) -#endif /* WITH_WSREP */ + if (sub_id && (!res || wsrep_must_replay(thd))) rpl_global_gtid_slave_state->update_state_hash(sub_id, &gtid, hton, rgi); - mysql_mutex_unlock(&thd->LOCK_thd_data); /* Increment the global status commit count variable */ diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 6566bb372d8..18067a4d0ec 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 Codership Oy <info@codership.com> +/* Copyright 2018-2021 Codership Oy <info@codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,12 +29,14 @@ extern "C" my_bool wsrep_on(const THD *thd) extern "C" void wsrep_thd_LOCK(const THD *thd) { + mysql_mutex_lock(&thd->LOCK_thd_kill); mysql_mutex_lock(&thd->LOCK_thd_data); } extern "C" void wsrep_thd_UNLOCK(const THD *thd) { mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); } extern "C" void wsrep_thd_kill_LOCK(const THD *thd) @@ -189,6 +191,8 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd)); if (!victim_thd || !wsrep_on(bf_thd)) return; + wsrep_thd_LOCK(victim_thd); + WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s", victim_thd->thread_id, victim_thd->wsrep_trx_id(), @@ -209,6 +213,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, { wsrep_thd_self_abort(victim_thd); } + + wsrep_thd_UNLOCK(victim_thd); + if (bf_thd) { wsrep_store_threadvars(bf_thd); @@ -218,6 +225,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, my_bool signal) { + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", { const char act[]= @@ -234,28 +244,26 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, have wsrep on. Note that this should never interrupt RSU as RSU has paused the provider. */ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + if ((ret || !wsrep_on(victim_thd)) && signal) { - mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill); - mysql_mutex_lock(&victim_thd->LOCK_thd_data); - if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) { WSREP_DEBUG("victim is killed already by %llu, skipping awake", victim_thd->wsrep_aborter); - mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(victim_thd); return false; } - mysql_mutex_lock(&victim_thd->LOCK_thd_kill); victim_thd->wsrep_aborter= bf_thd->thread_id; victim_thd->awake_no_mutex(KILL_QUERY); - mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); - mysql_mutex_unlock(&victim_thd->LOCK_thd_data); - } else { - WSREP_DEBUG("wsrep_thd_bf_abort skipped awake"); } + else + WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd)); + + 
wsrep_thd_UNLOCK(victim_thd); return ret; } @@ -280,8 +288,6 @@ extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) { - mysql_mutex_assert_owner(&thd->LOCK_thd_data); - const wsrep::client_state& cs(thd->wsrep_cs()); const enum wsrep::transaction::state tx_state(cs.transaction().state()); switch (tx_state) @@ -295,8 +301,6 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) default: return false; } - - return false; } static inline enum wsrep::key::type diff --git a/sql/slave.cc b/sql/slave.cc index f97033d71af..68ddc611c03 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1069,8 +1069,8 @@ terminate_slave_thread(THD *thd, int error __attribute__((unused)); DBUG_PRINT("loop", ("killing slave thread")); - mysql_mutex_lock(&thd->LOCK_thd_data); mysql_mutex_lock(&thd->LOCK_thd_kill); + mysql_mutex_lock(&thd->LOCK_thd_data); #ifndef DONT_USE_THR_ALARM /* Error codes from pthread_kill are: diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 9efb492e4fb..178808d9431 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -788,6 +788,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); + mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data); /* Variables with default values */ proc_info="login"; @@ -1861,7 +1862,6 @@ void THD::awake_no_mutex(killed_state state_to_set) DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", this, current_thd, (int) state_to_set)); THD_CHECK_SENTRY(this); - mysql_mutex_assert_owner(&LOCK_thd_data); mysql_mutex_assert_owner(&LOCK_thd_kill); print_aborted_warning(3, "KILLED"); @@ -2026,6 +2026,8 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, if (needs_thr_lock_abort) { + bool mutex_released= false; + 
mysql_mutex_lock(&in_use->LOCK_thd_kill); mysql_mutex_lock(&in_use->LOCK_thd_data); /* If not already dying */ if (in_use->killed != KILL_CONNECTION_HARD) @@ -2042,12 +2044,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, thread can see those instances (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) - { signalled|= mysql_lock_abort_for_thread(this, thd_table); - } } +#ifdef WITH_WSREP + if (WSREP(this) && wsrep_thd_is_BF(this, false)) + { + WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s" + " victim %llu query %s", + this->real_id, wsrep_thd_query(this), + in_use->real_id, wsrep_thd_query(in_use)); + wsrep_abort_thd(this, in_use, false); + mutex_released= true; + } +#endif /* WITH_WSREP */ + } + if (!mutex_released) + { + mysql_mutex_unlock(&in_use->LOCK_thd_data); + mysql_mutex_unlock(&in_use->LOCK_thd_kill); } - mysql_mutex_unlock(&in_use->LOCK_thd_data); } DBUG_RETURN(signalled); } @@ -5107,11 +5122,14 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) #ifdef WITH_WSREP /* wsrep applier, replayer and TOI processing threads are ordered by replication provider, relaxed GAP locking protocol can be used - between high priority wsrep threads + between high priority wsrep threads. + Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd, + the caller should guarantee that the BF state won't change. + (e.g. 
InnoDB does it by keeping lock_sys.mutex locked) */ if (WSREP_ON && wsrep_thd_is_BF(const_cast<THD *>(thd), false) && - wsrep_thd_is_BF(const_cast<THD *>(other_thd), true)) + wsrep_thd_is_BF(const_cast<THD *>(other_thd), false)) return 0; #endif /* WITH_WSREP */ rgi= thd->rgi_slave; diff --git a/sql/sql_class.h b/sql/sql_class.h index fcc70cc4eaf..7570211f586 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3327,11 +3327,11 @@ public: void awake_no_mutex(killed_state state_to_set); void awake(killed_state state_to_set) { - mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_lock(&LOCK_thd_kill); + mysql_mutex_lock(&LOCK_thd_data); awake_no_mutex(state_to_set); - mysql_mutex_unlock(&LOCK_thd_kill); mysql_mutex_unlock(&LOCK_thd_data); + mysql_mutex_unlock(&LOCK_thd_kill); } void abort_current_cond_wait(bool force); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index ba3abd73932..27d716c2162 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -9188,7 +9188,8 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ faster and do a harder kill than KILL_SYSTEM_THREAD; */ - mysql_mutex_lock(&tmp->LOCK_thd_data); // for various wsrep* checks below + mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage + #ifdef WITH_WSREP if (((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) && @@ -9203,23 +9204,23 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id) { /* victim is in hit list already, bail out */ - WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()", - tmp->wsrep_aborter); + WSREP_DEBUG("victim %llu has wsrep aborter: %lu, skipping awake()", + id, tmp->wsrep_aborter); error= 0; } else #endif /* WITH_WSREP */ { - WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d", - thd->thread_id, id, tmp->wsrep_aborter, kill_signal); + 
WSREP_DEBUG("kill_one_thread victim: %llu wsrep_aborter %lu by signal %d", + id, tmp->wsrep_aborter, kill_signal); tmp->awake_no_mutex(kill_signal); - WSREP_DEBUG("victim: %llu taken care of", id); error= 0; } } else error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : ER_KILL_DENIED_ERROR); + mysql_mutex_unlock(&tmp->LOCK_thd_data); } mysql_mutex_unlock(&tmp->LOCK_thd_kill); @@ -9333,6 +9334,18 @@ static void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) { uint error; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (likely(!(error= kill_one_thread(thd, id, state, type)))) { if (!thd->killed) @@ -9342,6 +9355,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) } else my_error(error, MYF(0), id); +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd)); +#endif /* WITH_WSREP */ } @@ -9350,6 +9368,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) { uint error; ha_rows rows; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill_user called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (likely(!(error= kill_threads_for_user(thd, user, state, &rows)))) my_ok(thd, rows); else @@ -9360,6 +9390,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) */ my_error(error, MYF(0), user->host.str, user->user.str); } +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), user->user.str); +#endif /* WITH_WSREP */ } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 8d0030dfe66..94cbc6be614 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ 
-3497,8 +3497,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg) thd->variables.server_id == arg->slave_server_id) { arg->thd= thd; - mysql_mutex_lock(&thd->LOCK_thd_data); mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete + mysql_mutex_lock(&thd->LOCK_thd_data); return 1; } return 0; diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc index e14d4a096ae..8473489d2e1 100644 --- a/sql/wsrep_client_service.cc +++ b/sql/wsrep_client_service.cc @@ -69,20 +69,15 @@ bool Wsrep_client_service::interrupted( wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const { DBUG_ASSERT(m_thd == current_thd); - /* Underlying mutex in lock object points to LOCK_thd_data, which - protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed. - Locking order is: - 1) LOCK_thd_data - 2) LOCK_thd_kill */ + /* Underlying mutex in lock object points to THD::LOCK_thd_data, which + protects m_thd->wsrep_trx() and protects us from thd delete. */ mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native())); - mysql_mutex_lock(&m_thd->LOCK_thd_kill); bool ret= (m_thd->killed != NOT_KILLED); if (ret) { WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d", m_thd->killed, m_thd->wsrep_trx().state()); } - mysql_mutex_unlock(&m_thd->LOCK_thd_kill); return ret; } diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 74015fdee28..3fc03054014 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1956,6 +1956,11 @@ static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len) case SQLCOM_DROP_TABLE: err= wsrep_drop_table_query(thd, buf, buf_len); break; + case SQLCOM_KILL: + WSREP_DEBUG("KILL as TOI: %s", thd->query()); + err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), + buf, buf_len); + break; case SQLCOM_CREATE_ROLE: if (sp_process_definer(thd)) { @@ -2319,7 +2324,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, request_thd, granted_thd); 
ticket->wsrep_report(wsrep_debug); - mysql_mutex_lock(&granted_thd->LOCK_thd_data); + /* Here we will call wsrep_abort_transaction so we should hold + THD::LOCK_thd_data to protect victim from concurrent usage + and THD::LOCK_thd_kill to protect from disconnect or delete. */ + wsrep_thd_LOCK(granted_thd); + if (wsrep_thd_is_toi(granted_thd) || wsrep_thd_is_applying(granted_thd)) { @@ -2327,21 +2336,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for SR in aborting state"); ticket->wsrep_report(wsrep_debug); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(granted_thd); } else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) { - WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", + WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", schema, schema_len, request_thd, granted_thd); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_abort_thd(request_thd, granted_thd, 1); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(granted_thd); unireg_abort(1); } } @@ -2350,15 +2360,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for FLUSH"); ticket->wsrep_report(wsrep_debug); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(granted_thd); } else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) { WSREP_DEBUG("DROP caused BF abort, conf %s", wsrep_thd_transaction_state_str(granted_thd)); ticket->wsrep_report(wsrep_debug); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_abort_thd(request_thd, granted_thd, 1); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2367,8 
+2378,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, ticket->wsrep_report(wsrep_debug); if (granted_thd->wsrep_trx().active()) { - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - wsrep_abort_thd(request_thd, granted_thd, 1); + wsrep_abort_thd(request_thd, granted_thd, true); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2376,10 +2388,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, Granted_thd is likely executing with wsrep_on=0. If the requesting thd is BF, BF abort and wait. */ - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); if (wsrep_thd_is_BF(request_thd, FALSE)) { ha_abort_transaction(request_thd, granted_thd, TRUE); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2401,6 +2414,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, static bool abort_replicated(THD *thd) { bool ret_code= false; + wsrep_thd_LOCK(thd); if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) { WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); @@ -2408,6 +2422,9 @@ static bool abort_replicated(THD *thd) (void)wsrep_abort_thd(thd, thd, TRUE); ret_code= true; } + else + wsrep_thd_UNLOCK(thd); + return ret_code; } @@ -2445,8 +2462,10 @@ static my_bool have_client_connections(THD *thd, void*) (longlong) thd->thread_id)); if (is_client_connection(thd) && thd->killed == KILL_CONNECTION) { + WSREP_DEBUG("Informing thread %lld that it's time to die", + thd->thread_id); (void)abort_replicated(thd); - return 1; + return true; } return 0; } @@ -2483,6 +2502,8 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd) { DBUG_PRINT("quit", ("Informing thread %lld that it's time to die", (longlong) thd->thread_id)); + WSREP_DEBUG("Informing thread %lld that it's time to die", + thd->thread_id); /* We skip slave threads & scheduler on this 
first loop through. */ if (is_client_connection(thd) && thd != caller_thd) { diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 2e02110d697..3dca690c889 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Codership Oy <info@codership.com> +/* Copyright (C) 2013-2021 Codership Oy <info@codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -314,7 +314,8 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) THD *victim_thd= (THD *) victim_thd_ptr; THD *bf_thd= (THD *) bf_thd_ptr; - mysql_mutex_lock(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd) might not be true. @@ -327,16 +328,14 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) { WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? 
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); - mysql_mutex_unlock(&victim_thd->LOCK_thd_data); ha_abort_transaction(bf_thd, victim_thd, signal); - mysql_mutex_lock(&victim_thd->LOCK_thd_data); } else { WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + wsrep_thd_UNLOCK(victim_thd); } - mysql_mutex_unlock(&victim_thd->LOCK_thd_data); DBUG_RETURN(1); } @@ -345,6 +344,9 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) WSREP_LOG_THD(bf_thd, "BF aborter before"); WSREP_LOG_THD(victim_thd, "victim before"); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + DBUG_EXECUTE_IF("sync.wsrep_bf_abort", { const char act[]= @@ -358,7 +360,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) { WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); - switch (victim_thd->wsrep_trx().state()) + switch (victim_thd->wsrep_trx().state()) { case wsrep::transaction::s_aborting: /* fall through */ case wsrep::transaction::s_aborted: @@ -367,7 +369,13 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) default: break; } + /* Test: galera_create_table_as_select. Here we enter wsrep-lib + were LOCK_thd_data will be acquired, thus we need to release it. + However, we can still hold LOCK_thd_kill to protect from + disconnect or delete. */ + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); } bool ret; @@ -375,11 +383,21 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) if (wsrep_thd_is_toi(bf_thd)) { + /* Here we enter wsrep-lib were LOCK_thd_data will be acquired, + thus we need to release it. However, we can still hold + LOCK_thd_kill to protect from disconnect or delete. 
*/ + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); } else { + /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib + were LOCK_thd_data will be acquired and later LOCK_thd_kill + thus we need to release them. */ + wsrep_thd_UNLOCK(victim_thd); ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); + wsrep_thd_LOCK(victim_thd); } if (ret) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9188b2c8f47..23df38c64ab 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -5193,7 +5193,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) { ut_ad(trx->mysql_thd == thd); #ifdef WITH_WSREP - if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) + if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim) /* if victim has been signaled by BF thread and/or aborting is already progressing, following query aborting is not necessary any more. Also, BF thread should own trx mutex for the victim. 
*/ @@ -5203,6 +5203,8 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) if (lock_t *lock= trx->lock.wait_lock) { trx_mutex_enter(trx); + if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) + trx->lock.was_chosen_as_deadlock_victim= TRUE; lock_cancel_waiting_and_release(lock); trx_mutex_exit(trx); } @@ -18604,6 +18606,40 @@ static struct st_mysql_storage_engine innobase_storage_engine= #ifdef WITH_WSREP +static +void +wsrep_kill_victim( + MYSQL_THD const bf_thd, + MYSQL_THD thd, + trx_t* victim_trx, + my_bool signal) +{ + DBUG_ENTER("wsrep_kill_victim"); + + /* Mark transaction as a victim for Galera abort */ + victim_trx->lock.was_chosen_as_wsrep_victim= true; + if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) + { + WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); + wsrep_thd_UNLOCK(thd); + DBUG_VOID_RETURN; + } + + if (wsrep_thd_bf_abort(bf_thd, thd, signal)) + { + lock_t* wait_lock= victim_trx->lock.wait_lock; + if (wait_lock) + { + DBUG_ASSERT(victim_trx->is_wsrep()); + WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(wait_lock); + } + } + + DBUG_VOID_RETURN; +} + /** This function is used to kill one transaction. This transaction was open on this node (not-yet-committed), and a @@ -18627,87 +18663,65 @@ comparison as in the local certification failure. @param[in] bf_thd Brute force (BF) thread @param[in,out] victim_trx Vimtim trx to be killed @param[in] signal Should victim be signaled */ -UNIV_INTERN void wsrep_innobase_kill_one_trx( - THD* bf_thd, + MYSQL_THD const bf_thd, trx_t *victim_trx, - bool signal) + my_bool signal) { - ut_ad(bf_thd); - ut_ad(victim_trx); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); - - DBUG_ENTER("wsrep_innobase_kill_one_trx"); - - THD *thd= (THD *) victim_trx->mysql_thd; - ut_ad(thd); - /* Note that bf_trx might not exist here e.g. 
on MDL conflict - case (test: galera_concurrent_ctas). Similarly, BF thread - could be also acquiring MDL-lock causing victim to be - aborted. However, we have not yet called innobase_trx_init() - for BF transaction (test: galera_many_columns)*/ - trx_t* bf_trx= thd_to_trx(bf_thd); - DBUG_ASSERT(wsrep_on(bf_thd)); - - wsrep_thd_LOCK(thd); - - WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); - - WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld " - "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " - "query: %s", - wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", - bf_trx ? bf_trx->id : TRX_ID_MAX, - thd_get_thread_id(bf_thd), - wsrep_thd_trx_seqno(bf_thd), - wsrep_thd_client_state_str(bf_thd), - wsrep_thd_client_mode_str(bf_thd), - wsrep_thd_transaction_state_str(bf_thd), - wsrep_thd_query(bf_thd)); - - WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld " - "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " - "query: %s", - wsrep_thd_is_BF(thd, false) ? "BF" : "normal", - victim_trx->id, - thd_get_thread_id(thd), - wsrep_thd_trx_seqno(thd), - wsrep_thd_client_state_str(thd), - wsrep_thd_client_mode_str(thd), - wsrep_thd_transaction_state_str(thd), - wsrep_thd_query(thd)); - - /* Mark transaction as a victim for Galera abort */ - victim_trx->lock.was_chosen_as_wsrep_victim= true; - if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) - { - WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); - wsrep_thd_UNLOCK(thd); - DBUG_VOID_RETURN; - } + ut_ad(bf_thd); + ut_ad(victim_trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); - /* Note that we need to release this as it will be acquired - below in wsrep-lib */ - wsrep_thd_UNLOCK(thd); - DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + THD *thd= (THD *) victim_trx->mysql_thd; + /* Note that bf_trx might not exist here e.g. 
on MDL conflict + case (test: galera_concurrent_ctas).*/ + trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd); - if (wsrep_thd_bf_abort(bf_thd, thd, signal)) - { - lock_t* wait_lock = victim_trx->lock.wait_lock; - if (wait_lock) { - DBUG_ASSERT(victim_trx->is_wsrep()); - WSREP_DEBUG("victim has wait flag: %lu", - thd_get_thread_id(thd)); - - WSREP_DEBUG("canceling wait lock"); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - lock_cancel_waiting_and_release(wait_lock); - } - } + if (!thd) + { + WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); + DBUG_VOID_RETURN; + } - DBUG_VOID_RETURN; + /* Here we need to lock THD::LOCK_thd_data to protect from + concurrent usage or disconnect or delete. */ + DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock"); + wsrep_thd_LOCK(thd); + DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock"); + + WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); + + WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s " + "trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld client_state: %s client_mode: %s " + "trx_state %s query: %s", + wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", + bf_trx ? bf_trx->id : TRX_ID_MAX, + thd_get_thread_id(bf_thd), + wsrep_thd_trx_seqno(bf_thd), + wsrep_thd_client_state_str(bf_thd), + wsrep_thd_client_mode_str(bf_thd), + wsrep_thd_transaction_state_str(bf_thd), + wsrep_thd_query(bf_thd)); + + WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s " + "trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld client_state: %s client_mode: %s " + "trx_state %s query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + victim_trx->id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_client_mode_str(thd), + wsrep_thd_transaction_state_str(thd), + wsrep_thd_query(thd)); + + wsrep_kill_victim(bf_thd, thd, victim_trx, signal); + DBUG_VOID_RETURN; } /** This function forces the victim transaction to abort. 
Aborting the @@ -18727,30 +18741,43 @@ wsrep_abort_transaction( THD *victim_thd, my_bool signal) { - DBUG_ENTER("wsrep_abort_transaction"); - ut_ad(bf_thd); - ut_ad(victim_thd); - - trx_t* victim_trx = thd_to_trx(victim_thd); - - WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s", - wsrep_thd_query(bf_thd), - wsrep_thd_query(victim_thd), - wsrep_thd_transaction_state_str(victim_thd)); - - if (victim_trx) { - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal); - trx_mutex_exit(victim_trx); - lock_mutex_exit(); - wsrep_srv_conc_cancel_wait(victim_trx); - DBUG_VOID_RETURN; - } else { - wsrep_thd_bf_abort(bf_thd, victim_thd, signal); - } - - DBUG_VOID_RETURN; + /* Note that victim thd is protected with + THD::LOCK_thd_data and THD::LOCK_thd_kill here. */ + trx_t* victim_trx= thd_to_trx(victim_thd); + trx_t* bf_trx= thd_to_trx(bf_thd); + WSREP_DEBUG("wsrep_abort_transaction: BF:" + " thread %ld client_state %s client_mode %s" + " trans_state %s query %s trx " TRX_ID_FMT, + thd_get_thread_id(bf_thd), + wsrep_thd_client_state_str(bf_thd), + wsrep_thd_client_mode_str(bf_thd), + wsrep_thd_transaction_state_str(bf_thd), + wsrep_thd_query(bf_thd), + bf_trx ? bf_trx->id : 0); + + WSREP_DEBUG("wsrep_abort_transaction: victim:" + " thread %ld client_state %s client_mode %s" + " trans_state %s query %s trx " TRX_ID_FMT, + thd_get_thread_id(victim_thd), + wsrep_thd_client_state_str(victim_thd), + wsrep_thd_client_mode_str(victim_thd), + wsrep_thd_transaction_state_str(victim_thd), + wsrep_thd_query(victim_thd), + victim_trx ? 
victim_trx->id : 0); + + if (victim_trx) + { + lock_mutex_enter(); + trx_mutex_enter(victim_trx); + wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal); + lock_mutex_exit(); + trx_mutex_exit(victim_trx); + wsrep_srv_conc_cancel_wait(victim_trx); + } + else + { + wsrep_thd_bf_abort(bf_thd, victim_thd, signal); + } } static diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 108f6925ef7..c5971bc75aa 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -230,12 +230,11 @@ innobase_casedn_str( char* a); /*!< in/out: string to put in lower case */ #ifdef WITH_WSREP -UNIV_INTERN void wsrep_innobase_kill_one_trx( THD* bf_thd, trx_t *victim_trx, - bool signal); + my_bool signal); ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length, diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index 59405e4c1ad..5eb03f668b3 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2020, MariaDB Corporation. +Copyright (c) 2014, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -184,13 +184,11 @@ lock_wait_table_reserve_slot( check if lock timeout was for priority thread, as a side effect trigger lock monitor @param[in] trx transaction owning the lock -@param[in] locked true if trx and lock_sys.mutex is ownd @return false for regular lock timeout */ static bool wsrep_is_BF_lock_timeout( - const trx_t* trx, - bool locked = true) + const trx_t* trx) { bool long_wait= (trx->error_state != DB_DEADLOCK && trx->is_wsrep() && @@ -204,21 +202,10 @@ wsrep_is_BF_lock_timeout( ib::info() << "WSREP: BF lock wait long for trx:" << trx->id << " query: " << wsrep_thd_query(trx->mysql_thd); - if (!locked) - lock_mutex_enter(); - - ut_ad(lock_mutex_own()); - - trx_print_latched(stderr, trx, 3000); - /* Note this will release lock_sys mutex */ - lock_print_info_all_transactions(stderr); - - if (locked) - lock_mutex_enter(); - return was_wait; - } else + } else { return false; + } } #endif /* WITH_WSREP */ @@ -404,7 +391,7 @@ lock_wait_suspend_thread( && wait_time > (double) lock_wait_timeout #ifdef WITH_WSREP && (!trx->is_wsrep() - || (!wsrep_is_BF_lock_timeout(trx, false) + || (!wsrep_is_BF_lock_timeout(trx) && trx->error_state != DB_DEADLOCK)) #endif /* WITH_WSREP */ ) { diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc index ba264ba7d6b..ed02fc5c396 100644 --- a/storage/innobase/srv/srv0conc.cc +++ b/storage/innobase/srv/srv0conc.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -44,6 +44,8 @@ Created 2011/04/18 Sunny Bains #include "dict0dict.h" #include <mysql/service_thd_wait.h> #include <mysql/service_wsrep.h> +#include "wsrep.h" +#include "log.h" /** Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket. */ @@ -121,19 +123,15 @@ srv_conc_enter_innodb_with_atomics( /* We need to take `thd->LOCK_thd_data` to check WSREP thread state */ if (trx->is_wsrep()) { wsrep_thd_LOCK(trx->mysql_thd); - } - if (trx->is_wsrep() && wsrep_thd_is_aborting(trx->mysql_thd)) { - wsrep_thd_UNLOCK(trx->mysql_thd); - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << - "srv_conc_enter due to MUST_ABORT"; + + if (wsrep_thd_is_aborting(trx->mysql_thd)) { + WSREP_DEBUG("srv_conc_enter due to MUST_ABORT for" + TRX_ID_FMT, trx->id); } + wsrep_thd_UNLOCK(trx->mysql_thd); srv_conc_force_enter_innodb(trx); return; } - if (trx->is_wsrep()) { - wsrep_thd_UNLOCK(trx->mysql_thd); - } #endif /* WITH_WSREP */ if (srv_thread_concurrency == 0) { |