summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.test2
-rw-r--r--sql/service_wsrep.cc16
-rw-r--r--sql/slave.cc2
-rw-r--r--sql/sql_class.cc11
-rw-r--r--sql/sql_class.h4
-rw-r--r--sql/sql_parse.cc3
-rw-r--r--sql/sql_repl.cc2
-rw-r--r--sql/wsrep_client_service.cc7
-rw-r--r--sql/wsrep_server_service.cc9
-rw-r--r--storage/innobase/handler/ha_innodb.cc214
10 files changed, 94 insertions, 176 deletions
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
index c322f283757..b687a5a6a67 100644
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test
+++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
@@ -84,7 +84,7 @@ SET DEBUG_SYNC = "now SIGNAL continue_kill";
--reap
--connection node_2a
---error 0,1213,2013
+--error 0,1213
select * from t1;
--connection node_2
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index 67735972400..19ec3d948c4 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -217,8 +217,16 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal)
{
- mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
+ DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
+ {
+ const char act[]=
+ "now "
+ "SIGNAL sync.before_wsrep_thd_abort_reached "
+ "WAIT_FOR signal.before_wsrep_thd_abort";
+ DBUG_ASSERT(!debug_sync_set_action(bf_thd,
+ STRING_WITH_LEN(act)));
+ };);
+
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
/*
Send awake signal if victim was BF aborted or does not
@@ -227,6 +235,8 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
*/
if ((ret || !wsrep_on(victim_thd)) && signal)
{
+ mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
@@ -237,8 +247,10 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
return false;
}
+ mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY);
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
diff --git a/sql/slave.cc b/sql/slave.cc
index 62ceb07b224..c63eee605f5 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1072,8 +1072,8 @@ terminate_slave_thread(THD *thd,
int error __attribute__((unused));
DBUG_PRINT("loop", ("killing slave thread"));
- mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data);
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
#ifndef DONT_USE_THR_ALARM
/*
Error codes from pthread_kill are:
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 883e9c688ff..b8b09b2ef94 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -826,7 +826,6 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
- mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data);
/* Variables with default values */
proc_info="login";
@@ -5289,13 +5288,11 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
#ifdef WITH_WSREP
/* wsrep applier, replayer and TOI processing threads are ordered
by replication provider, relaxed GAP locking protocol can be used
- between high priority wsrep threads.
- Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd,
- the caller should guarantee that the BF state won't change.
- (e.g. InnoDB does it by keeping lock_sys.mutex locked)
+ between high priority wsrep threads
*/
- if (WSREP_ON && wsrep_thd_is_BF(thd, false) &&
- wsrep_thd_is_BF(other_thd, false))
+ if (WSREP_ON &&
+ wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
+ wsrep_thd_is_BF(const_cast<THD *>(other_thd), true))
return 0;
#endif /* WITH_WSREP */
rgi= thd->rgi_slave;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 7aac98eccb2..81452cda035 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3470,11 +3470,11 @@ public:
void awake_no_mutex(killed_state state_to_set);
void awake(killed_state state_to_set)
{
- mysql_mutex_lock(&LOCK_thd_kill);
mysql_mutex_lock(&LOCK_thd_data);
+ mysql_mutex_lock(&LOCK_thd_kill);
awake_no_mutex(state_to_set);
- mysql_mutex_unlock(&LOCK_thd_data);
mysql_mutex_unlock(&LOCK_thd_kill);
+ mysql_mutex_unlock(&LOCK_thd_data);
}
void abort_current_cond_wait(bool force);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 5a2897868a0..af30aa6fde9 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -9248,6 +9248,7 @@ THD *find_thread_by_id(longlong id, bool query_id)
return arg.thd;
}
+ mysql_mutex_lock(&thd->LOCK_thd_data);
/**
kill one thread.
@@ -9372,8 +9373,8 @@ static my_bool kill_threads_callback(THD *thd, kill_threads_callback_arg *arg)
return 1;
if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root))
{
- mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
mysql_mutex_lock(&thd->LOCK_thd_data);
+ mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
}
}
}
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index e15980e9b3c..83806e6dbea 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -3498,8 +3498,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg)
thd->variables.server_id == arg->slave_server_id)
{
arg->thd= thd;
- mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
mysql_mutex_lock(&thd->LOCK_thd_data);
+ mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
return 1;
}
return 0;
diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc
index 6ab2834a219..c2f49dbdd6b 100644
--- a/sql/wsrep_client_service.cc
+++ b/sql/wsrep_client_service.cc
@@ -68,13 +68,20 @@ bool Wsrep_client_service::interrupted(
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
{
DBUG_ASSERT(m_thd == current_thd);
+ /* Underlying mutex in lock object points to LOCK_thd_data, which
+ protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed.
+ Locking order is:
+ 1) LOCK_thd_data
+ 2) LOCK_thd_kill */
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
+ mysql_mutex_lock(&m_thd->LOCK_thd_kill);
bool ret= (m_thd->killed != NOT_KILLED);
if (ret)
{
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
m_thd->killed, m_thd->wsrep_trx().state());
}
+ mysql_mutex_unlock(&m_thd->LOCK_thd_kill);
return ret;
}
diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc
index 4916005e7ed..72c98de3f81 100644
--- a/sql/wsrep_server_service.cc
+++ b/sql/wsrep_server_service.cc
@@ -41,7 +41,6 @@ static void init_service_thd(THD* thd, char* thread_stack)
thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
thd->set_command(COM_SLEEP);
thd->reset_for_next_command(true);
- server_threads.insert(thd); // as wsrep_innobase_kill_one_trx() uses find_thread_by_id()
}
Wsrep_storage_service*
@@ -81,7 +80,6 @@ void Wsrep_server_service::release_storage_service(
static_cast<Wsrep_storage_service*>(storage_service);
THD* thd= ss->m_thd;
wsrep_reset_threadvars(thd);
- server_threads.erase(thd);
delete ss;
delete thd;
}
@@ -95,8 +93,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
streaming transaction is BF aborted and streaming applier
is created from BF aborter context. */
Wsrep_threadvars saved_threadvars(wsrep_save_threadvars());
- if (saved_threadvars.cur_thd)
- wsrep_reset_threadvars(saved_threadvars.cur_thd);
+ wsrep_reset_threadvars(saved_threadvars.cur_thd);
THD *thd= 0;
Wsrep_applier_service *ret= 0;
if (!wsrep_create_threadvars() &&
@@ -113,8 +110,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
}
/* Restore original thread local storage state before returning. */
wsrep_restore_threadvars(saved_threadvars);
- if (saved_threadvars.cur_thd)
- wsrep_store_threadvars(saved_threadvars.cur_thd);
+ wsrep_store_threadvars(saved_threadvars.cur_thd);
return ret;
}
@@ -143,7 +139,6 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se
THD* thd= hps->m_thd;
delete hps;
wsrep_store_threadvars(thd);
- server_threads.erase(thd);
delete thd;
wsrep_delete_threadvars();
}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 6db3908e6da..9026b5844dd 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -63,7 +63,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <my_service_manager.h>
#include <key.h>
-#include <sql_manager.h>
/* Include necessary InnoDB headers */
#include "btr0btr.h"
@@ -4772,8 +4771,6 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
if (lock_t *lock= trx->lock.wait_lock)
{
trx_mutex_enter(trx);
- if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
- trx->lock.was_chosen_as_deadlock_victim= TRUE;
lock_cancel_waiting_and_release(lock);
trx_mutex_exit(trx);
}
@@ -18642,67 +18639,62 @@ static struct st_mysql_storage_engine innobase_storage_engine=
#ifdef WITH_WSREP
-struct bg_wsrep_kill_trx_arg {
- my_thread_id thd_id, bf_thd_id;
- trx_id_t trx_id, bf_trx_id;
- bool signal;
-};
-
-/** Kill one transaction from a background manager thread
+/** This function is used to kill one transaction.
-wsrep_innobase_kill_one_trx() is invoked when lock_sys.mutex and trx mutex
-are taken, wsrep_thd_bf_abort() cannot be used there as it takes THD mutexes
-that must be taken before lock_sys.mutex and trx mutex. That's why
-wsrep_innobase_kill_one_trx only posts the killing task to the manager thread
-and the actual killing happens asynchronously here.
+This transaction was open on this node (not-yet-committed), and a
+conflicting writeset from some other node that was being applied
+caused a locking conflict. First committed (from other node)
+wins, thus open transaction is rolled back. BF stands for
+brute-force: any transaction can get aborted by galera any time
+it is necessary.
-As no mutexes were held we don't know whether THD or trx pointers are still
-valid, so we need to pass thread/trx ids and perform a lookup.
-*/
-static void bg_wsrep_kill_trx(void *void_arg)
-{
- bg_wsrep_kill_trx_arg *arg= (bg_wsrep_kill_trx_arg *)void_arg;
- THD *thd, *bf_thd;
- trx_t *victim_trx;
- bool aborting= false;
+This conflict can happen only when the replicated writeset (from
+other node) is being applied, not when it’s waiting in the queue.
+If our local transaction reached its COMMIT and this conflicting
+writeset was in the queue, then it should fail the local
+certification test instead.
- if ((bf_thd= find_thread_by_id(arg->bf_thd_id)))
- wsrep_thd_LOCK(bf_thd);
- if ((thd= find_thread_by_id(arg->thd_id)))
- wsrep_thd_LOCK(thd);
+A brute force abort is only triggered by a locking conflict
+between a writeset being applied by an applier thread (slave thread)
+and an open transaction on the node, not by a Galera writeset
+comparison as in the local certification failure.
- if (!thd || !bf_thd || !(victim_trx= thd_to_trx(thd)))
- goto ret0;
+@param[in] bf_thd Brute force (BF) thread
+@param[in,out] victim_trx Vimtim trx to be killed
+@param[in] signal Should victim be signaled */
+UNIV_INTERN
+void
+wsrep_innobase_kill_one_trx(
+ THD* bf_thd,
+ trx_t *victim_trx,
+ bool signal)
+{
+ ut_ad(bf_thd);
+ ut_ad(victim_trx);
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(victim_trx));
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- if (victim_trx->id != arg->trx_id
- || victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
- {
- /* Apparently victim trx was meanwhile rolled back or
- committed. Tell bf thd not to wait, in case it already
- started to. */
- trx_t *trx= thd_to_trx(bf_thd);
- if (!trx) {
- /* bf_thd might not be associated with a
- transaction, in case of MDL conflict */
- } else if (lock_t *lock = trx->lock.wait_lock) {
- trx_mutex_enter(trx);
- lock_cancel_waiting_and_release(lock);
- trx_mutex_exit(trx);
- }
- goto ret1;
- }
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *thd= (THD *) victim_trx->mysql_thd;
+ ut_ad(thd);
+ /* Note that bf_trx might not exist here e.g. on MDL conflict
+ case (test: galera_concurrent_ctas). Similarly, BF thread
+ could be also acquiring MDL-lock causing victim to be
+ aborted. However, we have not yet called innobase_trx_init()
+ for BF transaction (test: galera_many_columns)*/
+ trx_t* bf_trx= thd_to_trx(bf_thd);
DBUG_ASSERT(wsrep_on(bf_thd));
+ wsrep_thd_LOCK(thd);
+
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
"query: %s",
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
- arg->bf_trx_id,
+ bf_trx ? bf_trx->id : TRX_ID_MAX,
thd_get_thread_id(bf_thd),
wsrep_thd_trx_seqno(bf_thd),
wsrep_thd_client_state_str(bf_thd),
@@ -18727,84 +18719,28 @@ static void bg_wsrep_kill_trx(void *void_arg)
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
{
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
- goto ret1;
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
}
- aborting= true;
+ /* Note that we need to release this as it will be acquired
+ below in wsrep-lib */
+ wsrep_thd_UNLOCK(thd);
+ DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
-ret1:
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
-ret0:
- if (thd) {
- wsrep_thd_UNLOCK(thd);
- if (aborting) {
- DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
- wsrep_thd_bf_abort(bf_thd, thd, arg->signal);
+ if (wsrep_thd_bf_abort(bf_thd, thd, signal))
+ {
+ lock_t* wait_lock = victim_trx->lock.wait_lock;
+ if (wait_lock) {
+ DBUG_ASSERT(victim_trx->is_wsrep());
+ WSREP_DEBUG("victim has wait flag: %lu",
+ thd_get_thread_id(thd));
+
+ WSREP_DEBUG("canceling wait lock");
+ victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
}
- wsrep_thd_kill_UNLOCK(thd);
- }
- if (bf_thd) {
- wsrep_thd_UNLOCK(bf_thd);
- wsrep_thd_kill_UNLOCK(bf_thd);
}
- free(arg);
-}
-
-/** This function is used to kill one transaction.
-
-This transaction was open on this node (not-yet-committed), and a
-conflicting writeset from some other node that was being applied
-caused a locking conflict. First committed (from other node)
-wins, thus open transaction is rolled back. BF stands for
-brute-force: any transaction can get aborted by galera any time
-it is necessary.
-
-This conflict can happen only when the replicated writeset (from
-other node) is being applied, not when it’s waiting in the queue.
-If our local transaction reached its COMMIT and this conflicting
-writeset was in the queue, then it should fail the local
-certification test instead.
-
-A brute force abort is only triggered by a locking conflict
-between a writeset being applied by an applier thread (slave thread)
-and an open transaction on the node, not by a Galera writeset
-comparison as in the local certification failure.
-
-@param[in] bf_thd Brute force (BF) thread
-@param[in,out] victim_trx Transaction to be killed
-@param[in] signal Should victim be signaled */
-void
-wsrep_innobase_kill_one_trx(
- THD* bf_thd,
- trx_t *victim_trx,
- bool signal)
-{
- ut_ad(bf_thd);
- ut_ad(victim_trx);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(victim_trx));
-
- DBUG_ENTER("wsrep_innobase_kill_one_trx");
-
- DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
- {
- const char act[]=
- "now "
- "SIGNAL sync.before_wsrep_thd_abort_reached "
- "WAIT_FOR signal.before_wsrep_thd_abort";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
-
- trx_t* bf_trx= thd_to_trx(bf_thd);
- bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
- arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
- arg->trx_id = victim_trx->id;
- arg->bf_thd_id = thd_get_thread_id(bf_thd);
- arg->bf_trx_id = bf_trx ? bf_trx->id : TRX_ID_MAX;
- arg->signal = signal;
- mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN;
}
@@ -18840,42 +18776,12 @@ wsrep_abort_transaction(
if (victim_trx) {
lock_mutex_enter();
trx_mutex_enter(victim_trx);
- victim_trx->lock.was_chosen_as_wsrep_victim= true;
+ wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal);
trx_mutex_exit(victim_trx);
lock_mutex_exit();
-
- wsrep_thd_kill_LOCK(victim_thd);
- wsrep_thd_LOCK(victim_thd);
- bool aborting= !wsrep_thd_set_wsrep_aborter(bf_thd, victim_thd);
- wsrep_thd_UNLOCK(victim_thd);
- if (aborting) {
- DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
- DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
- {
- const char act[]=
- "now "
- "SIGNAL sync.before_wsrep_thd_abort_reached "
- "WAIT_FOR signal.before_wsrep_thd_abort";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
- wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
- }
- wsrep_thd_kill_UNLOCK(victim_thd);
DBUG_VOID_RETURN;
} else {
- DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
- {
- const char act[]=
- "now "
- "SIGNAL sync.before_wsrep_thd_abort_reached "
- "WAIT_FOR signal.before_wsrep_thd_abort";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
- wsrep_thd_kill_LOCK(victim_thd);
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
- wsrep_thd_kill_UNLOCK(victim_thd);
}
DBUG_VOID_RETURN;