diff options
-rw-r--r-- | mysql-test/suite/galera/t/galera_bf_kill_debug.test | 2 | ||||
-rw-r--r-- | sql/service_wsrep.cc | 16 | ||||
-rw-r--r-- | sql/slave.cc | 2 | ||||
-rw-r--r-- | sql/sql_class.cc | 11 | ||||
-rw-r--r-- | sql/sql_class.h | 4 | ||||
-rw-r--r-- | sql/sql_parse.cc | 3 | ||||
-rw-r--r-- | sql/sql_repl.cc | 2 | ||||
-rw-r--r-- | sql/wsrep_client_service.cc | 7 | ||||
-rw-r--r-- | sql/wsrep_server_service.cc | 9 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 214 |
10 files changed, 94 insertions, 176 deletions
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test index c322f283757..b687a5a6a67 100644 --- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test +++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.test @@ -84,7 +84,7 @@ SET DEBUG_SYNC = "now SIGNAL continue_kill"; --reap --connection node_2a ---error 0,1213,2013 +--error 0,1213 select * from t1; --connection node_2 diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 67735972400..19ec3d948c4 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -217,8 +217,16 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, my_bool signal) { - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); - mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data); + DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", + { + const char act[]= + "now " + "SIGNAL sync.before_wsrep_thd_abort_reached " + "WAIT_FOR signal.before_wsrep_thd_abort"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, + STRING_WITH_LEN(act))); + };); + my_bool ret= wsrep_bf_abort(bf_thd, victim_thd); /* Send awake signal if victim was BF aborted or does not @@ -227,6 +235,8 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, */ if ((ret || !wsrep_on(victim_thd)) && signal) { + mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill); mysql_mutex_lock(&victim_thd->LOCK_thd_data); if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) @@ -237,8 +247,10 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, return false; } + mysql_mutex_lock(&victim_thd->LOCK_thd_kill); victim_thd->wsrep_aborter= bf_thd->thread_id; victim_thd->awake_no_mutex(KILL_QUERY); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); mysql_mutex_unlock(&victim_thd->LOCK_thd_data); } else { WSREP_DEBUG("wsrep_thd_bf_abort skipped awake"); diff --git a/sql/slave.cc b/sql/slave.cc index 62ceb07b224..c63eee605f5 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1072,8 +1072,8 @@ terminate_slave_thread(THD *thd, int error __attribute__((unused)); DBUG_PRINT("loop", ("killing slave thread")); - mysql_mutex_lock(&thd->LOCK_thd_kill); mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); #ifndef DONT_USE_THR_ALARM /* Error codes from pthread_kill are: diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 883e9c688ff..b8b09b2ef94 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -826,7 +826,6 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); - mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data); /* Variables with default values */ proc_info="login"; @@ -5289,13 +5288,11 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) #ifdef WITH_WSREP /* wsrep applier, replayer and TOI processing threads are ordered by replication provider, relaxed GAP locking protocol can be used - between high priority wsrep threads. - Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd, - the caller should guarantee that the BF state won't change. - (e.g. InnoDB does it by keeping lock_sys.mutex locked) + between high priority wsrep threads */ - if (WSREP_ON && wsrep_thd_is_BF(thd, false) && - wsrep_thd_is_BF(other_thd, false)) + if (WSREP_ON && + wsrep_thd_is_BF(const_cast<THD *>(thd), false) && + wsrep_thd_is_BF(const_cast<THD *>(other_thd), true)) return 0; #endif /* WITH_WSREP */ rgi= thd->rgi_slave; diff --git a/sql/sql_class.h b/sql/sql_class.h index 7aac98eccb2..81452cda035 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3470,11 +3470,11 @@ public: void awake_no_mutex(killed_state state_to_set); void awake(killed_state state_to_set) { - mysql_mutex_lock(&LOCK_thd_kill); mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); awake_no_mutex(state_to_set); - mysql_mutex_unlock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_kill); + mysql_mutex_unlock(&LOCK_thd_data); } void abort_current_cond_wait(bool force); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 5a2897868a0..af30aa6fde9 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -9248,6 +9248,7 @@ THD *find_thread_by_id(longlong id, bool query_id) return arg.thd; } + mysql_mutex_lock(&thd->LOCK_thd_data); /** kill one thread. @@ -9372,8 +9373,8 @@ static my_bool kill_threads_callback(THD *thd, kill_threads_callback_arg *arg) return 1; if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root)) { - mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete } } } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index e15980e9b3c..83806e6dbea 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -3498,8 +3498,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg) thd->variables.server_id == arg->slave_server_id) { arg->thd= thd; - mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete return 1; } return 0; diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc index 6ab2834a219..c2f49dbdd6b 100644 --- a/sql/wsrep_client_service.cc +++ b/sql/wsrep_client_service.cc @@ -68,13 +68,20 @@ bool Wsrep_client_service::interrupted( wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const { DBUG_ASSERT(m_thd == current_thd); + /* Underlying mutex in lock object points to LOCK_thd_data, which + protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed. + Locking order is: + 1) LOCK_thd_data + 2) LOCK_thd_kill */ mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native())); + mysql_mutex_lock(&m_thd->LOCK_thd_kill); bool ret= (m_thd->killed != NOT_KILLED); if (ret) { WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d", m_thd->killed, m_thd->wsrep_trx().state()); } + mysql_mutex_unlock(&m_thd->LOCK_thd_kill); return ret; } diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index 4916005e7ed..72c98de3f81 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -41,7 +41,6 @@ static void init_service_thd(THD* thd, char* thread_stack) thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); thd->set_command(COM_SLEEP); thd->reset_for_next_command(true); - server_threads.insert(thd); // as wsrep_innobase_kill_one_trx() uses find_thread_by_id() } Wsrep_storage_service* @@ -81,7 +80,6 @@ void Wsrep_server_service::release_storage_service( static_cast<Wsrep_storage_service*>(storage_service); THD* thd= ss->m_thd; wsrep_reset_threadvars(thd); - server_threads.erase(thd); delete ss; delete thd; } @@ -95,8 +93,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx) streaming transaction is BF aborted and streaming applier is created from BF aborter context. */ Wsrep_threadvars saved_threadvars(wsrep_save_threadvars()); - if (saved_threadvars.cur_thd) - wsrep_reset_threadvars(saved_threadvars.cur_thd); + wsrep_reset_threadvars(saved_threadvars.cur_thd); THD *thd= 0; Wsrep_applier_service *ret= 0; if (!wsrep_create_threadvars() && @@ -113,8 +110,7 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx) } /* Restore original thread local storage state before returning. */ wsrep_restore_threadvars(saved_threadvars); - if (saved_threadvars.cur_thd) - wsrep_store_threadvars(saved_threadvars.cur_thd); + wsrep_store_threadvars(saved_threadvars.cur_thd); return ret; } @@ -143,7 +139,6 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se THD* thd= hps->m_thd; delete hps; wsrep_store_threadvars(thd); - server_threads.erase(thd); delete thd; wsrep_delete_threadvars(); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6db3908e6da..9026b5844dd 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -63,7 +63,6 @@ this program; if not, write to the Free Software Foundation, Inc., #include <my_service_manager.h> #include <key.h> -#include <sql_manager.h> /* Include necessary InnoDB headers */ #include "btr0btr.h" @@ -4772,8 +4771,6 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) if (lock_t *lock= trx->lock.wait_lock) { trx_mutex_enter(trx); - if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) - trx->lock.was_chosen_as_deadlock_victim= TRUE; lock_cancel_waiting_and_release(lock); trx_mutex_exit(trx); } @@ -18642,67 +18639,62 @@ static struct st_mysql_storage_engine innobase_storage_engine= #ifdef WITH_WSREP -struct bg_wsrep_kill_trx_arg { - my_thread_id thd_id, bf_thd_id; - trx_id_t trx_id, bf_trx_id; - bool signal; -}; - -/** Kill one transaction from a background manager thread +/** This function is used to kill one transaction. -wsrep_innobase_kill_one_trx() is invoked when lock_sys.mutex and trx mutex -are taken, wsrep_thd_bf_abort() cannot be used there as it takes THD mutexes -that must be taken before lock_sys.mutex and trx mutex. That's why -wsrep_innobase_kill_one_trx only posts the killing task to the manager thread -and the actual killing happens asynchronously here. +This transaction was open on this node (not-yet-committed), and a +conflicting writeset from some other node that was being applied +caused a locking conflict. First committed (from other node) +wins, thus open transaction is rolled back. BF stands for +brute-force: any transaction can get aborted by galera any time +it is necessary. -As no mutexes were held we don't know whether THD or trx pointers are still -valid, so we need to pass thread/trx ids and perform a lookup. -*/ -static void bg_wsrep_kill_trx(void *void_arg) -{ - bg_wsrep_kill_trx_arg *arg= (bg_wsrep_kill_trx_arg *)void_arg; - THD *thd, *bf_thd; - trx_t *victim_trx; - bool aborting= false; +This conflict can happen only when the replicated writeset (from +other node) is being applied, not when it’s waiting in the queue. +If our local transaction reached its COMMIT and this conflicting +writeset was in the queue, then it should fail the local +certification test instead. - if ((bf_thd= find_thread_by_id(arg->bf_thd_id))) - wsrep_thd_LOCK(bf_thd); - if ((thd= find_thread_by_id(arg->thd_id))) - wsrep_thd_LOCK(thd); +A brute force abort is only triggered by a locking conflict +between a writeset being applied by an applier thread (slave thread) +and an open transaction on the node, not by a Galera writeset +comparison as in the local certification failure. - if (!thd || !bf_thd || !(victim_trx= thd_to_trx(thd))) - goto ret0; +@param[in] bf_thd Brute force (BF) thread +@param[in,out] victim_trx Vimtim trx to be killed +@param[in] signal Should victim be signaled */ +UNIV_INTERN +void +wsrep_innobase_kill_one_trx( + THD* bf_thd, + trx_t *victim_trx, + bool signal) +{ + ut_ad(bf_thd); + ut_ad(victim_trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - if (victim_trx->id != arg->trx_id - || victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY) - { - /* Apparently victim trx was meanwhile rolled back or - committed. Tell bf thd not to wait, in case it already - started to. */ - trx_t *trx= thd_to_trx(bf_thd); - if (!trx) { - /* bf_thd might not be associated with a - transaction, in case of MDL conflict */ - } else if (lock_t *lock = trx->lock.wait_lock) { - trx_mutex_enter(trx); - lock_cancel_waiting_and_release(lock); - trx_mutex_exit(trx); - } - goto ret1; - } + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + THD *thd= (THD *) victim_trx->mysql_thd; + ut_ad(thd); + /* Note that bf_trx might not exist here e.g. on MDL conflict + case (test: galera_concurrent_ctas). Similarly, BF thread + could be also acquiring MDL-lock causing victim to be + aborted. However, we have not yet called innobase_trx_init() + for BF transaction (test: galera_many_columns)*/ + trx_t* bf_trx= thd_to_trx(bf_thd); DBUG_ASSERT(wsrep_on(bf_thd)); + wsrep_thd_LOCK(thd); + WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld " "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " "query: %s", wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", - arg->bf_trx_id, + bf_trx ? bf_trx->id : TRX_ID_MAX, thd_get_thread_id(bf_thd), wsrep_thd_trx_seqno(bf_thd), wsrep_thd_client_state_str(bf_thd), @@ -18727,84 +18719,28 @@ static void bg_wsrep_kill_trx(void *void_arg) if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) { WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); - goto ret1; + wsrep_thd_UNLOCK(thd); + DBUG_VOID_RETURN; } - aborting= true; + /* Note that we need to release this as it will be acquired + below in wsrep-lib */ + wsrep_thd_UNLOCK(thd); + DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); -ret1: - trx_mutex_exit(victim_trx); - lock_mutex_exit(); -ret0: - if (thd) { - wsrep_thd_UNLOCK(thd); - if (aborting) { - DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); - wsrep_thd_bf_abort(bf_thd, thd, arg->signal); + if (wsrep_thd_bf_abort(bf_thd, thd, signal)) + { + lock_t* wait_lock = victim_trx->lock.wait_lock; + if (wait_lock) { + DBUG_ASSERT(victim_trx->is_wsrep()); + WSREP_DEBUG("victim has wait flag: %lu", + thd_get_thread_id(thd)); + + WSREP_DEBUG("canceling wait lock"); + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(wait_lock); } - wsrep_thd_kill_UNLOCK(thd); - } - if (bf_thd) { - wsrep_thd_UNLOCK(bf_thd); - wsrep_thd_kill_UNLOCK(bf_thd); } - free(arg); -} - -/** This function is used to kill one transaction. - -This transaction was open on this node (not-yet-committed), and a -conflicting writeset from some other node that was being applied -caused a locking conflict. First committed (from other node) -wins, thus open transaction is rolled back. BF stands for -brute-force: any transaction can get aborted by galera any time -it is necessary. - -This conflict can happen only when the replicated writeset (from -other node) is being applied, not when it’s waiting in the queue. -If our local transaction reached its COMMIT and this conflicting -writeset was in the queue, then it should fail the local -certification test instead. - -A brute force abort is only triggered by a locking conflict -between a writeset being applied by an applier thread (slave thread) -and an open transaction on the node, not by a Galera writeset -comparison as in the local certification failure. - -@param[in] bf_thd Brute force (BF) thread -@param[in,out] victim_trx Transaction to be killed -@param[in] signal Should victim be signaled */ -void -wsrep_innobase_kill_one_trx( - THD* bf_thd, - trx_t *victim_trx, - bool signal) -{ - ut_ad(bf_thd); - ut_ad(victim_trx); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); - - DBUG_ENTER("wsrep_innobase_kill_one_trx"); - - DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", - { - const char act[]= - "now " - "SIGNAL sync.before_wsrep_thd_abort_reached " - "WAIT_FOR signal.before_wsrep_thd_abort"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); - - trx_t* bf_trx= thd_to_trx(bf_thd); - bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg)); - arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd); - arg->trx_id = victim_trx->id; - arg->bf_thd_id = thd_get_thread_id(bf_thd); - arg->bf_trx_id = bf_trx ? bf_trx->id : TRX_ID_MAX; - arg->signal = signal; - mysql_manager_submit(bg_wsrep_kill_trx, arg); DBUG_VOID_RETURN; } @@ -18840,42 +18776,12 @@ wsrep_abort_transaction( if (victim_trx) { lock_mutex_enter(); trx_mutex_enter(victim_trx); - victim_trx->lock.was_chosen_as_wsrep_victim= true; + wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal); trx_mutex_exit(victim_trx); lock_mutex_exit(); - - wsrep_thd_kill_LOCK(victim_thd); - wsrep_thd_LOCK(victim_thd); - bool aborting= !wsrep_thd_set_wsrep_aborter(bf_thd, victim_thd); - wsrep_thd_UNLOCK(victim_thd); - if (aborting) { - DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); - DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", - { - const char act[]= - "now " - "SIGNAL sync.before_wsrep_thd_abort_reached " - "WAIT_FOR signal.before_wsrep_thd_abort"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); - wsrep_thd_bf_abort(bf_thd, victim_thd, signal); - } - wsrep_thd_kill_UNLOCK(victim_thd); DBUG_VOID_RETURN; } else { - DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", - { - const char act[]= - "now " - "SIGNAL sync.before_wsrep_thd_abort_reached " - "WAIT_FOR signal.before_wsrep_thd_abort"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); - wsrep_thd_kill_LOCK(victim_thd); wsrep_thd_bf_abort(bf_thd, victim_thd, signal); - wsrep_thd_kill_UNLOCK(victim_thd); } DBUG_VOID_RETURN; |