summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorsjaakola <seppo.jaakola@iki.fi>2021-10-21 14:49:51 +0300
committerJan Lindström <jan.lindstrom@mariadb.com>2021-10-29 10:00:17 +0300
commit157b3a637faa17f933e65adf1f533f0471025dad (patch)
tree7f6b91dc486cfcfa5f7531e77980db2f1f62d25e /sql
parent30337addfc34c882fc6772aa3d820e0ffb52e3b9 (diff)
downloadmariadb-git-157b3a637faa17f933e65adf1f533f0471025dad.tar.gz
MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx, the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events() and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread() and mutexes are victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex This patch is the plan D variant for fixing potetial mutex locking order exercised by BF aborting and KILL command execution. In this approach, KILL command is replicated as TOI operation. This guarantees total isolation for the KILL command execution in the first node: there is no concurrent replication applying and no concurrent DDL executing. Therefore there is no risk of BF aborting to happen in parallel with KILL command execution either. Potential mutex deadlocks between the different mutex access paths with KILL command execution and BF aborting cannot therefore happen. TOI replication is used, in this approach, purely as means to provide isolated KILL command execution in the first node. KILL command should not (and must not) be applied in secondary nodes. In this patch, we make this sure by skipping KILL execution in secondary nodes, in applying phase, where we bail out if applier thread is trying to execute KILL command. This is effective, but skipping the applying of KILL command could happen much earlier as well. This also fixed unprotected calls to wsrep_thd_abort that will use wsrep_abort_transaction. This is fixed by holding THD::LOCK_thd_data while we abort transaction. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
Diffstat (limited to 'sql')
-rw-r--r--sql/sql_class.cc27
-rw-r--r--sql/sql_parse.cc34
-rw-r--r--sql/wsrep_mysqld.cc65
-rw-r--r--sql/wsrep_thd.cc11
4 files changed, 109 insertions, 28 deletions
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index d3c090c3308..fbfb742d9ee 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1977,7 +1977,9 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
if (needs_thr_lock_abort)
{
+ bool mutex_released= false;
mysql_mutex_lock(&in_use->LOCK_thd_data);
+ mysql_mutex_lock(&in_use->LOCK_thd_kill);
/* If not already dying */
if (in_use->killed != KILL_CONNECTION_HARD)
{
@@ -1993,18 +1995,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
- {
signalled|= mysql_lock_abort_for_thread(this, thd_table);
- if (WSREP(this) && wsrep_thd_is_BF(this, FALSE))
- {
- WSREP_DEBUG("remove_table_from_cache: %llu",
- (unsigned long long) this->real_id);
- wsrep_abort_thd((void *)this, (void *)in_use, FALSE);
- }
- }
}
+#ifdef WITH_WSREP
+ if (WSREP(this) && wsrep_thd_is_BF(this, false))
+ {
+ WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s"
+ " victim %llu query %s",
+ this->real_id, wsrep_thd_query(this),
+ in_use->real_id, wsrep_thd_query(in_use));
+ wsrep_abort_thd((void *)this, (void *)in_use, false);
+ mutex_released= true;
+ }
+#endif /* WITH_WSREP */
+ }
+ if (!mutex_released)
+ {
+ mysql_mutex_unlock(&in_use->LOCK_thd_kill);
+ mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
- mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
DBUG_RETURN(signalled);
}
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 95c04321b4d..aa0b4a96327 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -9157,6 +9157,18 @@ static
void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
{
uint error;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_one_thread(thd, id, state, type))))
{
if (!thd->killed)
@@ -9166,6 +9178,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
}
else
my_error(error, MYF(0), id);
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd));
+#endif /* WITH_WSREP */
}
@@ -9174,6 +9191,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{
uint error;
ha_rows rows;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill_user called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_threads_for_user(thd, user, state, &rows))))
my_ok(thd, rows);
else
@@ -9184,6 +9213,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
*/
my_error(error, MYF(0), user->host.str, user->user.str);
}
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), user->user.str);
+#endif /* WITH_WSREP */
}
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index 9f152d2a20c..fcb7de9ca58 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -1,4 +1,4 @@
-/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+/* Copyright 2008-2021 Codership Oy <http://www.codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -833,13 +833,25 @@ void wsrep_thr_init()
DBUG_VOID_RETURN;
}
+/* This is wrapper for wsrep_break_lock in thr_lock.c */
+static int wsrep_thr_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
+{
+ THD* victim_thd= (THD *) victim_thd_ptr;
+ /* We need to lock THD::LOCK_thd_data to protect victim
+ from concurrent usage or disconnect or delete. */
+ wsrep_thd_LOCK(victim_thd);
+ int res= wsrep_abort_thd(bf_thd_ptr, victim_thd_ptr, signal);
+ return res;
+}
+
+
void wsrep_init_startup (bool first)
{
if (wsrep_init()) unireg_abort(1);
wsrep_thr_lock_init(
(wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF,
- (wsrep_abort_thd_fun)wsrep_abort_thd,
+ (wsrep_abort_thd_fun)wsrep_thr_abort_thd,
wsrep_debug, wsrep_convert_LOCK_to_trx,
(wsrep_on_fun)wsrep_on);
@@ -1685,6 +1697,11 @@ static int wsrep_TOI_begin(THD *thd, const char *db_, const char *table_,
case SQLCOM_DROP_TABLE:
buf_err= wsrep_drop_table_query(thd, &buf, &buf_len);
break;
+ case SQLCOM_KILL:
+ WSREP_DEBUG("KILL as TOI: %s", thd->query());
+ buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(),
+ &buf, &buf_len);
+ break;
case SQLCOM_CREATE_ROLE:
if (sp_process_definer(thd))
{
@@ -2005,14 +2022,14 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
request_thd, granted_thd);
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_lock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_LOCK(granted_thd);
if (granted_thd->wsrep_exec_mode == TOTAL_ORDER ||
granted_thd->wsrep_exec_mode == REPL_RECV)
{
WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len,
request_thd, granted_thd);
ticket->wsrep_report(true);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(granted_thd);
ret= true;
}
else if (granted_thd->lex->sql_command == SQLCOM_FLUSH ||
@@ -2020,7 +2037,7 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
{
WSREP_DEBUG("BF thread waiting for FLUSH");
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(granted_thd);
ret= false;
}
else
@@ -2045,8 +2062,10 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
ticket->wsrep_report(true);
}
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
- wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1);
+ /* This will call wsrep_abort_transaction so we should hold
+ THD::LOCK_thd_data to protect victim from concurrent usage
+ or disconnect or delete. */
+ wsrep_abort_thd((void *) request_thd, (void *) granted_thd, true);
ret= false;
}
}
@@ -2221,6 +2240,7 @@ error:
static bool abort_replicated(THD *thd)
{
bool ret_code= false;
+ wsrep_thd_LOCK(thd);
if (thd->wsrep_query_state== QUERY_COMMITTING)
{
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
@@ -2228,6 +2248,8 @@ static bool abort_replicated(THD *thd)
(void)wsrep_abort_thd(thd, thd, TRUE);
ret_code= true;
}
+ else
+ wsrep_thd_UNLOCK(thd);
return ret_code;
}
@@ -2274,6 +2296,8 @@ static bool have_client_connections()
(longlong) tmp->thread_id));
if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION)
{
+ WSREP_DEBUG("Informing thread %lld that it's time to die",
+ (longlong)tmp->thread_id);
(void)abort_replicated(tmp);
return true;
}
@@ -2358,6 +2382,8 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
{
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
(longlong) tmp->thread_id));
+ WSREP_DEBUG("Informing thread %lld that it's time to die",
+ (longlong)tmp->thread_id);
/* We skip slave threads & scheduler on this first loop through. */
if (!is_client_connection(tmp))
continue;
@@ -2374,15 +2400,18 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
continue;
}
- /* replicated transactions must be skipped */
+ /* replicated transactions must be skipped and aborted
+ with wsrep_abort_thd. */
if (abort_replicated(tmp))
continue;
WSREP_DEBUG("closing connection %lld", (longlong) tmp->thread_id);
/*
- instead of wsrep_close_thread() we do now soft kill by THD::awake
- */
+ instead of wsrep_close_thread() we do now soft kill by
+ THD::awake(). Here also victim needs to be protected from
+ concurrent usage or disconnect or delete.
+ */
tmp->awake(KILL_CONNECTION);
}
mysql_mutex_unlock(&LOCK_thread_count);
@@ -2398,7 +2427,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
I_List_iterator<THD> it2(threads);
while ((tmp=it2++))
{
-#ifndef __bsdi__ // Bug in BSDI kernel
if (is_client_connection(tmp) &&
!abort_replicated(tmp) &&
!is_replaying_connection(tmp) &&
@@ -2407,7 +2435,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
WSREP_INFO("killing local connection: %lld", (longlong) tmp->thread_id);
close_connection(tmp,0);
}
-#endif
}
DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count));
@@ -2594,7 +2621,8 @@ extern "C" void wsrep_thd_set_query_state(
void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state)
{
- if (WSREP(thd)) thd->wsrep_conflict_state= state;
+ mysql_mutex_assert_owner(&thd->LOCK_thd_data);
+ thd->wsrep_conflict_state= state;
}
@@ -2662,11 +2690,13 @@ wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd)
void wsrep_thd_LOCK(THD *thd)
{
mysql_mutex_lock(&thd->LOCK_thd_data);
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
}
void wsrep_thd_UNLOCK(THD *thd)
{
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
@@ -2747,9 +2777,12 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{
if (signal)
{
- mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->awake(KILL_QUERY);
- mysql_mutex_unlock(&thd->LOCK_thd_data);
+ /* Here we should hold THD::LOCK_thd_data to
+ protect from concurrent usage and
+ THD::LOCK_thd_kill from disconnect or delete */
+ mysql_mutex_assert_owner(&thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
+ thd->awake_no_mutex(KILL_QUERY);
}
else
{
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
index 6fd57edc692..f8eaa14d176 100644
--- a/sql/wsrep_thd.cc
+++ b/sql/wsrep_thd.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Codership Oy <info@codership.com>
+/* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -803,10 +803,13 @@ my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync)
int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
{
- THD *victim_thd = (THD *) victim_thd_ptr;
- THD *bf_thd = (THD *) bf_thd_ptr;
+ THD *victim_thd= (THD *) victim_thd_ptr;
+ THD *bf_thd= (THD *) bf_thd_ptr;
DBUG_ENTER("wsrep_abort_thd");
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+
if ( (WSREP(bf_thd) ||
( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) &&
bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) &&
@@ -820,6 +823,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
"aborted. Ignoring.",
(bf_thd) ? (long long)bf_thd->real_id : 0,
(long long)victim_thd->real_id);
+ wsrep_thd_UNLOCK(victim_thd);
DBUG_RETURN(1);
}
@@ -830,6 +834,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
else
{
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
+ wsrep_thd_UNLOCK(victim_thd);
}
DBUG_RETURN(1);