diff options
author | sjaakola <seppo.jaakola@iki.fi> | 2021-10-21 14:49:51 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2021-10-29 10:00:17 +0300 |
commit | 157b3a637faa17f933e65adf1f533f0471025dad (patch) | |
tree | 7f6b91dc486cfcfa5f7531e77980db2f1f62d25e /sql | |
parent | 30337addfc34c882fc6772aa3d820e0ffb52e3b9 (diff) | |
download | mariadb-git-157b3a637faa17f933e65adf1f533f0471025dad.tar.gz |
MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx,
the stack is
wsrep_thd_LOCK()
wsrep_kill_victim()
lock_rec_other_has_conflicting()
lock_clust_rec_read_check_and_lock()
row_search_mvcc()
ha_innobase::index_read()
ha_innobase::rnd_pos()
handler::ha_rnd_pos()
handler::rnd_pos_by_record()
handler::ha_rnd_pos_by_record()
Rows_log_event::find_row()
Update_rows_log_event::do_exec_row()
Rows_log_event::do_apply_event()
Log_event::apply_event()
wsrep_apply_events()
and mutexes are taken in the order
lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data
When a normal KILL statement is executed, the stack is
innobase_kill_query()
kill_handlerton()
plugin_foreach_with_mask()
ha_kill_query()
THD::awake()
kill_one_thread()
and mutexes are
victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex
This patch is the plan D variant for fixing potetial mutex locking
order exercised by BF aborting and KILL command execution.
In this approach, KILL command is replicated as TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting to happen in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths with KILL command execution and BF aborting cannot
therefore happen.
TOI replication is used, in this approach, purely as means
to provide isolated KILL command execution in the first node.
KILL command should not (and must not) be applied in secondary
nodes. In this patch, we make this sure by skipping KILL
execution in secondary nodes, in applying phase, where we
bail out if applier thread is trying to execute KILL command.
This is effective, but skipping the applying of KILL command
could happen much earlier as well.
This also fixed unprotected calls to wsrep_thd_abort
that will use wsrep_abort_transaction. This is fixed
by holding THD::LOCK_thd_data while we abort transaction.
Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
Diffstat (limited to 'sql')
-rw-r--r-- | sql/sql_class.cc | 27 | ||||
-rw-r--r-- | sql/sql_parse.cc | 34 | ||||
-rw-r--r-- | sql/wsrep_mysqld.cc | 65 | ||||
-rw-r--r-- | sql/wsrep_thd.cc | 11 |
4 files changed, 109 insertions, 28 deletions
diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d3c090c3308..fbfb742d9ee 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1977,7 +1977,9 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, if (needs_thr_lock_abort) { + bool mutex_released= false; mysql_mutex_lock(&in_use->LOCK_thd_data); + mysql_mutex_lock(&in_use->LOCK_thd_kill); /* If not already dying */ if (in_use->killed != KILL_CONNECTION_HARD) { @@ -1993,18 +1995,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, thread can see those instances (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) - { signalled|= mysql_lock_abort_for_thread(this, thd_table); - if (WSREP(this) && wsrep_thd_is_BF(this, FALSE)) - { - WSREP_DEBUG("remove_table_from_cache: %llu", - (unsigned long long) this->real_id); - wsrep_abort_thd((void *)this, (void *)in_use, FALSE); - } - } } +#ifdef WITH_WSREP + if (WSREP(this) && wsrep_thd_is_BF(this, false)) + { + WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s" + " victim %llu query %s", + this->real_id, wsrep_thd_query(this), + in_use->real_id, wsrep_thd_query(in_use)); + wsrep_abort_thd((void *)this, (void *)in_use, false); + mutex_released= true; + } +#endif /* WITH_WSREP */ + } + if (!mutex_released) + { + mysql_mutex_unlock(&in_use->LOCK_thd_kill); + mysql_mutex_unlock(&in_use->LOCK_thd_data); } - mysql_mutex_unlock(&in_use->LOCK_thd_data); } DBUG_RETURN(signalled); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 95c04321b4d..aa0b4a96327 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -9157,6 +9157,18 @@ static void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) { uint error; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (likely(!(error= kill_one_thread(thd, id, state, type)))) { if (!thd->killed) @@ -9166,6 +9178,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) } else my_error(error, MYF(0), id); +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd)); +#endif /* WITH_WSREP */ } @@ -9174,6 +9191,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) { uint error; ha_rows rows; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_DEBUG("sql_kill_user called"); + if (thd->wsrep_applier) + { + WSREP_DEBUG("KILL in applying, bailing out here"); + return; + } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP */ if (likely(!(error= kill_threads_for_user(thd, user, state, &rows)))) my_ok(thd, rows); else @@ -9184,6 +9213,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state) */ my_error(error, MYF(0), user->host.str, user->user.str); } +#ifdef WITH_WSREP + return; + wsrep_error_label: + my_error(ER_CANNOT_USER, MYF(0), user->user.str); +#endif /* WITH_WSREP */ } diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 9f152d2a20c..fcb7de9ca58 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2015 Codership Oy <http://www.codership.com> +/* Copyright 2008-2021 Codership Oy <http://www.codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -833,13 +833,25 @@ void wsrep_thr_init() DBUG_VOID_RETURN; } +/* This is wrapper for wsrep_break_lock in thr_lock.c */ +static int wsrep_thr_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD* victim_thd= (THD *) victim_thd_ptr; + /* We need to lock THD::LOCK_thd_data to protect victim + from concurrent usage or disconnect or delete. */ + wsrep_thd_LOCK(victim_thd); + int res= wsrep_abort_thd(bf_thd_ptr, victim_thd_ptr, signal); + return res; +} + + void wsrep_init_startup (bool first) { if (wsrep_init()) unireg_abort(1); wsrep_thr_lock_init( (wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF, - (wsrep_abort_thd_fun)wsrep_abort_thd, + (wsrep_abort_thd_fun)wsrep_thr_abort_thd, wsrep_debug, wsrep_convert_LOCK_to_trx, (wsrep_on_fun)wsrep_on); @@ -1685,6 +1697,11 @@ static int wsrep_TOI_begin(THD *thd, const char *db_, const char *table_, case SQLCOM_DROP_TABLE: buf_err= wsrep_drop_table_query(thd, &buf, &buf_len); break; + case SQLCOM_KILL: + WSREP_DEBUG("KILL as TOI: %s", thd->query()); + buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), + &buf, &buf_len); + break; case SQLCOM_CREATE_ROLE: if (sp_process_definer(thd)) { @@ -2005,14 +2022,14 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, request_thd, granted_thd); ticket->wsrep_report(wsrep_debug); - mysql_mutex_lock(&granted_thd->LOCK_thd_data); + wsrep_thd_LOCK(granted_thd); if (granted_thd->wsrep_exec_mode == TOTAL_ORDER || granted_thd->wsrep_exec_mode == REPL_RECV) { WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(granted_thd); ret= true; } else if (granted_thd->lex->sql_command == SQLCOM_FLUSH || @@ -2020,7 +2037,7 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for FLUSH"); ticket->wsrep_report(wsrep_debug); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_thd_UNLOCK(granted_thd); ret= false; } else @@ -2045,8 +2062,10 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, ticket->wsrep_report(true); } - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1); + /* This will call wsrep_abort_transaction so we should hold + THD::LOCK_thd_data to protect victim from concurrent usage + or disconnect or delete. */ + wsrep_abort_thd((void *) request_thd, (void *) granted_thd, true); ret= false; } } @@ -2221,6 +2240,7 @@ error: static bool abort_replicated(THD *thd) { bool ret_code= false; + wsrep_thd_LOCK(thd); if (thd->wsrep_query_state== QUERY_COMMITTING) { WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); @@ -2228,6 +2248,8 @@ static bool abort_replicated(THD *thd) (void)wsrep_abort_thd(thd, thd, TRUE); ret_code= true; } + else + wsrep_thd_UNLOCK(thd); return ret_code; } @@ -2274,6 +2296,8 @@ static bool have_client_connections() (longlong) tmp->thread_id)); if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) { + WSREP_DEBUG("Informing thread %lld that it's time to die", + (longlong)tmp->thread_id); (void)abort_replicated(tmp); return true; } @@ -2358,6 +2382,8 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) { DBUG_PRINT("quit",("Informing thread %lld that it's time to die", (longlong) tmp->thread_id)); + WSREP_DEBUG("Informing thread %lld that it's time to die", + (longlong)tmp->thread_id); /* We skip slave threads & scheduler on this first loop through. */ if (!is_client_connection(tmp)) continue; @@ -2374,15 +2400,18 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) continue; } - /* replicated transactions must be skipped */ + /* replicated transactions must be skipped and aborted + with wsrep_abort_thd. */ if (abort_replicated(tmp)) continue; WSREP_DEBUG("closing connection %lld", (longlong) tmp->thread_id); /* - instead of wsrep_close_thread() we do now soft kill by THD::awake - */ + instead of wsrep_close_thread() we do now soft kill by + THD::awake(). Here also victim needs to be protected from + concurrent usage or disconnect or delete. + */ tmp->awake(KILL_CONNECTION); } mysql_mutex_unlock(&LOCK_thread_count); @@ -2398,7 +2427,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) I_List_iterator<THD> it2(threads); while ((tmp=it2++)) { -#ifndef __bsdi__ // Bug in BSDI kernel if (is_client_connection(tmp) && !abort_replicated(tmp) && !is_replaying_connection(tmp) && @@ -2407,7 +2435,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) WSREP_INFO("killing local connection: %lld", (longlong) tmp->thread_id); close_connection(tmp,0); } -#endif } DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); @@ -2594,7 +2621,8 @@ extern "C" void wsrep_thd_set_query_state( void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state) { - if (WSREP(thd)) thd->wsrep_conflict_state= state; + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + thd->wsrep_conflict_state= state; } @@ -2662,11 +2690,13 @@ wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd) void wsrep_thd_LOCK(THD *thd) { mysql_mutex_lock(&thd->LOCK_thd_data); + mysql_mutex_lock(&thd->LOCK_thd_kill); } void wsrep_thd_UNLOCK(THD *thd) { + mysql_mutex_unlock(&thd->LOCK_thd_kill); mysql_mutex_unlock(&thd->LOCK_thd_data); } @@ -2747,9 +2777,12 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) { if (signal) { - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->awake(KILL_QUERY); - mysql_mutex_unlock(&thd->LOCK_thd_data); + /* Here we should hold THD::LOCK_thd_data to + protect from concurrent usage and + THD::LOCK_thd_kill from disconnect or delete */ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + mysql_mutex_assert_owner(&thd->LOCK_thd_kill); + thd->awake_no_mutex(KILL_QUERY); } else { diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 6fd57edc692..f8eaa14d176 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Codership Oy <info@codership.com> +/* Copyright (C) 2013-2021 Codership Oy <info@codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -803,10 +803,13 @@ my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) { - THD *victim_thd = (THD *) victim_thd_ptr; - THD *bf_thd = (THD *) bf_thd_ptr; + THD *victim_thd= (THD *) victim_thd_ptr; + THD *bf_thd= (THD *) bf_thd_ptr; DBUG_ENTER("wsrep_abort_thd"); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); + if ( (WSREP(bf_thd) || ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && @@ -820,6 +823,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) "aborted. Ignoring.", (bf_thd) ? (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + wsrep_thd_UNLOCK(victim_thd); DBUG_RETURN(1); } @@ -830,6 +834,7 @@ int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) else { WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + wsrep_thd_UNLOCK(victim_thd); } DBUG_RETURN(1); |