diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-12-17 14:20:23 +0200 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-12-29 14:29:31 +0200 |
commit | 5b25f651a961e06d377f2f8ff78ac38dc40f3fb3 (patch) | |
tree | 308717d150bc68c364404917228b2eda388dbe1a | |
parent | 9118fd360a3da0bba521caf2a35c424968235ac4 (diff) | |
download | mariadb-git-bb-10.6-MDEV-23536.tar.gz |
MDEV-23536 : Race condition between KILL and transaction commitbb-10.6-MDEV-23536
A race condition may occur between the execution of transaction commit,
and an execution of a KILL statement that would attempt to abort that
transaction.
MDEV-17092 worked around this race condition by modifying InnoDB code.
After that issue was closed, Sergey Vojtovich pointed out that this
race condition would better be fixed above the storage engine layer:
If you look carefully into the above, you can conclude that
thd->free_connection() can be called concurrently with
KILL/thd->awake(). Which is the bug. And it is partially fixed in
THD::~THD(), that is destructor waits for KILL completion:
Fix: Add necessary mutex operations to THD::free_connection()
and move WSREP specific code also there. This ensures that no
one is using THD while we do free_connection(). These mutexes
will also ensures that there can't be concurrent KILL/THD::awake().
innobase_kill_query
We can now remove usage of trx_sys_mutex introduced on MDEV-17092.
trx_t::free()
Poison trx->state and trx->mysql_thd
This patch is validated with an RQG run similar to the one that
reproduced MDEV-17092.
-rw-r--r-- | sql/sql_class.cc | 62 | ||||
-rw-r--r-- | sql/sql_class.h | 2 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 32 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 1 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.cc | 4 |
5 files changed, 57 insertions, 44 deletions
diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1cc9a6a61d3..f4fd27ad5d1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1531,16 +1531,32 @@ void THD::reset_db(const LEX_CSTRING *new_db) /* Do operations that may take a long time */ -void THD::cleanup(void) +void THD::cleanup(bool have_mutex) { DBUG_ENTER("THD::cleanup"); DBUG_ASSERT(cleanup_done == 0); - set_killed(KILL_CONNECTION); + if (have_mutex) + set_killed_no_mutex(KILL_CONNECTION,0,0); + else + set_killed(KILL_CONNECTION); #ifdef WITH_WSREP if (wsrep_cs().state() != wsrep::client_state::s_none) { + // Below wsrep-lib function will take THD::LOCK_thd_data + // I do not like this, but at the moment there is no + // alternative. + if (have_mutex) + { + mysql_mutex_unlock(&LOCK_thd_kill); + mysql_mutex_unlock(&LOCK_thd_data); + } wsrep_cs().cleanup(); + if (have_mutex) + { + mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); + } } wsrep_client_thread= false; #endif /* WITH_WSREP */ @@ -1614,6 +1630,28 @@ void THD::cleanup(void) void THD::free_connection() { DBUG_ASSERT(free_connection_done == 0); + /* Make sure threads are not available via server_threads. */ + assert_not_linked(); + + /* + Other threads may have a lock on THD::LOCK_thd_data or + THD::LOCK_thd_kill to ensure that this THD is not deleted + while they access it. The following mutex_lock ensures + that no one else is using this THD and it's now safe to + continue. + + For example consider KILL-statement execution on + sql_parse.cc kill_one_thread() that will use + THD::LOCK_thd_data to protect victim thread during + THD::awake(). + */ + mysql_mutex_lock(&LOCK_thd_data); + mysql_mutex_lock(&LOCK_thd_kill); + +#ifdef WITH_WSREP + delete wsrep_rgi; + wsrep_rgi= 0; +#endif my_free((char*) db.str); db= null_clex_str; #ifndef EMBEDDED_LIBRARY @@ -1622,8 +1660,8 @@ void THD::free_connection() net.vio= 0; net_end(&net); #endif - if (!cleanup_done) - cleanup(); + if (!cleanup_done) + cleanup(true); // We have locked THD::LOCK_thd_kill ha_close_connection(this); plugin_thdvar_cleanup(this); mysql_audit_free_thd(this); @@ -1635,6 +1673,8 @@ void THD::free_connection() profiling.restart(); // Reset profiling #endif debug_sync_reset_thread(this); + mysql_mutex_unlock(&LOCK_thd_kill); + mysql_mutex_unlock(&LOCK_thd_data); } /* @@ -1690,24 +1730,10 @@ THD::~THD() if (!status_in_global) add_status_to_global(); - /* - Other threads may have a lock on LOCK_thd_kill to ensure that this - THD is not deleted while they access it. The following mutex_lock - ensures that no one else is using this THD and it's now safe to delete - */ - if (WSREP_NNULL(this)) mysql_mutex_lock(&LOCK_thd_data); - mysql_mutex_lock(&LOCK_thd_kill); - mysql_mutex_unlock(&LOCK_thd_kill); - if (WSREP_NNULL(this)) mysql_mutex_unlock(&LOCK_thd_data); - if (!free_connection_done) free_connection(); #ifdef WITH_WSREP - if (wsrep_rgi != NULL) { - delete wsrep_rgi; - wsrep_rgi = NULL; - } mysql_cond_destroy(&COND_wsrep_thd); #endif mdl_context.destroy(); diff --git a/sql/sql_class.h b/sql/sql_class.h index 6bd08343c9e..3e8211389ce 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3462,7 +3462,7 @@ public: void update_all_stats(); void update_stats(void); void change_user(void); - void cleanup(void); + void cleanup(bool have_mutex=false); void cleanup_after_query(); void free_connection(); void reset_for_reuse(); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c4083f8aeb9..c106eda42c3 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4452,36 +4452,22 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) if (trx_t* trx= thd_to_trx(thd)) { + ut_ad(trx->mysql_thd == thd); #ifdef WITH_WSREP + /* If victim has been signaled by BF thread and/or aborting is already + progressing, following query aborting is not necessary any more. + Also, BF thread should own trx mutex for the victim. */ if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) - /* if victim has been signaled by BF thread and/or aborting is already - progressing, following query aborting is not necessary any more. - Also, BF thread should own trx mutex for the victim. */ DBUG_VOID_RETURN; #endif /* WITH_WSREP */ lock_sys.mutex_lock(); - trx_sys.trx_list.freeze(); - trx->mutex.wr_lock(); - /* It is possible that innobase_close_connection() is concurrently - being executed on our victim. Even if the trx object is later - reused for another client connection or a background transaction, - its trx->mysql_thd will differ from our thd. - - trx_sys.trx_list is thread-safe. It's freezed to 'protect' - trx_t. However, trx_t::commit_in_memory() changes a trx_t::state - of autocommit non-locking transactions without any protection. - - At this point, trx may have been reallocated for another client - connection, or for a background operation. In that case, either - trx_t::state or trx_t::mysql_thd should not match our expectations. */ - bool cancel= trx->mysql_thd == thd && trx->state == TRX_STATE_ACTIVE && - !trx->lock.was_chosen_as_deadlock_victim; - trx_sys.trx_list.unfreeze(); - if (!cancel); - else if (lock_t *lock= trx->lock.wait_lock) + if (lock_t *lock= trx->lock.wait_lock) + { + trx->mutex.wr_lock(); lock_cancel_waiting_and_release(lock); + trx->mutex.wr_unlock(); + } lock_sys.mutex_unlock(); - trx->mutex.wr_unlock(); } DBUG_VOID_RETURN; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 66644865309..a5536b17a94 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -5600,6 +5600,7 @@ lock_cancel_waiting_and_release( que_thr_t* thr; lock_sys.mutex_assert_locked(); + ut_ad(lock->trx->state == TRX_STATE_ACTIVE); lock->trx->lock.cancel = true; diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 50a78adcf53..7eb8493deaa 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -405,7 +405,7 @@ void trx_t::free() MEM_NOACCESS(&n_ref, sizeof n_ref); /* do not poison mutex */ MEM_NOACCESS(&id, sizeof id); - /* state is accessed by innobase_kill_connection() */ + MEM_NOACCESS(&state, sizeof state); MEM_NOACCESS(&is_recovered, sizeof is_recovered); /* wsrep is accessed by innobase_kill_connection() */ read_view.mem_noaccess(); @@ -425,7 +425,7 @@ void trx_t::free() MEM_NOACCESS(&start_time_micro, sizeof start_time_micro); MEM_NOACCESS(&commit_lsn, sizeof commit_lsn); MEM_NOACCESS(&table_id, sizeof table_id); - /* mysql_thd is accessed by innobase_kill_connection() */ + MEM_NOACCESS(&mysql_thd, sizeof mysql_thd); MEM_NOACCESS(&mysql_log_file_name, sizeof mysql_log_file_name); MEM_NOACCESS(&mysql_log_offset, sizeof mysql_log_offset); MEM_NOACCESS(&n_mysql_tables_in_use, sizeof n_mysql_tables_in_use); |