diff options
author | sjaakola <seppo.jaakola@iki.fi> | 2018-04-24 10:26:34 +0300 |
---|---|---|
committer | sjaakola <seppo.jaakola@iki.fi> | 2018-04-24 16:57:39 +0300 |
commit | 2f0b8f3e027840dab1fd6322e35ecd0f3d7edc08 (patch) | |
tree | 69aae3bc2639e60cee09da5a91dddcafac830a93 /sql/wsrep_hton.cc | |
parent | 82d4f081861b227e35299f4ea2e24f55b7146fd1 (diff) | |
download | mariadb-git-2f0b8f3e027840dab1fd6322e35ecd0f3d7edc08.tar.gz |
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.
On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.
In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
Diffstat (limited to 'sql/wsrep_hton.cc')
-rw-r--r-- | sql/wsrep_hton.cc | 44 |
1 files changed, 22 insertions, 22 deletions
diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc index 944c8d83b76..1e95cd4d282 100644 --- a/sql/wsrep_hton.cc +++ b/sql/wsrep_hton.cc @@ -238,12 +238,12 @@ static int wsrep_rollback(handlerton *hton, THD *thd, bool all) DBUG_RETURN(0); } - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); switch (thd->wsrep_exec_mode) { case TOTAL_ORDER: case REPL_RECV: - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query()); DBUG_RETURN(0); default: break; @@ -261,7 +261,7 @@ static int wsrep_rollback(handlerton *hton, THD *thd, bool all) } wsrep_cleanup_transaction(thd); } - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(0); } @@ -274,7 +274,7 @@ int wsrep_commit(handlerton *hton, THD *thd, bool all) DBUG_RETURN(0); } - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) { @@ -305,7 +305,7 @@ int wsrep_commit(handlerton *hton, THD *thd, bool all) wsrep_cleanup_transaction(thd); } } - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(0); } @@ -333,20 +333,20 @@ wsrep_run_wsrep_commit(THD *thd, bool all) if (thd->wsrep_exec_mode == REPL_RECV) { - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); if (thd->wsrep_conflict_state == MUST_ABORT) { if (wsrep_debug) WSREP_INFO("WSREP: must abort for BF"); DBUG_PRINT("wsrep", ("BF apply commit fail")); thd->wsrep_conflict_state = NO_CONFLICT; - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); // // TODO: test all calls of the rollback. // rollback must happen automagically innobase_rollback(hton, thd, 1); // DBUG_RETURN(WSREP_TRX_ERROR); } - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); } if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK); @@ -358,11 +358,11 @@ wsrep_run_wsrep_commit(THD *thd, bool all) DBUG_PRINT("wsrep", ("replicating commit")); - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); if (thd->wsrep_conflict_state == MUST_ABORT) { DBUG_PRINT("wsrep", ("replicate commit fail")); thd->wsrep_conflict_state = ABORTED; - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); if (wsrep_debug) { WSREP_INFO("innobase_commit, abort %s", (thd->query()) ? thd->query() : "void"); @@ -379,7 +379,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all) { mysql_mutex_unlock(&LOCK_wsrep_replaying); - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); mysql_mutex_lock(&thd->mysys_var->mutex); thd_proc_info(thd, "wsrep waiting on replaying"); @@ -407,7 +407,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all) thd->mysys_var->current_cond= 0; mysql_mutex_unlock(&thd->mysys_var->mutex); - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); mysql_mutex_lock(&LOCK_wsrep_replaying); } mysql_mutex_unlock(&LOCK_wsrep_replaying); @@ -415,14 +415,14 @@ wsrep_run_wsrep_commit(THD *thd, bool all) if (thd->wsrep_conflict_state == MUST_ABORT) { DBUG_PRINT("wsrep", ("replicate commit fail")); thd->wsrep_conflict_state = ABORTED; - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); WSREP_DEBUG("innobase_commit abort after replaying wait %s", (thd->query()) ? thd->query() : "void"); DBUG_RETURN(WSREP_TRX_CERT_FAIL); } thd->wsrep_query_state = QUERY_COMMITTING; - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); cache = get_trans_log(thd); rcode = 0; @@ -487,10 +487,10 @@ wsrep_run_wsrep_commit(THD *thd, bool all) } else if (rcode == WSREP_BF_ABORT) { WSREP_DEBUG("thd %lu seqno %lld BF aborted by provider, will replay", thd->thread_id, (long long)thd->wsrep_trx_meta.gtid.seqno); - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); thd->wsrep_conflict_state = MUST_REPLAY; DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0); - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); mysql_mutex_lock(&LOCK_wsrep_replaying); wsrep_replaying++; WSREP_DEBUG("replaying increased: %d, thd: %lu", @@ -504,7 +504,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all) DBUG_RETURN(WSREP_TRX_ERROR); } - mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&thd->LOCK_thd_data); DEBUG_SYNC(thd, "wsrep_after_replication"); @@ -558,26 +558,26 @@ wsrep_run_wsrep_commit(THD *thd, bool all) WSREP_LOG_CONFLICT(NULL, thd, FALSE); } } - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(WSREP_TRX_CERT_FAIL); case WSREP_SIZE_EXCEEDED: WSREP_ERROR("transaction size exceeded"); - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); case WSREP_CONN_FAIL: WSREP_ERROR("connection failure"); - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(WSREP_TRX_ERROR); default: WSREP_ERROR("unknown connection failure"); - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(WSREP_TRX_ERROR); } thd->wsrep_query_state= QUERY_EXEC; - mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_RETURN(WSREP_TRX_OK); } |