summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorseppo <seppo.jaakola@iki.fi>2020-03-24 11:01:42 +0200
committerGitHub <noreply@github.com>2020-03-24 11:01:42 +0200
commit5918b17004674f425f2cd1d4f0bac29b3bcecb35 (patch)
treefd5be51c5b8d516aaa51c9fa1be1bab1e6b2ab1a
parenta7cbce06d432cbcb88e071731089aacfd41750fd (diff)
downloadmariadb-git-5918b17004674f425f2cd1d4f0bac29b3bcecb35.tar.gz
MDEV-21473 conflicts with async slave BF aborting (#1475)
If the async slave thread (slave SQL handler) becomes a BF victim, it may occasionally happen that the rollbacker thread is used to carry out the rollback instead of the async slave thread itself. This can happen if the async slave thread has flagged the "idle" state at the moment the BF thread tries to figure out how to kill the victim. The issue could be reproduced by using a Galera cluster as a slave for an external master and issuing a high load of conflicting writes both through async replication and directly against the Galera cluster nodes. However, a deterministic mtr test for the "conflict window" has not yet been developed. The fix in this patch makes sure that the async slave thread's state is never set to IDLE, which prevents the rollbacker thread from intervening. The wsrep_query_state change was refactored to go through a dedicated function, so that the idle state change is controlled in one place.
-rw-r--r--sql/slave.cc1
-rw-r--r--sql/sql_class.cc2
-rw-r--r--sql/sql_connect.cc2
-rw-r--r--sql/sql_parse.cc6
-rw-r--r--sql/wsrep_applier.cc4
-rw-r--r--sql/wsrep_hton.cc8
-rw-r--r--sql/wsrep_mysqld.cc11
7 files changed, 23 insertions, 11 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index 3f8c1ce546b..47cfd7412a9 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -5050,6 +5050,7 @@ pthread_handler_t handle_slave_sql(void *arg)
#ifdef WITH_WSREP
thd->wsrep_exec_mode= LOCAL_STATE;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
/* synchronize with wsrep replication */
if (WSREP_ON)
wsrep_ready_wait();
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 97d609d6f1f..494498cad55 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1207,7 +1207,7 @@ void THD::init(void)
#ifdef WITH_WSREP
wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE;
wsrep_conflict_state= NO_CONFLICT;
- wsrep_query_state= QUERY_IDLE;
+ wsrep_thd_set_query_state(this, QUERY_IDLE);
wsrep_last_query_id= 0;
wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED;
wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED;
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 3491796ba85..b2900a20b28 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -1342,7 +1342,7 @@ void do_handle_one_connection(CONNECT *connect)
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->wsrep_query_state= QUERY_EXITING;
+ wsrep_thd_set_query_state(thd, QUERY_EXITING);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 35b6667a25c..56aca365dac 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -1210,7 +1210,7 @@ bool do_command(THD *thd)
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->wsrep_query_state= QUERY_IDLE;
+ wsrep_thd_set_query_state(thd, QUERY_IDLE);
if (thd->wsrep_conflict_state==MUST_ABORT)
{
wsrep_client_rollback(thd);
@@ -1278,7 +1278,7 @@ bool do_command(THD *thd)
thd->store_globals();
}
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
@@ -1575,7 +1575,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
}
mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
{
thd->wsrep_conflict_state= NO_CONFLICT;
diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc
index f7bab7c5fc8..66335c412e2 100644
--- a/sql/wsrep_applier.cc
+++ b/sql/wsrep_applier.cc
@@ -98,7 +98,7 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd,
}
mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
if (thd->wsrep_conflict_state!= REPLAYING)
thd->wsrep_conflict_state= NO_CONFLICT;
mysql_mutex_unlock(&thd->LOCK_thd_data);
@@ -197,7 +197,7 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd,
error:
mysql_mutex_lock(&thd->LOCK_thd_data);
- thd->wsrep_query_state= QUERY_IDLE;
+ wsrep_thd_set_query_state(thd, QUERY_IDLE);
mysql_mutex_unlock(&thd->LOCK_thd_data);
assert(thd->wsrep_exec_mode== REPL_RECV);
diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc
index 6cf29c43447..7c154d6ce6f 100644
--- a/sql/wsrep_hton.cc
+++ b/sql/wsrep_hton.cc
@@ -439,7 +439,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all)
DBUG_RETURN(WSREP_TRX_CERT_FAIL);
}
- thd->wsrep_query_state = QUERY_COMMITTING;
+ wsrep_thd_set_query_state(thd, QUERY_COMMITTING);
mysql_mutex_unlock(&thd->LOCK_thd_data);
cache = get_trans_log(thd);
@@ -473,7 +473,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all)
{
WSREP_DEBUG("empty rbr buffer, query: %s", thd->query());
}
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
DBUG_RETURN(WSREP_TRX_OK);
}
@@ -581,7 +581,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all)
WSREP_DEBUG("commit failed for reason: %d", rcode);
DBUG_PRINT("wsrep", ("replicating commit fail"));
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
if (thd->wsrep_conflict_state == MUST_ABORT) {
thd->wsrep_conflict_state= ABORTED;
@@ -613,7 +613,7 @@ wsrep_run_wsrep_commit(THD *thd, bool all)
DBUG_RETURN(WSREP_TRX_ERROR);
}
- thd->wsrep_query_state= QUERY_EXEC;
+ wsrep_thd_set_query_state(thd, QUERY_EXEC);
mysql_mutex_unlock(&thd->LOCK_thd_data);
DBUG_RETURN(WSREP_TRX_OK);
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index ba6c2d24f77..a2666591660 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2586,6 +2586,17 @@ extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode)
extern "C" void wsrep_thd_set_query_state(
THD *thd, enum wsrep_query_state state)
{
+ /* The async slave thread must never flag the IDLE state, as that may
+ give the rollbacker thread a chance to interfere and roll back the
+ async slave transaction.
+ In fact, the async slave thread is never idle: it reads complete
+ transactions from the relay log and applies each of them as a whole.
+ A BF abort is carried out voluntarily by the async slave thread itself.
+ */
+ if (thd->slave_thread && state == QUERY_IDLE) {
+ WSREP_DEBUG("Skipping IDLE state change for slave SQL");
+ return;
+ }
thd->wsrep_query_state= state;
}