summary | refs | log | tree | commit | diff
path: root/sql
diff options
context:
space:
mode:
author: seppo <seppo.jaakola@iki.fi> 2020-02-23 10:29:42 +0200
committer: GitHub <noreply@github.com> 2020-02-23 10:29:42 +0200
commit: 4618c974e4b467624d38bb256c2993afb4ac93b1 (patch)
tree: 4e6ccc608bb8dcc05f8a9d01d81be1a11bdb5419 /sql
parent: 3ce49a0a5225eb7d185361e1ece65d03813ec550 (diff)
download: mariadb-git-4618c974e4b467624d38bb256c2993afb4ac93b1.tar.gz
MDEV-21723 Async slave thread BF abort and replaying fixes (#1448)
If an async replication slave thread conflicts with cluster replication, then the async slave transaction should be BF aborted and, depending on the state of the async slave transaction's execution, potentially also replayed. There were problems in the BF abort implementation, and the replaying was not started. This pull request contains fixes which make sure that if the async slave thread is marked to abort and replay, it will completely carry out the rollback and release all locks and resources before starting the replay. After replaying, the async slave transaction is treated as successful, so the slave thread will continue as usual, handling the next replication event. There is also a new mtr test, galera.galera_slave_replay, which stresses both a certification failure for the async slave thread and a successful BF abort followed by replaying.
Diffstat (limited to 'sql')
-rw-r--r-- sql/log_event.cc | 8
-rw-r--r-- sql/slave.cc | 24
-rw-r--r-- sql/wsrep_thd.cc | 8
3 files changed, 35 insertions, 5 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc
index d5066fdabdf..29cb4d6d7d7 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -8567,8 +8567,16 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
res= trans_commit(thd); /* Automatically rolls back on error. */
thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ if (WSREP(thd)) mysql_mutex_lock(&thd->LOCK_thd_data);
+ if ((!res || (WSREP(thd) && thd->wsrep_conflict_state == MUST_REPLAY)) && sub_id)
+#else
if (!res && sub_id)
+#endif /* WITH_WSREP */
rpl_global_gtid_slave_state->update_state_hash(sub_id, &gtid, rgi);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) mysql_mutex_unlock(&thd->LOCK_thd_data);
+#endif /* WITH_WSREP */
/*
Increment the global status commit count variable
diff --git a/sql/slave.cc b/sql/slave.cc
index 01a86979648..3f8c1ce546b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3567,14 +3567,34 @@ apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
exec_res= ev->apply_event(rgi);
#ifdef WITH_WSREP
- if (exec_res && thd->wsrep_conflict_state != NO_CONFLICT)
- {
+ if (exec_res)
+ {
+ switch (thd->wsrep_conflict_state) {
+ case NO_CONFLICT: break;
+ case MUST_REPLAY:
+ WSREP_DEBUG("SQL apply failed for MUST_REPLAY, res %d", exec_res);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ wsrep_replay_transaction(thd);
+ switch (thd->wsrep_conflict_state) {
+ case NO_CONFLICT:
+ exec_res = 0; /* replaying succeeded, and slave may continue */
+ break;
+ case ABORTED: break; /* replaying has failed, trx is rolled back */
+ default:
+ WSREP_WARN("unexpected result of slave transaction replaying: %lld, %d",
+ thd->thread_id, thd->wsrep_conflict_state);
+ }
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ break;
+ default:
WSREP_DEBUG("SQL apply failed, res %d conflict state: %d",
exec_res, thd->wsrep_conflict_state);
rli->abort_slave= 1;
rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
"Node has dropped from cluster");
+ break;
}
+ }
#endif
#ifndef DBUG_OFF
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
index de2ad6d1a1f..a7b6e8ff1b1 100644
--- a/sql/wsrep_thd.cc
+++ b/sql/wsrep_thd.cc
@@ -153,8 +153,9 @@ static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow)
if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay");
/* thd->system_thread_info.rpl_sql_info isn't initialized. */
- thd->system_thread_info.rpl_sql_info=
- new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter);
+ if (!thd->slave_thread)
+ thd->system_thread_info.rpl_sql_info=
+ new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter);
thd->wsrep_exec_mode= REPL_RECV;
thd->net.vio= 0;
@@ -181,7 +182,8 @@ static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow)
thd->user_time = shadow->user_time;
thd->reset_db(shadow->db, shadow->db_length);
- delete thd->system_thread_info.rpl_sql_info;
+ if (!thd->slave_thread)
+ delete thd->system_thread_info.rpl_sql_info;
delete thd->wsrep_rgi->rli->mi;
delete thd->wsrep_rgi->rli;