summaryrefslogtreecommitdiff
path: root/sql/slave.cc
diff options
context:
space:
mode:
author: Brandon Nesterenko <brandon.nesterenko@mariadb.com> 2021-10-20 20:13:45 -0600
committer: Brandon Nesterenko <brandon.nesterenko@mariadb.com> 2021-11-01 09:51:30 -0600
commit: e9c3de0502a14b720ead3cf053c3d3f4363b65c5 (patch)
tree: 237f202a5e949a8a162f5f607f631f92831ae52e /sql/slave.cc
parent: 36f8cca6f31941ca6bf5f45cbfdbc9ea676707d9 (diff)
download: mariadb-git-10.4-MDEV-11853.tar.gz
MDEV-11853: semisync thread can be killed after sync binlog but before ACK in the sync state [10.4-MDEV-11853]
Problem:
========
If a primary is shut down during an active semi-sync connection, in the period when the primary is awaiting an ACK, the primary hard kills the active communication thread and does not ensure the transaction was received by a replica. This can lead to an inconsistent replication state.

Solution:
========
During shutdown, the primary should wait for an ACK or a timeout before hard killing a thread that is awaiting a communication. We extend the `SHUTDOWN WAIT FOR SLAVES` logic to identify and ignore any threads waiting for a semi-sync ACK in phase 1. Then, before stopping the ack receiver thread, the shutdown is delayed until all waiting semi-sync connections receive an ACK or time out. The connections are then killed in phase 2.

Reviewed By:
============
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- sql/slave.cc 13
1 files changed, 11 insertions, 2 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index 31bd9372a14..e712e8a7010 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -4520,6 +4520,7 @@ pthread_handler_t handle_slave_io(void *arg)
uint retry_count;
bool suppress_warnings;
int ret;
+ int err_stopping_semisync;
rpl_io_thread_info io_info;
#ifndef DBUG_OFF
mi->dbug_do_disconnect= false;
@@ -4848,6 +4849,7 @@ Stopping slave I/O thread due to out-of-memory error from master");
not cause the slave IO thread to stop, and the error messages are
already reported.
*/
+ DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply", my_sleep(800000););
(void)repl_semisync_slave.slave_reply(mi);
}
@@ -4919,7 +4921,7 @@ err:
tmp.c_ptr_safe());
sql_print_information("master was %s:%d", mi->host, mi->port);
}
- repl_semisync_slave.slave_stop(mi);
+ err_stopping_semisync= repl_semisync_slave.slave_stop(mi);
thd->reset_query();
thd->reset_db(&null_clex_str);
if (mysql)
@@ -4935,7 +4937,14 @@ err:
#ifdef SIGNAL_WITH_VIO_CLOSE
thd->clear_active_vio();
#endif
- mysql_close(mysql);
+ /*
+ Don't close the connection if it is semi-sync and failed to close
+ gracefully in `repl_semisync_slave.slave_stop`, e.g. if the master is
+ shutting down and force killed the kill_mysql connection before issuing
+ `KILL {mysql->thread_id}`. Let it time out instead.
+ */
+ if (!err_stopping_semisync)
+ mysql_close(mysql);
mi->mysql=0;
}
write_ignored_events_info_to_relay_log(thd, mi);