diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-01-22 11:58:50 +0200 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-01-22 16:55:59 +0200 |
commit | 8a931e4d16303b76e4744428faaab7b200f408ec (patch) | |
tree | 98f3f5836ff8c3bf4a0d7e80dbb9cc3862c6b4e0 /sql/wsrep_sst.cc | |
parent | 6918157e98b08e4465766990c4bb2edd6b86dbe8 (diff) | |
download | mariadb-git-8a931e4d16303b76e4744428faaab7b200f408ec.tar.gz |
MDEV-17571 : Make systemd timeout behavior more compatible with long Galera SSTs
This is the 10.4 version.
The idea is to create a monitor thread for both the donor and the joiner that
periodically extends the systemd timeout, if needed, while the SST is being
processed. In 10.4 the actual SST is executed by running an SST script
and exchanging messages over a pipe using blocking fgets. This fix
starts the monitoring thread before the SST script is started and
stops it once the SST has completed.
Diffstat (limited to 'sql/wsrep_sst.cc')
-rw-r--r-- | sql/wsrep_sst.cc | 194 |
1 files changed, 166 insertions, 28 deletions
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 74a8b9dff05..02f7d4b6760 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -49,6 +49,126 @@ const char* wsrep_sst_auth = NULL; static const char* sst_auth_real = NULL; my_bool wsrep_sst_donor_rejects_queries= FALSE; +#define WSREP_EXTEND_TIMEOUT_INTERVAL 60 +#define WSREP_TIMEDWAIT_SECONDS 30 + +bool sst_joiner_completed = false; +bool sst_donor_completed = false; + +struct sst_thread_arg +{ + const char* cmd; + char** env; + char* ret_str; + int err; + mysql_mutex_t lock; + mysql_cond_t cond; + + sst_thread_arg (const char* c, char** e) + : cmd(c), env(e), ret_str(0), err(-1) + { + mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL); + } + + ~sst_thread_arg() + { + mysql_cond_destroy (&cond); + mysql_mutex_unlock (&lock); + mysql_mutex_destroy (&lock); + } +}; + +static void wsrep_donor_monitor_end(void) +{ + mysql_mutex_lock(&LOCK_wsrep_donor_monitor); + sst_donor_completed= true; + mysql_cond_signal(&COND_wsrep_donor_monitor); + mysql_mutex_unlock(&LOCK_wsrep_donor_monitor); +} + +static void wsrep_joiner_monitor_end(void) +{ + mysql_mutex_lock(&LOCK_wsrep_joiner_monitor); + sst_joiner_completed= true; + mysql_cond_signal(&COND_wsrep_joiner_monitor); + mysql_mutex_unlock(&LOCK_wsrep_joiner_monitor); +} + +static void* wsrep_sst_donor_monitor_thread(void *arg __attribute__((unused))) +{ + int ret= 0; + unsigned long time_waited= 0; + + mysql_mutex_lock(&LOCK_wsrep_donor_monitor); + + WSREP_INFO("Donor monitor thread started to monitor"); + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + + while (!sst_donor_completed) + { + timespec ts; + set_timespec(ts, WSREP_TIMEDWAIT_SECONDS); + time_t start_time= time(NULL); + ret= mysql_cond_timedwait(&COND_wsrep_donor_monitor, &LOCK_wsrep_donor_monitor, &ts); + time_t end_time= time(NULL); + time_waited+= 
difftime(end_time, start_time); + + if (ret == ETIMEDOUT && !sst_donor_completed) + { + WSREP_DEBUG("Donor waited %lu sec, extending systemd startup timeout as SST" + "is not completed", + time_waited); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing..."); + } + } + + WSREP_INFO("Donor monitor thread ended with total time %lu sec", time_waited); + mysql_mutex_unlock(&LOCK_wsrep_donor_monitor); + + return NULL; +} + +static void* wsrep_sst_joiner_monitor_thread(void *arg __attribute__((unused))) +{ + int ret= 0; + unsigned long time_waited= 0; + + mysql_mutex_lock(&LOCK_wsrep_joiner_monitor); + + WSREP_INFO("Joiner monitor thread started to monitor"); + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + + while (!sst_joiner_completed) + { + timespec ts; + set_timespec(ts, WSREP_TIMEDWAIT_SECONDS); + time_t start_time= time(NULL); + ret= mysql_cond_timedwait(&COND_wsrep_joiner_monitor, &LOCK_wsrep_joiner_monitor, &ts); + time_t end_time= time(NULL); + time_waited+= difftime(end_time, start_time); + + if (ret == ETIMEDOUT && !sst_joiner_completed) + { + WSREP_DEBUG("Joiner waited %lu sec, extending systemd startup timeout as SST" + "is not completed", + time_waited); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing..."); + } + } + + WSREP_INFO("Joiner monitor thread ended with total time %lu sec", time_waited); + mysql_mutex_unlock(&LOCK_wsrep_joiner_monitor); + + return NULL; +} + bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) { if ((! 
var->save_result.string_value.str) || @@ -193,6 +313,7 @@ static void wsrep_sst_complete (THD* thd, { Wsrep_client_service client_service(thd, thd->wsrep_cs()); Wsrep_server_state::instance().sst_received(client_service, rcode); + wsrep_joiner_monitor_end(); } /* @@ -253,30 +374,6 @@ void wsrep_sst_received (THD* thd, } } -struct sst_thread_arg -{ - const char* cmd; - char** env; - char* ret_str; - int err; - mysql_mutex_t lock; - mysql_cond_t cond; - - sst_thread_arg (const char* c, char** e) - : cmd(c), env(e), ret_str(0), err(-1) - { - mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); - mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL); - } - - ~sst_thread_arg() - { - mysql_cond_destroy (&cond); - mysql_mutex_unlock (&lock); - mysql_mutex_destroy (&lock); - } -}; - static int sst_scan_uuid_seqno (const char* str, wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) { @@ -442,10 +539,12 @@ static void* sst_joiner_thread (void* a) wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; - // in case of successfull receiver start, wait for SST completion/end + // in case of successfull receiver start, wait for SST + // completion/end char* tmp= my_fgets (out, out_len, proc.pipe()); proc.wait(); + err= EINVAL; if (!tmp) @@ -989,16 +1088,33 @@ static ssize_t sst_prepare_other (const char* method, } } - pthread_t tmp; + pthread_t tmp, monitor; sst_thread_arg arg(cmd_str(), env()); + mysql_mutex_lock (&arg.lock); - ret = mysql_thread_create (key_wsrep_sst_joiner, &tmp, NULL, sst_joiner_thread, &arg); + + ret = mysql_thread_create (key_wsrep_sst_joiner_monitor, &monitor, NULL, wsrep_sst_joiner_monitor_thread, NULL); + if (ret) { WSREP_ERROR("sst_prepare_other(): mysql_thread_create() failed: %d (%s)", ret, strerror(ret)); return -ret; } + + sst_joiner_completed= false; + + ret= mysql_thread_create (key_wsrep_sst_joiner, &tmp, NULL, sst_joiner_thread, &arg); + + if (ret) + { + WSREP_ERROR("sst_prepare_other(): 
mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + + pthread_detach(monitor); + return -ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); *addr_out= arg.ret_str; @@ -1012,6 +1128,7 @@ static ssize_t sst_prepare_other (const char* method, } pthread_detach (tmp); + pthread_detach (monitor); return ret; } @@ -1509,6 +1626,7 @@ static void* sst_donor_thread (void* a) wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can // operate with wsrep_ready == OFF + wsp::process proc(arg->cmd, "r", arg->env); err= -proc.error(); @@ -1604,9 +1722,13 @@ wait_signal: wsrep::gtid gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), wsrep::seqno(err ? wsrep::seqno::undefined() : wsrep::seqno(ret_seqno))); + Wsrep_server_state::instance().sst_sent(gtid, err); + proc.wait(); + wsrep_donor_monitor_end(); + return NULL; } @@ -1681,14 +1803,18 @@ static int sst_donate_other (const char* method, pthread_t tmp; sst_thread_arg arg(cmd_str(), env); + mysql_mutex_lock (&arg.lock); - ret = mysql_thread_create (key_wsrep_sst_donor, &tmp, NULL, sst_donor_thread, &arg); + + ret= mysql_thread_create (key_wsrep_sst_donor, &tmp, NULL, sst_donor_thread, &arg); + if (ret) { WSREP_ERROR("sst_donate_other(): mysql_thread_create() failed: %d (%s)", ret, strerror(ret)); return ret; } + mysql_cond_wait (&arg.cond, &arg.lock); WSREP_INFO("sst_donor_thread signaled with %d", arg.err); @@ -1732,6 +1858,18 @@ int wsrep_sst_donate(const std::string& msg, } } + sst_donor_completed= false; + pthread_t monitor; + + ret= mysql_thread_create (key_wsrep_sst_donor_monitor, &monitor, NULL, wsrep_sst_donor_monitor_thread, NULL); + + if (ret) + { + WSREP_ERROR("sst_donate: mysql_thread_create() failed: %d (%s)", + ret, strerror(ret)); + return WSREP_CB_FAILURE; + } + if (!strcmp (WSREP_SST_MYSQLDUMP, method)) { ret= sst_donate_mysqldump(data, current_gtid, bypass, env()); |