diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2018-06-27 12:37:21 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2018-06-27 12:37:21 +0300 |
commit | be5698265a4195586142d1a34fdd1cce9d95d8a1 (patch) | |
tree | dbf223c5a39e479dd59ef94c1416cd5a716f1145 | |
parent | c6392d52ee2e918a65b05c275286ff4d450eef2c (diff) | |
download | mariadb-git-be5698265a4195586142d1a34fdd1cce9d95d8a1.tar.gz |
MDEV-15607: mysqld crashed few after node is being joined with sst
This is a typical systemd response: it tries to shut down the
joiner (due to a "timeout") before the joiner manages to complete SST.
wsrep_sst_wait
wsrep_SE_init_wait
While waiting for the operation to finish, use mysql_cond_timedwait
instead of mysql_cond_wait, and if the operation has not finished,
extend the systemd timeout (if needed).
-rw-r--r-- | sql/wsrep_sst.cc | 41 |
1 files changed, 37 insertions, 4 deletions
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 4df969496bc..60683bf740c 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -30,6 +30,10 @@ #include <cstdio> #include <cstdlib> +#if MYSQL_VERSION_ID < 100200 +# include <my_service_manager.h> +#endif + static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 + sizeof(WSREP_SST_OPT_CONF) + sizeof(WSREP_SST_OPT_CONF_SUFFIX) + @@ -186,6 +190,9 @@ bool wsrep_before_SE() static bool sst_complete = false; static bool sst_needed = false; +#define WSREP_EXTEND_TIMEOUT_INTERVAL 30 +#define WSREP_TIMEDWAIT_SECONDS 10 + void wsrep_sst_grab () { WSREP_INFO("wsrep_sst_grab()"); @@ -197,11 +204,25 @@ void wsrep_sst_grab () // Wait for end of SST bool wsrep_sst_wait () { - if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0}; + uint32 total_wtime = 0; + + if (mysql_mutex_lock (&LOCK_wsrep_sst)) + abort(); + + WSREP_INFO("Waiting for SST to complete."); + while (!sst_complete) { - WSREP_INFO("Waiting for SST to complete."); - mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst); + mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime); + + if (!sst_complete) + { + total_wtime += wtime.tv_sec; + WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing, current seqno: %ld", local_seqno); + } } if (local_seqno >= 0) @@ -1298,10 +1319,22 @@ void wsrep_SE_init_grab() void wsrep_SE_init_wait() { + struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0}; + uint32 total_wtime=0; + while (SE_initialized == false) { - mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime); + + if (!SE_initialized) + { + total_wtime += wtime.tv_sec; + WSREP_DEBUG("Waiting for SST to complete. 
waited %u secs.", total_wtime); + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP SE initialization ongoing."); + } } + mysql_mutex_unlock (&LOCK_wsrep_sst_init); } |