summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2018-06-27 12:37:21 +0300
committerJan Lindström <jan.lindstrom@mariadb.com>2018-06-27 12:37:21 +0300
commitbe5698265a4195586142d1a34fdd1cce9d95d8a1 (patch)
treedbf223c5a39e479dd59ef94c1416cd5a716f1145
parentc6392d52ee2e918a65b05c275286ff4d450eef2c (diff)
downloadmariadb-git-be5698265a4195586142d1a34fdd1cce9d95d8a1.tar.gz
MDEV-15607: mysqld crashes shortly after a node is joined with SST
This is a typical systemd response: it tries to shut down the joiner (due to a "timeout") before the joiner manages to complete SST. Affected functions: wsrep_sst_wait and wsrep_SE_init_wait. While waiting for the operation to finish, use mysql_cond_timedwait instead of mysql_cond_wait, and if the operation has not finished yet, extend the systemd timeout (if needed).
-rw-r--r--sql/wsrep_sst.cc41
1 files changed, 37 insertions, 4 deletions
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
index 4df969496bc..60683bf740c 100644
--- a/sql/wsrep_sst.cc
+++ b/sql/wsrep_sst.cc
@@ -30,6 +30,10 @@
#include <cstdio>
#include <cstdlib>
+#if MYSQL_VERSION_ID < 100200
+# include <my_service_manager.h>
+#endif
+
static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
sizeof(WSREP_SST_OPT_CONF) +
sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
@@ -186,6 +190,9 @@ bool wsrep_before_SE()
static bool sst_complete = false;
static bool sst_needed = false;
+#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
+#define WSREP_TIMEDWAIT_SECONDS 10
+
void wsrep_sst_grab ()
{
WSREP_INFO("wsrep_sst_grab()");
@@ -197,11 +204,25 @@ void wsrep_sst_grab ()
// Wait for end of SST
bool wsrep_sst_wait ()
{
- if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+ struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
+ uint32 total_wtime = 0;
+
+ if (mysql_mutex_lock (&LOCK_wsrep_sst))
+ abort();
+
+ WSREP_INFO("Waiting for SST to complete.");
+
while (!sst_complete)
{
- WSREP_INFO("Waiting for SST to complete.");
- mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
+ mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);
+
+ if (!sst_complete)
+ {
+ total_wtime += wtime.tv_sec;
+ WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
+ service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+ "WSREP state transfer ongoing, current seqno: %ld", local_seqno);
+ }
}
if (local_seqno >= 0)
@@ -1298,10 +1319,22 @@ void wsrep_SE_init_grab()
void wsrep_SE_init_wait()
{
+ struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
+ uint32 total_wtime=0;
+
while (SE_initialized == false)
{
- mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
+ mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);
+
+ if (!SE_initialized)
+ {
+ total_wtime += wtime.tv_sec;
+ WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
+ service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+ "WSREP SE initialization ongoing.");
+ }
}
+
mysql_mutex_unlock (&LOCK_wsrep_sst_init);
}