summary refs log tree commit diff
diff options
context:
space:
mode:
author Yuchen Pei <yuchen.pei@mariadb.com> 2023-01-19 18:28:14 +1100
committer Yuchen Pei <yuchen.pei@mariadb.com> 2023-01-25 12:12:17 +1100
commit 284810b3e89e56b35d92284786094e3df2febebe (patch)
tree c82f0f746e1b0edcbcd2996561fb7d705595e6f9
parent 801c0b4b4712e6c5f19ac9f509535ac38461b121 (diff)
download mariadb-git-284810b3e89e56b35d92284786094e3df2febebe.tar.gz
MDEV-30370 Fixing spider hang when server aborts
This is Kentoku's patch for MDEV-22979 (e6e41f04f4e + 22a0097727f), which also fixes MDEV-30370. It changes the wait to a timed wait for the first sts thread, which waits for server start before executing the init queries for spider. It also sets the init_command flag to false when the sts thread is being freed. With these changes the sts thread can check the flag regularly and abort the init queries when it finds that init_command is false. This avoids the deadlock that causes the problem in MDEV-30370. It also fixes MDEV-22979 for 10.4, but not 10.5. I have not tested higher versions for MDEV-22979. A test has also been done on MDEV-29904 to avoid regression, given that MDEV-27233 is a similar problem and its patch caused the regression. The test passes for 10.4-11.0. However, this ad hoc test only works consistently when placed in the main test suite. We should not place spider tests in the main suite, so we do not include it in this commit. A patch for MDEV-27912 should fix this problem and allow a proper test for MDEV-29904. See the comments in the Jira tickets MDEV-30370 and MDEV-29904 for the ad hoc test case used for this commit.
-rw-r--r-- storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result 3
-rw-r--r-- storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test 5
-rw-r--r-- storage/spider/spd_table.cc 21
3 files changed, 25 insertions, 4 deletions
diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result
new file mode 100644
index 00000000000..df0f6949280
--- /dev/null
+++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result
@@ -0,0 +1,3 @@
+#
+# MDEV-30370 mariadbd hangs when running with --wsrep-recover and --plugin-load-add=ha_spider.so
+#
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test
new file mode 100644
index 00000000000..788ea2323f7
--- /dev/null
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test
@@ -0,0 +1,5 @@
+--echo #
+--echo # MDEV-30370 mariadbd hangs when running with --wsrep-recover and --plugin-load-add=ha_spider.so
+--echo #
+
+--exec $MYSQLD_BOOTSTRAP_CMD --wsrep-recover --plugin-load-add=ha_spider.so
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc
index a6d2768703d..3dbc391b918 100644
--- a/storage/spider/spd_table.cc
+++ b/storage/spider/spd_table.cc
@@ -9877,6 +9877,7 @@ void spider_free_sts_threads(
) {
bool thread_killed;
DBUG_ENTER("spider_free_sts_threads");
+ spider_thread->init_command = FALSE;
pthread_mutex_lock(&spider_thread->mutex);
thread_killed = spider_thread->killed;
spider_thread->killed = TRUE;
@@ -10033,20 +10034,32 @@ void *spider_table_bg_sts_action(
tmp_disable_binlog(thd);
thd->security_ctx->skip_grants();
thd->client_capabilities |= CLIENT_MULTI_RESULTS;
- if (!(*spd_mysqld_server_started) && !thd->killed)
+ if (!(*spd_mysqld_server_started) && !thd->killed && !thread->killed)
{
pthread_mutex_lock(spd_LOCK_server_started);
thd->mysys_var->current_cond = spd_COND_server_started;
thd->mysys_var->current_mutex = spd_LOCK_server_started;
- if (!(*spd_mysqld_server_started) && !thd->killed)
+ if (!(*spd_mysqld_server_started) && !thd->killed && !thread->killed &&
+ thread->init_command)
{
- pthread_cond_wait(spd_COND_server_started, spd_LOCK_server_started);
+ do
+ {
+ struct timespec abstime;
+ set_timespec_nsec(abstime, 1000);
+ error_num = pthread_cond_timedwait(spd_COND_server_started,
+ spd_LOCK_server_started, &abstime);
+ } while (
+ (error_num == ETIMEDOUT || error_num == ETIME) &&
+ !(*spd_mysqld_server_started) && !thd->killed && !thread->killed &&
+ thread->init_command
+ );
}
pthread_mutex_unlock(spd_LOCK_server_started);
thd->mysys_var->current_cond = &thread->cond;
thd->mysys_var->current_mutex = &thread->mutex;
}
- while (spider_init_queries[i].length && !thd->killed)
+ while (spider_init_queries[i].length && !thd->killed && !thread->killed &&
+ thread->init_command)
{
dispatch_command(COM_QUERY, thd, spider_init_queries[i].str,
(uint) spider_init_queries[i].length, FALSE, FALSE);