diff options
author | Yuchen Pei <yuchen.pei@mariadb.com> | 2023-01-19 18:28:14 +1100 |
---|---|---|
committer | Yuchen Pei <yuchen.pei@mariadb.com> | 2023-01-25 12:12:17 +1100 |
commit | 284810b3e89e56b35d92284786094e3df2febebe (patch) | |
tree | c82f0f746e1b0edcbcd2996561fb7d705595e6f9 /storage/spider | |
parent | 801c0b4b4712e6c5f19ac9f509535ac38461b121 (diff) | |
download | mariadb-git-284810b3e89e56b35d92284786094e3df2febebe.tar.gz |
MDEV-30370 Fixing spider hang when server aborts
This is Kentoku's patch for MDEV-22979 (e6e41f04f4e + 22a0097727f),
which fixes MDEV-30370.
It changes the wait in the first sts thread, which waits for server
start before executing the init queries for spider, to a timed wait. It
also sets the init_command flag to false when the sts thread is being
freed. With these changes the sts thread checks the flag regularly and
aborts the init queries once it finds that init_command is false. This
avoids the deadlock that causes the problem in MDEV-30370.
It also fixes MDEV-22979 for 10.4, but not 10.5. I have not tested
higher versions for MDEV-22979.
A test for MDEV-29904 has also been run to guard against regression,
since MDEV-27233 is a similar problem and its patch caused that
regression. The test passes for 10.4-11.0.
However, this ad hoc test only works consistently when placed in the
main test suite. We should not place spider tests in the main suite, so
we do not include it in this commit. A patch for MDEV-27912 should fix
this problem and allow a proper test for MDEV-29904. See the comments in
the Jira tickets MDEV-30370 and MDEV-29904 for the ad hoc test case used
for this commit.
Diffstat (limited to 'storage/spider')
-rw-r--r-- | storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result | 3 | ||||
-rw-r--r-- | storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test | 5 | ||||
-rw-r--r-- | storage/spider/spd_table.cc | 21 |
3 files changed, 25 insertions, 4 deletions
diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result new file mode 100644 index 00000000000..df0f6949280 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_30370.result @@ -0,0 +1,3 @@ +# +# MDEV-30370 mariadbd hangs when running with --wsrep-recover and --plugin-load-add=ha_spider.so +# diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test new file mode 100644 index 00000000000..788ea2323f7 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30370.test @@ -0,0 +1,5 @@ +--echo # +--echo # MDEV-30370 mariadbd hangs when running with --wsrep-recover and --plugin-load-add=ha_spider.so +--echo # + +--exec $MYSQLD_BOOTSTRAP_CMD --wsrep-recover --plugin-load-add=ha_spider.so diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index a6d2768703d..3dbc391b918 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -9877,6 +9877,7 @@ void spider_free_sts_threads( ) { bool thread_killed; DBUG_ENTER("spider_free_sts_threads"); + spider_thread->init_command = FALSE; pthread_mutex_lock(&spider_thread->mutex); thread_killed = spider_thread->killed; spider_thread->killed = TRUE; @@ -10033,20 +10034,32 @@ void *spider_table_bg_sts_action( tmp_disable_binlog(thd); thd->security_ctx->skip_grants(); thd->client_capabilities |= CLIENT_MULTI_RESULTS; - if (!(*spd_mysqld_server_started) && !thd->killed) + if (!(*spd_mysqld_server_started) && !thd->killed && !thread->killed) { pthread_mutex_lock(spd_LOCK_server_started); thd->mysys_var->current_cond = spd_COND_server_started; thd->mysys_var->current_mutex = spd_LOCK_server_started; - if (!(*spd_mysqld_server_started) && !thd->killed) + if (!(*spd_mysqld_server_started) && !thd->killed && !thread->killed && + thread->init_command) { - pthread_cond_wait(spd_COND_server_started, spd_LOCK_server_started); + do + { + 
struct timespec abstime; + set_timespec_nsec(abstime, 1000); + error_num = pthread_cond_timedwait(spd_COND_server_started, + spd_LOCK_server_started, &abstime); + } while ( + (error_num == ETIMEDOUT || error_num == ETIME) && + !(*spd_mysqld_server_started) && !thd->killed && !thread->killed && + thread->init_command + ); } pthread_mutex_unlock(spd_LOCK_server_started); thd->mysys_var->current_cond = &thread->cond; thd->mysys_var->current_mutex = &thread->mutex; } - while (spider_init_queries[i].length && !thd->killed) + while (spider_init_queries[i].length && !thd->killed && !thread->killed && + thread->init_command) { dispatch_command(COM_QUERY, thd, spider_init_queries[i].str, (uint) spider_init_queries[i].length, FALSE, FALSE); |