diff options
author | Sergei Golubchik <serg@mariadb.org> | 2021-02-12 17:31:25 +0100 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2021-02-14 23:18:42 +0100 |
commit | 26965387230a9b13fb716344477d108bb87dea98 (patch) | |
tree | ad6c110d3d82f073554489610265b56f758efb73 /sql | |
parent | b91e77cff3fb5fbb32ebb061ed342469b434c4e8 (diff) | |
download | mariadb-git-26965387230a9b13fb716344477d108bb87dea98.tar.gz |
updating @@wsrep_cluster_address deadlocks
wsrep_cluster_address_update() causes LOCK_wsrep_slave_threads
to be locked under LOCK_wsrep_cluster_config, while normally
the order should be the opposite.
Fix: don't protect @@wsrep_cluster_address value with the
LOCK_wsrep_cluster_config, LOCK_global_system_variables is enough.
Only protect wsrep reinitialization with the LOCK_wsrep_cluster_config.
And make it use a local copy of the global @@wsrep_cluster_address.
Also, introduce a helper function that checks whether
wsrep_cluster_address is set and also asserts that it can be safely
read by the caller.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/mysqld.cc | 9 | ||||
-rw-r--r-- | sql/sys_vars.cc | 5 | ||||
-rw-r--r-- | sql/wsrep_check_opts.cc | 2 | ||||
-rw-r--r-- | sql/wsrep_mysqld.cc | 13 | ||||
-rw-r--r-- | sql/wsrep_mysqld.h | 10 | ||||
-rw-r--r-- | sql/wsrep_thd.cc | 24 | ||||
-rw-r--r-- | sql/wsrep_var.cc | 28 |
7 files changed, 46 insertions, 45 deletions
diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 6aecb0cf11e..71298e2ba00 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -5804,9 +5804,12 @@ int mysqld_main(int argc, char **argv) wsrep_init_startup (false); } - WSREP_DEBUG("Startup creating %ld applier threads running %lu", - wsrep_slave_threads - 1, wsrep_running_applier_threads); - wsrep_create_appliers(wsrep_slave_threads - 1); + if (wsrep_cluster_address_exists()) + { + WSREP_DEBUG("Startup creating %ld applier threads running %lu", + wsrep_slave_threads - 1, wsrep_running_applier_threads); + wsrep_create_appliers(wsrep_slave_threads - 1); + } } } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 5f5f9b5daf8..87ab62c4261 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5445,13 +5445,12 @@ static Sys_var_charptr Sys_wsrep_cluster_name( ON_CHECK(wsrep_cluster_name_check), ON_UPDATE(wsrep_cluster_name_update)); -static PolyLock_mutex PLock_wsrep_cluster_config(&LOCK_wsrep_cluster_config); static Sys_var_charptr Sys_wsrep_cluster_address ( "wsrep_cluster_address", "Address to initially connect to cluster", PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), CMD_LINE(REQUIRED_ARG), IN_SYSTEM_CHARSET, DEFAULT(""), - &PLock_wsrep_cluster_config, NOT_IN_BINLOG, + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(wsrep_cluster_address_check), ON_UPDATE(wsrep_cluster_address_update)); @@ -5482,7 +5481,7 @@ static Sys_var_ulong Sys_wsrep_slave_threads( "wsrep_slave_threads", "Number of slave appliers to launch", GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), - &PLock_wsrep_cluster_config, NOT_IN_BINLOG, + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(wsrep_slave_threads_update)); diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc index 935bacffffc..e5a0dcb2ede 100644 --- a/sql/wsrep_check_opts.cc +++ b/sql/wsrep_check_opts.cc @@ -63,7 +63,7 @@ int wsrep_check_opts() else { // non-mysqldump SST requires wsrep_cluster_address on startup - if (!wsrep_cluster_address || !wsrep_cluster_address[0]) + if (!wsrep_cluster_address_exists()) { WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " "configured on startup.", wsrep_sst_method); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 8e68f7229eb..fc54c7f6ec4 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -878,13 +878,13 @@ void wsrep_init_startup (bool sst_first) if (!strcmp(wsrep_provider, WSREP_NONE)) return; /* Skip replication start if no cluster address */ - if (!wsrep_cluster_address || wsrep_cluster_address[0] == 0) return; + if (!wsrep_cluster_address_exists()) return; /* Read value of wsrep_new_cluster before wsrep_start_replication(), the value is reset to FALSE inside wsrep_start_replication. */ - if (!wsrep_start_replication()) unireg_abort(1); + if (!wsrep_start_replication(wsrep_cluster_address)) unireg_abort(1); wsrep_create_rollbacker(); wsrep_create_appliers(1); @@ -1034,7 +1034,7 @@ void wsrep_shutdown_replication() my_pthread_setspecific_ptr(THR_THD, NULL); } -bool wsrep_start_replication() +bool wsrep_start_replication(const char *wsrep_cluster_address) { int rcode; WSREP_DEBUG("wsrep_start_replication"); @@ -1049,12 +1049,7 @@ bool wsrep_start_replication() return true; } - if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) - { - // if provider is non-trivial, but no address is specified, wait for address - WSREP_DEBUG("wsrep_start_replication exit due to empty address"); - return true; - } + DBUG_ASSERT(wsrep_cluster_address[0]); bool const bootstrap(TRUE == wsrep_new_cluster); wsrep_new_cluster= FALSE; diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 29b1c4cf1f4..16663f7152c 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -203,7 +203,7 @@ extern void wsrep_close_applier_threads(int count); /* new defines */ extern void wsrep_stop_replication(THD *thd); -extern bool wsrep_start_replication(); +extern bool wsrep_start_replication(const char *wsrep_cluster_address); extern void wsrep_shutdown_replication(); extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); @@ -280,6 +280,13 @@ void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...); #define WSREP_PROVIDER_EXISTS \ (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN)) +static inline bool wsrep_cluster_address_exists() +{ + if (mysqld_server_started) + mysql_mutex_assert_owner(&LOCK_global_system_variables); + return wsrep_cluster_address && wsrep_cluster_address[0]; +} + #define WSREP_QUERY(thd) (thd->query()) extern my_bool wsrep_ready_get(); @@ -501,6 +508,7 @@ wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, #define wsrep_thr_deinit() do {} while(0) #define wsrep_init_globals() do {} while(0) #define wsrep_create_appliers(X) do {} while(0) +#define wsrep_cluster_address_exists() (false) #endif /* WITH_WSREP */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 26cfa4c58c4..2d814c62424 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -125,11 +125,7 @@ bool wsrep_create_appliers(long threads, bool mutex_protected) return false; } - if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) - { - WSREP_DEBUG("wsrep_create_appliers exit due to empty address"); - return false; - } + DBUG_ASSERT(wsrep_cluster_address[0]); long wsrep_threads=0; @@ -284,16 +280,14 @@ static void wsrep_rollback_process(THD *rollbacker, void wsrep_create_rollbacker() { - if (wsrep_cluster_address && wsrep_cluster_address[0] != 0) - { - Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process, - WSREP_ROLLBACKER_THREAD, - pthread_self())); - - /* create rollbacker */ - if (create_wsrep_THD(args, false)) - WSREP_WARN("Can't create thread to manage wsrep rollback"); - } + DBUG_ASSERT(wsrep_cluster_address[0]); + Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process, + WSREP_ROLLBACKER_THREAD, + pthread_self())); + + /* create rollbacker */ + if (create_wsrep_THD(args, false)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); } /* diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 239daadc4f6..35f14532e85 100644 --- a/sql/wsrep_var.cc +++ b/sql/wsrep_var.cc @@ -554,30 +554,31 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) /* stop replication is heavy operation, and includes closing all client connections. Closing clients may need to get LOCK_global_system_variables at least in MariaDB. - - Note: releasing LOCK_global_system_variables may cause race condition, if - there can be several concurrent clients changing wsrep_provider */ + char *tmp= my_strdup(wsrep_cluster_address, MYF(MY_WME)); WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address); mysql_mutex_unlock(&LOCK_global_system_variables); + + mysql_mutex_lock(&LOCK_wsrep_cluster_config); wsrep_stop_replication(thd); - if (wsrep_start_replication()) + if (*tmp && wsrep_start_replication(tmp)) { wsrep_create_rollbacker(); WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu", wsrep_slave_threads, wsrep_running_applier_threads); wsrep_create_appliers(wsrep_slave_threads); } - /* locking order to be enforced is: - 1. LOCK_global_system_variables - 2. LOCK_wsrep_cluster_config - => have to juggle mutexes to comply with this - */ - mysql_mutex_unlock(&LOCK_wsrep_cluster_config); + mysql_mutex_lock(&LOCK_global_system_variables); - mysql_mutex_lock(&LOCK_wsrep_cluster_config); + if (strcmp(tmp, wsrep_cluster_address)) + { + my_free((void*)wsrep_cluster_address); + wsrep_cluster_address= tmp; + } + else + my_free(tmp); return false; } @@ -674,11 +675,12 @@ static void wsrep_slave_count_change_update () bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) { - mysql_mutex_unlock(&LOCK_wsrep_cluster_config); + if (!wsrep_cluster_address_exists()) + return false; + mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_wsrep_slave_threads); mysql_mutex_lock(&LOCK_global_system_variables); - mysql_mutex_lock(&LOCK_wsrep_cluster_config); bool res= false; wsrep_slave_count_change_update(); |