summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManish Kumar <manish.4.kumar@oracle.com>2012-01-23 17:39:37 +0530
committerManish Kumar <manish.4.kumar@oracle.com>2012-01-23 17:39:37 +0530
commit58a3147c6f39db6a9df2ebadd1dbc62fc361b0c4 (patch)
tree65b1737062a06f0e93eac8106ad1248385c49cbb
parentd29e871b681dd6934303384257b34379c0379116 (diff)
downloadmariadb-git-58a3147c6f39db6a9df2ebadd1dbc62fc361b0c4.tar.gz
BUG#11752315 - 43460: STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING TO RECONNECT TO
Problem : The basic problem is the way the thread sleeps in mysql-5.5 and also in mysql-5.1 when we execute a stop slave on windows platform. On windows platform if the stop slave is executed after the master dies, we have this long wait before the stop slave return a value. This is because there is a sleep of the thread. The sleep is uninterruptable in the two above version, which was fixed by Davi patch for the BUG#11765860 for mysql-trunk. Backporting his patch for mysql-5.5 fixes the problem. Solution : A new pair of mutex and condition variable is introduced to synchronize thread sleep and finalization. A new mutex is required because the slave threads are terminated while holding the slave thread locks (run_lock), which can not be relinquished during termination as this would affect the lock order.
-rw-r--r--mysql-test/suite/rpl/r/rpl_start_stop_slave.result10
-rw-r--r--mysql-test/suite/rpl/t/rpl_start_stop_slave.test44
-rw-r--r--sql/mysqld.cc8
-rw-r--r--sql/mysqld.h4
-rw-r--r--sql/rpl_mi.cc4
-rw-r--r--sql/rpl_mi.h4
-rw-r--r--sql/rpl_rli.cc4
-rw-r--r--sql/rpl_rli.h6
-rw-r--r--sql/slave.cc68
9 files changed, 112 insertions, 40 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_start_stop_slave.result b/mysql-test/suite/rpl/r/rpl_start_stop_slave.result
new file mode 100644
index 00000000000..43a7712bdf1
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_start_stop_slave.result
@@ -0,0 +1,10 @@
+include/master-slave.inc
+[connection master]
+set @time_before_kill := (select CURRENT_TIMESTAMP);
+[Time before the query]
+[Connection ID of the slave I/O thread found]
+kill <connection_id>;
+set @time_after_kill := (select CURRENT_TIMESTAMP);
+[Time after the query]
+[Killing of the slave IO thread was successful]
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_start_stop_slave.test b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test
new file mode 100644
index 00000000000..69d39a066b1
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test
@@ -0,0 +1,44 @@
+#
+#BUG#11752315 : STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING
+# TO RECONNECT TO
+#
+# ==== Purpose ====
+#
+#Tests that the slave does not go to a sleep for a long duration after the
+#master is killed and we do a START_SLAVE and STOP_SLAVE.
+#
+# ==== Method ====
+#
+#This is a new functionality of having an interruptable sleep of the slave.
+#We find the thread id for the slave thread. On finding the thread ID of the
+#slave thread we kill the slave thread. A successful kill in less than 60 sec
+#should serve the purpose of checking the functionality.
+#
+
+--source include/have_log_bin.inc
+--source include/master-slave.inc
+
+connection slave;
+--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
+
+set @time_before_kill := (select CURRENT_TIMESTAMP);
+
+--echo [Time before the query]
+--echo [Connection ID of the slave I/O thread found]
+
+--replace_regex /kill [0-9]*/kill <connection_id>/
+--eval kill $connection_id
+
+set @time_after_kill := (select CURRENT_TIMESTAMP);
+
+--echo [Time after the query]
+
+if(`select TIMESTAMPDIFF(SECOND,@time_after_kill, @time_before_kill) > 60`)
+{
+--echo # assert : The difference between the timestamps 'time_after_kill' and 'time_before_kill' should be less than 60sec.
+--die
+}
+
+--echo [Killing of the slave IO thread was successful]
+# End of test
+--source include/rpl_end.inc
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 31257777b27..74c5c23aaa2 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -7682,8 +7682,10 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
key_master_info_data_lock, key_master_info_run_lock,
+ key_master_info_sleep_lock,
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
+ key_relay_log_info_sleep_lock,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
key_PARTITION_LOCK_auto_inc;
@@ -7729,10 +7731,12 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
{ &key_master_info_data_lock, "Master_info::data_lock", 0},
{ &key_master_info_run_lock, "Master_info::run_lock", 0},
+ { &key_master_info_sleep_lock, "Master_info::sleep_lock", 0},
{ &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
{ &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
{ &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
{ &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
+ { &key_relay_log_info_sleep_lock, "Relay_log_info::sleep_lock", 0},
{ &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
{ &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
{ &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
@@ -7768,8 +7772,10 @@ PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
key_delayed_insert_cond, key_delayed_insert_cond_client,
key_item_func_sleep_cond, key_master_info_data_cond,
key_master_info_start_cond, key_master_info_stop_cond,
+ key_master_info_sleep_cond,
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
+ key_relay_log_info_sleep_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
PSI_cond_key key_RELAYLOG_update_cond;
@@ -7797,10 +7803,12 @@ static PSI_cond_info all_server_conds[]=
{ &key_master_info_data_cond, "Master_info::data_cond", 0},
{ &key_master_info_start_cond, "Master_info::start_cond", 0},
{ &key_master_info_stop_cond, "Master_info::stop_cond", 0},
+ { &key_master_info_sleep_cond, "Master_info::sleep_cond", 0},
{ &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
{ &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
{ &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
{ &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
+ { &key_relay_log_info_sleep_cond, "Relay_log_info::sleep_cond", 0},
{ &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
{ &key_user_level_lock_cond, "User_level_lock::cond", 0},
{ &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 70089f56e4e..eb885417da3 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -244,8 +244,10 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
key_LOCK_table_share, key_LOCK_thd_data,
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
key_master_info_data_lock, key_master_info_run_lock,
+ key_master_info_sleep_lock,
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
+ key_relay_log_info_sleep_lock,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc;
extern PSI_mutex_key key_RELAYLOG_LOCK_index;
@@ -264,8 +266,10 @@ extern PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
key_delayed_insert_cond, key_delayed_insert_cond_client,
key_item_func_sleep_cond, key_master_info_data_cond,
key_master_info_start_cond, key_master_info_stop_cond,
+ key_master_info_sleep_cond,
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
+ key_relay_log_info_sleep_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
extern PSI_cond_key key_RELAYLOG_update_cond;
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 0060bcb6b1e..fd4af7a827e 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -49,9 +49,11 @@ Master_info::Master_info(bool is_slave_recovery)
bzero((char*) &file, sizeof(file));
mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_master_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
+ mysql_cond_init(key_master_info_sleep_cond, &sleep_cond, NULL);
}
Master_info::~Master_info()
@@ -59,9 +61,11 @@ Master_info::~Master_info()
delete_dynamic(&ignore_server_ids);
mysql_mutex_destroy(&run_lock);
mysql_mutex_destroy(&data_lock);
+ mysql_mutex_destroy(&sleep_lock);
mysql_cond_destroy(&data_cond);
mysql_cond_destroy(&start_cond);
mysql_cond_destroy(&stop_cond);
+ mysql_cond_destroy(&sleep_cond);
}
/**
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 1434ba45519..690b7e76542 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -78,8 +78,8 @@ class Master_info : public Slave_reporting_capability
File fd; // we keep the file open, so we need to remember the file pointer
IO_CACHE file;
- mysql_mutex_t data_lock, run_lock;
- mysql_cond_t data_cond, start_cond, stop_cond;
+ mysql_mutex_t data_lock, run_lock, sleep_lock;
+ mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond;
THD *io_thd;
MYSQL* mysql;
uint32 file_id; /* for 3.23 load data infile */
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 3371406ba65..351ff5843dd 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -75,10 +75,12 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
&data_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_relay_log_info_log_space_lock,
&log_space_lock, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_relay_log_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_relay_log_info_data_cond, &data_cond, NULL);
mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL);
mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL);
mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
+ mysql_cond_init(key_relay_log_info_sleep_cond, &sleep_cond, NULL);
relay_log.init_pthread_objects();
DBUG_VOID_RETURN;
}
@@ -91,10 +93,12 @@ Relay_log_info::~Relay_log_info()
mysql_mutex_destroy(&run_lock);
mysql_mutex_destroy(&data_lock);
mysql_mutex_destroy(&log_space_lock);
+ mysql_mutex_destroy(&sleep_lock);
mysql_cond_destroy(&data_cond);
mysql_cond_destroy(&start_cond);
mysql_cond_destroy(&stop_cond);
mysql_cond_destroy(&log_space_cond);
+ mysql_cond_destroy(&sleep_cond);
relay_log.cleanup();
DBUG_VOID_RETURN;
}
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index c8a0f549e0f..6e0a100fca7 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -138,15 +138,13 @@ public:
standard lock acquisition order to avoid deadlocks:
run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
*/
- mysql_mutex_t data_lock, run_lock;
-
+ mysql_mutex_t data_lock, run_lock, sleep_lock;
/*
start_cond is broadcast when SQL thread is started
stop_cond - when stopped
data_cond - when data protected by data_lock changes
*/
- mysql_cond_t start_cond, stop_cond, data_cond;
-
+ mysql_cond_t start_cond, stop_cond, data_cond, sleep_cond;
/* parent Master_info structure */
Master_info *mi;
diff --git a/sql/slave.cc b/sql/slave.cc
index 797bd8e37e7..8385921753b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -68,8 +68,6 @@ bool use_slave_mask = 0;
MY_BITMAP slave_error_mask;
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
-typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
-
char* slave_load_tmpdir = 0;
Master_info *active_mi= 0;
my_bool replicate_same_server_id;
@@ -152,9 +150,6 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
bool suppress_warnings);
static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
bool reconnect, bool suppress_warnings);
-static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
- void* thread_killed_arg);
-static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
static Log_event* next_event(Relay_log_info* rli);
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
static int terminate_slave_thread(THD *thd,
@@ -2068,35 +2063,42 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
DBUG_RETURN(0);
}
+/*
+ Sleep for a given amount of time or until killed.
+
+ @param thd Thread context of the current thread.
+ @param seconds The number of seconds to sleep.
+ @param func Function object to check if the thread has been killed.
+ @param info The Rpl_info object associated with this sleep.
-static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
- void* thread_killed_arg)
+ @retval True if the thread has been killed, false otherwise.
+*/
+template <typename killed_func, typename rpl_info>
+static inline bool slave_sleep(THD *thd, time_t seconds,
+ killed_func func, rpl_info info)
{
- int nap_time;
- thr_alarm_t alarmed;
- DBUG_ENTER("safe_sleep");
- thr_alarm_init(&alarmed);
- time_t start_time= my_time(0);
- time_t end_time= start_time+sec;
+ bool ret;
+ struct timespec abstime;
+ const char *old_proc_info;
- while ((nap_time= (int) (end_time - start_time)) > 0)
- {
- ALARM alarm_buff;
- /*
- The only reason we are asking for alarm is so that
- we will be woken up in case of murder, so if we do not get killed,
- set the alarm so it goes off after we wake up naturally
- */
- thr_alarm(&alarmed, 2 * nap_time, &alarm_buff);
- sleep(nap_time);
- thr_end_alarm(&alarmed);
+ mysql_mutex_t *lock= &info->sleep_lock;
+ mysql_cond_t *cond= &info->sleep_cond;
- if ((*thread_killed)(thd,thread_killed_arg))
- DBUG_RETURN(1);
- start_time= my_time(0);
+ /* Absolute system time at which the sleep time expires. */
+ set_timespec(abstime, seconds);
+ mysql_mutex_lock(lock);
+ old_proc_info= thd->enter_cond(cond, lock, thd->proc_info);
+
+ while (! (ret= func(thd, info)))
+ {
+ int error= mysql_cond_timedwait(cond, lock, &abstime);
+ if (error == ETIMEDOUT || error == ETIME)
+ break;
}
- DBUG_RETURN(0);
+ /* Implicitly unlocks the mutex. */
+ thd->exit_cond(old_proc_info);
+ return ret;
}
@@ -2555,8 +2557,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
exec_res= 0;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
- safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
- (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
+ slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
+ sql_slave_killed, rli);
mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
rli->trans_retries++;
rli->retried_trans++;
@@ -2654,8 +2656,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
{
if (*retry_count > master_retry_count)
return 1; // Don't retry forever
- safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed,
- (void *) mi);
+ slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
}
if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
return 1;
@@ -4248,8 +4249,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
break;
}
- safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
- (void*)mi);
+ slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
}
if (!slave_was_killed)