summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDavi Arnaut <davi.arnaut@oracle.com>2010-10-22 09:58:09 -0200
committerDavi Arnaut <davi.arnaut@oracle.com>2010-10-22 09:58:09 -0200
commit2881b8014ca7101684358b25aaf54784c7f43613 (patch)
tree4571f70663dd1d045d339716fc55ff6c809fec4a /sql
parenta776e5f3d297f45d63f48ad919ccd46307cddb30 (diff)
downloadmariadb-git-2881b8014ca7101684358b25aaf54784c7f43613.tar.gz
Bug#37780: Make KILL reliable (main.kill fails randomly)
- A prerequisite cleanup patch for making KILL reliable. The test case main.kill did not work reliably. The following problems have been identified: 1. A kill signal could go lost if it came in, short before a thread went reading on the client connection. 2. A kill signal could go lost if it came in, short before a thread went waiting on a condition variable. These problems have been solved as follows. Please see also added code comments for more details. 1. There is no safe way to detect, when a thread enters the blocking state of a read(2) or recv(2) system call, where it can be interrupted by a signal. Hence it is not possible to wait for the right moment to send a kill signal. It has been decided, not to fix it in the code. Instead, the test case repeats the KILL statement until the connection terminates. 2. Before waiting on a condition variable, we register it together with a synchronizating mutex in THD::mysys_var. After this, we need to test THD::killed again. At some places we did only test it in a loop condition before the registration. When THD::killed had been set between this test and the registration, we entered waiting without noticing the killed flag. Additional checks ahve been introduced where required. In addition to the above, a re-write of the main.kill test case has been done. All sleeps have been replaced by Debug Sync Facility synchronization. A couple of sync points have been added to the server code. To avoid further problems, if the test case fails in spite of the fixes, the test case has been added to the "experimental" list for now. - Most of the work on this patch is authored by Ingo Struewing mysql-test/t/kill.test: Re-wrote test case to use Debug Sync points instead of sleeps sql/event_queue.cc: Fixed kill detection in Event_queue::cond_wait() by adding a check after enter_cond(). sql/lock.cc: Moved Debug Sync points behind enter_cond(). Fixed comments. sql/slave.cc: Fixed kill detection in start_slave_thread() by adding a check after enter_cond(). sql/sql_class.cc: Swapped order of kill and close in THD::awake(). Added comments. sql/sql_class.h: Added a comment to THD::killed. sql/sql_parse.cc: Added a sync point in do_command(). sql/sql_select.cc: Added a sync point in JOIN::optimize().
Diffstat (limited to 'sql')
-rw-r--r--sql/event_queue.cc12
-rw-r--r--sql/lock.cc27
-rw-r--r--sql/slave.cc14
-rw-r--r--sql/sql_class.cc63
-rw-r--r--sql/sql_class.h6
-rw-r--r--sql/sql_parse.cc16
-rw-r--r--sql/sql_select.cc2
7 files changed, 106 insertions, 34 deletions
diff --git a/sql/event_queue.cc b/sql/event_queue.cc
index f0310c676d1..458a7c1defd 100644
--- a/sql/event_queue.cc
+++ b/sql/event_queue.cc
@@ -741,11 +741,13 @@ Event_queue::cond_wait(THD *thd, struct timespec *abstime, const char* msg,
thd->enter_cond(&COND_queue_state, &LOCK_event_queue, msg);
- DBUG_PRINT("info", ("mysql_cond_%swait", abstime? "timed":""));
- if (!abstime)
- mysql_cond_wait(&COND_queue_state, &LOCK_event_queue);
- else
- mysql_cond_timedwait(&COND_queue_state, &LOCK_event_queue, abstime);
+ if (!thd->killed)
+ {
+ if (!abstime)
+ mysql_cond_wait(&COND_queue_state, &LOCK_event_queue);
+ else
+ mysql_cond_timedwait(&COND_queue_state, &LOCK_event_queue, abstime);
+ }
mutex_last_locked_in_func= func;
mutex_last_locked_at_line= line;
diff --git a/sql/lock.cc b/sql/lock.cc
index 0181a544824..84cc24b3f61 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -989,6 +989,14 @@ bool Global_read_lock::lock_global_read_lock(THD *thd)
const char *new_message= "Waiting to get readlock";
(void) mysql_mutex_lock(&LOCK_global_read_lock);
+ old_message= thd->enter_cond(&COND_global_read_lock,
+ &LOCK_global_read_lock, new_message);
+ DBUG_PRINT("info",
+ ("waiting_for: %d protect_against: %d",
+ waiting_for_read_lock, protect_against_global_read_lock));
+
+ waiting_for_read_lock++;
+
#if defined(ENABLED_DEBUG_SYNC)
/*
The below sync point fires if we have to wait for
@@ -997,27 +1005,18 @@ bool Global_read_lock::lock_global_read_lock(THD *thd)
WARNING: Beware to use WAIT_FOR with this sync point. We hold
LOCK_global_read_lock here.
- Call the sync point before calling enter_cond() as it does use
- enter_cond() and exit_cond() itself if a WAIT_FOR action is
- executed in spite of the above warning.
+ The sync point is after enter_cond() so that proc_info is
+ available immediately after the sync point sends a SIGNAL. This
+ can make tests more reliable.
- Pre-set proc_info so that it is available immediately after the
- sync point sends a SIGNAL. This makes tests more reliable.
+ The sync point is before the loop so that it is executed only once.
*/
- if (protect_against_global_read_lock)
+ if (protect_against_global_read_lock && !thd->killed)
{
- thd_proc_info(thd, new_message);
DEBUG_SYNC(thd, "wait_lock_global_read_lock");
}
#endif /* defined(ENABLED_DEBUG_SYNC) */
- old_message=thd->enter_cond(&COND_global_read_lock, &LOCK_global_read_lock,
- new_message);
- DBUG_PRINT("info",
- ("waiting_for: %d protect_against: %d",
- waiting_for_read_lock, protect_against_global_read_lock));
-
- waiting_for_read_lock++;
while (protect_against_global_read_lock && !thd->killed)
mysql_cond_wait(&COND_global_read_lock, &LOCK_global_read_lock);
waiting_for_read_lock--;
diff --git a/sql/slave.cc b/sql/slave.cc
index ab8952069fb..a6313f0b850 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -721,9 +721,17 @@ int start_slave_thread(
while (start_id == *slave_run_id)
{
DBUG_PRINT("sleep",("Waiting for slave thread to start"));
- const char* old_msg = thd->enter_cond(start_cond,cond_lock,
- "Waiting for slave thread to start");
- mysql_cond_wait(start_cond, cond_lock);
+ const char *old_msg= thd->enter_cond(start_cond, cond_lock,
+ "Waiting for slave thread to start");
+ /*
+ It is not sufficient to test this at loop bottom. We must test
+ it after registering the mutex in enter_cond(). If the kill
+ happens after testing of thd->killed and before the mutex is
+ registered, we could otherwise go waiting though thd->killed is
+ set.
+ */
+ if (!thd->killed)
+ mysql_cond_wait(start_cond, cond_lock);
thd->exit_cond(old_msg);
mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
if (thd->killed)
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 15fbc6a1480..da61c67f1c8 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1179,36 +1179,70 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
}
+/**
+ Awake a thread.
+
+ @param[in] state_to_set value for THD::killed
+
+ This is normally called from another thread's THD object.
+
+ @note Do always call this while holding LOCK_thd_data.
+*/
+
void THD::awake(THD::killed_state state_to_set)
{
DBUG_ENTER("THD::awake");
- DBUG_PRINT("enter", ("this: 0x%lx", (long) this));
+ DBUG_PRINT("enter", ("this: %p current_thd: %p", this, current_thd));
THD_CHECK_SENTRY(this);
mysql_mutex_assert_owner(&LOCK_thd_data);
+ /* Set the 'killed' flag of 'this', which is the target THD object. */
killed= state_to_set;
+
if (state_to_set != THD::KILL_QUERY)
{
- thr_alarm_kill(thread_id);
- if (!slave_thread)
- MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (this));
#ifdef SIGNAL_WITH_VIO_CLOSE
if (this != current_thd)
{
/*
- In addition to a signal, let's close the socket of the thread that
- is being killed. This is to make sure it does not block if the
- signal is lost. This needs to be done only on platforms where
- signals are not a reliable interruption mechanism.
-
- If we're killing ourselves, we know that we're not blocked, so this
- hack is not used.
+ Before sending a signal, let's close the socket of the thread
+ that is being killed ("this", which is not the current thread).
+ This is to make sure it does not block if the signal is lost.
+ This needs to be done only on platforms where signals are not
+ a reliable interruption mechanism.
+
+ Note that the downside of this mechanism is that we could close
+ the connection while "this" target thread is in the middle of
+ sending a result to the application, thus violating the client-
+ server protocol.
+
+ On the other hand, without closing the socket we have a race
+ condition. If "this" target thread passes the check of
+ thd->killed, and then the current thread runs through
+ THD::awake(), sets the 'killed' flag and completes the
+ signaling, and then the target thread runs into read(), it will
+ block on the socket. As a result of the discussions around
+ Bug#37780, it has been decided that we accept the race
+ condition. A second KILL awakes the target from read().
+
+ If we are killing ourselves, we know that we are not blocked.
+ We also know that we will check thd->killed before we go for
+ reading the next statement.
*/
close_active_vio();
}
-#endif
+#endif
+
+ /* Mark the target thread's alarm request expired, and signal alarm. */
+ thr_alarm_kill(thread_id);
+
+ /* Send an event to the scheduler that a thread should be killed. */
+ if (!slave_thread)
+ MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (this));
}
+
+ /* Broadcast a condition to kick the target if it is waiting on it. */
if (mysys_var)
{
mysql_mutex_lock(&mysys_var->mutex);
@@ -1232,6 +1266,11 @@ void THD::awake(THD::killed_state state_to_set)
we issue a second KILL or the status it's waiting for happens).
It's true that we have set its thd->killed but it may not
see it immediately and so may have time to reach the cond_wait().
+
+ However, where possible, we test for killed once again after
+ enter_cond(). This should make the signaling as safe as possible.
+ However, there is still a small chance of failure on platforms with
+ instruction or memory write reordering.
*/
if (mysys_var->current_cond && mysys_var->current_mutex)
{
diff --git a/sql/sql_class.h b/sql/sql_class.h
index aa0f6cf1aa3..8b0399037ea 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1953,6 +1953,12 @@ public:
DYNAMIC_ARRAY user_var_events; /* For user variables replication */
MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
+ /*
+ If checking this in conjunction with a wait condition, please
+ include a check after enter_cond() if you want to avoid a race
+ condition. For details see the implementation of awake(),
+ especially the "broadcast" part.
+ */
enum killed_state
{
NOT_KILLED=0,
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 4ed22e3a355..8a7f42b462d 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -712,6 +712,22 @@ bool do_command(THD *thd)
net_new_transaction(net);
+ /*
+ Synchronization point for testing of KILL_CONNECTION.
+ This sync point can wait here, to simulate slow code execution
+ between the last test of thd->killed and blocking in read().
+
+ The goal of this test is to verify that a connection does not
+ hang, if it is killed at this point of execution.
+ (Bug#37780 - main.kill fails randomly)
+
+ Note that the sync point wait itself will be terminated by a
+ kill. In this case it consumes a condition broadcast, but does
+ not change anything else. The consumed broadcast should not
+ matter here, because the read/recv() below doesn't use it.
+ */
+ DEBUG_SYNC(thd, "before_do_command_net_read");
+
if ((packet_length= my_net_read(net)) == packet_error)
{
DBUG_PRINT("info",("Got error %d reading command from socket %s",
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 2a2fe3eb36f..d9fefe35bd8 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -47,6 +47,7 @@
#include "records.h" // init_read_record, end_read_record
#include "filesort.h" // filesort_free_buffers
#include "sql_union.h" // mysql_union
+#include "debug_sync.h" // DEBUG_SYNC
#include <m_ctype.h>
#include <my_bit.h>
#include <hash.h>
@@ -852,6 +853,7 @@ JOIN::optimize()
if (optimized)
DBUG_RETURN(0);
optimized= 1;
+ DEBUG_SYNC(thd, "before_join_optimize");
thd_proc_info(thd, "optimizing");
row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :