summary | refs | log | tree | commit | diff
path: root/sql
diff options
context:
space:
mode:
author unknown <guilhem@mysql.com> 2003-03-17 22:51:56 +0100
committer unknown <guilhem@mysql.com> 2003-03-17 22:51:56 +0100
commit 40c0b2c6c8b730dec9900c3829b7babf0a5b4772 (patch)
tree 5d9a0eaa1fe759e9fccf20738544fe2eab9bbbe8 /sql
parent 2103479670e60d1cce4166a4cd67b704bf4743da (diff)
download mariadb-git-40c0b2c6c8b730dec9900c3829b7babf0a5b4772.tar.gz
New variable rli->ignore_log_space_limit to resolve a deadlock between I/O and SQL threads in replication when relay_log_space is too small. This fixes bug #79.

sql/log.cc: New variable rli->ignore_log_space_limit to resolve a deadlock between I/O and SQL threads in replication when relay_log_space is too small.
sql/slave.cc: New variable rli->ignore_log_space_limit to resolve a deadlock between I/O and SQL threads in replication when relay_log_space is too small.
sql/slave.h: New variable rli->ignore_log_space_limit to resolve a deadlock between I/O and SQL threads in replication when relay_log_space is too small.
sql/sql_repl.cc: New variable rli->ignore_log_space_limit to resolve a deadlock between I/O and SQL threads in replication when relay_log_space is too small.
Diffstat (limited to 'sql')
-rw-r--r-- sql/log.cc 2
-rw-r--r-- sql/slave.cc 60
-rw-r--r-- sql/slave.h 7
-rw-r--r-- sql/sql_repl.cc 8
4 files changed, 64 insertions, 13 deletions
diff --git a/sql/log.cc b/sql/log.cc
index 27864e19c03..9befcaefb01 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -645,6 +645,8 @@ int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli)
*/
pthread_mutex_lock(&rli->log_space_lock);
rli->log_space_total -= rli->relay_log_pos;
+ //tell the I/O thread to take the relay_log_space_limit into account
+ rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
pthread_cond_broadcast(&rli->log_space_cond);
diff --git a/sql/slave.cc b/sql/slave.cc
index 5ddea7501e4..771317f9431 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -238,7 +238,7 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
if (log) // If not first log
{
if (strcmp(log, rli->linfo.log_file_name))
- rli->skip_log_purge=1; // Different name; Don't purge
+ rli->skip_log_purge= 1; // Different name; Don't purge
if (rli->relay_log.find_log_pos(&rli->linfo, log, 1))
{
*errmsg="Could not find target log during relay log initialization";
@@ -273,6 +273,12 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
my_b_seek(rli->cur_log,(off_t)pos);
err:
+ /*
+ If we don't purge, we can't honour relay_log_space_limit ;
+ silently discard it
+ */
+ if (rli->skip_log_purge)
+ rli->log_space_limit= 0;
pthread_cond_broadcast(&rli->data_cond);
if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock);
@@ -1312,7 +1318,8 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
save_proc_info = thd->proc_info;
thd->proc_info = "Waiting for relay log space to free";
while (rli->log_space_limit < rli->log_space_total &&
- !(slave_killed=io_slave_killed(thd,mi)))
+ !(slave_killed=io_slave_killed(thd,mi)) &&
+ !rli->ignore_log_space_limit)
{
pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
}
@@ -1588,7 +1595,7 @@ bool flush_master_info(MASTER_INFO* mi)
st_relay_log_info::st_relay_log_info()
:info_fd(-1), cur_log_fd(-1), master_log_pos(0), save_temporary_tables(0),
- cur_log_old_open_count(0), log_space_total(0),
+ cur_log_old_open_count(0), log_space_total(0), ignore_log_space_limit(0),
slave_skip_counter(0), abort_pos_wait(0), slave_run_id(0),
sql_thd(0), last_slave_errno(0), inited(0), abort_slave(0),
slave_running(0), skip_log_purge(0),
@@ -2296,7 +2303,8 @@ reconnect done to recover from failed read");
}
flush_master_info(mi);
if (mi->rli.log_space_limit && mi->rli.log_space_limit <
- mi->rli.log_space_total)
+ mi->rli.log_space_total &&
+ !mi->rli.ignore_log_space_limit)
if (wait_for_relay_log_space(&mi->rli))
{
sql_print_error("Slave I/O thread aborted while waiting for relay \
@@ -2408,6 +2416,10 @@ slave_begin:
pthread_cond_broadcast(&rli->start_cond);
// This should always be set to 0 when the slave thread is started
rli->pending = 0;
+
+ //tell the I/O thread to take relay_log_space_limit into account from now on
+ rli->ignore_log_space_limit= 0;
+
if (init_relay_log_pos(rli,
rli->relay_log_name,
rli->relay_log_pos,
@@ -3086,11 +3098,41 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
update. If we do not, show slave status will block
*/
pthread_mutex_unlock(&rli->data_lock);
- /* Note that wait_for_update unlocks lock_log ! */
- rli->relay_log.wait_for_update(rli->sql_thd);
-
- // re-acquire data lock since we released it earlier
- pthread_mutex_lock(&rli->data_lock);
+
+ /*
+ Possible deadlock :
+ - the I/O thread has reached log_space_limit
+ - the SQL thread has read all relay logs, but cannot purge for some
+ reason:
+ * it has already purged all logs except the current one
+ * there are other logs than the current one but they're involved in
+ a transaction that finishes in the current one (or is not finished)
+ Solution :
+ Wake up the possibly waiting I/O thread, and set a boolean asking
+ the I/O thread to temporarily ignore the log_space_limit
+ constraint, because we do not want the I/O thread to block because of
+ space (it's ok if it blocks for any other reason, e.g. because the
+ master does not send anything). Then the I/O thread stops waiting
+ and reads more events.
+ The SQL thread decides when the I/O thread should take log_space_limit
+ into account again : ignore_log_space_limit is reset to 0
+ in purge_first_log (when the SQL thread purges the just-read relay
+ log), and also when the SQL thread starts. We should also reset
+ ignore_log_space_limit to 0 when the user does RESET SLAVE, but in
+ fact, no need as RESET SLAVE requires that the slave
+ be stopped, and when the SQL thread is later restarted
+ ignore_log_space_limit will be reset to 0.
+ */
+ pthread_mutex_lock(&rli->log_space_lock);
+ // prevent the I/O thread from blocking next times
+ rli->ignore_log_space_limit= 1;
+ // If the I/O thread is blocked, unblock it
+ pthread_cond_broadcast(&rli->log_space_cond);
+ pthread_mutex_unlock(&rli->log_space_lock);
+ // Note that wait_for_update unlocks lock_log !
+ rli->relay_log.wait_for_update(rli->sql_thd);
+ // re-acquire data lock since we released it earlier
+ pthread_mutex_lock(&rli->data_lock);
continue;
}
/*
diff --git a/sql/slave.h b/sql/slave.h
index fe0f0b045f3..8832302056d 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -137,7 +137,14 @@ typedef struct st_relay_log_info
offset. pending stored the extra offset to be added to the position.
*/
ulonglong relay_log_pos, pending;
+
+ /*
+ Handling of the relay_log_space_limit optional constraint.
+ ignore_log_space_limit is used to resolve a deadlock between I/O and SQL
+ threads, it makes the I/O thread temporarily forget about the constraint
+ */
ulonglong log_space_limit,log_space_total;
+ bool ignore_log_space_limit;
/*
InnoDB internally stores the master log position it has processed
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 5e90bbf1b0f..d670c673b4a 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -858,22 +858,21 @@ int change_master(THD* thd, MASTER_INFO* mi)
if (lex_mi->relay_log_name)
{
- need_relay_log_purge = 0;
- mi->rli.skip_log_purge=1;
+ need_relay_log_purge= 0;
strmake(mi->rli.relay_log_name,lex_mi->relay_log_name,
sizeof(mi->rli.relay_log_name)-1);
}
if (lex_mi->relay_log_pos)
{
- need_relay_log_purge=0;
+ need_relay_log_purge= 0;
mi->rli.relay_log_pos=lex_mi->relay_log_pos;
}
flush_master_info(mi);
if (need_relay_log_purge)
{
- mi->rli.skip_log_purge=0;
+ mi->rli.skip_log_purge= 0;
thd->proc_info="purging old relay logs";
if (purge_relay_logs(&mi->rli, thd,
0 /* not only reset, but also reinit */,
@@ -887,6 +886,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
else
{
const char* msg;
+ mi->rli.skip_log_purge= 1;
/* Relay log is already initialized */
if (init_relay_log_pos(&mi->rli,
mi->rli.relay_log_name,