summaryrefslogtreecommitdiff
path: root/sql/slave.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/slave.cc')
-rw-r--r--sql/slave.cc194
1 files changed, 133 insertions, 61 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index ec693964726..1d99d2b606b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2003 MySQL AB, 2008-2009 Sun Microsystems, Inc
+/* Copyright (C) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -113,7 +113,7 @@ static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
registration on master",
"Reconnecting after a failed registration on master",
"failed registering on master, reconnecting to try again, \
-log '%s' at postion %s",
+log '%s' at position %s",
"COM_REGISTER_SLAVE",
"Slave I/O thread killed during or after reconnect"
},
@@ -121,7 +121,7 @@ log '%s' at postion %s",
"Waiting to reconnect after a failed binlog dump request",
"Slave I/O thread killed while retrying master dump",
"Reconnecting after a failed binlog dump request",
- "failed dump request, reconnecting to try again, log '%s' at postion %s",
+ "failed dump request, reconnecting to try again, log '%s' at position %s",
"COM_BINLOG_DUMP",
"Slave I/O thread killed during or after reconnect"
},
@@ -130,7 +130,7 @@ log '%s' at postion %s",
"Slave I/O thread killed while waiting to reconnect after a failed read",
"Reconnecting after a failed master event read",
"Slave I/O thread: Failed reading log event, reconnecting to retry, \
-log '%s' at postion %s",
+log '%s' at position %s",
"",
"Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
@@ -504,53 +504,54 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
- if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
+ if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
{
- DBUG_PRINT("info",("Terminating IO thread"));
- mi->abort_slave=1;
- if ((error=terminate_slave_thread(mi->io_thd, io_lock,
- &mi->stop_cond,
- &mi->slave_running,
+ DBUG_PRINT("info",("Terminating SQL thread"));
+ mi->rli.abort_slave=1;
+ if ((error=terminate_slave_thread(mi->rli.sql_thd, sql_lock,
+ &mi->rli.stop_cond,
+ &mi->rli.slave_running,
skip_lock)) &&
!force_all)
DBUG_RETURN(error);
mysql_mutex_lock(log_lock);
- DBUG_PRINT("info",("Flushing relay log and master info file."));
+ DBUG_PRINT("info",("Flushing relay-log info file."));
if (current_thd)
- thd_proc_info(current_thd, "Flushing relay log and master info files.");
- if (flush_master_info(mi, TRUE, FALSE))
- DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
-
- if (my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME)))
+ thd_proc_info(current_thd, "Flushing relay-log info file.");
+ if (flush_relay_log_info(&mi->rli))
DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
-
- if (my_sync(mi->fd, MYF(MY_WME)))
+
+ if (my_sync(mi->rli.info_fd, MYF(MY_WME)))
DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
mysql_mutex_unlock(log_lock);
}
- if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
+ if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
{
- DBUG_PRINT("info",("Terminating SQL thread"));
- mi->rli.abort_slave=1;
- if ((error=terminate_slave_thread(mi->rli.sql_thd, sql_lock,
- &mi->rli.stop_cond,
- &mi->rli.slave_running,
+ DBUG_PRINT("info",("Terminating IO thread"));
+ mi->abort_slave=1;
+ if ((error=terminate_slave_thread(mi->io_thd, io_lock,
+ &mi->stop_cond,
+ &mi->slave_running,
skip_lock)) &&
!force_all)
DBUG_RETURN(error);
mysql_mutex_lock(log_lock);
- DBUG_PRINT("info",("Flushing relay-log info file."));
+ DBUG_PRINT("info",("Flushing relay log and master info file."));
if (current_thd)
- thd_proc_info(current_thd, "Flushing relay-log info file.");
- if (flush_relay_log_info(&mi->rli))
+ thd_proc_info(current_thd, "Flushing relay log and master info files.");
+ if (flush_master_info(mi, TRUE, FALSE))
DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
-
- if (my_sync(mi->rli.info_fd, MYF(MY_WME)))
+
+ if (mi->rli.relay_log.is_open() &&
+ my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME)))
+ DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
+
+ if (my_sync(mi->fd, MYF(MY_WME)))
DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
mysql_mutex_unlock(log_lock);
@@ -721,9 +722,17 @@ int start_slave_thread(
while (start_id == *slave_run_id)
{
DBUG_PRINT("sleep",("Waiting for slave thread to start"));
- const char* old_msg = thd->enter_cond(start_cond,cond_lock,
- "Waiting for slave thread to start");
- mysql_cond_wait(start_cond, cond_lock);
+ const char *old_msg= thd->enter_cond(start_cond, cond_lock,
+ "Waiting for slave thread to start");
+ /*
+ It is not sufficient to test this at loop bottom. We must test
+ it after registering the mutex in enter_cond(). If the kill
+ happens after testing of thd->killed and before the mutex is
+ registered, we could otherwise go waiting though thd->killed is
+ set.
+ */
+ if (!thd->killed)
+ mysql_cond_wait(start_cond, cond_lock);
thd->exit_cond(old_msg);
mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
if (thd->killed)
@@ -881,21 +890,31 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
if (abort_loop || thd->killed || rli->abort_slave)
{
- if (thd->transaction.all.modified_non_trans_table && rli->is_in_group())
+ /*
+ The transaction should always be binlogged if OPTION_KEEP_LOG is set
+ (it implies that something can not be rolled back). And such case
+ should be regarded similarly as modifying a non-transactional table
+ because retrying of the transaction will lead to an error or inconsistency
+ as well.
+ Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped.
+ */
+ if ((thd->transaction.all.modified_non_trans_table ||
+ (thd->variables.option_bits & OPTION_KEEP_LOG))
+ && rli->is_in_group())
{
char msg_stopped[]=
- "... The slave SQL is stopped, leaving the current group "
- "of events unfinished with a non-transaction table changed. "
- "If the group consists solely of Row-based events, you can try "
- "restarting the slave with --slave-exec-mode=IDEMPOTENT, which "
+ "... Slave SQL Thread stopped with incomplete event group "
+ "having non-transactional changes. "
+ "If the group consists solely of row-based events, you can try "
+ "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
"ignores duplicate key, key not found, and similar errors (see "
"documentation for details).";
if (rli->abort_slave)
{
- DBUG_PRINT("info", ("Slave SQL thread is being stopped in the middle of"
- " a group having updated a non-trans table, giving"
- " it some grace period"));
+ DBUG_PRINT("info", ("Request to stop slave SQL Thread received while "
+ "applying a group that has non-transactional "
+ "changes; waiting for completion of the group ... "));
/*
Slave sql thread shutdown in face of unfinished group modified
@@ -919,9 +938,9 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
if (ret == 0)
{
rli->report(WARNING_LEVEL, 0,
- "slave SQL thread is being stopped in the middle "
- "of applying of a group having updated a non-transaction "
- "table; waiting for the group completion ... ");
+ "Request to stop slave SQL Thread received while "
+ "applying a group that has non-transactional "
+ "changes; waiting for completion of the group ... ");
}
else
{
@@ -2512,8 +2531,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
if (slave_trans_retries)
{
- int temp_err;
- LINT_INIT(temp_err);
+ int UNINIT_VAR(temp_err);
if (exec_res && (temp_err= has_temporary_error(thd)))
{
const char *errmsg;
@@ -2544,9 +2562,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
else
{
exec_res= 0;
- trans_rollback(thd);
- close_thread_tables(thd);
- thd->mdl_context.release_transactional_locks();
+ rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
(CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
@@ -3013,7 +3029,7 @@ err:
sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff));
RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi));
- thd->set_query(NULL, 0);
+ thd->reset_query();
thd->reset_db(NULL, 0);
if (mysql)
{
@@ -3396,6 +3412,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
request is detected only by the present function, not by events), so we
must "proactively" clear playgrounds:
*/
+ thd->clear_error();
rli->cleanup_context(thd, 1);
/*
Some extra safety, which should not been needed (normally, event deletion
@@ -3403,7 +3420,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
variables is supposed to set them to 0 before terminating)).
*/
thd->catalog= 0;
- thd->set_query(NULL, 0);
+ thd->reset_query();
thd->reset_db(NULL, 0);
thd_proc_info(thd, "Waiting for slave mutex on exit");
mysql_mutex_lock(&rli->run_lock);
@@ -3626,8 +3643,7 @@ static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
Rotate the relay log makes binlog format detection easier (at next slave
start or mysqlbinlog)
*/
- rotate_relay_log(mi); /* will take the right mutexes */
- DBUG_RETURN(0);
+ DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */);
}
/*
@@ -4739,12 +4755,66 @@ static Log_event* next_event(Relay_log_info* rli)
DBUG_ASSERT(rli->cur_log_fd == -1);
/*
- Read pointer has to be at the start since we are the only
- reader.
- We must keep the LOCK_log to read the 4 first bytes, as this is a hot
- log (same as when we call read_log_event() above: for a hot log we
- take the mutex).
+ When the SQL thread is [stopped and] (re)started the
+ following may happen:
+
+ 1. Log was hot at stop time and remains hot at restart
+
+ SQL thread reads again from hot_log (SQL thread was
+ reading from the active log when it was stopped and the
+ very same log is still active on SQL thread restart).
+
+ In this case, my_b_seek is performed on cur_log, while
+ cur_log points to relay_log.get_log_file();
+
+ 2. Log was hot at stop time but got cold before restart
+
+ The log was hot when SQL thread stopped, but it is not
+ anymore when the SQL thread restarts.
+
+ In this case, the SQL thread reopens the log, using
+ cache_buf, ie, cur_log points to &cache_buf, and thence
+ its coordinates are reset.
+
+ 3. Log was already cold at stop time
+
+ The log was not hot when the SQL thread stopped, and, of
+ course, it will not be hot when it restarts.
+
+ In this case, the SQL thread opens the cold log again,
+ using cache_buf, ie, cur_log points to &cache_buf, and
+ thence its coordinates are reset.
+
+ 4. Log was hot at stop time, DBA changes to previous cold
+ log and restarts SQL thread
+
+ The log was hot when the SQL thread was stopped, but the
+ user changed the coordinates of the SQL thread to
+ restart from a previous cold log.
+
+ In this case, at start time, cur_log points to a cold
+ log, opened using &cache_buf as cache, and coordinates
+ are reset. However, as it moves on to the next logs, it
+ will eventually reach the hot log. If the hot log is the
+ same as at the time the SQL thread was stopped, then
+ coordinates were not reset - the cur_log will point to
+ relay_log.get_log_file(), and not a freshly opened
+ IO_CACHE through cache_buf. For this reason we need to
+ deploy a my_b_seek before calling check_binlog_magic at
+ this point of the code (see: BUG#55263 for more
+ details).
+
+ NOTES:
+ - We must keep the LOCK_log to read the 4 first bytes, as
+ this is a hot log (same as when we call read_log_event()
+ above: for a hot log we take the mutex).
+
+ - Because of scenario #4 above, we need to have a
+ my_b_seek here. Otherwise, we might hit the assertion
+ inside check_binlog_magic.
*/
+
+ my_b_seek(cur_log, (my_off_t) 0);
if (check_binlog_magic(cur_log,&errmsg))
{
if (!hot_log)
@@ -4811,12 +4881,13 @@ err:
is void).
*/
-void rotate_relay_log(Master_info* mi)
+int rotate_relay_log(Master_info* mi)
{
DBUG_ENTER("rotate_relay_log");
Relay_log_info* rli= &mi->rli;
+ int error= 0;
- DBUG_EXECUTE_IF("crash_before_rotate_relaylog", abort(););
+ DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
/*
We need to test inited because otherwise, new_file() will attempt to lock
@@ -4829,7 +4900,8 @@ void rotate_relay_log(Master_info* mi)
}
/* If the relay log is closed, new_file() will do nothing. */
- rli->relay_log.new_file();
+ if ((error= rli->relay_log.new_file()))
+ goto end;
/*
We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
@@ -4846,7 +4918,7 @@ void rotate_relay_log(Master_info* mi)
*/
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
end:
- DBUG_VOID_RETURN;
+ DBUG_RETURN(error);
}