diff options
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- | sql/slave.cc | 194 |
1 files changed, 133 insertions, 61 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index ec693964726..1d99d2b606b 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2003 MySQL AB, 2008-2009 Sun Microsystems, Inc +/* Copyright (C) 2000, 2011, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -113,7 +113,7 @@ static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]= registration on master", "Reconnecting after a failed registration on master", "failed registering on master, reconnecting to try again, \ -log '%s' at postion %s", +log '%s' at position %s", "COM_REGISTER_SLAVE", "Slave I/O thread killed during or after reconnect" }, @@ -121,7 +121,7 @@ log '%s' at postion %s", "Waiting to reconnect after a failed binlog dump request", "Slave I/O thread killed while retrying master dump", "Reconnecting after a failed binlog dump request", - "failed dump request, reconnecting to try again, log '%s' at postion %s", + "failed dump request, reconnecting to try again, log '%s' at position %s", "COM_BINLOG_DUMP", "Slave I/O thread killed during or after reconnect" }, @@ -130,7 +130,7 @@ log '%s' at postion %s", "Slave I/O thread killed while waiting to reconnect after a failed read", "Reconnecting after a failed master event read", "Slave I/O thread: Failed reading log event, reconnecting to retry, \ -log '%s' at postion %s", +log '%s' at position %s", "", "Slave I/O thread killed during or after a reconnect done to recover from \ failed read" @@ -504,53 +504,54 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock; mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); - if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) + if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) { - DBUG_PRINT("info",("Terminating IO thread")); - mi->abort_slave=1; - if ((error=terminate_slave_thread(mi->io_thd, io_lock, - &mi->stop_cond, - &mi->slave_running, + DBUG_PRINT("info",("Terminating SQL thread")); + mi->rli.abort_slave=1; + if ((error=terminate_slave_thread(mi->rli.sql_thd, sql_lock, + &mi->rli.stop_cond, + &mi->rli.slave_running, skip_lock)) && !force_all) DBUG_RETURN(error); mysql_mutex_lock(log_lock); - DBUG_PRINT("info",("Flushing relay log and master info file.")); + DBUG_PRINT("info",("Flushing relay-log info file.")); if (current_thd) - thd_proc_info(current_thd, "Flushing relay log and master info files."); - if (flush_master_info(mi, TRUE, FALSE)) - DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); - - if (my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME))) + thd_proc_info(current_thd, "Flushing relay-log info file."); + if (flush_relay_log_info(&mi->rli)) DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); - - if (my_sync(mi->fd, MYF(MY_WME))) + + if (my_sync(mi->rli.info_fd, MYF(MY_WME))) DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); mysql_mutex_unlock(log_lock); } - if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) + if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) { - DBUG_PRINT("info",("Terminating SQL thread")); - mi->rli.abort_slave=1; - if ((error=terminate_slave_thread(mi->rli.sql_thd, sql_lock, - &mi->rli.stop_cond, - &mi->rli.slave_running, + DBUG_PRINT("info",("Terminating IO thread")); + mi->abort_slave=1; + if ((error=terminate_slave_thread(mi->io_thd, io_lock, + &mi->stop_cond, + &mi->slave_running, skip_lock)) && !force_all) DBUG_RETURN(error); mysql_mutex_lock(log_lock); - DBUG_PRINT("info",("Flushing relay-log info file.")); + DBUG_PRINT("info",("Flushing relay log and master info file.")); if (current_thd) - thd_proc_info(current_thd, "Flushing relay-log info file."); - if (flush_relay_log_info(&mi->rli)) + thd_proc_info(current_thd, "Flushing relay log and master info files."); + if (flush_master_info(mi, TRUE, FALSE)) DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); - - if (my_sync(mi->rli.info_fd, MYF(MY_WME))) + + if (mi->rli.relay_log.is_open() && + my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->fd, MYF(MY_WME))) DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); mysql_mutex_unlock(log_lock); @@ -721,9 +722,17 @@ int start_slave_thread( while (start_id == *slave_run_id) { DBUG_PRINT("sleep",("Waiting for slave thread to start")); - const char* old_msg = thd->enter_cond(start_cond,cond_lock, - "Waiting for slave thread to start"); - mysql_cond_wait(start_cond, cond_lock); + const char *old_msg= thd->enter_cond(start_cond, cond_lock, + "Waiting for slave thread to start"); + /* + It is not sufficient to test this at loop bottom. We must test + it after registering the mutex in enter_cond(). If the kill + happens after testing of thd->killed and before the mutex is + registered, we could otherwise go waiting though thd->killed is + set. + */ + if (!thd->killed) + mysql_cond_wait(start_cond, cond_lock); thd->exit_cond(old_msg); mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released if (thd->killed) @@ -881,21 +890,31 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli) DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun if (abort_loop || thd->killed || rli->abort_slave) { - if (thd->transaction.all.modified_non_trans_table && rli->is_in_group()) + /* + The transaction should always be binlogged if OPTION_KEEP_LOG is set + (it implies that something can not be rolled back). And such case + should be regarded similarly as modifing a non-transactional table + because retrying of the transaction will lead to an error or inconsistency + as well. + Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped. + */ + if ((thd->transaction.all.modified_non_trans_table || + (thd->variables.option_bits & OPTION_KEEP_LOG)) + && rli->is_in_group()) { char msg_stopped[]= - "... The slave SQL is stopped, leaving the current group " - "of events unfinished with a non-transaction table changed. " - "If the group consists solely of Row-based events, you can try " - "restarting the slave with --slave-exec-mode=IDEMPOTENT, which " + "... Slave SQL Thread stopped with incomplete event group " + "having non-transactional changes. " + "If the group consists solely of row-based events, you can try " + "to restart the slave with --slave-exec-mode=IDEMPOTENT, which " "ignores duplicate key, key not found, and similar errors (see " "documentation for details)."; if (rli->abort_slave) { - DBUG_PRINT("info", ("Slave SQL thread is being stopped in the middle of" - " a group having updated a non-trans table, giving" - " it some grace period")); + DBUG_PRINT("info", ("Request to stop slave SQL Thread received while " + "applying a group that has non-transactional " + "changes; waiting for completion of the group ... ")); /* Slave sql thread shutdown in face of unfinished group modified @@ -919,9 +938,9 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli) if (ret == 0) { rli->report(WARNING_LEVEL, 0, - "slave SQL thread is being stopped in the middle " - "of applying of a group having updated a non-transaction " - "table; waiting for the group completion ... "); + "Request to stop slave SQL Thread received while " + "applying a group that has non-transactional " + "changes; waiting for completion of the group ... "); } else { @@ -2512,8 +2531,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) if (slave_trans_retries) { - int temp_err; - LINT_INIT(temp_err); + int UNINIT_VAR(temp_err); if (exec_res && (temp_err= has_temporary_error(thd))) { const char *errmsg; @@ -2544,9 +2562,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) else { exec_res= 0; - trans_rollback(thd); - close_thread_tables(thd); - thd->mdl_context.release_transactional_locks(); + rli->cleanup_context(thd, 1); /* chance for concurrent connection to get more locks */ safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli); @@ -3013,7 +3029,7 @@ err: sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s", IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff)); RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi)); - thd->set_query(NULL, 0); + thd->reset_query(); thd->reset_db(NULL, 0); if (mysql) { @@ -3396,6 +3412,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ request is detected only by the present function, not by events), so we must "proactively" clear playgrounds: */ + thd->clear_error(); rli->cleanup_context(thd, 1); /* Some extra safety, which should not been needed (normally, event deletion @@ -3403,7 +3420,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ variables is supposed to set them to 0 before terminating)). */ thd->catalog= 0; - thd->set_query(NULL, 0); + thd->reset_query(); thd->reset_db(NULL, 0); thd_proc_info(thd, "Waiting for slave mutex on exit"); mysql_mutex_lock(&rli->run_lock); @@ -3626,8 +3643,7 @@ static int process_io_rotate(Master_info *mi, Rotate_log_event *rev) Rotate the relay log makes binlog format detection easier (at next slave start or mysqlbinlog) */ - rotate_relay_log(mi); /* will take the right mutexes */ - DBUG_RETURN(0); + DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */); } /* @@ -4739,12 +4755,66 @@ static Log_event* next_event(Relay_log_info* rli) DBUG_ASSERT(rli->cur_log_fd == -1); /* - Read pointer has to be at the start since we are the only - reader. - We must keep the LOCK_log to read the 4 first bytes, as this is a hot - log (same as when we call read_log_event() above: for a hot log we - take the mutex). + When the SQL thread is [stopped and] (re)started the + following may happen: + + 1. Log was hot at stop time and remains hot at restart + + SQL thread reads again from hot_log (SQL thread was + reading from the active log when it was stopped and the + very same log is still active on SQL thread restart). + + In this case, my_b_seek is performed on cur_log, while + cur_log points to relay_log.get_log_file(); + + 2. Log was hot at stop time but got cold before restart + + The log was hot when SQL thread stopped, but it is not + anymore when the SQL thread restarts. + + In this case, the SQL thread reopens the log, using + cache_buf, ie, cur_log points to &cache_buf, and thence + its coordinates are reset. + + 3. Log was already cold at stop time + + The log was not hot when the SQL thread stopped, and, of + course, it will not be hot when it restarts. + + In this case, the SQL thread opens the cold log again, + using cache_buf, ie, cur_log points to &cache_buf, and + thence its coordinates are reset. + + 4. Log was hot at stop time, DBA changes to previous cold + log and restarts SQL thread + + The log was hot when the SQL thread was stopped, but the + user changed the coordinates of the SQL thread to + restart from a previous cold log. + + In this case, at start time, cur_log points to a cold + log, opened using &cache_buf as cache, and coordinates + are reset. However, as it moves on to the next logs, it + will eventually reach the hot log. If the hot log is the + same at the time the SQL thread was stopped, then + coordinates were not reset - the cur_log will point to + relay_log.get_log_file(), and not a freshly opened + IO_CACHE through cache_buf. For this reason we need to + deploy a my_b_seek before calling check_binlog_magic at + this point of the code (see: BUG#55263 for more + details). + + NOTES: + - We must keep the LOCK_log to read the 4 first bytes, as + this is a hot log (same as when we call read_log_event() + above: for a hot log we take the mutex). + + - Because of scenario #4 above, we need to have a + my_b_seek here. Otherwise, we might hit the assertion + inside check_binlog_magic. */ + + my_b_seek(cur_log, (my_off_t) 0); if (check_binlog_magic(cur_log,&errmsg)) { if (!hot_log) @@ -4811,12 +4881,13 @@ err: is void). */ -void rotate_relay_log(Master_info* mi) +int rotate_relay_log(Master_info* mi) { DBUG_ENTER("rotate_relay_log"); Relay_log_info* rli= &mi->rli; + int error= 0; - DBUG_EXECUTE_IF("crash_before_rotate_relaylog", abort();); + DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE();); /* We need to test inited because otherwise, new_file() will attempt to lock @@ -4829,7 +4900,8 @@ void rotate_relay_log(Master_info* mi) } /* If the relay log is closed, new_file() will do nothing. */ - rli->relay_log.new_file(); + if ((error= rli->relay_log.new_file())) + goto end; /* We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately @@ -4846,7 +4918,7 @@ void rotate_relay_log(Master_info* mi) */ rli->relay_log.harvest_bytes_written(&rli->log_space_total); end: - DBUG_VOID_RETURN; + DBUG_RETURN(error); } |