diff options
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- | sql/slave.cc | 308 |
1 files changed, 208 insertions, 100 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index 6c7968c2b3f..d2c5b4fd254 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -59,6 +59,58 @@ ulonglong relay_log_space_limit = 0; int disconnect_slave_event_count = 0, abort_slave_event_count = 0; int events_till_abort = -1; +enum enum_slave_reconnect_actions +{ + SLAVE_RECON_ACT_REG= 0, + SLAVE_RECON_ACT_DUMP= 1, + SLAVE_RECON_ACT_EVENT= 2, + SLAVE_RECON_ACT_MAX +}; + +enum enum_slave_reconnect_messages +{ + SLAVE_RECON_MSG_WAIT= 0, + SLAVE_RECON_MSG_KILLED_WAITING= 1, + SLAVE_RECON_MSG_AFTER= 2, + SLAVE_RECON_MSG_FAILED= 3, + SLAVE_RECON_MSG_COMMAND= 4, + SLAVE_RECON_MSG_KILLED_AFTER= 5, + SLAVE_RECON_MSG_MAX +}; + +static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]= +{ + { + "Waiting to reconnect after a failed registration on master", + "Slave I/O thread killed while waitnig to reconnect after a failed \ +registration on master", + "Reconnecting after a failed registration on master", + "failed registering on master, reconnecting to try again, \ +log '%s' at postion %s", + "COM_REGISTER_SLAVE", + "Slave I/O thread killed during or after reconnect" + }, + { + "Waiting to reconnect after a failed binlog dump request", + "Slave I/O thread killed while retrying master dump", + "Reconnecting after a failed binlog dump request", + "failed dump request, reconnecting to try again, log '%s' at postion %s", + "COM_BINLOG_DUMP", + "Slave I/O thread killed during or after reconnect" + }, + { + "Waiting to reconnect after a failed master event read", + "Slave I/O thread killed while waiting to reconnect after a failed read", + "Reconnecting after a failed master event read", + "Slave I/O thread: Failed reading log event, reconnecting to retry, \ +log '%s' at postion %s", + "", + "Slave I/O thread killed during or after a reconnect done to recover from \ +failed read" + } +}; + + typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE; static int process_io_rotate(MASTER_INFO* mi, Rotate_log_event* rev); @@ -1098,12 +1150,14 @@ static void write_ignored_events_info_to_relay_log(THD *thd, MASTER_INFO *mi) } -int register_slave_on_master(MYSQL* mysql, MASTER_INFO *mi) +int register_slave_on_master(MYSQL* mysql, MASTER_INFO *mi, + bool *suppress_warnings) { uchar buf[1024], *pos= buf; uint report_host_len, report_user_len=0, report_password_len=0; DBUG_ENTER("register_slave_on_master"); + *suppress_warnings= FALSE; if (!report_host) DBUG_RETURN(0); report_host_len= strlen(report_host); @@ -1127,11 +1181,18 @@ int register_slave_on_master(MYSQL* mysql, MASTER_INFO *mi) if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0)) { - char buf[256]; - my_snprintf(buf, sizeof(buf), - "%s (Errno: %d)", mysql_error(mysql), mysql_errno(mysql)); - mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, - ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf); + if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED) + { + *suppress_warnings= TRUE; // Suppress reconnect warning + } + else + { + char buf[256]; + my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql), + mysql_errno(mysql)); + mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, + ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf); + } DBUG_RETURN(1); } DBUG_RETURN(0); @@ -1460,6 +1521,8 @@ static int request_dump(MYSQL* mysql, MASTER_INFO* mi, int binlog_flags = 0; // for now char* logname = mi->master_log_name; DBUG_ENTER("request_dump"); + + *suppress_warnings= FALSE; // TODO if big log files: Change next to int8store() int4store(buf, (ulong) mi->master_log_pos); @@ -1475,7 +1538,7 @@ static int request_dump(MYSQL* mysql, MASTER_INFO* mi, now we just fill up the error log :-) */ if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED) - *suppress_warnings= 1; // Suppress reconnect warning + *suppress_warnings= TRUE; // Suppress reconnect warning else sql_print_error("Error on COM_BINLOG_DUMP: %d %s, will retry in %d secs", mysql_errno(mysql), mysql_error(mysql), @@ -1539,7 +1602,7 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings) ulong len; DBUG_ENTER("read_event"); - *suppress_warnings= 0; + *suppress_warnings= FALSE; /* my_real_read() will time us out We check if we were told to die, and if not, try reading again @@ -1879,6 +1942,94 @@ on this slave.\ } +static bool check_io_slave_killed(THD *thd, MASTER_INFO *mi, const char *info) +{ + if (io_slave_killed(thd, mi)) + { + if (global_system_variables.log_warnings) + sql_print_information(info); + return TRUE; + } + return FALSE; +} + + +/** + @brief Try to reconnect slave IO thread. + + @details Terminates current connection to master, sleeps for + @c mi->connect_retry msecs and initiates new connection with + @c safe_reconnect(). Variable pointed by @c retry_count is increased - + if it exceeds @c master_retry_count then connection is not re-established + and function signals error. + Unless @c suppres_warnings is TRUE, a warning is put in the server error log + when reconnecting. The warning message and messages used to report errors + are taken from @c messages array. In case @c master_retry_count is exceeded, + no messages are added to the log. + + @param[in] thd Thread context. + @param[in] mysql MySQL connection. + @param[in] mi Master connection information. + @param[in,out] retry_count Number of attempts to reconnect. + @param[in] suppress_warnings TRUE when a normal net read timeout + has caused to reconnecting. + @param[in] messages Messages to print/log, see + reconnect_messages[] array. + + @retval 0 OK. + @retval 1 There was an error. +*/ + +static int try_to_reconnect(THD *thd, MYSQL *mysql, MASTER_INFO *mi, + uint *retry_count, bool suppress_warnings, + const char *messages[SLAVE_RECON_MSG_MAX]) +{ + mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; + thd->proc_info= messages[SLAVE_RECON_MSG_WAIT]; +#ifdef SIGNAL_WITH_VIO_CLOSE + thd->clear_active_vio(); +#endif + end_server(mysql); + if ((*retry_count)++) + { + if (*retry_count > master_retry_count) + return 1; // Don't retry forever + safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed, + (void *) mi); + } + if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING])) + return 1; + thd->proc_info = messages[SLAVE_RECON_MSG_AFTER]; + if (!suppress_warnings) + { + char buf[256], llbuff[22]; + my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED], + IO_RPL_LOG_NAME, llstr(mi->master_log_pos, llbuff)); + /* + Raise a warining during registering on master/requesting dump. + Log a message reading event. + */ + if (messages[SLAVE_RECON_MSG_COMMAND][0]) + { + mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, + ER(ER_SLAVE_MASTER_COM_FAILURE), + messages[SLAVE_RECON_MSG_COMMAND], buf); + } + else + { + sql_print_information(buf); + } + } + if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi)) + { + if (global_system_variables.log_warnings) + sql_print_information(messages[SLAVE_RECON_MSG_KILLED_AFTER]); + return 1; + } + return 0; +} + + /* Slave I/O Thread entry point */ pthread_handler_t handle_slave_io(void *arg) @@ -1889,7 +2040,10 @@ pthread_handler_t handle_slave_io(void *arg) RELAY_LOG_INFO *rli= &mi->rli; char llbuff[22]; uint retry_count; - + bool suppress_warnings; +#ifndef DBUG_OFF + uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0; +#endif // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff my_thread_init(); DBUG_ENTER("handle_slave_io"); @@ -1975,79 +2129,56 @@ connected: Register ourselves with the master. */ thd->proc_info = "Registering slave on master"; - if (register_slave_on_master(mysql, mi)) + if (register_slave_on_master(mysql, mi, &suppress_warnings)) { sql_print_error("Slave I/O thread couldn't register on master"); - goto err; + if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \ +registering slave on master") || + try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_REG])) + goto err; + goto connected; } + DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_REG", + if (!retry_count_reg) + { + retry_count_reg++; + sql_print_information("Forcing to reconnect slave I/O thread"); + if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_REG])) + goto err; + goto connected; + }); } DBUG_PRINT("info",("Starting reading binary log from master")); while (!io_slave_killed(thd,mi)) { - bool suppress_warnings= 0; thd->proc_info = "Requesting binlog dump"; if (request_dump(mysql, mi, &suppress_warnings)) { sql_print_error("Failed on request_dump()"); - if (io_slave_killed(thd,mi)) - { - sql_print_information("Slave I/O thread killed while requesting master \ -dump"); - goto err; - } - - mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; - thd->proc_info= "Waiting to reconnect after a failed binlog dump request"; -#ifdef SIGNAL_WITH_VIO_CLOSE - thd->clear_active_vio(); -#endif - end_server(mysql); - /* - First time retry immediately, assuming that we can recover - right away - if first time fails, sleep between re-tries - hopefuly the admin can fix the problem sometime - */ - if (retry_count++) - { - if (retry_count > master_retry_count) - goto err; // Don't retry forever - safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed, - (void*)mi); - } - if (io_slave_killed(thd,mi)) - { - sql_print_information("Slave I/O thread killed while retrying master \ -dump"); - goto err; - } - - thd->proc_info = "Reconnecting after a failed binlog dump request"; - if (!suppress_warnings) { - char buf[256]; - my_snprintf(buf, sizeof(buf), - "failed dump request, reconnecting to try again," - " log '%s' at postion %s", - IO_RPL_LOG_NAME, - llstr(mi->master_log_pos,llbuff)); - mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, - ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_BINLOG_DUMP", buf); - } - if (safe_reconnect(thd, mysql, mi, suppress_warnings) || - io_slave_killed(thd,mi)) - { - sql_print_information("Slave I/O thread killed during or \ -after reconnect"); + if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \ +requesting master dump") || + try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_DUMP])) goto err; - } - goto connected; } + DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_DUMP", + if (!retry_count_dump) + { + retry_count_dump++; + sql_print_information("Forcing to reconnect slave I/O thread"); + if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_DUMP])) + goto err; + goto connected; + }); while (!io_slave_killed(thd,mi)) { ulong event_len; - suppress_warnings= 0; /* We say "waiting" because read_event() will wait if there's nothing to read. But if there's something to read, it will not wait. The @@ -2056,12 +2187,19 @@ after reconnect"); */ thd->proc_info= "Waiting for master to send event"; event_len= read_event(mysql, mi, &suppress_warnings); - if (io_slave_killed(thd,mi)) - { - if (global_system_variables.log_warnings) - sql_print_information("Slave I/O thread killed while reading event"); + if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \ +reading event")) goto err; - } + DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT", + if (!retry_count_event) + { + retry_count_event++; + sql_print_information("Forcing to reconnect slave I/O thread"); + if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_EVENT])) + goto err; + goto connected; + }); if (event_len == packet_error) { @@ -2081,39 +2219,9 @@ max_allowed_packet", mysql_error(mysql)); goto err; } - mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; - thd->proc_info = "Waiting to reconnect after a failed master event read"; -#ifdef SIGNAL_WITH_VIO_CLOSE - thd->clear_active_vio(); -#endif - end_server(mysql); - if (retry_count++) - { - if (retry_count > master_retry_count) - goto err; // Don't retry forever - safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed, - (void*) mi); - } - if (io_slave_killed(thd,mi)) - { - if (global_system_variables.log_warnings) - sql_print_information("Slave I/O thread killed while waiting to \ -reconnect after a failed read"); - goto err; - } - thd->proc_info = "Reconnecting after a failed master event read"; - if (!suppress_warnings) - sql_print_information("Slave I/O thread: Failed reading log event, \ -reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME, - llstr(mi->master_log_pos, llbuff)); - if (safe_reconnect(thd, mysql, mi, suppress_warnings) || - io_slave_killed(thd,mi)) - { - if (global_system_variables.log_warnings) - sql_print_information("Slave I/O thread killed during or after a \ -reconnect done to recover from failed read"); + if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, + reconnect_messages[SLAVE_RECON_ACT_EVENT])) goto err; - } goto connected; } // if (event_len == packet_error) |