From 9682ff8a26cef6dc2252b6a520f841dcfa62ee20 Mon Sep 17 00:00:00 2001 From: Alfranio Correia Date: Wed, 30 Sep 2009 22:41:05 +0100 Subject: BUG#43075 rpl.rpl_sync fails sporadically on pushbuild NOTE: Backporting the patch to next-mr. The slave crashed when it failed to execute the init_slave() function. The issue has two distinct causes: 1 - A failure while allocating the master info structure generated a segfault due to a NULL pointer. 2 - A failure while recovering generated a segfault due to an uninitialized relay log file. In other words, mi->init and rli->init were both set to true before executing the recovery process, thus creating an inconsistent state as the relay log file was not initialized. To circumvent such problems, we refactored the recovery process, which is now executed while initializing the relay log. It is ensured that the master info structure is created before accessing it and that any error is propagated, so that mi->init and rli->init are not set to true when, for instance, the relay log is not initialized or the relay info is not flushed. The changes related to the refactoring are described below: 1 - Removed call to init_recovery from init_slave. 2 - Changed the signature of the function init_recovery. 3 - Removed flushes. They are called while initializing the relay log and master info. 4 - Made sure that if the relay info is not flushed, mi->init and rli->init are not set to true. In this patch, we also replaced the exit(1) in the fault injection with DBUG_ABORT() to make it compliant with the code guidelines. 
--- sql/rpl_rli.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'sql/rpl_rli.cc') diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 3a12164a1cf..b3a1bbc31d2 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -259,8 +259,10 @@ Failed to open the existing relay log info file '%s' (errno %d)", rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos; rli->group_master_log_pos= master_log_pos; - if (!rli->is_relay_log_recovery && - init_relay_log_pos(rli, + if (rli->is_relay_log_recovery && init_recovery(rli->mi, &msg)) + goto err; + + if (init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, 0 /* no data lock*/, @@ -275,7 +277,6 @@ Failed to open the existing relay log info file '%s' (errno %d)", } #ifndef DBUG_OFF - if (!rli->is_relay_log_recovery) { char llbuf1[22], llbuf2[22]; DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", @@ -292,7 +293,10 @@ Failed to open the existing relay log info file '%s' (errno %d)", */ reinit_io_cache(&rli->info_file, WRITE_CACHE,0L,0,1); if ((error= flush_relay_log_info(rli))) - sql_print_error("Failed to flush relay log info file"); + { + msg= "Failed to flush relay log info file"; + goto err; + } if (count_relay_log_space(rli)) { msg="Error counting relay log space"; -- cgit v1.2.1