summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlfranio Correia <alfranio.correia@sun.com>2009-09-29 15:40:52 +0100
committerAlfranio Correia <alfranio.correia@sun.com>2009-09-29 15:40:52 +0100
commit3ab71376ceb2d5da81d3b6fb092630d0b0929d76 (patch)
tree5227fe1804bac85dca21ad7baf732306e7d30679
parent0110bd04d24503d84df93d31b444586c4137c98c (diff)
downloadmariadb-git-3ab71376ceb2d5da81d3b6fb092630d0b0929d76.tar.gz
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr. The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue when fsyncing the master.info, relay.info and relay-log.bin* after #th events. Although such solution has been proposed to reduce the probability of corrupted files due to a slave-crash, the performance penalty introduced by it has made the approach impractical for highly intensive workloads. In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665 simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and this is the main source of performance issues. This patch introduces new options that give more control to the user on what should be fsynced and how often: 1) (--sync-master-info, integer) which syncs the master.info after #th event; 2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th events. 3) (--sync-relay-log-info, integer) which syncs the relay.info after #th transactions. To provide both performance and increased reliability, we recommend the following setup: 1) --sync-master-info = 0 eventually the operating system will fsync it; 2) --sync-relay-log = 0 eventually the operating system will fsync it; 3) --sync-relay-log-info = 1 fsyncs it after every transaction; Notice, that the previous setup does not reduce the probability of corrupted master.info and relay-log.bin*. To overcome the issue, this patch also introduces a recovery mechanism that right after restart throws away relay-log.bin* retrieved from a master and updates the master.info based on the relay.info: 4) (--relay-log-recovery, boolean) which enables a recovery mechanism that throws away relay-log.bin* after a crash. However, it can only recover the incorrect binlog file and position in master.info, if other informations (host, port password, etc) are corrupted or incorrect, then this recovery mechanism will fail to work.
-rw-r--r--mysql-test/suite/rpl/r/rpl_flushlog_loop.result1
-rw-r--r--mysql-test/suite/rpl/r/rpl_sync.result40
-rw-r--r--mysql-test/suite/rpl/t/rpl_sync-slave.opt1
-rw-r--r--mysql-test/suite/rpl/t/rpl_sync.test148
-rw-r--r--sql/mysql_priv.h4
-rw-r--r--sql/mysqld.cc26
-rw-r--r--sql/rpl_mi.cc17
-rw-r--r--sql/rpl_mi.h9
-rw-r--r--sql/rpl_rli.cc8
-rw-r--r--sql/rpl_rli.h15
-rw-r--r--sql/set_var.cc8
-rw-r--r--sql/set_var.h4
-rw-r--r--sql/slave.cc102
-rw-r--r--sql/sql_binlog.cc2
-rw-r--r--sql/sql_repl.cc12
15 files changed, 365 insertions, 32 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_flushlog_loop.result b/mysql-test/suite/rpl/r/rpl_flushlog_loop.result
index 600ac44fc86..529d1db6cd6 100644
--- a/mysql-test/suite/rpl/r/rpl_flushlog_loop.result
+++ b/mysql-test/suite/rpl/r/rpl_flushlog_loop.result
@@ -10,6 +10,7 @@ relay_log MYSQLD_DATADIR/relay-log
relay_log_index
relay_log_info_file relay-log.info
relay_log_purge ON
+relay_log_recovery OFF
relay_log_space_limit 0
stop slave;
change master to master_host='127.0.0.1',master_user='root',
diff --git a/mysql-test/suite/rpl/r/rpl_sync.result b/mysql-test/suite/rpl/r/rpl_sync.result
new file mode 100644
index 00000000000..edc20c46140
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_sync.result
@@ -0,0 +1,40 @@
+=====Configuring the enviroment=======;
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+call mtr.add_suppression('Attempting backtrace');
+call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+=====Inserting data on the master but without the SQL Thread being running=======;
+stop slave SQL_THREAD;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+=====Removing relay log files and crashing/recoverying the slave=======;
+stop slave IO_THREAD;
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+Comparing tables master:test.t1 and slave:test.t1
+=====Corrupting the master.info=======;
+stop slave;
+FLUSH LOGS;
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+Comparing tables master:test.t1 and slave:test.t1
+=====Clean up=======;
+drop table t1;
diff --git a/mysql-test/suite/rpl/t/rpl_sync-slave.opt b/mysql-test/suite/rpl/t/rpl_sync-slave.opt
new file mode 100644
index 00000000000..77809a42c43
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_sync-slave.opt
@@ -0,0 +1 @@
+--sync-relay-log-info=1 --relay-log-recovery=1
diff --git a/mysql-test/suite/rpl/t/rpl_sync.test b/mysql-test/suite/rpl/t/rpl_sync.test
new file mode 100644
index 00000000000..80b6a144187
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_sync.test
@@ -0,0 +1,148 @@
+########################################################################################
+# This test verifies the options --sync-relay-log-info and --relay-log-recovery by
+# crashing the slave in two different situations:
+# (case-1) - Corrupt the relay log with changes which were not processed by
+# the SQL Thread and crashes it.
+# (case-2) - Corrupt the master.info with wrong coordinates and crashes it.
+#
+# Case 1:
+# 1 - Stops the SQL Thread
+# 2 - Inserts new records into the master.
+# 3 - Corrupts the relay-log.bin* which most likely has such changes.
+# 4 - Crashes the slave
+# 5 - Verifies if the slave is sync with the master which means that the information
+# loss was circumvented by the recovery process.
+#
+# Case 2:
+# 1 - Stops the SQL/IO Threads
+# 2 - Inserts new records into the master.
+# 3 - Corrupts the master.info with wrong coordinates.
+# 4 - Crashes the slave
+# 5 - Verifies if the slave is sync with the master which means that the information
+# loss was circumvented by the recovery process.
+########################################################################################
+
+########################################################################################
+# Configuring the environment
+########################################################################################
+--echo =====Configuring the enviroment=======;
+--source include/master-slave.inc
+--source include/not_embedded.inc
+--source include/not_valgrind.inc
+--source include/have_debug.inc
+--source include/have_innodb.inc
+
+call mtr.add_suppression('Attempting backtrace');
+call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+
+########################################################################################
+# Case 1: Corrupt a relay-log.bin*
+########################################################################################
+--echo =====Inserting data on the master but without the SQL Thread being running=======;
+sync_slave_with_master;
+
+connection slave;
+let $MYSQLD_SLAVE_DATADIR= `select @@datadir`;
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--copy_file $MYSQLD_SLAVE_DATADIR/master.info $MYSQLD_SLAVE_DATADIR/master.backup
+stop slave SQL_THREAD;
+source include/wait_for_slave_sql_to_stop.inc;
+
+connection master;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+
+--echo =====Removing relay log files and crashing/recoverying the slave=======;
+connection slave;
+stop slave IO_THREAD;
+source include/wait_for_slave_io_to_stop.inc;
+
+let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1);
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+sync_slave_with_master;
+
+let $diff_table_1=master:test.t1;
+let $diff_table_2=slave:test.t1;
+source include/diff_tables.inc;
+
+########################################################################################
+# Case 2: Corrupt a master.info
+########################################################################################
+--echo =====Corrupting the master.info=======;
+connection slave;
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+FLUSH LOGS;
+
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+
+connection slave;
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--exec cat $MYSQLD_SLAVE_DATADIR/master.backup > $MYSQLD_SLAVE_DATADIR/master.info
+
+let MYSQLD_SLAVE_DATADIR=`select @@datadir`;
+
+--perl
+use strict;
+use warnings;
+my $src= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.backup";
+my $dst= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.info";
+open(FILE, "<", $src) or die;
+my @content= <FILE>;
+close FILE;
+open(FILE, ">", $dst) or die;
+binmode FILE;
+print FILE @content;
+close FILE;
+EOF
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+sync_slave_with_master;
+
+let $diff_table_1=master:test.t1;
+let $diff_table_2=slave:test.t1;
+source include/diff_tables.inc;
+
+########################################################################################
+# Clean up
+########################################################################################
+--echo =====Clean up=======;
+connection master;
+drop table t1;
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 669942cc691..435513832d0 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1869,10 +1869,12 @@ extern ulong MYSQL_PLUGIN_IMPORT specialflag;
#ifdef MYSQL_SERVER
extern ulong current_pid;
extern ulong expire_logs_days;
-extern uint sync_binlog_period, sync_relaylog_period;
+extern uint sync_binlog_period, sync_relaylog_period,
+ sync_relayloginfo_period, sync_masterinfo_period;
extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
extern ulong tc_log_page_waits;
extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
+extern my_bool relay_log_recovery;
extern uint test_flags,select_errors,ha_open_options;
extern uint protocol_version, mysqld_port, dropping_tables;
extern uint delay_key_write_options;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 8febc0bb7e5..b8d09fd4e5a 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -477,6 +477,7 @@ extern const char *opt_ndb_distribution;
extern enum ndb_distribution opt_ndb_distribution_id;
#endif
my_bool opt_readonly, use_temp_pool, relay_log_purge;
+my_bool relay_log_recovery;
my_bool opt_sync_frm, opt_allow_suspicious_udfs;
my_bool opt_secure_auth= 0;
char* opt_secure_file_priv= 0;
@@ -553,7 +554,8 @@ ulong max_prepared_stmt_count;
ulong prepared_stmt_count=0;
ulong thread_id=1L,current_pid;
ulong slow_launch_threads = 0;
-uint sync_binlog_period= 0, sync_relaylog_period= 0;
+uint sync_binlog_period= 0, sync_relaylog_period= 0,
+ sync_relayloginfo_period= 0, sync_masterinfo_period= 0;
ulong expire_logs_days = 0;
ulong rpl_recovery_rank=0;
const char *log_output_str= "FILE";
@@ -5605,6 +5607,7 @@ enum options_mysqld
OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
OPT_RELAY_LOG_PURGE,
+ OPT_RELAY_LOG_RECOVERY,
OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
@@ -5669,7 +5672,9 @@ enum options_mysqld
OPT_GENERAL_LOG_FILE,
OPT_SLOW_QUERY_LOG_FILE,
OPT_IGNORE_BUILTIN_INNODB,
- OPT_SYNC_RELAY_LOG
+ OPT_SYNC_RELAY_LOG,
+ OPT_SYNC_RELAY_LOG_INFO,
+ OPT_SYNC_MASTER_INFO
};
@@ -6889,6 +6894,13 @@ The minimum value for this variable is 4096.",
(uchar**) &relay_log_purge,
(uchar**) &relay_log_purge, 0, GET_BOOL, NO_ARG,
1, 0, 1, 0, 1, 0},
+ {"relay_log_recovery", OPT_RELAY_LOG_RECOVERY,
+ "Enables automatic relay log recovery right after the database startup, "
+ "which means that the IO Thread starts re-fetching from the master "
+ "right after the last transaction processed.",
+ (uchar**) &relay_log_recovery,
+ (uchar**) &relay_log_recovery, 0, GET_BOOL, NO_ARG,
+ 0, 0, 1, 0, 1, 0},
{"relay_log_space_limit", OPT_RELAY_LOG_SPACE_LIMIT,
"Maximum space to use for all relay logs.",
(uchar**) &relay_log_space_limit,
@@ -6930,6 +6942,16 @@ The minimum value for this variable is 4096.",
"Use 0 (default) to disable synchronous flushing.",
(uchar**) &sync_relaylog_period, (uchar**) &sync_relaylog_period, 0, GET_UINT,
REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+ {"sync-relay-log-info", OPT_SYNC_RELAY_LOG_INFO,
+ "Synchronously flush relay log info to disk after #th transaction. "
+ "Use 0 (default) to disable synchronous flushing.",
+ (uchar**) &sync_relayloginfo_period, (uchar**) &sync_relayloginfo_period, 0, GET_UINT,
+ REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+ {"sync-master-info", OPT_SYNC_MASTER_INFO,
+ "Synchronously flush master info to disk after every #th event. "
+ "Use 0 (default) to disable synchronous flushing.",
+ (uchar**) &sync_masterinfo_period, (uchar**) &sync_masterinfo_period, 0, GET_UINT,
+ REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
{"sync-frm", OPT_SYNC_FRM, "Sync .frm to disk on create. Enabled by default.",
(uchar**) &opt_sync_frm, (uchar**) &opt_sync_frm, 0, GET_BOOL, NO_ARG, 1, 0,
0, 0, 0, 0},
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 1bca44ac613..cec2eabdd20 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -27,11 +27,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
const char *default_val);
-Master_info::Master_info()
+Master_info::Master_info(bool is_slave_recovery)
:Slave_reporting_capability("I/O"),
ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0), inited(0),
- abort_slave(0),slave_running(0),
- slave_run_id(0)
+ rli(is_slave_recovery), abort_slave(0), slave_running(0),
+ slave_run_id(0), sync_counter(0)
{
host[0] = 0; user[0] = 0; password[0] = 0;
ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
@@ -364,11 +364,6 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
if (flush_io_cache(log_file))
DBUG_RETURN(2);
-
- /* Sync to disk if --sync-relay-log is set */
- if (sync_relaylog_period &&
- my_sync(log_file->file, MY_WME))
- DBUG_RETURN(2);
}
/*
@@ -398,8 +393,12 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
(int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert,
mi->ssl_cipher, mi->ssl_key, mi->ssl_verify_server_cert);
err= flush_io_cache(file);
- if (sync_relaylog_period && !err)
+ if (sync_masterinfo_period && !err &&
+ ++(mi->sync_counter) >= sync_masterinfo_period)
+ {
err= my_sync(mi->fd, MYF(MY_WME));
+ mi->sync_counter= 0;
+ }
DBUG_RETURN(-err);
}
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 93fb0a98198..c59dffefb7c 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -58,7 +58,7 @@
class Master_info : public Slave_reporting_capability
{
public:
- Master_info();
+ Master_info(bool is_slave_recovery);
~Master_info();
/* the variables below are needed because we can change masters on the fly */
@@ -100,6 +100,13 @@ class Master_info : public Slave_reporting_capability
*/
long clock_diff_with_master;
+
+ /*
+ Keeps track of the number of events before fsyncing.
+ The option --sync-master-info determines how many
+ events should happen before fsyncing.
+ */
+ uint sync_counter;
};
void init_master_info_with_options(Master_info* mi);
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 37c0815fb8b..3a12164a1cf 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -28,11 +28,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
const char *default_val);
-
-Relay_log_info::Relay_log_info()
+Relay_log_info::Relay_log_info(bool is_slave_recovery)
:Slave_reporting_capability("SQL"),
no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
+ sync_counter(0), is_relay_log_recovery(is_slave_recovery),
save_temporary_tables(0),
#if HAVE_purify
is_fake(FALSE),
@@ -259,7 +259,8 @@ Failed to open the existing relay log info file '%s' (errno %d)",
rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
rli->group_master_log_pos= master_log_pos;
- if (init_relay_log_pos(rli,
+ if (!rli->is_relay_log_recovery &&
+ init_relay_log_pos(rli,
rli->group_relay_log_name,
rli->group_relay_log_pos,
0 /* no data lock*/,
@@ -274,6 +275,7 @@ Failed to open the existing relay log info file '%s' (errno %d)",
}
#ifndef DBUG_OFF
+ if (!rli->is_relay_log_recovery)
{
char llbuf1[22], llbuf2[22];
DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index 171778d9675..a5410dd0c79 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -96,6 +96,19 @@ public:
LOG_INFO linfo;
IO_CACHE cache_buf,*cur_log;
+ /*
+ Keeps track of the number of transactions that commits
+ before fsyncing. The option --sync-relay-log-info determines
+ how many transactions should commit before fsyncing.
+ */
+ uint sync_counter;
+
+ /*
+ Identifies when the recovery process is going on.
+ See sql/slave.cc:init_recovery for further details.
+ */
+ bool is_relay_log_recovery;
+
/* The following variables are safe to read any time */
/* IO_CACHE of the info file - set only during init or end */
@@ -267,7 +280,7 @@ public:
char slave_patternload_file[FN_REFLEN];
size_t slave_patternload_file_size;
- Relay_log_info();
+ Relay_log_info(bool is_slave_recovery);
~Relay_log_info();
/*
diff --git a/sql/set_var.cc b/sql/set_var.cc
index f2b5201cf8b..dcc3954ff1e 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -1534,19 +1534,19 @@ static bool get_unsigned(THD *thd, set_var *var, ulonglong user_max,
}
-bool sys_var_int_ptr::check(THD *thd, set_var *var)
+bool sys_var_uint_ptr::check(THD *thd, set_var *var)
{
- var->save_result.ulong_value= (ulong) var->value->val_int();
+ var->save_result.ulong_value= (ulong) var->value->val_uint();
return 0;
}
-bool sys_var_int_ptr::update(THD *thd, set_var *var)
+bool sys_var_uint_ptr::update(THD *thd, set_var *var)
{
*value= (uint) var->save_result.ulong_value;
return 0;
}
-void sys_var_int_ptr::set_default(THD *thd, enum_var_type type)
+void sys_var_uint_ptr::set_default(THD *thd, enum_var_type type)
{
*value= (uint) option_limits->def_value;
}
diff --git a/sql/set_var.h b/sql/set_var.h
index 02c87abed88..0202a15836d 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -178,10 +178,10 @@ public:
/**
Unsigned int system variable class
*/
-class sys_var_int_ptr :public sys_var
+class sys_var_uint_ptr :public sys_var
{
public:
- sys_var_int_ptr(sys_var_chain *chain, const char *name_arg,
+ sys_var_uint_ptr(sys_var_chain *chain, const char *name_arg,
uint *value_ptr_arg,
sys_after_update_func after_update_arg= NULL)
:sys_var(name_arg, after_update_arg),
diff --git a/sql/slave.cc b/sql/slave.cc
index fac9ee214c5..5edb47df8b5 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -129,6 +129,7 @@ static bool wait_for_relay_log_space(Relay_log_info* rli);
static inline bool io_slave_killed(THD* thd,Master_info* mi);
static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
+static int init_recovery(Master_info* mi);
static void print_slave_skip_errors(void);
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
@@ -220,6 +221,7 @@ void unlock_slave_threads(Master_info* mi)
int init_slave()
{
DBUG_ENTER("init_slave");
+ int error= 0;
/*
This is called when mysqld starts. Before client connections are
@@ -231,7 +233,7 @@ int init_slave()
TODO: re-write this to interate through the list of files
for multi-master
*/
- active_mi= new Master_info;
+ active_mi= new Master_info(relay_log_recovery);
/*
If --slave-skip-errors=... was not used, the string value for the
@@ -250,6 +252,7 @@ int init_slave()
if (!active_mi)
{
sql_print_error("Failed to allocate memory for the master info structure");
+ error= 1;
goto err;
}
@@ -257,6 +260,13 @@ int init_slave()
!master_host, (SLAVE_IO | SLAVE_SQL)))
{
sql_print_error("Failed to initialize the master info structure");
+ error= 1;
+ goto err;
+ }
+
+ if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
+ {
+ error= 1;
goto err;
}
@@ -275,18 +285,89 @@ int init_slave()
SLAVE_IO | SLAVE_SQL))
{
sql_print_error("Failed to create slave threads");
+ error= 1;
goto err;
}
}
- pthread_mutex_unlock(&LOCK_active_mi);
- DBUG_RETURN(0);
err:
+ active_mi->rli.is_relay_log_recovery= FALSE;
pthread_mutex_unlock(&LOCK_active_mi);
- DBUG_RETURN(1);
+ DBUG_RETURN(error);
}
-
+/*
+ Updates the master info based on the information stored in the
+ relay info and ignores relay logs previously retrieved by the IO
+ thread, which thus starts fetching again based on to the
+ group_master_log_pos and group_master_log_name. Eventually, the old
+ relay logs will be purged by the normal purge mechanism.
+
+ In the feature, we should improve this routine in order to avoid throwing
+ away logs that are safely stored in the disk. Note also that this recovery
+ routine relies on the correctness of the relay-log.info and only tolerates
+ coordinate problems in master.info.
+
+ In this function, there is no need for a mutex as the caller
+ (i.e. init_slave) already has one acquired.
+
+ Specifically, the following structures are updated:
+
+ 1 - mi->master_log_pos <-- rli->group_master_log_pos
+ 2 - mi->master_log_name <-- rli->group_master_log_name
+ 3 - It moves the relay log to the new relay log file, by
+ rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
+ rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
+ rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
+ rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
+
+ If there is an error, it returns (1), otherwise returns (0).
+ */
+static int init_recovery(Master_info* mi)
+{
+ const char *errmsg= 0;
+ DBUG_ENTER("init_recovery");
+
+ Relay_log_info *rli= &mi->rli;
+ if (rli->group_master_log_name[0])
+ {
+ mi->master_log_pos= max(BIN_LOG_HEADER_SIZE,
+ rli->group_master_log_pos);
+ strmake(mi->master_log_name, rli->group_master_log_name,
+ sizeof(mi->master_log_name)-1);
+
+ sql_print_warning("Recovery from master pos %ld and file %s.",
+ (ulong) mi->master_log_pos, mi->master_log_name);
+
+ strmake(rli->group_relay_log_name, rli->relay_log.get_log_fname(),
+ sizeof(rli->group_relay_log_name)-1);
+ strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
+ sizeof(mi->rli.event_relay_log_name)-1);
+
+ rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
+
+ if (init_relay_log_pos(rli,
+ rli->group_relay_log_name,
+ rli->group_relay_log_pos,
+ 0 /*no data lock*/,
+ &errmsg, 0))
+ DBUG_RETURN(1);
+
+ if (flush_master_info(mi, 0))
+ {
+ sql_print_error("Failed to flush master info file");
+ DBUG_RETURN(1);
+ }
+ if (flush_relay_log_info(rli))
+ {
+ sql_print_error("Failed to flush relay info file");
+ DBUG_RETURN(1);
+ }
+ }
+
+ DBUG_RETURN(0);
+}
+
/**
Convert slave skip errors bitmap into a printable string.
*/
@@ -3959,7 +4040,14 @@ bool flush_relay_log_info(Relay_log_info* rli)
error=1;
if (flush_io_cache(file))
error=1;
-
+ if (sync_relayloginfo_period &&
+ !error &&
+ ++(rli->sync_counter) >= sync_relayloginfo_period)
+ {
+ if (my_sync(rli->info_fd, MYF(MY_WME)))
+ error=1;
+ rli->sync_counter= 0;
+ }
/* Flushing the relay log is done by the slave I/O thread */
DBUG_RETURN(error);
}
@@ -4366,6 +4454,8 @@ void rotate_relay_log(Master_info* mi)
DBUG_ENTER("rotate_relay_log");
Relay_log_info* rli= &mi->rli;
+ DBUG_EXECUTE_IF("crash_before_rotate_relaylog", abort(););
+
/* We don't lock rli->run_lock. This would lead to deadlocks. */
pthread_mutex_lock(&mi->run_lock);
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index 96e99b57e3c..531242f64d1 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -58,7 +58,7 @@ void mysql_client_binlog_statement(THD* thd)
my_bool have_fd_event= TRUE;
if (!thd->rli_fake)
{
- thd->rli_fake= new Relay_log_info;
+ thd->rli_fake= new Relay_log_info(FALSE);
#ifdef HAVE_purify
thd->rli_fake->is_fake= TRUE;
#endif
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 425d76c8b72..6295dbb0e79 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1769,6 +1769,16 @@ static sys_var_const sys_relay_log_info_file(&vars, "relay_log_info_file",
(uchar*) &relay_log_info_file);
static sys_var_bool_ptr sys_relay_log_purge(&vars, "relay_log_purge",
&relay_log_purge);
+static sys_var_bool_ptr sys_relay_log_recovery(&vars, "relay_log_recovery",
+ &relay_log_recovery);
+static sys_var_uint_ptr sys_sync_binlog_period(&vars, "sync_binlog",
+ &sync_binlog_period);
+static sys_var_uint_ptr sys_sync_relaylog_period(&vars, "sync_relay_log",
+ &sync_relaylog_period);
+static sys_var_uint_ptr sys_sync_relayloginfo_period(&vars, "sync_relay_log_info",
+ &sync_relayloginfo_period);
+static sys_var_uint_ptr sys_sync_masterinfo_period(&vars, "sync_master_info",
+ &sync_masterinfo_period);
static sys_var_const sys_relay_log_space_limit(&vars,
"relay_log_space_limit",
OPT_GLOBAL, SHOW_LONGLONG,
@@ -1784,8 +1794,6 @@ static sys_var_const sys_slave_skip_errors(&vars, "slave_skip_errors",
(uchar*) slave_skip_error_names);
static sys_var_long_ptr sys_slave_trans_retries(&vars, "slave_transaction_retries",
&slave_trans_retries);
-static sys_var_int_ptr sys_sync_binlog_period(&vars, "sync_binlog", &sync_binlog_period);
-static sys_var_int_ptr sys_sync_relaylog_period(&vars, "sync_relay_log", &sync_relaylog_period);
static sys_var_slave_skip_counter sys_slave_skip_counter(&vars, "sql_slave_skip_counter");