diff options
author | Alfranio Correia <alfranio.correia@sun.com> | 2009-09-29 15:40:52 +0100 |
---|---|---|
committer | Alfranio Correia <alfranio.correia@sun.com> | 2009-09-29 15:40:52 +0100 |
commit | 3ab71376ceb2d5da81d3b6fb092630d0b0929d76 (patch) | |
tree | 5227fe1804bac85dca21ad7baf732306e7d30679 | |
parent | 0110bd04d24503d84df93d31b444586c4137c98c (diff) | |
download | mariadb-git-3ab71376ceb2d5da81d3b6fb092630d0b0929d76.tar.gz |
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and
this is the main source of performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover the incorrect binlog file and position in master.info,
if other informations (host, port password, etc) are corrupted or incorrect,
then this recovery mechanism will fail to work.
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_flushlog_loop.result | 1 | ||||
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_sync.result | 40 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_sync-slave.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_sync.test | 148 | ||||
-rw-r--r-- | sql/mysql_priv.h | 4 | ||||
-rw-r--r-- | sql/mysqld.cc | 26 | ||||
-rw-r--r-- | sql/rpl_mi.cc | 17 | ||||
-rw-r--r-- | sql/rpl_mi.h | 9 | ||||
-rw-r--r-- | sql/rpl_rli.cc | 8 | ||||
-rw-r--r-- | sql/rpl_rli.h | 15 | ||||
-rw-r--r-- | sql/set_var.cc | 8 | ||||
-rw-r--r-- | sql/set_var.h | 4 | ||||
-rw-r--r-- | sql/slave.cc | 102 | ||||
-rw-r--r-- | sql/sql_binlog.cc | 2 | ||||
-rw-r--r-- | sql/sql_repl.cc | 12 |
15 files changed, 365 insertions, 32 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_flushlog_loop.result b/mysql-test/suite/rpl/r/rpl_flushlog_loop.result index 600ac44fc86..529d1db6cd6 100644 --- a/mysql-test/suite/rpl/r/rpl_flushlog_loop.result +++ b/mysql-test/suite/rpl/r/rpl_flushlog_loop.result @@ -10,6 +10,7 @@ relay_log MYSQLD_DATADIR/relay-log relay_log_index relay_log_info_file relay-log.info relay_log_purge ON +relay_log_recovery OFF relay_log_space_limit 0 stop slave; change master to master_host='127.0.0.1',master_user='root', diff --git a/mysql-test/suite/rpl/r/rpl_sync.result b/mysql-test/suite/rpl/r/rpl_sync.result new file mode 100644 index 00000000000..edc20c46140 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_sync.result @@ -0,0 +1,40 @@ +=====Configuring the enviroment=======; +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +call mtr.add_suppression('Attempting backtrace'); +call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001"); +CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb; +insert into t1(a) values(1); +insert into t1(a) values(2); +insert into t1(a) values(3); +=====Inserting data on the master but without the SQL Thread being running=======; +stop slave SQL_THREAD; +insert into t1(a) values(4); +insert into t1(a) values(5); +insert into t1(a) values(6); +=====Removing relay log files and crashing/recoverying the slave=======; +stop slave IO_THREAD; +SET SESSION debug="d,crash_before_rotate_relaylog"; +FLUSH LOGS; +ERROR HY000: Lost connection to MySQL server during query +=====Dumping and comparing tables=======; +start slave; +Comparing tables master:test.t1 and slave:test.t1 +=====Corrupting the master.info=======; +stop slave; +FLUSH LOGS; +insert into t1(a) values(7); +insert into t1(a) values(8); +insert into t1(a) values(9); +SET SESSION debug="d,crash_before_rotate_relaylog"; +FLUSH LOGS; +ERROR HY000: Lost connection to MySQL server during query +=====Dumping and comparing tables=======; +start slave; +Comparing tables master:test.t1 and slave:test.t1 +=====Clean up=======; +drop table t1; diff --git a/mysql-test/suite/rpl/t/rpl_sync-slave.opt b/mysql-test/suite/rpl/t/rpl_sync-slave.opt new file mode 100644 index 00000000000..77809a42c43 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_sync-slave.opt @@ -0,0 +1 @@ +--sync-relay-log-info=1 --relay-log-recovery=1 diff --git a/mysql-test/suite/rpl/t/rpl_sync.test b/mysql-test/suite/rpl/t/rpl_sync.test new file mode 100644 index 00000000000..80b6a144187 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_sync.test @@ -0,0 +1,148 @@ +######################################################################################## +# This test verifies the options --sync-relay-log-info and --relay-log-recovery by +# crashing the slave in two different situations: +# (case-1) - Corrupt the relay log with changes which were not processed by +# the SQL Thread and crashes it. +# (case-2) - Corrupt the master.info with wrong coordinates and crashes it. +# +# Case 1: +# 1 - Stops the SQL Thread +# 2 - Inserts new records into the master. +# 3 - Corrupts the relay-log.bin* which most likely has such changes. +# 4 - Crashes the slave +# 5 - Verifies if the slave is sync with the master which means that the information +# loss was circumvented by the recovery process. +# +# Case 2: +# 1 - Stops the SQL/IO Threads +# 2 - Inserts new records into the master. +# 3 - Corrupts the master.info with wrong coordinates. +# 4 - Crashes the slave +# 5 - Verifies if the slave is sync with the master which means that the information +# loss was circumvented by the recovery process. +######################################################################################## + +######################################################################################## +# Configuring the environment +######################################################################################## +--echo =====Configuring the enviroment=======; +--source include/master-slave.inc +--source include/not_embedded.inc +--source include/not_valgrind.inc +--source include/have_debug.inc +--source include/have_innodb.inc + +call mtr.add_suppression('Attempting backtrace'); +call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001"); +CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb; + +insert into t1(a) values(1); +insert into t1(a) values(2); +insert into t1(a) values(3); + +######################################################################################## +# Case 1: Corrupt a relay-log.bin* +######################################################################################## +--echo =====Inserting data on the master but without the SQL Thread being running=======; +sync_slave_with_master; + +connection slave; +let $MYSQLD_SLAVE_DATADIR= `select @@datadir`; +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR +--copy_file $MYSQLD_SLAVE_DATADIR/master.info $MYSQLD_SLAVE_DATADIR/master.backup +stop slave SQL_THREAD; +source include/wait_for_slave_sql_to_stop.inc; + +connection master; +insert into t1(a) values(4); +insert into t1(a) values(5); +insert into t1(a) values(6); + +--echo =====Removing relay log files and crashing/recoverying the slave=======; +connection slave; +stop slave IO_THREAD; +source include/wait_for_slave_io_to_stop.inc; + +let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1); +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR +--exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +SET SESSION debug="d,crash_before_rotate_relaylog"; +--error 2013 +FLUSH LOGS; + +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo =====Dumping and comparing tables=======; +start slave; +source include/wait_for_slave_to_start.inc; + +connection master; +sync_slave_with_master; + +let $diff_table_1=master:test.t1; +let $diff_table_2=slave:test.t1; +source include/diff_tables.inc; + +######################################################################################## +# Case 2: Corrupt a master.info +######################################################################################## +--echo =====Corrupting the master.info=======; +connection slave; +stop slave; +source include/wait_for_slave_to_stop.inc; + +connection master; +FLUSH LOGS; + +insert into t1(a) values(7); +insert into t1(a) values(8); +insert into t1(a) values(9); + +connection slave; +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR +--exec cat $MYSQLD_SLAVE_DATADIR/master.backup > $MYSQLD_SLAVE_DATADIR/master.info + +let MYSQLD_SLAVE_DATADIR=`select @@datadir`; + +--perl +use strict; +use warnings; +my $src= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.backup"; +my $dst= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.info"; +open(FILE, "<", $src) or die; +my @content= <FILE>; +close FILE; +open(FILE, ">", $dst) or die; +binmode FILE; +print FILE @content; +close FILE; +EOF + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +SET SESSION debug="d,crash_before_rotate_relaylog"; +--error 2013 +FLUSH LOGS; + +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo =====Dumping and comparing tables=======; +start slave; +source include/wait_for_slave_to_start.inc; + +connection master; +sync_slave_with_master; + +let $diff_table_1=master:test.t1; +let $diff_table_2=slave:test.t1; +source include/diff_tables.inc; + +######################################################################################## +# Clean up +######################################################################################## +--echo =====Clean up=======; +connection master; +drop table t1; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 669942cc691..435513832d0 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1869,10 +1869,12 @@ extern ulong MYSQL_PLUGIN_IMPORT specialflag; #ifdef MYSQL_SERVER extern ulong current_pid; extern ulong expire_logs_days; -extern uint sync_binlog_period, sync_relaylog_period; +extern uint sync_binlog_period, sync_relaylog_period, + sync_relayloginfo_period, sync_masterinfo_period; extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size; extern ulong tc_log_page_waits; extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb; +extern my_bool relay_log_recovery; extern uint test_flags,select_errors,ha_open_options; extern uint protocol_version, mysqld_port, dropping_tables; extern uint delay_key_write_options; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8febc0bb7e5..b8d09fd4e5a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -477,6 +477,7 @@ extern const char *opt_ndb_distribution; extern enum ndb_distribution opt_ndb_distribution_id; #endif my_bool opt_readonly, use_temp_pool, relay_log_purge; +my_bool relay_log_recovery; my_bool opt_sync_frm, opt_allow_suspicious_udfs; my_bool opt_secure_auth= 0; char* opt_secure_file_priv= 0; @@ -553,7 +554,8 @@ ulong max_prepared_stmt_count; ulong prepared_stmt_count=0; ulong thread_id=1L,current_pid; ulong slow_launch_threads = 0; -uint sync_binlog_period= 0, sync_relaylog_period= 0; +uint sync_binlog_period= 0, sync_relaylog_period= 0, + sync_relayloginfo_period= 0, sync_masterinfo_period= 0; ulong expire_logs_days = 0; ulong rpl_recovery_rank=0; const char *log_output_str= "FILE"; @@ -5605,6 +5607,7 @@ enum options_mysqld OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER, OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE, + OPT_RELAY_LOG_RECOVERY, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, @@ -5669,7 +5672,9 @@ enum options_mysqld OPT_GENERAL_LOG_FILE, OPT_SLOW_QUERY_LOG_FILE, OPT_IGNORE_BUILTIN_INNODB, - OPT_SYNC_RELAY_LOG + OPT_SYNC_RELAY_LOG, + OPT_SYNC_RELAY_LOG_INFO, + OPT_SYNC_MASTER_INFO }; @@ -6889,6 +6894,13 @@ The minimum value for this variable is 4096.", (uchar**) &relay_log_purge, (uchar**) &relay_log_purge, 0, GET_BOOL, NO_ARG, 1, 0, 1, 0, 1, 0}, + {"relay_log_recovery", OPT_RELAY_LOG_RECOVERY, + "Enables automatic relay log recovery right after the database startup, " + "which means that the IO Thread starts re-fetching from the master " + "right after the last transaction processed.", + (uchar**) &relay_log_recovery, + (uchar**) &relay_log_recovery, 0, GET_BOOL, NO_ARG, + 0, 0, 1, 0, 1, 0}, {"relay_log_space_limit", OPT_RELAY_LOG_SPACE_LIMIT, "Maximum space to use for all relay logs.", (uchar**) &relay_log_space_limit, @@ -6930,6 +6942,16 @@ The minimum value for this variable is 4096.", "Use 0 (default) to disable synchronous flushing.", (uchar**) &sync_relaylog_period, (uchar**) &sync_relaylog_period, 0, GET_UINT, REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0}, + {"sync-relay-log-info", OPT_SYNC_RELAY_LOG_INFO, + "Synchronously flush relay log info to disk after #th transaction. " + "Use 0 (default) to disable synchronous flushing.", + (uchar**) &sync_relayloginfo_period, (uchar**) &sync_relayloginfo_period, 0, GET_UINT, + REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0}, + {"sync-master-info", OPT_SYNC_MASTER_INFO, + "Synchronously flush master info to disk after every #th event. " + "Use 0 (default) to disable synchronous flushing.", + (uchar**) &sync_masterinfo_period, (uchar**) &sync_masterinfo_period, 0, GET_UINT, + REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0}, {"sync-frm", OPT_SYNC_FRM, "Sync .frm to disk on create. Enabled by default.", (uchar**) &opt_sync_frm, (uchar**) &opt_sync_frm, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 1bca44ac613..cec2eabdd20 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -27,11 +27,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val); int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, const char *default_val); -Master_info::Master_info() +Master_info::Master_info(bool is_slave_recovery) :Slave_reporting_capability("I/O"), ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0), inited(0), - abort_slave(0),slave_running(0), - slave_run_id(0) + rli(is_slave_recovery), abort_slave(0), slave_running(0), + slave_run_id(0), sync_counter(0) { host[0] = 0; user[0] = 0; password[0] = 0; ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0; @@ -364,11 +364,6 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache) IO_CACHE *log_file= mi->rli.relay_log.get_log_file(); if (flush_io_cache(log_file)) DBUG_RETURN(2); - - /* Sync to disk if --sync-relay-log is set */ - if (sync_relaylog_period && - my_sync(log_file->file, MY_WME)) - DBUG_RETURN(2); } /* @@ -398,8 +393,12 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache) (int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert, mi->ssl_cipher, mi->ssl_key, mi->ssl_verify_server_cert); err= flush_io_cache(file); - if (sync_relaylog_period && !err) + if (sync_masterinfo_period && !err && + ++(mi->sync_counter) >= sync_masterinfo_period) + { err= my_sync(mi->fd, MYF(MY_WME)); + mi->sync_counter= 0; + } DBUG_RETURN(-err); } diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 93fb0a98198..c59dffefb7c 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -58,7 +58,7 @@ class Master_info : public Slave_reporting_capability { public: - Master_info(); + Master_info(bool is_slave_recovery); ~Master_info(); /* the variables below are needed because we can change masters on the fly */ @@ -100,6 +100,13 @@ class Master_info : public Slave_reporting_capability */ long clock_diff_with_master; + + /* + Keeps track of the number of events before fsyncing. + The option --sync-master-info determines how many + events should happen before fsyncing. + */ + uint sync_counter; }; void init_master_info_with_options(Master_info* mi); diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 37c0815fb8b..3a12164a1cf 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -28,11 +28,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val); int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, const char *default_val); - -Relay_log_info::Relay_log_info() +Relay_log_info::Relay_log_info(bool is_slave_recovery) :Slave_reporting_capability("SQL"), no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id), info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period), + sync_counter(0), is_relay_log_recovery(is_slave_recovery), save_temporary_tables(0), #if HAVE_purify is_fake(FALSE), @@ -259,7 +259,8 @@ Failed to open the existing relay log info file '%s' (errno %d)", rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos; rli->group_master_log_pos= master_log_pos; - if (init_relay_log_pos(rli, + if (!rli->is_relay_log_recovery && + init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, 0 /* no data lock*/, @@ -274,6 +275,7 @@ Failed to open the existing relay log info file '%s' (errno %d)", } #ifndef DBUG_OFF + if (!rli->is_relay_log_recovery) { char llbuf1[22], llbuf2[22]; DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 171778d9675..a5410dd0c79 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -96,6 +96,19 @@ public: LOG_INFO linfo; IO_CACHE cache_buf,*cur_log; + /* + Keeps track of the number of transactions that commits + before fsyncing. The option --sync-relay-log-info determines + how many transactions should commit before fsyncing. + */ + uint sync_counter; + + /* + Identifies when the recovery process is going on. + See sql/slave.cc:init_recovery for further details. + */ + bool is_relay_log_recovery; + /* The following variables are safe to read any time */ /* IO_CACHE of the info file - set only during init or end */ @@ -267,7 +280,7 @@ public: char slave_patternload_file[FN_REFLEN]; size_t slave_patternload_file_size; - Relay_log_info(); + Relay_log_info(bool is_slave_recovery); ~Relay_log_info(); /* diff --git a/sql/set_var.cc b/sql/set_var.cc index f2b5201cf8b..dcc3954ff1e 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -1534,19 +1534,19 @@ static bool get_unsigned(THD *thd, set_var *var, ulonglong user_max, } -bool sys_var_int_ptr::check(THD *thd, set_var *var) +bool sys_var_uint_ptr::check(THD *thd, set_var *var) { - var->save_result.ulong_value= (ulong) var->value->val_int(); + var->save_result.ulong_value= (ulong) var->value->val_uint(); return 0; } -bool sys_var_int_ptr::update(THD *thd, set_var *var) +bool sys_var_uint_ptr::update(THD *thd, set_var *var) { *value= (uint) var->save_result.ulong_value; return 0; } -void sys_var_int_ptr::set_default(THD *thd, enum_var_type type) +void sys_var_uint_ptr::set_default(THD *thd, enum_var_type type) { *value= (uint) option_limits->def_value; } diff --git a/sql/set_var.h b/sql/set_var.h index 02c87abed88..0202a15836d 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -178,10 +178,10 @@ public: /** Unsigned int system variable class */ -class sys_var_int_ptr :public sys_var +class sys_var_uint_ptr :public sys_var { public: - sys_var_int_ptr(sys_var_chain *chain, const char *name_arg, + sys_var_uint_ptr(sys_var_chain *chain, const char *name_arg, uint *value_ptr_arg, sys_after_update_func after_update_arg= NULL) :sys_var(name_arg, after_update_arg), diff --git a/sql/slave.cc b/sql/slave.cc index fac9ee214c5..5edb47df8b5 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -129,6 +129,7 @@ static bool wait_for_relay_log_space(Relay_log_info* rli); static inline bool io_slave_killed(THD* thd,Master_info* mi); static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli); static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type); +static int init_recovery(Master_info* mi); static void print_slave_skip_errors(void); static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi); static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, @@ -220,6 +221,7 @@ void unlock_slave_threads(Master_info* mi) int init_slave() { DBUG_ENTER("init_slave"); + int error= 0; /* This is called when mysqld starts. Before client connections are @@ -231,7 +233,7 @@ int init_slave() TODO: re-write this to interate through the list of files for multi-master */ - active_mi= new Master_info; + active_mi= new Master_info(relay_log_recovery); /* If --slave-skip-errors=... was not used, the string value for the @@ -250,6 +252,7 @@ int init_slave() if (!active_mi) { sql_print_error("Failed to allocate memory for the master info structure"); + error= 1; goto err; } @@ -257,6 +260,13 @@ int init_slave() !master_host, (SLAVE_IO | SLAVE_SQL))) { sql_print_error("Failed to initialize the master info structure"); + error= 1; + goto err; + } + + if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi)) + { + error= 1; goto err; } @@ -275,18 +285,89 @@ int init_slave() SLAVE_IO | SLAVE_SQL)) { sql_print_error("Failed to create slave threads"); + error= 1; goto err; } } - pthread_mutex_unlock(&LOCK_active_mi); - DBUG_RETURN(0); err: + active_mi->rli.is_relay_log_recovery= FALSE; pthread_mutex_unlock(&LOCK_active_mi); - DBUG_RETURN(1); + DBUG_RETURN(error); } - +/* + Updates the master info based on the information stored in the + relay info and ignores relay logs previously retrieved by the IO + thread, which thus starts fetching again based on to the + group_master_log_pos and group_master_log_name. Eventually, the old + relay logs will be purged by the normal purge mechanism. + + In the feature, we should improve this routine in order to avoid throwing + away logs that are safely stored in the disk. Note also that this recovery + routine relies on the correctness of the relay-log.info and only tolerates + coordinate problems in master.info. + + In this function, there is no need for a mutex as the caller + (i.e. init_slave) already has one acquired. + + Specifically, the following structures are updated: + + 1 - mi->master_log_pos <-- rli->group_master_log_pos + 2 - mi->master_log_name <-- rli->group_master_log_name + 3 - It moves the relay log to the new relay log file, by + rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE; + rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE; + rli->group_relay_log_name <-- rli->relay_log.get_log_fname(); + rli->event_relay_log_name <-- rli->relay_log.get_log_fname(); + + If there is an error, it returns (1), otherwise returns (0). + */ +static int init_recovery(Master_info* mi) +{ + const char *errmsg= 0; + DBUG_ENTER("init_recovery"); + + Relay_log_info *rli= &mi->rli; + if (rli->group_master_log_name[0]) + { + mi->master_log_pos= max(BIN_LOG_HEADER_SIZE, + rli->group_master_log_pos); + strmake(mi->master_log_name, rli->group_master_log_name, + sizeof(mi->master_log_name)-1); + + sql_print_warning("Recovery from master pos %ld and file %s.", + (ulong) mi->master_log_pos, mi->master_log_name); + + strmake(rli->group_relay_log_name, rli->relay_log.get_log_fname(), + sizeof(rli->group_relay_log_name)-1); + strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(), + sizeof(mi->rli.event_relay_log_name)-1); + + rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; + + if (init_relay_log_pos(rli, + rli->group_relay_log_name, + rli->group_relay_log_pos, + 0 /*no data lock*/, + &errmsg, 0)) + DBUG_RETURN(1); + + if (flush_master_info(mi, 0)) + { + sql_print_error("Failed to flush master info file"); + DBUG_RETURN(1); + } + if (flush_relay_log_info(rli)) + { + sql_print_error("Failed to flush relay info file"); + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + /** Convert slave skip errors bitmap into a printable string. */ @@ -3959,7 +4040,14 @@ bool flush_relay_log_info(Relay_log_info* rli) error=1; if (flush_io_cache(file)) error=1; - + if (sync_relayloginfo_period && + !error && + ++(rli->sync_counter) >= sync_relayloginfo_period) + { + if (my_sync(rli->info_fd, MYF(MY_WME))) + error=1; + rli->sync_counter= 0; + } /* Flushing the relay log is done by the slave I/O thread */ DBUG_RETURN(error); } @@ -4366,6 +4454,8 @@ void rotate_relay_log(Master_info* mi) DBUG_ENTER("rotate_relay_log"); Relay_log_info* rli= &mi->rli; + DBUG_EXECUTE_IF("crash_before_rotate_relaylog", abort();); + /* We don't lock rli->run_lock. This would lead to deadlocks. */ pthread_mutex_lock(&mi->run_lock); diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index 96e99b57e3c..531242f64d1 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -58,7 +58,7 @@ void mysql_client_binlog_statement(THD* thd) my_bool have_fd_event= TRUE; if (!thd->rli_fake) { - thd->rli_fake= new Relay_log_info; + thd->rli_fake= new Relay_log_info(FALSE); #ifdef HAVE_purify thd->rli_fake->is_fake= TRUE; #endif diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 425d76c8b72..6295dbb0e79 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1769,6 +1769,16 @@ static sys_var_const sys_relay_log_info_file(&vars, "relay_log_info_file", (uchar*) &relay_log_info_file); static sys_var_bool_ptr sys_relay_log_purge(&vars, "relay_log_purge", &relay_log_purge); +static sys_var_bool_ptr sys_relay_log_recovery(&vars, "relay_log_recovery", + &relay_log_recovery); +static sys_var_uint_ptr sys_sync_binlog_period(&vars, "sync_binlog", + &sync_binlog_period); +static sys_var_uint_ptr sys_sync_relaylog_period(&vars, "sync_relay_log", + &sync_relaylog_period); +static sys_var_uint_ptr sys_sync_relayloginfo_period(&vars, "sync_relay_log_info", + &sync_relayloginfo_period); +static sys_var_uint_ptr sys_sync_masterinfo_period(&vars, "sync_master_info", + &sync_masterinfo_period); static sys_var_const sys_relay_log_space_limit(&vars, "relay_log_space_limit", OPT_GLOBAL, SHOW_LONGLONG, @@ -1784,8 +1794,6 @@ static sys_var_const sys_slave_skip_errors(&vars, "slave_skip_errors", (uchar*) slave_skip_error_names); static sys_var_long_ptr sys_slave_trans_retries(&vars, "slave_transaction_retries", &slave_trans_retries); -static sys_var_int_ptr sys_sync_binlog_period(&vars, "sync_binlog", &sync_binlog_period); -static sys_var_int_ptr sys_sync_relaylog_period(&vars, "sync_relay_log", &sync_relaylog_period); static sys_var_slave_skip_counter sys_slave_skip_counter(&vars, "sql_slave_skip_counter"); |