From 3ab71376ceb2d5da81d3b6fb092630d0b0929d76 Mon Sep 17 00:00:00 2001 From: Alfranio Correia Date: Tue, 29 Sep 2009 15:40:52 +0100 Subject: BUG#40337 Fsyncing master and relay log to disk after every event is too slow NOTE: Backporting the patch to next-mr. The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue when fsyncing the master.info, relay.info and relay-log.bin* after #th events. Although such solution has been proposed to reduce the probability of corrupted files due to a slave-crash, the performance penalty introduced by it has made the approach impractical for highly intensive workloads. In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665 simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and this is the main source of performance issues. This patch introduces new options that give more control to the user on what should be fsynced and how often: 1) (--sync-master-info, integer) which syncs the master.info after #th event; 2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th events. 3) (--sync-relay-log-info, integer) which syncs the relay.info after #th transactions. To provide both performance and increased reliability, we recommend the following setup: 1) --sync-master-info = 0 eventually the operating system will fsync it; 2) --sync-relay-log = 0 eventually the operating system will fsync it; 3) --sync-relay-log-info = 1 fsyncs it after every transaction; Notice, that the previous setup does not reduce the probability of corrupted master.info and relay-log.bin*. To overcome the issue, this patch also introduces a recovery mechanism that right after restart throws away relay-log.bin* retrieved from a master and updates the master.info based on the relay.info: 4) (--relay-log-recovery, boolean) which enables a recovery mechanism that throws away relay-log.bin* after a crash. However, it can only recover the incorrect binlog file and position in master.info, if other informations (host, port password, etc) are corrupted or incorrect, then this recovery mechanism will fail to work. --- sql/rpl_rli.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'sql/rpl_rli.h') diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 171778d9675..a5410dd0c79 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -96,6 +96,19 @@ public: LOG_INFO linfo; IO_CACHE cache_buf,*cur_log; + /* + Keeps track of the number of transactions that commits + before fsyncing. The option --sync-relay-log-info determines + how many transactions should commit before fsyncing. + */ + uint sync_counter; + + /* + Identifies when the recovery process is going on. + See sql/slave.cc:init_recovery for further details. + */ + bool is_relay_log_recovery; + /* The following variables are safe to read any time */ /* IO_CACHE of the info file - set only during init or end */ @@ -267,7 +280,7 @@ public: char slave_patternload_file[FN_REFLEN]; size_t slave_patternload_file_size; - Relay_log_info(); + Relay_log_info(bool is_slave_recovery); ~Relay_log_info(); /* -- cgit v1.2.1