diff options
author | Sergei Golubchik <serg@mariadb.org> | 2015-04-30 19:48:11 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2015-05-03 11:21:55 +0200 |
commit | f875c9f2a090f98ba9d9e881165b4cf71cd6a8a2 (patch) | |
tree | 6b773c01b2591f368898f293bdfd77d15653910e | |
parent | e6d918cacba8c5f3b002c4eb0244f44c3c941818 (diff) | |
download | mariadb-git-f875c9f2a090f98ba9d9e881165b4cf71cd6a8a2.tar.gz |
MDEV-5114 seconds_behind_master flips to 0 & spikes back, when running show slaves status
1. After a period of wait (where last_master_timestamp=0)
do NOT restore the last_master_timestamp to the timestamp
of the last executed event (which would mean we've just
executed it, and we're that much behind the master).
2. Update last_master_timestamp before executing the event,
not after.
Take the approach from the this commit (but with a different test
case that actually makes sense):
commit 0c75ab453fb8c5439576af8fe5add7a1b89f1569
Author: Luis Soares <luis.soares@sun.com>
Date: Thu Apr 15 17:39:31 2010 +0100
BUG#52166: Seconds_Behind_Master spikes after long idle period
-rw-r--r-- | mysql-test/suite/manual/r/rpl_replication_delay.result | 25 | ||||
-rw-r--r-- | mysql-test/suite/manual/t/rpl_replication_delay-slave.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/manual/t/rpl_replication_delay.test | 63 | ||||
-rw-r--r-- | mysql-test/suite/rpl/r/sec_behind_master-5114.result | 10 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/sec_behind_master-5114.test | 27 | ||||
-rw-r--r-- | sql/log_event.cc | 27 | ||||
-rw-r--r-- | sql/log_event_old.cc | 2 | ||||
-rw-r--r-- | sql/rpl_mi.cc | 6 | ||||
-rw-r--r-- | sql/rpl_rli.cc | 22 | ||||
-rw-r--r-- | sql/rpl_rli.h | 9 | ||||
-rw-r--r-- | sql/slave.cc | 37 |
11 files changed, 68 insertions, 161 deletions
diff --git a/mysql-test/suite/manual/r/rpl_replication_delay.result b/mysql-test/suite/manual/r/rpl_replication_delay.result deleted file mode 100644 index 73dd77cc647..00000000000 --- a/mysql-test/suite/manual/r/rpl_replication_delay.result +++ /dev/null @@ -1,25 +0,0 @@ -stop slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -reset master; -reset slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -start slave; -# Second_behind reports 0 -Seconds_Behind_Master 0 -drop table if exists t1; -Warnings: -Note 1051 Unknown table 't1' -create table t1 (f1 int); -flush logs /* contaminate rli->last_master_timestamp */; -lock table t1 write; -insert into t1 values (1); -# bug emulated: reports slave threads starting time about 3*3 not 3 secs -Seconds_Behind_Master 9 -unlock tables; -flush logs /* this time rli->last_master_timestamp is not affected */; -lock table t1 write; -insert into t1 values (2); -# reports the correct diff with master query time about 3+3 secs -Seconds_Behind_Master 7 -unlock tables; -drop table t1; diff --git a/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt b/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt deleted file mode 100644 index 24a4c5952fe..00000000000 --- a/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt +++ /dev/null @@ -1 +0,0 @@ ---loose-debug=d,let_first_flush_log_change_timestamp diff --git a/mysql-test/suite/manual/t/rpl_replication_delay.test b/mysql-test/suite/manual/t/rpl_replication_delay.test deleted file mode 100644 index f2b2da6f53c..00000000000 --- a/mysql-test/suite/manual/t/rpl_replication_delay.test +++ /dev/null @@ -1,63 +0,0 @@ -# -# Testing replication delay reporting (bug#29309) -# there is an unavoidable non-determinism in the test -# please compare the results with the comments -# - - -source include/master-slave.inc; - -connection slave; - ---echo # Second_behind reports 0 -let $status_items= Seconds_Behind_Master; -source include/show_slave_status.inc; -sleep 3; - -### bug emulation - -connection master; -drop table if exists t1; -create table t1 (f1 int); -sleep 3; - -#connection slave; -sync_slave_with_master; -flush logs /* contaminate rli->last_master_timestamp */; - -connection slave; -lock table t1 write; - -connection master; -insert into t1 values (1); - -sleep 3; - -connection slave; ---echo # bug emulated: reports slave threads starting time about 3*3 not 3 secs -source include/show_slave_status.inc; -unlock tables; - -connection master; -sync_slave_with_master; - -### bugfix - - -connection slave; -flush logs /* this time rli->last_master_timestamp is not affected */; -lock table t1 write; - -connection master; -insert into t1 values (2); -sleep 3; - -connection slave; ---echo # reports the correct diff with master query time about 3+3 secs -source include/show_slave_status.inc; -unlock tables; - -connection master; -drop table t1; - ---source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/sec_behind_master-5114.result b/mysql-test/suite/rpl/r/sec_behind_master-5114.result new file mode 100644 index 00000000000..5554b17347d --- /dev/null +++ b/mysql-test/suite/rpl/r/sec_behind_master-5114.result @@ -0,0 +1,10 @@ +include/master-slave.inc +[connection master] +call mtr.add_suppression("Unsafe statement written to the binary log"); +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES(SLEEP(2)); +Seconds_Behind_Master: 1 +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave. +drop table t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/sec_behind_master-5114.test b/mysql-test/suite/rpl/t/sec_behind_master-5114.test new file mode 100644 index 00000000000..8c70da4f0a8 --- /dev/null +++ b/mysql-test/suite/rpl/t/sec_behind_master-5114.test @@ -0,0 +1,27 @@ +source include/master-slave.inc; +source include/have_binlog_format_statement.inc; + +call mtr.add_suppression("Unsafe statement written to the binary log"); + +CREATE TABLE t1 (a int); +send INSERT INTO t1 VALUES(SLEEP(2)); + +connection slave; +let $run = 10; +while ($run) +{ + dec $run; + let $sbm=query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1); + # for debugging uncomment echo and remove the if() + #echo Seconds_Behind_Master: $sbm; + if ($sbm) + { + let $run = 0; + } + sleep 0.5; +} +echo Seconds_Behind_Master: $sbm; +connection master; +reap; +drop table t1; +source include/rpl_end.inc; diff --git a/sql/log_event.cc b/sql/log_event.cc index b8fd02e96d4..5c5f5cdb9f1 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -337,10 +337,6 @@ private: flag_set m_flags; }; -#ifndef DBUG_OFF -uint debug_not_change_ts_if_art_event= 1; // bug#29309 simulation -#endif - /* pretty_print_str() */ @@ -879,7 +875,7 @@ Log_event::Log_event() Log_event::Log_event(const char* buf, const Format_description_log_event* description_event) - :temp_buf(0), cache_type(Log_event::EVENT_INVALID_CACHE), + :temp_buf(0), exec_time(0), cache_type(Log_event::EVENT_INVALID_CACHE), crc(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF) { #ifndef MYSQL_CLIENT @@ -970,29 +966,12 @@ int Log_event::do_update_pos(rpl_group_info *rgi) if (rli) { /* - bug#29309 simulation: resetting the flag to force - wrong behaviour of artificial event to update - rli->last_master_timestamp for only one time - - the first FLUSH LOGS in the test. - */ - DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", - if (debug_not_change_ts_if_art_event == 1 - && is_artificial_event()) - debug_not_change_ts_if_art_event= 0; ); - /* In parallel execution, delay position update for the events that are not part of event groups (format description, rotate, and such) until the actual event execution reaches that point. */ if (!rgi->is_parallel_exec || is_group_event(get_type_code())) - rli->stmt_done(log_pos, - (is_artificial_event() && - IF_DBUG(debug_not_change_ts_if_art_event > 0, 1) ? - 0 : when), - thd, rgi); - DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", - if (debug_not_change_ts_if_art_event == 0) - debug_not_change_ts_if_art_event= 2; ); + rli->stmt_done(log_pos, thd, rgi); } DBUG_RETURN(0); // Cannot fail currently } @@ -10107,7 +10086,7 @@ Rows_log_event::do_update_pos(rpl_group_info *rgi) Step the group log position if we are not in a transaction, otherwise increase the event log position. */ - rli->stmt_done(log_pos, when, thd, rgi); + rli->stmt_done(log_pos, thd, rgi); /* Clear any errors in thd->net.last_err*. It is not known if this is needed or not. It is believed that any errors that may exist in diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 181d950bf59..e6c05aeb849 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -1830,7 +1830,7 @@ Old_rows_log_event::do_update_pos(rpl_group_info *rgi) Step the group log position if we are not in a transaction, otherwise increase the event log position. */ - rli->stmt_done(log_pos, when, thd, rgi); + rli->stmt_done(log_pos, thd, rgi); /* Clear any errors in thd->net.last_err*. It is not known if this is needed or not. It is believed that any errors that may exist in diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 50cd44df824..ddc502210ce 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -36,9 +36,9 @@ Master_info::Master_info(LEX_STRING *connection_name_arg, rli(is_slave_recovery), port(MYSQL_PORT), checksum_alg_before_fd(BINLOG_CHECKSUM_ALG_UNDEF), connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0), - slave_running(0), slave_run_id(0), sync_counter(0), - heartbeat_period(0), received_heartbeats(0), master_id(0), - prev_master_id(0), + slave_running(0), slave_run_id(0), clock_diff_with_master(0), + sync_counter(0), heartbeat_period(0), received_heartbeats(0), + master_id(0), prev_master_id(0), using_gtid(USE_GTID_NO), events_queued_since_last_gtid(0), gtid_reconnect_event_skip_count(0), gtid_event_seen(false) { diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index a324c3c30da..9bd0ca55b01 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1290,13 +1290,9 @@ bool Relay_log_info::is_until_satisfied(THD *thd, Log_event *ev) } -void Relay_log_info::stmt_done(my_off_t event_master_log_pos, - time_t event_creation_time, THD *thd, +void Relay_log_info::stmt_done(my_off_t event_master_log_pos, THD *thd, rpl_group_info *rgi) { -#ifndef DBUG_OFF - extern uint debug_not_change_ts_if_art_event; -#endif DBUG_ENTER("Relay_log_info::stmt_done"); DBUG_ASSERT(rgi->rli == this); @@ -1350,22 +1346,6 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, if (mi->using_gtid == Master_info::USE_GTID_NO) flush_relay_log_info(this); DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); - /* - Note that Rotate_log_event::do_apply_event() does not call this - function, so there is no chance that a fake rotate event resets - last_master_timestamp. Note that we update without mutex - (probably ok - except in some very rare cases, only consequence - is that value may take some time to display in - Seconds_Behind_Master - not critical). - - In parallel replication, we take care to not set last_master_timestamp - backwards, in case of out-of-order calls here. - */ - if (!(event_creation_time == 0 && - IF_DBUG(debug_not_change_ts_if_art_event > 0, 1)) && - !(rgi->is_parallel_exec && event_creation_time <= last_master_timestamp) - ) - last_master_timestamp= event_creation_time; } DBUG_VOID_RETURN; } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 84a4aa93cd3..2d92f384ef3 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -415,15 +415,8 @@ public: Master log position of the event. The position is recorded in the relay log info and used to produce information for <code>SHOW SLAVE STATUS</code>. - - @param event_creation_time - Timestamp for the creation of the event on the master side. The - time stamp is recorded in the relay log info and used to compute - the <code>Seconds_behind_master</code> field. */ - void stmt_done(my_off_t event_log_pos, - time_t event_creation_time, THD *thd, - rpl_group_info *rgi); + void stmt_done(my_off_t event_log_pos, THD *thd, rpl_group_info *rgi); int alloc_inuse_relaylog(const char *name); void free_inuse_relaylog(inuse_relaylog *ir); void reset_inuse_relaylog(); diff --git a/sql/slave.cc b/sql/slave.cc index f0ef3787c65..896270acf36 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -148,26 +148,18 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev); static bool wait_for_relay_log_space(Relay_log_info* rli); static bool io_slave_killed(Master_info* mi); static bool sql_slave_killed(rpl_group_info *rgi); -static int init_slave_thread(THD* thd, Master_info *mi, - SLAVE_THD_TYPE thd_type); +static int init_slave_thread(THD*, Master_info *, SLAVE_THD_TYPE); static void print_slave_skip_errors(void); static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi); -static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, - bool suppress_warnings); -static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi, - bool reconnect, bool suppress_warnings); +static int safe_reconnect(THD*, MYSQL*, Master_info*, bool); +static int connect_to_master(THD*, MYSQL*, Master_info*, bool, bool); static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size); static int queue_event(Master_info* mi,const char* buf,ulong event_len); -static int terminate_slave_thread(THD *thd, - mysql_mutex_t *term_lock, - mysql_cond_t *term_cond, - volatile uint *slave_running, - bool skip_lock); +static int terminate_slave_thread(THD *, mysql_mutex_t *, mysql_cond_t *, + volatile uint *, bool); static bool check_io_slave_killed(Master_info *mi, const char *info); -static bool send_show_master_info_header(THD *thd, bool full, - size_t gtid_pos_length); -static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, - String *gtid_pos); +static bool send_show_master_info_header(THD *, bool, size_t); +static bool send_show_master_info_data(THD *, Master_info *, bool, String *); /* Function to set the slave's max_allowed_packet based on the value of slave_max_allowed_packet. @@ -3502,6 +3494,21 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, Log_event_type typ= ev->get_type_code(); /* + Even if we don't execute this event, we keep the master timestamp, + so that seconds behind master shows correct delta (there are events + that are not replayed, so we keep falling behind). + + If it is an artificial event, or a relay log event (IO thread generated + event) or ev->when is set to 0, we don't update the + last_master_timestamp. + */ + if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) + { + rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; + DBUG_ASSERT(rli->last_master_timestamp >= 0); + } + + /* This tests if the position of the beginning of the current event hits the UNTIL barrier. */ |