diff options
author | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2021-12-27 11:53:14 -0700 |
---|---|---|
committer | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-01-04 11:21:33 -0700 |
commit | 96de6bfd5e0d08bd99a47e8a3d60b1c2900a38f8 (patch) | |
tree | 1bd04187d47ac69194d7a6024b8016b03c297337 | |
parent | 452c9a4d72c18a19136a91f4d59ee60eedd486be (diff) | |
download | mariadb-git-96de6bfd5e0d08bd99a47e8a3d60b1c2900a38f8.tar.gz |
MDEV-16091: Seconds_Behind_Master spikes to millions of secondsbb-10.2-MDEV-16091
Problem:
========
A slave’s relay log format description event is used when
calculating Seconds_Behind_Master (SBM). This forces the SBM
value to spike when processing these events, as their creation
date is set to the timestamp that the IO thread begins.
Solution:
========
When the slave generates a format description event, mark the
event as a relay log event so it does not update the
rli->last_master_timestamp variable.
Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result | 41 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test | 86 | ||||
-rw-r--r-- | sql/log.cc | 1 | ||||
-rw-r--r-- | sql/slave.cc | 9 |
4 files changed, 137 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result new file mode 100644 index 00000000000..d164ea5434f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result @@ -0,0 +1,41 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +SET @save_dbug= @@GLOBAL.debug_dbug; +SET @@global.debug_dbug="+d,pause_sql_thread_on_fde"; +include/start_slave.inc +# Future events must be logged at least 2 seconds after +# the slave starts +connection master; +# Write events to ensure slave will be consistent with master +create table t1 (a int); +insert into t1 values (1); +# Flush logs on master forces slave to generate a Format description +# event in its relay log +flush logs; +connection slave; +# Ignore FDEs that happen before the CREATE/INSERT commands +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +# On the next FDE, the slave should have the master CREATE/INSERT events +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +select count(*)=1 from t1; +count(*)=1 +1 +# The relay log FDE has been processed - here we check to ensure it was +# not considered in Seconds_Behind_Master calculation +connection slave1; +# Safely resume slave SQL thread +SET @@global.debug_dbug=''; +SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='RESET'; +connection master; +DROP TABLE t1; +connection slave; +connection slave; +SET @@global.debug_dbug=$save_dbug; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test new file mode 100644 index 00000000000..36ddbd0dc9f --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test @@ -0,0 +1,86 @@ +# +# Purpose: +# This test validates that a slave's relay log format description event is +# not used to calculate the Seconds_Behind_Master time displayed by +# SHOW SLAVE STATUS. +# +# Methodology: +# Ensure that a slave's reported Seconds_Behind_Master does not point before +# a time in which we can prove that it has progressed beyond. The slave's +# relay log events are created using the timestamp at which the IO thread was +# created. Therefore, after starting the slave's IO thread, we sleep so any +# proceeding events are forced to have later timestamps. After sleeping, we run +# MDL statements on the master and save the time at which they are binlogged. +# Once the slave executes these MDL commands, we have proven that the slave has +# caught up to this saved timestamp. At this point, if the value of +# Seconds_Behind_Master points before the time in which the MDL events were +# logged, it is invalid. +# +# References: +# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds +# +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--connection slave +--source include/stop_slave.inc +SET @save_dbug= @@GLOBAL.debug_dbug; +SET @@global.debug_dbug="+d,pause_sql_thread_on_fde"; +--source include/start_slave.inc + +--let $sleep_time=2 +--echo # Future events must be logged at least $sleep_time seconds after +--echo # the slave starts +--sleep $sleep_time + +--connection master +--echo # Write events to ensure slave will be consistent with master +create table t1 (a int); +insert into t1 values (1); +--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()` + +--echo # Flush logs on master forces slave to generate a Format description +--echo # event in its relay log +flush logs; + +--connection slave +--echo # Ignore FDEs that happen before the CREATE/INSERT commands +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; + +--echo # On the next FDE, the slave should have the master CREATE/INSERT events +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +select count(*)=1 from t1; + +--echo # The relay log FDE has been processed - here we check to ensure it was +--echo # not considered in Seconds_Behind_Master calculation +--connection slave1 +let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1); +--let $t_now= `SELECT UNIX_TIMESTAMP()` + +if(`select $sbm > $t_now - $t_master_events_logged`) +{ + die "A relay log event was incorrectly used to set Seconds_Behind_Master"; +} + +--echo # Safely resume slave SQL thread +SET @@global.debug_dbug=''; +SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; + +# Reset last sql_thread_continue signal +SET DEBUG_SYNC='RESET'; + +# Cleanup +--connection master +DROP TABLE t1; +--save_master_pos +--sync_slave_with_master + +--connection slave +SET @@global.debug_dbug=$save_dbug; + +--source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index 895eeec454b..d94f87b90b5 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3463,6 +3463,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options : BINLOG_CHECKSUM_ALG_OFF; s.checksum_alg= relay_log_checksum_alg; + s.set_relay_log_event(); } else s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options; diff --git a/sql/slave.cc b/sql/slave.cc index 761fdbe807a..2ff1a0490e9 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4033,6 +4033,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, #endif /* WITH_WSREP */ thread_safe_increment64(&rli->executed_entries); + DBUG_EXECUTE_IF( + "pause_sql_thread_on_fde", + if (ev && typ == FORMAT_DESCRIPTION_EVENT) { + DBUG_ASSERT(!debug_sync_set_action( + thd, + STRING_WITH_LEN( + "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"))); + }); + DBUG_RETURN(exec_res); } mysql_mutex_unlock(&rli->data_lock); |