diff options
author | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-11-03 14:56:50 -0600 |
---|---|---|
committer | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2023-01-24 08:11:35 -0700 |
commit | d69e835787f9ce9848cb6c2a5343887dd0eec2ce (patch) | |
tree | f24fc11faf81358e76cf85f9611829a41a459309 | |
parent | 2ed598eae8ebe737cdfa2aa2e7e6e0f3ad8d6ef7 (diff) | |
download | mariadb-git-d69e835787f9ce9848cb6c2a5343887dd0eec2ce.tar.gz |
MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
Problem
========
On a parallel, delayed replica, Seconds_Behind_Master will not be
calculated until after MASTER_DELAY seconds have passed and the
event has finished executing, resulting in potentially very large
values of Seconds_Behind_Master (which could be much larger than the
MASTER_DELAY parameter) for the entire duration the event is
delayed. This contradicts the documented MASTER_DELAY behavior,
which specifies how many seconds to withhold replicated events from
execution.
Solution
========
After a parallel replica idles, the first event after idling should
immediately update last_master_timestamp with the time that it began
execution on the primary.
Reviewed By
===========
Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result | 60 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test | 133 | ||||
-rw-r--r-- | sql/rpl_parallel.cc | 3 | ||||
-rw-r--r-- | sql/slave.cc | 31 | ||||
-rw-r--r-- | sql/slave.h | 12 |
6 files changed, 231 insertions, 9 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result b/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result new file mode 100644 index 00000000000..f783b1e0783 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result @@ -0,0 +1,60 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +change master to master_delay=3, master_use_gtid=Slave_Pos; +set @@GLOBAL.slave_parallel_threads=2; +include/start_slave.inc +connection master; +create table t1 (a int); +include/sync_slave_sql_with_master.inc +# +# Pt 1) Ensure SBM is updated immediately upon arrival of the next event +# Lock t1 on slave so the first received transaction does not complete/commit +connection slave; +LOCK TABLES t1 WRITE; +connection master; +# Sleep 2 to allow a buffer between events for SBM check +insert into t1 values (0); +include/save_master_gtid.inc +connection slave; +# Waiting for transaction to arrive on slave and begin SQL Delay.. +# Validating SBM is updated on event arrival.. +# ..done +connection slave; +UNLOCK TABLES; +include/sync_with_master_gtid.inc +# +# Pt 2) If the SQL thread has not entered an idle state, ensure +# following events do not update SBM +# Stop slave IO thread so it receives both events together on restart +connection slave; +include/stop_slave_io.inc +connection master; +# Sleep 2 to allow a buffer between events for SBM check +insert into t1 values (1); +# Sleep 3 to create gap between events +insert into t1 values (2); +connection slave; +LOCK TABLES t1 WRITE; +START SLAVE IO_THREAD; +# Wait for first transaction to complete SQL delay and begin execution.. +# Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle.. +# ..and that SBM wasn't calculated using prior committed transactions +# ..done +connection slave; +UNLOCK TABLES; +# +# Cleanup +# Reset master_delay +include/stop_slave.inc +CHANGE MASTER TO master_delay=0; +set @@GLOBAL.slave_parallel_threads=4; +include/start_slave.inc +connection master; +DROP TABLE t1; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/rpl_end.inc +# End of rpl_delayed_parallel_slave_sbm.test diff --git a/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt new file mode 100644 index 00000000000..9eea6a54b68 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt @@ -0,0 +1 @@ +--slave-parallel-threads=4 diff --git a/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test new file mode 100644 index 00000000000..4bcb7ad9e3f --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test @@ -0,0 +1,133 @@ +# +# This test ensures that after a delayed parallel slave has idled, i.e. +# executed everything in its relay log, the next event group that the SQL +# thread reads from the relay log will immediately be used in the +# Seconds_Behind_Master. In particular, it ensures that the calculation for +# Seconds_Behind_Master is based on the timestamp of the new transaction, +# rather than the last committed transaction. +# +# References: +# MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel +# Replicas +# + +--source include/master-slave.inc + +--connection slave +--source include/stop_slave.inc +--let $master_delay= 3 +--eval change master to master_delay=$master_delay, master_use_gtid=Slave_Pos +--let $old_slave_threads= `SELECT @@GLOBAL.slave_parallel_threads` +set @@GLOBAL.slave_parallel_threads=2; +--source include/start_slave.inc + +--connection master +create table t1 (a int); +--source include/sync_slave_sql_with_master.inc + +--echo # +--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event + +--echo # Lock t1 on slave so the first received transaction does not complete/commit +--connection slave +LOCK TABLES t1 WRITE; + +--connection master +--echo # Sleep 2 to allow a buffer between events for SBM check +sleep 2; + +--let $ts_trx_before_ins= `SELECT UNIX_TIMESTAMP()` +--let insert_ctr= 0 +--eval insert into t1 values ($insert_ctr) +--inc $insert_ctr +--source include/save_master_gtid.inc + +--connection slave + +--echo # Waiting for transaction to arrive on slave and begin SQL Delay.. +--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event'; +--source include/wait_condition.inc + +--echo # Validating SBM is updated on event arrival.. +--let $sbm_trx1_arrive= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1) +--let $seconds_since_idling= `SELECT UNIX_TIMESTAMP() - $ts_trx_before_ins` +if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`) +{ + --echo # SBM was $sbm_trx1_arrive yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master) + --die Seconds_Behind_Master should reset after idling +} +--echo # ..done + +--connection slave +UNLOCK TABLES; +--source include/sync_with_master_gtid.inc + +--echo # +--echo # Pt 2) If the SQL thread has not entered an idle state, ensure +--echo # following events do not update SBM + +--echo # Stop slave IO thread so it receives both events together on restart +--connection slave +--source include/stop_slave_io.inc + +--connection master + +--echo # Sleep 2 to allow a buffer between events for SBM check +sleep 2; +--let $ts_trxpt2_before_ins= `SELECT UNIX_TIMESTAMP()` +--eval insert into t1 values ($insert_ctr) +--inc $insert_ctr +--echo # Sleep 3 to create gap between events +sleep 3; +--eval insert into t1 values ($insert_ctr) +--inc $insert_ctr +--let $ts_trx_after_ins= `SELECT UNIX_TIMESTAMP()` + +--connection slave +LOCK TABLES t1 WRITE; + +START SLAVE IO_THREAD; + +--echo # Wait for first transaction to complete SQL delay and begin execution.. +--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock%' AND command LIKE 'Slave_Worker'; +--source include/wait_condition.inc + +--echo # Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle.. +--let $sbm_after_trx_no_idle= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1) +--let $timestamp_trxpt2_arrive= `SELECT UNIX_TIMESTAMP()` +if (`SELECT $sbm_after_trx_no_idle < $timestamp_trxpt2_arrive - $ts_trx_after_ins`) +{ + --let $cmpv= `SELECT $timestamp_trxpt2_arrive - $ts_trx_after_ins` + --echo # SBM $sbm_after_trx_no_idle was more recent than time since last transaction ($cmpv seconds) + --die Seconds_Behind_Master should not have used second transaction timestamp +} +--let $seconds_since_idling= `SELECT ($timestamp_trxpt2_arrive - $ts_trxpt2_before_ins)` +--echo # ..and that SBM wasn't calculated using prior committed transactions +if (`SELECT $sbm_after_trx_no_idle > ($seconds_since_idling + 1)`) +{ + --echo # SBM was $sbm_after_trx_no_idle yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master) + --die Seconds_Behind_Master calculation should not have used prior committed transaction +} +--echo # ..done + +--connection slave +UNLOCK TABLES; + +--echo # +--echo # Cleanup + +--echo # Reset master_delay +--source include/stop_slave.inc +--eval CHANGE MASTER TO master_delay=0 +--eval set @@GLOBAL.slave_parallel_threads=$old_slave_threads +--source include/start_slave.inc + +--connection master +DROP TABLE t1; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +--source include/rpl_end.inc +--echo # End of rpl_delayed_parallel_slave_sbm.test diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 3eb25219ed3..dbe77c54230 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -45,8 +45,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, rgi->event_relay_log_pos= qev->event_relay_log_pos; rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos; strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name); - if (!(ev->is_artificial_event() || ev->is_relay_log_event() || - (ev->when == 0))) + if (event_can_update_last_master_timestamp(ev)) rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time; err= apply_event_and_update_pos_for_parallel(ev, thd, rgi); diff --git a/sql/slave.cc b/sql/slave.cc index 41e0e3c86eb..b64b9a64979 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4125,10 +4125,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, the user might be surprised to see a claim that the slave is up to date long before those queued events are actually executed. */ - if (!rli->mi->using_parallel() && - !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) + if ((!rli->mi->using_parallel()) && event_can_update_last_master_timestamp(ev)) { rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; + rli->sql_thread_caught_up= false; DBUG_ASSERT(rli->last_master_timestamp >= 0); } @@ -4177,6 +4177,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (rli->mi->using_parallel()) { + if (unlikely((rli->last_master_timestamp == 0 || + rli->sql_thread_caught_up) && + event_can_update_last_master_timestamp(ev))) + { + if (rli->last_master_timestamp < ev->when) + { + rli->last_master_timestamp= ev->when; + rli->sql_thread_caught_up= false; + } + } + int res= rli->parallel.do_event(serial_rgi, ev, event_size); /* In parallel replication, we need to update the relay log position @@ -4192,7 +4203,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, This is the case for pre-10.0 events without GTID, and for handling slave_skip_counter. */ - if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) + if (event_can_update_last_master_timestamp(ev)) { /* Ignore FD's timestamp as it does not reflect the slave execution @@ -4200,7 +4211,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, data modification event execution last long all this time Seconds_Behind_Master is zero. */ - if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT && + rli->last_master_timestamp < ev->when) rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; DBUG_ASSERT(rli->last_master_timestamp >= 0); @@ -7544,7 +7556,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) if (hot_log) mysql_mutex_unlock(log_lock); - rli->sql_thread_caught_up= false; DBUG_RETURN(ev); } if (opt_reckless_slave) // For mysql-test @@ -7711,7 +7722,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); // re-acquire data lock since we released it earlier mysql_mutex_lock(&rli->data_lock); - rli->sql_thread_caught_up= false; continue; } /* @@ -7902,12 +7912,19 @@ event(errno: %d cur_log->error: %d)", { sql_print_information("Error reading relay log event: %s", "slave SQL thread was killed"); - DBUG_RETURN(0); + goto end; } err: if (errmsg) sql_print_error("Error reading relay log event: %s", errmsg); + +end: + /* + Set that we are not caught up so if there is a hang/problem on restart, + Seconds_Behind_Master will still grow. + */ + rli->sql_thread_caught_up= false; DBUG_RETURN(0); } #ifdef WITH_WSREP diff --git a/sql/slave.h b/sql/slave.h index 97fd4823a07..8d3b4f6d2aa 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -49,6 +49,7 @@ #include "rpl_filter.h" #include "rpl_tblmap.h" #include "rpl_gtid.h" +#include "log_event.h" #define SLAVE_NET_TIMEOUT 60 @@ -293,6 +294,17 @@ extern char *report_host, *report_password; extern I_List<THD> threads; +/* + Check that a binlog event (read from the relay log) is valid to update + last_master_timestamp. That is, a valid event is one with a consistent + timestamp which originated from a primary server. +*/ +static inline bool event_can_update_last_master_timestamp(Log_event *ev) +{ + return ev && !(ev->is_artificial_event() || ev->is_relay_log_event() || + (ev->when == 0)); +} + #else #define close_active_mi() /* no-op */ #endif /* HAVE_REPLICATION */ |