summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrandon Nesterenko <brandon.nesterenko@mariadb.com>2022-11-03 14:56:50 -0600
committerBrandon Nesterenko <brandon.nesterenko@mariadb.com>2023-01-24 08:11:35 -0700
commitd69e835787f9ce9848cb6c2a5343887dd0eec2ce (patch)
treef24fc11faf81358e76cf85f9611829a41a459309
parent2ed598eae8ebe737cdfa2aa2e7e6e0f3ad8d6ef7 (diff)
downloadmariadb-git-d69e835787f9ce9848cb6c2a5343887dd0eec2ce.tar.gz
MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
Problem ======== On a parallel, delayed replica, Seconds_Behind_Master will not be calculated until after MASTER_DELAY seconds have passed and the event has finished executing, resulting in potentially very large values of Seconds_Behind_Master (which could be much larger than the MASTER_DELAY parameter) for the entire duration the event is delayed. This contradicts the documented MASTER_DELAY behavior, which specifies how many seconds to withhold replicated events from execution. Solution ======== After a parallel replica idles, the first event after idling should immediately update last_master_timestamp with the time that it began execution on the primary. Reviewed By =========== Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r--mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result60
-rw-r--r--mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt1
-rw-r--r--mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test133
-rw-r--r--sql/rpl_parallel.cc3
-rw-r--r--sql/slave.cc31
-rw-r--r--sql/slave.h12
6 files changed, 231 insertions, 9 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result b/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result
new file mode 100644
index 00000000000..f783b1e0783
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result
@@ -0,0 +1,60 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+include/stop_slave.inc
+change master to master_delay=3, master_use_gtid=Slave_Pos;
+set @@GLOBAL.slave_parallel_threads=2;
+include/start_slave.inc
+connection master;
+create table t1 (a int);
+include/sync_slave_sql_with_master.inc
+#
+# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
+# Lock t1 on slave so the first received transaction does not complete/commit
+connection slave;
+LOCK TABLES t1 WRITE;
+connection master;
+# Sleep 2 to allow a buffer between events for SBM check
+insert into t1 values (0);
+include/save_master_gtid.inc
+connection slave;
+# Waiting for transaction to arrive on slave and begin SQL Delay..
+# Validating SBM is updated on event arrival..
+# ..done
+connection slave;
+UNLOCK TABLES;
+include/sync_with_master_gtid.inc
+#
+# Pt 2) If the SQL thread has not entered an idle state, ensure
+# following events do not update SBM
+# Stop slave IO thread so it receives both events together on restart
+connection slave;
+include/stop_slave_io.inc
+connection master;
+# Sleep 2 to allow a buffer between events for SBM check
+insert into t1 values (1);
+# Sleep 3 to create gap between events
+insert into t1 values (2);
+connection slave;
+LOCK TABLES t1 WRITE;
+START SLAVE IO_THREAD;
+# Wait for first transaction to complete SQL delay and begin execution..
+# Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
+# ..and that SBM wasn't calculated using prior committed transactions
+# ..done
+connection slave;
+UNLOCK TABLES;
+#
+# Cleanup
+# Reset master_delay
+include/stop_slave.inc
+CHANGE MASTER TO master_delay=0;
+set @@GLOBAL.slave_parallel_threads=4;
+include/start_slave.inc
+connection master;
+DROP TABLE t1;
+include/save_master_gtid.inc
+connection slave;
+include/sync_with_master_gtid.inc
+include/rpl_end.inc
+# End of rpl_delayed_parallel_slave_sbm.test
diff --git a/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt
new file mode 100644
index 00000000000..9eea6a54b68
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm-slave.opt
@@ -0,0 +1 @@
+--slave-parallel-threads=4
diff --git a/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test
new file mode 100644
index 00000000000..4bcb7ad9e3f
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test
@@ -0,0 +1,133 @@
+#
+# This test ensures that after a delayed parallel slave has idled, i.e.
+# executed everything in its relay log, the next event group that the SQL
+# thread reads from the relay log will immediately be used in the
+# Seconds_Behind_Master. In particular, it ensures that the calculation for
+# Seconds_Behind_Master is based on the timestamp of the new transaction,
+# rather than the last committed transaction.
+#
+# References:
+# MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel
+# Replicas
+#
+
+--source include/master-slave.inc
+
+--connection slave
+--source include/stop_slave.inc
+--let $master_delay= 3
+--eval change master to master_delay=$master_delay, master_use_gtid=Slave_Pos
+--let $old_slave_threads= `SELECT @@GLOBAL.slave_parallel_threads`
+set @@GLOBAL.slave_parallel_threads=2;
+--source include/start_slave.inc
+
+--connection master
+create table t1 (a int);
+--source include/sync_slave_sql_with_master.inc
+
+--echo #
+--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
+
+--echo # Lock t1 on slave so the first received transaction does not complete/commit
+--connection slave
+LOCK TABLES t1 WRITE;
+
+--connection master
+--echo # Sleep 2 to allow a buffer between events for SBM check
+sleep 2;
+
+--let $ts_trx_before_ins= `SELECT UNIX_TIMESTAMP()`
+--let insert_ctr= 0
+--eval insert into t1 values ($insert_ctr)
+--inc $insert_ctr
+--source include/save_master_gtid.inc
+
+--connection slave
+
+--echo # Waiting for transaction to arrive on slave and begin SQL Delay..
+--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
+--source include/wait_condition.inc
+
+--echo # Validating SBM is updated on event arrival..
+--let $sbm_trx1_arrive= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
+--let $seconds_since_idling= `SELECT UNIX_TIMESTAMP() - $ts_trx_before_ins`
+if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
+{
+ --echo # SBM was $sbm_trx1_arrive yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
+ --die Seconds_Behind_Master should reset after idling
+}
+--echo # ..done
+
+--connection slave
+UNLOCK TABLES;
+--source include/sync_with_master_gtid.inc
+
+--echo #
+--echo # Pt 2) If the SQL thread has not entered an idle state, ensure
+--echo # following events do not update SBM
+
+--echo # Stop slave IO thread so it receives both events together on restart
+--connection slave
+--source include/stop_slave_io.inc
+
+--connection master
+
+--echo # Sleep 2 to allow a buffer between events for SBM check
+sleep 2;
+--let $ts_trxpt2_before_ins= `SELECT UNIX_TIMESTAMP()`
+--eval insert into t1 values ($insert_ctr)
+--inc $insert_ctr
+--echo # Sleep 3 to create gap between events
+sleep 3;
+--eval insert into t1 values ($insert_ctr)
+--inc $insert_ctr
+--let $ts_trx_after_ins= `SELECT UNIX_TIMESTAMP()`
+
+--connection slave
+LOCK TABLES t1 WRITE;
+
+START SLAVE IO_THREAD;
+
+--echo # Wait for first transaction to complete SQL delay and begin execution..
+--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock%' AND command LIKE 'Slave_Worker';
+--source include/wait_condition.inc
+
+--echo # Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
+--let $sbm_after_trx_no_idle= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
+--let $timestamp_trxpt2_arrive= `SELECT UNIX_TIMESTAMP()`
+if (`SELECT $sbm_after_trx_no_idle < $timestamp_trxpt2_arrive - $ts_trx_after_ins`)
+{
+ --let $cmpv= `SELECT $timestamp_trxpt2_arrive - $ts_trx_after_ins`
+ --echo # SBM $sbm_after_trx_no_idle was more recent than time since last transaction ($cmpv seconds)
+ --die Seconds_Behind_Master should not have used second transaction timestamp
+}
+--let $seconds_since_idling= `SELECT ($timestamp_trxpt2_arrive - $ts_trxpt2_before_ins)`
+--echo # ..and that SBM wasn't calculated using prior committed transactions
+if (`SELECT $sbm_after_trx_no_idle > ($seconds_since_idling + 1)`)
+{
+ --echo # SBM was $sbm_after_trx_no_idle yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
+ --die Seconds_Behind_Master calculation should not have used prior committed transaction
+}
+--echo # ..done
+
+--connection slave
+UNLOCK TABLES;
+
+--echo #
+--echo # Cleanup
+
+--echo # Reset master_delay
+--source include/stop_slave.inc
+--eval CHANGE MASTER TO master_delay=0
+--eval set @@GLOBAL.slave_parallel_threads=$old_slave_threads
+--source include/start_slave.inc
+
+--connection master
+DROP TABLE t1;
+--source include/save_master_gtid.inc
+
+--connection slave
+--source include/sync_with_master_gtid.inc
+
+--source include/rpl_end.inc
+--echo # End of rpl_delayed_parallel_slave_sbm.test
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 3eb25219ed3..dbe77c54230 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -45,8 +45,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev,
rgi->event_relay_log_pos= qev->event_relay_log_pos;
rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos;
strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name);
- if (!(ev->is_artificial_event() || ev->is_relay_log_event() ||
- (ev->when == 0)))
+ if (event_can_update_last_master_timestamp(ev))
rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time;
err= apply_event_and_update_pos_for_parallel(ev, thd, rgi);
diff --git a/sql/slave.cc b/sql/slave.cc
index 41e0e3c86eb..b64b9a64979 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -4125,10 +4125,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
the user might be surprised to see a claim that the slave is up to date
long before those queued events are actually executed.
*/
- if (!rli->mi->using_parallel() &&
- !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
+ if ((!rli->mi->using_parallel()) && event_can_update_last_master_timestamp(ev))
{
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
+ rli->sql_thread_caught_up= false;
DBUG_ASSERT(rli->last_master_timestamp >= 0);
}
@@ -4177,6 +4177,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
if (rli->mi->using_parallel())
{
+ if (unlikely((rli->last_master_timestamp == 0 ||
+ rli->sql_thread_caught_up) &&
+ event_can_update_last_master_timestamp(ev)))
+ {
+ if (rli->last_master_timestamp < ev->when)
+ {
+ rli->last_master_timestamp= ev->when;
+ rli->sql_thread_caught_up= false;
+ }
+ }
+
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
/*
In parallel replication, we need to update the relay log position
@@ -4192,7 +4203,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
This is the case for pre-10.0 events without GTID, and for handling
slave_skip_counter.
*/
- if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
+ if (event_can_update_last_master_timestamp(ev))
{
/*
Ignore FD's timestamp as it does not reflect the slave execution
@@ -4200,7 +4211,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
data modification event execution last long all this time
Seconds_Behind_Master is zero.
*/
- if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
+ if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
+ rli->last_master_timestamp < ev->when)
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
DBUG_ASSERT(rli->last_master_timestamp >= 0);
@@ -7544,7 +7556,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
if (hot_log)
mysql_mutex_unlock(log_lock);
- rli->sql_thread_caught_up= false;
DBUG_RETURN(ev);
}
if (opt_reckless_slave) // For mysql-test
@@ -7711,7 +7722,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
// re-acquire data lock since we released it earlier
mysql_mutex_lock(&rli->data_lock);
- rli->sql_thread_caught_up= false;
continue;
}
/*
@@ -7902,12 +7912,19 @@ event(errno: %d cur_log->error: %d)",
{
sql_print_information("Error reading relay log event: %s",
"slave SQL thread was killed");
- DBUG_RETURN(0);
+ goto end;
}
err:
if (errmsg)
sql_print_error("Error reading relay log event: %s", errmsg);
+
+end:
+ /*
+ Set that we are not caught up so if there is a hang/problem on restart,
+ Seconds_Behind_Master will still grow.
+ */
+ rli->sql_thread_caught_up= false;
DBUG_RETURN(0);
}
#ifdef WITH_WSREP
diff --git a/sql/slave.h b/sql/slave.h
index 97fd4823a07..8d3b4f6d2aa 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -49,6 +49,7 @@
#include "rpl_filter.h"
#include "rpl_tblmap.h"
#include "rpl_gtid.h"
+#include "log_event.h"
#define SLAVE_NET_TIMEOUT 60
@@ -293,6 +294,17 @@ extern char *report_host, *report_password;
extern I_List<THD> threads;
+/*
+ Check that a binlog event (read from the relay log) is valid to update
+ last_master_timestamp. That is, a valid event is one with a consistent
+ timestamp which originated from a primary server.
+*/
+static inline bool event_can_update_last_master_timestamp(Log_event *ev)
+{
+ return ev && !(ev->is_artificial_event() || ev->is_relay_log_event() ||
+ (ev->when == 0));
+}
+
#else
#define close_active_mi() /* no-op */
#endif /* HAVE_REPLICATION */