diff options
Diffstat (limited to 'mysql-test/suite/rpl/include/rpl_parallel_incorrect_relay_pos.inc')
-rw-r--r-- | mysql-test/suite/rpl/include/rpl_parallel_incorrect_relay_pos.inc | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/include/rpl_parallel_incorrect_relay_pos.inc b/mysql-test/suite/rpl/include/rpl_parallel_incorrect_relay_pos.inc new file mode 100644 index 00000000000..3cf0afd63ca --- /dev/null +++ b/mysql-test/suite/rpl/include/rpl_parallel_incorrect_relay_pos.inc @@ -0,0 +1,128 @@ +--echo *** MDEV-7237: Parallel replication: incorrect relaylog position after stop/start the slave *** + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/master-slave.inc + +--connection server_2 +--source include/stop_slave.inc +CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +SET GLOBAL slave_parallel_threads=10; +# Test assumes that 'conservative' mode is in effect. i.e +# Do not start parallel execution of this event group until all prior groups +# have reached the commit phase. Upon execution of STOP SLAVE there can be one +# group which is executing and the rest are doing group commit order wait. +SET GLOBAL slave_parallel_mode='conservative'; +CHANGE MASTER TO master_use_gtid=slave_pos; +--source include/start_slave.inc + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t2 (a int PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t2 VALUES (40); +--save_master_pos + +--connection server_2 +--sync_with_master +--connect (con_temp2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +--source include/stop_slave.inc +CHANGE MASTER TO master_use_gtid=no; +SET @old_dbug= @@GLOBAL.debug_dbug; +# This DBUG injection causes a DEBUG_SYNC signal "scheduled_gtid_0_x_100" when +# GTID 0-1-100 has been scheduled for and fetched by a worker thread. +SET GLOBAL debug_dbug="+d,rpl_parallel_scheduled_gtid_0_x_100"; +# This DBUG injection causes a DEBUG_SYNC signal "wait_for_done_waiting" when +# STOP SLAVE has signalled all worker threads to stop. +SET GLOBAL debug_dbug="+d,rpl_parallel_wait_for_done_trigger"; +# Reset worker threads to make DBUG setting catch on. +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; + + +--connection server_1 +# Setup some transaction for the slave to replicate. +INSERT INTO t2 VALUES (41); +INSERT INTO t2 VALUES (42); +# Need to log the DELETE in statement format, so we can see it in processlist. +SET @old_format= @@binlog_format; +SET binlog_format= statement; +DELETE FROM t2 WHERE a=40; +SET binlog_format= @old_format; +INSERT INTO t2 VALUES (43); +INSERT INTO t2 VALUES (44); +# Force the slave to switch to a new relay log file. +FLUSH LOGS; +INSERT INTO t2 VALUES (45); +# Inject a GTID 0-1-100, which will trigger a DEBUG_SYNC signal when this +# transaction has been fetched by a worker thread. +SET gtid_seq_no=100; +INSERT INTO t2 VALUES (46); +--save_master_pos + +--connection con_temp2 +# Temporarily block the DELETE on a=40 from completing. +BEGIN; +SELECT * FROM t2 WHERE a=40 FOR UPDATE; + +--connection server_2 +--source include/start_slave.inc + +# Wait for a worker thread to start on the DELETE that will be blocked +# temporarily by the SELECT FOR UPDATE. +--let $wait_condition= SELECT count(*) > 0 FROM information_schema.processlist WHERE state='updating' and info LIKE '%DELETE FROM t2 WHERE a=40%' +--source include/wait_condition.inc + +# The DBUG injection set above will make the worker thread signal the following +# debug_sync when the GTID 0-1-100 has been reached by a worker thread. +# Thus, at this point, the SQL driver thread has reached the next +# relay log file name, while a worker thread is still processing a +# transaction in the previous relay log file, blocked on the SELECT FOR +# UPDATE. +SET debug_sync= 'now WAIT_FOR scheduled_gtid_0_x_100'; +# At this point, the SQL driver thread is in the new relay log file, while +# the DELETE from the old relay log file is not yet complete. We will stop +# the slave at this point. The bug was that the DELETE statement would +# update the slave position to the _new_ relay log file name instead of +# its own old file name. Thus, by stoping and restarting the slave at this +# point, we would get an error at restart due to incorrect position. (If +# we would let the slave catch up before stopping, the incorrect position +# would be corrected by a later transaction). + +send STOP SLAVE; + +--connection con_temp2 +# Wait for STOP SLAVE to have proceeded sufficiently that it has signalled +# all worker threads to stop; this ensures that we will stop after the DELETE +# transaction (and not after a later transaction that might have been able +# to set a fixed position). +SET debug_sync= 'now WAIT_FOR wait_for_done_waiting'; +# Now release the row lock that was blocking the replication of DELETE. +ROLLBACK; + +--connection server_2 +reap; +--source include/wait_for_slave_sql_to_stop.inc +SELECT * FROM t2 WHERE a >= 40 ORDER BY a; +# Now restart the slave. With the bug present, this would start at an +# incorrect relay log position, causing relay log read error (or if unlucky, +# silently skip a number of events). +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t2 WHERE a >= 40 ORDER BY a; + +# Clean up. +--source include/stop_slave.inc +SET GLOBAL debug_dbug=@old_dbug; +SET DEBUG_SYNC= 'RESET'; +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; +CHANGE MASTER TO master_use_gtid=slave_pos; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t2; + +--source include/rpl_end.inc |