summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrei Elkin <andrei.elkin@mariadb.com>2019-11-06 17:05:58 +0200
committerAndrei Elkin <andrei.elkin@mariadb.com>2019-11-10 16:16:37 +0200
commit13db50fc03e7312e6c01b06c7e4af69f69ba5382 (patch)
tree3a02af099e4e4d32f6d4c007895cb16b0447b316
parentdfd2d3d861fa7fad6747489dbbd173b7f680f44d (diff)
downloadmariadb-git-13db50fc03e7312e6c01b06c7e4af69f69ba5382.tar.gz
MDEV-19376 Repl_semi_sync_master::commit_trx assertion failure: ... || !m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name, trx_wait_binlog_pos)
The assert indicates that the current transaction got caught uncleaned from the semisync master's cache when it is signaled to proceed upon its ack receive. The reason of missed cleanup turns out to be a flaw in the gtid connect mode. A submitted by connecting slave value of its last received event's binlog file *name* was adopted into {{Repl_semi_sync_master::m_reply_file_name}} as a part of semisync initialization. Notice that the initialization still refines the position part of the submitted last received event's binlog coordinates. The master side binlog filename:pos refinement is specific to the gtid connect mode for purpose of computing the latest binlog file to resume slave feeding from. Effectively in the gtid connect mode the computed resumption filename:pos may appear smaller in which case a new post-connect time committing transaction may be logged with its filename:pos also less than the submitted coordinates and that triggers the assert. Fixed with making the semisync initialization to use the refined filename:pos. It is guaranteed to be less than any new generated transaction's binlog:pos.
-rw-r--r--mysql-test/suite/rpl/r/rpl_semi_sync_gtid_reconnect.result28
-rw-r--r--mysql-test/suite/rpl/t/rpl_semi_sync_gtid_reconnect.test75
-rw-r--r--sql/sql_repl.cc6
3 files changed, 108 insertions, 1 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_gtid_reconnect.result b/mysql-test/suite/rpl/r/rpl_semi_sync_gtid_reconnect.result
new file mode 100644
index 00000000000..3c720b94fde
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync_gtid_reconnect.result
@@ -0,0 +1,28 @@
+include/master-slave.inc
+[connection master]
+RESET MASTER;
+SET @@GLOBAL.rpl_semi_sync_master_enabled = 1;
+include/stop_slave.inc
+SET @@GLOBAL. rpl_semi_sync_slave_enabled = 1;
+include/start_slave.inc
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 SET a = 1;
+include/save_master_gtid.inc
+FLUSH LOGS;
+INSERT INTO t1 SET a = 2;
+include/stop_slave_sql.inc
+INSERT INTO t1 SET a = 3;
+include/sync_slave_io_with_master.inc
+include/stop_slave_io.inc
+RESET MASTER;
+SET @@global.gtid_binlog_state = '0-1-2';
+CHANGE MASTER TO MASTER_USE_GTID = slave_pos;
+SET @@global.gtid_slave_pos = '0-1-2';
+include/start_slave.inc
+INSERT INTO t1 SET a = 4;
+DROP TABLE t1;
+SET @@GLOBAL. rpl_semi_sync_master_enabled = 0;
+include/stop_slave.inc
+SET @@GLOBAL. rpl_semi_sync_slave_enabled = 0;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_gtid_reconnect.test b/mysql-test/suite/rpl/t/rpl_semi_sync_gtid_reconnect.test
new file mode 100644
index 00000000000..c524fcff3a4
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_gtid_reconnect.test
@@ -0,0 +1,75 @@
+source include/not_embedded.inc;
+source include/have_binlog_format_mixed.inc;
+source include/have_semisync.inc;
+source include/master-slave.inc;
+
+#
+# Semisync initialization
+#
+--connection master
+RESET MASTER;
+--let $sav_enabled_master=`SELECT @@GLOBAL.rpl_semi_sync_master_enabled`
+SET @@GLOBAL.rpl_semi_sync_master_enabled = 1;
+
+--connection slave
+source include/stop_slave.inc;
+--let $sav_enabled_slave=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
+SET @@GLOBAL. rpl_semi_sync_slave_enabled = 1;
+source include/start_slave.inc;
+
+# Prove fixes to
+# MDEV-19376 Assert (!m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name...)
+#
+#
+# Run few queries to replicate/execute on slave.
+# Stop the slave applier.
+# Replicate/not-executed few more.
+# Restart the slave.
+#
+--connection master
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 SET a = 1;
+--source include/save_master_gtid.inc
+--let $resume_gtid = $master_pos
+FLUSH LOGS;
+INSERT INTO t1 SET a = 2;
+
+--sync_slave_with_master
+--connection slave
+--source include/stop_slave_sql.inc
+
+--connection master
+INSERT INTO t1 SET a = 3;
+
+# the sync connection is 'slave' by default
+--source include/sync_slave_io_with_master.inc
+--connection slave
+--source include/stop_slave_io.inc
+
+--connection master
+RESET MASTER;
+--eval SET @@global.gtid_binlog_state = '$resume_gtid'
+
+# The resume gtid is set up to point to the very first binlog file
+--connection slave
+CHANGE MASTER TO MASTER_USE_GTID = slave_pos;
+--eval SET @@global.gtid_slave_pos = '$resume_gtid'
+# Yet the slave io first submits the last received binlog file name:pos.
+--source include/start_slave.inc
+
+# Here goes the cracker.
+--connection master
+INSERT INTO t1 SET a = 4;
+
+#
+# Clean up
+#
+--connection master
+DROP TABLE t1;
+--eval SET @@GLOBAL. rpl_semi_sync_master_enabled = $sav_enabled_master
+
+--sync_slave_with_master
+source include/stop_slave.inc;
+--eval SET @@GLOBAL. rpl_semi_sync_slave_enabled = $sav_enabled_slave
+source include/start_slave.inc;
+--source include/rpl_end.inc
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index ef6d0706da0..48d2781cd46 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -2725,7 +2725,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
run hook first when all check has been made that slave seems to
be requesting a reasonable position. i.e when transmit actually starts
*/
- if (RUN_HOOK(binlog_transmit, transmit_start, (thd, flags, log_ident, pos)))
+
+ DBUG_ASSERT(pos == linfo.pos);
+
+ if (RUN_HOOK(binlog_transmit, transmit_start,
+ (thd, flags, linfo.log_file_name, linfo.pos)))
{
info->errmsg= "Failed to run hook 'transmit_start'";
info->error= ER_UNKNOWN_ERROR;