diff options
author | Sujatha <sujatha.sivakumar@mariadb.com> | 2020-04-09 20:45:45 +0530 |
---|---|---|
committer | Andrei Elkin <andrei.elkin@mariadb.com> | 2021-02-08 17:58:03 +0200 |
commit | ee53893834536e6dac77c2ec359fb62329153222 (patch) | |
tree | 25025c9b239e1c6ea903e829b0be93c9729a3138 | |
parent | 7d04ce6a2d42770fcc765d257c0058a859180b80 (diff) | |
download | mariadb-git-ee53893834536e6dac77c2ec359fb62329153222.tar.gz |
MDEV-21117: refine the server binlog-based recovery for semisync
Problem:
=======
When the semisync master is crashed and restarted as slave it could
recover transactions that former slaves may never have seen.
A known method existed to clear out all prepared transactions
with --tc-heuristic-recover=rollback does not care to adjust
binlog accordingly.
Fix:
===
The binlog-based recovery is made to concern of the slave semisync role of
post-crash restarted server.
No changes in behaviour is done to the "normal" binloggging server
and the semisync master.
When the restarted server is configured with
--rpl-semi-sync-slave-enabled=1
the refined recovery attempts to roll back prepared transactions
and truncate binlog accordingly.
In case of a partically committed (that is committed at least
in one of the engine participants) such transaction gets committed.
It's guaranteed no (partially as well) committed transactions
exist beyond the truncate position.
In case there exists a non-transactional replication event
(being in a way a committed transaction) past the
computed truncate position the recovery ends with an error.
To facilite the failover on the slave side
conditions to accept own events (having been discarded by the above recovery)
are relaxed to let so for the semisync slave that connects to master
in gtid mode. gtid_strict_mode is further recommended to secure
from inadvertent re-applying out of order gtids in general.
Non-gtid mode connected semisync slave would require
--replicate-same-server-id (mind --log-slave-updates must be OFF then).
26 files changed, 2415 insertions, 84 deletions
diff --git a/mysql-test/suite/mariabackup/include/have_rocksdb.inc b/mysql-test/include/have_rocksdb.inc index d59f76f6cf3..d59f76f6cf3 100644 --- a/mysql-test/suite/mariabackup/include/have_rocksdb.inc +++ b/mysql-test/include/have_rocksdb.inc diff --git a/mysql-test/suite/binlog/r/binlog_truncate_active_log.result b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result new file mode 100644 index 00000000000..b7cf6bc3d20 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result @@ -0,0 +1,118 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +connection default; +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +CREATE TABLE t2 ( f INT ) ENGINE=INNODB; +CREATE TABLE tm ( f INT ) ENGINE=Aria; +# Case A. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +DELETE FROM t2 WHERE f = 666 /* no such record */; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-7 +# Kill the server +connection default; +FOUND # /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-5 +"One row should be present in table 't'" +SELECT COUNT(*) as 'One' FROM t; +One +1 +DELETE FROM t; +disconnect master1; +disconnect master2; +disconnect master3; +# Case B. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +DELETE FROM t2 WHERE f = 0; +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +INSERT INTO t VALUES (20); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-10 +# Kill the server +connection default; +FOUND # /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-9 +"One row should be present in table 't'" +SELECT COUNT(*) as 'One' FROM t; +One +1 +DELETE FROM t; +disconnect master1; +disconnect master2; +disconnect master3; +CREATE PROCEDURE sp_blank_xa() +BEGIN +XA START 'blank'; +DELETE FROM t2 WHERE f = 666 /* no such record */; +XA END 'blank'; +XA PREPARE 'blank'; +END| +# Case C. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +CALL sp_blank_xa; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +XA COMMIT 'blank'; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-15 +# Kill the server +connection default; +NOT FOUND /Successfully truncated.*to remove transactions starting from GTID 0-1-13/ in mysqld.1.err +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-13 +"One row should be present in table 't'" +SELECT COUNT(*) as 'One' FROM t; +One +1 +DELETE FROM t; +disconnect master1; +disconnect master2; +disconnect master3; +DROP PROCEDURE sp_blank_xa; +# Cleanup +DROP TABLE t,t2,tm; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result new file mode 100644 index 00000000000..08cbd078664 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result @@ -0,0 +1,198 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; +# +# +# Case A: neither engine committed => rollback & binlog truncate +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is crashed +# Kill the server +connection default; +# +# *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +SELECT COUNT(*) FROM t2; +COUNT(*) +1 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-2 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-2 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +Proof of the truncated binlog file is readable (two transactions must be seen): +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/; +/*!40019 SET @@session.max_insert_delayed_threads=0*/; +/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/; +DELIMITER /*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +DELIMITER ; +# End of log file +ROLLBACK /* added by mysqlbinlog */; +/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/; +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/; +# +# +# Case B: one engine has committed its transaction branch +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 --debug-dbug=d,binlog_truncate_partial_commit +# the server is crashed +# Kill the server +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +# +# +# Case C: both engines have committed its transaction branch +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is restarted +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +DROP TABLE t1, t2; +SET @@global.max_binlog_size = VALUE_AT_START; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result new file mode 100644 index 00000000000..ba87ee9f847 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result @@ -0,0 +1,51 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +SET @@global.max_binlog_size= 4096; +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; +connect master1,localhost,root,,; +"List of binary logs before rotation" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +INSERT INTO ti VALUES (2, REPEAT("x", 4100)); +connect master2,localhost,root,,; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (3, "not gonna survive"); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +"List of binary logs before crash" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-6 +# Kill the server +connection default; +FOUND # /truncated binlog file:.*master.*000002/ in mysqld.1.err +"One record should be present in table" +SELECT count(*) FROM ti; +count(*) +1 +# The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-4 +SELECT @@global.gtid_binlog_pos; +@@global.gtid_binlog_pos +0-1-4 +# Cleanup +SET @@global.max_binlog_size = VALUE_AT_START; +DROP TABLE ti; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result new file mode 100644 index 00000000000..7ea13e2548c --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result @@ -0,0 +1,59 @@ +SET @@global.max_binlog_size= 4096; +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot trim the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; +INSERT INTO tm VALUES(1); +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection master1; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +INSERT INTO ti VALUES (5 - 1, REPEAT("x", 4100)); +connection master2; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (5, REPEAT("x", 1)); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +INSERT INTO tm VALUES (2); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +# The gtid binlog state prior the crash must be restored at the end of the testSELECT @@global.gtid_binlog_state; +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-9 +# Kill the server +# Failed restart as the semisync slave +# Normal restart +connection default; +FOUND # /Cannot trim the binary log to file/ in mysqld.1.err +# Proof that the in-doubt transactions are recovered by the 2nd normal server restart +SELECT COUNT(*) = 5 as 'True' FROM ti; +True +1 +SELECT COUNT(*) <= 1 FROM tm; +COUNT(*) <= 1 +1 +# The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-9 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-9 +# Cleanup +SET @@global.max_binlog_size = VALUE_AT_START; +DROP TABLE ti, tm; +End of test diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc new file mode 100644 index 00000000000..b1ffaf18268 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc @@ -0,0 +1,70 @@ +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection default + +# First to commit few transactions +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +--send_eval $query1 + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +if ($pre_q2) +{ + CALL sp_blank_xa; +} +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +# To binlog non-xid transactional group which will be truncated all right +--send_eval $query2 + + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; + +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --rpl-semi-sync-slave-enabled=1 +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for a successful truncate message. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=$this_search_pattern +--replace_regex /FOUND [0-9]+/FOUND #/ +--source include/search_pattern_in_file.inc + +SELECT @@global.gtid_binlog_pos as 'After the crash'; +--echo "One row should be present in table 't'" +SELECT COUNT(*) as 'One' FROM t; + +# Local cleanup +DELETE FROM t; +--disconnect master1 +--disconnect master2 +--disconnect master3 diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.test b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test new file mode 100644 index 00000000000..cf89525dcac --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test @@ -0,0 +1,76 @@ +# ==== Purpose ==== +# +# Test verifies the truncation of single binary log file. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create table t1 and insert/commit a row. +# 1 - Insert an another row such that it gets written to binlog but commit +# in engine fails as server crashed at this point. +# 2 - Restart server with --rpl-semi-sync-slave-enabled=1 +# 3 - Upon server start 'master-bin.000001' will be truncated to contain +# only the first insert +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_aria.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +# The following cases are tested: +# A. 2pc transaction is followed by a blank "zero-engines" one +# B. 2pc transaction follows the blank one +# C. Similarly to A, with the XA blank transaction + +--connection default +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +CREATE TABLE t2 ( f INT ) ENGINE=INNODB; +CREATE TABLE tm ( f INT ) ENGINE=Aria; + +--echo # Case A. +# Both are doomed into truncation. +--let $this_search_pattern = Successfully truncated.*to remove transactions starting from GTID 0-1-6 +--let $query1 = INSERT INTO t VALUES (20) +--let $query2 = DELETE FROM t2 WHERE f = 666 /* no such record */ +--source binlog_truncate_active_log.inc + +--echo # Case B. +# The inverted sequence ends up to truncate only $query2 +--let $this_search_pattern = Successfully truncated.*to remove transactions starting from GTID 0-1-10 +--let $query1 = DELETE FROM t2 WHERE f = 0 +--let $query2 = INSERT INTO t VALUES (20) +--source binlog_truncate_active_log.inc + + +delimiter |; +CREATE PROCEDURE sp_blank_xa() +BEGIN + XA START 'blank'; + DELETE FROM t2 WHERE f = 666 /* no such record */; + XA END 'blank'; + XA PREPARE 'blank'; +END| +delimiter ;| + + +--echo # Case C. +--let $this_search_pattern = Successfully truncated.*to remove transactions starting from GTID 0-1-13 +--let $query1 = INSERT INTO t VALUES (20) +--let $pre_q2 = CALL sp_blank_xa +--let $query2 = XA COMMIT 'blank' +--source binlog_truncate_active_log.inc +DROP PROCEDURE sp_blank_xa; + +--echo # Cleanup +DROP TABLE t,t2,tm; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine-master.opt b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine-master.opt new file mode 100644 index 00000000000..df675545bf9 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine-master.opt @@ -0,0 +1 @@ +--plugin-load=$HA_ROCKSDB_SO diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc new file mode 100644 index 00000000000..c260a7987e2 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc @@ -0,0 +1,64 @@ +# +# Loop body of binlog_truncate_multi_engine.test +# Parameters: +# $debug_sync_action describes debug-sync actions +# $kill_server 1 when to crash, 0 for regular restart +# $restart_parameters the caller may simulate partial commit at recovery +# $test_outcome summary of extected results +# $MYSQLD_DATADIR + +--echo # +--echo # +--echo # Case $case +--echo # +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; + +connect(con1,localhost,root,,); +#--connection con1 +--echo List of binary logs before rotation +--source include/show_binary_logs.inc +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; + INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); + INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); + +--eval SET DEBUG_SYNC= $debug_sync_action +send COMMIT; + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--echo List of binary logs after rotation +--source include/show_binary_logs.inc + +--echo # restart the server with $restart_parameters +if ($kill_server) +{ + --echo # the server is crashed + --source include/kill_mysqld.inc + --source include/start_mysqld.inc +} +if (!$kill_server) +{ + --echo # the server is restarted + --source include/restart_mysqld.inc +} + +--connection default +--echo # +--echo # *** Summary: $test_outcome: +--echo # +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t2; +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; +--echo List of binary logs at the end of the tests +--source include/show_binary_logs.inc +--echo # *** +# cleanup +DELETE FROM t1; +DELETE FROM t2; +--disconnect con1 +--echo # diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test new file mode 100644 index 00000000000..fe153e5703c --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test @@ -0,0 +1,78 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create two tables in innodb and rocksdb engines, +# +# In loop for A,B,C cases (below) do 1-5: +# 1 - execute FLUSH LOGS command to generate a new binary log. +# Start a transaction inserting rows of sufficient sizes +# so binary log gets rotated at commit +# 2 - Using debug simulation make the server crash at a point where +# the transaction is written to binary log *and* either of +# A. neither of them commits +# B. only one commits +# C. both commit +# 3 - print the # of binlog files before the transaction starts and after its +# commit is submitted +# 4 - Restart server with --tc-heuristic-recover=BINLOG_TRUNCATE +# 5 - Restart normally to print post recovery status. +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + + +--source include/have_innodb.inc +--source include/have_rocksdb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +--let $old_max_binlog_size= `select @@global.max_binlog_size` +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +--let $MYSQLD_DATADIR= `SELECT @@datadir` + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; + +--let $case = A: neither engine committed => rollback & binlog truncate +# Hold off engine commits after write to binlog and its rotation. +# The transaction is killed along with the server after that. +--let $kill_server=1 +--let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal" +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3 + --source binlog_truncate_multi_engine.inc +--echo Proof of the truncated binlog file is readable (two transactions must be seen): +--let $MYSQLD_DATADIR = `select @@datadir` +--exec $MYSQL_BINLOG --short-form --skip-annotate-row-events $MYSQLD_DATADIR/master-bin.000002 + +--let $case = B: one engine has committed its transaction branch +# Hold off after one engine has committed. +--let $kill_server=1 +--let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal" +--let $restart_simulate_partial_commit = 1 +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 --debug-dbug=d,binlog_truncate_partial_commit +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4 + --source binlog_truncate_multi_engine.inc + +--let $case = C: both engines have committed its transaction branch +# Hold off after both engines have committed. The server is shut down. +--let $kill_server=0 +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4 + --source binlog_truncate_multi_engine.inc + + + +DROP TABLE t1, t2; +--replace_result $old_max_binlog_size VALUE_AT_START +--eval SET @@global.max_binlog_size = $old_max_binlog_size + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test new file mode 100644 index 00000000000..231e90dbdc9 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test @@ -0,0 +1,105 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== Implementation ==== +# +# Steps: +# 1 - Set max_binlog_size= 4096, to help a series of inserts into a +# transaction table 'ti' get binlog rotated so many time while the +# transactions won't be committed, being stopped at +# a prior to commit debug_sync point +# 2 - kill and restart the server as semisync slave successfully to +# end with an expected first binlog and the gtid state. +# +# ==== References ==== +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +--let $old_max_binlog_size= `select @@global.max_binlog_size` +SET @@global.max_binlog_size= 4096; + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; + +connect(master1,localhost,root,,); +--echo "List of binary logs before rotation" +--source include/show_binary_logs.inc + +# Some load to either non- and transactional egines +# that should not affect the following recovery: +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); + +# hold on near engine commit +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +--send_eval INSERT INTO ti VALUES (2, REPEAT("x", 4100)) + +connect(master2,localhost,root,,); +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send_eval INSERT INTO ti VALUES (3, "not gonna survive") + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +--echo "List of binary logs before crash" +--source include/show_binary_logs.inc +--echo # The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --rpl-semi-sync-slave-enabled=1 +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for a successful truncate message. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=truncated binlog file:.*master.*000002 +--replace_regex /FOUND [0-9]+/FOUND #/ +--source include/search_pattern_in_file.inc + + +--echo "One record should be present in table" +SELECT count(*) FROM ti; + +--echo # The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +SELECT @@global.gtid_binlog_pos; + +--echo # Cleanup +--replace_result $old_max_binlog_size VALUE_AT_START +--eval SET @@global.max_binlog_size = $old_max_binlog_size +DROP TABLE ti; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test new file mode 100644 index 00000000000..6aba2935fde --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test @@ -0,0 +1,142 @@ +# ==== Purpose ==== +# The test verifies attempt to recover by the semisync slave server whose +# binlog is unsafe for truncation. +# +# ==== Implementation ==== +# 2 binlog files are created with the 1st one destined to be the binlog +# checkpoint file for recovery. +# The final group of events is replication unsafe (myisam INSERT). +# Therefore the semisync slave recovery may not. +# +# Steps: +# 0 - Set max_binlog_size= 4096, to help an insert into a +# transaction table 'ti' get binlog rotated while the +# transaction won't be committed, being stopped at +# a prior to commit debug_sync point +# 1 - insert into a non-transactional 'tm' table completes with +# binary logging as well +# 2 - kill and attempt to restart the server as semisync slave that +# must produce an expected unsafe-to-recover error +# 3 - complete the test with a normal restart that successfully finds and +# commits the transaction in doubt. +# +# ==== References ==== +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +--let $old_max_binlog_size= `select @@global.max_binlog_size` +SET @@global.max_binlog_size= 4096; + +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot trim the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; + +--let $row_count = 5 +--let $i = 3 +--disable_query_log +while ($i) +{ + --eval INSERT INTO ti VALUES ($i, REPEAT("x", 1)) +--dec $i +} +--enable_query_log +INSERT INTO tm VALUES(1); + +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection master1 + +# The 1st trx binlogs, rotate binlog and hold on before committing at engine +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +--send_eval INSERT INTO ti VALUES ($row_count - 1, REPEAT("x", 4100)) + +--connection master2 + +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send_eval INSERT INTO ti VALUES ($row_count, REPEAT("x", 1)) + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +--send INSERT INTO tm VALUES (2) + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +--echo # The gtid binlog state prior the crash must be restored at the end of the testSELECT @@global.gtid_binlog_state; + +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +SELECT @@global.gtid_binlog_state; +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restarts +# +--echo # Failed restart as the semisync slave +--error 1 +--exec $MYSQLD_LAST_CMD --rpl-semi-sync-slave-enabled=1 >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 + +--echo # Normal restart +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=Cannot trim the binary log to file +--replace_regex /FOUND [0-9]+/FOUND #/ +--source include/search_pattern_in_file.inc + +--echo # Proof that the in-doubt transactions are recovered by the 2nd normal server restart +--eval SELECT COUNT(*) = $row_count as 'True' FROM ti +# myisam table may require repair (which is not tested here) +--disable_warnings +SELECT COUNT(*) <= 1 FROM tm; +--enable_warnings + +--echo # The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; + +--echo # Cleanup +--replace_result $old_max_binlog_size VALUE_AT_START +--eval SET @@global.max_binlog_size = $old_max_binlog_size +DROP TABLE ti, tm; +--echo End of test diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result new file mode 100644 index 00000000000..8107c8cb454 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result @@ -0,0 +1,124 @@ +include/rpl_init.inc [topology=1->2] +connection server_2; +include/stop_slave.inc +connection server_1; +RESET MASTER; +connection server_2; +RESET MASTER; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +include/start_slave.inc +connection server_1; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +connection server_1; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_2; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-8 +connection server_1; +FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err +disconnect conn_client; +connection server_2; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_1; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_2; +INSERT INTO t1 VALUES (3, 'dummy3'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_1; +SELECT COUNT(*) = 3 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-2-9 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_2; +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (4, REPEAT("x", 4100)); +connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +SET DEBUG_SYNC= "commit_after_release_LOCK_lock SIGNAL con1_ready WAIT_FOR con2_go"; +INSERT INTO t1 VALUES (5, REPEAT("x", 4100)); +connection server_2; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_1; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-2-11 +connection server_2; +NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err +disconnect conn_client; +connection server_1; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_2; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_1; +INSERT INTO t1 VALUES (6, 'Done'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +connection server_2; +SELECT COUNT(*) = 6 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-1-12 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +include/diff_tables.inc [server_1:t1, server_2:t1] +# Cleanup +connection server_1; +DROP TABLE t1; +connection server_2; +include/stop_slave.inc +connection server_1; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; +connection server_2; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +include/start_slave.inc +connection default; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc new file mode 100644 index 00000000000..4289df9155f --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc @@ -0,0 +1,87 @@ +if ($failover_to_slave) +{ + --let $server_to_crash=1 + --let $server_to_promote=2 + --let $new_master_port=$SERVER_MYPORT_2 + --let $client_port=$SERVER_MYPORT_1 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +} +if (!$failover_to_slave) +{ + --let $server_to_crash=2 + --let $server_to_promote=1 + --let $new_master_port=$SERVER_MYPORT_1 + --let $client_port=$SERVER_MYPORT_2 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +} + + +# Hold insert after write to binlog and before "run_commit_ordered" in engine + +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +--send_eval $query_to_crash + +# complicate recovery with an extra binlog file +if (!$failover_to_slave) +{ + --connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) + # use the same signal with $query_to_crash + SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + SET DEBUG_SYNC= "commit_after_release_LOCK_lock SIGNAL con1_ready WAIT_FOR con2_go"; + --send_eval $query2_to_crash +} + +--connection server_$server_to_crash +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +--connection server_$server_to_promote +--error 2003 +--source include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; + +--let $_expect_file_name=$MYSQLTEST_VARDIR/tmp/mysqld.$server_to_crash.expect +--let $restart_parameters=--rpl-semi-sync-slave-enabled=1 +--let $allow_rpl_inited=1 +--source include/start_mysqld.inc + +--connection server_$server_to_crash +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=$log_search_pattern +--source include/search_pattern_in_file.inc + +--disconnect conn_client + +# +# FAIL OVER now to new master +# +--connection server_$server_to_promote +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; + +--connection server_$server_to_crash +--let $master_port=$SERVER_MYPORT_2 +if (`select $server_to_crash = 2`) +{ + --let $master_port=$SERVER_MYPORT_1 +} +--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 $SERVER_MYPORT_2 SERVER_MYPORT_2 +eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +--source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf new file mode 100644 index 00000000000..3518eb95b67 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf @@ -0,0 +1,13 @@ +!include suite/rpl/rpl_1slave_base.cnf +!include include/default_client.cnf + + +[mysqld.1] +log-slave-updates +gtid-strict-mode=1 +max_binlog_size= 4096 + +[mysqld.2] +log-slave-updates +gtid-strict-mode=1 +max_binlog_size= 4096 diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test new file mode 100644 index 00000000000..972aaf2c8b4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -0,0 +1,144 @@ +# ==== Purpose ==== +# +# Test verifies replication failover scenario. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Having two servers 1 and 2 enable semi-sync replication with +# with the master wait 'after_sync'. +# 1 - Insert a row. While inserting second row simulate +# a server crash at once the transaction is written to binlog, flushed +# and synced but the binlog position is not updated. +# 2 - Post crash-recovery on the old master execute there CHANGE MASTER +# TO command to connect to server id 2. +# 3 - The old master new slave server 1 must connect to the new +# master server 2. +# 4 - repeat the above to crash the new master and restore in role the old one +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc + +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +RESET MASTER; + +--connection server_2 +RESET MASTER; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +--source include/start_slave.inc + + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); + +# +# CRASH the original master, and FAILOVER to the new +# + +# value 1 for server id 1 -> 2 failover +--let $failover_to_slave=1 +--let $query_to_crash= INSERT INTO t1 VALUES (2, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*master.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_2 +--let $rows_so_far=3 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'dummy3') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_1 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +# +# CRASH the new master and FAILOVER back to the original +# + +# value 0 for the reverse server id 2 -> 1 failover +--let $failover_to_slave=0 +--let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) +--let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*slave.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_1 +--let $rows_so_far=6 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'Done') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + + +--let $diff_tables=server_1:t1, server_2:t1 +--source include/diff_tables.inc + +# +--echo # Cleanup +# +--connection server_1 +DROP TABLE t1; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +--connection server_1 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; + +--connection server_2 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; + +--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 +eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +--source include/start_slave.inc + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index 6792e80b8fe..247ab7267bf 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1245,6 +1245,24 @@ int ha_prepare(THD *thd) DBUG_RETURN(error); } +/* + Returns counted number of + read-write recoverable transaction participants. +*/ +uint ha_count_rw(THD *thd, bool all) +{ + unsigned rw_ha_count= 0; + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + + for (Ha_trx_info * ha_info= trans->ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht()->recover) + ++rw_ha_count; + } + return rw_ha_count; +} + /** Check if we can skip the two-phase commit. @@ -1264,7 +1282,6 @@ int ha_prepare(THD *thd) engines with read-write changes. */ -static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, bool all) @@ -1481,7 +1498,9 @@ int ha_commit_trans(THD *thd, bool all) if (trans->no_2pc || (rw_ha_count <= 1)) { + thd->is_1pc_ro_trans= !rw_trans; error= ha_commit_one_phase(thd, all); + thd->is_1pc_ro_trans= false; goto done; } @@ -1942,7 +1961,7 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) recover() step of xa. @note - there are three modes of operation: + there are four modes of operation: - automatic recover after a crash in this case commit_list != 0, tc_heuristic_recover==0 all xids from commit_list are committed, others are rolled back @@ -1953,6 +1972,9 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) - no recovery (MySQL did not detect a crash) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. + - automatic recovery for the semisync slave server: uncommitted + transactions are rolled back and when they are in binlog it gets + truncated to the first uncommitted transaction start offset. */ struct xarecover_st { @@ -1960,8 +1982,122 @@ struct xarecover_st XID *list; HASH *commit_list; bool dry_run; + MEM_ROOT *mem_root; + bool error; }; +/** + Inserts a new hash member. + + returns a successfully created and inserted @c xid_recovery_member + into hash @c hash_arg, + or NULL. +*/ +static xid_recovery_member* +xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member *member= (xid_recovery_member*) + alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); + if (!member) + return NULL; + + member->xid= xid_arg; + member->in_engine_prepare= 1; + member->decided_to_commit= false; + + return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member; +} + +/* + Inserts a new or updates an existing hash member to increment + the member's prepare counter. + + returns false on success, + true otherwise. +*/ +static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, + MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member* member; + if ((member= (xid_recovery_member *) + my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) + member->in_engine_prepare++; + else + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root); + + return member == NULL; +} + +/* + Hash iterate function to complete with commit or rollback as decided + (typically at binlog recovery processing) in member->in_engine_prepare. +*/ +static my_bool xarecover_do_commit_or_rollback(void *member_arg, + void *hton_arg) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + handlerton *hton= (handlerton*) hton_arg; + xid_t x; + my_bool rc; + + x.set(member->xid); + rc= member->decided_to_commit ? hton->commit_by_xid(hton, &x) : + hton->rollback_by_xid(hton, &x); + + DBUG_ASSERT(rc || member->in_engine_prepare > 0); + + if (!rc) + { + member->in_engine_prepare--; + if (global_system_variables.log_warnings > 2) + sql_print_warning("%s transaction with xid %llu", + member->decided_to_commit ? + "Committed" : "Rolled back", (ulonglong) member->xid); + } + + return false; +} + +static my_bool xarecover_do_count_in_prepare(void *member_arg, + void *ptr_count) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + if (member->in_engine_prepare) + { + *(uint*) ptr_count += member->in_engine_prepare; + if (global_system_variables.log_warnings > 2) + sql_print_warning("Found prepared transaction with xid %llu", + (ulonglong) member->xid); + } + + return false; +} + +static my_bool xarecover_binlog_truncate_handlerton(THD *unused, + plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + + if (hton->state == SHOW_OPTION_YES && hton->recover) + { + my_hash_iterate((HASH*) arg, xarecover_do_commit_or_rollback, hton); + } + + return FALSE; +} + +uint ha_recover_complete(HASH *commit_list) +{ + uint count= 0; + + plugin_foreach(NULL, xarecover_binlog_truncate_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, commit_list); + my_hash_iterate(commit_list, xarecover_do_count_in_prepare, &count); + + return count; +} + static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, void *arg) { @@ -1969,6 +2105,9 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, struct xarecover_st *info= (struct xarecover_st *) arg; int got; + if (info->error) + return TRUE; + if (hton->state == SHOW_OPTION_YES && hton->recover) { while ((got= hton->recover(hton, info->list, info->len)) > 0 ) @@ -1988,7 +2127,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, } #endif /* WITH_WSREP */ - for (int i=0; i < got; i ++) + for (int i=0; i < got && !info->error; i ++) { my_xid x= IF_WSREP(WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? wsrep_xid_seqno(info->list[i]) : @@ -2013,7 +2152,20 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, info->found_my_xids++; continue; } - // recovery mode + + /* + Regular and semisync slave server recovery only collects + xids to make decisions on them later by the caller. + */ + if (info->mem_root) + { + if (xid_member_replace(info->commit_list, x, info->mem_root)) + { + info->error= true; + sql_print_error("Error in memory allocation at xarecover_handlerton"); + break; + } + } else if (IF_WSREP((wsrep_emulate_bin_log && wsrep_is_wsrep_xid(info->list + i) && x <= wsrep_limit), false) || @@ -2057,7 +2209,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, return FALSE; } -int ha_recover(HASH *commit_list) +int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root) { struct xarecover_st info; DBUG_ENTER("ha_recover"); @@ -2065,6 +2217,8 @@ int ha_recover(HASH *commit_list) info.commit_list= commit_list; info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); info.list= NULL; + info.mem_root= arg_mem_root; + info.error= false; /* commit_list and tc_heuristic_recover cannot be set both */ DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); @@ -2109,6 +2263,9 @@ int ha_recover(HASH *commit_list) info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } + if (info.error) + DBUG_RETURN(1); + if (info.commit_list) sql_print_information("Crash recovery finished."); DBUG_RETURN(0); diff --git a/sql/handler.h b/sql/handler.h index 2a346e8d9d1..bd39f46bf1f 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -873,6 +873,14 @@ typedef struct xid_t XID; /* The 'buf' has to have space for at least SQL_XIDSIZE bytes. */ uint get_sql_xid(XID *xid, char *buf); +/* struct for heuristic binlog truncate recovery */ +struct xid_recovery_member +{ + my_xid xid; + uint in_engine_prepare; // number of engines that have xid prepared + bool decided_to_commit; +}; + /* for recover() handlerton call */ #define MIN_XID_LIST_SIZE 128 #define MAX_XID_LIST_SIZE (1024*128) @@ -4814,7 +4822,8 @@ int ha_commit_one_phase(THD *thd, bool all); int ha_commit_trans(THD *thd, bool all); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); -int ha_recover(HASH *commit_list); +int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL); +uint ha_recover_complete(HASH *commit_list); /* transactions: these functions never call handlerton functions directly */ int ha_enable_transaction(THD *thd, bool on); @@ -4886,4 +4895,8 @@ int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *t @note This does not need to be multi-byte safe or anything */ char *xid_to_str(char *buf, const XID &xid); #endif // !DBUG_OFF +uint ha_count_rw(THD *thd, bool all); +uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all); + #endif /* HANDLER_INCLUDED */ diff --git a/sql/log.cc b/sql/log.cc index 8073f09ab88..a9808ed8823 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -58,7 +58,11 @@ #include "wsrep_mysqld.h" #include "sp_rcontext.h" #include "sp_head.h" - +#ifdef HAVE_REPLICATION +#include "semisync_master.h" +#include "semisync_slave.h" +#include <utility> // pair +#endif /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 #define MAX_TIME_SIZE 32 @@ -5949,7 +5953,8 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, - bool is_transactional, uint64 commit_id) + bool is_transactional, uint64 commit_id, + bool has_xid) { rpl_gtid gtid; uint32 domain_id; @@ -6011,7 +6016,7 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone, LOG_EVENT_SUPPRESS_USE_F, is_transactional, - commit_id); + commit_id, has_xid); /* Write the event to the binary log. */ DBUG_ASSERT(this == &mysql_bin_log); @@ -7977,6 +7982,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) #endif } + DEBUG_SYNC(leader->thd, "commit_before_update_end_pos"); /* update binlog_end_pos so it can be read by dump thread Note: must be _after_ the RUN_HOOK(after_flush) or else @@ -8164,9 +8170,12 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id) { binlog_cache_mngr *mngr= entry->cache_mngr; + bool has_xid= entry->end_event->get_type_code() == XID_EVENT; + DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt"); - if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id)) + if (write_gtid_event(entry->thd, false, + entry->using_trx_cache, commit_id, has_xid)) DBUG_RETURN(ER_ERROR_ON_WRITE); if (entry->using_stmt_cache && !mngr->stmt_cache.empty() && @@ -9609,6 +9618,180 @@ int TC_LOG::using_heuristic_recover() /****** transaction coordinator log for 2pc - binlog() based solution ******/ #define TC_LOG_BINLOG MYSQL_BIN_LOG +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. + + @retval true ok + @retval false error + +*/ +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *file_name, + my_off_t pos, + rpl_gtid *ptr_gtid, + enum_binlog_checksum_alg cs_alg) +{ + int error= 0; +#ifdef HAVE_REPLICATION + LOG_INFO log_info; + THD *thd= current_thd; + my_off_t index_file_offset= 0; + File file= -1; + IO_CACHE cache; + MY_STAT s; + + if ((error= find_log_pos(&log_info, file_name, 1))) + { + sql_print_error("Failed to locate binary log file:%s." + "Error:%d", file_name, error); + goto end; + } + + while (!(error= find_next_log(&log_info, 1))) + { + if (!index_file_offset) + { + index_file_offset= log_info.index_file_start_offset; + if ((error= open_purge_index_file(TRUE))) + { + sql_print_error("Failed to open purge index " + "file:%s. Error:%d", purge_index_file_name, error); + goto end; + } + } + if ((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("Failed to copy %s to purge index" + " file. Error:%d", log_info.log_file_name, error); + goto end; + } + } + + if (error != LOG_INFO_EOF) + { + sql_print_error("Failed to find the next binlog to " + "add to purge index register. Error:%d", error); + goto end; + } + + if (is_inited_purge_index_file()) + { + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + + if ((error= sync_purge_index_file())) + { + sql_print_error("Failed to flush purge index " + "file. Error:%d", error); + goto end; + } + + // Trim index file + if ((error= + mysql_file_chsize(index_file.file, index_file_offset, '\n', + MYF(MY_WME))) || + (error= + mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("Failed to trim binlog index " + "file:%s to offset:%llu. Error:%d", index_file_name, + index_file_offset, error); + goto end; + } + + /* Reset data in old index cache */ + if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1))) + { + sql_print_error("Failed to reinit binlog index " + "file. Error:%d", error); + goto end; + } + + /* Read each entry from purge_index_file and delete the file. */ + if ((error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("Failed to process registered " + "files that would be purged."); + goto end; + } + } + + DBUG_ASSERT(pos); + + if ((file= mysql_file_open(key_file_binlog, file_name, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + error= 1; + sql_print_error("Failed to open binlog file:%s for " + "truncation.", file_name); + goto end; + } + my_stat(file_name, &s, MYF(0)); + + /* Change binlog file size to truncate_pos */ + if ((error= + mysql_file_chsize(file, pos, 0, MYF(MY_WME))) || + (error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("Failed to trim the " + "binlog file:%s to size:%llu. Error:%d", + file_name, pos, error); + goto end; + } + else + { + char buf[21]; + + longlong10_to_str(ptr_gtid->seq_no, buf, 10); + sql_print_information("Successfully truncated binlog file:%s " + "to pos:%llu to remove transactions starting from " + "GTID %u-%u-%s", file_name, pos, + ptr_gtid->domain_id, ptr_gtid->server_id, buf); + } + if (!(error= init_io_cache(&cache, file, IO_SIZE, WRITE_CACHE, + (my_off_t) pos, 0, MYF(MY_WME|MY_NABP)))) + { + /* + Write Stop_log_event to ensure clean end point for the binary log being + truncated. + */ + Stop_log_event se; + se.checksum_alg= cs_alg; + if ((error= write_event(&se, NULL, &cache))) + { + sql_print_error("Failed to write stop event to " + "binary log. Errno:%d", + (cache.error == -1) ? my_errno : error); + goto end; + } + clear_inuse_flag_when_closing(cache.file); + if ((error= flush_io_cache(&cache)) || + (error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("Faild to write stop event to " + "binary log. Errno:%d", + (cache.error == -1) ? my_errno : error); + } + } + else + sql_print_error("Failed to initialize binary log " + "cache for writing stop event. Errno:%d", + (cache.error == -1) ? my_errno : error); + +end: + if (file >= 0) + { + end_io_cache(&cache); + mysql_file_close(file, MYF(MY_WME)); + } + error= error || close_purge_index_file(); +#endif + return error > 0; +} int TC_LOG_BINLOG::open(const char *opt_name) { int error= 1; @@ -10040,26 +10223,515 @@ start_binlog_background_thread() return 0; } +#ifdef HAVE_REPLICATION +class Recovery_context +{ +public: + my_off_t prev_event_pos; + rpl_gtid last_gtid; + bool last_gtid_standalone; + bool last_gtid_valid; + bool last_gtid_no2pc; // true when the group does not end with Xid event + uint last_gtid_engines; + std::pair<uint, my_off_t> last_gtid_coord; + /* + When true, it's semisync slave recovery mode + rolls back transactions in doubt and wipes them off from binlog. + The rest of declarations deal with this type of recovery. + */ + bool do_truncate; + rpl_gtid binlog_unsafe_gtid, truncate_gtid; + char binlog_truncate_file_name[FN_REFLEN] ; + char binlog_unsafe_file_name[FN_REFLEN] ; + /* + When do_truncate is true, the truncate position may not be + found in one round when recovered transactions are multi-engine + or just on different engines. + In the single recoverable engine case `truncate_reset_done` and + therefore `truncate_validated` remain `false' when the last + binlog is the binlog-checkpoint one. + The meaning of `truncate_reset_done` is according to the following example: + Let round = 1, Binlog contains the sequence of replication event groups: + [g1, G2, g3] + where `G` (in capital) stands for committed, `g` for prepared. + g1 is first set as truncation candidate, then G2 reset it to indicate + the actual truncation is behind (to the right of) it. + `truncate_validated` is set to true when `binlog_truncate_pos` (as of g3) + won't change. + Observe last_gtid_valid is affected, so in the above example g1 + would have to be discarded from the 1st round binlog state estimate which + is handled by the 2nd round (see below). + */ + bool truncate_validated; // trued when the truncate position settled + bool truncate_reset_done; // trued when the position is to reevaluate + /* Flags the fact of truncate position estimation is done the 1st round */ + bool truncate_set_in_1st; + /* + Monotonically indexes binlog files in the recovery list. + When the list is "likely" singleton the value is UINT_MAX. + Otherwise enumeration starts with zero for the first file, increments + by one for any next file except for the last file in the list, which + is also the initial binlog file for recovery, + that is enumberated with UINT_MAX. + */ + uint id_binlog; + enum_binlog_checksum_alg cs_alg; // for Stop_event with do_truncate + bool single_binlog; + std::pair<uint, my_off_t> binlog_truncate_coord; + std::pair<uint, my_off_t> binlog_unsafe_coord; + + Recovery_context(); + bool decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos); + void process_gtid(int round, Gtid_log_event *gev, LOG_INFO *linfo); + int next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, MYSQL_BIN_LOG *log); + bool is_safe() + { + return !do_truncate ? true : + (truncate_gtid.seq_no == 0 || // no truncate + binlog_unsafe_coord < binlog_truncate_coord); // or unsafe is earlier + } + bool complete(MYSQL_BIN_LOG *log, HASH &xids); + void update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo); + void reset_truncate_coord(my_off_t pos); + void set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_cs_alg); +}; +bool Recovery_context::complete(MYSQL_BIN_LOG *log, HASH &xids) +{ + if (!do_truncate || is_safe()) + { + uint count_in_prepare= ha_recover_complete(&xids); + if (count_in_prepare > 0 && global_system_variables.log_warnings > 2) + { + sql_print_warning("Could not complete %u number of transactions in " + "engines.", count_in_prepare); + return false; // there's later dry run ha_recover() to error out + } + } + + /* Truncation is not done when there's no transaction to roll back */ + if (do_truncate && truncate_gtid.seq_no > 0) + { + if (is_safe()) + { + if (log->truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_coord.second, + &truncate_gtid, + cs_alg)) + { + sql_print_error("Failed to trim the binary log to " + "file:%s pos:%llu.", binlog_truncate_file_name, + binlog_truncate_coord.second); + return true; + } + } + else + { + sql_print_error("Cannot trim the binary log to file:%s " + "pos:%llu as unsafe statement " + "is found at file:%s pos:%llu which is " + "beyond the truncation position " + "(the farest in binlog order only is reported); " + "all transactions in doubt are left intact. ", + binlog_truncate_file_name, binlog_truncate_coord.second, + binlog_unsafe_file_name, binlog_unsafe_coord.second); + return true; + } + } + + return false; +} + +Recovery_context::Recovery_context() : + prev_event_pos(0), + last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), + last_gtid_engines(0), + do_truncate(rpl_semi_sync_slave_enabled), + truncate_validated(false), truncate_reset_done(false), + truncate_set_in_1st(false), id_binlog(UINT_MAX), + cs_alg(BINLOG_CHECKSUM_ALG_UNDEF), single_binlog(false) +{ + last_gtid_coord= std::pair<uint,my_off_t>(0,0); + binlog_truncate_coord= std::pair<uint,my_off_t>(0,0); + binlog_unsafe_coord= std::pair<uint,my_off_t>(0,0); + binlog_truncate_file_name[0]= 0; + binlog_unsafe_file_name [0]= 0; + binlog_unsafe_gtid= truncate_gtid= rpl_gtid(); +} + +/** + Is called when a committed or to-be-committed transaction is detected. + @c truncate_gtid is set to "nil" with its @c rpl_gtid::seq_no := 0. + @c truncate_reset_done remembers the fact of that has been done at least + once in the current round; + @c binlog_truncate_coord is "suggested" to a next group start to indicate + the actual settled value must be at most as the last suggested one. +*/ +void Recovery_context::reset_truncate_coord(my_off_t pos) +{ + DBUG_ASSERT(binlog_truncate_coord.second == 0 || + last_gtid_coord >= binlog_truncate_coord || + truncate_set_in_1st); + + binlog_truncate_coord= std::pair<uint,my_off_t>(id_binlog, pos); + truncate_gtid= rpl_gtid(); + truncate_reset_done= true; +} + + +/* + Sets binlog_truncate_pos to the value of the current transaction's gtid. + In multi-engine case that might be just an assessment to be exacted + in the current round and confirmed in a next one. +*/ +void Recovery_context::set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_cs_alg) +{ + binlog_truncate_coord= last_gtid_coord; + strmake_buf(binlog_truncate_file_name, linfo->log_file_name); + + truncate_gtid= last_gtid; + cs_alg= fd_cs_alg; + truncate_set_in_1st= (round == 1); +} + +bool Recovery_context::decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos) +{ + if (member) + { + DBUG_EXECUTE_IF("binlog_truncate_partial_commit", + if (last_gtid_engines == 2) + { + DBUG_ASSERT(member->in_engine_prepare > 0); + member->in_engine_prepare= 1; + }); + /* + xid in doubt are resolved as follows: + in_engine_prepare is compared agaist binlogged info to + yield the commit-or-rollback decision in the normal case. + In the semisync-slave recovery the decision may be + approximate to change in later rounds. + */ + if (member->in_engine_prepare > last_gtid_engines) + { + char buf[21]; + longlong10_to_str(last_gtid.seq_no, buf, 10); + sql_print_error("Error to recovery multi-engine transaction: " + "the number of engines prepared %u exceeds the " + "respective number %u in its GTID %u-%u-%s " + "located at file:%s pos:%llu", + member->in_engine_prepare, last_gtid_engines, + last_gtid.domain_id, last_gtid.server_id, buf, + linfo->log_file_name, last_gtid_coord.second); + return true; + } + else if (member->in_engine_prepare < last_gtid_engines) + { + DBUG_EXECUTE_IF("binlog_truncate_partial_commit", + member->in_engine_prepare= 2;); + DBUG_ASSERT(member->in_engine_prepare > 0); + /* + This is an "unlikely" branch of two or more engines in transaction + that is partially committed, so to complete. + */ + member->decided_to_commit= true; + if (do_truncate) + { + /* + A validated truncate pos may not change later. + Any "unlikely" (two or more engines in transaction) reset + to not-validated yet position ensues correcting early + estimates in the following round(s). + */ + if (!truncate_validated) + reset_truncate_coord(pos); + } + } + else // member->in_engine_prepare == last_gtid_engines + { + if (!do_truncate) // "normal" recovery + { + member->decided_to_commit= true; + } + else + { + /* + The first time or further estimate the truncate position + until validation. Already set not validated yet postion + may change only through previous reset + unless this xid in doubt is the first in the 2nd round. + */ + if (!truncate_validated) + { + DBUG_ASSERT(round <= 2); + /* + Either truncate was not set or was reset, else + it gets incremented, otherwise it may only set to an earlier + offset only at the turn out of the 1st round. + */ + DBUG_ASSERT(truncate_gtid.seq_no == 0 || + last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st)); + + last_gtid_valid= false; // may still (see gtid) flip later + if (truncate_gtid.seq_no == 0 /* was reset or never set */ || + (truncate_set_in_1st && round == 2 /* reevaluted at round turn */)) + set_truncate_coord(linfo, round, fdle->checksum_alg); + + DBUG_ASSERT(member->decided_to_commit == false); // may flip later + } + else + { + DBUG_ASSERT(!truncate_reset_done); // the position was settled + /* + Correct earlier decisions of 1st and/or 2nd round to + rollback and invalidate last_gtid in binlog state. + */ + if (!(member->decided_to_commit= + last_gtid_coord < binlog_truncate_coord)) + { + last_gtid_valid= false; // settled + if (truncate_gtid.seq_no == 0) + truncate_gtid= last_gtid; + } + } + } + } + } + else if (do_truncate) // "0" < last_gtid_engines + { + /* + Similar to the multi-engine partial commit of "then" branch above. + The 2nd condition economizes away an extra (3rd) round in + expected cases of first xid:s in the binlog checkpoint file + are actually of committed transactions. So fully committed + xid sequence is passed in this branch without any action. + */ + if (!truncate_validated) + reset_truncate_coord(pos); + } + + return false; +} + +/* + Is invoked when a standalone or non-2pc group is detected. + Both are unsafe to truncate in the semisync-slave recovery so + the maximum unsafe coordinate may be updated. + In the non-2pc group case though, *exeptionally*, + the no-engine group is considered safe, to be invalidated + to not contribute to binlog state. +*/ +void Recovery_context::update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo) +{ + if (!do_truncate) + return; + + if (truncate_gtid.seq_no > 0 && // g1,U2, *not* G1,U2 + last_gtid_coord > binlog_truncate_coord) + { + DBUG_ASSERT(binlog_truncate_coord.second > 0); + /* + Potentially unsafe when the truncate coordinate is not determined, + just detected as unsafe when behind the latter. + */ + if (last_gtid_engines == 0) + { + last_gtid_valid= false; + } + else + { + binlog_unsafe_gtid= last_gtid; + binlog_unsafe_coord= last_gtid_coord; + strmake_buf(binlog_unsafe_file_name, linfo->log_file_name); + } + } +} + +/* + Assigns last_gtid and assesses the maximum (in the binlog offset term) + unsafe gtid (group of events). +*/ +void Recovery_context::process_gtid(int round, Gtid_log_event *gev, + LOG_INFO *linfo) +{ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_engines= gev->extra_engines != UINT_MAX ? + gev->extra_engines + 1 : 0; + last_gtid_coord= std::pair<uint,my_off_t>(id_binlog, prev_event_pos); + if (round == 1 || do_truncate) + { + DBUG_ASSERT(!last_gtid_valid); + + last_gtid_no2pc= false; + last_gtid_standalone= + (gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false; + if (do_truncate && last_gtid_standalone) + update_binlog_unsafe_coord_if_needed(linfo); + /* Update the binlog state with any 'valid' GTID logged after Gtid_list. */ + last_gtid_valid= true; // may flip at Xid when falls to truncate + } +} + +/* + At the end of processing of the current binlog compute next action. + When round increments in the semisync-slave recovery + truncate_validated, truncate_reset_done + gets reset/set for the next round, + as well as id_binlog gets reset either to zero or UINT_MAX + when recovery deals with the only binlog file. + + @param[in,out] round the current round that may increment here + @param last_log_name the recovery starting binlog file + @param binlog_checkpoint_name + binlog checkpoint file + @param linfo binlog file list struct for next file + @param log pointer to mysql_bin_log instance + @return 0 when rounds continue, maybe the current one remains + 1 when all rounds are done + -1 on error +*/ +int Recovery_context::next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, + MYSQL_BIN_LOG *log) +{ + if (!strcmp(linfo->log_file_name, last_log_name)) + { + /* Either exit the loop now, or increment round */ + DBUG_ASSERT(round <= 3); + + if ((round <= 2 && likely(!truncate_reset_done)) || round == 3) + { + // Exit now as the 1st round ends up with the binlog-checkpoint file + // is the same as the initial binlog file, so already parsed; or + // the 2nd round has not made own truncate_reset_done; or + // at the end of the 3rd "truncate" round. + DBUG_ASSERT(do_truncate || round == 1); + // The 3rd round is done only when the 2nd trued truncate_reset_done + // and consequently truncate_validated. + // No instances of truncate_reset_done in first 2 rounds allows + // for exiting now. + DBUG_ASSERT(round < 3 || truncate_validated); + + return true; + } + else + { + DBUG_ASSERT(do_truncate); + /* + the last binlog file, having truncate_reset_done to indicate + needed correction to member->decided_to_commit to reflect + changed binlog_truncate_pos. + */ + truncate_reset_done= false; + truncate_validated= true; + rpl_global_gtid_binlog_state.reset_nolock(); + + if (round > 1 && + log->find_log_pos(linfo, binlog_checkpoint_name, 1)) + { + sql_print_error("Binlog file '%s' not found in binlog index, needed " + "for recovery. Aborting.", binlog_checkpoint_name); + return -1; + } + id_binlog= (single_binlog= !strcmp(linfo->log_file_name, last_log_name)) ? + UINT_MAX : 0; + round++; + } + } + else if (round == 1) + { + if (do_truncate) + { + truncate_validated= truncate_reset_done; + rpl_global_gtid_binlog_state.reset_nolock(); + truncate_reset_done= false; + id_binlog= 0; + + DBUG_ASSERT(!single_binlog); + } + round++; + } + else + { + /* + NOTE: reading other binlog's FD is necessary for finding out + the checksum status of the respective binlog file. + Round remains in this branch. + */ + if (log->find_next_log(linfo, 1)) + { + sql_print_error("Error reading binlog files during recovery. " + "Aborting."); + return -1; + } + if (!strcmp(linfo->log_file_name, last_log_name)) + { + if (do_truncate) + { + DBUG_ASSERT(!single_binlog); + id_binlog= UINT_MAX; + } + else + return 1; // regular recovery exit + } + else if (do_truncate) + id_binlog++; + + DBUG_ASSERT(id_binlog <= UINT_MAX); // the assert is "practical" + } + + DBUG_ASSERT(!single_binlog || + !strcmp(linfo->log_file_name, binlog_checkpoint_name)); + + return 0; +} +#endif int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log, - Format_description_log_event *fdle, bool do_xa) + Format_description_log_event *fdle_arg, bool do_xa) { Log_event *ev= NULL; HASH xids; MEM_ROOT mem_root; char binlog_checkpoint_name[FN_REFLEN]; bool binlog_checkpoint_found; - bool first_round; IO_CACHE log; File file= -1; const char *errmsg; + Format_description_log_event *fdle= fdle_arg; #ifdef HAVE_REPLICATION - rpl_gtid last_gtid; - bool last_gtid_standalone= false; - bool last_gtid_valid= false; + Recovery_context ctx; #endif + /* + The for-loop variable is updated by the following rule set: + Initially set to 1. + After the initial binlog file is processed to identify + the Binlog-checkpoint file it is incremented when the latter file + is different from the initial one. Otherwise the only log has been + fully parsed and the loop exits. + The 2nd starts with the Binlog-checkpoint file and ends when the initial + binlog file is reached. It is excluded from yet another processing + in the "normal" non-semisync-slave configuration, and then the loop is + done at this point. Otherwise in the semisync slave case it may be parsed + over again. The 2nd round may turn to a third in "unlikely" condition of the + semisync-slave is being recovered having multi- or different engine + transactions in doubt. + */ + int round; if (! fdle->is_valid() || (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, @@ -10072,39 +10744,63 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error + /* finds xids when root is not NULL */ + if (do_xa && ha_recover(&xids, &mem_root)) + goto err1; + /* Scan the binlog for XIDs that need to be committed if still in the prepared stage. Start with the latest binlog file, then continue with any other binlog files if the last found binlog checkpoint indicates it is needed. + + In case the semisync slave recovery the 2nd round may include + the initial file as well as cause a 3rd round when transactions + with multiple engines are discovered. + Additionally to find and decide on transactions in doubt that + the semisync slave may need to roll back, the binlog can be truncated + in the end to reflect the rolled back decisions. */ binlog_checkpoint_found= false; - first_round= true; - for (;;) + for (round= 1;;) { - while ((ev= Log_event::read_log_event(first_round ? first_log : &log, + while ((ev= Log_event::read_log_event(round == 1 ? first_log : &log, fdle, opt_master_verify_checksum)) && ev->is_valid()) { enum Log_event_type typ= ev->get_type_code(); switch (typ) { + case FORMAT_DESCRIPTION_EVENT: + if (round > 1) + { + if (fdle != fdle_arg) + delete fdle; + fdle= (Format_description_log_event*) ev; + } + break; case XID_EVENT: + if (do_xa) { - if (do_xa) + xid_recovery_member *member= + (xid_recovery_member*) + my_hash_search(&xids, (uchar*) &static_cast<Xid_log_event*>(ev)->xid, + sizeof(my_xid)); +#ifndef HAVE_REPLICATION { - Xid_log_event *xev=(Xid_log_event *)ev; - uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid, - sizeof(xev->xid)); - if (!x || my_hash_insert(&xids, x)) - goto err2; + if (member) + member->decided_to_commit= true; } - break; +#else + if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos)) + goto err2; +#endif } + break; case BINLOG_CHECKPOINT_EVENT: - if (first_round && do_xa) + if (round == 1 && do_xa) { size_t dir_len; Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; @@ -10124,9 +10820,18 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } } break; +#ifdef HAVE_REPLICATION case GTID_LIST_EVENT: - if (first_round) + if (round == 1 || (ctx.do_truncate && + (ctx.id_binlog == 0 || ctx.single_binlog))) { + /* + Unlike the normal case, in do_truncate the initial state is + in the first binlog file of the recovery list. + */ + DBUG_ASSERT(!ctx.do_truncate || !ctx.single_binlog || + ctx.id_binlog == UINT_MAX); + Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; /* Initialise the binlog state from the Gtid_list event. */ @@ -10135,19 +10840,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } break; -#ifdef HAVE_REPLICATION case GTID_EVENT: - if (first_round) + ctx.process_gtid(round, (Gtid_log_event *)ev, linfo); + break; + + case QUERY_EVENT: + if (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback()) { - Gtid_log_event *gev= (Gtid_log_event *)ev; - - /* Update the binlog state with any GTID logged after Gtid_list. */ - last_gtid.domain_id= gev->domain_id; - last_gtid.server_id= gev->server_id; - last_gtid.seq_no= gev->seq_no; - last_gtid_standalone= - ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false); - last_gtid_valid= true; + ctx.last_gtid_no2pc= true; + ctx.update_binlog_unsafe_coord_if_needed(linfo); } break; #endif @@ -10162,26 +10864,31 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, default: /* Nothing. */ break; - } + } // end of switch #ifdef HAVE_REPLICATION - if (last_gtid_valid && - ((last_gtid_standalone && !ev->is_part_of_group(typ)) || - (!last_gtid_standalone && - (typ == XID_EVENT || - (LOG_EVENT_IS_QUERY(typ) && - (((Query_log_event *)ev)->is_commit() || - ((Query_log_event *)ev)->is_rollback())))))) + if (ctx.last_gtid_valid && + ((ctx.last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!ctx.last_gtid_standalone && + (typ == XID_EVENT || ctx.last_gtid_no2pc)))) { - if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false)) + DBUG_ASSERT(round == 1 || ctx.do_truncate); + DBUG_ASSERT(!ctx.last_gtid_no2pc || + (ctx.last_gtid_standalone || + (LOG_EVENT_IS_QUERY(typ) && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))); + + if (rpl_global_gtid_binlog_state.update_nolock(&ctx.last_gtid, false)) goto err2; - last_gtid_valid= false; + ctx.last_gtid_valid= false; } + ctx.prev_event_pos= ev->log_pos; #endif - - delete ev; + if (typ != FORMAT_DESCRIPTION_EVENT) + delete ev; ev= NULL; - } + } // end of while if (!do_xa) break; @@ -10193,11 +10900,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, written by an older version of MariaDB (or MySQL) - these always have an (implicit) binlog checkpoint event at the start of the last binlog file. */ - if (first_round) + if (round == 1) { if (!binlog_checkpoint_found) break; - first_round= false; DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004", if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) && 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name)) @@ -10217,35 +10923,43 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, file= -1; } - if (!strcmp(linfo->log_file_name, last_log_name)) - break; // No more files to do +#ifdef HAVE_REPLICATION + int rc= ctx.next_binlog_or_round(round, last_log_name, + binlog_checkpoint_name, linfo, this); + if (rc == -1) + goto err2; + else if (rc == 1) + break; // all rounds done +#else + if (!strcmp(linfo->log_file_name, last_log_name)) + break; // No more files to do +#endif + if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0) { sql_print_error("%s", errmsg); goto err2; } - /* - We do not need to read the Format_description_log_event of other binlog - files. It is not possible for a binlog checkpoint to span multiple - binlog files written by different versions of the server. So we can use - the first one read for reading from all binlog files. - */ - if (find_next_log(linfo, 1)) - { - sql_print_error("Error reading binlog files during recovery. Aborting."); - goto err2; - } fdle->reset_crypto(); - } + } // end of for if (do_xa) { - if (ha_recover(&xids)) - goto err2; - + if (binlog_checkpoint_found) + { +#ifndef HAVE_REPLICATION + if (ha_recover_complete(&xids)) +#else + if (ctx.complete(this, xids)) +#endif + goto err2; + } free_root(&mem_root, MYF(0)); my_hash_free(&xids); } + if (fdle != fdle_arg) + delete fdle; + return 0; err2: @@ -10265,6 +10979,9 @@ err1: "(if it's, for example, out of memory error) and restart, " "or delete (or rename) binary log and start mysqld with " "--tc-heuristic-recover={commit|rollback}"); + if (fdle != fdle_arg) + delete fdle; + return 1; } diff --git a/sql/log.h b/sql/log.h index b3b598e08ea..4a9eb86e640 100644 --- a/sql/log.h +++ b/sql/log.h @@ -860,6 +860,10 @@ public: int purge_first_log(Relay_log_info* rli, bool included); int set_purge_index_file_name(const char *base_file_name); int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos, + rpl_gtid *gtid, + enum_binlog_checksum_alg cs_alg); bool is_inited_purge_index_file(); int close_purge_index_file(); int clean_purge_index_file(); @@ -896,7 +900,7 @@ public: void set_status_variables(THD *thd); bool is_xidlist_idle(); bool write_gtid_event(THD *thd, bool standalone, bool is_transactional, - uint64 commit_id); + uint64 commit_id, bool has_xid= false); int read_state_from_file(); int write_state_to_file(); int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); diff --git a/sql/log_event.cc b/sql/log_event.cc index e344fc8894f..a593db10d16 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -55,9 +55,13 @@ #include "sql_digest.h" #include "zlib.h" #include "my_atomic.h" +#include <algorithm> #define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1)) +using std::max; +using std::min; + /** BINLOG_CHECKSUM variable. */ @@ -1448,7 +1452,7 @@ Log_event::do_shall_skip(rpl_group_info *rgi) rli->replicate_same_server_id, rli->slave_skip_counter)); if ((server_id == global_system_variables.server_id && - !rli->replicate_same_server_id) || + !(rli->replicate_same_server_id || (flags & LOG_EVENT_ACCEPT_OWN_F))) || (rli->slave_skip_counter == 1 && rli->is_in_group()) || (flags & LOG_EVENT_SKIP_REPLICATION_F && opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE)) @@ -7904,10 +7908,12 @@ bool Binlog_checkpoint_log_event::write() Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, const Format_description_log_event *description_event) - : Log_event(buf, description_event), seq_no(0), commit_id(0) + : Log_event(buf, description_event), seq_no(0), commit_id(0), + flags_extra(0), extra_engines(0) { uint8 header_size= description_event->common_header_len; uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; + const char *buf_0= buf; if (event_len < (uint) header_size + (uint) post_header_len || post_header_len < GTID_HEADER_LEN) return; @@ -7918,6 +7924,7 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, domain_id= uint4korr(buf); buf+= 4; flags2= *buf; + ++buf; if (flags2 & FL_GROUP_COMMIT_ID) { if (event_len < (uint)header_size + GTID_HEADER_LEN + 2) @@ -7925,9 +7932,31 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, seq_no= 0; // So is_valid() returns false return; } - ++buf; commit_id= uint8korr(buf); + buf+= 8; + } + /* the extra flags check and actions */ + if (static_cast<uint>(buf - buf_0) < event_len) + { + flags_extra= *buf; + ++buf; + /* extra flags presence is identifed by non-zero byte value at this point */ + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + extra_engines= uint4korr(buf); + buf += 4; + + DBUG_ASSERT(extra_engines > 0); + } } + /* + the strict '<' part of the assert corresponds to extra zero-padded + trailing bytes, + */ + DBUG_ASSERT(static_cast<uint>(buf - buf_0) <= event_len); + /* and the last of them is tested. */ + DBUG_ASSERT(static_cast<uint>(buf - buf_0) == event_len || + buf_0[event_len - 1] == 0); } @@ -7936,13 +7965,16 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, uint32 domain_id_arg, bool standalone, uint16 flags_arg, bool is_transactional, - uint64 commit_id_arg) + uint64 commit_id_arg, bool has_xid) : Log_event(thd_arg, flags_arg, is_transactional), seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), - flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)) + flags2((standalone ? FL_STANDALONE : 0) | + (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)), + flags_extra(0), extra_engines(0) { cache_type= Log_event::EVENT_NO_CACHE; bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table(); + if (thd_arg->transaction.stmt.trans_did_wait() || thd_arg->transaction.all.trans_did_wait()) flags2|= FL_WAITED; @@ -7958,6 +7990,22 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, /* Preserve any DDL or WAITED flag in the slave's binlog. */ if (thd_arg->rgi_slave) flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED)); + /* count non-zero extra recoverable engines; total = extra + 1 */ + if (is_transactional) + { + if (has_xid) + { + extra_engines= + max<uint>(1, ha_count_rw(thd, thd_arg->in_multi_stmt_transaction_mode())) + - 1; + } + else if (unlikely(thd_arg->is_1pc_ro_trans)) + { + extra_engines= UINT_MAX; // neither extra nor base engine + } + if (extra_engines > 0) + flags_extra|= FL_EXTRA_MULTI_ENGINE; + } } @@ -8000,20 +8048,31 @@ Gtid_log_event::peek(const char *event_start, size_t event_len, bool Gtid_log_event::write() { - uchar buf[GTID_HEADER_LEN+2]; - size_t write_len; + uchar buf[GTID_HEADER_LEN+2 + /* flags_extra: */ 1+4]; + size_t write_len= 13; int8store(buf, seq_no); int4store(buf+8, domain_id); buf[12]= flags2; if (flags2 & FL_GROUP_COMMIT_ID) { - int8store(buf+13, commit_id); + int8store(buf + write_len, commit_id); write_len= GTID_HEADER_LEN + 2; } - else + if (flags_extra > 0) + { + buf[write_len]= flags_extra; + write_len++; + } + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + int4store(buf + write_len, extra_engines); + write_len += 4; + } + + if (write_len < GTID_HEADER_LEN) { - bzero(buf+13, GTID_HEADER_LEN-13); + bzero(buf+write_len, GTID_HEADER_LEN-write_len); write_len= GTID_HEADER_LEN; } return write_header(write_len) || diff --git a/sql/log_event.h b/sql/log_event.h index 8a342cb5cd3..1588ab85104 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -483,6 +483,16 @@ class String; #define LOG_EVENT_IGNORABLE_F 0x80 /** + @def LOG_EVENT_ACCEPT_OWN_F + + Flag sets by the gtid-mode connected semisync slave for + the same server_id ("own") events which the slave must not have + in its state. Typically such events were never committed by + their originator (this server) and discared at its crash recovery +*/ +#define LOG_EVENT_ACCEPT_OWN_F 0x4000 + +/** @def LOG_EVENT_SKIP_REPLICATION_F Flag set by application creating the event (with @@skip_replication); the @@ -3357,6 +3367,12 @@ public: uint64 commit_id; uint32 domain_id; uchar flags2; + uint flags_extra; // more flags area placed after the regular flags2's one + /* + Extra to a "base" engine recoverable engines participating + in the transaction. Zero, when the base engine only is present. + */ + uint extra_engines; /* Flags2. */ @@ -3385,9 +3401,19 @@ public: /* FL_DDL is set for event group containing DDL. */ static const uchar FL_DDL= 32; + /* Flags_extra. */ + + /* + FL_EXTRA_MULTI_ENGINE is set for event group comprising a transaction + involving multiple storage engines. No flag and extra data are added + to the event when the transaction involves only one engine. + */ + static const uchar FL_EXTRA_MULTI_ENGINE= 1; + #ifdef MYSQL_SERVER Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, - uint16 flags, bool is_transactional, uint64 commit_id); + uint16 flags, bool is_transactional, uint64 commit_id, + bool has_xid= false); #ifdef HAVE_REPLICATION void pack_info(Protocol *protocol); virtual int do_apply_event(rpl_group_info *rgi); diff --git a/sql/slave.cc b/sql/slave.cc index 28e08e32346..9b3f73e5341 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6186,6 +6186,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) char new_buf_arr[4096]; bool is_malloc = false; bool is_rows_event= false; + bool semisync_recovery= false; /* FD_q must have been prepared for the first R_a event inside get_master_version_and_clock() @@ -6944,7 +6945,9 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) } else if ((s_id == global_system_variables.server_id && - !mi->rli.replicate_same_server_id) || + (!mi->rli.replicate_same_server_id && + !(semisync_recovery= (rpl_semi_sync_slave_enabled && + mi->using_gtid != Master_info::USE_GTID_NO)))) || event_that_should_be_ignored(buf) || /* the following conjunction deals with IGNORE_SERVER_IDS, if set @@ -7004,6 +7007,19 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) } else { + if (semisync_recovery) + { + int2store(const_cast<char*>(buf + FLAGS_OFFSET), + uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F); + if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF) + { + ha_checksum crc= 0; + + crc= my_checksum(crc, (const uchar *) buf, + event_len - BINLOG_CHECKSUM_LEN); + int4store(&buf[event_len - BINLOG_CHECKSUM_LEN], crc); + } + } if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len))) { mi->master_log_pos+= inc_pos; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 174716c1c3e..51d58fd1666 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -643,6 +643,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) m_stmt_da(&main_da), tdc_hash_pins(0), xid_hash_pins(0), + is_1pc_ro_trans(false), m_tmp_tables_locked(false) #ifdef WITH_WSREP , diff --git a/sql/sql_class.h b/sql/sql_class.h index 140394fefc1..87fa88d4c89 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4632,7 +4632,7 @@ public: LF_PINS *tdc_hash_pins; LF_PINS *xid_hash_pins; bool fix_xid_hash_pins(); - + bool is_1pc_ro_trans; /* Members related to temporary tables. */ public: /* Opened table states. */ diff --git a/sql/transaction.cc b/sql/transaction.cc index 7f94d98179e..6887c0cdde8 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -971,7 +971,15 @@ bool trans_xa_commit(THD *thd) { DEBUG_SYNC(thd, "trans_xa_commit_after_acquire_commit_lock"); + Ha_trx_info *ha_info= thd->transaction.all.ha_list; + uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, true); + /* rw_trans is TRUE when we in a transaction changing data */ + bool rw_trans= + rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U); + + thd->is_1pc_ro_trans= !rw_trans; res= MY_TEST(ha_commit_one_phase(thd, 1)); + thd->is_1pc_ro_trans= false; if (res) my_error(ER_XAER_RMERR, MYF(0)); } |