diff options
author | Sujatha <sujatha.sivakumar@mariadb.com> | 2020-04-09 20:45:45 +0530 |
---|---|---|
committer | Andrei Elkin <andrei.elkin@mariadb.com> | 2020-10-23 18:04:31 +0300 |
commit | bc00529d14aeda845d8129cb924efe4db2c11228 (patch) | |
tree | 3cfd84a4f79d8fa32887713b947493e48b0bbbc0 | |
parent | 72cb20820b4f48fda0ae5e0ead4783d76dde3ec0 (diff) | |
download | mariadb-git-bb-10.1-andrei.tar.gz |
MDEV-21117: --tc-heuristic-recover=rollback is not replication safebb-10.1-andrei
Problem:
=======
When run after master server crash --tc-heuristic-recover=rollback produces
inconsistent server state with binlog still containing transactions that were
rolled back by the option. Such way recovered server may not be used for
replication.
Fix:
===
A new --tc-heuristic-recover=BINLOG_TRUNCATE value is added to the
server option. It works as ROLLBACK for engines but makes its best to
synchronize binlog to truncate it to the last committed transaction.
The BINLOG_TRUNCATE algorithm starts scanning possibly a sequences of
binlog files starting from the binlog checkpoint file to find a
truncate position candidate. The candidate truncate position
corresponds to the beginning of the first GTID event whose transaction
is not found to have been committed. There must be no committed
transactions beyond this position *but* non-transactional events are
possible. This is also checked to warn when the case.
Rollback and possibly completion of partial commits (in the multi-engine case)
is done regardless, as well as the binlog GTID state is adjusted
accordingly. Also a STOP binlog event is written at the end of
truncated binary log.
-rw-r--r-- | mysql-test/r/mysqld--help.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result | 18 | ||||
-rw-r--r-- | mysql-test/suite/binlog/r/binlog_truncate_multi_log.result | 33 | ||||
-rw-r--r-- | mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result | 30 | ||||
-rw-r--r-- | mysql-test/suite/binlog/r/binlog_truncate_retry_success.result | 29 | ||||
-rw-r--r-- | mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test | 75 | ||||
-rw-r--r-- | mysql-test/suite/binlog/t/binlog_truncate_multi_log.test | 87 | ||||
-rw-r--r-- | mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test | 106 | ||||
-rw-r--r-- | mysql-test/suite/binlog/t/binlog_truncate_retry_success.test | 105 | ||||
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result | 53 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test | 160 | ||||
-rw-r--r-- | sql/handler.cc | 143 | ||||
-rw-r--r-- | sql/handler.h | 11 | ||||
-rw-r--r-- | sql/log.cc | 592 | ||||
-rw-r--r-- | sql/log.h | 7 | ||||
-rw-r--r-- | sql/log_event.cc | 49 | ||||
-rw-r--r-- | sql/log_event.h | 14 | ||||
-rw-r--r-- | sql/mysqld.cc | 2 | ||||
-rw-r--r-- | sql/sql_class.h | 1 |
19 files changed, 1491 insertions, 26 deletions
diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index dce1578adee..8619beaae77 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -1089,7 +1089,7 @@ The following specify which files/extra groups are read (specified before remain The number of cached open tables --tc-heuristic-recover=name Decision to use in heuristic recover process. One of: OFF, - COMMIT, ROLLBACK + COMMIT, ROLLBACK, BINLOG_TRUNCATE --thread-cache-size=# How many threads we should keep in a cache for reuse --thread-pool-idle-timeout=# diff --git a/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result new file mode 100644 index 00000000000..eff95d05aac --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result @@ -0,0 +1,18 @@ +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +INSERT INTO t VALUES (10); +SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t VALUES (20); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +"One row should be present in table 't'" +SELECT COUNT(*) FROM t; +COUNT(*) +1 +"gtid_binlog_state should be 0-1-2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-2 +DROP TABLE t; diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result new file mode 100644 index 00000000000..dd0939788d4 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result @@ -0,0 +1,33 @@ +SET @old_max_binlog_size= @@GLOBAL.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +FLUSH LOGS; +"List of binary logs before rotation" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +"List of binary logs after rotation" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# Kill the server +"Zero rows should be present in table" +SELECT COUNT(*) FROM t1; +COUNT(*) +0 +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-1 +DROP TABLE t1; +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-2 diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result new file mode 100644 index 00000000000..82034a28668 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result @@ -0,0 +1,30 @@ +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("tc-heuristic-recover cannot trim the binary log to"); +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO ti VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +INSERT INTO tm VALUES (30);; +# Kill the server +FOUND /tc-heuristic-recover cannot trim the binary log/ in mysqld.1.err +"Zero rows should be present in 'ti' table." +SELECT COUNT(*) FROM ti; +COUNT(*) +0 +"One row must be present in 'tm' table." +SELECT COUNT(*) FROM tm; +COUNT(*) +1 +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-4 +DROP TABLE ti, tm; diff --git a/mysql-test/suite/binlog/r/binlog_truncate_retry_success.result b/mysql-test/suite/binlog/r/binlog_truncate_retry_success.result new file mode 100644 index 00000000000..809ec7bba0c --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_retry_success.result @@ -0,0 +1,29 @@ +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("tc-heuristic-recover: Failed to open the binlog"); +call mtr.add_suppression("Heuristic BINLOG_TRUNCATE crash recovery failed. Error:"); +RESET MASTER; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1,'dummy'); +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2,'dummy'); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +"List of binary logs" +show binary logs; +Log_name File_size +master-bin.000001 # +# Kill the server +FOUND /tc-heuristic-recover: Truncated binlog File: \.\/master\-bin\.000001 of Size:[0-9]*, to Position */ in mysqld.1.err +"One row should be present in table" +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +"Two gtids should be present 0-1-2, one for CREATE and the other for INSERT" +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-2 +DROP TABLE t1; +"Three gtids should be present 0-1-3 after table 't1' is dropped." +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 diff --git a/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test new file mode 100644 index 00000000000..9d4331fe661 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test @@ -0,0 +1,75 @@ +# ==== Purpose ==== +# +# Test verifies the truncation of single binary log file. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create table t1 and insert a row. +# 1 - Insert an another row such that it gets written to binlog but commit +# in engine fails as server crashed at this point. +# 2 - Restart server with --tc-heuristic-recover=BINLOG_TRUNCATE +# 3 - Upon server start 'master-bin.000001' will be truncated to contain +# only the first insert +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +connect(master,localhost,root,,); +connect(master1,localhost,root,,); + +--connection master +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +INSERT INTO t VALUES (10); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go"; +send INSERT INTO t VALUES (20); + +--connection master +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=BINLOG_TRUNCATE +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection master +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo "One row should be present in table 't'" +SELECT COUNT(*) FROM t; + +--echo "gtid_binlog_state should be 0-1-2 +SELECT @@GLOBAL.gtid_binlog_state; +DROP TABLE t; diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test new file mode 100644 index 00000000000..ef7bf8790f3 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test @@ -0,0 +1,87 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create a table in innodb engine and execute FLUSH LOGS command to +# generate a new binary log. +# 1 - Set max_binlog_size= 4096. Insert a row such that the max_binlog_size +# is reached and binary log gets rotated. +# 2 - Using debug simulation make the server crash at a point where the DML +# transaction is written to binary log but not committed in engine. +# 3 - At the time of crash three binary logs will be there +# master-bin.0000001, master-bin.000002 and master-bin.000003. +# 4 - Restart server with --tc-heuristic-recover=BINLOG_TRUNCATE +# 5 - Since the prepared DML in master-bin.000002 the binary log will be +# truncated and master-bin.000003 will be removed. +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +SET @old_max_binlog_size= @@GLOBAL.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +RESET MASTER; + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +FLUSH LOGS; +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine. +# Use "commit_after_release_LOCK_log" sync point. This point is reached after +# the binary log end position is updated which actually triggers binlog to be +# rotated. +--echo "List of binary logs before rotation" +--source include/show_binary_logs.inc +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +send INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--echo "List of binary logs after rotation" +--source include/show_binary_logs.inc +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=BINLOG_TRUNCATE +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo "Zero rows should be present in table" +SELECT COUNT(*) FROM t1; + +SELECT @@GLOBAL.gtid_current_pos; + +DROP TABLE t1; +SELECT @@GLOBAL.gtid_binlog_state; + diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test new file mode 100644 index 00000000000..e8adf43b89e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test @@ -0,0 +1,106 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs will report an error on +# unsafe scenario. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Set max_binlog_size= 4096. Create a table 'ti' using transactional +# storage engine. Do an insert such that the max_binlog_size is reached +# and binary log gets rotated. Hold this thread at a position where +# transaction is written to binary log but not committed in engine. +# 1 - Create a table named 'tm' using non transactional storage engine. +# 2 - Insert a row in 'tm' table. The DML will reach the engine and it is +# also written to binary log. +# 3 - Kill and restart the server with --tc-heuristic-recover=BINLOG_TRUNCATE +# 4 - Check for binary log truncation unsafe message in error log. +# 5 - No rows should be present in 'ti' table. One row should be present in +# 'tm' table. +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; + +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("tc-heuristic-recover cannot trim the binary log to"); +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +RESET MASTER; + +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; + +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go"; +--send INSERT INTO ti VALUES (2, REPEAT("x", 4100)) + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--send INSERT INTO tm VALUES (30); + +--connection default +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=BINLOG_TRUNCATE --debug-dbug=d,simulate_innodb_forget_commit_pos +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=tc-heuristic-recover cannot trim the binary log +--source include/search_pattern_in_file.inc + +--echo "Zero rows should be present in 'ti' table." +SELECT COUNT(*) FROM ti; +--echo "One row must be present in 'tm' table." +--replace_regex /Table '.*tm/Table 'tm/ +--disable_warnings +SELECT COUNT(*) FROM tm; +--enable_warnings +SELECT @@GLOBAL.gtid_current_pos; + +DROP TABLE ti, tm; diff --git a/mysql-test/suite/binlog/t/binlog_truncate_retry_success.test b/mysql-test/suite/binlog/t/binlog_truncate_retry_success.test new file mode 100644 index 00000000000..4bef1c53c1d --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_retry_success.test @@ -0,0 +1,105 @@ +# ==== Purpose ==== +# +# Test verifies that tc-heuristic-recover=BINLOG_TRUNCATE can be retried. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create a table in Innodb storage engine. Insert a row into the table. +# 1 - Do an another DML into the table, and simulate a crash in the middle +# of DML commit, so that DML is present in binary log but not committed in +# the storage engine. +# 2 - Restart the server using --tc-heuristic-recover=BINLOG_TRUNCATE +# simulating an erroor during the binary log rollback operation. +# Verify appropriate error is reported in the error log. +# 3 - Remove the simulation retry the --tc-heuristic-recver=BINLOG_TRUNCATE. +# Verify that binary log gets truncated as expected. +# 4 - Verify that global gtid state is according to the rolled back +# transaction. +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/not_embedded.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("tc-heuristic-recover: Failed to open the binlog"); +call mtr.add_suppression("Heuristic BINLOG_TRUNCATE crash recovery failed. Error:"); + +RESET MASTER; + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1,'dummy'); +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); + +--connection master1 +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +send INSERT INTO t1 VALUES (2,'dummy'); + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--echo "List of binary logs" +--source include/show_binary_logs.inc +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restart +# The purpose of debug_dbug's "d,skip_new_binlog_create" is to avoid binlog +# at the end of heuristic recovery\footnote{When the simulated error occured for a real +# user she would have to manually remove the created binlog file, if retry +# makes sense}. +# +--error 1 +--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=BINLOG_TRUNCATE --debug-dbug=d,fault_injection_opening_binlog,skip_new_binlog_create > $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 +--source include/wait_until_disconnected.inc + +--error 1 +--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=BINLOG_TRUNCATE > $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=tc-heuristic-recover: Truncated binlog File: \.\/master\-bin\.000001 of Size:[0-9]*, to Position * +--source include/search_pattern_in_file.inc + +--echo "One row should be present in table" +SELECT COUNT(*) FROM t1; + +--echo "Two gtids should be present 0-1-2, one for CREATE and the other for INSERT" +SELECT @@GLOBAL.gtid_current_pos; + +DROP TABLE t1; +--echo "Three gtids should be present 0-1-3 after table 't1' is dropped." +SELECT @@GLOBAL.gtid_binlog_state; + +--disconnect master1 +--disconnect master2 diff --git a/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result b/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result new file mode 100644 index 00000000000..02fc47def41 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result @@ -0,0 +1,53 @@ +include/rpl_init.inc [topology=1->2] +include/stop_slave.inc +set global rpl_semi_sync_slave_enabled = 1; +CHANGE MASTER TO master_use_gtid= current_pos; +include/start_slave.inc +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); +SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-5 +FOUND /tc-heuristic-recover: Truncated binlog File: \.\/master\-bin\.000001 of Size:[0-9]*, to Position */ in mysqld.1.err +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=CURRENT_POS; +set global rpl_semi_sync_slave_enabled = 1; +include/start_slave.inc +INSERT INTO t1 VALUES (3, 'dummy3'); +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SHOW VARIABLES LIKE 'gtid_current_pos'; +Variable_name Value +gtid_current_pos 0-2-6 +SHOW VARIABLES LIKE 'gtid_current_pos'; +Variable_name Value +gtid_current_pos 0-2-6 +DROP TABLE t1; +set global rpl_semi_sync_master_enabled = 0; +set global rpl_semi_sync_slave_enabled = 0; +set global rpl_semi_sync_master_wait_point=default; +set global rpl_semi_sync_master_enabled = 0; +set global rpl_semi_sync_slave_enabled = 0; +set global rpl_semi_sync_master_wait_point=default; +include/stop_slave.inc +RESET MASTER; +RESET SLAVE; +RESET MASTER; +RESET SLAVE; +CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test b/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test new file mode 100644 index 00000000000..f1db2e6a89a --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test @@ -0,0 +1,160 @@ +# ==== Purpose ==== +# +# Test verifies replication failover scenario. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Have two servers with id's 1 and 2. Enable semi-sync based +# replication. Have semi sync master wait point as 'after_sync'. +# 1 - Create a table and insert a row. While inserting second row simulate +# a server crash at once the transaction is written to binlog, flushed +# and synced but the binlog position is not updated. +# 2 - Restart the server using --tc-heuristic-recover=BINLOG_TRUNCATE +# 3 - Post restart switch the crashed master to slave. Execute CHANGE MASTER +# TO command to connect to server id 2. +# 4 - Slave should be able to connect to master. +# 5 - Execute some DML on new master with server id 2. Ensure that it gets +# replicated to server id 1. +# 6 - Verify the "gtid_current_pos" for correctness. +# 7 - Clean up +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_semisync.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc + +--connection server_2 +--source include/stop_slave.inc +set global rpl_semi_sync_slave_enabled = 1; +CHANGE MASTER TO master_use_gtid= current_pos; +--source include/start_slave.inc + + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); +--save_master_pos + +--connection server_2 +--sync_with_master + +--connection server_1 +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go"; +send INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +--connection server_2 +--error 2003 +--source include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; + +--connection server_1 +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=BINLOG_TRUNCATE +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection server_1 +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_= `SELECT @@GLOBAL.log_error`; +if(!$log_error_) +{ + # MySQL Server on windows is started with --console and thus + # does not know the location of its .err log, use default location + let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; +} +--let SEARCH_FILE=$log_error_ +--let SEARCH_RANGE=-50000 +--let SEARCH_PATTERN=tc-heuristic-recover: Truncated binlog File: \.\/master\-bin\.000001 of Size:[0-9]*, to Position * +--source include/search_pattern_in_file.inc + +--connection server_2 +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; + +--connection server_1 +--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2 +eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=CURRENT_POS; +set global rpl_semi_sync_slave_enabled = 1; +--source include/start_slave.inc + +--connection server_2 +INSERT INTO t1 VALUES (3, 'dummy3'); +--save_master_pos + +--connection server_1 +--sync_with_master +SELECT COUNT(*) FROM t1; +SHOW VARIABLES LIKE 'gtid_current_pos'; + +--connection server_2 +SHOW VARIABLES LIKE 'gtid_current_pos'; +DROP TABLE t1; +set global rpl_semi_sync_master_enabled = 0; +set global rpl_semi_sync_slave_enabled = 0; +set global rpl_semi_sync_master_wait_point=default; +--save_master_pos + +--connection server_1 +--sync_with_master +set global rpl_semi_sync_master_enabled = 0; +set global rpl_semi_sync_slave_enabled = 0; +set global rpl_semi_sync_master_wait_point=default; +--source include/stop_slave.inc +RESET MASTER; +RESET SLAVE; + +--connection server_2 +RESET MASTER; +RESET SLAVE; +--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 +eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +--source include/start_slave.inc + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index 76a187f4312..42cd048afc8 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1249,6 +1249,29 @@ int ha_prepare(THD *thd) DBUG_RETURN(error); } +/* + Returns counted number of + read-write recoverable transaction participants optionally limited to two. + Also optionally returns the last found rw ha_info through the 2nd argument. +*/ +uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info, bool count_through) +{ + unsigned rw_ha_count= 0; + + for (Ha_trx_info * ha_info= thd->transaction.all.ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht()->recover) + { + if (ptr_ha_info) + *ptr_ha_info= ha_info; + if (++rw_ha_count > 1 && !count_through) + break; + } + } + return rw_ha_count; +} + /** Check if we can skip the two-phase commit. @@ -1866,7 +1889,7 @@ static char* xid_to_str(char *buf, XID *xid) recover() step of xa. @note - there are three modes of operation: + there are four modes of operation: - automatic recover after a crash in this case commit_list != 0, tc_heuristic_recover==0 all xids from commit_list are committed, others are rolled back @@ -1877,6 +1900,9 @@ static char* xid_to_str(char *buf, XID *xid) - no recovery (MySQL did not detect a crash) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. + - recovery to truncated binlog to the last committed transaction + in any engine. Other prepared following binlog order transactions are + rolled back. */ struct xarecover_st { @@ -1884,8 +1910,95 @@ struct xarecover_st XID *list; HASH *commit_list; bool dry_run; + MEM_ROOT *mem_root; + bool error; }; +#ifdef HAVE_REPLICATION +/* + Inserts a new hash member. + + returns a successfully created and inserted @c xid_recovery_member + into hash @c hash_arg, + or NULL. +*/ +static xid_recovery_member* +xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member *member= (xid_recovery_member*) + alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); + if (!member) + return NULL; + + member->xid= xid_arg; + member->in_engine_prepare= 1; + return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member; +} + +/* + Inserts a new or updates an existing hash member. + + returns false on success, + true otherwise. +*/ +static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, + MEM_ROOT *ptr_mem_root) +{ + /* + Search if XID is already present in recovery_list. If found + and the state is 'XA_PREPRAED' mark it as XA_COMPLETE. + Effectively, there won't be XA-prepare event group replay. + */ + xid_recovery_member* member; + if ((member= (xid_recovery_member *) + my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) + member->in_engine_prepare++; + else + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root); + + return member == NULL; +} + +/* + Hash iterate function to complete with commit or rollback as + decided at binlog scanning. +*/ +static my_bool xarecover_do_commit_or_rollback(void *member_arg, + void *hton_arg) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + handlerton *hton= (handlerton*) hton_arg; + xid_t x; + my_bool rc; + + x.set(member->xid); + rc= member->in_engine_prepare > 0 ? + hton->rollback_by_xid(hton, &x) : hton->commit_by_xid(hton, &x); + + return rc; +} + +static my_bool xarecover_binlog_truncate_handlerton(THD *unused, + plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + + if (hton->state == SHOW_OPTION_YES && hton->recover) + { + my_hash_iterate((HASH*) arg, xarecover_do_commit_or_rollback, hton); + } + + return FALSE; +} + +void ha_recover_binlog_truncate_complete(HASH *commit_list) +{ + plugin_foreach(NULL, xarecover_binlog_truncate_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, commit_list); +} +#endif + static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, void *arg) { @@ -1893,13 +2006,16 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, struct xarecover_st *info= (struct xarecover_st *) arg; int got; + if (info->error) + return TRUE; + if (hton->state == SHOW_OPTION_YES && hton->recover) { while ((got= hton->recover(hton, info->list, info->len)) > 0 ) { sql_print_information("Found %d prepared transaction(s) in %s", got, hton_name(hton)->str); - for (int i=0; i < got; i ++) + for (int i=0; i < got && !info->error; i ++) { my_xid x= WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? wsrep_xid_seqno(info->list[i]) : @@ -1936,7 +2052,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, } #endif } - else + else if (tc_heuristic_recover != TC_RECOVER_BINLOG_TRUNCATE) { #ifndef DBUG_OFF int rc= @@ -1951,6 +2067,17 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, } #endif } +#ifdef HAVE_REPLICATION + else + { + if (xid_member_replace(info->commit_list, x, info->mem_root)) + { + info->error= true; + sql_print_error("Error in memory allocation at xarecover_handlerton"); + break; + } + } +#endif } if (got < info->len) break; @@ -1959,7 +2086,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, return FALSE; } -int ha_recover(HASH *commit_list) +int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root) { struct xarecover_st info; DBUG_ENTER("ha_recover"); @@ -1967,9 +2094,12 @@ int ha_recover(HASH *commit_list) info.commit_list= commit_list; info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); info.list= NULL; + info.mem_root= arg_mem_root; + info.error= false; /* commit_list and tc_heuristic_recover cannot be set both */ - DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); + DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0 || + tc_heuristic_recover > 2); /* if either is set, total_ha_2pc must be set too */ DBUG_ASSERT(info.dry_run || (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log); @@ -2011,6 +2141,9 @@ int ha_recover(HASH *commit_list) info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } + if (info.error) + DBUG_RETURN(1); + if (info.commit_list) sql_print_information("Crash recovery finished."); DBUG_RETURN(0); diff --git a/sql/handler.h b/sql/handler.h index c6cac5dfbbe..73af3b550fd 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -638,6 +638,13 @@ struct xid_t { }; typedef struct xid_t XID; +/* struct for heuristic binlog truncate recovery */ +struct xid_recovery_member +{ + my_xid xid; + uint in_engine_prepare; // number of engines that have xid prepared +}; + /* for recover() handlerton call */ #define MIN_XID_LIST_SIZE 128 #define MAX_XID_LIST_SIZE (1024*128) @@ -4270,7 +4277,8 @@ int ha_commit_one_phase(THD *thd, bool all); int ha_commit_trans(THD *thd, bool all); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); -int ha_recover(HASH *commit_list); +int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL); +void ha_recover_binlog_truncate_complete(HASH *commit_list); /* transactions: these functions never call handlerton functions directly */ int ha_enable_transaction(THD *thd, bool on); @@ -4326,4 +4334,5 @@ void print_keydup_error(TABLE *table, KEY *key, myf errflag); int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info); int del_global_table_stat(THD *thd, LEX_STRING *db, LEX_STRING *table); +uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info, bool count_through); #endif /* HANDLER_INCLUDED */ diff --git a/sql/log.cc b/sql/log.cc index f2fe0d852d1..aa6b1293200 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -4746,7 +4746,7 @@ int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space, } goto err; } - + error= 0; DBUG_PRINT("info",("purging %s",log_info.log_file_name)); @@ -7882,6 +7882,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) first= false; } + DEBUG_SYNC(leader->thd, "commit_before_update_end_pos"); /* update binlog_end_pos so it can be read by dump thread * * note: must be _after_ the RUN_HOOK(after_flush) or else @@ -8969,7 +8970,7 @@ int TC_LOG_MMAP::open(const char *opt_name) { if (my_errno != ENOENT) goto err; - if (using_heuristic_recover()) + if (using_heuristic_recover(opt_name)) return 1; if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE, O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0) @@ -9500,23 +9501,596 @@ TC_LOG_MMAP tc_log_mmap; 0 no heuristic recovery was requested @retval 1 heuristic recovery was performed + 2 heuristic recovery failed */ -int TC_LOG::using_heuristic_recover() +int TC_LOG::using_heuristic_recover(const char* opt_name) { + LOG_INFO log_info; + int error= 0; + HASH xids, *ptr_xids= NULL; + MEM_ROOT mem_root, *ptr_mem_root= NULL; + int rc= 1; + if (!tc_heuristic_recover) - return 0; + return (rc= 0); sql_print_information("Heuristic crash recovery mode"); - if (ha_recover(0)) + if (tc_heuristic_recover == TC_RECOVER_BINLOG_TRUNCATE) + { + (void) my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, + sizeof(my_xid), 0, 0, MYF(0)); + (void) init_alloc_root(&mem_root, + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); + ptr_xids= &xids; + ptr_mem_root= &mem_root; + } + if ((error= ha_recover(ptr_xids, ptr_mem_root))) + { sql_print_error("Heuristic crash recovery failed"); + goto err; + } + + if (!strcmp(opt_name, opt_bin_logname) && + tc_heuristic_recover == TC_RECOVER_BINLOG_TRUNCATE) + { + if ((error= mysql_bin_log.find_log_pos(&log_info, NullS, 1))) + sql_print_error("tc-heuristic-recover failed to find binlog file in " + "the index file. Error: %d; consider recovering " + "the index file and retry", error); + else if ((error= heuristic_binlog_rollback(&xids))) + sql_print_error("Heuristic BINLOG_TRUNCATE crash recovery failed. " + "Error: %d", error); + if (error > 0) + rc= 2; + } + +err: sql_print_information("Please restart mysqld without --tc-heuristic-recover"); - return 1; + if (tc_heuristic_recover == TC_RECOVER_BINLOG_TRUNCATE) + { + free_root(&mem_root, MYF(0)); + my_hash_free(&xids); + } + return rc; } /****** transaction coordinator log for 2pc - binlog() based solution ******/ #define TC_LOG_BINLOG MYSQL_BIN_LOG +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. + + @retval true ok + @retval false error + +*/ +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos) +{ + int error= 0; +#ifdef HAVE_REPLICATION + LOG_INFO log_info; + THD *thd= current_thd; + my_off_t index_file_offset= 0; + File file= -1; + IO_CACHE cache; + MY_STAT s; + my_off_t binlog_size; + + if ((error= find_log_pos(&log_info, truncate_file, 1))) + { + sql_print_error("tc-heuristic-recover: Failed to locate binary log file:%s." + "Error:%d", truncate_file, error); + goto end; + } + + while (!(error= find_next_log(&log_info, 1))) + { + if (!index_file_offset) + { + index_file_offset= log_info.index_file_start_offset; + if ((error= open_purge_index_file(TRUE))) + { + sql_print_error("tc-heuristic-recover: Failed to open purge index " + "file:%s. Error:%d", purge_index_file_name, error); + goto end; + } + } + if ((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("tc-heuristic-recover: Failed to copy %s to purge index" + " file. Error:%d", log_info.log_file_name, error); + goto end; + } + } + + if (error != LOG_INFO_EOF) + { + sql_print_error("tc-heuristic-recover: Failed to find the next binlog to " + "add to purge index register. Error:%d", error); + goto end; + } + + if (is_inited_purge_index_file()) + { + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + + if ((error= sync_purge_index_file())) + { + sql_print_error("tc-heuristic-recover: Failed to flush purge index " + "file. Error:%d", error); + goto end; + } + + // Trim index file + if ((error= + mysql_file_chsize(index_file.file, index_file_offset, '\n', + MYF(MY_WME))) || + (error= + mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("tc-heuristic-recover: Failed to trim binlog index " + "file:%s to offset:%llu. Error:%d", index_file_name, + index_file_offset); + mysql_file_close(index_file.file, MYF(MY_WME)); + goto end; + } + + /* Reset data in old index cache */ + if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1))) + { + sql_print_error("tc-heuristic-recover: Failed to reinit binlog index " + "file. Error:%d", error); + mysql_file_close(index_file.file, MYF(MY_WME)); + goto end; + } + + /* Read each entry from purge_index_file and delete the file. */ + if ((error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("tc-heuristic-recover: Failed to process registered " + "files that would be purged."); + goto end; + } + } + + DBUG_ASSERT(truncate_pos); + + if ((file= mysql_file_open(key_file_binlog, truncate_file, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + error= 1; + sql_print_error("tc-heuristic-recover: Failed to open binlog file:%s for " + "truncation.", truncate_file); + goto end; + } + my_stat(truncate_file, &s, MYF(0)); + binlog_size= s.st_size; + + /* Change binlog file size to truncate_pos */ + if ((error= + mysql_file_chsize(file, truncate_pos, 0, MYF(MY_WME))) || + (error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("tc-heuristic-recover: Failed to trim the " + "binlog file:%s to size:%llu. Error:%d", + truncate_file, truncate_pos, error); + goto end; + } + else + { + sql_print_information("tc-heuristic-recover: Truncated binlog " + "File: %s of Size:%llu, to Position:%llu.", + truncate_file, binlog_size, truncate_pos); + } + if (!(error= init_io_cache(&cache, file, IO_SIZE, WRITE_CACHE, + (my_off_t) truncate_pos, 0, MYF(MY_WME|MY_NABP)))) + { + /* + Write Stop_log_event to ensure clean end point for the binary log being + truncated. + */ + Stop_log_event se; + se.checksum_alg= (enum_binlog_checksum_alg) binlog_checksum_options; + if ((error= write_event(&se, &cache))) + { + sql_print_error("tc-heuristic-recover: Failed to write stop event to " + "binary log. Errno:%d", + (cache.error == -1) ? my_errno : error); + goto end; + } + if ((error= flush_io_cache(&cache)) || + (error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))) + { + sql_print_error("tc-heuristic-recover: Faild to write stop event to " + "binary log. Errno:%d", + (cache.error == -1) ? my_errno : error); + } + } + else + sql_print_error("tc-heuristic-recover: Failed to initialize binary log " + "cache for writing stop event. Errno:%d", + (cache.error == -1) ? my_errno : error); + +end: + if (file >= 0) + { + end_io_cache(&cache); + mysql_file_close(file, MYF(MY_WME)); + } + error= error || close_purge_index_file(); +#endif + return error > 0; +} + +/** + Returns the checkpoint binlog file name found in the lastest binlog file. + + @param checkpoint_file Holds the binlog checkpoint file name. + + @retval 0 ok + @retval 1 error + +*/ +int TC_LOG_BINLOG::get_binlog_checkpoint_file(char* checkpoint_file) +{ + Log_event *ev= NULL; + bool binlog_checkpoint_found= false; + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file; + Format_description_log_event fdle(BINLOG_VERSION); + char log_name[FN_REFLEN]; + int error=1; + + if (!fdle.is_valid()) + return 1; + + if ((error= find_log_pos(&log_info, NullS, 1))) + { + sql_print_error("tc-heuristic-recover: find_log_pos() failed to read first " + "binary log entry from index file.(error: %d)", error); + return error; + } + + // Move to the last binary log. + do + { + strmake_buf(log_name, log_info.log_file_name); + } while (!(error= find_next_log(&log_info, 1))); + + if (error != LOG_INFO_EOF) + { + sql_print_error("tc-heuristic-recover: find_next_log() failed " + "(error: %d)", error); + return error; + } + if ((file= open_binlog(&log, log_name, &errmsg)) < 0) + { + sql_print_error("tc-heuristic-recover failed to open the binlog:%s for " + "reading checkpoint file name. Error: %s", + log_info.log_file_name, errmsg); + return error; + } + while (!binlog_checkpoint_found && + (ev= + Log_event::read_log_event(&log, 0, &fdle, opt_master_verify_checksum)) + && ev->is_valid()) + { + enum Log_event_type typ= ev->get_type_code(); + if (typ == BINLOG_CHECKPOINT_EVENT) + { + size_t dir_len; + Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; + if (cev->binlog_file_len >= FN_REFLEN) + { + sql_print_error("tc-heuristic-recover: Incorrect binlog checkpoint " + "event with too long file name found."); + delete ev; + ev= NULL; + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + return 1; + } + else + { + dir_len= dirname_length(log_name); + strmake(strnmov(checkpoint_file, log_name, dir_len), + cev->binlog_file_name, FN_REFLEN - 1 - dir_len); + binlog_checkpoint_found= true; + } + } + delete ev; + ev= NULL; + } // End of while + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + /* + Old binary log without checkpoint found, binlog truncation is not + possible. Hence return error. + */ + if (!binlog_checkpoint_found) + return 1; + + return 0; +} + + +/** + Truncates the binary log, according to the transactions that got rolled + back from engines, during --heuristic-recover=BINLOG_TRUNCATE. + Global GTID state is adjusted as per the truncated binlog. + + Called from @c TC_LOG::using_heuristic_recover(const char* opt_name) + + @param opt_name The base name of binary log. + + @return indicates success or failure of binlog rollback + @retval 0 success + @retval 1 failure + +*/ +int TC_LOG_BINLOG::heuristic_binlog_rollback(HASH *xids) +{ + int error=0; +#ifdef HAVE_REPLICATION + Log_event *ev= NULL; + char binlog_truncate_file_name[FN_REFLEN] = {0}; + char checkpoint_file[FN_REFLEN]; + my_off_t binlog_truncate_pos= 0; + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file=-1; + Format_description_log_event fdle(BINLOG_VERSION); + bool is_safe= true; + my_off_t tmp_truncate_pos= 0, tmp_pos= 0; + rpl_gtid last_gtid; + bool last_gtid_standalone= false; + bool last_gtid_valid= false; + uint last_gtid_engines= 0; + + if ((error= get_binlog_checkpoint_file(checkpoint_file))) + { + sql_print_error("tc-heuristic-recover: Failed to read latest checkpoint " + "binary log name."); + goto end; + } + sql_print_information("tc-heuristic-recover: Initialising heuristic " + "rollback of binary log using last checkpoint " + "file:%s.", checkpoint_file); + if ((error= find_log_pos(&log_info, checkpoint_file, 1))) + { + sql_print_error("tc-heuristic-recover: Failed to locate binary log file:%s " + "in index file. Error:%d", checkpoint_file, error); + goto end; + } + if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0 || + DBUG_EVALUATE_IF("fault_injection_opening_binlog", (errmsg="Unknown"), + FALSE)) + { + error= 1; + sql_print_error("tc-heuristic-recover: Failed to open the binlog:%s for " + "recovery. Error:%s", log_info.log_file_name, errmsg); + goto end; + } + + + error= read_state_from_file(); + if (error && error != 2) + { + sql_print_error("tc-heuristic-recover: Failed to load global gtid binlog " + "state from file"); + goto end; + } + if (!fdle.is_valid()) + { + error= 1; + sql_print_error("tc-heuristic-recover: Failed due to invalid format " + "description log event."); + goto end; + } + + for(;;) + { + while (is_safe && + (ev= Log_event::read_log_event(&log, 0, &fdle, + opt_master_verify_checksum)) && ev->is_valid()) + { + enum Log_event_type typ= ev->get_type_code(); + switch (typ) { + case XID_EVENT: + { + xid_recovery_member *member; + + if ((member= (xid_recovery_member*) + my_hash_search(xids, + (uchar*) &static_cast<Xid_log_event*>(ev)->xid, + sizeof(my_xid))) != NULL) + { + // Possible truncate candidate validation follows as: + // in_engine_prepare is examined and set or left to stay + // either to/as 0 for to-commit mark, or non-zero for rollback + if (member->in_engine_prepare > last_gtid_engines) + { + sql_print_error("Error to recovery multi-engine transaction: " + "the number of engines %du exceeds the " + "respective number %du in its GTID event", + member->in_engine_prepare, last_gtid_engines); + error= 1; + goto end; + } + else if (member->in_engine_prepare < last_gtid_engines) + { + member->in_engine_prepare= 0; // partly committed, to complete + } + else + { + DBUG_ASSERT(binlog_truncate_pos == 0); + + binlog_truncate_pos= tmp_truncate_pos; // ascertained now + } + } + } + break; + case GTID_LIST_EVENT: + { + Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; + /* Initialise the binlog state from the Gtid_list event. */ + if (binlog_truncate_pos == 0 && glev->count > 0 && + rpl_global_gtid_binlog_state.load(glev->list, glev->count)) + { + error= 1; + sql_print_error("tc-heuristic-recover: Failed to read GTID List " + "event."); + goto end; + } + } + break; + case GTID_EVENT: + { + Gtid_log_event *gev= (Gtid_log_event *)ev; + /* Update the binlog state with any GTID logged after Gtid_list. */ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_standalone= + ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false); + last_gtid_valid= true; + last_gtid_engines= gev->extra_engines + 1; + if (gev->flags2 & Gtid_log_event::FL_TRANSACTIONAL && + binlog_truncate_pos == 0) + { + strmake_buf(binlog_truncate_file_name, log_info.log_file_name); + tmp_truncate_pos= tmp_pos; // yet only a candidate + } + else + { + if (binlog_truncate_pos > 0) + is_safe= false; + } + } + break; + default: + /* Nothing. */ + break; + } // End switch + + if (binlog_truncate_pos == 0 && last_gtid_valid && + ((last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!last_gtid_standalone && + (typ == XID_EVENT || + (typ == QUERY_EVENT && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))))) + { + if ((error= rpl_global_gtid_binlog_state.update_nolock(&last_gtid, + false))) + { + sql_print_error("tc-heuristic-recover: Failed to update GTID within " + "global gtid state."); + goto end; + } + last_gtid_valid= false; + } + // Used to identify the last group specific end position. + tmp_pos= ev->log_pos; + delete ev; + ev= NULL; + } // End While + if (file >= 0) + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + } + if (is_safe) + { + if ((error= find_next_log(&log_info, 1))) + { + if (error != LOG_INFO_EOF) + { + sql_print_error("tc-heuristic-recover: Failed to read next binary " + "log during recovery."); + goto end; + } + else + { + error= 0; // LOG_INFO_EOF= -1 is not an error. + break; + } + } + if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0) + { + error= 1; + sql_print_error("tc-heuristic-recover: Failed to open the binlog:%s for " + "recovery. Error:%s", log_info.log_file_name, errmsg); + goto end; + } + } + else + break; + } //end of for(;;) + + /* complete with xids transactions in engines (regadless of is_safe) */ + (void) ha_recover_binlog_truncate_complete(xids); + + if (binlog_truncate_pos == 0) + goto end; // Nothing to truncate + else + { + DBUG_ASSERT(binlog_truncate_pos > 0); + + sql_print_information("tc-heuristic-recover: Binary log to be truncated " + "File:%s Pos:%llu.", binlog_truncate_file_name, + binlog_truncate_pos); + } + + if (is_safe) + { + if ((error= truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_pos))) + { + sql_print_error("tc-heuristic-recover: Failed to trim the binary log to " + "File:%s Pos:%llu.", binlog_truncate_file_name, + binlog_truncate_pos); + goto end; + } + } + else + { + sql_print_warning("tc-heuristic-recover cannot trim the binary log to " + "File:%s Pos:%llu as unsafe statements (non-trans/DDL) " + "statements are found beyond the truncation position.", + binlog_truncate_file_name, binlog_truncate_pos); + } + if ((error= write_state_to_file())) + { + sql_print_error("tc-heuristic-recover: Failed to write global gtid state " + "to file"); + goto end; + } + +end: + if (file >= 0) + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + } +#endif + + return error; +} + int TC_LOG_BINLOG::open(const char *opt_name) { int error= 1; @@ -9531,11 +10105,13 @@ int TC_LOG_BINLOG::open(const char *opt_name) return 1; } - if (using_heuristic_recover()) + if (using_heuristic_recover(opt_name)) { mysql_mutex_lock(&LOCK_log); /* generate a new binlog to mask a corrupted one */ - open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE); + DBUG_EVALUATE_IF("skip_new_binlog_create", 0, + open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, + max_binlog_size, 0, TRUE)); mysql_mutex_unlock(&LOCK_log); cleanup(); return 1; diff --git a/sql/log.h b/sql/log.h index 277e5c6f69c..00806c46cc5 100644 --- a/sql/log.h +++ b/sql/log.h @@ -41,7 +41,8 @@ bool stmt_has_updated_non_trans_table(const THD* thd); class TC_LOG { public: - int using_heuristic_recover(); + int using_heuristic_recover(const char* opt_name); + virtual int heuristic_binlog_rollback(HASH *commit_hash) { return 0; }; TC_LOG() {} virtual ~TC_LOG() {} @@ -694,6 +695,7 @@ public: void commit_checkpoint_notify(void *cookie); int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log, Format_description_log_event *fdle, bool do_xa); + int heuristic_binlog_rollback(HASH *commit_hash); int do_binlog_recovery(const char *opt_name, bool do_xa_recovery); #if !defined(MYSQL_CLIENT) @@ -794,6 +796,9 @@ public: int purge_first_log(Relay_log_info* rli, bool included); int set_purge_index_file_name(const char *base_file_name); int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos); + int get_binlog_checkpoint_file(char* checkpoint_file); bool is_inited_purge_index_file(); int close_purge_index_file(); int clean_purge_index_file(); diff --git a/sql/log_event.cc b/sql/log_event.cc index 146d8bd5dbe..ba254f784c6 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -55,6 +55,7 @@ #define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1)) using std::max; +using std::min; /** BINLOG_CHECKSUM variable. @@ -6703,10 +6704,13 @@ bool Binlog_checkpoint_log_event::write() Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, const Format_description_log_event *description_event) - : Log_event(buf, description_event), seq_no(0), commit_id(0) + : Log_event(buf, description_event), seq_no(0), commit_id(0), + flags_extra(0), extra_engines(0) { uint8 header_size= description_event->common_header_len; uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; + const char *buf_0= buf; + if (event_len < header_size + post_header_len || post_header_len < GTID_HEADER_LEN) return; @@ -6717,6 +6721,7 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, domain_id= uint4korr(buf); buf+= 4; flags2= *buf; + ++buf; if (flags2 & FL_GROUP_COMMIT_ID) { if (event_len < (uint)header_size + GTID_HEADER_LEN + 2) @@ -6724,9 +6729,26 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, seq_no= 0; // So is_valid() returns false return; } - ++buf; commit_id= uint8korr(buf); + buf+= 8; } + // the extra flags check and actions + if (buf - buf_0 < event_len) + { + flags_extra= *buf; + ++buf; + /* extra flags presence is identifed by non-zero byte value at this point */ + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + extra_engines= uint4korr(buf); + buf += 4; + + DBUG_ASSERT(extra_engines > 0); + } + } + /* the '<' part of the assert corresponds to zero-padded trailing bytes */ + DBUG_ASSERT(buf - buf_0 <= event_len); + DBUG_ASSERT(buf - buf_0 == event_len || buf_0[event_len - 1] == 0); } @@ -6738,12 +6760,13 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, uint64 commit_id_arg) : Log_event(thd_arg, flags_arg, is_transactional), seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), - flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)) + flags2((standalone ? FL_STANDALONE : 0) | + (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)), + flags_extra(0) { cache_type= Log_event::EVENT_NO_CACHE; if (thd_arg->transaction.stmt.trans_did_wait() || - thd_arg->transaction.all.trans_did_wait()) - flags2|= FL_WAITED; + thd_arg->transaction.all.trans_did_wait())flags2|= FL_WAITED; if (thd_arg->transaction.stmt.trans_did_ddl() || thd_arg->transaction.stmt.has_created_dropped_temp_table() || thd_arg->transaction.all.trans_did_ddl() || @@ -6756,6 +6779,8 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, /* Preserve any DDL or WAITED flag in the slave's binlog. */ if (thd_arg->rgi_slave) flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED)); + if ((extra_engines= (min<uint>(2, ha_count_rw_all(thd, NULL, true)) - 2) > 0)) + flags_extra|= FL_EXTRA_MULTI_ENGINE; } @@ -6798,8 +6823,8 @@ Gtid_log_event::peek(const char *event_start, size_t event_len, bool Gtid_log_event::write() { - uchar buf[GTID_HEADER_LEN+2]; - size_t write_len; + uchar buf[GTID_HEADER_LEN+2 + /* flags_extra: */ 1+4]; + size_t write_len= 13; int8store(buf, seq_no); int4store(buf+8, domain_id); @@ -6809,9 +6834,15 @@ Gtid_log_event::write() int8store(buf+13, commit_id); write_len= GTID_HEADER_LEN + 2; } - else + if (flags & FL_EXTRA_MULTI_ENGINE) + { + int4store(buf, extra_engines); + write_len += 4; + } + + if (write_len < GTID_HEADER_LEN) { - bzero(buf+13, GTID_HEADER_LEN-13); + bzero(buf+write_len, GTID_HEADER_LEN-write_len); write_len= GTID_HEADER_LEN; } return write_header(write_len) || diff --git a/sql/log_event.h b/sql/log_event.h index 58e1281c179..0f74eb21029 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -3193,6 +3193,12 @@ public: uint64 commit_id; uint32 domain_id; uchar flags2; + uint flags_extra; // more flags area placed after the regular flags2's one + /* + Extra (to binlog and a "base" engine) recoverable engines participating + in the transaction. Zero, when none. + */ + uint extra_engines; /* Flags2. */ @@ -3221,6 +3227,14 @@ public: /* FL_DDL is set for event group containing DDL. */ static const uchar FL_DDL= 32; + /* Flags_extra. */ + + /* + FL_EXTRA_MULTI_ENGINE is set for event group comprising a transaction + involving multiple storage engines. + */ + static const uchar FL_EXTRA_MULTI_ENGINE= 1; + #ifdef MYSQL_SERVER Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, uint16 flags, bool is_transactional, uint64 commit_id); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8d00b5af948..0fa0419c880 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -288,7 +288,7 @@ const char *show_comp_option_name[]= {"YES", "NO", "DISABLED"}; static const char *tc_heuristic_recover_names[]= { - "OFF", "COMMIT", "ROLLBACK", NullS + "OFF", "COMMIT", "ROLLBACK", "BINLOG_TRUNCATE", NullS }; static TYPELIB tc_heuristic_recover_typelib= { diff --git a/sql/sql_class.h b/sql/sql_class.h index 7ca3896a69d..ca6990d53cc 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -187,6 +187,7 @@ public: #define TC_HEURISTIC_RECOVER_COMMIT 1 #define TC_HEURISTIC_RECOVER_ROLLBACK 2 +#define TC_RECOVER_BINLOG_TRUNCATE 3 extern ulong tc_heuristic_recover; typedef struct st_user_var_events |