summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrei Elkin <andrei.elkin@mariadb.com>2021-01-05 18:08:25 +0200
committerAndrei Elkin <andrei.elkin@mariadb.com>2021-04-21 15:39:32 +0300
commit2a7dd64425b6be51715f54756446398618cd1789 (patch)
treec1c074638c8ff2a6252c0d79e14767bbf2a011b4
parentef2749c90d7f050e7c876003c55aa20d1dee202d (diff)
downloadmariadb-git-2a7dd64425b6be51715f54756446398618cd1789.tar.gz
MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file for recovery too eary
There was race between a committing transaction and the following in binlog order FLUSH LOGS that could create a 2nd Binlog checkpoint (BCP) event in the new file *before* the first logged-in-old-binlog transaction gets committed in Innodb. That would cause the transaction loss at recovery, should the server stop right after the BCP. The race is tackled by enforcing the necessary set of mutexes to be acquired by FLUSH-LOGS handler in the correct order (of the group commit leader pattern). Note, there remain two cases where a similar race is still possible: - the above race as it is when the server is run with ("unlikely") non-default `--binlog-optimize-thread-scheduling=0` (MDEV-24530), and - at unlikely event of bin-logging of Incident event (MDEV-24531) that also triggers binlog rotation, in both cases though with lesser chances after the current fixes.
-rw-r--r--mysql-test/suite/binlog/r/binlog_checkpoint_flush_logs.result51
-rw-r--r--mysql-test/suite/binlog/t/binlog_checkpoint_flush_logs.test79
-rw-r--r--sql/log.cc19
3 files changed, 149 insertions, 0 deletions
diff --git a/mysql-test/suite/binlog/r/binlog_checkpoint_flush_logs.result b/mysql-test/suite/binlog/r/binlog_checkpoint_flush_logs.result
new file mode 100644
index 00000000000..125324e2928
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_checkpoint_flush_logs.result
@@ -0,0 +1,51 @@
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+*** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
+connect con1,localhost,root,,;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (1, REPEAT("x", 1));
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
+FLUSH LOGS;
+connect con2,localhost,root,,;
+Trx_1 is not yet committed:
+SELECT count(*) as 'ZERO' from t1;
+ZERO
+0
+Wait for Trx_2 has rotated binlog:
+SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
+SET DEBUG_SYNC= "now SIGNAL default_go";
+connection default;
+Must be tree logs in the list:
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000001 # Gtid_list # # []
+master-bin.000001 # Binlog_checkpoint # # master-bin.000001
+master-bin.000001 # Gtid # # GTID #-#-#
+master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES (1, REPEAT("x", 1))
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Rotate # # master-bin.000002;pos=POS
+Only one Binlog checkpoint must exist and point to master-bin.000001
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000002 # Gtid_list # # [#-#-#]
+master-bin.000002 # Binlog_checkpoint # # master-bin.000001
+SELECT count(*) as 'ONE' from t1;
+ONE
+1
+connection default;
+DROP TABLE t1;
+SET debug_sync = 'reset';
diff --git a/mysql-test/suite/binlog/t/binlog_checkpoint_flush_logs.test b/mysql-test/suite/binlog/t/binlog_checkpoint_flush_logs.test
new file mode 100644
index 00000000000..e21f1eb308d
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_checkpoint_flush_logs.test
@@ -0,0 +1,79 @@
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+# References:
+#
+# MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file too eary
+#
+# The test for MDEV-24526 proves the fixes correct observed race condition
+# between a commiting transaction and FLUSH-LOGS.
+# The plot.
+# Trx_1 (con1) transaction binlogs first
+# to yield its turn acquiring LOCK_commit_ordered to Trx_2 and stand
+# still waiting of a signal that will never arrive.
+# Trx_2 can't acquire it in the fixed version even though
+# Trx_3 makes sure Trx_2 has reached a post-rotation execution point
+# to signal it to proceed.
+# Then the server gets crashed and Trx_1 must recover unlike
+# in the OLD buggy version.
+#
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+
+--echo *** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
+
+connect(con1,localhost,root,,); # Trx_1
+# hang before doing acquiring Commit Ordered mutex
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
+
+--send INSERT INTO t1 VALUES (1, REPEAT("x", 1))
+
+connection default; # Trx_2
+
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
+--send FLUSH LOGS
+
+connect(con2,localhost,root,,); # Trx_3
+--echo Trx_1 is not yet committed:
+SELECT count(*) as 'ZERO' from t1;
+
+--echo Wait for Trx_2 has rotated binlog:
+SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
+# Useless signal to prove Trx_2 cannot race Trx_1's commit
+# even though Trx_1 never received the being waited 'con1_go'.
+SET DEBUG_SYNC= "now SIGNAL default_go";
+
+--let $shutdown_timeout=0
+--source include/restart_mysqld.inc
+
+connection default;
+--enable_reconnect
+--error 0,2013
+--reap
+
+--echo Must be tree logs in the list:
+--source include/show_binary_logs.inc
+--let $binlog_file= master-bin.000001
+--let $binlog_start= 4
+--source include/show_binlog_events.inc
+
+--echo Only one Binlog checkpoint must exist and point to master-bin.000001
+--let $binlog_file= master-bin.000002
+--let $binlog_start= 4
+--source include/show_binlog_events.inc
+
+
+# In the buggy server version the following select may have
+# resulted with ZERO:
+SELECT count(*) as 'ONE' from t1;
+
+# Clean up.
+connection default;
+
+DROP TABLE t1;
+SET debug_sync = 'reset';
diff --git a/sql/log.cc b/sql/log.cc
index dc8df9c6fdb..1d9b4645421 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -6851,6 +6851,9 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
bool check_purge= false;
mysql_mutex_lock(&LOCK_log);
+
+ DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
+
prev_binlog_id= current_binlog_id;
if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
@@ -6861,11 +6864,22 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
}
else if ((error= rotate(force_rotate, &check_purge)))
check_purge= false;
+
+ DEBUG_SYNC(current_thd, "rotate_after_rotate");
+
/*
NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
the mutex. Otherwise causes various deadlocks.
+ Explicit binlog rotation must be synchronized with a concurrent
+ binlog ordered commit, in particular not let binlog
+ checkpoint notification request until early binlogged
+ concurrent commits have has been completed.
*/
+ mysql_mutex_lock(&LOCK_after_binlog_sync);
mysql_mutex_unlock(&LOCK_log);
+ mysql_mutex_lock(&LOCK_commit_ordered);
+ mysql_mutex_unlock(&LOCK_after_binlog_sync);
+ mysql_mutex_unlock(&LOCK_commit_ordered);
if (check_purge)
checkpoint_and_purge(prev_binlog_id);
@@ -8014,7 +8028,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
}
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
+
mysql_mutex_lock(&LOCK_commit_ordered);
+ DBUG_EXECUTE_IF("crash_before_engine_commit",
+ {
+ DBUG_SUICIDE();
+ });
last_commit_pos_offset= commit_offset;
/*