summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <knielsen@knielsen-hq.org>2011-03-23 15:29:20 +0100
committerunknown <knielsen@knielsen-hq.org>2011-03-23 15:29:20 +0100
commitca5ca4b968297ac4dc5d0850752306b92570b6a1 (patch)
tree56115abfda488804535ac91643347be6f1e1396d
parenta2d921be3634ceff4ab4c67f57b27a481d4a28df (diff)
downloadmariadb-git-ca5ca4b968297ac4dc5d0850752306b92570b6a1.tar.gz
MWL#116: group commit
Implement binlog_optimize_thread_scheduling option to allow benchmarking the effect of running commit_ordered() for multiple transactions all in one thread.
-rw-r--r--mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result36
-rw-r--r--mysql-test/r/group_commit_crash_no_optimize_thread.result120
-rw-r--r--mysql-test/r/group_commit_no_optimize_thread.result63
-rw-r--r--mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test86
-rw-r--r--mysql-test/t/group_commit_crash_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/t/group_commit_crash_no_optimize_thread.test80
-rw-r--r--mysql-test/t/group_commit_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/t/group_commit_no_optimize_thread.test115
-rw-r--r--sql/log.cc68
-rw-r--r--sql/log.h8
11 files changed, 573 insertions, 6 deletions
diff --git a/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result b/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result
new file mode 100644
index 00000000000..02bbfd36143
--- /dev/null
+++ b/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result
@@ -0,0 +1,36 @@
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (1);
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (2);
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (3);
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+SET SESSION debug="+d,crash_dispatch_command_before";
+SELECT 1;
+Got one of the listed errors
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+3
+InnoDB: Last MySQL binlog file position 0 767, file name ./master-bin.000001
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/r/group_commit_crash_no_optimize_thread.result b/mysql-test/r/group_commit_crash_no_optimize_thread.result
new file mode 100644
index 00000000000..044161695e3
--- /dev/null
+++ b/mysql-test/r/group_commit_crash_no_optimize_thread.result
@@ -0,0 +1,120 @@
+CREATE TABLE t1(a CHAR(255),
+b CHAR(255),
+c CHAR(255),
+d CHAR(255),
+id INT AUTO_INCREMENT,
+PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+create procedure setcrash(IN i INT)
+begin
+CASE i
+WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ELSE BEGIN END;
+END CASE;
+end //
+FLUSH TABLES;
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(5);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(4);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
+delete from t1;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(3);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
+delete from t1;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(2);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
+delete from t1;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(1);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
diff --git a/mysql-test/r/group_commit_no_optimize_thread.result b/mysql-test/r/group_commit_no_optimize_thread.result
new file mode 100644
index 00000000000..7c2636fb472
--- /dev/null
+++ b/mysql-test/r/group_commit_no_optimize_thread.result
@@ -0,0 +1,63 @@
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+SELECT variable_value INTO @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+INSERT INTO t1 VALUES ("con1");
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+INSERT INTO t1 VALUES ("con2");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+INSERT INTO t1 VALUES ("con3");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+INSERT INTO t1 VALUES ("con4");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+a
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+INSERT INTO t1 VALUES ("con5");
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+INSERT INTO t1 VALUES ("con6");
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+con5
+con6
+SELECT variable_value - @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+variable_value - @commits
+6
+SELECT variable_value - @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+variable_value - @group_commits
+3
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..18d43988ffd
--- /dev/null
+++ b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file
diff --git a/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test
new file mode 100644
index 00000000000..13f57dd247a
--- /dev/null
+++ b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test
@@ -0,0 +1,86 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_binlog_format_mixed_or_statement.inc
+
+# Need DBUG to crash the server intentionally
+--source include/have_debug.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+
+# XtraDB stores the binlog position corresponding to the last commit, and
+# prints it during crash recovery.
+# Test that we get the correct position when we group commit several
+# transactions together.
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+
+# Queue up three commits for group commit.
+
+connection con1;
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (1);
+
+connection con2;
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (2);
+
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (3);
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, no transactions are committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, 1 transaction is committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+
+# At this point, 2 transactions are committed.
+SELECT * FROM t1 ORDER BY a;
+
+connection con1;
+reap;
+connection con2;
+reap;
+
+# Now crash the server with 1+2 in-memory committed, 3 only prepared.
+connection default;
+system echo wait-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+SET SESSION debug="+d,crash_dispatch_command_before";
+--error 2006,2013
+SELECT 1;
+
+connection con3;
+--error 2006,2013
+reap;
+
+system echo restart-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Crash recovery should recover all three transactions.
+SELECT * FROM t1 ORDER BY a;
+
+# Check that the binlog position reported by InnoDB is the correct one
+# for the end of the second transaction (as can be checked with
+# mysqlbinlog).
+let $MYSQLD_DATADIR= `SELECT @@datadir`;
+--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt b/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..18d43988ffd
--- /dev/null
+++ b/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file
diff --git a/mysql-test/t/group_commit_crash_no_optimize_thread.test b/mysql-test/t/group_commit_crash_no_optimize_thread.test
new file mode 100644
index 00000000000..273cd6230eb
--- /dev/null
+++ b/mysql-test/t/group_commit_crash_no_optimize_thread.test
@@ -0,0 +1,80 @@
+# Testing group commit by crashing a few times.
+# Test adapted from the Facebook patch: lp:mysqlatfacebook
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur
+--source include/not_valgrind.inc
+
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+let $file_format_check=`SELECT @@innodb_file_format_check`;
+CREATE TABLE t1(a CHAR(255),
+ b CHAR(255),
+ c CHAR(255),
+ d CHAR(255),
+ id INT AUTO_INCREMENT,
+ PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+delimiter //;
+create procedure setcrash(IN i INT)
+begin
+ CASE i
+ WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+ WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+ WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+ WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+ WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ ELSE BEGIN END;
+ END CASE;
+end //
+delimiter ;//
+# Avoid getting a crashed mysql.proc table.
+FLUSH TABLES;
+
+let $numtests = 5;
+
+let $numinserts = 10;
+while ($numinserts)
+{
+ dec $numinserts;
+ INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+}
+
+--enable_reconnect
+
+while ($numtests)
+{
+ RESET MASTER;
+
+ START TRANSACTION;
+ insert into t1 select * from t2;
+ # Write file to make mysql-test-run.pl expect crash
+ --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+
+ eval call setcrash($numtests);
+
+ # Run the crashing query
+ --error 2006,2013
+ COMMIT;
+
+ # Poll the server waiting for it to be back online again.
+ --source include/wait_until_connected_again.inc
+
+ # table and binlog should be in sync.
+ SELECT * FROM t1 ORDER BY id;
+ SHOW BINLOG EVENTS LIMIT 2,1;
+
+ delete from t1;
+
+ dec $numtests;
+}
+
+# final cleanup
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
+--disable_query_log
+eval SET GLOBAL innodb_file_format_check=$file_format_check;
+--enable_query_log
diff --git a/mysql-test/t/group_commit_no_optimize_thread-master.opt b/mysql-test/t/group_commit_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..97d8c106816
--- /dev/null
+++ b/mysql-test/t/group_commit_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0
diff --git a/mysql-test/t/group_commit_no_optimize_thread.test b/mysql-test/t/group_commit_no_optimize_thread.test
new file mode 100644
index 00000000000..38dab8aa37c
--- /dev/null
+++ b/mysql-test/t/group_commit_no_optimize_thread.test
@@ -0,0 +1,115 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+# Test some group commit code paths by using debug_sync to do controlled
+# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
+# group.
+#
+# Group 3 is allowed to race as far as possible ahead before group 2 finishes
+# to check some edge case for concurrency control.
+
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+
+SELECT variable_value INTO @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+connect(con4,localhost,root,,);
+connect(con5,localhost,root,,);
+connect(con6,localhost,root,,);
+
+# Start group1 (with one thread) doing commit, waiting for
+# group2 to queue up before finishing.
+
+connection con1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+send INSERT INTO t1 VALUES ("con1");
+
+# Make group2 (with three threads) queue up.
+# Make sure con2 is the group commit leader for group2.
+# Make group2 wait with running commit_ordered() until group3 has committed.
+
+connection con2;
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+send INSERT INTO t1 VALUES ("con2");
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+send INSERT INTO t1 VALUES ("con3");
+connection con4;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+send INSERT INTO t1 VALUES ("con4");
+
+# When group2 is queued, let group1 continue and queue group3.
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+
+# At this point, trasaction 1 is still not visible as commit_ordered() has not
+# been called yet.
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+connection con1;
+reap;
+
+# Now transaction 1 is visible.
+connection default;
+SELECT * FROM t1 ORDER BY a;
+
+connection con5;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+send INSERT INTO t1 VALUES ("con5");
+
+connection con6;
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+send INSERT INTO t1 VALUES ("con6");
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+# Still only transaction 1 visible, as group2 have not yet run commit_ordered().
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+# Now transactions 1-4 visible.
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+
+connection con2;
+reap;
+
+connection con3;
+reap;
+
+connection con4;
+reap;
+
+connection con5;
+reap;
+
+connection con6;
+reap;
+
+connection default;
+# Check all transactions finally visible.
+SELECT * FROM t1 ORDER BY a;
+
+SELECT variable_value - @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value - @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/sql/log.cc b/sql/log.cc
index d18ded1f24e..c1e13452733 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -2510,7 +2510,8 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
need_start_event(TRUE),
- group_commit_queue(0), num_commits(0), num_group_commits(0),
+ group_commit_queue(0), group_commit_queue_busy(FALSE),
+ num_commits(0), num_group_commits(0),
is_relay_log(0),
description_event_for_exec(0), description_event_for_queue(0)
{
@@ -2567,6 +2568,7 @@ void MYSQL_BIN_LOG::init_pthread_objects()
(void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index",
MYF_NO_DEADLOCK_DETECTION);
(void) pthread_cond_init(&update_cond, 0);
+ (void) pthread_cond_init(&COND_queue_busy, 0);
}
@@ -3989,6 +3991,7 @@ err:
}
+static my_bool opt_optimize_thread_scheduling= TRUE;
#ifndef DBUG_OFF
static ulong opt_binlog_dbug_fsync_sleep= 0;
#endif
@@ -4925,6 +4928,32 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
else
trx_group_commit_leader(entry);
+ if (!opt_optimize_thread_scheduling)
+ {
+ /* For the leader, trx_group_commit_leader() already took the lock. */
+ if (orig_queue != NULL)
+ pthread_mutex_lock(&LOCK_commit_ordered);
+
+ DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
+ ++num_commits;
+ if (entry->trx_data->using_xa && !entry->error)
+ run_commit_ordered(entry->thd, entry->all);
+
+ group_commit_entry *next= entry->next;
+ if (!next)
+ {
+ group_commit_queue_busy= FALSE;
+ pthread_cond_signal(&COND_queue_busy);
+ DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
+ }
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+
+ if (next)
+ {
+ next->thd->signal_wakeup_ready();
+ }
+ }
+
if (!entry->error)
return 0;
@@ -5088,6 +5117,24 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
++num_group_commits;
+ if (!opt_optimize_thread_scheduling)
+ {
+ /*
+ If we want to run commit_ordered() each in the transaction's own thread
+ context, then we need to mark the queue reserved; we need to finish all
+ threads in one group commit before the next group commit can be allowed
+ to proceed, and we cannot unlock a simple pthreads mutex in a different
+ thread from the one that locked it.
+ */
+
+ while (group_commit_queue_busy)
+ pthread_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
+ group_commit_queue_busy= TRUE;
+
+ /* Note that we return with LOCK_commit_ordered locked! */
+ DBUG_VOID_RETURN;
+ }
+
/*
Wakeup each participant waiting for our group commit, first calling the
commit_ordered() methods for any transactions doing 2-phase commit.
@@ -6578,6 +6625,16 @@ static SHOW_VAR binlog_status_vars_top[]= {
{NullS, NullS, SHOW_LONG}
};
+static MYSQL_SYSVAR_BOOL(
+ optimize_thread_scheduling,
+ opt_optimize_thread_scheduling,
+ PLUGIN_VAR_READONLY,
+ "Run fast part of group commit in a single thread, to optimize kernel\n"
+ "thread scheduling",
+ NULL,
+ NULL,
+ 1);
+
#ifndef DBUG_OFF
static MYSQL_SYSVAR_ULONG(
dbug_fsync_sleep,
@@ -6590,13 +6647,16 @@ static MYSQL_SYSVAR_ULONG(
0,
ULONG_MAX,
0);
+#endif
static struct st_mysql_sys_var *binlog_sys_vars[]=
{
+ MYSQL_SYSVAR(optimize_thread_scheduling),
+#ifndef DBUG_OFF
MYSQL_SYSVAR(dbug_fsync_sleep),
+#endif
NULL
};
-#endif
/*
@@ -6634,11 +6694,7 @@ mysql_declare_plugin(binlog)
NULL, /* Plugin Deinit */
0x0100 /* 1.0 */,
binlog_status_vars_top, /* status variables */
-#ifndef DBUG_OFF
binlog_sys_vars, /* system variables */
-#else
- NULL, /* system variables */
-#endif
NULL /* config options */
}
mysql_declare_plugin_end;
diff --git a/sql/log.h b/sql/log.h
index fcc9d5a711b..492a735c0ae 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -370,6 +370,14 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
bool no_auto_events;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry *group_commit_queue;
+ /*
+ Condition variable to mark that the group commit queue is busy.
+ Used when each thread does it's own commit_ordered() (when
+ binlog_optimize_thread_scheduling=1).
+ Used with the LOCK_commit_ordered mutex.
+ */
+ my_bool group_commit_queue_busy;
+ pthread_cond_t COND_queue_busy;
/* Total number of committed transactions. */
ulonglong num_commits;
/* Number of group commits done. */