diff options
author | unknown <knielsen@knielsen-hq.org> | 2011-03-23 15:29:20 +0100 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2011-03-23 15:29:20 +0100 |
commit | ca5ca4b968297ac4dc5d0850752306b92570b6a1 (patch) | |
tree | 56115abfda488804535ac91643347be6f1e1396d | |
parent | a2d921be3634ceff4ab4c67f57b27a481d4a28df (diff) | |
download | mariadb-git-ca5ca4b968297ac4dc5d0850752306b92570b6a1.tar.gz |
MWL#116: group commit
Implement binlog_optimize_thread_scheduling option to allow benchmarking the
effect of running commit_ordered() for multiple transactions all in one
thread.
-rw-r--r-- | mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result | 36 | ||||
-rw-r--r-- | mysql-test/r/group_commit_crash_no_optimize_thread.result | 120 | ||||
-rw-r--r-- | mysql-test/r/group_commit_no_optimize_thread.result | 63 | ||||
-rw-r--r-- | mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt | 1 | ||||
-rw-r--r-- | mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test | 86 | ||||
-rw-r--r-- | mysql-test/t/group_commit_crash_no_optimize_thread-master.opt | 1 | ||||
-rw-r--r-- | mysql-test/t/group_commit_crash_no_optimize_thread.test | 80 | ||||
-rw-r--r-- | mysql-test/t/group_commit_no_optimize_thread-master.opt | 1 | ||||
-rw-r--r-- | mysql-test/t/group_commit_no_optimize_thread.test | 115 | ||||
-rw-r--r-- | sql/log.cc | 68 | ||||
-rw-r--r-- | sql/log.h | 8 |
11 files changed, 573 insertions, 6 deletions
diff --git a/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result b/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result new file mode 100644 index 00000000000..02bbfd36143 --- /dev/null +++ b/mysql-test/r/group_commit_binlog_pos_no_optimize_thread.result @@ -0,0 +1,36 @@ +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; +INSERT INTO t1 VALUES (0); +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +INSERT INTO t1 VALUES (1); +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +INSERT INTO t1 VALUES (2); +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +INSERT INTO t1 VALUES (3); +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SELECT * FROM t1 ORDER BY a; +a +0 +1 +2 +SET SESSION debug="+d,crash_dispatch_command_before"; +SELECT 1; +Got one of the listed errors +Got one of the listed errors +SELECT * FROM t1 ORDER BY a; +a +0 +1 +2 +3 +InnoDB: Last MySQL binlog file position 0 767, file name ./master-bin.000001 +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/mysql-test/r/group_commit_crash_no_optimize_thread.result b/mysql-test/r/group_commit_crash_no_optimize_thread.result new file mode 100644 index 00000000000..044161695e3 --- /dev/null +++ b/mysql-test/r/group_commit_crash_no_optimize_thread.result @@ -0,0 +1,120 @@ +CREATE TABLE t1(a CHAR(255), +b CHAR(255), +c CHAR(255), +d CHAR(255), +id INT AUTO_INCREMENT, +PRIMARY KEY(id)) ENGINE=InnoDB; +create table t2 like t1; +create procedure setcrash(IN i INT) +begin +CASE i +WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; +WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; +WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; +WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; +WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; +ELSE BEGIN END; +END CASE; +end // +FLUSH TABLES; +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +RESET MASTER; +START TRANSACTION; +insert into t1 select * from t2; +call setcrash(5); +COMMIT; +Got one of the listed errors +SELECT * FROM t1 ORDER BY id; +a b c d id +SHOW BINLOG EVENTS LIMIT 2,1; +Log_name Pos Event_type Server_id End_log_pos Info +delete from t1; +RESET MASTER; +START TRANSACTION; +insert into t1 select * from t2; +call setcrash(4); +COMMIT; +Got one of the listed errors +SELECT * FROM t1 ORDER BY id; +a b c d id +a b c d 1 +a b c d 2 +a b c d 3 +a b c d 4 +a b c d 5 +a b c d 6 +a b c d 7 +a b c d 8 +a b c d 9 +a b c d 10 +SHOW BINLOG EVENTS LIMIT 2,1; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2 +delete from t1; +RESET MASTER; +START TRANSACTION; +insert into t1 select * from t2; +call setcrash(3); +COMMIT; +Got one of the listed errors +SELECT * FROM t1 ORDER BY id; +a b c d id +a b c d 1 +a b c d 2 +a b c d 3 +a b c d 4 +a b c d 5 +a b c d 6 +a b c d 7 +a b c d 8 +a b c d 9 +a b c d 10 +SHOW BINLOG EVENTS LIMIT 2,1; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2 +delete from t1; +RESET MASTER; +START TRANSACTION; +insert into t1 select * from t2; +call setcrash(2); +COMMIT; +Got one of the listed errors +SELECT * FROM t1 ORDER BY id; +a b c d id +a b c d 1 +a b c d 2 +a b c d 3 +a b c d 4 +a b c d 5 +a b c d 6 +a b c d 7 +a b c d 8 +a b c d 9 +a b c d 10 +SHOW BINLOG EVENTS LIMIT 2,1; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2 +delete from t1; +RESET MASTER; +START TRANSACTION; +insert into t1 select * from t2; +call setcrash(1); +COMMIT; +Got one of the listed errors +SELECT * FROM t1 ORDER BY id; +a b c d id +SHOW BINLOG EVENTS LIMIT 2,1; +Log_name Pos Event_type Server_id End_log_pos Info +delete from t1; +DROP TABLE t1; +DROP TABLE t2; +DROP PROCEDURE setcrash; diff --git a/mysql-test/r/group_commit_no_optimize_thread.result b/mysql-test/r/group_commit_no_optimize_thread.result new file mode 100644 index 00000000000..7c2636fb472 --- /dev/null +++ b/mysql-test/r/group_commit_no_optimize_thread.result @@ -0,0 +1,63 @@ +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; +SELECT variable_value INTO @commits FROM information_schema.global_status +WHERE variable_name = 'binlog_commits'; +SELECT variable_value INTO @group_commits FROM information_schema.global_status +WHERE variable_name = 'binlog_group_commits'; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; +INSERT INTO t1 VALUES ("con1"); +set DEBUG_SYNC= "now WAIT_FOR group1_running"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2"; +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; +INSERT INTO t1 VALUES ("con2"); +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3"; +INSERT INTO t1 VALUES ("con3"); +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4"; +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; +INSERT INTO t1 VALUES ("con4"); +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM t1 ORDER BY a; +a +SET DEBUG_SYNC= "now SIGNAL group2_queued"; +SELECT * FROM t1 ORDER BY a; +a +con1 +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; +INSERT INTO t1 VALUES ("con5"); +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued"; +INSERT INTO t1 VALUES ("con6"); +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; +SELECT * FROM t1 ORDER BY a; +a +con1 +SET DEBUG_SYNC= "now SIGNAL group3_committed"; +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; +SELECT * FROM t1 ORDER BY a; +a +con1 +con2 +con3 +con4 +SET DEBUG_SYNC= "now SIGNAL group2_checked"; +SELECT * FROM t1 ORDER BY a; +a +con1 +con2 +con3 +con4 +con5 +con6 +SELECT variable_value - @commits FROM information_schema.global_status +WHERE variable_name = 'binlog_commits'; +variable_value - @commits +6 +SELECT variable_value - @group_commits FROM information_schema.global_status +WHERE variable_name = 'binlog_group_commits'; +variable_value - @group_commits +3 +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt new file mode 100644 index 00000000000..18d43988ffd --- /dev/null +++ b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread-master.opt @@ -0,0 +1 @@ +--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file diff --git a/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test new file mode 100644 index 00000000000..13f57dd247a --- /dev/null +++ b/mysql-test/t/group_commit_binlog_pos_no_optimize_thread.test @@ -0,0 +1,86 @@ +--source include/have_debug_sync.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_mixed_or_statement.inc + +# Need DBUG to crash the server intentionally +--source include/have_debug.inc +# Don't test this under valgrind, memory leaks will occur as we crash +--source include/not_valgrind.inc + +# XtraDB stores the binlog position corresponding to the last commit, and +# prints it during crash recovery. +# Test that we get the correct position when we group commit several +# transactions together. + +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; +INSERT INTO t1 VALUES (0); + +connect(con1,localhost,root,,); +connect(con2,localhost,root,,); +connect(con3,localhost,root,,); + +# Queue up three commits for group commit. + +connection con1; +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +send INSERT INTO t1 VALUES (1); + +connection con2; +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +send INSERT INTO t1 VALUES (2); + +connection con3; +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont"; +send INSERT INTO t1 VALUES (3); + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +# At this point, no transactions are committed. +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +# At this point, 1 transaction is committed. +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; + +# At this point, 2 transactions are committed. +SELECT * FROM t1 ORDER BY a; + +connection con1; +reap; +connection con2; +reap; + +# Now crash the server with 1+2 in-memory committed, 3 only prepared. +connection default; +system echo wait-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +SET SESSION debug="+d,crash_dispatch_command_before"; +--error 2006,2013 +SELECT 1; + +connection con3; +--error 2006,2013 +reap; + +system echo restart-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Crash recovery should recover all three transactions. +SELECT * FROM t1 ORDER BY a; + +# Check that the binlog position reported by InnoDB is the correct one +# for the end of the second transaction (as can be checked with +# mysqlbinlog). +let $MYSQLD_DATADIR= `SELECT @@datadir`; +--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1 + +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt b/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt new file mode 100644 index 00000000000..18d43988ffd --- /dev/null +++ b/mysql-test/t/group_commit_crash_no_optimize_thread-master.opt @@ -0,0 +1 @@ +--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file diff --git a/mysql-test/t/group_commit_crash_no_optimize_thread.test b/mysql-test/t/group_commit_crash_no_optimize_thread.test new file mode 100644 index 00000000000..273cd6230eb --- /dev/null +++ b/mysql-test/t/group_commit_crash_no_optimize_thread.test @@ -0,0 +1,80 @@ +# Testing group commit by crashing a few times. +# Test adapted from the Facebook patch: lp:mysqlatfacebook +--source include/not_embedded.inc +# Don't test this under valgrind, memory leaks will occur +--source include/not_valgrind.inc + +# Binary must be compiled with debug for crash to occur +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc + +let $file_format_check=`SELECT @@innodb_file_format_check`; +CREATE TABLE t1(a CHAR(255), + b CHAR(255), + c CHAR(255), + d CHAR(255), + id INT AUTO_INCREMENT, + PRIMARY KEY(id)) ENGINE=InnoDB; +create table t2 like t1; +delimiter //; +create procedure setcrash(IN i INT) +begin + CASE i + WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; + WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; + WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; + WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; + WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; + ELSE BEGIN END; + END CASE; +end // +delimiter ;// +# Avoid getting a crashed mysql.proc table. +FLUSH TABLES; + +let $numtests = 5; + +let $numinserts = 10; +while ($numinserts) +{ + dec $numinserts; + INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); +} + +--enable_reconnect + +while ($numtests) +{ + RESET MASTER; + + START TRANSACTION; + insert into t1 select * from t2; + # Write file to make mysql-test-run.pl expect crash + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + + eval call setcrash($numtests); + + # Run the crashing query + --error 2006,2013 + COMMIT; + + # Poll the server waiting for it to be back online again. + --source include/wait_until_connected_again.inc + + # table and binlog should be in sync. + SELECT * FROM t1 ORDER BY id; + SHOW BINLOG EVENTS LIMIT 2,1; + + delete from t1; + + dec $numtests; +} + +# final cleanup +DROP TABLE t1; +DROP TABLE t2; +DROP PROCEDURE setcrash; +--disable_query_log +eval SET GLOBAL innodb_file_format_check=$file_format_check; +--enable_query_log diff --git a/mysql-test/t/group_commit_no_optimize_thread-master.opt b/mysql-test/t/group_commit_no_optimize_thread-master.opt new file mode 100644 index 00000000000..97d8c106816 --- /dev/null +++ b/mysql-test/t/group_commit_no_optimize_thread-master.opt @@ -0,0 +1 @@ +--binlog-optimize-thread-scheduling=0 diff --git a/mysql-test/t/group_commit_no_optimize_thread.test b/mysql-test/t/group_commit_no_optimize_thread.test new file mode 100644 index 00000000000..38dab8aa37c --- /dev/null +++ b/mysql-test/t/group_commit_no_optimize_thread.test @@ -0,0 +1,115 @@ +--source include/have_debug_sync.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc + +# Test some group commit code paths by using debug_sync to do controlled +# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a +# group. +# +# Group 3 is allowed to race as far as possible ahead before group 2 finishes +# to check some edge case for concurrency control. + +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; + +SELECT variable_value INTO @commits FROM information_schema.global_status + WHERE variable_name = 'binlog_commits'; +SELECT variable_value INTO @group_commits FROM information_schema.global_status + WHERE variable_name = 'binlog_group_commits'; + +connect(con1,localhost,root,,); +connect(con2,localhost,root,,); +connect(con3,localhost,root,,); +connect(con4,localhost,root,,); +connect(con5,localhost,root,,); +connect(con6,localhost,root,,); + +# Start group1 (with one thread) doing commit, waiting for +# group2 to queue up before finishing. + +connection con1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; +send INSERT INTO t1 VALUES ("con1"); + +# Make group2 (with three threads) queue up. +# Make sure con2 is the group commit leader for group2. +# Make group2 wait with running commit_ordered() until group3 has committed. + +connection con2; +set DEBUG_SYNC= "now WAIT_FOR group1_running"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2"; +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; +send INSERT INTO t1 VALUES ("con2"); +connection con3; +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3"; +send INSERT INTO t1 VALUES ("con3"); +connection con4; +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4"; +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; +send INSERT INTO t1 VALUES ("con4"); + +# When group2 is queued, let group1 continue and queue group3. + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; + +# At this point, trasaction 1 is still not visible as commit_ordered() has not +# been called yet. +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM t1 ORDER BY a; + +SET DEBUG_SYNC= "now SIGNAL group2_queued"; +connection con1; +reap; + +# Now transaction 1 is visible. +connection default; +SELECT * FROM t1 ORDER BY a; + +connection con5; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; +send INSERT INTO t1 VALUES ("con5"); + +connection con6; +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued"; +send INSERT INTO t1 VALUES ("con6"); + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; +# Still only transaction 1 visible, as group2 have not yet run commit_ordered(). +SELECT * FROM t1 ORDER BY a; +SET DEBUG_SYNC= "now SIGNAL group3_committed"; +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; +# Now transactions 1-4 visible. +SELECT * FROM t1 ORDER BY a; +SET DEBUG_SYNC= "now SIGNAL group2_checked"; + +connection con2; +reap; + +connection con3; +reap; + +connection con4; +reap; + +connection con5; +reap; + +connection con6; +reap; + +connection default; +# Check all transactions finally visible. +SELECT * FROM t1 ORDER BY a; + +SELECT variable_value - @commits FROM information_schema.global_status + WHERE variable_name = 'binlog_commits'; +SELECT variable_value - @group_commits FROM information_schema.global_status + WHERE variable_name = 'binlog_group_commits'; + +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/sql/log.cc b/sql/log.cc index d18ded1f24e..c1e13452733 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2510,7 +2510,8 @@ const char *MYSQL_LOG::generate_name(const char *log_name, MYSQL_BIN_LOG::MYSQL_BIN_LOG() :bytes_written(0), prepared_xids(0), file_id(1), open_count(1), need_start_event(TRUE), - group_commit_queue(0), num_commits(0), num_group_commits(0), + group_commit_queue(0), group_commit_queue_busy(FALSE), + num_commits(0), num_group_commits(0), is_relay_log(0), description_event_for_exec(0), description_event_for_queue(0) { @@ -2567,6 +2568,7 @@ void MYSQL_BIN_LOG::init_pthread_objects() (void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index", MYF_NO_DEADLOCK_DETECTION); (void) pthread_cond_init(&update_cond, 0); + (void) pthread_cond_init(&COND_queue_busy, 0); } @@ -3989,6 +3991,7 @@ err: } +static my_bool opt_optimize_thread_scheduling= TRUE; #ifndef DBUG_OFF static ulong opt_binlog_dbug_fsync_sleep= 0; #endif @@ -4925,6 +4928,32 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) else trx_group_commit_leader(entry); + if (!opt_optimize_thread_scheduling) + { + /* For the leader, trx_group_commit_leader() already took the lock. */ + if (orig_queue != NULL) + pthread_mutex_lock(&LOCK_commit_ordered); + + DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered"); + ++num_commits; + if (entry->trx_data->using_xa && !entry->error) + run_commit_ordered(entry->thd, entry->all); + + group_commit_entry *next= entry->next; + if (!next) + { + group_commit_queue_busy= FALSE; + pthread_cond_signal(&COND_queue_busy); + DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered"); + } + pthread_mutex_unlock(&LOCK_commit_ordered); + + if (next) + { + next->thd->signal_wakeup_ready(); + } + } + if (!entry->error) return 0; @@ -5088,6 +5117,24 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); ++num_group_commits; + if (!opt_optimize_thread_scheduling) + { + /* + If we want to run commit_ordered() each in the transaction's own thread + context, then we need to mark the queue reserved; we need to finish all + threads in one group commit before the next group commit can be allowed + to proceed, and we cannot unlock a simple pthreads mutex in a different + thread from the one that locked it. + */ + + while (group_commit_queue_busy) + pthread_cond_wait(&COND_queue_busy, &LOCK_commit_ordered); + group_commit_queue_busy= TRUE; + + /* Note that we return with LOCK_commit_ordered locked! */ + DBUG_VOID_RETURN; + } + /* Wakeup each participant waiting for our group commit, first calling the commit_ordered() methods for any transactions doing 2-phase commit. @@ -6578,6 +6625,16 @@ static SHOW_VAR binlog_status_vars_top[]= { {NullS, NullS, SHOW_LONG} }; +static MYSQL_SYSVAR_BOOL( + optimize_thread_scheduling, + opt_optimize_thread_scheduling, + PLUGIN_VAR_READONLY, + "Run fast part of group commit in a single thread, to optimize kernel\n" + "thread scheduling", + NULL, + NULL, + 1); + #ifndef DBUG_OFF static MYSQL_SYSVAR_ULONG( dbug_fsync_sleep, @@ -6590,13 +6647,16 @@ static MYSQL_SYSVAR_ULONG( 0, ULONG_MAX, 0); +#endif static struct st_mysql_sys_var *binlog_sys_vars[]= { + MYSQL_SYSVAR(optimize_thread_scheduling), +#ifndef DBUG_OFF MYSQL_SYSVAR(dbug_fsync_sleep), +#endif NULL }; -#endif /* @@ -6634,11 +6694,7 @@ mysql_declare_plugin(binlog) NULL, /* Plugin Deinit */ 0x0100 /* 1.0 */, binlog_status_vars_top, /* status variables */ -#ifndef DBUG_OFF binlog_sys_vars, /* system variables */ -#else - NULL, /* system variables */ -#endif NULL /* config options */ } mysql_declare_plugin_end; diff --git a/sql/log.h b/sql/log.h index fcc9d5a711b..492a735c0ae 100644 --- a/sql/log.h +++ b/sql/log.h @@ -370,6 +370,14 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG bool no_auto_events; /* Queue of transactions queued up to participate in group commit. */ group_commit_entry *group_commit_queue; + /* + Condition variable to mark that the group commit queue is busy. + Used when each thread does it's own commit_ordered() (when + binlog_optimize_thread_scheduling=1). + Used with the LOCK_commit_ordered mutex. + */ + my_bool group_commit_queue_busy; + pthread_cond_t COND_queue_busy; /* Total number of committed transactions. */ ulonglong num_commits; /* Number of group commits done. */ |