summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/rpl/r/rpl_parallel.result40
-rw-r--r--mysql-test/suite/rpl/t/rpl_parallel.test54
-rw-r--r--sql/handler.cc18
-rw-r--r--sql/log_event.cc2
-rw-r--r--sql/rpl_parallel.cc39
5 files changed, 147 insertions, 6 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result
index 459da711171..a317650b41e 100644
--- a/mysql-test/suite/rpl/r/rpl_parallel.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel.result
@@ -1703,6 +1703,46 @@ include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debg;
SET GLOBAL max_relay_log_size= @old_max;
include/start_slave.inc
+*** MDEV-8302: Duplicate key with parallel replication ***
+include/stop_slave.inc
+/* Inject a small sleep which makes the race easier to hit. */
+SET @old_dbug=@@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,inject_mdev8302";
+INSERT INTO t7 VALUES (100,1), (101,2), (102,3), (103,4), (104,5);
+SET @old_dbug= @@SESSION.debug_dbug;
+SET @commit_id= 20000;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+SET SESSION debug_dbug=@old_dbug;
+SELECT * FROM t7 ORDER BY a;
+a b
+1 1
+2 2
+3 86
+4 4
+5 5
+100 5
+101 1
+102 2
+103 3
+104 4
+include/save_master_gtid.inc
+include/start_slave.inc
+include/sync_with_master_gtid.inc
+SELECT * FROM t7 ORDER BY a;
+a b
+1 1
+2 2
+3 86
+4 4
+5 5
+100 5
+101 1
+102 2
+103 3
+104 4
+include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test
index 6861f1bbebe..54294000b6f 100644
--- a/mysql-test/suite/rpl/t/rpl_parallel.test
+++ b/mysql-test/suite/rpl/t/rpl_parallel.test
@@ -2374,6 +2374,60 @@ SET GLOBAL max_relay_log_size= @old_max;
--source include/start_slave.inc
+--echo *** MDEV-8302: Duplicate key with parallel replication ***
+
+--connection server_2
+--source include/stop_slave.inc
+/* Inject a small sleep which makes the race easier to hit. */
+SET @old_dbug=@@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,inject_mdev8302";
+
+
+--connection server_1
+INSERT INTO t7 VALUES (100,1), (101,2), (102,3), (103,4), (104,5);
+
+# Artificially create a bunch of group commits with conflicting transactions.
+# The bug happened when T1 and T2 was in one group commit, and T3 was in the
+# following group commit. T2 is a DELETE of a row with same primary key as a
+# row that T3 inserts. T1 and T2 can conflict, causing T2 to be deadlock
+# killed after starting to commit. The bug was that T2 could roll back before
+# doing unmark_start_commit(); this could allow T3 to run before the retry
+# of T2, causing duplicate key violation.
+
+SET @old_dbug= @@SESSION.debug_dbug;
+SET @commit_id= 20000;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+
+--let $n = 100
+--disable_query_log
+while ($n)
+{
+ eval UPDATE t7 SET b=b+1 WHERE a=100+($n MOD 5);
+ eval DELETE FROM t7 WHERE a=100+($n MOD 5);
+
+ SET @commit_id = @commit_id + 1;
+ eval INSERT INTO t7 VALUES (100+($n MOD 5), $n);
+ SET @commit_id = @commit_id + 1;
+ dec $n;
+}
+--enable_query_log
+SET SESSION debug_dbug=@old_dbug;
+
+
+SELECT * FROM t7 ORDER BY a;
+--source include/save_master_gtid.inc
+
+
+--connection server_2
+--source include/start_slave.inc
+--source include/sync_with_master_gtid.inc
+SELECT * FROM t7 ORDER BY a;
+
+--source include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+--source include/start_slave.inc
+
+
# Clean up.
--connection server_2
diff --git a/sql/handler.cc b/sql/handler.cc
index 70f30580a07..4f070d3336b 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1615,6 +1615,24 @@ int ha_rollback_trans(THD *thd, bool all)
DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
trans == &thd->transaction.stmt);
+ if (is_real_trans)
+ {
+ /*
+ In parallel replication, if we need to rollback during commit, we must
+ first inform following transactions that we are going to abort our commit
+ attempt. Otherwise those following transactions can run too early, and
+ possibly cause replication to fail. See comments in retry_event_group().
+
+ There were several bugs with this in the past that were very hard to
+ track down (MDEV-7458, MDEV-8302). So we add here an assertion for
+ rollback without signalling following transactions. And in release
+ builds, we explicitly do the signalling before rolling back.
+ */
+ DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit));
+ if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
+ thd->rgi_slave->unmark_start_commit();
+ }
+
if (thd->in_sub_stmt)
{
DBUG_ASSERT(0);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 87fa145d730..786b249db39 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -4234,7 +4234,6 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
"mysql", rpl_gtid_slave_state_table_name.str,
errcode,
thd->get_stmt_da()->message());
- trans_rollback(thd);
sub_id= 0;
thd->is_slave_error= 1;
goto end;
@@ -7365,7 +7364,6 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
"%s.%s: %d: %s",
"mysql", rpl_gtid_slave_state_table_name.str, ec,
thd->get_stmt_da()->message());
- trans_rollback(thd);
thd->is_slave_error= 1;
return err;
}
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 29a87cc627b..6a1ef257ec5 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -225,6 +225,11 @@ static void
signal_error_to_sql_driver_thread(THD *thd, rpl_group_info *rgi, int err)
{
rgi->worker_error= err;
+ /*
+ In case we get an error during commit, inform following transactions that
+ we aborted our commit.
+ */
+ rgi->unmark_start_commit();
rgi->cleanup_context(thd, true);
rgi->rli->abort_slave= true;
rgi->rli->stop_for_until= false;
@@ -377,6 +382,7 @@ do_retry:
transaction we deadlocked with will not signal that it started to commit
until after the unmark.
*/
+ DBUG_EXECUTE_IF("inject_mdev8302", { my_sleep(20000);});
rgi->unmark_start_commit();
DEBUG_SYNC(thd, "rpl_parallel_retry_after_unmark");
@@ -914,9 +920,24 @@ handle_rpl_parallel_thread(void *arg)
group_ending= is_group_ending(qev->ev, event_type);
if (group_ending && likely(!rgi->worker_error))
{
- DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
- rgi->mark_start_commit();
- DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
+ /*
+ Do an extra check for (deadlock) kill here. This helps prevent a
+ lingering deadlock kill that occured during normal DML processing to
+ propagate past the mark_start_commit(). If we detect a deadlock only
+ after mark_start_commit(), we have to unmark, which has at least a
+ theoretical possibility of leaving a window where it looks like all
+ transactions in a GCO have started committing, while in fact one
+ will need to rollback and retry. This is not supposed to be possible
+ (since there is a deadlock, at least one transaction should be
+ blocked from reaching commit), but this seems a fragile ensurance,
+ and there were historically a number of subtle bugs in this area.
+ */
+ if (!thd->killed)
+ {
+ DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
+ rgi->mark_start_commit();
+ DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
+ }
}
/*
@@ -941,7 +962,17 @@ handle_rpl_parallel_thread(void *arg)
});
if (!err)
#endif
- err= rpt_handle_event(qev, rpt);
+ {
+ if (thd->check_killed())
+ {
+ thd->clear_error();
+ thd->get_stmt_da()->reset_diagnostics_area();
+ thd->send_kill_message();
+ err= 1;
+ }
+ else
+ err= rpt_handle_event(qev, rpt);
+ }
delete_or_keep_event_post_apply(rgi, event_type, qev->ev);
DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100",
err= dbug_simulate_tmp_error(rgi, thd););