summaryrefslogtreecommitdiff
path: root/sql/rpl_parallel.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/rpl_parallel.cc')
-rw-r--r--sql/rpl_parallel.cc56
1 files changed, 52 insertions, 4 deletions
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 305e8053032..9fcd6b48d9f 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -190,6 +190,17 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
fact trigger.
*/
DBUG_ASSERT(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id);
+ if (!(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id))
+ fprintf(stderr, "MDEV8302: GTID %u-%u-%lu subid=%lu skipping free of "
+ "GCO(wait_count=%lu prior=%lu last=%lu installed=%d) due to "
+ "next->wait_count %lu > count_committing %lu\n",
+ rgi->current_gtid.domain_id, rgi->current_gtid.server_id,
+ (ulong)rgi->current_gtid.seq_no, (ulong)rgi->gtid_sub_id,
+ (ulong)tmp_gco->wait_count, (ulong)tmp_gco->prior_sub_id,
+ (ulong)tmp_gco->last_sub_id, (int)tmp_gco->installed,
+ (ulong)tmp_gco->next_gco->wait_count,
+ (ulong)entry->count_committing_event_groups);
+
tmp_gco= tmp_gco->prev_gco;
}
while (tmp_gco)
@@ -304,6 +315,11 @@ convert_kill_to_deadlock_error(rpl_group_info *rgi)
if ((err_code == ER_QUERY_INTERRUPTED || err_code == ER_CONNECTION_KILLED) &&
rgi->killed_for_retry)
{
+ fprintf(stderr, "MDEV8302: Got deadlock kill in GTID %u-%u-%lu "
+ "(subid %lu in_commit=%d)\n", rgi->current_gtid.domain_id,
+ rgi->current_gtid.server_id,
+ (ulong)rgi->current_gtid.seq_no, (ulong)rgi->gtid_sub_id,
+ (int)rgi->did_mark_start_commit);
thd->clear_error();
my_error(ER_LOCK_DEADLOCK, MYF(0));
rgi->killed_for_retry= false;
@@ -343,6 +359,9 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
Format_description_log_event *description_event= NULL;
do_retry:
+ fprintf(stderr, "MDEV8302: Retry #%lu of GTID %u-%u-%lu\n", retries+1,
+ rgi->current_gtid.domain_id, rgi->current_gtid.server_id,
+ (ulong)rgi->current_gtid.seq_no);
event_count= 0;
err= 0;
errmsg= NULL;
@@ -884,9 +903,28 @@ handle_rpl_parallel_thread(void *arg)
group_ending= is_group_ending(qev->ev, event_type);
if (group_ending && likely(!rgi->worker_error))
{
- DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
- rgi->mark_start_commit();
- DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
+ /*
+ Do an extra check for (deadlock) kill here. This helps prevent a
+ lingering deadlock kill that occurred during normal DML processing to
+ propagate past the mark_start_commit(). If we detect a deadlock only
+ after mark_start_commit(), we have to unmark, which has at least a
+ theoretical possibility of leaving a window where it looks like all
+ transactions in a GCO have started committing, while in fact one
+ will need to rollback and retry. This is not supposed to be possible
+ (since there is a deadlock, at least one transaction should be
+ blocked from reaching commit), but this seems a fragile assurance,
+ and there were historically a number of subtle bugs in this area.
+ */
+ if (!thd->killed)
+ {
+ DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
+ rgi->mark_start_commit();
+ DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
+ }
+ else
+ fprintf(stderr, "MDEV8302: Skip mark_start_commit(GTID %u-%u-%lu) "
+ "due to killed\n", rgi->current_gtid.domain_id,
+ rgi->current_gtid.server_id, (ulong)rgi->current_gtid.seq_no);
}
/*
@@ -911,7 +949,17 @@ handle_rpl_parallel_thread(void *arg)
});
if (!err)
#endif
- err= rpt_handle_event(qev, rpt);
+ {
+ if (thd->check_killed())
+ {
+ thd->clear_error();
+ thd->get_stmt_da()->reset_diagnostics_area();
+ thd->send_kill_message();
+ err= 1;
+ }
+ else
+ err= rpt_handle_event(qev, rpt);
+ }
delete_or_keep_event_post_apply(rgi, event_type, qev->ev);
DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100",
err= dbug_simulate_tmp_error(rgi, thd););