From 52b25934d7fcf552a955078ccb6b19bef18e99d8 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Mon, 1 Dec 2014 13:53:57 +0100 Subject: MDEV-7237: Parallel replication: incorrect relaylog position after stop/start the slave The replication relay log position was sometimes updated incorrectly at the end of a transaction in parallel replication. This happened because the relay log file name was taken from the current Relay_log_info (the position of the SQL driver thread) rather than from the value recorded for the transaction in question (rgi->event_relay_log_name). The result was that if a transaction was applied while the SQL driver thread was at least one relay log file ahead, _and_ the SQL thread was subsequently stopped before applying any events from the most recent relay log file, then the recorded relay log position would be incorrect: it would carry the wrong relay log file name. Thus, when the slave was started again, a relay log read error would usually result, or, in rare cases where the position happened to be readable, the slave might even skip an arbitrary number of events. In GTID mode, the relay log position is reset when both slave threads are restarted, so this bug would only be seen in non-GTID mode, or in GTID mode when only the SQL thread, not the IO thread, was stopped. 
--- sql/rpl_parallel.cc | 8 ++++++++ sql/rpl_rli.cc | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'sql') diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 7c273d51a19..53b37e82cdb 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -631,6 +631,14 @@ handle_rpl_parallel_thread(void *arg) PSI_stage_info old_stage; uint64 wait_count; + DBUG_EXECUTE_IF("rpl_parallel_scheduled_gtid_0_x_100", { + if (rgi->current_gtid.domain_id == 0 && + rgi->current_gtid.seq_no == 100) { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL scheduled_gtid_0_x_100")); + } + }); + in_event_group= true; /* If the standalone flag is set, then this event group consists of a diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 673a8c7ca4f..629e046ed0a 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -986,11 +986,11 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, if (rgi->is_parallel_exec) { /* In case of parallel replication, do not update the position backwards. */ - int cmp= strcmp(group_relay_log_name, event_relay_log_name); + int cmp= strcmp(group_relay_log_name, rgi->event_relay_log_name); if (cmp < 0) { group_relay_log_pos= rgi->future_event_relay_log_pos; - strmake_buf(group_relay_log_name, event_relay_log_name); + strmake_buf(group_relay_log_name, rgi->event_relay_log_name); notify_group_relay_log_name_update(); } else if (cmp == 0 && group_relay_log_pos < rgi->future_event_relay_log_pos) group_relay_log_pos= rgi->future_event_relay_log_pos; -- cgit v1.2.1