summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/log.cc7
-rw-r--r--sql/rpl_mi.cc1
-rw-r--r--sql/rpl_mi.h6
-rw-r--r--sql/slave.cc80
4 files changed, 94 insertions, 0 deletions
diff --git a/sql/log.cc b/sql/log.cc
index 71975cd83b7..e6d9b6d15cb 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -7469,6 +7469,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
}
}
+ DBUG_EXECUTE_IF("inject_error_writing_xid",
+ {
+ entry->error_cache= NULL;
+ entry->commit_errno= 28;
+ DBUG_RETURN(ER_ERROR_ON_WRITE);
+ });
+
if (entry->end_event->write(&log_file))
{
entry->error_cache= NULL;
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 7764400becb..f2c39ea7f8d 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -38,6 +38,7 @@ Master_info::Master_info(LEX_STRING *connection_name_arg,
connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0),
slave_running(0), slave_run_id(0), sync_counter(0),
heartbeat_period(0), received_heartbeats(0), master_id(0),
+ prev_master_id(0),
using_gtid(USE_GTID_NO), events_queued_since_last_gtid(0),
gtid_reconnect_event_skip_count(0), gtid_event_seen(false)
{
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index af739b1dad4..ebb1b541728 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -136,6 +136,12 @@ class Master_info : public Slave_reporting_capability
DYNAMIC_ARRAY ignore_server_ids;
ulong master_id;
/*
+ At reconnect and until the first rotate event is seen, prev_master_id is
+ the value of master_id during the previous connection, used to detect
+ silent change of master server during reconnects.
+ */
+ ulong prev_master_id;
+ /*
Which kind of GTID position (if any) is used when connecting to master.
Note that you can not change the numeric values of these, they are used
diff --git a/sql/slave.cc b/sql/slave.cc
index ae3cb86e8be..9422ecc7f22 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -5265,6 +5265,86 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
event_len - BINLOG_CHECKSUM_LEN : event_len,
mi->rli.relay_log.description_event_for_queue);
+ if (unlikely(mi->gtid_reconnect_event_skip_count) &&
+ unlikely(!mi->gtid_event_seen) &&
+ rev.is_artificial_event() &&
+ (mi->prev_master_id != mi->master_id ||
+ strcmp(rev.new_log_ident, mi->master_log_name) != 0))
+ {
+ /*
+ Artificial Rotate_log_event is the first event we receive at the start
+ of each master binlog file. It gives the name of the new binlog file.
+
+ Normally, we already have this name from the real rotate event at the
+ end of the previous binlog file (unless we are making a new connection
+ using GTID). But if the master server restarted/crashed, there is no
+ rotate event at the end of the prior binlog file, so the name is new.
+
+ We use this fact to handle a special case of master crashing. If the
+ master crashed while writing the binlog, it might end with a partial
+ event group lacking the COMMIT/XID event, which must be rolled
+ back. If the slave IO thread happens to get a disconnect in the middle
+ of exactly this event group, it will try to reconnect at the same GTID
+ and skip already fetched events. However, that GTID did not commit on
+ the master before the crash, so it does not really exist, and the
+ master will connect the slave at the next following GTID starting in
+ the next binlog. This could confuse the slave and make it mix the
+ start of one event group with the end of another.
+
+ But we detect this case here, by noticing the change of binlog name
+ which detects the missing rotate event at the end of the previous
+ binlog file. In this case, we reset the counters to make us not skip
+ the next event group, and queue an artificial Format Description
+ event. The previously fetched incomplete event group will then be
+ rolled back when the Format Description event is executed by the SQL
+ thread.
+
+ A similar case is if the reconnect somehow connects to a different
+ master server (like due to a network proxy or IP address takeover).
+ We detect this case by noticing a change of server_id and in this
+ case likewise rollback the partially received event group.
+ */
+ Format_description_log_event fdle(4);
+
+ if (mi->prev_master_id != mi->master_id)
+ sql_print_warning("The server_id of master server changed in the "
+ "middle of GTID %u-%u-%llu. Assuming a change of "
+ "master server, so rolling back the previously "
+ "received partial transaction. Expected: %lu, "
+ "received: %lu", mi->last_queued_gtid.domain_id,
+ mi->last_queued_gtid.server_id,
+ mi->last_queued_gtid.seq_no,
+ mi->prev_master_id, mi->master_id);
+ else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0)
+ sql_print_warning("Unexpected change of master binlog file name in the "
+ "middle of GTID %u-%u-%llu, assuming that master has "
+ "crashed and rolling back the transaction. Expected: "
+ "'%s', received: '%s'",
+ mi->last_queued_gtid.domain_id,
+ mi->last_queued_gtid.server_id,
+ mi->last_queued_gtid.seq_no,
+ mi->master_log_name, rev.new_log_ident);
+
+ mysql_mutex_lock(log_lock);
+ if (likely(!fdle.write(rli->relay_log.get_log_file()) &&
+ !rli->relay_log.flush_and_sync(NULL)))
+ {
+ rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+ }
+ else
+ {
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ mysql_mutex_unlock(log_lock);
+ goto err;
+ }
+ rli->relay_log.signal_update();
+ mysql_mutex_unlock(log_lock);
+
+ mi->gtid_reconnect_event_skip_count= 0;
+ mi->events_queued_since_last_gtid= 0;
+ }
+ mi->prev_master_id= mi->master_id;
+
if (unlikely(process_io_rotate(mi, &rev)))
{
error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;