diff options
author | unknown <knielsen@knielsen-hq.org> | 2014-05-08 14:20:18 +0200 |
---|---|---|
committer | Kristian Nielsen <knielsen@knielsen-hq.org> | 2014-05-08 14:20:18 +0200 |
commit | b0b60f249807b6c2d423313350d9ad66693c2d1e (patch) | |
tree | bb8fadff8a4425a04c03af264872991ee36f7fd0 /sql/rpl_rli.h | |
parent | 2b4b857d51469c5fd974186ba8219e367c2019ec (diff) | |
download | mariadb-git-b0b60f249807b6c2d423313350d9ad66693c2d1e.tar.gz |
MDEV-5262: Missing retry after temp error in parallel replication
Start implementing the ability to retry an event group in parallel replication
when it fails with a temporary error (such as a deadlock).
This patch is very incomplete; only some very basic retry cases work so far.
Things still missing (not a complete list):
- Handle moving to the next relay log file, if the event group to be retried
spans multiple relay log files.
- Handle refcounting of relay log files, to ensure that we do not purge a
relay log file and then later attempt to re-execute events out of it.
- Handle description_event_for_exec - we need to save this somehow for the
possible retry - and use the correct one in case it differs between relay
logs.
- Do another retry attempt in case the first retry also fails.
- Limit the max number of retries.
- Lots of testing will be needed for the various edge cases.
Diffstat (limited to 'sql/rpl_rli.h')
-rw-r--r-- | sql/rpl_rli.h | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 00d16f52488..c2cdbcdc573 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -61,6 +61,7 @@ enum { *****************************************************************************/ struct rpl_group_info; +struct inuse_relaylog; class Relay_log_info : public Slave_reporting_capability { @@ -164,6 +165,13 @@ public: Master_info *mi; /* + List of active relay log files. + (This can be more than one in case of parallel replication). + */ + inuse_relaylog *inuse_relaylog_list; + inuse_relaylog *last_inuse_relaylog; + + /* Needed to deal properly with cur_log getting closed and re-opened with a different log under our feet */ @@ -398,6 +406,7 @@ public: void stmt_done(my_off_t event_log_pos, time_t event_creation_time, THD *thd, rpl_group_info *rgi); + int alloc_inuse_relaylog(const char *name); /** Is the replication inside a group? @@ -464,6 +473,25 @@ private: /* + In parallel replication, if we need to re-try a transaction due to a + deadlock or other temporary error, we may need to go back and re-read events + out of an earlier relay log. + + This structure keeps track of the relaylogs that are potentially in use. + Each rpl_group_info has a pointer to one of those, corresponding to the + first GTID event. + + A reference count keeps track of how long a relay log is potentially in use. +*/ +struct inuse_relaylog { + inuse_relaylog *next; + uint64 queued_count; + uint64 dequeued_count; + char name[FN_REFLEN]; +}; + + +/* This is data for various state needed to be kept for the processing of one event group (transaction) during replication. @@ -596,6 +624,14 @@ struct rpl_group_info /* Needs room for "Gtid D-S-N\x00". */ char gtid_info_buf[5+10+1+10+1+20+1]; + /* + Information to be able to re-try an event group in case of a deadlock or + other temporary error. 
+ */ + inuse_relaylog *relay_log; + uint64 retry_start_offset; + uint64 retry_event_count; + rpl_group_info(Relay_log_info *rli_); ~rpl_group_info(); void reinit(Relay_log_info *rli); |