summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2017-11-24 11:08:22 +0100
committerantirez <antirez@gmail.com>2017-11-24 11:10:03 +0100
commit2a27da1cf5880b0e8a09b453b7082d66e808748d (patch)
treebe62fbb7dcbbb9c4db19918e7d49fb3576f8468c
parente0c2a0ecfd3ec262175e7f6ce0eeb30b5dfa98ef (diff)
downloadredis-2a27da1cf5880b0e8a09b453b7082d66e808748d.tar.gz
PSYNC2: reorganize comments related to recent fixes.
Related to PR #4412 and issue #4407.
-rw-r--r--src/rdb.c27
-rw-r--r--src/replication.c23
2 files changed, 24 insertions, 26 deletions
diff --git a/src/rdb.c b/src/rdb.c
index acf3197f0..00106cac4 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2039,19 +2039,16 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
* only when repl_backlog is not NULL. If the repl_backlog is NULL,
* it means that the instance isn't in any replication chains. In this
* scenario the replication info is useless, because when a slave
- * connect to us, the NULL repl_backlog will trigger a full synchronization,
- * at the same time we will use a new replid and clear replid2.
- * And remember that after free backlog if we reach repl_backlog_time_limit,
- * we will use a new replid and clear replid2 too. So there is only one
- * scenario which can make repl_stream_db be -1, that is the instance is
- * a master, and it have repl_backlog, but server.slaveseldb is -1. */
+ * connects to us, the NULL repl_backlog will trigger a full
+ * synchronization, at the same time we will use a new replid and clear
+ * replid2. */
if (!server.masterhost && server.repl_backlog) {
- rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
/* Note that when server.slaveseldb is -1, it means that this master
* didn't apply any write commands after a full synchronization.
* So we can let repl_stream_db be 0, this allows a restarted slave
* to reload replication ID/offset, it's safe because the next write
* command must generate a SELECT statement. */
+ rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
return rsi;
}
@@ -2061,16 +2058,12 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
rsi->repl_stream_db = server.master->db->id;
return rsi;
}
- /* It is useful to persist cached_master's db id inside RDB file.
- * When a slave lost master's connection, server.master will be
- * cached as server.cached_master, after that a slave can not
- * increment the master_repl_offset because slave only apply data
- * from connected master, so the cached_master can hold right
- * replication info. But please note that this action is safe
- * only after we fix the free backlog problem, because when a master
- * turn to be a slave, it will use itself as the server.cached_master,
- * that is dangerous if we didn't use a new replication ID after
- * free backlog. */
+
+ /* If we have a cached master we can use it in order to populate the
+ * replication selected DB info inside the RDB file: the slave can
+ * increment the master_repl_offset only from data arriving from the
+ * master, so if we are disconnected the offset in the cached master
+ * is valid. */
if (server.cached_master) {
rsi->repl_stream_db = server.cached_master->db->id;
return rsi;
diff --git a/src/replication.c b/src/replication.c
index fe7b0f739..cf4db3e3a 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -2614,15 +2614,20 @@ void replicationCron(void) {
if (idle > server.repl_backlog_time_limit) {
/* When we free the backlog, we always use a new
- * replication ID and clear the ID2. Since without
- * backlog we can not increment master_repl_offset
- * even do write commands, that may lead to inconsistency
- * when we try to connect a "slave-before" master
- * (if this master is our slave before, our replid
- * equals the master's replid2). As the master have our
- * history, so we can match the master's replid2 and
- * second_replid_offset, that make partial sync work,
- * but the data is inconsistent. */
+ * replication ID and clear the ID2. This is needed
+ * because when there is no backlog, the master_repl_offset
+ * is not updated, but we would still retain our replication
+ * ID, leading to the following problem:
+ *
+ * 1. We are a master instance.
+ * 2. Our slave is promoted to master. It's repl-id-2 will
+ * be the same as our repl-id.
+ * 3. We, yet as master, receive some updates, that will not
+ * increment the master_repl_offset.
+ * 4. Later we are turned into a slave, connecto to the new
+ * master that will accept our PSYNC request by second
+ * replication ID, but there will be data inconsistency
+ * because we received writes. */
changeReplicationId();
clearReplicationId2();
freeReplicationBacklog();