summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2015-03-24 16:00:09 +0100
committerantirez <antirez@gmail.com>2015-03-24 16:00:09 +0100
commitc3ad70901f962808a1c0c474951406af81d26a3f (patch)
tree702b3cb9868a82f8f8a87c04fc14bacee3495a8a /src
parent9b7f8b1c9b379ab842d40df4636dfbbeb6376fcb (diff)
downloadredis-c3ad70901f962808a1c0c474951406af81d26a3f.tar.gz
Replication: disconnect blocked clients when switching to slave role.
Bug as old as Redis and blocking operations. It's hard to trigger since only happens on instance role switch, but the results are quite bad since an inconsistency between master and slave is created. How to trigger the bug is a good description of the bug itself. 1. Client does "BLPOP mylist 0" in master. 2. Master is turned into slave, that replicates from New-Master. 3. Client does "LPUSH mylist foo" in New-Master. 4. New-Master propagates write to slave. 5. Slave receives the LPUSH, the blocked client get served. Now Master "mylist" key has "foo", Slave "mylist" key is empty. Highlights: * At step "2" above, the client remains attached, basically escaping any check performed during command dispatch: read only slave, in that case. * At step "5" the slave (that was the master), serves the blocked client consuming a list element, which is not consumed on the master side. This scenario is technically likely to happen during failovers, however since Redis Sentinel already disconnects clients using the CLIENT command when changing the role of the instance, the bug is avoided in Sentinel deployments. Closes #2473.
Diffstat (limited to 'src')
-rw-r--r--src/blocked.c24
-rw-r--r--src/redis.h1
-rw-r--r--src/replication.c1
3 files changed, 26 insertions, 0 deletions
diff --git a/src/blocked.c b/src/blocked.c
index ae2500aac..8acfb8184 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -155,3 +155,27 @@ void replyToBlockedClientTimedOut(redisClient *c) {
}
}
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ redisClient *c = listNodeValue(ln);
+
+ if (c->flags & REDIS_BLOCKED) {
+ addReplySds(c,sdsnew(
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> slave?)\r\n"));
+ unblockClient(c);
+ c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ }
+ }
+}
diff --git a/src/redis.h b/src/redis.h
index 34d8b0a4c..53f3967d7 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -1395,6 +1395,7 @@ void blockClient(redisClient *c, int btype);
void unblockClient(redisClient *c);
void replyToBlockedClientTimedOut(redisClient *c);
int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit);
+void disconnectAllBlockedClients(void);
/* Git SHA1 */
char *redisGitSHA1(void);
diff --git a/src/replication.c b/src/replication.c
index 697acbef5..afea75b6a 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -1444,6 +1444,7 @@ void replicationSetMaster(char *ip, int port) {
server.masterhost = sdsnew(ip);
server.masterport = port;
if (server.master) freeClient(server.master);
+ disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
disconnectSlaves(); /* Force our slaves to resync with us as well. */
replicationDiscardCachedMaster(); /* Don't try a PSYNC. */
freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */