summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2015-03-24 16:00:09 +0100
committerantirez <antirez@gmail.com>2015-03-24 16:14:49 +0100
commit96aa610666c154150e14c4ddbce2945b73cd59cf (patch)
treed523b6edacd4b1c3df3751231fc69f6f1dff6f49
parent1f5e7b88637900005bbcee3304915075dd21673d (diff)
downloadredis-96aa610666c154150e14c4ddbce2945b73cd59cf.tar.gz
Replication: disconnect blocked clients when switching to slave role.
Bug as old as Redis and blocking operations. It's hard to trigger since only happens on instance role switch, but the results are quite bad since an inconsistency between master and slave is created. How to trigger the bug is a good description of the bug itself. 1. Client does "BLPOP mylist 0" in master. 2. Master is turned into slave, that replicates from New-Master. 3. Client does "LPUSH mylist foo" in New-Master. 4. New-Master propagates write to slave. 5. Slave receives the LPUSH, the blocked client get served. Now Master "mylist" key has "foo", Slave "mylist" key is empty. Highlights: * At step "2" above, the client remains attached, basically escaping any check performed during command dispatch: read only slave, in that case. * At step "5" the slave (that was the master), serves the blocked client consuming a list element, which is not consumed on the master side. This scenario is technically likely to happen during failovers, however since Redis Sentinel already disconnects clients using the CLIENT command when changing the role of the instance, the bug is avoided in Sentinel deployments. Closes #2473.
-rw-r--r--src/replication.c26
1 files changed, 26 insertions, 0 deletions
diff --git a/src/replication.c b/src/replication.c
index e6e477896..13938a1f3 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -1425,12 +1425,38 @@ int cancelReplicationHandshake(void) {
return 1;
}
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ redisClient *c = listNodeValue(ln);
+
+ if (c->flags & REDIS_BLOCKED) {
+ addReplySds(c,sdsnew(
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> slave?)\r\n"));
+ unblockClientWaitingData(c);
+ c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ }
+ }
+}
+
/* Set replication to the specified master address and port. */
void replicationSetMaster(char *ip, int port) {
sdsfree(server.masterhost);
server.masterhost = sdsdup(ip);
server.masterport = port;
if (server.master) freeClient(server.master);
+ disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
disconnectSlaves(); /* Force our slaves to resync with us as well. */
replicationDiscardCachedMaster(); /* Don't try a PSYNC. */
freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */