summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2012-08-31 15:32:57 +0200
committerantirez <antirez@gmail.com>2012-09-03 11:48:27 +0200
commitfd2a8951bfebb5ca9584c785b424574088b35b9f (patch)
tree85ce94c5655ce596dbee0fb1db746addd2213753
parent42a239b8887a2f840f4207a43c8277d48df08daf (diff)
downloadredis-fd2a8951bfebb5ca9584c785b424574088b35b9f.tar.gz
Send an async PING before starting replication with master.
During the first synchronization step of the replication process, a Redis slave connects with the master in a non blocking way. However once the connection is established the replication continues sending the REPLCONF command, and sometimes the AUTH command if needed. Those commands are send in a partially blocking way (blocking with timeout in the order of seconds). Because it is common for a blocked master to accept connections even if it is actually not able to reply to the slave requests, it was easy for a slave to block if the master had serious issues, but was still able to accept connections in the listening socket. For this reason we now send an asynchronous PING request just after the non blocking connection ended in a successful way, and wait for the reply before to continue with the replication process. It is very unlikely that a master replying to PING can't reply to the other commands. This solution was proposed by Didier Spezia (Thanks!) so that we don't need to turn all the replication process into a non blocking affair, but still the probability of a slave blocked is minimal even in the event of a failing master. Also we now use getsockopt(SO_ERROR) in order to check errors ASAP in the event handler, instead of waiting for actual I/O to return an error. This commit fixes issue #632.
-rw-r--r--src/redis.h5
-rw-r--r--src/replication.c76
2 files changed, 70 insertions, 11 deletions
diff --git a/src/redis.h b/src/redis.h
index 1dce7b7a6..c3fe9a2db 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -167,8 +167,9 @@
#define REDIS_REPL_NONE 0 /* No active replication */
#define REDIS_REPL_CONNECT 1 /* Must connect to master */
#define REDIS_REPL_CONNECTING 2 /* Connecting to master */
-#define REDIS_REPL_TRANSFER 3 /* Receiving .rdb from master */
-#define REDIS_REPL_CONNECTED 4 /* Connected to master */
+#define REDIS_REPL_RECEIVE_PONG 3 /* Wait for PING reply */
+#define REDIS_REPL_TRANSFER 4 /* Receiving .rdb from master */
+#define REDIS_REPL_CONNECTED 5 /* Connected to master */
/* Synchronous read timeout - slave side */
#define REDIS_REPL_SYNCIO_TIMEOUT 5
diff --git a/src/replication.c b/src/replication.c
index 543477b66..72b88977a 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -483,6 +483,8 @@ char *sendSynchronousCommand(int fd, ...) {
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
char tmpfile[256], *err;
int dfd, maxtries = 5;
+ int sockerr = 0;
+ socklen_t errlen = sizeof(sockerr);
REDIS_NOTUSED(el);
REDIS_NOTUSED(privdata);
REDIS_NOTUSED(mask);
@@ -494,11 +496,62 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
}
- redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
- /* This event should only be triggered once since it is used to have a
- * non-blocking connect(2) to the master. It has been triggered when this
- * function is called, so we can delete it. */
- aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
+ /* Check for errors in the socket. */
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
+ sockerr = errno;
+ if (sockerr) {
+ aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
+ redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s",
+ strerror(sockerr));
+ goto error;
+ }
+
+ /* If we were connecting, it's time to send a non blocking PING, we want to
+ * make sure the master is able to reply before going into the actual
+ * replication process where we have long timeouts in the order of
+ * seconds (in the meantime the slave would block). */
+ if (server.repl_state == REDIS_REPL_CONNECTING) {
+ redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
+ /* Delete the writable event so that the readable event remains
+ * registered and we can wait for the PONG reply. */
+ aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
+ server.repl_state = REDIS_REPL_RECEIVE_PONG;
+ /* Send the PING, don't check for errors at all, we have the timeout
+ * that will take care about this. */
+ syncWrite(fd,"PING\r\n",6,100);
+ return;
+ }
+
+ /* Receive the PONG command. */
+ if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
+ char buf[1024];
+
+ /* Delete the readable event, we no longer need it now that there is
+ * the PING reply to read. */
+ aeDeleteFileEvent(server.el,fd,AE_READABLE);
+
+ /* Read the reply with explicit timeout. */
+ buf[0] = '\0';
+ if (syncReadLine(fd,buf,sizeof(buf),
+ server.repl_syncio_timeout*1000) == -1)
+ {
+ redisLog(REDIS_WARNING,
+ "I/O error reading PING reply from master: %s",
+ strerror(errno));
+ goto error;
+ }
+
+ /* We don't care about the reply, it can be +PONG or an error since
+ * the server requires AUTH. As long as it replies correctly, it's
+ * fine from our point of view. */
+ if (buf[0] != '-' && buf[0] != '+') {
+ redisLog(REDIS_WARNING,"Unexpected reply to PING from master.");
+ goto error;
+ } else {
+ redisLog(REDIS_NOTICE,
+ "Master replied to PING, replication can continue...");
+ }
+ }
/* AUTH with the master if required. */
if(server.masterauth) {
@@ -563,8 +616,9 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
error:
- server.repl_state = REDIS_REPL_CONNECT;
close(fd);
+ server.repl_transfer_s = -1;
+ server.repl_state = REDIS_REPL_CONNECT;
return;
}
@@ -597,7 +651,8 @@ int connectWithMaster(void) {
void undoConnectWithMaster(void) {
int fd = server.repl_transfer_s;
- redisAssert(server.repl_state == REDIS_REPL_CONNECTING);
+ redisAssert(server.repl_state == REDIS_REPL_CONNECTING ||
+ server.repl_state == REDIS_REPL_RECEIVE_PONG);
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
close(fd);
server.repl_transfer_s = -1;
@@ -613,7 +668,8 @@ void slaveofCommand(redisClient *c) {
if (server.master) freeClient(server.master);
if (server.repl_state == REDIS_REPL_TRANSFER)
replicationAbortSyncTransfer();
- else if (server.repl_state == REDIS_REPL_CONNECTING)
+ else if (server.repl_state == REDIS_REPL_CONNECTING ||
+ server.repl_state == REDIS_REPL_RECEIVE_PONG)
undoConnectWithMaster();
server.repl_state = REDIS_REPL_NONE;
redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
@@ -651,7 +707,9 @@ void slaveofCommand(redisClient *c) {
void replicationCron(void) {
/* Non blocking connection timeout? */
- if (server.masterhost && server.repl_state == REDIS_REPL_CONNECTING &&
+ if (server.masterhost &&
+ (server.repl_state == REDIS_REPL_CONNECTING ||
+ server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");