Initial changes to issue #2427. (slave-diskless)

author: antirez <antirez@gmail.com> 2015-07-22 12:45:14 +0200
committer: antirez <antirez@gmail.com> 2015-07-22 12:45:14 +0200
commit: 81428a24a884be7693779d31ca4cb141e469f4a7 (patch)
tree: 507cc2e21fec93420b05f1cc45cf73b14f3b8d7f
parent: eb706b42023d22fcc06e4c79973d0c53c09de7ba (diff)
download: redis-slave-diskless.tar.gz
3 files changed, 55 insertions, 11 deletions
diff --git a/redis.conf b/redis.conf
index 92718346e..f9f569121 100644
--- a/redis.conf
+++ b/redis.conf
@@ -318,8 +318,34 @@ repl-diskless-sync no
 # it entirely just set it to 0 seconds and the transfer will start ASAP.
 repl-diskless-sync-delay 5
 
-# Enable diskless replication on slave side. 
-# Load RDB directly from the socket rather than saving it to disk first.
+# Enable diskless replication on slave side.
+#
+# When this option is on, the slave loads the RDB directly from the socket
+# rather than saving it to disk first. However there are data loss risks
+# associated with this feature, so make sure to read the following WARNING
+# section.
+#
+# WARNING: Note that this means that the dataset in the slave gets flushed
+# before the slave is actually sure the RDB transfer is complete, so if the
+# replication link is disconnected after the slave already flushed away its
+# dataset, but before successfully loading the new one, the slave will
+# remain empty (for all the time needed to attempt a new synchronization with
+# the master).
+#
+# This means that you should carefully consider the effects of this feature
+# on slaves that may be promoted to masters:
+#
+# 1) Sentinel checks the disconnection time and the offset of slaves before
+#    promotion. However it is possible that after the check, the slave
+#    attempts to connect with the master again and flushes its dataset.
+#    In order to run Sentinel safely in this setup, make sure to enable
+#    the "slave-protected-restart" option.
+#
+# 2) Redis Cluster slaves will refuse to try to be promoted to masters if
+#    the dataset was flushed, so this is safe in the context of Redis Cluster.
+#
+# 3) If you are using your own HA setup, make sure to enable the
+#    "slave-protected-restart" option.
 repl-diskless-load no
 
 # Slaves send PINGs to server in a predefined interval. It's possible to change
diff --git a/src/replication.c b/src/replication.c
index 90ac9c4c8..6ded6061e 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -441,9 +441,14 @@ need_full_resync:
  * socket target depending on the configuration, and making sure that
  * the script cache is flushed before to start.
  *
- * Returns REDIS_OK on success or REDIS_ERR otherwise. */
-int startBgsaveForReplication(int use_eof) {
+ * Returns REDIS_OK on success or REDIS_ERR otherwise.
+ *
+ * The caller should pass '1' as the function argument if all the slaves
+ * currently waiting for a BGSAVE claimed to support the EOF-style
+ * streaming format for RDB transfer. Otherwise it should be '0'. */
+int startBgsaveForReplication(int all_slaves_supprot_eof) {
     int retval;
+    int use_eof = all_slaves_support_eof && server.repl_diskless_sync;
 
     redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC with target: %s",
         use_eof ? "slaves sockets" : "disk");
@@ -808,7 +813,8 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
         }
     }
     if (slaves_waiting_eof || slaves_waiting_noneof) {
-        /* if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */
+        /* if there is at least one slave that doesn't support EOF, we'll
+         * start a non-eof replication */
         if (startBgsaveForReplication(slaves_waiting_noneof==0) != REDIS_OK) {
             listIter li;
 
@@ -1054,6 +1060,17 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
             redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
             replicationAbortSyncTransfer();
             rioFreeFd(&rdb, NULL);
+            /* Remove the half-loaded data, and load back the old dataset
+             * if we have persistence turned on.
+             *
+             * TODO:
+             * 1) Actually allow rdbLoadRio() to fail without calling exit().
+             * 2) Load RDB / AOF.
+             *
+             * Right now this code path is not entered when the connection
+             * breaks between master and slave AFAIK.
+             */
+            emptyDb(NULL);
             return;
         }
         if (usemark) {
@@ -1379,7 +1396,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
         }
         sdsfree(err);
     }
-    
+
     /* Inform the master that this slave supports EOF marker of diskless-sync */
     {
         err = sendSynchronousCommand(fd,"REPLCONF","eof-supported","yes",
@@ -2174,9 +2191,10 @@ void replicationCron(void) {
 
         if ((slaves_waiting_eof || slaves_waiting_noneof) && max_idle > server.repl_diskless_sync_delay) {
             /* Start a BGSAVE. Usually with socket target, or with disk target
-             * if there was a recent socket -> disk config change. 
-             * if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */
-            if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK) {
+             * if there was a recent socket -> disk config change.
+             * if there is at least one slave that doesn't support EOF, we'll
+             * start a non-eof replication */
+            if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK){
                 /* It started! We need to change the state of slaves
                  * from WAIT_BGSAVE_START to WAIT_BGSAVE_END in case
                  * the current target is disk. Otherwise it was already done
diff --git a/src/rio.c b/src/rio.c
index 96b7105d6..0df43be55 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) {
     /* if the buffer is too small for the entire request: realloc */
     if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len)
         r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf));
-        
+
     /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */
     if (len > avail && sdsavail(r->io.fd.buf) < len - avail) {
         sdsrange(r->io.fd.buf, r->io.fd.pos, -1);
         r->io.fd.pos = 0;
     }
-    
+
     /* if we don't already have all the data in the sds, read more */
     while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) {
         size_t toread = len - (sdslen(r->io.fd.buf) - r->io.fd.pos);
author	antirez <antirez@gmail.com>	2015-07-22 12:45:14 +0200
committer	antirez <antirez@gmail.com>	2015-07-22 12:45:14 +0200
commit	81428a24a884be7693779d31ca4cb141e469f4a7 (patch)
tree	507cc2e21fec93420b05f1cc45cf73b14f3b8d7f
parent	eb706b42023d22fcc06e4c79973d0c53c09de7ba (diff)
download	redis-slave-diskless.tar.gz