summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2015-07-22 12:45:14 +0200
committerantirez <antirez@gmail.com>2015-07-22 12:45:14 +0200
commit81428a24a884be7693779d31ca4cb141e469f4a7 (patch)
tree507cc2e21fec93420b05f1cc45cf73b14f3b8d7f
parenteb706b42023d22fcc06e4c79973d0c53c09de7ba (diff)
downloadredis-slave-diskless.tar.gz
Initial changes to issue #2427.slave-diskless
-rw-r--r--redis.conf30
-rw-r--r--src/replication.c32
-rw-r--r--src/rio.c4
3 files changed, 55 insertions, 11 deletions
diff --git a/redis.conf b/redis.conf
index 92718346e..f9f569121 100644
--- a/redis.conf
+++ b/redis.conf
@@ -318,8 +318,34 @@ repl-diskless-sync no
# it entirely just set it to 0 seconds and the transfer will start ASAP.
repl-diskless-sync-delay 5
-# Enable diskless replication on slave side.
-# Load RDB directly from the socket rather than saving it to disk first.
+# Enable diskless replication on slave side.
+#
+# When this option is on, the slave loads the RDB directly from the socket
+# rather than saving it to disk first. However there are data loss risks
+# associated with this feature, so make sure to read the following WARNING
+# section.
+#
+# WARNING: Note that this means that the dataset in the slave gets flushed
+# before the slave is actually sure the RDB transfer is complete, so if the
+# replication link is disconnected after the slave already flushed away its
+# dataset, but before successfully loading the new one, the slave will
+# remain empty (for all the time needed to attempt a new synchronization with
+# the master).
+#
+# This means that you should carefully consider the effects of this feature
+# on slaves that may be promoted to masters:
+#
+# 1) Sentinel checks the disconnection time and the offset of slaves before
+# promotion. However it is possible that after the check, the slave
+# attempts to connect with the master again and flushes its dataset.
+# In order to run Sentinel safely in this setup, make sure to enable
+# the "slave-protected-restart" option.
+#
+# 2) Redis Cluster slaves will refuse to try to be promoted to masters if
+# the dataset was flushed, so this is safe in the context of Redis Cluster.
+#
+# 3) If you are using your own HA setup, make sure to enable the
+# "slave-protected-restart" option.
repl-diskless-load no
# Slaves send PINGs to server in a predefined interval. It's possible to change
diff --git a/src/replication.c b/src/replication.c
index 90ac9c4c8..6ded6061e 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -441,9 +441,14 @@ need_full_resync:
* socket target depending on the configuration, and making sure that
* the script cache is flushed before to start.
*
- * Returns REDIS_OK on success or REDIS_ERR otherwise. */
-int startBgsaveForReplication(int use_eof) {
+ * Returns REDIS_OK on success or REDIS_ERR otherwise.
+ *
+ * The caller should pass '1' as the function argument if all the slaves
+ * currently waiting for a BGSAVE claimed to support the EOF-style
+ * streaming format for RDB transfer. Otherwise it should be '0'. */
+int startBgsaveForReplication(int all_slaves_supprot_eof) {
int retval;
+ int use_eof = all_slaves_support_eof && server.repl_diskless_sync;
redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC with target: %s",
use_eof ? "slaves sockets" : "disk");
@@ -808,7 +813,8 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
}
}
if (slaves_waiting_eof || slaves_waiting_noneof) {
- /* if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */
+ /* If there is at least one slave that doesn't support EOF, we'll
+ * start a non-EOF replication. */
if (startBgsaveForReplication(slaves_waiting_noneof==0) != REDIS_OK) {
listIter li;
@@ -1054,6 +1060,17 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
replicationAbortSyncTransfer();
rioFreeFd(&rdb, NULL);
+ /* Remove the half-loaded data, and load back the old dataset
+ * if we have persistence turned on.
+ *
+ * TODO:
+ * 1) Actually allow rdbLoadRio() to fail without calling exit().
+ * 2) Load RDB / AOF.
+ *
+ * Right now this code path is not entered when the connection
+ * breaks between master and slave AFAIK.
+ */
+ emptyDb(NULL);
return;
}
if (usemark) {
@@ -1379,7 +1396,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
}
sdsfree(err);
}
-
+
/* Inform the master that this slave supports EOF marker of diskless-sync */
{
err = sendSynchronousCommand(fd,"REPLCONF","eof-supported","yes",
@@ -2174,9 +2191,10 @@ void replicationCron(void) {
if ((slaves_waiting_eof || slaves_waiting_noneof) && max_idle > server.repl_diskless_sync_delay) {
/* Start a BGSAVE. Usually with socket target, or with disk target
- * if there was a recent socket -> disk config change.
- * if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */
- if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK) {
+ * if there was a recent socket -> disk config change.
+ * If there is at least one slave that doesn't support EOF, we'll
+ * start a non-EOF replication. */
+ if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK){
/* It started! We need to change the state of slaves
* from WAIT_BGSAVE_START to WAIT_BGSAVE_END in case
* the current target is disk. Otherwise it was already done
diff --git a/src/rio.c b/src/rio.c
index 96b7105d6..0df43be55 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) {
/* if the buffer is too small for the entire request: realloc */
if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len)
r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf));
-
+
/* if the remaining unused buffer is not large enough: memmove so that we can read the rest */
if (len > avail && sdsavail(r->io.fd.buf) < len - avail) {
sdsrange(r->io.fd.buf, r->io.fd.pos, -1);
r->io.fd.pos = 0;
}
-
+
/* if we don't already have all the data in the sds, read more */
while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) {
size_t toread = len - (sdslen(r->io.fd.buf) - r->io.fd.pos);