diff options
author | antirez <antirez@gmail.com> | 2015-07-22 12:45:14 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2015-07-22 12:45:14 +0200 |
commit | 81428a24a884be7693779d31ca4cb141e469f4a7 (patch) | |
tree | 507cc2e21fec93420b05f1cc45cf73b14f3b8d7f | |
parent | eb706b42023d22fcc06e4c79973d0c53c09de7ba (diff) | |
download | redis-slave-diskless.tar.gz |
Initial changes to issue #2427.slave-diskless
-rw-r--r-- | redis.conf | 30 | ||||
-rw-r--r-- | src/replication.c | 32 | ||||
-rw-r--r-- | src/rio.c | 4 |
3 files changed, 55 insertions, 11 deletions
diff --git a/redis.conf b/redis.conf index 92718346e..f9f569121 100644 --- a/redis.conf +++ b/redis.conf @@ -318,8 +318,34 @@ repl-diskless-sync no # it entirely just set it to 0 seconds and the transfer will start ASAP. repl-diskless-sync-delay 5 -# Enable diskless replication on slave side. -# Load RDB directly from the socket rather than saving it to disk first. +# Enable diskless replication on slave side. +# +# When this option is on, the slave loads the RDB directly from the socket +# rather than saving it to disk first. However there are data loss risks +# associated with this feature, so make sure to read the following WARNING +# section. +# +# WARNING: Note that this means that the dataset in the slave gets flushed +# before the slave is actually sure the RDB transfer is complete, so if the +# replication link is disconnected after the slave already flushed away its +# dataset, but before successfully loading the new one, the slave will +# remain empty (for all the time needed to attempt a new synchronization with +# the master). +# +# This means that you should carefully consider the effects of this feature +# on slaves that may be promoted to masters: +# +# 1) Sentinel checks the disconnection time and the offset of slaves before +# promotion. However it is possible that after the check, the slave +# attempts to connect with the master again and flushes its dataset. +# In order to run Sentinel safely in this setup, make sure to enable +# the "slave-protected-restart" option. +# +# 2) Redis Cluster slaves will refuse to try to be promoted to masters if +# the dataset was flushed, so this is safe in the context of Redis Cluster. +# +# 3) If you are using your own HA setup, make sure to enable the +# "slave-protected-restart" option. repl-diskless-load no # Slaves send PINGs to server in a predefined interval. 
It's possible to change diff --git a/src/replication.c b/src/replication.c index 90ac9c4c8..6ded6061e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -441,9 +441,14 @@ need_full_resync: * socket target depending on the configuration, and making sure that * the script cache is flushed before to start. * - * Returns REDIS_OK on success or REDIS_ERR otherwise. */ -int startBgsaveForReplication(int use_eof) { + * Returns REDIS_OK on success or REDIS_ERR otherwise. + * + * The caller should pass '1' as the function argument if all the slaves + * currently waiting for a BGSAVE all claimed to support the EOF-style + * streaming format for RDB transfer. Otherwise it should be '0'. */ +int startBgsaveForReplication(int all_slaves_supprot_eof) { int retval; + int use_eof = all_slaves_support_eof && server.repl_diskless_sync; redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC with target: %s", use_eof ? "slaves sockets" : "disk"); @@ -808,7 +813,8 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) { } } if (slaves_waiting_eof || slaves_waiting_noneof) { - /* if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */ + /* if there is at least one slave that doesn't support EOF, we'll + * start an non-eof replication */ if (startBgsaveForReplication(slaves_waiting_noneof==0) != REDIS_OK) { listIter li; @@ -1054,6 +1060,17 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); replicationAbortSyncTransfer(); rioFreeFd(&rdb, NULL); + /* Remove the half-loaded data, and load back the old dataset + * if we have persistence turned on. + * + * TODO: + * 1) Actually allow rdbLoadRio() to don't fail with exit(). + * 2) Load RDB / AOF. + * + * Right now this code path is not entered when the connection + * breaks between master and slave AFAIK. 
+ */ + emptyDb(NULL); return; } if (usemark) { @@ -1379,7 +1396,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { } sdsfree(err); } - + /* Inform the master that this slave supports EOF marker of diskless-sync */ { err = sendSynchronousCommand(fd,"REPLCONF","eof-supported","yes", @@ -2174,9 +2191,10 @@ void replicationCron(void) { if ((slaves_waiting_eof || slaves_waiting_noneof) && max_idle > server.repl_diskless_sync_delay) { /* Start a BGSAVE. Usually with socket target, or with disk target - * if there was a recent socket -> disk config change. - * if there is at least one slave that doesn't support EOF, we'll start an non-eof replication */ - if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK) { + * if there was a recent socket -> disk config change. + * if there is at least one slave that doesn't support EOF, we'll + * start an non-eof replication */ + if (startBgsaveForReplication(slaves_waiting_noneof==0) == REDIS_OK){ /* It started! We need to change the state of slaves * from WAIT_BGSAVE_START to WAIT_BGSAVE_END in case * the current target is disk. Otherwise it was already done @@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { /* if the buffer is too small for the entire request: realloc */ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - + /* if we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t toread = len - (sdslen(r->io.fd.buf) - r->io.fd.pos); |