author     Oran Agra <oran@redislabs.com>   2021-01-12 10:14:04 +0200
committer  Oran Agra <oran@redislabs.com>   2021-01-12 10:14:04 +0200
commit     b34fc03e5e45c40053bea491dcdee48940c34b0e (patch)
tree       9a9f0776696b373ca0a6802a7118323371d4d11a
parent     b8c67ce41b51247c02a639929e4fab20189afa1e (diff)
parent     4f8458d8d6c59a47c6ffbb1ac88c69854ccf1592 (diff)
download   redis-b34fc03e5e45c40053bea491dcdee48940c34b0e.tar.gz
Merge unstable into 6.2
-rw-r--r--deps/hiredis/async_private.h2
-rw-r--r--redis.conf18
-rw-r--r--src/Makefile22
-rw-r--r--src/acl.c46
-rw-r--r--src/aof.c80
-rw-r--r--src/bio.c61
-rw-r--r--src/bio.h6
-rw-r--r--src/bitops.c8
-rw-r--r--src/blocked.c25
-rw-r--r--src/childinfo.c88
-rw-r--r--src/cluster.c120
-rw-r--r--src/cluster.h2
-rw-r--r--src/config.c124
-rw-r--r--src/crc64.c4
-rw-r--r--src/crcspeed.c3
-rw-r--r--src/db.c53
-rw-r--r--src/debug.c123
-rw-r--r--src/debugmacro.h2
-rw-r--r--src/defrag.c1
-rw-r--r--src/dict.c25
-rw-r--r--src/dict.h14
-rw-r--r--src/evict.c2
-rw-r--r--src/expire.c2
-rw-r--r--src/geo.c95
-rw-r--r--src/hyperloglog.c23
-rw-r--r--src/intset.c6
-rw-r--r--src/latency.c24
-rw-r--r--src/lazyfree.c81
-rw-r--r--src/module.c84
-rw-r--r--src/modules/testmodule.c2
-rw-r--r--src/mt19937-64.c187
-rw-r--r--src/mt19937-64.h87
-rw-r--r--src/multi.c71
-rw-r--r--src/networking.c418
-rw-r--r--src/object.c42
-rw-r--r--src/pubsub.c10
-rw-r--r--src/quicklist.c25
-rw-r--r--src/rax.c4
-rw-r--r--src/rdb.c59
-rw-r--r--src/redis-benchmark.c6
-rw-r--r--src/redis-check-rdb.c9
-rw-r--r--src/redis-cli.c11
-rw-r--r--src/redismodule.h2
-rw-r--r--src/replication.c322
-rw-r--r--src/scripting.c40
-rw-r--r--src/sds.c50
-rw-r--r--src/sentinel.c79
-rw-r--r--src/server.c490
-rw-r--r--src/server.h167
-rw-r--r--src/slowlog.c14
-rw-r--r--src/sort.c6
-rw-r--r--src/t_hash.c2
-rw-r--r--src/t_list.c240
-rw-r--r--src/t_set.c8
-rw-r--r--src/t_stream.c806
-rw-r--r--src/t_string.c6
-rw-r--r--src/t_zset.c681
-rw-r--r--src/testhelp.h4
-rw-r--r--src/tracking.c78
-rw-r--r--src/ziplist.c12
-rw-r--r--src/zmalloc.c19
-rw-r--r--tests/cluster/cluster.tcl5
-rw-r--r--tests/cluster/tests/16-transactions-on-replica.tcl2
-rw-r--r--tests/cluster/tests/17-diskless-load-swapdb.tcl12
-rw-r--r--tests/cluster/tests/18-info.tcl45
-rw-r--r--tests/integration/rdb.tcl91
-rw-r--r--tests/modules/propagate.c44
-rw-r--r--tests/support/redis.tcl11
-rw-r--r--tests/support/server.tcl16
-rw-r--r--tests/support/test.tcl48
-rw-r--r--tests/support/util.tcl6
-rw-r--r--tests/test_helper.tcl1
-rw-r--r--tests/unit/auth.tcl41
-rw-r--r--tests/unit/geo.tcl56
-rw-r--r--tests/unit/info.tcl153
-rw-r--r--tests/unit/memefficiency.tcl2
-rw-r--r--tests/unit/moduleapi/propagate.tcl114
-rw-r--r--tests/unit/multi.tcl110
-rw-r--r--tests/unit/pause.tcl200
-rw-r--r--tests/unit/scripting.tcl24
-rw-r--r--tests/unit/tracking.tcl118
-rw-r--r--tests/unit/type/list.tcl17
-rw-r--r--tests/unit/type/stream-cgroups.tcl197
-rw-r--r--tests/unit/type/stream.tcl140
-rw-r--r--tests/unit/type/zset.tcl82
-rw-r--r--tests/unit/wait.tcl21
86 files changed, 5000 insertions, 1657 deletions
diff --git a/deps/hiredis/async_private.h b/deps/hiredis/async_private.h
index b9d23fffd..ea0558d42 100644
--- a/deps/hiredis/async_private.h
+++ b/deps/hiredis/async_private.h
@@ -51,7 +51,7 @@
#define _EL_CLEANUP(ctx) do { \
if ((ctx)->ev.cleanup) (ctx)->ev.cleanup((ctx)->ev.data); \
ctx->ev.cleanup = NULL; \
- } while(0);
+ } while(0)
static inline void refreshTimeout(redisAsyncContext *ctx) {
#define REDIS_TIMER_ISSET(tvp) \
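The _EL_CLEANUP hunk above drops the stray semicolon after the do { ... } while(0) wrapper (the same fix is applied to the config_get_* macros in config.c further down). The trailing semicolon turns the expansion into two statements, which breaks when the macro is used as the body of an if/else. A minimal, self-contained C sketch of the corrected idiom, not taken from this commit:

    #include <stdio.h>
    #include <stdlib.h>

    /* Correct form: no semicolon after while(0), so the expansion stays a
     * single statement and the caller supplies the terminating ';'. */
    #define CLEANUP(p) do { \
            if (p) { free(p); (p) = NULL; } \
        } while(0)

    int main(void) {
        char *buf = malloc(16);
        if (buf == NULL)
            CLEANUP(buf);   /* with a stray ';' inside the macro, this if/else */
        else                /* would fail to compile ("else without a previous if") */
            printf("buffer allocated\n");
        CLEANUP(buf);       /* frees buf and resets it to NULL */
        return 0;
    }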
diff --git a/redis.conf b/redis.conf
index 849f171bc..a5062fda9 100644
--- a/redis.conf
+++ b/redis.conf
@@ -196,9 +196,12 @@ tcp-keepalive 300
#
# tls-cluster yes
-# Explicitly specify TLS versions to support. Allowed values are case insensitive
-# and include "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3" (OpenSSL >= 1.1.1) or
-# any combination. To enable only TLSv1.2 and TLSv1.3, use:
+# By default, only TLSv1.2 and TLSv1.3 are enabled. It is highly recommended
+# that older, formally deprecated versions be kept disabled to reduce the attack surface.
+# You can explicitly specify TLS versions to support.
+# Allowed values are case insensitive and include "TLSv1", "TLSv1.1", "TLSv1.2",
+# "TLSv1.3" (OpenSSL >= 1.1.1) or any combination.
+# To enable only TLSv1.2 and TLSv1.3, use:
#
# tls-protocols "TLSv1.2 TLSv1.3"
@@ -688,7 +691,7 @@ replica-priority 100
# Redis implements server assisted support for client side caching of values.
# This is implemented using an invalidation table that remembers, using
-# 16 millions of slots, what clients may have certain subsets of keys. In turn
+# a radix tree indexed by key name, what clients have which keys. In turn
# this is used in order to send invalidation messages to clients. Please
# check this page to understand more about the feature:
#
@@ -1973,3 +1976,10 @@ jemalloc-bg-thread yes
#
# Set bgsave child process to cpu affinity 1,10,11
# bgsave_cpulist 1,10-11
+
+# In some cases Redis will emit warnings and even refuse to start if it detects
+# that the system is in a bad state. It is possible to suppress these warnings
+# by setting the following config, which takes a space-delimited list of warnings
+# to suppress:
+#
+# ignore-warnings ARM64-COW-BUG
diff --git a/src/Makefile b/src/Makefile
index 0329da8c9..3bc9f11c0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -115,8 +115,18 @@ else
ifeq ($(uname_S),Darwin)
# Darwin
FINAL_LIBS+= -ldl
+ # Homebrew's OpenSSL is not linked to /usr/local to avoid
+ # conflicts with the system's LibreSSL installation, so it
+ # must be referenced explicitly during build.
+ifeq ($(uname_M),arm64)
+ # Homebrew arm64 uses /opt/homebrew as HOMEBREW_PREFIX
+ OPENSSL_CFLAGS=-I/opt/homebrew/opt/openssl/include
+ OPENSSL_LDFLAGS=-L/opt/homebrew/opt/openssl/lib
+else
+ # Homebrew x86/ppc uses /usr/local as HOMEBREW_PREFIX
OPENSSL_CFLAGS=-I/usr/local/opt/openssl/include
OPENSSL_LDFLAGS=-L/usr/local/opt/openssl/lib
+endif
else
ifeq ($(uname_S),AIX)
# AIX
@@ -260,11 +270,11 @@ endif
REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
-REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o
REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
-REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o
+REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o
REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
-REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o
+REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o
REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
@@ -336,7 +346,7 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/hdr_histogram.o $(FINAL_LIBS)
-dict-benchmark: dict.c zmalloc.c sds.c siphash.c
+dict-benchmark: dict.c zmalloc.c sds.c siphash.c mt19937-64.c
$(REDIS_CC) $(FINAL_CFLAGS) $^ -D DICT_BENCHMARK_MAIN -o $@ $(FINAL_LIBS)
DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
@@ -405,8 +415,8 @@ install: all
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_BENCHMARK_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CLI_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_RDB_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_AOF_NAME) $(INSTALL_BIN)
+ @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_RDB_NAME)
+ @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_AOF_NAME)
@ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
uninstall:
diff --git a/src/acl.c b/src/acl.c
index a1a7c4237..14d023cc3 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -174,15 +174,15 @@ sds ACLHashPassword(unsigned char *cleartext, size_t len) {
return sdsnewlen(hex,HASH_PASSWORD_LEN);
}
-/* Given a hash and the hash length, returns C_OK if it is a valid password
+/* Given a hash and the hash length, returns C_OK if it is a valid password
* hash, or C_ERR otherwise. */
int ACLCheckPasswordHash(unsigned char *hash, int hashlen) {
if (hashlen != HASH_PASSWORD_LEN) {
- return C_ERR;
+ return C_ERR;
}
-
+
/* Password hashes can only be characters that represent
- * hexadecimal values, which are numbers and lowercase
+ * hexadecimal values, which are numbers and lowercase
* characters 'a' through 'f'. */
for(int i = 0; i < HASH_PASSWORD_LEN; i++) {
char c = hash[i];
@@ -2184,18 +2184,30 @@ void aclCommand(client *c) {
}
} else if (c->argc == 2 && !strcasecmp(sub,"help")) {
const char *help[] = {
-"LOAD -- Reload users from the ACL file.",
-"SAVE -- Save the current config to the ACL file.",
-"LIST -- Show user details in config file format.",
-"USERS -- List all the registered usernames.",
-"SETUSER <username> [attribs ...] -- Create or modify a user.",
-"GETUSER <username> -- Get the user details.",
-"DELUSER <username> [...] -- Delete a list of users.",
-"CAT -- List available categories.",
-"CAT <category> -- List commands inside category.",
-"GENPASS [<bits>] -- Generate a secure user password.",
-"WHOAMI -- Return the current connection username.",
-"LOG [<count> | RESET] -- Show the ACL log entries.",
+"CAT [<category>]",
+" List all commands that belong to <category>, or all command categories",
+" when no category is specified.",
+"DELUSER <username> [<username> ...]",
+" Delete a list of users.",
+"GETUSER <username>",
+" Get the user's details.",
+"GENPASS [<bits>]",
+" Generate a secure 256-bit user password. The optional `bits` argument can",
+" be used to specify a different size.",
+"LIST",
+" Show users details in config file format.",
+"LOAD",
+" Reload users from the ACL file.",
+"LOG [<count> | RESET]",
+" Show the ACL log entries.",
+"SAVE",
+" Save the current config to the ACL file.",
+"SETUSER <username> <attribute> [<attribute> ...]",
+" Create or modify a user with the specified attributes.",
+"USERS",
+" List all the registered usernames.",
+"WHOAMI",
+" Return the current connection username.",
NULL
};
addReplyHelp(c,help);
@@ -2224,7 +2236,7 @@ void addReplyCommandCategories(client *c, struct redisCommand *cmd) {
void authCommand(client *c) {
/* Only two or three argument forms are allowed. */
if (c->argc > 3) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
diff --git a/src/aof.c b/src/aof.c
index 79b2f1284..d3191277f 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -206,29 +206,27 @@ int aofFsyncInProgress(void) {
/* Starts a background task that performs fsync() against the specified
* file descriptor (the one of the AOF file) in another thread. */
void aof_background_fsync(int fd) {
- bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);
+ bioCreateFsyncJob(fd);
}
/* Kills an AOFRW child process if exists */
void killAppendOnlyChild(void) {
int statloc;
/* No AOFRW child? return. */
- if (server.aof_child_pid == -1) return;
+ if (server.child_type != CHILD_TYPE_AOF) return;
/* Kill AOFRW child, wait for child exit. */
serverLog(LL_NOTICE,"Killing running AOF rewrite child: %ld",
- (long) server.aof_child_pid);
- if (kill(server.aof_child_pid,SIGUSR1) != -1) {
- while(wait3(&statloc,0,NULL) != server.aof_child_pid);
+ (long) server.child_pid);
+ if (kill(server.child_pid,SIGUSR1) != -1) {
+ while(wait3(&statloc,0,NULL) != server.child_pid);
}
/* Reset the buffer accumulating changes while the child saves. */
aofRewriteBufferReset();
- aofRemoveTempFile(server.aof_child_pid);
- server.aof_child_pid = -1;
+ aofRemoveTempFile(server.child_pid);
+ resetChildState();
server.aof_rewrite_time_start = -1;
/* Close pipes used for IPC between the two processes. */
aofClosePipes();
- closeChildInfoPipe();
- updateDictResizePolicy();
}
/* Called when the user switches from "appendonly yes" to "appendonly no"
@@ -265,14 +263,14 @@ int startAppendOnly(void) {
strerror(errno));
return C_ERR;
}
- if (hasActiveChildProcess() && server.aof_child_pid == -1) {
+ if (hasActiveChildProcess() && server.child_type != CHILD_TYPE_AOF) {
server.aof_rewrite_scheduled = 1;
serverLog(LL_WARNING,"AOF was enabled but there is already another background operation. An AOF background was scheduled to start when possible.");
} else {
/* If there is a pending AOF rewrite, we need to switch it off and
* start a new one: the old one cannot be reused because it is not
* accumulating the AOF buffer. */
- if (server.aof_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_AOF) {
serverLog(LL_WARNING,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now.");
killAppendOnlyChild();
}
@@ -646,7 +644,7 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
* accumulate the differences between the child DB and the current one
* in a buffer, so that when the child process will do its work we
* can append the differences to the new append only file. */
- if (server.aof_child_pid != -1)
+ if (server.child_type == CHILD_TYPE_AOF)
aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf));
sdsfree(buf);
@@ -1427,6 +1425,8 @@ int rewriteAppendOnlyFileRio(rio *aof) {
dictEntry *de;
size_t processed = 0;
int j;
+ long key_count = 0;
+ long long cow_updated_time = 0;
for (j = 0; j < server.dbnum; j++) {
char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
@@ -1486,6 +1486,19 @@ int rewriteAppendOnlyFileRio(rio *aof) {
processed = aof->processed_bytes;
aofReadDiffFromParent();
}
+
+ /* Update COW info every 1 second (approximately).
+ * In order to avoid calling mstime() on each iteration, we only
+ * check the time difference once every 1024 keys. */
+ if ((key_count & 1023) == 0) {
+ key_count = 0;
+ long long now = mstime();
+ if (now - cow_updated_time >= 1000) {
+ sendChildCOWInfo(CHILD_TYPE_AOF, 0, "AOF rewrite");
+ cow_updated_time = now;
+ }
+ }
+ key_count++;
}
dictReleaseIterator(di);
di = NULL;
@@ -1579,8 +1592,31 @@ int rewriteAppendOnlyFile(char *filename) {
serverLog(LL_NOTICE,
"Concatenating %.2f MB of AOF diff received from parent.",
(double) sdslen(server.aof_child_diff) / (1024*1024));
- if (rioWrite(&aof,server.aof_child_diff,sdslen(server.aof_child_diff)) == 0)
- goto werr;
+
+ /* Now we write the entire AOF buffer we received from the parent
+ * via the pipe during the life of this fork child.
+ * Once a second, we take a break and send updated COW info to the parent. */
+ size_t bytes_to_write = sdslen(server.aof_child_diff);
+ const char *buf = server.aof_child_diff;
+ long long cow_updated_time = mstime();
+
+ while (bytes_to_write) {
+ /* We write the AOF buffer in chunks of 8MB so that we can check the time in between them. */
+ size_t chunk_size = bytes_to_write < (8<<20) ? bytes_to_write : (8<<20);
+
+ if (rioWrite(&aof,buf,chunk_size) == 0)
+ goto werr;
+
+ bytes_to_write -= chunk_size;
+ buf += chunk_size;
+
+ /* Update COW info */
+ long long now = mstime();
+ if (now - cow_updated_time >= 1000) {
+ sendChildCOWInfo(CHILD_TYPE_AOF, 0, "AOF rewrite");
+ cow_updated_time = now;
+ }
+ }
/* Make sure data will not remain on the OS's output buffers */
if (fflush(fp)) goto werr;
@@ -1703,7 +1739,6 @@ int rewriteAppendOnlyFileBackground(void) {
if (hasActiveChildProcess()) return C_ERR;
if (aofCreatePipes() != C_OK) return C_ERR;
- openChildInfoPipe();
if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) {
char tmpfile[256];
@@ -1712,7 +1747,7 @@ int rewriteAppendOnlyFileBackground(void) {
redisSetCpuAffinity(server.aof_rewrite_cpulist);
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
if (rewriteAppendOnlyFile(tmpfile) == C_OK) {
- sendChildCOWInfo(CHILD_TYPE_AOF, "AOF rewrite");
+ sendChildCOWInfo(CHILD_TYPE_AOF, 1, "AOF rewrite");
exitFromChild(0);
} else {
exitFromChild(1);
@@ -1720,7 +1755,6 @@ int rewriteAppendOnlyFileBackground(void) {
} else {
/* Parent */
if (childpid == -1) {
- closeChildInfoPipe();
serverLog(LL_WARNING,
"Can't rewrite append only file in background: fork: %s",
strerror(errno));
@@ -1731,8 +1765,7 @@ int rewriteAppendOnlyFileBackground(void) {
"Background append only file rewriting started by pid %ld",(long) childpid);
server.aof_rewrite_scheduled = 0;
server.aof_rewrite_time_start = time(NULL);
- server.aof_child_pid = childpid;
- updateDictResizePolicy();
+
/* We set appendseldb to -1 in order to force the next call to the
* feedAppendOnlyFile() to issue a SELECT command, so the differences
* accumulated by the parent into server.aof_rewrite_buf will start
@@ -1745,7 +1778,7 @@ int rewriteAppendOnlyFileBackground(void) {
}
void bgrewriteaofCommand(client *c) {
- if (server.aof_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_AOF) {
addReplyError(c,"Background append only file rewriting already in progress");
} else if (hasActiveChildProcess()) {
server.aof_rewrite_scheduled = 1;
@@ -1803,7 +1836,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
* rewritten AOF. */
latencyStartMonitor(latency);
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof",
- (int)server.aof_child_pid);
+ (int)server.child_pid);
newfd = open(tmpfile,O_WRONLY|O_APPEND);
if (newfd == -1) {
serverLog(LL_WARNING,
@@ -1909,7 +1942,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
server.aof_state = AOF_ON;
/* Asynchronously close the overwritten AOF. */
- if (oldfd != -1) bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL);
+ if (oldfd != -1) bioCreateCloseJob(oldfd);
serverLog(LL_VERBOSE,
"Background AOF rewrite signal handler took %lldus", ustime()-now);
@@ -1931,8 +1964,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
cleanup:
aofClosePipes();
aofRewriteBufferReset();
- aofRemoveTempFile(server.aof_child_pid);
- server.aof_child_pid = -1;
+ aofRemoveTempFile(server.child_pid);
server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start;
server.aof_rewrite_time_start = -1;
/* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
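The rewriteAppendOnlyFileRio() and AOF-diff hunks above report copy-on-write usage to the parent at most once per second, and consult the clock only every 1024 keys (or every 8MB chunk) to keep the hot loop cheap. A self-contained sketch of that throttling pattern, where report_progress() merely stands in for sendChildCOWInfo() and is not the commit's code:

    #include <stdio.h>
    #include <time.h>

    static long long now_ms(void) {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
    }

    /* Stand-in for sendChildCOWInfo(): just report how far we got. */
    static void report_progress(long processed) {
        printf("processed %ld items\n", processed);
    }

    int main(void) {
        long key_count = 0;                 /* cheap per-item counter */
        long long last_report = now_ms();
        for (long processed = 0; processed < 10000000; processed++) {
            /* Check the clock only every 1024 items, and report at most
             * once per second, mirroring the AOF rewrite loop. */
            if ((key_count & 1023) == 0) {
                key_count = 0;
                long long now = now_ms();
                if (now - last_report >= 1000) {
                    report_progress(processed);
                    last_report = now;
                }
            }
            key_count++;
        }
        return 0;
    }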
diff --git a/src/bio.c b/src/bio.c
index a11bcb18b..c6e17f49d 100644
--- a/src/bio.c
+++ b/src/bio.c
@@ -78,15 +78,13 @@ static unsigned long long bio_pending[BIO_NUM_OPS];
* file as the API does not expose the internals at all. */
struct bio_job {
time_t time; /* Time at which the job was created. */
- /* Job specific arguments pointers. If we need to pass more than three
- * arguments we can just pass a pointer to a structure or alike. */
- void *arg1, *arg2, *arg3;
+ /* Job specific arguments.*/
+ int fd; /* Fd for file based background jobs */
+ lazy_free_fn *free_fn; /* Function that will free the provided arguments */
+ void *free_args[]; /* List of arguments to be passed to the free function */
};
void *bioProcessBackgroundJobs(void *arg);
-void lazyfreeFreeObjectFromBioThread(robj *o);
-void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2);
-void lazyfreeFreeSlotsMapFromBioThread(rax *rt);
/* Make sure we have enough stack to perform all the things we do in the
* main thread. */
@@ -128,13 +126,8 @@ void bioInit(void) {
}
}
-void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
- struct bio_job *job = zmalloc(sizeof(*job));
-
+void bioSubmitJob(int type, struct bio_job *job) {
job->time = time(NULL);
- job->arg1 = arg1;
- job->arg2 = arg2;
- job->arg3 = arg3;
pthread_mutex_lock(&bio_mutex[type]);
listAddNodeTail(bio_jobs[type],job);
bio_pending[type]++;
@@ -142,6 +135,35 @@ void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
pthread_mutex_unlock(&bio_mutex[type]);
}
+void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...) {
+ va_list valist;
+ /* Allocate memory for the job structure and all required
+ * arguments */
+ struct bio_job *job = zmalloc(sizeof(*job) + sizeof(void *) * (arg_count));
+ job->free_fn = free_fn;
+
+ va_start(valist, arg_count);
+ for (int i = 0; i < arg_count; i++) {
+ job->free_args[i] = va_arg(valist, void *);
+ }
+ va_end(valist);
+ bioSubmitJob(BIO_LAZY_FREE, job);
+}
+
+void bioCreateCloseJob(int fd) {
+ struct bio_job *job = zmalloc(sizeof(*job));
+ job->fd = fd;
+
+ bioSubmitJob(BIO_CLOSE_FILE, job);
+}
+
+void bioCreateFsyncJob(int fd) {
+ struct bio_job *job = zmalloc(sizeof(*job));
+ job->fd = fd;
+
+ bioSubmitJob(BIO_AOF_FSYNC, job);
+}
+
void *bioProcessBackgroundJobs(void *arg) {
struct bio_job *job;
unsigned long type = (unsigned long) arg;
@@ -196,20 +218,11 @@ void *bioProcessBackgroundJobs(void *arg) {
/* Process the job accordingly to its type. */
if (type == BIO_CLOSE_FILE) {
- close((long)job->arg1);
+ close(job->fd);
} else if (type == BIO_AOF_FSYNC) {
- redis_fsync((long)job->arg1);
+ redis_fsync(job->fd);
} else if (type == BIO_LAZY_FREE) {
- /* What we free changes depending on what arguments are set:
- * arg1 -> free the object at pointer.
- * arg2 & arg3 -> free two dictionaries (a Redis DB).
- * only arg3 -> free the radix tree. */
- if (job->arg1)
- lazyfreeFreeObjectFromBioThread(job->arg1);
- else if (job->arg2 && job->arg3)
- lazyfreeFreeDatabaseFromBioThread(job->arg2,job->arg3);
- else if (job->arg3)
- lazyfreeFreeSlotsMapFromBioThread(job->arg3);
+ job->free_fn(job->free_args);
} else {
serverPanic("Wrong job type in bioProcessBackgroundJobs().");
}
diff --git a/src/bio.h b/src/bio.h
index 6c2155941..1e6e97297 100644
--- a/src/bio.h
+++ b/src/bio.h
@@ -30,13 +30,17 @@
#ifndef __BIO_H
#define __BIO_H
+typedef void lazy_free_fn(void *args[]);
+
/* Exported API */
void bioInit(void);
-void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3);
unsigned long long bioPendingJobsOfType(int type);
unsigned long long bioWaitStepOfType(int type);
time_t bioOlderJobOfType(int type);
void bioKillThreads(void);
+void bioCreateCloseJob(int fd);
+void bioCreateFsyncJob(int fd);
+void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...);
/* Background job opcodes */
#define BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
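The new bioCreateLazyFreeJob() declared above packs a variable number of pointer arguments into the flexible array member free_args[] of struct bio_job, and the bio thread later hands them back to the supplied free function. A self-contained sketch of that packing pattern, with hypothetical names (make_job, print_and_free) rather than the Redis ones:

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef void free_fn(void *args[]);

    struct job {
        free_fn *fn;
        void *args[];       /* flexible array member, sized at allocation time */
    };

    /* Copy `count` pointer arguments into a heap-allocated job, the same way
     * bioCreateLazyFreeJob() fills job->free_args[] from its varargs. */
    static struct job *make_job(free_fn *fn, int count, ...) {
        struct job *j = malloc(sizeof(*j) + sizeof(void *) * count);
        j->fn = fn;
        va_list ap;
        va_start(ap, count);
        for (int i = 0; i < count; i++) j->args[i] = va_arg(ap, void *);
        va_end(ap);
        return j;
    }

    static void print_and_free(void *args[]) {
        printf("lazily freeing: %s\n", (char *)args[0]);
        free(args[0]);
    }

    int main(void) {
        struct job *j = make_job(print_and_free, 1, strdup("some payload"));
        j->fn(j->args);     /* in Redis this would run on the BIO_LAZY_FREE thread */
        free(j);
        return 0;
    }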
diff --git a/src/bitops.c b/src/bitops.c
index 5e996679b..afd79ad88 100644
--- a/src/bitops.c
+++ b/src/bitops.c
@@ -611,7 +611,7 @@ void bitopCommand(client *c) {
else if((opname[0] == 'n' || opname[0] == 'N') && !strcasecmp(opname,"not"))
op = BITOP_NOT;
else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -813,7 +813,7 @@ void bitcountCommand(client *c) {
end = strlen-1;
} else {
/* Syntax error. */
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -878,7 +878,7 @@ void bitposCommand(client *c) {
end = strlen-1;
} else {
/* Syntax error. */
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -970,7 +970,7 @@ void bitfieldGeneric(client *c, int flags) {
}
continue;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
zfree(ops);
return;
}
diff --git a/src/blocked.c b/src/blocked.c
index d85723458..46935c79f 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -89,6 +89,12 @@ void blockClient(client *c, int btype) {
server.blocked_clients++;
server.blocked_clients_by_type[btype]++;
addClientToTimeoutTable(c);
+ if (btype == BLOCKED_PAUSE) {
+ listAddNodeTail(server.paused_clients, c);
+ c->paused_list_node = listLast(server.paused_clients);
+ /* Mark this client to execute its command */
+ c->flags |= CLIENT_PENDING_COMMAND;
+ }
}
/* This function is called in the beforeSleep() function of the event loop
@@ -110,6 +116,11 @@ void processUnblockedClients(void) {
* client is not blocked before to proceed, but things may change and
* the code is conceptually more correct this way. */
if (!(c->flags & CLIENT_BLOCKED)) {
+ /* If we have a queued command, execute it now. */
+ if (processPendingCommandsAndResetClient(c) == C_ERR) {
+ continue;
+ }
+ /* Then process client if it has more data in it's buffer. */
if (c->querybuf && sdslen(c->querybuf) > 0) {
processInputBuffer(c);
}
@@ -154,6 +165,9 @@ void unblockClient(client *c) {
} else if (c->btype == BLOCKED_MODULE) {
if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c);
unblockClientFromModule(c);
+ } else if (c->btype == BLOCKED_PAUSE) {
+ listDelNode(server.paused_clients,c->paused_list_node);
+ c->paused_list_node = NULL;
} else {
serverPanic("Unknown btype in unblockClient().");
}
@@ -200,9 +214,16 @@ void disconnectAllBlockedClients(void) {
client *c = listNodeValue(ln);
if (c->flags & CLIENT_BLOCKED) {
- addReplySds(c,sdsnew(
+ /* PAUSED clients are an exception: when they are unblocked, command
+ * processing will start from scratch, and the command will be either
+ * executed or rejected (unlike LIST-blocked clients, for which the
+ * command is, in a way, already in progress). */
+ if (c->btype == BLOCKED_PAUSE)
+ continue;
+
+ addReplyError(c,
"-UNBLOCKED force unblock from blocking operation, "
- "instance state changed (master -> replica?)\r\n"));
+ "instance state changed (master -> replica?)");
unblockClient(c);
c->flags |= CLIENT_CLOSE_AFTER_REPLY;
}
diff --git a/src/childinfo.c b/src/childinfo.c
index d11aa7bcf..cae73fe46 100644
--- a/src/childinfo.c
+++ b/src/childinfo.c
@@ -30,6 +30,12 @@
#include "server.h"
#include <unistd.h>
+typedef struct {
+ int process_type; /* AOF or RDB child? */
+ int on_exit; /* COW size of active or exited child */
+ size_t cow_size; /* Copy on write size. */
+} child_info_data;
+
/* Open a child-parent channel used in order to move information about the
* RDB / AOF saving process from the child to the parent (for instance
* the amount of copy on write memory used) */
@@ -41,7 +47,7 @@ void openChildInfoPipe(void) {
} else if (anetNonBlock(NULL,server.child_info_pipe[0]) != ANET_OK) {
closeChildInfoPipe();
} else {
- memset(&server.child_info_data,0,sizeof(server.child_info_data));
+ server.child_info_nread = 0;
}
}
@@ -54,34 +60,76 @@ void closeChildInfoPipe(void) {
close(server.child_info_pipe[1]);
server.child_info_pipe[0] = -1;
server.child_info_pipe[1] = -1;
+ server.child_info_nread = 0;
}
}
-/* Send COW data to parent. The child should call this function after populating
- * the corresponding fields it want to sent (according to the process type). */
-void sendChildInfo(int ptype) {
+/* Send COW data to parent. */
+void sendChildInfo(int process_type, int on_exit, size_t cow_size) {
if (server.child_info_pipe[1] == -1) return;
- server.child_info_data.magic = CHILD_INFO_MAGIC;
- server.child_info_data.process_type = ptype;
- ssize_t wlen = sizeof(server.child_info_data);
- if (write(server.child_info_pipe[1],&server.child_info_data,wlen) != wlen) {
+
+ child_info_data buffer = {.process_type = process_type, .on_exit = on_exit, .cow_size = cow_size};
+ ssize_t wlen = sizeof(buffer);
+
+ if (write(server.child_info_pipe[1],&buffer,wlen) != wlen) {
/* Nothing to do on error, this will be detected by the other side. */
}
}
-/* Receive COW data from parent. */
+/* Update COW data. */
+void updateChildInfo(int process_type, int on_exit, size_t cow_size) {
+ if (!on_exit) {
+ server.stat_current_cow_bytes = cow_size;
+ return;
+ }
+
+ if (process_type == CHILD_TYPE_RDB) {
+ server.stat_rdb_cow_bytes = cow_size;
+ } else if (process_type == CHILD_TYPE_AOF) {
+ server.stat_aof_cow_bytes = cow_size;
+ } else if (process_type == CHILD_TYPE_MODULE) {
+ server.stat_module_cow_bytes = cow_size;
+ }
+}
+
+/* Read COW info data from the pipe.
+ * If a complete record has been read into the buffer, the process type, exit flag and
+ * copy-on-write size are stored into *process_type, *on_exit and *cow_size respectively,
+ * and 1 is returned. Otherwise the partial data is left in the buffer, waiting for the
+ * next read, and 0 is returned. */
+int readChildInfo(int *process_type, int *on_exit, size_t *cow_size) {
+ /* We use a static buffer here, in combination with server.child_info_nread, to handle short reads. */
+ static child_info_data buffer;
+ ssize_t wlen = sizeof(buffer);
+
+ /* Do not overlap */
+ if (server.child_info_nread == wlen) server.child_info_nread = 0;
+
+ int nread = read(server.child_info_pipe[0], (char *)&buffer + server.child_info_nread, wlen - server.child_info_nread);
+ if (nread > 0) {
+ server.child_info_nread += nread;
+ }
+
+ /* We have complete child info */
+ if (server.child_info_nread == wlen) {
+ *process_type = buffer.process_type;
+ *on_exit = buffer.on_exit;
+ *cow_size = buffer.cow_size;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Receive COW data from child. */
void receiveChildInfo(void) {
if (server.child_info_pipe[0] == -1) return;
- ssize_t wlen = sizeof(server.child_info_data);
- if (read(server.child_info_pipe[0],&server.child_info_data,wlen) == wlen &&
- server.child_info_data.magic == CHILD_INFO_MAGIC)
- {
- if (server.child_info_data.process_type == CHILD_TYPE_RDB) {
- server.stat_rdb_cow_bytes = server.child_info_data.cow_size;
- } else if (server.child_info_data.process_type == CHILD_TYPE_AOF) {
- server.stat_aof_cow_bytes = server.child_info_data.cow_size;
- } else if (server.child_info_data.process_type == CHILD_TYPE_MODULE) {
- server.stat_module_cow_bytes = server.child_info_data.cow_size;
- }
+
+ int process_type;
+ int on_exit;
+ size_t cow_size;
+
+ /* Drain the pipe and update child info so that we get the final message. */
+ while (readChildInfo(&process_type, &on_exit, &cow_size)) {
+ updateChildInfo(process_type, on_exit, cow_size);
}
}
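readChildInfo() above copes with short reads on the non-blocking pipe by accumulating bytes into a static buffer (with server.child_info_nread tracking progress) and only publishing a record once a full child_info_data has arrived. A stripped-down, self-contained sketch of the same technique over an ordinary pipe; read_record() and nread_state are hypothetical names, not the commit's code:

    #include <stddef.h>
    #include <stdio.h>
    #include <unistd.h>

    typedef struct { int process_type; int on_exit; size_t cow_size; } record;

    /* Accumulate bytes until a whole record is available; returns 1 when a
     * complete record has been copied to *out, 0 if more bytes are needed. */
    static int read_record(int fd, size_t *nread_state, record *out) {
        static record buffer;
        const size_t want = sizeof(buffer);
        if (*nread_state == want) *nread_state = 0;   /* start a fresh record */
        ssize_t n = read(fd, (char *)&buffer + *nread_state, want - *nread_state);
        if (n > 0) *nread_state += (size_t)n;
        if (*nread_state == want) { *out = buffer; return 1; }
        return 0;
    }

    int main(void) {
        int fds[2];
        if (pipe(fds) != 0) return 1;
        record in = { 1, 0, 4096 }, out;
        const char *p = (const char *)&in;
        size_t nread_state = 0;
        /* Write the record one byte at a time to force short reads. */
        for (size_t i = 0; i < sizeof(in); i++) {
            if (write(fds[1], p + i, 1) != 1) return 1;
            if (read_record(fds[0], &nread_state, &out))
                printf("got record: type=%d exit=%d cow=%zu\n",
                       out.process_type, out.on_exit, out.cow_size);
        }
        return 0;
    }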
diff --git a/src/cluster.c b/src/cluster.c
index 8651a81d3..78c36e8d1 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -47,7 +47,7 @@
clusterNode *myself = NULL;
clusterNode *createClusterNode(char *nodename, int flags);
-int clusterAddNode(clusterNode *node);
+void clusterAddNode(clusterNode *node);
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void clusterReadHandler(connection *conn);
void clusterSendPing(clusterLink *link, int type);
@@ -961,12 +961,12 @@ void freeClusterNode(clusterNode *n) {
}
/* Add a node to the nodes hash table */
-int clusterAddNode(clusterNode *node) {
+void clusterAddNode(clusterNode *node) {
int retval;
retval = dictAdd(server.cluster->nodes,
sdsnewlen(node->name,CLUSTER_NAMELEN), node);
- return (retval == DICT_OK) ? C_OK : C_ERR;
+ serverAssert(retval == DICT_OK);
}
/* Remove a node from the cluster. The function performs the high level
@@ -2164,7 +2164,7 @@ int clusterProcessPacket(clusterLink *link) {
resetManualFailover();
server.cluster->mf_end = now + CLUSTER_MF_TIMEOUT;
server.cluster->mf_slave = sender;
- pauseClients(now+(CLUSTER_MF_TIMEOUT*CLUSTER_MF_PAUSE_MULT));
+ pauseClients(now+(CLUSTER_MF_TIMEOUT*CLUSTER_MF_PAUSE_MULT),CLIENT_PAUSE_WRITE);
serverLog(LL_WARNING,"Manual failover requested by replica %.40s.",
sender->name);
/* We need to send a ping message to the replica, as it would carry
@@ -3421,9 +3421,8 @@ void clusterHandleSlaveMigration(int max_slaves) {
* The function can be used both to initialize the manual failover state at
* startup or to abort a manual failover in progress. */
void resetManualFailover(void) {
- if (server.cluster->mf_end && clientsArePaused()) {
- server.clients_pause_end_time = 0;
- clientsArePaused(); /* Just use the side effect of the function. */
+ if (server.cluster->mf_end) {
+ checkClientPauseTimeoutAndReturnIfPaused();
}
server.cluster->mf_end = 0; /* No manual failover in progress. */
server.cluster->mf_can_start = 0;
@@ -4357,28 +4356,49 @@ void clusterCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"ADDSLOTS <slot> [slot ...] -- Assign slots to current node.",
-"BUMPEPOCH -- Advance the cluster config epoch.",
-"COUNT-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
-"COUNTKEYSINSLOT <slot> - Return the number of keys in <slot>.",
-"DELSLOTS <slot> [slot ...] -- Delete slots information from current node.",
-"FAILOVER [force|takeover] -- Promote current replica node to being a master.",
-"FORGET <node-id> -- Remove a node from the cluster.",
-"GETKEYSINSLOT <slot> <count> -- Return key names stored by current node in a slot.",
-"FLUSHSLOTS -- Delete current node own slots information.",
-"INFO - Return information about the cluster.",
-"KEYSLOT <key> -- Return the hash slot for <key>.",
-"MEET <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
-"MYID -- Return the node id.",
-"NODES -- Return cluster configuration seen by node. Output format:",
-" <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ... <slot>",
-"REPLICATE <node-id> -- Configure current node as replica to <node-id>.",
-"RESET [hard|soft] -- Reset current node (default: soft).",
-"SET-config-epoch <epoch> - Set config epoch of current node.",
-"SETSLOT <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
-"REPLICAS <node-id> -- Return <node-id> replicas.",
-"SAVECONFIG - Force saving cluster configuration on disk.",
-"SLOTS -- Return information about slots range mappings. Each range is made of:",
+"ADDSLOTS <slot> [<slot> ...]",
+" Assign slots to current node.",
+"BUMPEPOCH",
+" Advance the cluster config epoch.",
+"COUNT-FAILURE-REPORTS <node-id>",
+" Return number of failure reports for <node-id>.",
+"COUNTKEYSINSLOT <slot>",
+" Return the number of keys in <slot>.",
+"DELSLOTS <slot> [<slot> ...]",
+" Delete slots information from current node.",
+"FAILOVER [FORCE|TAKEOVER]",
+" Promote current replica node to being a master.",
+"FORGET <node-id>",
+" Remove a node from the cluster.",
+"GETKEYSINSLOT <slot> <count>",
+" Return key names stored by current node in a slot.",
+"FLUSHSLOTS",
+" Delete current node own slots information.",
+"INFO",
+" Return information about the cluster.",
+"KEYSLOT <key>",
+" Return the hash slot for <key>.",
+"MEET <ip> <port> [<bus-port>]",
+" Connect nodes into a working cluster.",
+"MYID",
+" Return the node id.",
+"NODES",
+" Return cluster configuration seen by node. Output format:",
+" <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ...",
+"REPLICATE <node-id>",
+" Configure current node as replica to <node-id>.",
+"RESET [HARD|SOFT]",
+" Reset current node (default: soft).",
+"SET-CONFIG-EPOCH <epoch>",
+" Set config epoch of current node.",
+"SETSLOT <slot> (IMPORTING|MIGRATING|STABLE|NODE <node-id>)",
+" Set slot state.",
+"REPLICAS <node-id>",
+" Return <node-id> replicas.",
+"SAVECONFIG",
+" Force saving cluster configuration on disk.",
+"SLOTS",
+" Return information about slots range mappings. Each range is made of:",
" start, end, master and replicas IP addresses, ports and ids",
NULL
};
@@ -4820,7 +4840,7 @@ NULL
takeover = 1;
force = 1; /* Takeover also implies force. */
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -4911,7 +4931,7 @@ NULL
} else if (!strcasecmp(c->argv[2]->ptr,"soft")) {
hard = 0;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -5049,7 +5069,7 @@ void restoreCommand(client *c) {
}
j++; /* Consume additional arg. */
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -5057,7 +5077,7 @@ void restoreCommand(client *c) {
/* Make sure this key does not already exist here... */
robj *key = c->argv[1];
if (!replace && lookupKeyWrite(c->db,key) != NULL) {
- addReply(c,shared.busykeyerr);
+ addReplyErrorObject(c,shared.busykeyerr);
return;
}
@@ -5170,8 +5190,7 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti
conn = server.tls_cluster ? connCreateTLS() : connCreateSocket();
if (connBlockingConnect(conn, c->argv[1]->ptr, atoi(c->argv[2]->ptr), timeout)
!= C_OK) {
- addReplySds(c,
- sdsnew("-IOERR error or timeout connecting to the client\r\n"));
+ addReplyError(c,"-IOERR error or timeout connecting to the client");
connClose(conn);
sdsfree(name);
return NULL;
@@ -5259,14 +5278,14 @@ void migrateCommand(client *c) {
replace = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"auth")) {
if (!moreargs) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
j++;
password = c->argv[j]->ptr;
} else if (!strcasecmp(c->argv[j]->ptr,"auth2")) {
if (moreargs < 2) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
username = c->argv[++j]->ptr;
@@ -5282,7 +5301,7 @@ void migrateCommand(client *c) {
num_keys = c->argc - j - 1;
break; /* All the remaining args are keys. */
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -5763,7 +5782,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
* cluster is down. */
if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
return NULL;
- } else if (!(cmd->flags & CMD_READONLY) && !(cmd->proc == evalCommand)
+ } else if ((cmd->flags & CMD_WRITE) && !(cmd->proc == evalCommand)
&& !(cmd->proc == evalShaCommand))
{
/* The cluster is configured to allow read only commands
@@ -5812,11 +5831,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
/* Handle the read-only client case reading from a slave: if this
* node is a slave and the request is about a hash slot our master
* is serving, we can reply without redirection. */
- int is_readonly_command = (c->cmd->flags & CMD_READONLY) ||
- (c->cmd->proc == execCommand && !(c->mstate.cmd_inv_flags & CMD_READONLY));
+ int is_write_command = (c->cmd->flags & CMD_WRITE) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
if (c->flags & CLIENT_READONLY &&
- (is_readonly_command || cmd->proc == evalCommand ||
- cmd->proc == evalShaCommand) &&
+ (!is_write_command || cmd->proc == evalCommand || cmd->proc == evalShaCommand) &&
nodeIsSlave(myself) &&
myself->slaveof == n)
{
@@ -5838,23 +5856,23 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
* be set to the hash slot that caused the redirection. */
void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) {
if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
- addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
+ addReplyError(c,"-CROSSSLOT Keys in request don't hash to the same slot");
} else if (error_code == CLUSTER_REDIR_UNSTABLE) {
/* The request spawns multiple keys in the same slot,
* but the slot is not "stable" currently as there is
* a migration or import in progress. */
- addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
+ addReplyError(c,"-TRYAGAIN Multiple keys request during rehashing of slot");
} else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
- addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
+ addReplyError(c,"-CLUSTERDOWN The cluster is down");
} else if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
- addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down and only accepts read commands\r\n"));
+ addReplyError(c,"-CLUSTERDOWN The cluster is down and only accepts read commands");
} else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
- addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
+ addReplyError(c,"-CLUSTERDOWN Hash slot not served");
} else if (error_code == CLUSTER_REDIR_MOVED ||
error_code == CLUSTER_REDIR_ASK)
{
- addReplySds(c,sdscatprintf(sdsempty(),
- "-%s %d %s:%d\r\n",
+ addReplyErrorSds(c,sdscatprintf(sdsempty(),
+ "-%s %d %s:%d",
(error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
hashslot,n->ip,n->port));
} else {
@@ -5901,7 +5919,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
/* if the client is read-only and attempting to access key that our
* replica can handle, allow it. */
if ((c->flags & CLIENT_READONLY) &&
- (c->lastcmd->flags & CMD_READONLY) &&
+ !(c->lastcmd->flags & CMD_WRITE) &&
nodeIsSlave(myself) && myself->slaveof == node)
{
node = myself;
diff --git a/src/cluster.h b/src/cluster.h
index 7e5f79c87..d58f350ce 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -40,7 +40,7 @@ typedef struct clusterLink {
sds sndbuf; /* Packet send buffer */
char *rcvbuf; /* Packet reception buffer */
size_t rcvbuf_len; /* Used size of rcvbuf */
- size_t rcvbuf_alloc; /* Used size of rcvbuf */
+ size_t rcvbuf_alloc; /* Allocated size of rcvbuf */
struct clusterNode *node; /* Node related to this link if any, or NULL */
} clusterLink;
diff --git a/src/config.c b/src/config.c
index c858df3f3..2e109dbae 100644
--- a/src/config.c
+++ b/src/config.c
@@ -166,6 +166,15 @@ typedef struct stringConfigData {
be stored as a NULL value. */
} stringConfigData;
+typedef struct sdsConfigData {
+ sds *config; /* Pointer to the server config this value is stored in. */
+ const char *default_value; /* Default value of the config on rewrite. */
+ int (*is_valid_fn)(sds val, char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(sds val, sds prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int convert_empty_to_null; /* Boolean indicating if empty SDS strings should
+ be stored as a NULL value. */
+} sdsConfigData;
+
typedef struct enumConfigData {
int *config; /* The pointer to the server config this value is stored in */
configEnum *enum_value; /* The underlying enum type this data represents */
@@ -212,6 +221,7 @@ typedef struct numericConfigData {
typedef union typeData {
boolConfigData yesno;
stringConfigData string;
+ sdsConfigData sds;
enumConfigData enumd;
numericConfigData numeric;
} typeData;
@@ -512,7 +522,7 @@ void loadServerConfigFromString(char *config) {
}
server.repl_state = REPL_STATE_CONNECT;
} else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
- if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) {
+ if (sdslen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) {
err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
goto loaderr;
}
@@ -524,10 +534,10 @@ void loadServerConfigFromString(char *config) {
sdsfree(server.requirepass);
server.requirepass = NULL;
if (sdslen(argv[1])) {
- sds aclop = sdscatprintf(sdsempty(),">%s",argv[1]);
+ sds aclop = sdscatlen(sdsnew(">"), argv[1], sdslen(argv[1]));
ACLSetUser(DefaultUser,aclop,sdslen(aclop));
sdsfree(aclop);
- server.requirepass = sdsnew(argv[1]);
+ server.requirepass = sdsdup(argv[1]);
} else {
ACLSetUser(DefaultUser,"nopass",-1);
}
@@ -751,10 +761,10 @@ void configSetCommand(client *c) {
sdsfree(server.requirepass);
server.requirepass = NULL;
if (sdslen(o->ptr)) {
- sds aclop = sdscatprintf(sdsempty(),">%s",(char*)o->ptr);
+ sds aclop = sdscatlen(sdsnew(">"), o->ptr, sdslen(o->ptr));
ACLSetUser(DefaultUser,aclop,sdslen(aclop));
sdsfree(aclop);
- server.requirepass = sdsnew(o->ptr);
+ server.requirepass = sdsdup(o->ptr);
} else {
ACLSetUser(DefaultUser,"nopass",-1);
}
@@ -905,7 +915,7 @@ badfmt: /* Bad format errors */
addReplyBulkCString(c,_var ? _var : ""); \
matches++; \
} \
-} while(0);
+} while(0)
#define config_get_bool_field(_name,_var) do { \
if (stringmatch(pattern,_name,1)) { \
@@ -913,7 +923,7 @@ badfmt: /* Bad format errors */
addReplyBulkCString(c,_var ? "yes" : "no"); \
matches++; \
} \
-} while(0);
+} while(0)
#define config_get_numerical_field(_name,_var) do { \
if (stringmatch(pattern,_name,1)) { \
@@ -922,8 +932,7 @@ badfmt: /* Bad format errors */
addReplyBulkCString(c,buf); \
matches++; \
} \
-} while(0);
-
+} while(0)
void configGetCommand(client *c) {
robj *o = c->argv[2];
@@ -1330,6 +1339,28 @@ void rewriteConfigStringOption(struct rewriteConfigState *state, const char *opt
rewriteConfigRewriteLine(state,option,line,force);
}
+/* Rewrite a SDS string option. */
+void rewriteConfigSdsOption(struct rewriteConfigState *state, const char *option, sds value, const sds defvalue) {
+ int force = 1;
+ sds line;
+
+ /* If there is no value set, we don't want the SDS option
+ * to be present in the configuration at all. */
+ if (value == NULL) {
+ rewriteConfigMarkAsProcessed(state, option);
+ return;
+ }
+
+ /* Set force to zero if the value is set to its default. */
+ if (defvalue && sdscmp(value, defvalue) == 0) force = 0;
+
+ line = sdsnew(option);
+ line = sdscatlen(line, " ", 1);
+ line = sdscatrepr(line, value, sdslen(value));
+
+ rewriteConfigRewriteLine(state, option, line, force);
+}
+
/* Rewrite a numerical (long long range) option. */
void rewriteConfigNumericalOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) {
int force = value != defvalue;
@@ -1802,22 +1833,14 @@ static void boolConfigRewrite(typeData data, const char *name, struct rewriteCon
/* String Configs */
static void stringConfigInit(typeData data) {
- if (data.string.convert_empty_to_null) {
- *data.string.config = data.string.default_value ? zstrdup(data.string.default_value) : NULL;
- } else {
- *data.string.config = zstrdup(data.string.default_value);
- }
+ *data.string.config = (data.string.convert_empty_to_null && !data.string.default_value) ? NULL : zstrdup(data.string.default_value);
}
static int stringConfigSet(typeData data, sds value, int update, char **err) {
if (data.string.is_valid_fn && !data.string.is_valid_fn(value, err))
return 0;
char *prev = *data.string.config;
- if (data.string.convert_empty_to_null) {
- *data.string.config = value[0] ? zstrdup(value) : NULL;
- } else {
- *data.string.config = zstrdup(value);
- }
+ *data.string.config = (data.string.convert_empty_to_null && !value[0]) ? NULL : zstrdup(value);
if (update && data.string.update_fn && !data.string.update_fn(*data.string.config, prev, err)) {
zfree(*data.string.config);
*data.string.config = prev;
@@ -1835,6 +1858,38 @@ static void stringConfigRewrite(typeData data, const char *name, struct rewriteC
rewriteConfigStringOption(state, name,*(data.string.config), data.string.default_value);
}
+/* SDS Configs */
+static void sdsConfigInit(typeData data) {
+ *data.sds.config = (data.sds.convert_empty_to_null && !data.sds.default_value) ? NULL: sdsnew(data.sds.default_value);
+}
+
+static int sdsConfigSet(typeData data, sds value, int update, char **err) {
+ if (data.sds.is_valid_fn && !data.sds.is_valid_fn(value, err))
+ return 0;
+ sds prev = *data.sds.config;
+ *data.sds.config = (data.sds.convert_empty_to_null && (sdslen(value) == 0)) ? NULL : sdsdup(value);
+ if (update && data.sds.update_fn && !data.sds.update_fn(*data.sds.config, prev, err)) {
+ sdsfree(*data.sds.config);
+ *data.sds.config = prev;
+ return 0;
+ }
+ sdsfree(prev);
+ return 1;
+}
+
+static void sdsConfigGet(client *c, typeData data) {
+ if (*data.sds.config) {
+ addReplyBulkSds(c, sdsdup(*data.sds.config));
+ } else {
+ addReplyBulkCString(c, "");
+ }
+}
+
+static void sdsConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) {
+ rewriteConfigSdsOption(state, name, *(data.sds.config), data.sds.default_value ? sdsnew(data.sds.default_value) : NULL);
+}
+
+
#define ALLOW_EMPTY_STRING 0
#define EMPTY_STRING_IS_NULL 1
@@ -1850,6 +1905,18 @@ static void stringConfigRewrite(typeData data, const char *name, struct rewriteC
} \
}
+#define createSDSConfig(name, alias, modifiable, empty_to_null, config_addr, default, is_valid, update) { \
+ embedCommonConfig(name, alias, modifiable) \
+ embedConfigInterface(sdsConfigInit, sdsConfigSet, sdsConfigGet, sdsConfigRewrite) \
+ .data.sds = { \
+ .config = &(config_addr), \
+ .default_value = (default), \
+ .is_valid_fn = (is_valid), \
+ .update_fn = (update), \
+ .convert_empty_to_null = (empty_to_null), \
+ } \
+}
+
/* Enum configs */
static void enumConfigInit(typeData data) {
*data.enumd.config = data.enumd.default_value;
@@ -2349,7 +2416,6 @@ standardConfig configs[] = {
createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.pidfile, NULL, NULL, NULL),
createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.slave_announce_ip, NULL, NULL, NULL),
createStringConfig("masteruser", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masteruser, NULL, NULL, NULL),
- createStringConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),
createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, NULL),
createStringConfig("syslog-ident", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.syslog_ident, "redis", NULL, NULL),
createStringConfig("dbfilename", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.rdb_filename, "dump.rdb", isValidDBfilename, NULL),
@@ -2358,6 +2424,10 @@ standardConfig configs[] = {
createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bio_cpulist, NULL, NULL, NULL),
createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL),
createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL),
+ createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL),
+
+ /* SDS Configs */
+ createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),
/* Enum Configs */
createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, server.supervised_mode, SUPERVISED_NONE, NULL, NULL),
@@ -2477,12 +2547,17 @@ void configCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"GET <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
-"SET <parameter> <value> -- Set parameter to value.",
-"RESETSTAT -- Reset statistics reported by INFO.",
-"REWRITE -- Rewrite the configuration file.",
+"GET <pattern>",
+" Return parameters matching the glob-like <pattern> and their values.",
+"SET <directive> <value>",
+" Set the configuration <directive> to <value>.",
+"RESETSTAT",
+" Reset statistics reported by the INFO command.",
+"REWRITE",
+" Rewrite the configuration file.",
NULL
};
+
addReplyHelp(c, help);
} else if (!strcasecmp(c->argv[1]->ptr,"set") && c->argc == 4) {
configSetCommand(c);
@@ -2491,6 +2566,7 @@ NULL
} else if (!strcasecmp(c->argv[1]->ptr,"resetstat") && c->argc == 2) {
resetServerStats();
resetCommandTableStats();
+ resetErrorTableStats();
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"rewrite") && c->argc == 2) {
if (server.configfile == NULL) {
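Several config.c hunks above (strlen to sdslen, sdscatprintf to sdscatlen, sdsnew to sdsdup, plus the new SDS config type used for masterauth) make password handling binary safe: an sds string carries its own length, so a password containing a NUL byte is no longer silently truncated. A generic C illustration of the underlying pitfall, using plain C strings rather than the sds library:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        /* A "password" with an embedded NUL byte: 7 bytes of real data. */
        const char secret[7] = { 's', 'e', 'c', '\0', 'r', 'e', 't' };
        size_t real_len = sizeof(secret);

        /* strlen() stops at the first NUL, silently dropping half the data. */
        printf("strlen sees %zu bytes, actual data is %zu bytes\n",
               strlen(secret), real_len);

        /* A length-aware copy (what sdsdup/sdscatlen do internally) keeps every byte. */
        char copy[sizeof(secret)];
        memcpy(copy, secret, real_len);
        printf("length-aware copy preserved %zu bytes (first byte '%c')\n",
               real_len, copy[0]);
        return 0;
    }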
diff --git a/src/crc64.c b/src/crc64.c
index 4cbc019f6..6c9432c4a 100644
--- a/src/crc64.c
+++ b/src/crc64.c
@@ -134,7 +134,7 @@ int crc64Test(int argc, char *argv[]) {
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)_crc64(0, "123456789", 9));
printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
- (uint64_t)crc64(0, "123456789", 9));
+ (uint64_t)crc64(0, (unsigned char*)"123456789", 9));
char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed "
"do eiusmod tempor incididunt ut labore et dolore magna "
"aliqua. Ut enim ad minim veniam, quis nostrud exercitation "
@@ -146,7 +146,7 @@ int crc64Test(int argc, char *argv[]) {
printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)_crc64(0, li, sizeof(li)));
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
- (uint64_t)crc64(0, li, sizeof(li)));
+ (uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
return 0;
}
diff --git a/src/crcspeed.c b/src/crcspeed.c
index d4955bfc9..67cb8fd9f 100644
--- a/src/crcspeed.c
+++ b/src/crcspeed.c
@@ -35,7 +35,8 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
/* generate CRCs for all single byte sequences */
for (int n = 0; n < 256; n++) {
- table[0][n] = crcfn(0, &n, 1);
+ unsigned char v = n;
+ table[0][n] = crcfn(0, &v, 1);
}
/* generate nested CRC table for future slice-by-8 lookup */
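The crcspeed hunk above matters because crcfn(0, ptr, 1) reads exactly one byte from the pointer it is given: passing &n, where n is an int, yields the intended value only on little-endian machines, while copying n into an unsigned char first is portable. A tiny standalone illustration, not part of the commit:

    #include <stdio.h>

    /* Reads a single byte from wherever p points, like the one-byte
     * crcfn() call in crcspeed64little_init(). */
    static unsigned first_byte(const void *p) { return *(const unsigned char *)p; }

    int main(void) {
        int n = 0xAB;
        unsigned char v = (unsigned char)n;
        printf("via &n (int*):          0x%02X  <- endian-dependent\n", first_byte(&n));
        printf("via &v (unsigned char): 0x%02X  <- always 0xAB\n", first_byte(&v));
        return 0;
    }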
diff --git a/src/db.c b/src/db.c
index 5045935c3..5d63566a7 100644
--- a/src/db.c
+++ b/src/db.c
@@ -433,7 +433,7 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
/* Make sure the WATCHed keys are affected by the FLUSH* commands.
* Note that we need to call the function while the keys are still
* there. */
- signalFlushedDb(dbnum);
+ signalFlushedDb(dbnum, async);
/* Empty redis database structure. */
removed = emptyDbStructure(server.db, dbnum, async, callback);
@@ -572,9 +572,20 @@ void signalModifiedKey(client *c, redisDb *db, robj *key) {
trackingInvalidateKey(c,key);
}
-void signalFlushedDb(int dbid) {
- touchWatchedKeysOnFlush(dbid);
- trackingInvalidateKeysOnFlush(dbid);
+void signalFlushedDb(int dbid, int async) {
+ int startdb, enddb;
+ if (dbid == -1) {
+ startdb = 0;
+ enddb = server.dbnum-1;
+ } else {
+ startdb = enddb = dbid;
+ }
+
+ for (int j = startdb; j <= enddb; j++) {
+ touchAllWatchedKeysInDb(&server.db[j], NULL);
+ }
+
+ trackingInvalidateKeysOnFlush(async);
}
/*-----------------------------------------------------------------------------
@@ -593,7 +604,7 @@ int getFlushCommandFlags(client *c, int *flags) {
/* Parse the optional ASYNC option. */
if (c->argc > 1) {
if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return C_ERR;
}
*flags = EMPTYDB_ASYNC;
@@ -606,7 +617,7 @@ int getFlushCommandFlags(client *c, int *flags) {
/* Flushes the whole server data set. */
void flushAllDataAndResetRDB(int flags) {
server.dirty += emptyDb(-1,flags,NULL);
- if (server.rdb_child_pid != -1) killRDBChild();
+ if (server.child_type == CHILD_TYPE_RDB) killRDBChild();
if (server.saveparamslen > 0) {
/* Normally rdbSave() will reset dirty, but we don't want this here
* as otherwise FLUSHALL will not be replicated nor put into the AOF. */
@@ -616,6 +627,9 @@ void flushAllDataAndResetRDB(int flags) {
rdbSave(server.rdb_filename,rsiptr);
server.dirty = saved_dirty;
}
+
+ /* Without this extra dirty++, if the db was already empty, FLUSHALL would
+ * not be replicated nor put into the AOF. */
server.dirty++;
#if defined(USE_JEMALLOC)
/* jemalloc 5 doesn't release pages back to the OS when there's no traffic.
@@ -839,7 +853,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
}
if (count < 1) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
goto cleanup;
}
@@ -858,7 +872,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
typename = c->argv[i+1]->ptr;
i+= 2;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
goto cleanup;
}
}
@@ -1047,7 +1061,7 @@ void shutdownCommand(client *c) {
int flags = 0;
if (c->argc > 2) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
} else if (c->argc == 2) {
if (!strcasecmp(c->argv[1]->ptr,"nosave")) {
@@ -1055,7 +1069,7 @@ void shutdownCommand(client *c) {
} else if (!strcasecmp(c->argv[1]->ptr,"save")) {
flags |= SHUTDOWN_SAVE;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -1141,7 +1155,7 @@ void moveCommand(client *c) {
/* If the user is moving using as target the same
* DB as the source DB it is probably an error. */
if (src == dst) {
- addReply(c,shared.sameobjecterr);
+ addReplyErrorObject(c,shared.sameobjecterr);
return;
}
@@ -1205,7 +1219,7 @@ void copyCommand(client *c) {
selectDb(c,srcid); /* Back to the source DB */
j++; /* Consume additional arg. */
} else {
- addReply(c, shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -1221,7 +1235,7 @@ void copyCommand(client *c) {
robj *key = c->argv[1];
robj *newkey = c->argv[2];
if (src == dst && (sdscmp(key->ptr, newkey->ptr) == 0)) {
- addReply(c,shared.sameobjecterr);
+ addReplyErrorObject(c,shared.sameobjecterr);
return;
}
@@ -1328,9 +1342,14 @@ int dbSwapDatabases(long id1, long id2) {
* However normally we only do this check for efficiency reasons
* in dbAdd() when a list is created. So here we need to rescan
* the list of clients blocked on lists and signal lists as ready
- * if needed. */
+ * if needed.
+ *
+     * Also SWAPDB should make in-flight transactions fail if any
+     * client is watching keys in either of the swapped DBs. */
scanDatabaseForReadyLists(db1);
+ touchAllWatchedKeysInDb(db1, db2);
scanDatabaseForReadyLists(db2);
+ touchAllWatchedKeysInDb(db2, db1);
return C_OK;
}
@@ -1501,6 +1520,12 @@ int expireIfNeeded(redisDb *db, robj *key) {
* we think the key is expired at this time. */
if (server.masterhost != NULL) return 1;
+ /* If clients are paused, we keep the current dataset constant,
+ * but return to the client what we believe is the right state. Typically,
+ * at the end of the pause we will properly expire the key OR we will
+ * have failed over and the new primary will send us the expire. */
+ if (checkClientPauseTimeoutAndReturnIfPaused()) return 1;
+
/* Delete the key */
server.stat_expiredkeys++;
propagateExpire(db,key,server.lazyfree_lazy_expire);
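
The new checkClientPauseTimeoutAndReturnIfPaused() check in expireIfNeeded() means that while clients are paused a logically expired key is reported as expired but not actually deleted, so the dataset stays constant until the pause ends or a failover delivers the expire. Below is a minimal standalone sketch of that "logically expired, physically retained" pattern; the names and data structure are hypothetical, not the Redis internals:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    static bool clients_paused = false;   /* stands in for the pause state */

    struct entry { const char *key; time_t expire_at; bool deleted; };

    /* Return true if the key should be treated as expired. Only delete it
     * when clients are not paused, mirroring the expireIfNeeded() change. */
    static bool expire_if_needed(struct entry *e, time_t now) {
        if (e->deleted || e->expire_at > now) return e->deleted;
        if (clients_paused) return true;   /* report expired, keep the data */
        e->deleted = true;                 /* normal path: actually remove it */
        return true;
    }

    int main(void) {
        struct entry e = { "k", time(NULL) - 1, false };
        bool expired;

        clients_paused = true;
        expired = expire_if_needed(&e, time(NULL));
        printf("paused:  expired=%d deleted=%d\n", expired, e.deleted);  /* 1 0 */

        clients_paused = false;
        expired = expire_if_needed(&e, time(NULL));
        printf("resumed: expired=%d deleted=%d\n", expired, e.deleted);  /* 1 1 */
        return 0;
    }
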
diff --git a/src/debug.c b/src/debug.c
index 6ce8b3bdc..a725e5a30 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -381,39 +381,88 @@ void mallctl_string(client *c, robj **argv, int argc) {
void debugCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"ASSERT -- Crash by assertion failed.",
-"CHANGE-REPL-ID -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
-"CRASH-AND-RECOVER <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
-"DIGEST -- Output a hex signature representing the current DB content.",
-"DIGEST-VALUE <key-1> ... <key-N>-- Output a hex signature of the values of all the specified keys.",
-"DEBUG PROTOCOL [string|integer|double|bignum|null|array|set|map|attrib|push|verbatim|true|false]",
-"ERROR <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
-"LOG <message> -- write message to the server log.",
-"LEAK <string> -- Create a memory leak of the input string.",
-"HTSTATS <dbid> -- Return hash table statistics of the specified Redis database.",
-"HTSTATS-KEY <key> -- Like htstats but for the hash table stored as key's value.",
-"LOADAOF -- Flush the AOF buffers on disk and reload the AOF in memory.",
-"LUA-ALWAYS-REPLICATE-COMMANDS <0|1> -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
-"OBJECT <key> -- Show low level info about key and associated value.",
-"OOM -- Crash the server simulating an out-of-memory error.",
-"PANIC -- Crash the server simulating a panic.",
-"POPULATE <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
-"RELOAD [MERGE] [NOFLUSH] [NOSAVE] -- Save the RDB on disk and reload it back in memory. By default it will save the RDB file and load it back. With the NOFLUSH option the current database is not removed before loading the new one, but conflicts in keys will kill the server with an exception. When MERGE is used, conflicting keys will be loaded (the key in the loaded RDB file will win). When NOSAVE is used, the server will not save the current dataset in the RDB file before loading. Use DEBUG RELOAD NOSAVE when you want just to load the RDB file you placed in the Redis working directory in order to replace the current dataset in memory. Use DEBUG RELOAD NOSAVE NOFLUSH MERGE when you want to add what is in the current RDB file placed in the Redis current directory, with the current memory content. Use DEBUG RELOAD when you want to verify Redis is able to persist the current dataset in the RDB file, flush the memory content, and load it back.",
-"RESTART -- Graceful restart: save config, db, restart.",
-"SDSLEN <key> -- Show low level SDS string info representing key and value.",
-"SEGFAULT -- Crash the server with sigsegv.",
-"SET-ACTIVE-EXPIRE <0|1> -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
-"SET-SKIP-CHECKSUM-VALIDATION <0|1> -- Enables or disables checksum checks for rdb or RESTORE payload.",
-"AOF-FLUSH-SLEEP <microsec> -- Server will sleep before flushing the AOF, this is used for testing",
-"SLEEP <seconds> -- Stop the server for <seconds>. Decimals allowed.",
-"STRUCTSIZE -- Return the size of different Redis core C structures.",
-"ZIPLIST <key> -- Show low level info about the ziplist encoding.",
-"STRINGMATCH-TEST -- Run a fuzz tester against the stringmatchlen() function.",
-"CONFIG-REWRITE-FORCE-ALL -- Like CONFIG REWRITE but writes all configuration options, including keywords not listed in original configuration file or default values.",
+"AOF-FLUSH-SLEEP <microsec>",
+" Server will sleep before flushing the AOF, this is used for testing.",
+"ASSERT",
+" Crash by assertion failed.",
+"CHANGE-REPL-ID"
+" Change the replication IDs of the instance.",
+" Dangerous: should be used only for testing the replication subsystem.",
+"CONFIG-REWRITE-FORCE-ALL",
+" Like CONFIG REWRITE but writes all configuration options, including",
+" keywords not listed in original configuration file or default values.",
+"CRASH-AND-RECOVER <milliseconds>",
+" Hard crash and restart after a <milliseconds> delay.",
+"DIGEST",
+" Output a hex signature representing the current DB content.",
+"DIGEST-VALUE <key> [<key> ...]",
+" Output a hex signature of the values of all the specified keys.",
+"ERROR <string>",
+" Return a Redis protocol error with <string> as message. Useful for clients",
+" unit tests to simulate Redis errors.",
+"LOG <message>",
+" Write <message> to the server log.",
+"HTSTATS <dbid>",
+" Return hash table statistics of the specified Redis database.",
+"HTSTATS-KEY <key>",
+" Like HTSTATS but for the hash table stored at <key>'s value.",
+"LOADAOF",
+" Flush the AOF buffers on disk and reload the AOF in memory.",
+"LUA-ALWAYS-REPLICATE-COMMANDS <0|1>",
+" Setting it to 1 makes Lua replication defaulting to replicating single",
+" commands, without the script having to enable effects replication.",
#ifdef USE_JEMALLOC
-"MALLCTL <key> [<val>] -- Get or set a malloc tunning integer.",
-"MALLCTL-STR <key> [<val>] -- Get or set a malloc tunning string.",
+"MALLCTL <key> [<val>]",
+" Get or set a malloc tuning integer.",
+"MALLCTL-STR <key> [<val>]",
+" Get or set a malloc tuning string.",
#endif
+"OBJECT <key>",
+" Show low level info about `key` and associated value.",
+"OOM",
+" Crash the server simulating an out-of-memory error.",
+"PANIC",
+" Crash the server simulating a panic.",
+"POPULATE <count> [<prefix>] [<size>]",
+" Create <count> string keys named key:<num>. If <prefix> is specified then",
+" it is used instead of the 'key' prefix.",
+"DEBUG PROTOCOL <type>",
+" Reply with a test value of the specified type. <type> can be: string,",
+" integer, double, bignum, null, array, set, map, attrib, push, verbatim,",
+" true, false.",
+"RELOAD [option ...]",
+" Save the RDB on disk and reload it back to memory. Valid <option> values:",
+" * MERGE: conflicting keys will be loaded from RDB.",
+" * NOFLUSH: the existing database will not be removed before load, but",
+" conflicting keys will generate an exception and kill the server."
+" * NOSAVE: the database will be loaded from an existing RDB file.",
+" Examples:",
+" * DEBUG RELOAD: verify that the server is able to persist, flsuh and reload",
+" the database.",
+" * DEBUG RELOAD NOSAVE: replace the current database with the contents of an",
+" existing RDB file.",
+" * DEBUG RELOAD NOSAVE NOFLUSH MERGE: add the contents of an existing RDB",
+" file to the database.",
+"RESTART",
+" Graceful restart: save config, db, restart.",
+"SDSLEN <key>",
+" Show low level SDS string info representing `key` and value.",
+"SEGFAULT",
+" Crash the server with sigsegv.",
+"SET-ACTIVE-EXPIRE <0|1>",
+" Setting it to 0 disables expiring keys in background when they are not",
+" accessed (otherwise the Redis behavior). Setting it to 1 reenables back the",
+" default.",
+"SET-SKIP-CHECKSUM-VALIDATION <0|1>",
+" Enables or disables checksum checks for RDB files and RESTORE's payload.",
+"SLEEP <seconds>",
+" Stop the server for <seconds>. Decimals allowed.",
+"STRINGMATCH-TEST",
+" Run a fuzz tester against the stringmatchlen() function.",
+"STRUCTSIZE",
+" Return the size of different Redis core C structures.",
+"ZIPLIST <key>",
+" Show low level info about the ziplist encoding of <key>.",
NULL
};
addReplyHelp(c, help);
@@ -474,7 +523,7 @@ NULL
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
if (rdbSave(server.rdb_filename,rsiptr) != C_OK) {
- addReply(c,shared.err);
+ addReplyErrorObject(c,shared.err);
return;
}
}
@@ -500,7 +549,7 @@ NULL
int ret = loadAppendOnlyFile(server.aof_filename);
unprotectClient(c);
if (ret != C_OK) {
- addReply(c,shared.err);
+ addReplyErrorObject(c,shared.err);
return;
}
server.dirty = 0; /* Prevent AOF / replication */
@@ -512,7 +561,7 @@ NULL
char *strenc;
if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
- addReply(c,shared.nokeyerr);
+ addReplyErrorObject(c,shared.nokeyerr);
return;
}
val = dictGetVal(de);
@@ -564,7 +613,7 @@ NULL
sds key;
if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
- addReply(c,shared.nokeyerr);
+ addReplyErrorObject(c,shared.nokeyerr);
return;
}
val = dictGetVal(de);
@@ -1569,7 +1618,7 @@ void logCurrentClient(void) {
}
/* Check if the first argument, usually a key, is found inside the
* selected DB, and if so print info about the associated object. */
- if (cc->argc >= 1) {
+ if (cc->argc > 1) {
robj *val, *key;
dictEntry *de;
@@ -1808,7 +1857,7 @@ void bugReportEnd(int killViaSignal, int sig) {
);
/* free(messages); Don't call free() with possibly corrupted memory. */
- if (server.daemonize && server.supervised == 0) unlink(server.pidfile);
+ if (server.daemonize && server.supervised == 0 && server.pidfile) unlink(server.pidfile);
if (!killViaSignal) {
if (server.use_exit_on_panic)
diff --git a/src/debugmacro.h b/src/debugmacro.h
index ded2d2667..58e6577e5 100644
--- a/src/debugmacro.h
+++ b/src/debugmacro.h
@@ -38,4 +38,4 @@
fprintf(fp,__VA_ARGS__); \
fprintf(fp,"\n"); \
fclose(fp); \
- } while (0);
+ } while (0)
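
Several hunks in this merge (async_private.h, debugmacro.h, dict.c, testmodule.c) drop a stray semicolon after } while(0) in function-like macros. The extra semicolon expands to an empty statement that terminates an if body early and breaks if/else chains. A standalone illustration, with hypothetical logging macros:

    #include <stdio.h>

    /* Hypothetical macros, for illustration only. */
    #define LOG_BAD(msg)  do { printf("%s\n", msg); } while (0);  /* trailing ';' */
    #define LOG_GOOD(msg) do { printf("%s\n", msg); } while (0)   /* no trailing ';' */

    int main(void) {
        int ok = 1;

        /* With LOG_BAD the expansion of "if (ok) LOG_BAD("yes"); else ..."
         * becomes "if (ok) do {...} while(0); ; else ...": the macro's own
         * semicolon already closed the 'if' body, the user's semicolon is an
         * extra empty statement, and the following 'else' no longer has a
         * matching 'if', so the code does not compile. */

        if (ok)
            LOG_GOOD("yes");
        else
            LOG_GOOD("no");
        return 0;
    }
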
diff --git a/src/defrag.c b/src/defrag.c
index fc327c506..e189deddd 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -58,7 +58,6 @@ void* activeDefragAlloc(void *ptr) {
void *newptr;
if(!je_get_defrag_hint(ptr)) {
server.stat_active_defrag_misses++;
- size = zmalloc_size(ptr);
return NULL;
}
/* move this allocation to a new allocation.
diff --git a/src/dict.c b/src/dict.c
index 4736dacd5..6c203b850 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -646,13 +646,13 @@ dictEntry *dictGetRandomKey(dict *d)
do {
/* We are sure there are no elements in indexes from 0
* to rehashidx-1 */
- h = d->rehashidx + (random() % (dictSlots(d) - d->rehashidx));
+ h = d->rehashidx + (randomULong() % (dictSlots(d) - d->rehashidx));
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
} while(he == NULL);
} else {
do {
- h = random() & d->ht[0].sizemask;
+ h = randomULong() & d->ht[0].sizemask;
he = d->ht[0].table[h];
} while(he == NULL);
}
@@ -718,7 +718,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
maxsizemask = d->ht[1].sizemask;
/* Pick a random point inside the larger table. */
- unsigned long i = random() & maxsizemask;
+ unsigned long i = randomULong() & maxsizemask;
unsigned long emptylen = 0; /* Continuous empty entries so far. */
while(stored < count && maxsteps--) {
for (j = 0; j < tables; j++) {
@@ -743,7 +743,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
if (he == NULL) {
emptylen++;
if (emptylen >= 5 && emptylen > count) {
- i = random() & maxsizemask;
+ i = randomULong() & maxsizemask;
emptylen = 0;
}
} else {
@@ -1135,10 +1135,10 @@ size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) {
/* Generate human readable stats. */
l += snprintf(buf+l,bufsize-l,
"Hash table %d stats (%s):\n"
- " table size: %ld\n"
- " number of elements: %ld\n"
- " different slots: %ld\n"
- " max chain length: %ld\n"
+ " table size: %lu\n"
+ " number of elements: %lu\n"
+ " different slots: %lu\n"
+ " max chain length: %lu\n"
" avg chain length (counted): %.02f\n"
" avg chain length (computed): %.02f\n"
" Chain length distribution:\n",
@@ -1215,7 +1215,7 @@ dictType BenchmarkDictType = {
#define end_benchmark(msg) do { \
elapsed = timeInMilliseconds()-start; \
printf(msg ": %ld items in %lld ms\n", count, elapsed); \
-} while(0);
+} while(0)
/* dict-benchmark [count] */
int main(int argc, char **argv) {
@@ -1272,6 +1272,13 @@ int main(int argc, char **argv) {
start_benchmark();
for (j = 0; j < count; j++) {
+ dictEntry *de = dictGetRandomKey(dict);
+ assert(de != NULL);
+ }
+ end_benchmark("Accessing random keys");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
sds key = sdsfromlonglong(rand() % count);
key[0] = 'X';
dictEntry *de = dictFind(dict,key);
diff --git a/src/dict.h b/src/dict.h
index f7515e905..d96c3148f 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -33,11 +33,14 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <stdint.h>
-
#ifndef __DICT_H
#define __DICT_H
+#include "mt19937-64.h"
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
#define DICT_OK 0
#define DICT_ERR 1
@@ -148,6 +151,13 @@ typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref);
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
#define dictIsRehashing(d) ((d)->rehashidx != -1)
+/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
+#if ULONG_MAX >= 0xffffffffffffffff
+#define randomULong() ((unsigned long) genrand64_int64())
+#else
+#define randomULong() random()
+#endif
+
/* API */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
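
dict.c now draws random buckets through the new randomULong() macro, so on builds where unsigned long is 64 bits wide the PRNG can address tables with more than 2^31 slots. A small sketch of the same compile-time selection, reusing the MT19937-64 functions added later in this merge (compile together with mt19937-64.c; the table size and seed are only for illustration):

    /* pick_random_bucket.c - compile with: cc pick_random_bucket.c mt19937-64.c */
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "mt19937-64.h"

    /* Same selection as the new dict.h macro: use the 64-bit Mersenne Twister
     * when 'unsigned long' can hold 64 bits, otherwise fall back to random(). */
    #if ULONG_MAX >= 0xffffffffffffffff
    #define randomULong() ((unsigned long) genrand64_int64())
    #else
    #define randomULong() random()
    #endif

    int main(void) {
        init_genrand64(5489ULL);                   /* seed the generator */
        unsigned long sizemask = (1UL << 20) - 1;  /* hypothetical table of 2^20 buckets */
        for (int i = 0; i < 4; i++)
            printf("bucket %lu\n", randomULong() & sizemask);
        return 0;
    }
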
diff --git a/src/evict.c b/src/evict.c
index 3642352ad..04513cd1a 100644
--- a/src/evict.c
+++ b/src/evict.c
@@ -462,7 +462,7 @@ static int isSafeToPerformEvictions(void) {
/* When clients are paused the dataset should be static not just from the
* POV of clients not being able to write, but also from the POV of
* expires and evictions of keys not being performed. */
- if (clientsArePaused()) return 0;
+ if (checkClientPauseTimeoutAndReturnIfPaused()) return 0;
return 1;
}
diff --git a/src/expire.c b/src/expire.c
index 5433f46ca..275a735a7 100644
--- a/src/expire.c
+++ b/src/expire.c
@@ -148,7 +148,7 @@ void activeExpireCycle(int type) {
/* When clients are paused the dataset should be static not just from the
* POV of clients not being able to write, but also from the POV of
* expires and evictions of keys not being performed. */
- if (clientsArePaused()) return;
+ if (checkClientPauseTimeoutAndReturnIfPaused()) return;
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
/* Don't start a fast cycle if the previous cycle did not exit
diff --git a/src/geo.c b/src/geo.c
index 2fd1cb21b..9c668288b 100644
--- a/src/geo.c
+++ b/src/geo.c
@@ -100,8 +100,8 @@ int extractLongLatOrReply(client *c, robj **argv, double *xy) {
}
if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX ||
xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) {
- addReplySds(c, sdscatprintf(sdsempty(),
- "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]));
+ addReplyErrorFormat(c,
+ "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]);
return C_ERR;
}
return C_OK;
@@ -249,7 +249,7 @@ int geoAppendIfWithinShape(geoArray *ga, GeoShape *shape, double score, sds memb
* using multiple queries to the sorted set, that we later need to sort
* via qsort. Similarly we need to be able to reject points outside the search
 * radius area ASAP in order to avoid allocating and processing more points than needed. */
-int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geoArray *ga) {
+int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geoArray *ga, unsigned long limit) {
/* minex 0 = include min in range; maxex 1 = exclude max in range */
/* That's: min <= val < max */
zrangespec range = { .min = min, .max = max, .minex = 0, .maxex = 1 };
@@ -283,6 +283,7 @@ int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geo
sdsnewlen(vstr,vlen);
if (geoAppendIfWithinShape(ga,shape,score,member)
== C_ERR) sdsfree(member);
+ if (ga->used && limit && ga->used >= limit) break;
zzlNext(zl, &eptr, &sptr);
}
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
@@ -304,6 +305,7 @@ int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geo
ele = sdsdup(ele);
if (geoAppendIfWithinShape(ga,shape,ln->score,ele)
== C_ERR) sdsfree(ele);
+ if (ga->used && limit && ga->used >= limit) break;
ln = ln->level[0].forward;
}
}
@@ -342,15 +344,15 @@ void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bit
/* Obtain all members between the min/max of this geohash bounding box.
* Populate a geoArray of GeoPoints by calling geoGetPointsInRange().
* Return the number of points added to the array. */
-int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, GeoShape *shape) {
+int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, GeoShape *shape, unsigned long limit) {
GeoHashFix52Bits min, max;
scoresOfGeoHashBox(hash,&min,&max);
- return geoGetPointsInRange(zobj, min, max, shape, ga);
+ return geoGetPointsInRange(zobj, min, max, shape, ga, limit);
}
/* Search all eight neighbors + self geohash box */
-int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, GeoShape *shape, geoArray *ga) {
+int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, GeoShape *shape, geoArray *ga, unsigned long limit) {
GeoHashBits neighbors[9];
unsigned int i, count = 0, last_processed = 0;
int debugmsg = 0;
@@ -401,7 +403,8 @@ int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, GeoShape *shape, geoArray
D("Skipping processing of %d, same as previous\n",i);
continue;
}
- count += membersOfGeoHashBox(zobj, neighbors[i], ga, shape);
+ if (ga->used && limit && ga->used >= limit) break;
+ count += membersOfGeoHashBox(zobj, neighbors[i], ga, shape, limit);
last_processed = i;
}
return count;
@@ -428,31 +431,45 @@ static int sort_gp_desc(const void *a, const void *b) {
* Commands
* ==================================================================== */
-/* GEOADD key long lat name [long2 lat2 name2 ... longN latN nameN] */
+/* GEOADD key [CH] [NX|XX] long lat name [long2 lat2 name2 ... longN latN nameN] */
void geoaddCommand(client *c) {
- /* Check arguments number for sanity. */
- if ((c->argc - 2) % 3 != 0) {
+ int xx = 0, nx = 0, longidx = 2;
+ int i;
+
+ /* Parse options. At the end 'longidx' is set to the argument position
+ * of the longitude of the first element. */
+ while (longidx < c->argc) {
+ char *opt = c->argv[longidx]->ptr;
+ if (!strcasecmp(opt,"nx")) nx = 1;
+ else if (!strcasecmp(opt,"xx")) xx = 1;
+ else if (!strcasecmp(opt,"ch")) {}
+ else break;
+ longidx++;
+ }
+
+ if ((c->argc - longidx) % 3 || (xx && nx)) {
        /* Need a multiple of three arguments after the options, and NX and XX are mutually exclusive. */
- addReplyError(c, "syntax error. Try GEOADD key [x1] [y1] [name1] "
- "[x2] [y2] [name2] ... ");
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
- int elements = (c->argc - 2) / 3;
- int argc = 2+elements*2; /* ZADD key score ele ... */
+ /* Set up the vector for calling ZADD. */
+ int elements = (c->argc - longidx) / 3;
+ int argc = longidx+elements*2; /* ZADD key [CH] [NX|XX] score ele ... */
robj **argv = zcalloc(argc*sizeof(robj*));
argv[0] = createRawStringObject("zadd",4);
- argv[1] = c->argv[1]; /* key */
- incrRefCount(argv[1]);
+ for (i = 1; i < longidx; i++) {
+ argv[i] = c->argv[i];
+ incrRefCount(argv[i]);
+ }
/* Create the argument vector to call ZADD in order to add all
* the score,value pairs to the requested zset, where score is actually
* an encoded version of lat,long. */
- int i;
for (i = 0; i < elements; i++) {
double xy[2];
- if (extractLongLatOrReply(c, (c->argv+2)+(i*3),xy) == C_ERR) {
+ if (extractLongLatOrReply(c, (c->argv+longidx)+(i*3),xy) == C_ERR) {
for (i = 0; i < argc; i++)
if (argv[i]) decrRefCount(argv[i]);
zfree(argv);
@@ -464,9 +481,9 @@ void geoaddCommand(client *c) {
geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
GeoHashFix52Bits bits = geohashAlign52Bits(hash);
robj *score = createObject(OBJ_STRING, sdsfromlonglong(bits));
- robj *val = c->argv[2 + i * 3 + 2];
- argv[2+i*2] = score;
- argv[3+i*2] = val;
+ robj *val = c->argv[longidx + i * 3 + 2];
+ argv[longidx+i*2] = score;
+ argv[longidx+1+i*2] = val;
incrRefCount(val);
}
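
GEOADD now consumes optional leading CH/NX/XX flags before the longitude/latitude/member triples and forwards them, via the rebuilt argv, to ZADD. Below is a standalone sketch of the same "scan leading options, then require triples" parsing over a hypothetical argument vector (not the real client structure):

    #include <stdio.h>
    #include <string.h>
    #include <strings.h>   /* strcasecmp */

    int main(void) {
        /* Hypothetical GEOADD argument vector (argv[0] is the command name). */
        const char *argv[] = { "GEOADD", "points", "NX", "CH",
                               "13.361389", "38.115556", "Palermo" };
        int argc = sizeof(argv)/sizeof(argv[0]);

        int nx = 0, xx = 0, longidx = 2;
        while (longidx < argc) {
            const char *opt = argv[longidx];
            if (!strcasecmp(opt, "nx")) nx = 1;
            else if (!strcasecmp(opt, "xx")) xx = 1;
            else if (!strcasecmp(opt, "ch")) { /* CH only changes the reply */ }
            else break;                        /* first non-option: start of triples */
            longidx++;
        }

        if ((argc - longidx) % 3 || (nx && xx)) {
            fprintf(stderr, "syntax error\n");
            return 1;
        }
        int elements = (argc - longidx) / 3;
        printf("nx=%d xx=%d triples=%d first coordinate at argv[%d]\n",
               nx, xx, elements, longidx);
        return 0;
    }
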
@@ -486,12 +503,12 @@ void geoaddCommand(client *c) {
#define GEOSEARCHSTORE (1<<4) /* GEOSEARCHSTORE just accept STOREDIST option */
/* GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
- * [COUNT count] [STORE key] [STOREDIST key]
+ * [COUNT count [ANY]] [STORE key] [STOREDIST key]
* GEORADIUSBYMEMBER key member radius unit ... options ...
- * GEOSEARCH key [FROMMEMBER member] [FORMLOG long lat] [BYRADIUS radius unit]
- * [BYBOX width height unit] [WITHCORD] [WITHDIST] [WITHASH] [COUNT count] [ASC|DESC]
- * GEOSEARCHSTORE dest_key src_key [FROMMEMBER member] [FORMLOG long lat] [BYRADIUS radius unit]
- * [BYBOX width height unit] [WITHCORD] [WITHDIST] [WITHASH] [COUNT count] [ASC|DESC] [STOREDIST]
+ * GEOSEARCH key [FROMMEMBER member] [FROMLONLAT long lat] [BYRADIUS radius unit]
+ *             [BYBOX width height unit] [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count [ANY]] [ASC|DESC]
+ * GEOSEARCHSTORE dest_key src_key [FROMMEMBER member] [FROMLONLAT long lat] [BYRADIUS radius unit]
+ *               [BYBOX width height unit] [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count [ANY]] [ASC|DESC] [STOREDIST]
* */
void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
robj *storekey = NULL;
@@ -536,7 +553,8 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
int withdist = 0, withhash = 0, withcoords = 0;
int frommember = 0, fromloc = 0, byradius = 0, bybox = 0;
int sort = SORT_NONE;
- long long count = 0;
+    int any = 0; /* any=1 means a limited search: stop as soon as enough results are found. */
+ long long count = 0; /* Max number of results to return. 0 means unlimited. */
if (c->argc > base_args) {
int remaining = c->argc - base_args;
for (int i = 0; i < remaining; i++) {
@@ -547,13 +565,15 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
withhash = 1;
} else if (!strcasecmp(arg, "withcoord")) {
withcoords = 1;
+ } else if (!strcasecmp(arg, "any")) {
+ any = 1;
} else if (!strcasecmp(arg, "asc")) {
sort = SORT_ASC;
} else if (!strcasecmp(arg, "desc")) {
sort = SORT_DESC;
} else if (!strcasecmp(arg, "count") && (i+1) < remaining) {
if (getLongLongFromObjectOrReply(c, c->argv[base_args+i+1],
- &count, NULL) != C_OK) return;
+ &count, NULL) != C_OK) return;
if (count <= 0) {
addReplyError(c,"COUNT must be > 0");
return;
@@ -620,7 +640,7 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
bybox = 1;
i += 3;
} else {
- addReply(c, shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -648,16 +668,23 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
return;
}
- /* COUNT without ordering does not make much sense, force ASC
- * ordering if COUNT was specified but no sorting was requested. */
- if (count != 0 && sort == SORT_NONE) sort = SORT_ASC;
+ if (any && !count) {
+ addReplyErrorFormat(c, "the ANY argument requires COUNT argument");
+ return;
+ }
+
+ /* COUNT without ordering does not make much sense (we need to
+ * sort in order to return the closest N entries),
+ * force ASC ordering if COUNT was specified but no sorting was
+ * requested. Note that this is not needed for ANY option. */
+ if (count != 0 && sort == SORT_NONE && !any) sort = SORT_ASC;
/* Get all neighbor geohash boxes for our radius search */
GeoHashRadius georadius = geohashCalculateAreasByShapeWGS84(&shape);
/* Search the zset for all matching points */
geoArray *ga = geoArrayCreate();
- membersOfAllNeighbors(zobj, georadius, &shape, ga);
+ membersOfAllNeighbors(zobj, georadius, &shape, ga, any ? count : 0);
/* If no matching results, the user gets an empty reply. */
if (ga->used == 0 && storekey == NULL) {
@@ -902,7 +929,7 @@ void geodistCommand(client *c) {
to_meter = extractUnitOrReply(c,c->argv[4]);
if (to_meter < 0) return;
} else if (c->argc > 5) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index d018e975e..75a04227c 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -205,7 +205,7 @@ struct hllhdr {
#define HLL_RAW 255 /* Only used internally, never exposed. */
#define HLL_MAX_ENCODING 1
-static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";
+static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
/* =========================== Low level bit macros ========================= */
@@ -1171,9 +1171,8 @@ int isHLLObjectOrReply(client *c, robj *o) {
return C_OK;
invalid:
- addReplySds(c,
- sdsnew("-WRONGTYPE Key is not a valid "
- "HyperLogLog string value.\r\n"));
+ addReplyError(c,"-WRONGTYPE Key is not a valid "
+ "HyperLogLog string value.");
return C_ERR;
}
@@ -1203,7 +1202,7 @@ void pfaddCommand(client *c) {
updated++;
break;
case -1:
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
}
@@ -1211,7 +1210,7 @@ void pfaddCommand(client *c) {
if (updated) {
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
- server.dirty++;
+ server.dirty += updated;
HLL_INVALIDATE_CACHE(hdr);
}
addReply(c, updated ? shared.cone : shared.czero);
@@ -1245,7 +1244,7 @@ void pfcountCommand(client *c) {
/* Merge with this HLL with our 'max' HLL by setting max[i]
* to MAX(max[i],hll[i]). */
if (hllMerge(registers,o) == C_ERR) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
}
@@ -1285,7 +1284,7 @@ void pfcountCommand(client *c) {
/* Recompute it and update the cached value. */
card = hllCount(hdr,&invalid);
if (invalid) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
hdr->card[0] = card & 0xff;
@@ -1332,7 +1331,7 @@ void pfmergeCommand(client *c) {
/* Merge with this HLL with our 'max' HLL by setting max[i]
* to MAX(max[i],hll[i]). */
if (hllMerge(max,o) == C_ERR) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
}
@@ -1355,7 +1354,7 @@ void pfmergeCommand(client *c) {
/* Convert the destination object to dense representation if at least
* one of the inputs was dense. */
if (use_dense && hllSparseToDense(o) == C_ERR) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
@@ -1512,7 +1511,7 @@ void pfdebugCommand(client *c) {
if (hdr->encoding == HLL_SPARSE) {
if (hllSparseToDense(o) == C_ERR) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
server.dirty++; /* Force propagation on encoding change. */
@@ -1577,7 +1576,7 @@ void pfdebugCommand(client *c) {
if (hdr->encoding == HLL_SPARSE) {
if (hllSparseToDense(o) == C_ERR) {
- addReplySds(c,sdsnew(invalid_hll_err));
+ addReplyError(c,invalid_hll_err);
return;
}
conv = 1;
diff --git a/src/intset.c b/src/intset.c
index 54afae875..74de87acb 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -358,12 +358,6 @@ static long long usec(void) {
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
-#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
-static void _assert(char *estr, char *file, int line) {
- printf("\n\n=== ASSERTION FAILED ===\n");
- printf("==> %s:%d '%s' is not true\n",file,line,estr);
-}
-
static intset *createSet(int bits, int size) {
uint64_t mask = (1<<bits)-1;
uint64_t value;
diff --git a/src/latency.c b/src/latency.c
index 535e48c2b..d447b2b5b 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -584,16 +584,6 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
* LATENCY RESET: reset data of a specified event or all the data if no event provided.
*/
void latencyCommand(client *c) {
- const char *help[] = {
-"DOCTOR -- Returns a human readable latency analysis report.",
-"GRAPH <event> -- Returns an ASCII latency graph for the event class.",
-"HISTORY <event> -- Returns time-latency samples for the event class.",
-"LATEST -- Returns the latest latency samples for all events.",
-"RESET [event ...] -- Resets latency data of one or more event classes.",
-" (default: reset all data for all event classes)",
-"HELP -- Prints this help.",
-NULL
- };
struct latencyTimeSeries *ts;
if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
@@ -639,6 +629,20 @@ NULL
addReplyLongLong(c,resets);
}
} else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
+ const char *help[] = {
+"DOCTOR",
+" Return a human readable latency analysis report.",
+"GRAPH <event>",
+" Return an ASCII latency graph for the <event> class.",
+"HISTORY <event>",
+" Return time-latency samples for the <event> class.",
+"LATEST",
+" Return the latest latency samples for all events.",
+"RESET [<event> ...]",
+" Reset latency data of one or more <event> classes.",
+" (default: reset all data for all event classes)",
+NULL
+ };
addReplyHelp(c, help);
} else {
addReplySubcommandSyntaxError(c);
diff --git a/src/lazyfree.c b/src/lazyfree.c
index 125e6a1b0..8b9f0e2dc 100644
--- a/src/lazyfree.c
+++ b/src/lazyfree.c
@@ -6,6 +6,49 @@
static redisAtomic size_t lazyfree_objects = 0;
static redisAtomic size_t lazyfreed_objects = 0;
+/* Release objects from the lazyfree thread. It's just decrRefCount()
+ * updating the count of objects to release. */
+void lazyfreeFreeObject(void *args[]) {
+ robj *o = (robj *) args[0];
+ decrRefCount(o);
+ atomicDecr(lazyfree_objects,1);
+ atomicIncr(lazyfreed_objects,1);
+}
+
+/* Release a database from the lazyfree thread. The 'db' pointer is the
+ * database which was substituted with a fresh one in the main thread
+ * when the database was logically deleted. */
+void lazyfreeFreeDatabase(void *args[]) {
+ dict *ht1 = (dict *) args[0];
+ dict *ht2 = (dict *) args[1];
+
+ size_t numkeys = dictSize(ht1);
+ dictRelease(ht1);
+ dictRelease(ht2);
+ atomicDecr(lazyfree_objects,numkeys);
+ atomicIncr(lazyfreed_objects,numkeys);
+}
+
+/* Release the skiplist mapping Redis Cluster keys to slots in the
+ * lazyfree thread. */
+void lazyfreeFreeSlotsMap(void *args[]) {
+ rax *rt = args[0];
+ size_t len = rt->numele;
+ raxFree(rt);
+ atomicDecr(lazyfree_objects,len);
+ atomicIncr(lazyfreed_objects,len);
+}
+
+/* Release the client tracking table (rax) in the
+ * lazyfree thread. */
+void lazyFreeTrackingTable(void *args[]) {
+ rax *rt = args[0];
+ size_t len = rt->numele;
+ raxFree(rt);
+ atomicDecr(lazyfree_objects,len);
+ atomicIncr(lazyfreed_objects,len);
+}
+
/* Return the number of currently pending objects to free. */
size_t lazyfreeGetPendingObjectsCount(void) {
size_t aux;
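
The lazy-free refactor replaces the fixed three-pointer BIO_LAZY_FREE job with a generic callback that receives a void *args[] vector, submitted through the variadic bioCreateLazyFreeJob(fn, arg_count, ...) used in this diff (its definition lives in bio.c, not shown here). A toy standalone sketch of that calling convention, with the background queueing stubbed out so the job runs inline:

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef void (*lazy_free_fn)(void *args[]);

    /* Toy stand-in for bioCreateLazyFreeJob(): pack the varargs into an args[]
     * vector and run the callback inline. The real implementation queues the
     * job for the background lazy-free thread instead. */
    static void create_lazy_free_job(lazy_free_fn fn, int arg_count, ...) {
        void **args = malloc(sizeof(void*) * arg_count);
        va_list ap;
        va_start(ap, arg_count);
        for (int i = 0; i < arg_count; i++) args[i] = va_arg(ap, void*);
        va_end(ap);
        fn(args);
        free(args);
    }

    /* A job callback in the new style: it only knows how to unpack its args. */
    static void free_buffer_job(void *args[]) {
        char *buf = args[0];
        printf("freeing '%s'\n", buf);
        free(buf);
    }

    int main(void) {
        char *buf = malloc(16);
        strcpy(buf, "some payload");
        create_lazy_free_job(free_buffer_job, 1, buf);
        return 0;
    }
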
@@ -120,7 +163,7 @@ int dbAsyncDelete(redisDb *db, robj *key) {
* equivalent to just calling decrRefCount(). */
if (free_effort > LAZYFREE_THRESHOLD && val->refcount == 1) {
atomicIncr(lazyfree_objects,1);
- bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL);
+ bioCreateLazyFreeJob(lazyfreeFreeObject,1, val);
dictSetVal(db->dict,de,NULL);
}
}
@@ -141,7 +184,7 @@ void freeObjAsync(robj *key, robj *obj) {
size_t free_effort = lazyfreeGetFreeEffort(key,obj);
if (free_effort > LAZYFREE_THRESHOLD && obj->refcount == 1) {
atomicIncr(lazyfree_objects,1);
- bioCreateBackgroundJob(BIO_LAZY_FREE,obj,NULL,NULL);
+ bioCreateLazyFreeJob(lazyfreeFreeObject,1,obj);
} else {
decrRefCount(obj);
}
@@ -155,39 +198,17 @@ void emptyDbAsync(redisDb *db) {
db->dict = dictCreate(&dbDictType,NULL);
db->expires = dictCreate(&dbExpiresDictType,NULL);
atomicIncr(lazyfree_objects,dictSize(oldht1));
- bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,oldht1,oldht2);
+ bioCreateLazyFreeJob(lazyfreeFreeDatabase,2,oldht1,oldht2);
}
/* Release the radix tree mapping Redis Cluster keys to slots asynchronously. */
void freeSlotsToKeysMapAsync(rax *rt) {
atomicIncr(lazyfree_objects,rt->numele);
- bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,rt);
-}
-
-/* Release objects from the lazyfree thread. It's just decrRefCount()
- * updating the count of objects to release. */
-void lazyfreeFreeObjectFromBioThread(robj *o) {
- decrRefCount(o);
- atomicDecr(lazyfree_objects,1);
- atomicIncr(lazyfreed_objects,1);
+ bioCreateLazyFreeJob(lazyfreeFreeSlotsMap,1,rt);
}
-/* Release a database from the lazyfree thread. The 'db' pointer is the
- * database which was substituted with a fresh one in the main thread
- * when the database was logically deleted. */
-void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) {
- size_t numkeys = dictSize(ht1);
- dictRelease(ht1);
- dictRelease(ht2);
- atomicDecr(lazyfree_objects,numkeys);
- atomicIncr(lazyfreed_objects,numkeys);
-}
-
-/* Release the radix tree mapping Redis Cluster keys to slots in the
- * lazyfree thread. */
-void lazyfreeFreeSlotsMapFromBioThread(rax *rt) {
- size_t len = rt->numele;
- raxFree(rt);
- atomicDecr(lazyfree_objects,len);
- atomicIncr(lazyfreed_objects,len);
+/* Release the key tracking radix tree asynchronously. */
+void freeTrackingRadixTreeAsync(rax *tracking) {
+ atomicIncr(lazyfree_objects,tracking->numele);
+ bioCreateLazyFreeJob(lazyFreeTrackingTable,1,tracking);
}
diff --git a/src/module.c b/src/module.c
index da9ac29e8..bf186f8b7 100644
--- a/src/module.c
+++ b/src/module.c
@@ -747,6 +747,7 @@ int64_t commandFlagsFromString(char *s) {
else if (!strcasecmp(t,"no-slowlog")) flags |= CMD_SKIP_SLOWLOG;
else if (!strcasecmp(t,"fast")) flags |= CMD_FAST;
else if (!strcasecmp(t,"no-auth")) flags |= CMD_NO_AUTH;
+ else if (!strcasecmp(t,"may-replicate")) flags |= CMD_MAY_REPLICATE;
else if (!strcasecmp(t,"getkeys-api")) flags |= CMD_MODULE_GETKEYS;
else if (!strcasecmp(t,"no-cluster")) flags |= CMD_MODULE_NO_CLUSTER;
else break;
@@ -813,6 +814,8 @@ int64_t commandFlagsFromString(char *s) {
* * **"no-auth"**: This command can be run by an un-authenticated client.
* Normally this is used by a command that is used
* to authenticate a client.
+ * * **"may-replicate"**: This command may generate replication traffic, even
+ * though it's not a write command.
*/
int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) {
int64_t flags = strflags ? commandFlagsFromString((char*)strflags) : 0;
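
The new "may-replicate" flag lets a module declare a command that is not a write command but can still generate replication traffic (a command that publishes messages, for instance). A hedged sketch of declaring such a command at module load time; the module name, command name and callback are made up for illustration:

    #include "redismodule.h"

    /* Hypothetical command: reads data but may replicate bookkeeping traffic. */
    static int MyCmd_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        return RedisModule_ReplyWithSimpleString(ctx, "OK");
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        if (RedisModule_Init(ctx, "mymod", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        /* "readonly may-replicate": not a write command, but may emit traffic. */
        if (RedisModule_CreateCommand(ctx, "mymod.touch", MyCmd_RedisCommand,
                                      "readonly may-replicate", 1, 1, 1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }
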
@@ -851,6 +854,8 @@ int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc c
cp->rediscmd->keystep = keystep;
cp->rediscmd->microseconds = 0;
cp->rediscmd->calls = 0;
+ cp->rediscmd->rejected_calls = 0;
+ cp->rediscmd->failed_calls = 0;
dictAdd(server.commands,sdsdup(cmdname),cp->rediscmd);
dictAdd(server.orig_commands,sdsdup(cmdname),cp->rediscmd);
cp->rediscmd->id = ACLGetCommandID(cmdname); /* ID used for ACL. */
@@ -1368,18 +1373,6 @@ int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) {
return REDISMODULE_OK;
}
-/* Reply with an error or simple string (status message). Used to implement
- * ReplyWithSimpleString() and ReplyWithError().
- * The function always returns REDISMODULE_OK. */
-int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) {
- client *c = moduleGetReplyClient(ctx);
- if (c == NULL) return REDISMODULE_OK;
- addReplyProto(c,prefix,strlen(prefix));
- addReplyProto(c,msg,strlen(msg));
- addReplyProto(c,"\r\n",2);
- return REDISMODULE_OK;
-}
-
/* Reply with the error 'err'.
*
* Note that 'err' must contain all the error, including
@@ -1395,7 +1388,10 @@ int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) {
* The function always returns REDISMODULE_OK.
*/
int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
- return replyWithStatus(ctx,err,"-");
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyErrorFormat(c,"-%s",err);
+ return REDISMODULE_OK;
}
/* Reply with a simple string (+... \r\n in RESP protocol). This replies
@@ -1404,7 +1400,12 @@ int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
*
* The function always returns REDISMODULE_OK. */
int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg) {
- return replyWithStatus(ctx,msg,"+");
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyProto(c,"+",1);
+ addReplyProto(c,msg,strlen(msg));
+ addReplyProto(c,"\r\n",2);
+ return REDISMODULE_OK;
}
/* Reply with an array type of 'len' elements. However 'len' other calls
@@ -1629,7 +1630,7 @@ void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx) {
ctx->saved_oparray = server.also_propagate;
redisOpArrayInit(&server.also_propagate);
}
- execCommandPropagateMulti(ctx->client);
+ execCommandPropagateMulti(ctx->client->db->id);
}
/* Replicate the specified command and arguments to slaves and AOF, as effect
@@ -2044,7 +2045,7 @@ int RM_GetContextFlags(RedisModuleCtx *ctx) {
* periodically in timer callbacks or other periodic callbacks.
*/
int RM_AvoidReplicaTraffic() {
- return clientsArePaused();
+ return checkClientPauseTimeoutAndReturnIfPaused();
}
/* Change the currently selected DB. Returns an error if the id
@@ -7067,33 +7068,32 @@ int RM_ScanKey(RedisModuleKey *key, RedisModuleScanCursor *cursor, RedisModuleSc
*/
int RM_Fork(RedisModuleForkDoneHandler cb, void *user_data) {
pid_t childpid;
- if (hasActiveChildProcess()) {
- return -1;
- }
- openChildInfoPipe();
if ((childpid = redisFork(CHILD_TYPE_MODULE)) == 0) {
/* Child */
redisSetProcTitle("redis-module-fork");
} else if (childpid == -1) {
- closeChildInfoPipe();
serverLog(LL_WARNING,"Can't fork for module: %s", strerror(errno));
} else {
/* Parent */
- server.module_child_pid = childpid;
moduleForkInfo.done_handler = cb;
moduleForkInfo.done_handler_user_data = user_data;
- updateDictResizePolicy();
serverLog(LL_VERBOSE, "Module fork started pid: %ld ", (long) childpid);
}
return childpid;
}
+/* The module is advised to call this function from the fork child once in a while,
+ * so that it can report COW memory to the parent, which exposes it in INFO. */
+void RM_SendChildCOWInfo(void) {
+ sendChildCOWInfo(CHILD_TYPE_MODULE, 0, "Module fork");
+}
+
/* Call from the child process when you want to terminate it.
* retcode will be provided to the done handler executed on the parent process.
*/
int RM_ExitFromChild(int retcode) {
- sendChildCOWInfo(CHILD_TYPE_MODULE, "Module fork");
+ sendChildCOWInfo(CHILD_TYPE_MODULE, 1, "Module fork");
exitFromChild(retcode);
return REDISMODULE_OK;
}
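
RM_Fork() now relies on the shared redisFork()/child_pid bookkeeping, and the new RedisModule_SendChildCOWInfo() lets the forked child periodically report copy-on-write memory that the parent exposes in INFO. Below is a hedged sketch of the intended call pattern from a module; the command, module name and work loop are placeholders, not an API the source prescribes:

    #include "redismodule.h"
    #include <unistd.h>

    static void ForkDone(int exitcode, int bysignal, void *user_data) {
        REDISMODULE_NOT_USED(exitcode);
        REDISMODULE_NOT_USED(bysignal);
        REDISMODULE_NOT_USED(user_data);
    }

    static int BgWork_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        int pid = RedisModule_Fork(ForkDone, NULL);
        if (pid == 0) {
            /* Child: heavy work, periodically reporting COW memory to the parent. */
            for (int i = 0; i < 5; i++) {
                sleep(1);                        /* placeholder for the real work */
                RedisModule_SendChildCOWInfo();
            }
            RedisModule_ExitFromChild(0);
        } else if (pid == -1) {
            return RedisModule_ReplyWithError(ctx, "ERR fork failed");
        }
        /* Parent: returns immediately; ForkDone() runs when the child exits. */
        return RedisModule_ReplyWithSimpleString(ctx, "started");
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        if (RedisModule_Init(ctx, "bgfork", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        if (RedisModule_CreateCommand(ctx, "bgfork.run", BgWork_RedisCommand,
                                      "readonly", 0, 0, 0) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }
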
@@ -7103,22 +7103,20 @@ int RM_ExitFromChild(int retcode) {
* child or the pid does not match, return C_ERR without doing anything. */
int TerminateModuleForkChild(int child_pid, int wait) {
/* Module child should be active and pid should match. */
- if (server.module_child_pid == -1 ||
- server.module_child_pid != child_pid) return C_ERR;
+ if (server.child_type != CHILD_TYPE_MODULE ||
+ server.child_pid != child_pid) return C_ERR;
int statloc;
serverLog(LL_VERBOSE,"Killing running module fork child: %ld",
- (long) server.module_child_pid);
- if (kill(server.module_child_pid,SIGUSR1) != -1 && wait) {
- while(wait4(server.module_child_pid,&statloc,0,NULL) !=
- server.module_child_pid);
+ (long) server.child_pid);
+ if (kill(server.child_pid,SIGUSR1) != -1 && wait) {
+ while(wait4(server.child_pid,&statloc,0,NULL) !=
+ server.child_pid);
}
/* Reset the buffer accumulating changes while the child saves. */
- server.module_child_pid = -1;
+ resetChildState();
moduleForkInfo.done_handler = NULL;
moduleForkInfo.done_handler_user_data = NULL;
- closeChildInfoPipe();
- updateDictResizePolicy();
return C_OK;
}
@@ -7135,12 +7133,12 @@ int RM_KillForkChild(int child_pid) {
void ModuleForkDoneHandler(int exitcode, int bysignal) {
serverLog(LL_NOTICE,
"Module fork exited pid: %ld, retcode: %d, bysignal: %d",
- (long) server.module_child_pid, exitcode, bysignal);
+ (long) server.child_pid, exitcode, bysignal);
if (moduleForkInfo.done_handler) {
moduleForkInfo.done_handler(exitcode, bysignal,
moduleForkInfo.done_handler_user_data);
}
- server.module_child_pid = -1;
+
moduleForkInfo.done_handler = NULL;
moduleForkInfo.done_handler_user_data = NULL;
}
@@ -7938,14 +7936,21 @@ sds genModulesInfoString(sds info) {
/* Redis MODULE command.
*
- * MODULE LOAD <path> [args...] */
+ * MODULE LIST
+ * MODULE LOAD <path> [args...]
+ * MODULE UNLOAD <name>
+ */
void moduleCommand(client *c) {
char *subcmd = c->argv[1]->ptr;
+
if (c->argc == 2 && !strcasecmp(subcmd,"help")) {
const char *help[] = {
-"LIST -- Return a list of loaded modules.",
-"LOAD <path> [arg ...] -- Load a module library from <path>.",
-"UNLOAD <name> -- Unload a module.",
+"LIST",
+" Return a list of loaded modules.",
+"LOAD <path> [<arg> ...]",
+" Load a module library from <path>, passing to it any optional arguments.",
+"UNLOAD <name>",
+" Unload a module.",
NULL
};
addReplyHelp(c, help);
@@ -8594,6 +8599,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(CommandFilterArgReplace);
REGISTER_API(CommandFilterArgDelete);
REGISTER_API(Fork);
+ REGISTER_API(SendChildCOWInfo);
REGISTER_API(ExitFromChild);
REGISTER_API(KillForkChild);
REGISTER_API(RegisterInfoFunc);
diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c
index 5634530dd..078c02c5c 100644
--- a/src/modules/testmodule.c
+++ b/src/modules/testmodule.c
@@ -364,7 +364,7 @@ int TestAssertIntegerReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, lon
do { \
RedisModule_Log(ctx,"warning","Testing %s", name); \
reply = RedisModule_Call(ctx,name,__VA_ARGS__); \
- } while (0);
+ } while (0)
/* TEST.IT -- Run all the tests. */
int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
diff --git a/src/mt19937-64.c b/src/mt19937-64.c
new file mode 100644
index 000000000..a0c897ff6
--- /dev/null
+++ b/src/mt19937-64.c
@@ -0,0 +1,187 @@
+/*
+ A C-program for MT19937-64 (2004/9/29 version).
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ This is a 64-bit version of Mersenne Twister pseudorandom number
+ generator.
+
+ Before using, initialize the state by using init_genrand64(seed)
+ or init_by_array64(init_key, key_length).
+
+ Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ References:
+ T. Nishimura, ``Tables of 64-bit Mersenne Twisters''
+ ACM Transactions on Modeling and
+ Computer Simulation 10. (2000) 348--357.
+ M. Matsumoto and T. Nishimura,
+ ``Mersenne Twister: a 623-dimensionally equidistributed
+ uniform pseudorandom number generator''
+ ACM Transactions on Modeling and
+ Computer Simulation 8. (Jan. 1998) 3--30.
+
+ Any feedback is very welcome.
+ http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces)
+*/
+
+
+#include "mt19937-64.h"
+#include <stdio.h>
+
+#define NN 312
+#define MM 156
+#define MATRIX_A 0xB5026F5AA96619E9ULL
+#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
+#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
+
+
+/* The array for the state vector */
+static unsigned long long mt[NN];
+/* mti==NN+1 means mt[NN] is not initialized */
+static int mti=NN+1;
+
+/* initializes mt[NN] with a seed */
+void init_genrand64(unsigned long long seed)
+{
+ mt[0] = seed;
+ for (mti=1; mti<NN; mti++)
+ mt[mti] = (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+void init_by_array64(unsigned long long init_key[],
+ unsigned long long key_length)
+{
+ unsigned long long i, j, k;
+ init_genrand64(19650218ULL);
+ i=1; j=0;
+ k = (NN>key_length ? NN : key_length);
+ for (; k; k--) {
+ mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 3935559000370003845ULL))
+ + init_key[j] + j; /* non linear */
+ i++; j++;
+ if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
+ if (j>=key_length) j=0;
+ }
+ for (k=NN-1; k; k--) {
+ mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 2862933555777941757ULL))
+ - i; /* non linear */
+ i++;
+ if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
+ }
+
+ mt[0] = 1ULL << 63; /* MSB is 1; assuring non-zero initial array */
+}
+
+/* generates a random number on [0, 2^64-1]-interval */
+unsigned long long genrand64_int64(void)
+{
+ int i;
+ unsigned long long x;
+ static unsigned long long mag01[2]={0ULL, MATRIX_A};
+
+ if (mti >= NN) { /* generate NN words at one time */
+
+ /* if init_genrand64() has not been called, */
+ /* a default initial seed is used */
+ if (mti == NN+1)
+ init_genrand64(5489ULL);
+
+ for (i=0;i<NN-MM;i++) {
+ x = (mt[i]&UM)|(mt[i+1]&LM);
+ mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+ }
+ for (;i<NN-1;i++) {
+ x = (mt[i]&UM)|(mt[i+1]&LM);
+ mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+ }
+ x = (mt[NN-1]&UM)|(mt[0]&LM);
+ mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+
+ mti = 0;
+ }
+
+ x = mt[mti++];
+
+ x ^= (x >> 29) & 0x5555555555555555ULL;
+ x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
+ x ^= (x << 37) & 0xFFF7EEE000000000ULL;
+ x ^= (x >> 43);
+
+ return x;
+}
+
+/* generates a random number on [0, 2^63-1]-interval */
+long long genrand64_int63(void)
+{
+ return (long long)(genrand64_int64() >> 1);
+}
+
+/* generates a random number on [0,1]-real-interval */
+double genrand64_real1(void)
+{
+ return (genrand64_int64() >> 11) * (1.0/9007199254740991.0);
+}
+
+/* generates a random number on [0,1)-real-interval */
+double genrand64_real2(void)
+{
+ return (genrand64_int64() >> 11) * (1.0/9007199254740992.0);
+}
+
+/* generates a random number on (0,1)-real-interval */
+double genrand64_real3(void)
+{
+ return ((genrand64_int64() >> 12) + 0.5) * (1.0/4503599627370496.0);
+}
+
+#ifdef MT19937_64_MAIN
+int main(void)
+{
+ int i;
+ unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4;
+ init_by_array64(init, length);
+ printf("1000 outputs of genrand64_int64()\n");
+ for (i=0; i<1000; i++) {
+ printf("%20llu ", genrand64_int64());
+ if (i%5==4) printf("\n");
+ }
+ printf("\n1000 outputs of genrand64_real2()\n");
+ for (i=0; i<1000; i++) {
+ printf("%10.8f ", genrand64_real2());
+ if (i%5==4) printf("\n");
+ }
+ return 0;
+}
+#endif
diff --git a/src/mt19937-64.h b/src/mt19937-64.h
new file mode 100644
index 000000000..b98348fd4
--- /dev/null
+++ b/src/mt19937-64.h
@@ -0,0 +1,87 @@
+/*
+ A C-program for MT19937-64 (2004/9/29 version).
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ This is a 64-bit version of Mersenne Twister pseudorandom number
+ generator.
+
+ Before using, initialize the state by using init_genrand64(seed)
+ or init_by_array64(init_key, key_length).
+
+ Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ References:
+ T. Nishimura, ``Tables of 64-bit Mersenne Twisters''
+ ACM Transactions on Modeling and
+ Computer Simulation 10. (2000) 348--357.
+ M. Matsumoto and T. Nishimura,
+ ``Mersenne Twister: a 623-dimensionally equidistributed
+ uniform pseudorandom number generator''
+ ACM Transactions on Modeling and
+ Computer Simulation 8. (Jan. 1998) 3--30.
+
+ Any feedback is very welcome.
+ http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces)
+*/
+
+#ifndef __MT19937_64_H
+#define __MT19937_64_H
+
+/* initializes mt[NN] with a seed */
+void init_genrand64(unsigned long long seed);
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+void init_by_array64(unsigned long long init_key[],
+ unsigned long long key_length);
+
+/* generates a random number on [0, 2^64-1]-interval */
+unsigned long long genrand64_int64(void);
+
+
+/* generates a random number on [0, 2^63-1]-interval */
+long long genrand64_int63(void);
+
+/* generates a random number on [0,1]-real-interval */
+double genrand64_real1(void);
+
+/* generates a random number on [0,1)-real-interval */
+double genrand64_real2(void);
+
+/* generates a random number on (0,1)-real-interval */
+double genrand64_real3(void);
+
+/* generates a random number on (0,1]-real-interval */
+double genrand64_real4(void);
+
+#endif
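
The vendored MT19937-64 generator is seeded once via init_genrand64() or init_by_array64() and then queried; dict.h wraps the integer output as randomULong(). A minimal usage sketch (the seed values are arbitrary):

    /* mt_demo.c - compile with: cc mt_demo.c mt19937-64.c */
    #include <stdio.h>
    #include "mt19937-64.h"

    int main(void) {
        unsigned long long seed[2] = { 0x12345ULL, 0xabcdeULL };
        init_by_array64(seed, 2);                  /* seed from an array of words */
        printf("int64: %llu\n", genrand64_int64());
        printf("real2: %f\n", genrand64_real2());  /* uniform in [0,1) */
        return 0;
    }
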
diff --git a/src/multi.c b/src/multi.c
index a2f9ecccf..d88c5f1b8 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -127,15 +127,15 @@ void beforePropagateMultiOrExec(int multi) {
/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
* implementation for more information. */
-void execCommandPropagateMulti(client *c) {
+void execCommandPropagateMulti(int dbid) {
beforePropagateMultiOrExec(1);
- propagate(server.multiCommand,c->db->id,&shared.multi,1,
+ propagate(server.multiCommand,dbid,&shared.multi,1,
PROPAGATE_AOF|PROPAGATE_REPL);
}
-void execCommandPropagateExec(client *c) {
+void execCommandPropagateExec(int dbid) {
beforePropagateMultiOrExec(0);
- propagate(server.execCommand,c->db->id,&shared.exec,1,
+ propagate(server.execCommand,dbid,&shared.exec,1,
PROPAGATE_AOF|PROPAGATE_REPL);
}
@@ -162,7 +162,6 @@ void execCommand(client *c) {
robj **orig_argv;
int orig_argc;
struct redisCommand *orig_cmd;
- int must_propagate = 0; /* Need to propagate MULTI/EXEC to AOF / slaves? */
int was_master = server.masterhost == NULL;
if (!(c->flags & CLIENT_MULTI)) {
@@ -202,19 +201,6 @@ void execCommand(client *c) {
c->argv = c->mstate.commands[j].argv;
c->cmd = c->mstate.commands[j].cmd;
- /* Propagate a MULTI request once we encounter the first command which
- * is not readonly nor an administrative one.
- * This way we'll deliver the MULTI/..../EXEC block as a whole and
- * both the AOF and the replication link will have the same consistency
- * and atomicity guarantees. */
- if (!must_propagate &&
- !server.loading &&
- !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN)))
- {
- execCommandPropagateMulti(c);
- must_propagate = 1;
- }
-
/* ACL permissions are also checked at the time of execution in case
      * they were changed after the commands were queued. */
int acl_errpos;
@@ -265,7 +251,7 @@ void execCommand(client *c) {
/* Make sure the EXEC command will be propagated as well if MULTI
* was already propagated. */
- if (must_propagate) {
+ if (server.propagate_in_transaction) {
int is_master = server.masterhost == NULL;
server.dirty++;
beforePropagateMultiOrExec(0);
@@ -388,31 +374,36 @@ void touchWatchedKey(redisDb *db, robj *key) {
}
}
-/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
- * flush but will be deleted as effect of the flushing operation should
- * be touched. "dbid" is the DB that's getting the flush. -1 if it is
- * a FLUSHALL operation (all the DBs flushed). */
-void touchWatchedKeysOnFlush(int dbid) {
- listIter li1, li2;
+/* Set CLIENT_DIRTY_CAS on all clients watching keys in the given DB when
+ * its contents are invalidated. This may happen in the following situations:
+ * FLUSHDB, FLUSHALL, SWAPDB.
+ *
+ * replaced_with: for SWAPDB, a WATCH is invalidated if the key exists in
+ * either of the two swapped DBs, and left untouched only if it exists in
+ * neither. */
+void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
+ listIter li;
listNode *ln;
-
- /* For every client, check all the waited keys */
- listRewind(server.clients,&li1);
- while((ln = listNext(&li1))) {
- client *c = listNodeValue(ln);
- listRewind(c->watched_keys,&li2);
- while((ln = listNext(&li2))) {
- watchedKey *wk = listNodeValue(ln);
-
- /* For every watched key matching the specified DB, if the
- * key exists, mark the client as dirty, as the key will be
- * removed. */
- if (dbid == -1 || wk->db->id == dbid) {
- if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
- c->flags |= CLIENT_DIRTY_CAS;
+ dictEntry *de;
+
+ if (dictSize(emptied->watched_keys) == 0) return;
+
+ dictIterator *di = dictGetSafeIterator(emptied->watched_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ list *clients = dictGetVal(de);
+ if (!clients) continue;
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ if (dictFind(emptied->dict, key->ptr)) {
+ c->flags |= CLIENT_DIRTY_CAS;
+ } else if (replaced_with && dictFind(replaced_with->dict, key->ptr)) {
+ c->flags |= CLIENT_DIRTY_CAS;
}
}
}
+ dictReleaseIterator(di);
}
void watchCommand(client *c) {
diff --git a/src/networking.c b/src/networking.c
index df30206a3..e624dd8f9 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -178,6 +178,7 @@ client *createClient(connection *conn) {
c->peerid = NULL;
c->sockname = NULL;
c->client_list_node = NULL;
+ c->paused_list_node = NULL;
c->client_tracking_redirection = 0;
c->client_tracking_prefixes = NULL;
c->client_cron_last_memory_usage = 0;
@@ -259,8 +260,14 @@ int prepareClientToWrite(client *c) {
if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
/* Schedule the client to write the output buffers to the socket, unless
- * it should already be setup to do so (it has already pending data). */
- if (!clientHasPendingReplies(c)) clientInstallWriteHandler(c);
+ * it should already be setup to do so (it has already pending data).
+ *
+ * If CLIENT_PENDING_READ is set, we're in an IO thread and should
+ * not install a write handler. Instead, it will be done by
+ * handleClientsWithPendingReadsUsingThreads() upon return.
+ */
+ if (!clientHasPendingReplies(c) && !(c->flags & CLIENT_PENDING_READ))
+ clientInstallWriteHandler(c);
/* Authorize the caller to queue in the output buffer of this client. */
return C_OK;
@@ -399,6 +406,24 @@ void addReplyErrorLength(client *c, const char *s, size_t len) {
/* Do some actions after an error reply was sent (Log if needed, updates stats, etc.) */
void afterErrorReply(client *c, const char *s, size_t len) {
+ /* Increment the global error counter */
+ server.stat_total_error_replies++;
+    /* Increment the error stats.
+     * If the string already starts with "-..." then the error prefix
+     * is provided by the caller (we limit the search to 32 chars). Otherwise we use "-ERR". */
+ if (s[0] != '-') {
+ incrementErrorCount("ERR", 3);
+ } else {
+ char *spaceloc = memchr(s, ' ', len < 32 ? len : 32);
+ if (spaceloc) {
+ const size_t errEndPos = (size_t)(spaceloc - s);
+ incrementErrorCount(s+1, errEndPos-1);
+ } else {
+ /* Fallback to ERR if we can't retrieve the error prefix */
+ incrementErrorCount("ERR", 3);
+ }
+ }
+
/* Sometimes it could be normal that a slave replies to a master with
* an error and this function gets called. Actually the error will never
* be sent because addReply*() against master clients has no effect...
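
The prefix-extraction rule applied above can be exercised on its own; the following stand-alone sketch (not Redis code, helper names are made up) mirrors the same logic:

#include <stdio.h>
#include <string.h>

/* Return (via out params) the token that would be counted in errorstats:
 * the word following '-' up to the first space, looking only at the first
 * 32 characters, with "ERR" as the fallback. */
static void classify_error(const char *s, size_t len,
                           const char **prefix, size_t *prefix_len) {
    *prefix = "ERR"; *prefix_len = 3;                /* fallback */
    if (len == 0 || s[0] != '-') return;             /* no caller prefix */
    const char *spaceloc = memchr(s, ' ', len < 32 ? len : 32);
    if (spaceloc == NULL) return;                    /* fallback again */
    *prefix = s + 1;
    *prefix_len = (size_t)(spaceloc - s) - 1;
}

int main(void) {
    const char *msg = "-WRONGTYPE Operation against a key holding the wrong kind of value";
    const char *p; size_t plen;
    classify_error(msg, strlen(msg), &p, &plen);
    printf("%.*s\n", (int)plen, p);                  /* prints WRONGTYPE */
    return 0;
}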
@@ -452,9 +477,11 @@ void addReplyError(client *c, const char *err) {
}
/* See addReplyErrorLength for expectations from the input string. */
+/* As a side effect the SDS string is freed. */
void addReplyErrorSds(client *c, sds err) {
addReplyErrorLength(c,err,sdslen(err));
afterErrorReply(c,err,sdslen(err));
+ sdsfree(err);
}
/* See addReplyErrorLength for expectations from the formatted string.
@@ -533,13 +560,9 @@ void *addReplyDeferredLen(client *c) {
return listLast(c->reply);
}
-/* Populate the length object and try gluing it to the next chunk. */
-void setDeferredAggregateLen(client *c, void *node, long length, char prefix) {
- serverAssert(length >= 0);
+void setDeferredReply(client *c, void *node, const char *s, size_t length) {
listNode *ln = (listNode*)node;
clientReplyBlock *next;
- char lenstr[128];
- size_t lenstr_len = sprintf(lenstr, "%c%ld\r\n", prefix, length);
/* Abort when *node is NULL: when the client should not accept writes
* we return NULL in addReplyDeferredLen() */
@@ -557,25 +580,39 @@ void setDeferredAggregateLen(client *c, void *node, long length, char prefix) {
* - It has enough room already allocated
* - And not too large (avoid large memmove) */
if (ln->next != NULL && (next = listNodeValue(ln->next)) &&
- next->size - next->used >= lenstr_len &&
- next->used < PROTO_REPLY_CHUNK_BYTES * 4) {
- memmove(next->buf + lenstr_len, next->buf, next->used);
- memcpy(next->buf, lenstr, lenstr_len);
- next->used += lenstr_len;
+ next->size - next->used >= length &&
+ next->used < PROTO_REPLY_CHUNK_BYTES * 4)
+ {
+ memmove(next->buf + length, next->buf, next->used);
+ memcpy(next->buf, s, length);
+ next->used += length;
listDelNode(c->reply,ln);
} else {
/* Create a new node */
- clientReplyBlock *buf = zmalloc(lenstr_len + sizeof(clientReplyBlock));
+ clientReplyBlock *buf = zmalloc(length + sizeof(clientReplyBlock));
/* Take over the allocation's internal fragmentation */
buf->size = zmalloc_usable_size(buf) - sizeof(clientReplyBlock);
- buf->used = lenstr_len;
- memcpy(buf->buf, lenstr, lenstr_len);
+ buf->used = length;
+ memcpy(buf->buf, s, length);
listNodeValue(ln) = buf;
c->reply_bytes += buf->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
+/* Populate the length object and try gluing it to the next chunk. */
+void setDeferredAggregateLen(client *c, void *node, long length, char prefix) {
+ serverAssert(length >= 0);
+
+ /* Abort when *node is NULL: when the client should not accept writes
+ * we return NULL in addReplyDeferredLen() */
+ if (node == NULL) return;
+
+ char lenstr[128];
+ size_t lenstr_len = sprintf(lenstr, "%c%ld\r\n", prefix, length);
+ setDeferredReply(c, node, lenstr, lenstr_len);
+}
+
void setDeferredArrayLen(client *c, void *node, long length) {
setDeferredAggregateLen(c,node,length,'*');
}
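
As an aside, the deferred header that setDeferredAggregateLen() now hands to setDeferredReply() is just the RESP aggregate-length line; a tiny stand-alone sketch of the formatting:

#include <stdio.h>

int main(void) {
    char lenstr[128];
    /* Same formatting as above: prefix character, length, CRLF. */
    int lenstr_len = sprintf(lenstr, "%c%ld\r\n", '*', 3L);
    printf("%d bytes\n", lenstr_len);   /* 4 bytes: '*', '3', '\r', '\n' */
    return 0;
}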
@@ -771,6 +808,14 @@ void addReplyBulkSds(client *c, sds s) {
addReply(c,shared.crlf);
}
+/* Set sds to a deferred reply (for symmetry with addReplyBulkSds it also frees the sds) */
+void setDeferredReplyBulkSds(client *c, void *node, sds s) {
+ sds reply = sdscatprintf(sdsempty(), "$%d\r\n%s\r\n", (unsigned)sdslen(s), s);
+ setDeferredReply(c, node, reply, sdslen(reply));
+ sdsfree(reply);
+ sdsfree(s);
+}
+
/* Add a C null term string as bulk reply */
void addReplyBulkCString(client *c, const char *s) {
if (s == NULL) {
@@ -829,12 +874,16 @@ void addReplyHelp(client *c, const char **help) {
sdstoupper(cmd);
addReplyStatusFormat(c,
- "%s <subcommand> arg arg ... arg. Subcommands are:",cmd);
+ "%s <subcommand> [<arg> [value] [opt] ...]. Subcommands are:",cmd);
sdsfree(cmd);
while (help[blen]) addReplyStatus(c,help[blen++]);
- blen++; /* Account for the header line(s). */
+ addReplyStatus(c,"HELP");
+ addReplyStatus(c," Prints this help.");
+
+ blen += 1; /* Account for the header. */
+ blen += 2; /* Account for the footer. */
setDeferredArrayLen(c,blenp,blen);
}
@@ -850,7 +899,7 @@ void addReplySubcommandSyntaxError(client *c) {
sdsfree(cmd);
}
-/* Append 'src' client output buffers into 'dst' client output buffers.
+/* Append 'src' client output buffers into 'dst' client output buffers.
* This function clears the output buffers of 'src' */
void AddReplyFromClient(client *dst, client *src) {
/* If the source client contains a partial response due to client output
@@ -1310,7 +1359,7 @@ void freeClient(client *c) {
* to keep data safe and we may delay configured 'save' for full sync. */
if (server.saveparamslen == 0 &&
c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
- server.rdb_child_pid != -1 &&
+ server.child_type == CHILD_TYPE_RDB &&
server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
anyOtherSlaveWaitRdb(c) == 0)
{
@@ -1686,6 +1735,7 @@ int processInlineBuffer(client *c) {
 * However there is an exception: masters may send us just a newline
* to keep the connection active. */
if (querylen != 0 && c->flags & CLIENT_MASTER) {
+ sdsfreesplitres(argv,argc);
serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
setProtocolError("Master using the inline protocol. Desync?",c);
return C_ERR;
@@ -1851,7 +1901,7 @@ int processMultibulkBuffer(client *c) {
c->qb_pos = 0;
/* Hint the sds library about the amount of bytes this string is
* going to contain. */
- c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2);
+ c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2-sdslen(c->querybuf));
}
}
c->bulklen = ll;
@@ -1911,7 +1961,7 @@ void commandProcessed(client *c) {
* still be able to access the client argv and argc field.
* The client will be reset in unblockClientFromModule(). */
if (!(c->flags & CLIENT_BLOCKED) ||
- c->btype != BLOCKED_MODULE)
+ (c->btype != BLOCKED_MODULE && c->btype != BLOCKED_PAUSE))
{
resetClient(c);
}
@@ -1954,6 +2004,20 @@ int processCommandAndResetClient(client *c) {
return deadclient ? C_ERR : C_OK;
}
+
+/* This function will execute any fully parsed commands pending on
+ * the client. Returns C_ERR if the client is no longer valid after executing
+ * the command, and C_OK for all other cases. */
+int processPendingCommandsAndResetClient(client *c) {
+ if (c->flags & CLIENT_PENDING_COMMAND) {
+ c->flags &= ~CLIENT_PENDING_COMMAND;
+ if (processCommandAndResetClient(c) == C_ERR) {
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
/* This function is called every time, in the client structure 'c', there is
* more query buffer to process, because we read more data from the socket
* or because a client was blocked and later reactivated, so there could be
@@ -1961,9 +2025,6 @@ int processCommandAndResetClient(client *c) {
void processInputBuffer(client *c) {
/* Keep processing while there is something in the input buffer */
while(c->qb_pos < sdslen(c->querybuf)) {
- /* Return if clients are paused. */
- if (!(c->flags & CLIENT_SLAVE) && clientsArePaused()) break;
-
/* Immediately abort if the client is in the middle of something. */
if (c->flags & CLIENT_BLOCKED) break;
@@ -2361,26 +2422,47 @@ void clientCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"ID -- Return the ID of the current connection.",
-"INFO -- Return information about the current client connection.",
-"GETNAME -- Return the name of the current connection.",
-"KILL <ip:port> -- Kill connection made from <ip:port>.",
-"KILL <option> <value> [option value ...] -- Kill connections. Options are:",
-" ADDR <ip:port> -- Kill connection made from <ip:port>",
-" LADDR <ip:port> -- Kill connection made to <ip:port>",
-" TYPE (normal|master|replica|pubsub) -- Kill connections by type.",
-" USER <username> -- Kill connections authenticated with such user.",
-" SKIPME (yes|no) -- Skip killing current connection (default: yes).",
-"LIST [options ...] -- Return information about client connections. Options:",
-" TYPE (normal|master|replica|pubsub) -- Return clients of specified type.",
-" ID id [id ...] -- Return clients of specified IDs only.",
-"PAUSE <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
-"REPLY (on|off|skip) -- Control the replies sent to the current connection.",
-"SETNAME <name> -- Assign the name <name> to the current connection.",
-"UNBLOCK <clientid> [TIMEOUT|ERROR] -- Unblock the specified blocked client.",
-"TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first] [PREFIX second] [OPTIN] [OPTOUT]... -- Enable client keys tracking for client side caching.",
-"CACHING (yes|no) -- Enable/Disable tracking of the keys for next command in OPTIN/OPTOUT mode.",
-"GETREDIR -- Return the client ID we are redirecting to when tracking is enabled.",
+"CACHING (YES|NO)",
+" Enable/disable tracking of the keys for next command in OPTIN/OPTOUT modes.",
+"GETREDIR",
+" Return the client ID we are redirecting to when tracking is enabled.",
+"GETNAME",
+" Return the name of the current connection.",
+"ID",
+" Return the ID of the current connection.",
+"INFO",
+" Return information about the current client connection.",
+"KILL <ip:port>",
+" Kill connection made from <ip:port>.",
+"KILL <option> <value> [<option> <value> [...]]",
+" Kill connections. Options are:",
+" * ADDR <ip:port>",
+" Kill connection made from <ip:port>",
+" * TYPE (normal|master|replica|pubsub)",
+" Kill connections by type.",
+" * USER <username>",
+" Kill connections authenticated by <username>.",
+" * SKIPME (YES|NO)",
+" Skip killing current connection (default: yes).",
+"LIST [options ...]",
+" Return information about client connections. Options:",
+" * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)",
+" Return clients of specified type.",
+"UNPAUSE",
+" Stop the current client pause, resuming traffic.",
+"PAUSE <timeout> [WRITE|ALL]",
+" Suspend all, or just write, clients for <timout> milliseconds.",
+"REPLY (ON|OFF|SKIP)",
+" Control the replies sent to the current connection.",
+"SETNAME <name>",
+" Assign the name <name> to the current connection.",
+"UNBLOCK <clientid> [TIMEOUT|ERROR]",
+" Unblock the specified blocked client.",
+"TRACKING (ON|OFF) [REDIRECT <id>] [BCAST] [PREFIX <prefix> [...]]",
+" [OPTIN] [OPTOUT]",
+" Control server assisted client side caching.",
+"TRACKINGINFO",
+" Report tracking status for the current connection.",
NULL
};
addReplyHelp(c, help);
@@ -2421,7 +2503,7 @@ NULL
}
}
} else if (c->argc != 2) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -2440,7 +2522,7 @@ NULL
if (!(c->flags & CLIENT_REPLY_OFF))
c->flags |= CLIENT_REPLY_SKIP_NEXT;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
} else if (!strcasecmp(c->argv[1]->ptr,"kill")) {
@@ -2496,17 +2578,17 @@ NULL
} else if (!strcasecmp(c->argv[i+1]->ptr,"no")) {
skipme = 0;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
i += 2;
}
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -2585,13 +2667,31 @@ NULL
addReplyBulk(c,c->name);
else
addReplyNull(c);
- } else if (!strcasecmp(c->argv[1]->ptr,"pause") && c->argc == 3) {
- /* CLIENT PAUSE */
+ } else if (!strcasecmp(c->argv[1]->ptr,"unpause") && c->argc == 2) {
+ /* CLIENT UNPAUSE */
+ unpauseClients();
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"pause") && (c->argc == 3 ||
+ c->argc == 4))
+ {
+ /* CLIENT PAUSE TIMEOUT [WRITE|ALL] */
long long duration;
+ int type = CLIENT_PAUSE_ALL;
+ if (c->argc == 4) {
+ if (!strcasecmp(c->argv[3]->ptr,"write")) {
+ type = CLIENT_PAUSE_WRITE;
+ } else if (!strcasecmp(c->argv[3]->ptr,"all")) {
+ type = CLIENT_PAUSE_ALL;
+ } else {
+ addReplyError(c,
+ "CLIENT PAUSE mode must be WRITE or ALL");
+ return;
+ }
+ }
if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,
- UNIT_MILLISECONDS) != C_OK) return;
- pauseClients(duration);
+ UNIT_MILLISECONDS) != C_OK) return;
+ pauseClients(duration, type);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"tracking") && c->argc >= 3) {
/* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first]
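
A hypothetical use of the pause subcommands handled above (illustration only):

/* 127.0.0.1:6379> CLIENT PAUSE 5000 WRITE    -- hold only write commands
 * OK                                          -- for up to 5000 ms
 * 127.0.0.1:6379> CLIENT UNPAUSE              -- lift the pause early
 * OK
 *
 * With WRITE, read-only commands keep being served; with ALL (the default
 * when no mode is given) every normal client command is held until the
 * timeout expires or CLIENT UNPAUSE is issued. */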
@@ -2643,7 +2743,7 @@ NULL
prefix[numprefix++] = c->argv[j];
} else {
zfree(prefix);
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -2700,12 +2800,19 @@ NULL
return;
}
+ if (options & CLIENT_TRACKING_BCAST) {
+ if (!checkPrefixCollisionsOrReply(c,prefix,numprefix)) {
+ zfree(prefix);
+ return;
+ }
+ }
+
enableTracking(c,redir,options,prefix,numprefix);
} else if (!strcasecmp(c->argv[2]->ptr,"off")) {
disableTracking(c);
} else {
zfree(prefix);
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
zfree(prefix);
@@ -2734,7 +2841,7 @@ NULL
return;
}
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -2747,23 +2854,90 @@ NULL
} else {
addReplyLongLong(c,-1);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"trackinginfo") && c->argc == 2) {
+ addReplyMapLen(c,3);
+
+ /* Flags */
+ addReplyBulkCString(c,"flags");
+ void *arraylen_ptr = addReplyDeferredLen(c);
+ int numflags = 0;
+ addReplyBulkCString(c,c->flags & CLIENT_TRACKING ? "on" : "off");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_BCAST) {
+ addReplyBulkCString(c,"bcast");
+ numflags++;
+ }
+ if (c->flags & CLIENT_TRACKING_OPTIN) {
+ addReplyBulkCString(c,"optin");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_CACHING) {
+ addReplyBulkCString(c,"caching-yes");
+ numflags++;
+ }
+ }
+ if (c->flags & CLIENT_TRACKING_OPTOUT) {
+ addReplyBulkCString(c,"optout");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_CACHING) {
+ addReplyBulkCString(c,"caching-no");
+ numflags++;
+ }
+ }
+ if (c->flags & CLIENT_TRACKING_NOLOOP) {
+ addReplyBulkCString(c,"noloop");
+ numflags++;
+ }
+ if (c->flags & CLIENT_TRACKING_BROKEN_REDIR) {
+ addReplyBulkCString(c,"broken_redirect");
+ numflags++;
+ }
+ setDeferredSetLen(c,arraylen_ptr,numflags);
+
+ /* Redirect */
+ addReplyBulkCString(c,"redirect");
+ if (c->flags & CLIENT_TRACKING) {
+ addReplyLongLong(c,c->client_tracking_redirection);
+ } else {
+ addReplyLongLong(c,-1);
+ }
+
+ /* Prefixes */
+ addReplyBulkCString(c,"prefixes");
+ if (c->client_tracking_prefixes) {
+ addReplyArrayLen(c,raxSize(c->client_tracking_prefixes));
+ raxIterator ri;
+ raxStart(&ri,c->client_tracking_prefixes);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ }
+ raxStop(&ri);
+ } else {
+ addReplyArrayLen(c,0);
+ }
} else {
- addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLIENT HELP", (char*)c->argv[1]->ptr);
+ addReplySubcommandSyntaxError(c);
}
}
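
For illustration, an approximate CLIENT TRACKINGINFO reply for a connection that never enabled tracking (redis-cli-style rendering; actual values depend on the connection's flags):

/* 127.0.0.1:6379> CLIENT TRACKINGINFO
 * 1# "flags"    => 1) "off"
 * 2# "redirect" => (integer) -1
 * 3# "prefixes" => (empty array)
 */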
-/* HELLO <protocol-version> [AUTH <user> <password>] [SETNAME <name>] */
+/* HELLO [<protocol-version> [AUTH <user> <password>] [SETNAME <name>] ] */
void helloCommand(client *c) {
- long long ver;
+ long long ver = 0;
+ int next_arg = 1;
- if (getLongLongFromObject(c->argv[1],&ver) != C_OK ||
- ver < 2 || ver > 3)
- {
- addReplyError(c,"-NOPROTO unsupported protocol version");
- return;
+ if (c->argc >= 2) {
+ if (getLongLongFromObjectOrReply(c, c->argv[next_arg++], &ver,
+ "Protocol version is not an integer or out of range") != C_OK) {
+ return;
+ }
+
+ if (ver < 2 || ver > 3) {
+ addReplyError(c,"-NOPROTO unsupported protocol version");
+ return;
+ }
}
- for (int j = 2; j < c->argc; j++) {
+ for (int j = next_arg; j < c->argc; j++) {
int moreargs = (c->argc-1) - j;
const char *opt = c->argv[j]->ptr;
if (!strcasecmp(opt,"AUTH") && moreargs >= 2) {
@@ -2791,7 +2965,7 @@ void helloCommand(client *c) {
}
/* Let's switch to the specified RESP mode. */
- c->resp = ver;
+ if (ver) c->resp = ver;
addReplyMapLen(c,6 + !server.sentinel_mode);
addReplyBulkCString(c,"server");
@@ -2801,7 +2975,7 @@ void helloCommand(client *c) {
addReplyBulkCString(c,REDIS_VERSION);
addReplyBulkCString(c,"proto");
- addReplyLongLong(c,ver);
+ addReplyLongLong(c,c->resp);
addReplyBulkCString(c,"id");
addReplyLongLong(c,c->id);
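
A hypothetical illustration of the relaxed parsing above: HELLO with no arguments now only reports the handshake information and leaves the protocol version untouched, while HELLO 3 still switches to RESP3.

/* 127.0.0.1:6379> HELLO          -- no version given: c->resp is unchanged,
 *  ...                           -- "proto" reports the current c->resp
 * 127.0.0.1:6379> HELLO 3        -- explicit version: switch to RESP3
 * 127.0.0.1:6379> HELLO 4
 * (error) NOPROTO unsupported protocol version
 */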
@@ -3074,54 +3248,66 @@ void flushSlavesOutputBuffers(void) {
}
}
-/* Pause clients up to the specified unixtime (in ms). While clients
- * are paused no command is processed from clients, so the data set can't
- * change during that time.
- *
- * However while this function pauses normal and Pub/Sub clients, slaves are
- * still served, so this function can be used on server upgrades where it is
- * required that slaves process the latest bytes from the replication stream
- * before being turned to masters.
+/* Pause clients up to the specified unixtime (in ms) for a given type of
+ * commands.
*
+ * A main use case of this function is to allow pausing replication traffic
+ * so that a failover without data loss can occur. Replicas will continue to
+ * receive traffic to facilitate this functionality.
+ *
* This function is also internally used by Redis Cluster for the manual
* failover procedure implemented by CLUSTER FAILOVER.
*
 * The function always succeeds, even if there is already a pause in progress.
- * In such a case, the pause is extended if the duration is more than the
- * time left for the previous duration. However if the duration is smaller
- * than the time left for the previous pause, no change is made to the
- * left duration. */
-void pauseClients(mstime_t end) {
- if (!server.clients_paused || end > server.clients_pause_end_time)
- server.clients_pause_end_time = end;
- server.clients_paused = 1;
-}
-
-/* Return non-zero if clients are currently paused. As a side effect the
- * function checks if the pause time was reached and clear it. */
-int clientsArePaused(void) {
- if (server.clients_paused &&
- server.clients_pause_end_time < server.mstime)
- {
- listNode *ln;
- listIter li;
- client *c;
+ * In such a case, the end time is set to the maximum of the current and the
+ * new end time, and the type is set to the more restrictive type of pause. */
+void pauseClients(mstime_t end, pause_type type) {
+ if (type > server.client_pause_type) {
+ server.client_pause_type = type;
+ }
- server.clients_paused = 0;
+ if (end > server.client_pause_end_time) {
+ server.client_pause_end_time = end;
+ }
- /* Put all the clients in the unblocked clients queue in order to
- * force the re-processing of the input buffer if any. */
- listRewind(server.clients,&li);
- while ((ln = listNext(&li)) != NULL) {
- c = listNodeValue(ln);
+    /* If we are inside a MULTI/EXEC block, write commands that were queued
+     * up both before and after the pause are still allowed to execute. We
+     * need to track this state so that we don't assert in propagate(). */
+ if (server.in_exec) {
+ server.client_pause_in_transaction = 1;
+ }
+}
- /* Don't touch slaves and blocked clients.
- * The latter pending requests will be processed when unblocked. */
- if (c->flags & (CLIENT_SLAVE|CLIENT_BLOCKED)) continue;
- queueClientForReprocessing(c);
- }
+/* Unpause clients and queue them for reprocessing. */
+void unpauseClients(void) {
+ listNode *ln;
+ listIter li;
+ client *c;
+
+ server.client_pause_type = CLIENT_PAUSE_OFF;
+
+ /* Unblock all of the clients so they are reprocessed. */
+ listRewind(server.paused_clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ c = listNodeValue(ln);
+ unblockClient(c);
}
- return server.clients_paused;
+}
+
+/* Returns true if clients are paused and false otherwise. */
+int areClientsPaused(void) {
+ return server.client_pause_type != CLIENT_PAUSE_OFF;
+}
+
+/* Checks if the current client pause has elapsed and unpause clients
+ * if it has. Also returns true if clients are now paused and false
+ * otherwise. */
+int checkClientPauseTimeoutAndReturnIfPaused(void) {
+ if (server.client_pause_end_time < server.mstime) {
+ unpauseClients();
+ }
+ return areClientsPaused();
}
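
The merge rule used when pauseClients() is called while a pause is already active can be modelled by the following stand-alone sketch (not Redis code; the type and helper names are made up): a later call can only extend the end time and only make the pause more restrictive.

#include <stdio.h>

typedef enum { PAUSE_OFF = 0, PAUSE_WRITE = 1, PAUSE_ALL = 2 } pause_t;

static void merge_pause(long long *end, pause_t *type,
                        long long new_end, pause_t new_type) {
    if (new_type > *type) *type = new_type;   /* more restrictive type wins */
    if (new_end > *end) *end = new_end;       /* later end time wins */
}

int main(void) {
    long long end = 10000; pause_t type = PAUSE_WRITE;
    merge_pause(&end, &type, 5000, PAUSE_ALL);     /* shorter but stricter */
    printf("end=%lld type=%d\n", end, (int)type);  /* end=10000 type=2 */
    return 0;
}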
/* This function is called by Redis in order to process a few events from
@@ -3499,16 +3685,20 @@ int handleClientsWithPendingReadsUsingThreads(void) {
c->flags &= ~CLIENT_PENDING_READ;
listDelNode(server.clients_pending_read,ln);
- if (c->flags & CLIENT_PENDING_COMMAND) {
- c->flags &= ~CLIENT_PENDING_COMMAND;
- if (processCommandAndResetClient(c) == C_ERR) {
- /* If the client is no longer valid, we avoid
- * processing the client later. So we just go
- * to the next. */
- continue;
- }
+ if (processPendingCommandsAndResetClient(c) == C_ERR) {
+ /* If the client is no longer valid, we avoid
+ * processing the client later. So we just go
+ * to the next. */
+ continue;
}
+
processInputBuffer(c);
+
+    /* We may have pending replies if readQueryFromClient() (running in an
+     * I/O thread) produced replies without installing a write handler
+     * (it cannot do that from the thread). */
+ if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
+ clientInstallWriteHandler(c);
}
/* Update processed count on server */
diff --git a/src/object.c b/src/object.c
index 71eceb6d6..8573ef0b5 100644
--- a/src/object.c
+++ b/src/object.c
@@ -404,7 +404,7 @@ robj *resetRefCount(robj *obj) {
int checkType(client *c, robj *o, int type) {
/* A NULL is considered an empty key */
if (o && o->type != type) {
- addReply(c,shared.wrongtypeerr);
+ addReplyErrorObject(c,shared.wrongtypeerr);
return 1;
}
return 0;
@@ -1035,7 +1035,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
if (listLength(server.repl_scriptcache_fifo) > 0) {
- mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
+ mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
}
mh->lua_caches = mem;
@@ -1256,10 +1256,18 @@ void objectCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
-"FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
-"IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
-"REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
+"ENCODING <key>",
+" Return the kind of internal representation used in order to store the value",
+" associated with a <key>.",
+"FREQ <key>",
+" Return the access frequency index of the <key>. The returned integer is",
+" proportional to the logarithm of the recent access frequency of the key.",
+"IDLETIME <key>",
+" Return the idle time of the <key>, that is the approximated number of",
+" seconds elapsed since the last access to the key.",
+"REFCOUNT <key>",
+" Return the number of references of the value associated with the specified",
+" <key>.",
NULL
};
addReplyHelp(c, help);
@@ -1303,11 +1311,17 @@ NULL
void memoryCommand(client *c) {
if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
const char *help[] = {
-"DOCTOR - Return memory problems reports.",
-"MALLOC-STATS -- Return internal statistics report from the memory allocator.",
-"PURGE -- Attempt to purge dirty pages for reclamation by the allocator.",
-"STATS -- Return information about the memory usage of the server.",
-"USAGE <key> [SAMPLES <count>] -- Return memory in bytes used by <key> and its value. Nested values are sampled up to <count> times (default: 5).",
+"DOCTOR",
+" Return memory problems reports.",
+"MALLOC-STATS"
+" Return internal statistics report from the memory allocator.",
+"PURGE",
+" Attempt to purge dirty pages for reclamation by the allocator.",
+"STATS",
+" Return information about the memory usage of the server.",
+"USAGE <key> [SAMPLES <count>]",
+" Return memory in bytes used by <key> and its value. Nested values are",
+" sampled up to <count> times (default: 5).",
NULL
};
addReplyHelp(c, help);
@@ -1321,13 +1335,13 @@ NULL
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
== C_ERR) return;
if (samples < 0) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
if (samples == 0) samples = LLONG_MAX;
j++; /* skip option argument. */
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -1452,6 +1466,6 @@ NULL
else
addReplyError(c, "Error purging dirty pages");
} else {
- addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try MEMORY HELP", (char*)c->argv[1]->ptr);
+ addReplySubcommandSyntaxError(c);
}
}
diff --git a/src/pubsub.c b/src/pubsub.c
index 9e41464bf..7355e10b9 100644
--- a/src/pubsub.c
+++ b/src/pubsub.c
@@ -455,9 +455,13 @@ void publishCommand(client *c) {
void pubsubCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"CHANNELS [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
-"NUMPAT -- Return number of subscriptions to patterns.",
-"NUMSUB [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
+"CHANNELS [<pattern>]",
+" Return the currently active channels matching a <pattern> (default: '*').",
+"NUMPAT",
+" Return number of subscriptions to patterns.",
+"NUMSUB [<channel> ...]",
+" Return the number of subscribers for the specified channels, excluding",
+" pattern subscriptions(default: no channels).",
NULL
};
addReplyHelp(c, help);
diff --git a/src/quicklist.c b/src/quicklist.c
index 5ed9b6a5d..32871ac12 100644
--- a/src/quicklist.c
+++ b/src/quicklist.c
@@ -66,10 +66,10 @@ static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};
#else
#define D(...) \
do { \
- printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
printf(__VA_ARGS__); \
printf("\n"); \
- } while (0);
+ } while (0)
#endif
/* Bookmarks forward declarations */
@@ -1508,15 +1508,6 @@ void quicklistBookmarksClear(quicklist *ql) {
#include <stdint.h>
#include <sys/time.h>
-#define assert(_e) \
- do { \
- if (!(_e)) { \
- printf("\n\n=== ASSERTION FAILED ===\n"); \
- printf("==> %s:%d '%s' is not true\n", __FILE__, __LINE__, #_e); \
- err++; \
- } \
- } while (0)
-
#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
#define OK printf("\tOK\n")
@@ -1529,7 +1520,7 @@ void quicklistBookmarksClear(quicklist *ql) {
#define ERR(x, ...) \
do { \
- printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
printf("ERROR! " x "\n", __VA_ARGS__); \
err++; \
} while (0)
@@ -1614,7 +1605,7 @@ static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
ql_info(ql);
if (len != ql->len) {
- yell("quicklist length wrong: expected %d, got %u", len, ql->len);
+ yell("quicklist length wrong: expected %d, got %lu", len, ql->len);
errors++;
}
@@ -1670,7 +1661,7 @@ static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
yell("Incorrect compression: node %d is "
"compressed at depth %d ((%u, %u); total "
- "nodes: %u; size: %u; recompress: %d)",
+ "nodes: %lu; size: %u; recompress: %d)",
at, ql->compress, low_raw, high_raw, ql->len, node->sz,
node->recompress);
errors++;
@@ -1680,7 +1671,7 @@ static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
!node->attempted_compress) {
yell("Incorrect non-compression: node %d is NOT "
"compressed at depth %d ((%u, %u); total "
- "nodes: %u; size: %u; recompress: %d; attempted: %d)",
+ "nodes: %lu; size: %u; recompress: %d; attempted: %d)",
at, ql->compress, low_raw, high_raw, ql->len, node->sz,
node->recompress, node->attempted_compress);
errors++;
@@ -2706,7 +2697,7 @@ int quicklistTest(int argc, char *argv[]) {
if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
ERR("Incorrect compression: node %d is "
"compressed at depth %d ((%u, %u); total "
- "nodes: %u; size: %u)",
+ "nodes: %lu; size: %u)",
at, depth, low_raw, high_raw, ql->len,
node->sz);
}
@@ -2714,7 +2705,7 @@ int quicklistTest(int argc, char *argv[]) {
if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) {
ERR("Incorrect non-compression: node %d is NOT "
"compressed at depth %d ((%u, %u); total "
- "nodes: %u; size: %u; attempted: %d)",
+ "nodes: %lu; size: %u; attempted: %d)",
at, depth, low_raw, high_raw, ql->len,
node->sz, node->attempted_compress);
}
diff --git a/src/rax.c b/src/rax.c
index 89345fe26..0826b974a 100644
--- a/src/rax.c
+++ b/src/rax.c
@@ -61,7 +61,7 @@ void raxDebugShowNode(const char *msg, raxNode *n);
#ifdef RAX_DEBUG_MSG
#define debugf(...) \
if (raxDebugMsg) { \
- printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
printf(__VA_ARGS__); \
fflush(stdout); \
}
@@ -1892,7 +1892,7 @@ void raxShow(rax *rax) {
/* Used by debugnode() macro to show info about a given node. */
void raxDebugShowNode(const char *msg, raxNode *n) {
if (raxDebugMsg == 0) return;
- printf("%s: %p [%.*s] key:%d size:%d children:",
+ printf("%s: %p [%.*s] key:%u size:%u children:",
msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
int numcld = n->iscompr ? 1 : n->size;
raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
diff --git a/src/rdb.c b/src/rdb.c
index 58e7d2cff..7deed2a2d 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -94,7 +94,7 @@ void rdbReportError(int corruption_error, int linenum, char *reason, ...) {
exit(1);
}
-static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
+static ssize_t rdbWriteRaw(rio *rdb, void *p, size_t len) {
if (rdb && rioWrite(rdb,p,len) == 0)
return -1;
return len;
@@ -1219,9 +1219,11 @@ int rdbSaveRio(rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
dictIterator *di = NULL;
dictEntry *de;
char magic[10];
- int j;
uint64_t cksum;
size_t processed = 0;
+ int j;
+ long key_count = 0;
+ long long cow_updated_time = 0;
if (server.rdb_checksum)
rdb->update_cksum = rioGenericUpdateChecksum;
@@ -1267,6 +1269,23 @@ int rdbSaveRio(rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
processed = rdb->processed_bytes;
aofReadDiffFromParent();
}
+
+        /* Update COW info every 1 second (approximately).
+         * In order to avoid calling mstime() on each iteration, we only
+         * check the elapsed time once every 1024 keys. */
+ if ((key_count & 1023) == 0) {
+ key_count = 0;
+ long long now = mstime();
+ if (now - cow_updated_time >= 1000) {
+ if (rdbflags & RDBFLAGS_AOF_PREAMBLE) {
+ sendChildCOWInfo(CHILD_TYPE_AOF, 0, "AOF rewrite");
+ } else {
+ sendChildCOWInfo(CHILD_TYPE_RDB, 0, "RDB");
+ }
+ cow_updated_time = now;
+ }
+ }
+ key_count++;
}
dictReleaseIterator(di);
di = NULL; /* So that we don't release it again on error. */
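
The update pattern introduced above (touch a cheap counter on every key, look at the clock only once every 1024 keys, and report at most once per second) in a stand-alone form, with made-up names:

#include <stdio.h>
#include <time.h>

static long long now_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

int main(void) {
    long long last_report = 0;
    long key_count = 0;
    for (long i = 0; i < 5000000; i++) {
        /* ... process one key ... */
        if ((key_count & 1023) == 0) {
            long long now = now_ms();
            if (now - last_report >= 1000) {
                printf("progress report at key %ld\n", i);
                last_report = now;
            }
        }
        key_count++;
    }
    return 0;
}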
@@ -1410,7 +1429,6 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
server.dirty_before_bgsave = server.dirty;
server.lastbgsave_try = time(NULL);
- openChildInfoPipe();
if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
int retval;
@@ -1420,13 +1438,12 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
redisSetCpuAffinity(server.bgsave_cpulist);
retval = rdbSave(filename,rsi);
if (retval == C_OK) {
- sendChildCOWInfo(CHILD_TYPE_RDB, "RDB");
+ sendChildCOWInfo(CHILD_TYPE_RDB, 1, "RDB");
}
exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
if (childpid == -1) {
- closeChildInfoPipe();
server.lastbgsave_status = C_ERR;
serverLog(LL_WARNING,"Can't save in background: fork: %s",
strerror(errno));
@@ -1434,9 +1451,7 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
}
serverLog(LL_NOTICE,"Background saving started by pid %ld",(long) childpid);
server.rdb_save_time_start = time(NULL);
- server.rdb_child_pid = childpid;
server.rdb_child_type = RDB_CHILD_TYPE_DISK;
- updateDictResizePolicy();
return C_OK;
}
return C_OK; /* unreached */
@@ -2654,7 +2669,7 @@ static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
serverLog(LL_WARNING,
"Background saving terminated by signal %d", bysignal);
latencyStartMonitor(latency);
- rdbRemoveTempFile(server.rdb_child_pid, 0);
+ rdbRemoveTempFile(server.child_pid, 0);
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
@@ -2706,7 +2721,6 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
break;
}
- server.rdb_child_pid = -1;
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
server.rdb_save_time_start = -1;
@@ -2719,10 +2733,13 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
* the child did not exit for an error, but because we wanted), and performs
* the cleanup needed. */
void killRDBChild(void) {
- kill(server.rdb_child_pid,SIGUSR1);
- rdbRemoveTempFile(server.rdb_child_pid, 0);
- closeChildInfoPipe();
- updateDictResizePolicy();
+ kill(server.child_pid, SIGUSR1);
+    /* Because we are not using wait4 here (as we do in killAppendOnlyChild
+     * and TerminateModuleForkChild), all the cleanup operations are done by
+     * checkChildrenDone, which will later find that the process was killed.
+ * This includes:
+ * - resetChildState
+ * - rdbRemoveTempFile */
}
/* Spawn an RDB child that writes the RDB to the sockets of the slaves
@@ -2773,7 +2790,6 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
}
/* Create the child process. */
- openChildInfoPipe();
if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
/* Child */
int retval, dummy;
@@ -2789,7 +2805,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
retval = C_ERR;
if (retval == C_OK) {
- sendChildCOWInfo(CHILD_TYPE_RDB, "RDB");
+ sendChildCOWInfo(CHILD_TYPE_RDB, 1, "RDB");
}
rioFreeFd(&rdb);
@@ -2824,14 +2840,11 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
server.rdb_pipe_conns = NULL;
server.rdb_pipe_numconns = 0;
server.rdb_pipe_numconns_writing = 0;
- closeChildInfoPipe();
} else {
serverLog(LL_NOTICE,"Background RDB transfer started by pid %ld",
(long) childpid);
server.rdb_save_time_start = time(NULL);
- server.rdb_child_pid = childpid;
server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
- updateDictResizePolicy();
close(rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */
if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
@@ -2843,7 +2856,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
}
void saveCommand(client *c) {
- if (server.rdb_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_RDB) {
addReplyError(c,"Background save already in progress");
return;
}
@@ -2852,7 +2865,7 @@ void saveCommand(client *c) {
if (rdbSave(server.rdb_filename,rsiptr) == C_OK) {
addReply(c,shared.ok);
} else {
- addReply(c,shared.err);
+ addReplyErrorObject(c,shared.err);
}
}
@@ -2866,7 +2879,7 @@ void bgsaveCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
schedule = 1;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -2874,7 +2887,7 @@ void bgsaveCommand(client *c) {
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
- if (server.rdb_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_RDB) {
addReplyError(c,"Background save already in progress");
} else if (hasActiveChildProcess()) {
if (schedule) {
@@ -2889,7 +2902,7 @@ void bgsaveCommand(client *c) {
} else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) {
addReplyStatus(c,"Background saving started");
} else {
- addReply(c,shared.err);
+ addReplyErrorObject(c,shared.err);
}
}
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
index 4efed4b12..a955c0d4c 100644
--- a/src/redis-benchmark.c
+++ b/src/redis-benchmark.c
@@ -59,6 +59,7 @@
#include "crc16_slottable.h"
#include "hdr_histogram.h"
#include "cli_common.h"
+#include "mt19937-64.h"
#define UNUSED(V) ((void) V)
#define RANDPTR_INITIAL_SIZE 8
@@ -1182,8 +1183,8 @@ static int fetchClusterConfiguration() {
}
if (myself) {
node = firstNode;
- if (node->ip == NULL && ip != NULL) {
- node->ip = ip;
+ if (ip != NULL && strcmp(node->ip, ip) != 0) {
+ node->ip = sdsnew(ip);
node->port = port;
}
} else {
@@ -1677,6 +1678,7 @@ int main(int argc, const char **argv) {
client c;
srandom(time(NULL) ^ getpid());
+ init_genrand64(ustime() ^ getpid());
signal(SIGHUP, SIG_IGN);
signal(SIGPIPE, SIG_IGN);
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
index 79dbf3229..335e35189 100644
--- a/src/redis-check-rdb.c
+++ b/src/redis-check-rdb.c
@@ -27,10 +27,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include "mt19937-64.h"
#include "server.h"
#include "rdb.h"
#include <stdarg.h>
+#include <sys/time.h>
+#include <unistd.h>
void createSharedObjects(void);
void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
@@ -362,10 +365,16 @@ err:
* Otherwise if called with a non NULL fp, the function returns C_OK or
* C_ERR depending on the success or failure. */
int redis_check_rdb_main(int argc, char **argv, FILE *fp) {
+ struct timeval tv;
+
if (argc != 2 && fp == NULL) {
fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
exit(1);
}
+
+ gettimeofday(&tv, NULL);
+ init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
+
/* In order to call the loading functions we need to create the shared
* integer objects, however since this function may be called from
* an already initialized Redis instance, check if we really need to. */
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 4f5b0fe03..31d2360c9 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -62,6 +62,7 @@
#include "anet.h"
#include "ae.h"
#include "cli_common.h"
+#include "mt19937-64.h"
#define UNUSED(V) ((void) V)
@@ -5578,7 +5579,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1))
last = CLUSTER_MANAGER_SLOTS - 1;
if (last < first) last = first;
- printf("Master[%d] -> Slots %lu - %lu\n", i, first, last);
+ printf("Master[%d] -> Slots %ld - %ld\n", i, first, last);
master->slots_count = 0;
for (j = first; j <= last; j++) {
master->slots[j] = 1;
@@ -7131,7 +7132,9 @@ static void getRDB(clusterManagerNode *node) {
} else {
fprintf(stderr,"Transfer finished with success.\n");
}
- redisFree(s); /* Close the file descriptor ASAP as fsync() may take time. */
+ redisFree(s); /* Close the connection ASAP as fsync() may take time. */
+ if (node)
+ node->context = NULL;
fsync(fd);
close(fd);
fprintf(stderr,"Transfer finished with success.\n");
@@ -8123,6 +8126,7 @@ static sds askPassword(const char *msg) {
int main(int argc, char **argv) {
int firstarg;
+ struct timeval tv;
config.hostip = sdsnew("127.0.0.1");
config.hostport = 6379;
@@ -8219,6 +8223,9 @@ int main(int argc, char **argv) {
}
#endif
+ gettimeofday(&tv, NULL);
+ init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
+
/* Cluster Manager mode */
if (CLUSTER_MANAGER_MODE()) {
clusterManagerCommandProc *proc = validateClusterManagerCommand();
diff --git a/src/redismodule.h b/src/redismodule.h
index d2afa1f21..36c566bb3 100644
--- a/src/redismodule.h
+++ b/src/redismodule.h
@@ -778,6 +778,7 @@ REDISMODULE_API int (*RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilt
REDISMODULE_API int (*RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *fctx, int pos) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_Fork)(RedisModuleForkDoneHandler cb, void *user_data) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SendChildCOWInfo)(void) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ExitFromChild)(int retcode) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_KillForkChild)(int child_pid) REDISMODULE_ATTR;
REDISMODULE_API float (*RedisModule_GetUsedMemoryRatio)() REDISMODULE_ATTR;
@@ -1033,6 +1034,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(CommandFilterArgReplace);
REDISMODULE_GET_API(CommandFilterArgDelete);
REDISMODULE_GET_API(Fork);
+ REDISMODULE_GET_API(SendChildCOWInfo);
REDISMODULE_GET_API(ExitFromChild);
REDISMODULE_GET_API(KillForkChild);
REDISMODULE_GET_API(GetUsedMemoryRatio);
diff --git a/src/replication.c b/src/replication.c
index 64aa41390..9fb19eaca 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -102,7 +102,7 @@ int bg_unlink(const char *filename) {
errno = old_errno;
return -1;
}
- bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)fd,NULL,NULL);
+ bioCreateCloseJob(fd);
return 0; /* Success. */
}
}
@@ -715,7 +715,7 @@ void syncCommand(client *c) {
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
- addReplySds(c,sdsnew("-NOMASTERLINK Can't SYNC while not connected with my master\r\n"));
+ addReplyError(c,"-NOMASTERLINK Can't SYNC while not connected with my master");
return;
}
@@ -786,7 +786,7 @@ void syncCommand(client *c) {
}
/* CASE 1: BGSAVE is in progress, with disk target. */
- if (server.rdb_child_pid != -1 &&
+ if (server.child_type == CHILD_TYPE_RDB &&
server.rdb_child_type == RDB_CHILD_TYPE_DISK)
{
/* Ok a background save is in progress. Let's check if it is a good
@@ -816,7 +816,7 @@ void syncCommand(client *c) {
}
/* CASE 2: BGSAVE is in progress, with socket target. */
- } else if (server.rdb_child_pid != -1 &&
+ } else if (server.child_type == CHILD_TYPE_RDB &&
server.rdb_child_type == RDB_CHILD_TYPE_SOCKET)
{
/* There is an RDB child process but it is writing directly to
@@ -866,7 +866,7 @@ void replconfCommand(client *c) {
if ((c->argc % 2) == 0) {
/* Number of arguments must be odd to make sure that every
* option has a corresponding value. */
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -914,7 +914,7 @@ void replconfCommand(client *c) {
* There's a chance the ACK got to us before we detected that the
* bgsave is done (since that depends on cron ticks), so run a
* quick check first (instead of waiting for the next ACK. */
- if (server.rdb_child_pid != -1 && c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
+ if (server.child_type == CHILD_TYPE_RDB && c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
checkChildrenDone();
if (c->repl_put_online_on_ack && c->replstate == SLAVE_STATE_ONLINE)
putSlaveOnline(c);
@@ -1334,8 +1334,8 @@ void shiftReplicationId(void) {
/* Returns 1 if the given replication state is a handshake state,
* 0 otherwise. */
int slaveIsInHandshakeState(void) {
- return server.repl_state >= REPL_STATE_RECEIVE_PONG &&
- server.repl_state <= REPL_STATE_RECEIVE_PSYNC;
+ return server.repl_state >= REPL_STATE_RECEIVE_PING_REPLY &&
+ server.repl_state <= REPL_STATE_RECEIVE_PSYNC_REPLY;
}
/* Avoid the master to detect the slave is timing out while loading the
@@ -1690,7 +1690,6 @@ void readSyncBulkPayload(connection *conn) {
* gets promoted. */
return;
}
- stopLoading(1);
/* RDB loading succeeded if we reach this point. */
if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
@@ -1705,6 +1704,7 @@ void readSyncBulkPayload(connection *conn) {
if (!rioRead(&rdb,buf,CONFIG_RUN_ID_SIZE) ||
memcmp(buf,eofmark,CONFIG_RUN_ID_SIZE) != 0)
{
+ stopLoading(0);
serverLog(LL_WARNING,"Replication stream EOF marker is broken");
cancelReplicationHandshake(1);
rioFreeConn(&rdb, NULL);
@@ -1712,6 +1712,8 @@ void readSyncBulkPayload(connection *conn) {
}
}
+ stopLoading(1);
+
/* Cleanup and restore the socket to the original state to continue
* with the normal replication. */
rioFreeConn(&rdb, NULL);
@@ -1719,13 +1721,13 @@ void readSyncBulkPayload(connection *conn) {
connRecvTimeout(conn,0);
} else {
/* Ensure background save doesn't overwrite synced data */
- if (server.rdb_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_RDB) {
serverLog(LL_NOTICE,
"Replica is about to load the RDB file received from the "
"master, but there is a pending RDB child running. "
"Killing process %ld and removing its temp file to avoid "
"any race",
- (long) server.rdb_child_pid);
+ (long) server.child_pid);
killRDBChild();
}
@@ -1752,7 +1754,7 @@ void readSyncBulkPayload(connection *conn) {
return;
}
/* Close old rdb asynchronously. */
- if (old_rdb_fd != -1) bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)old_rdb_fd,NULL,NULL);
+ if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd);
if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) {
serverLog(LL_WARNING,
@@ -1827,66 +1829,94 @@ error:
return;
}
-/* Send a synchronous command to the master. Used to send AUTH and
- * REPLCONF commands before starting the replication with SYNC.
+char *receiveSynchronousResponse(connection *conn) {
+ char buf[256];
+ /* Read the reply from the server. */
+ if (connSyncReadLine(conn,buf,sizeof(buf),server.repl_syncio_timeout*1000) == -1)
+ {
+ return sdscatprintf(sdsempty(),"-Reading from master: %s",
+ strerror(errno));
+ }
+ server.repl_transfer_lastio = server.unixtime;
+ return sdsnew(buf);
+}
+
+/* Send a pre-formatted multi-bulk command to the connection. */
+char* sendCommandRaw(connection *conn, sds cmd) {
+ if (connSyncWrite(conn,cmd,sdslen(cmd),server.repl_syncio_timeout*1000) == -1) {
+ return sdscatprintf(sdsempty(),"-Writing to master: %s",
+ connGetLastError(conn));
+ }
+ return NULL;
+}
+
+/* Compose a multi-bulk command and send it to the connection.
+ * Used to send AUTH and REPLCONF commands to the master before starting the
+ * replication.
+ *
+ * Takes a list of char* arguments, terminated by a NULL argument.
*
* The command returns an sds string representing the result of the
* operation. On error the first byte is a "-".
*/
-#define SYNC_CMD_READ (1<<0)
-#define SYNC_CMD_WRITE (1<<1)
-#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
-char *sendSynchronousCommand(int flags, connection *conn, ...) {
+char *sendCommand(connection *conn, ...) {
+ va_list ap;
+ sds cmd = sdsempty();
+ sds cmdargs = sdsempty();
+ size_t argslen = 0;
+ char *arg;
/* Create the command to send to the master, we use redis binary
* protocol to make sure correct arguments are sent. This function
* is not safe for all binary data. */
- if (flags & SYNC_CMD_WRITE) {
- char *arg;
- va_list ap;
- sds cmd = sdsempty();
- sds cmdargs = sdsempty();
- size_t argslen = 0;
- va_start(ap,conn);
-
- while(1) {
- arg = va_arg(ap, char*);
- if (arg == NULL) break;
-
- cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
- argslen++;
- }
-
- va_end(ap);
-
- cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
- cmd = sdscatsds(cmd,cmdargs);
- sdsfree(cmdargs);
-
- /* Transfer command to the server. */
- if (connSyncWrite(conn,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
- == -1)
- {
- sdsfree(cmd);
- return sdscatprintf(sdsempty(),"-Writing to master: %s",
- connGetLastError(conn));
- }
- sdsfree(cmd);
- }
+ va_start(ap,conn);
+ while(1) {
+ arg = va_arg(ap, char*);
+ if (arg == NULL) break;
+ cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
+ argslen++;
+ }
+
+ cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
+ cmd = sdscatsds(cmd,cmdargs);
+ sdsfree(cmdargs);
+
+ va_end(ap);
+ char* err = sendCommandRaw(conn, cmd);
+ sdsfree(cmd);
+ if(err)
+ return err;
+ return NULL;
+}
- /* Read the reply from the server. */
- if (flags & SYNC_CMD_READ) {
- char buf[256];
+/* Compose a multi-bulk command and send it to the connection.
+ * Used to send AUTH and REPLCONF commands to the master before starting the
+ * replication.
+ *
+ * argv_lens is optional; when NULL, strlen is used.
+ *
+ * The command returns an sds string representing the result of the
+ * operation. On error the first byte is a "-".
+ */
+char *sendCommandArgv(connection *conn, int argc, char **argv, size_t *argv_lens) {
+ sds cmd = sdsempty();
+ char *arg;
+ int i;
- if (connSyncReadLine(conn,buf,sizeof(buf),server.repl_syncio_timeout*1000)
- == -1)
- {
- return sdscatprintf(sdsempty(),"-Reading from master: %s",
- strerror(errno));
- }
- server.repl_transfer_lastio = server.unixtime;
- return sdsnew(buf);
- }
+ /* Create the command to send to the master. */
+ cmd = sdscatfmt(cmd,"*%i\r\n",argc);
+ for (i=0; i<argc; i++) {
+ int len;
+ arg = argv[i];
+ len = argv_lens ? argv_lens[i] : strlen(arg);
+ cmd = sdscatfmt(cmd,"$%i\r\n",len);
+ cmd = sdscatlen(cmd,arg,len);
+ cmd = sdscatlen(cmd,"\r\n",2);
+ }
+ char* err = sendCommandRaw(conn, cmd);
+ sdsfree(cmd);
+ if (err)
+ return err;
return NULL;
}
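
The wire format produced by sendCommand()/sendCommandArgv() is plain RESP multi-bulk framing. A stand-alone sketch (not the sds-based code above; the helper name is made up):

#include <stdio.h>
#include <string.h>

/* Compose "*<argc>\r\n" followed by "$<len>\r\n<arg>\r\n" for each argument.
 * Returns the number of bytes written, or -1 if the buffer is too small. */
static int compose_resp(char *dst, size_t dstlen, int argc, const char **argv) {
    int n = snprintf(dst, dstlen, "*%d\r\n", argc);
    if (n < 0 || (size_t)n >= dstlen) return -1;
    for (int i = 0; i < argc; i++) {
        int w = snprintf(dst + n, dstlen - n, "$%zu\r\n%s\r\n",
                         strlen(argv[i]), argv[i]);
        if (w < 0 || (size_t)w >= dstlen - (size_t)n) return -1;
        n += w;
    }
    return n;
}

int main(void) {
    const char *argv[] = { "REPLCONF", "listening-port", "6380" };
    char buf[128];
    if (compose_resp(buf, sizeof(buf), 3, argv) > 0)
        fputs(buf, stdout);  /* *3 $8 REPLCONF $14 listening-port $4 6380 */
    return 0;
}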
@@ -1969,7 +1999,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
}
/* Issue the PSYNC command */
- reply = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PSYNC",psync_replid,psync_offset,NULL);
+ reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
@@ -1980,7 +2010,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
}
/* Reading half */
- reply = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ reply = receiveSynchronousResponse(conn);
if (sdslen(reply) == 0) {
/* The master may send empty newlines after it receives PSYNC
* and before to reply, just to keep the connection alive. */
@@ -2131,17 +2161,17 @@ void syncWithMaster(connection *conn) {
* registered and we can wait for the PONG reply. */
connSetReadHandler(conn, syncWithMaster);
connSetWriteHandler(conn, NULL);
- server.repl_state = REPL_STATE_RECEIVE_PONG;
+ server.repl_state = REPL_STATE_RECEIVE_PING_REPLY;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PING",NULL);
+ err = sendCommand(conn,"PING",NULL);
if (err) goto write_error;
return;
}
/* Receive the PONG command. */
- if (server.repl_state == REPL_STATE_RECEIVE_PONG) {
- err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ if (server.repl_state == REPL_STATE_RECEIVE_PING_REPLY) {
+ err = receiveSynchronousResponse(conn);
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
@@ -2161,59 +2191,88 @@ void syncWithMaster(connection *conn) {
"Master replied to PING, replication can continue...");
}
sdsfree(err);
- server.repl_state = REPL_STATE_SEND_AUTH;
- }
+ err = NULL;
+ server.repl_state = REPL_STATE_SEND_HANDSHAKE;
+ }
+
+ if (server.repl_state == REPL_STATE_SEND_HANDSHAKE) {
+ /* AUTH with the master if required. */
+ if (server.masterauth) {
+ char *args[3] = {"AUTH",NULL,NULL};
+ size_t lens[3] = {4,0,0};
+ int argc = 1;
+ if (server.masteruser) {
+ args[argc] = server.masteruser;
+ lens[argc] = strlen(server.masteruser);
+ argc++;
+ }
+ args[argc] = server.masterauth;
+ lens[argc] = sdslen(server.masterauth);
+ argc++;
+ err = sendCommandArgv(conn, argc, args, lens);
+ if (err) goto write_error;
+ }
- /* AUTH with the master if required. */
- if (server.repl_state == REPL_STATE_SEND_AUTH) {
- if (server.masteruser && server.masterauth) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",
- server.masteruser,server.masterauth,NULL);
+ /* Set the slave port, so that Master's INFO command can list the
+ * slave listening port correctly. */
+ {
+ int port;
+ if (server.slave_announce_port)
+ port = server.slave_announce_port;
+ else if (server.tls_replication && server.tls_port)
+ port = server.tls_port;
+ else
+ port = server.port;
+ sds portstr = sdsfromlonglong(port);
+ err = sendCommand(conn,"REPLCONF",
+ "listening-port",portstr, NULL);
+ sdsfree(portstr);
if (err) goto write_error;
- server.repl_state = REPL_STATE_RECEIVE_AUTH;
- return;
- } else if (server.masterauth) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",server.masterauth,NULL);
+ }
+
+ /* Set the slave ip, so that Master's INFO command can list the
+ * slave IP address port correctly in case of port forwarding or NAT.
+ * Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
+ if (server.slave_announce_ip) {
+ err = sendCommand(conn,"REPLCONF",
+ "ip-address",server.slave_announce_ip, NULL);
if (err) goto write_error;
- server.repl_state = REPL_STATE_RECEIVE_AUTH;
- return;
- } else {
- server.repl_state = REPL_STATE_SEND_PORT;
}
+
+ /* Inform the master of our (slave) capabilities.
+ *
+ * EOF: supports EOF-style RDB transfer for diskless replication.
+ * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
+ *
+ * The master will ignore capabilities it does not understand. */
+ err = sendCommand(conn,"REPLCONF",
+ "capa","eof","capa","psync2",NULL);
+ if (err) goto write_error;
+
+ server.repl_state = REPL_STATE_RECEIVE_AUTH_REPLY;
+ return;
}
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY && !server.masterauth)
+ server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;
+
/* Receive AUTH reply. */
- if (server.repl_state == REPL_STATE_RECEIVE_AUTH) {
- err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY) {
+ err = receiveSynchronousResponse(conn);
if (err[0] == '-') {
serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
sdsfree(err);
- server.repl_state = REPL_STATE_SEND_PORT;
- }
-
- /* Set the slave port, so that Master's INFO command can list the
- * slave listening port correctly. */
- if (server.repl_state == REPL_STATE_SEND_PORT) {
- int port;
- if (server.slave_announce_port) port = server.slave_announce_port;
- else if (server.tls_replication && server.tls_port) port = server.tls_port;
- else port = server.port;
- sds portstr = sdsfromlonglong(port);
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
- "listening-port",portstr, NULL);
- sdsfree(portstr);
- if (err) goto write_error;
- sdsfree(err);
- server.repl_state = REPL_STATE_RECEIVE_PORT;
+ err = NULL;
+ server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;
return;
}
/* Receive REPLCONF listening-port reply. */
- if (server.repl_state == REPL_STATE_RECEIVE_PORT) {
- err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ if (server.repl_state == REPL_STATE_RECEIVE_PORT_REPLY) {
+ err = receiveSynchronousResponse(conn);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
@@ -2221,30 +2280,16 @@ void syncWithMaster(connection *conn) {
"REPLCONF listening-port: %s", err);
}
sdsfree(err);
- server.repl_state = REPL_STATE_SEND_IP;
- }
-
- /* Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
- if (server.repl_state == REPL_STATE_SEND_IP &&
- server.slave_announce_ip == NULL)
- {
- server.repl_state = REPL_STATE_SEND_CAPA;
- }
-
- /* Set the slave ip, so that Master's INFO command can list the
- * slave IP address port correctly in case of port forwarding or NAT. */
- if (server.repl_state == REPL_STATE_SEND_IP) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
- "ip-address",server.slave_announce_ip, NULL);
- if (err) goto write_error;
- sdsfree(err);
- server.repl_state = REPL_STATE_RECEIVE_IP;
+ server.repl_state = REPL_STATE_RECEIVE_IP_REPLY;
return;
}
+ if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY && !server.slave_announce_ip)
+ server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
+
/* Receive REPLCONF ip-address reply. */
- if (server.repl_state == REPL_STATE_RECEIVE_IP) {
- err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY) {
+ err = receiveSynchronousResponse(conn);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
@@ -2252,27 +2297,13 @@ void syncWithMaster(connection *conn) {
"REPLCONF ip-address: %s", err);
}
sdsfree(err);
- server.repl_state = REPL_STATE_SEND_CAPA;
- }
-
- /* Inform the master of our (slave) capabilities.
- *
- * EOF: supports EOF-style RDB transfer for diskless replication.
- * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
- *
- * The master will ignore capabilities it does not understand. */
- if (server.repl_state == REPL_STATE_SEND_CAPA) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
- "capa","eof","capa","psync2",NULL);
- if (err) goto write_error;
- sdsfree(err);
- server.repl_state = REPL_STATE_RECEIVE_CAPA;
+ server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
return;
}
/* Receive CAPA reply. */
- if (server.repl_state == REPL_STATE_RECEIVE_CAPA) {
- err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
+ if (server.repl_state == REPL_STATE_RECEIVE_CAPA_REPLY) {
+ err = receiveSynchronousResponse(conn);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF capa. */
if (err[0] == '-') {
@@ -2280,6 +2311,7 @@ void syncWithMaster(connection *conn) {
"REPLCONF capa: %s", err);
}
sdsfree(err);
+ err = NULL;
server.repl_state = REPL_STATE_SEND_PSYNC;
}
@@ -2293,12 +2325,12 @@ void syncWithMaster(connection *conn) {
err = sdsnew("Write error sending the PSYNC command.");
goto write_error;
}
- server.repl_state = REPL_STATE_RECEIVE_PSYNC;
+ server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY;
return;
}
/* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC. */
- if (server.repl_state != REPL_STATE_RECEIVE_PSYNC) {
+ if (server.repl_state != REPL_STATE_RECEIVE_PSYNC_REPLY) {
serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
"state should be RECEIVE_PSYNC but is %d",
server.repl_state);
@@ -2393,7 +2425,7 @@ error:
server.repl_state = REPL_STATE_CONNECT;
return;
-write_error: /* Handle sendSynchronousCommand(SYNC_CMD_WRITE) errors. */
+write_error: /* Handle sendCommand() errors. */
serverLog(LL_WARNING,"Sending command to master in replication handshake: %s", err);
sdsfree(err);
goto error;
@@ -3205,7 +3237,7 @@ void replicationCron(void) {
int manual_failover_in_progress =
server.cluster_enabled &&
server.cluster->mf_end &&
- clientsArePaused();
+ checkClientPauseTimeoutAndReturnIfPaused();
if (!manual_failover_in_progress) {
ping_argv[0] = createStringObject("PING",4);
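
The net effect of the replication.c changes above: REPL_STATE_SEND_HANDSHAKE now pipelines the whole handshake (AUTH when masterauth is set, REPLCONF listening-port, REPLCONF ip-address when announce-ip is set, REPLCONF capa), and each later RECEIVE_*_REPLY state consumes exactly one queued reply or is skipped when its command was never sent. A minimal sketch of that pattern, assuming the helpers introduced by the patch and a made-up port value:

    /* Sketch only: pipeline the requests, then read replies one per state. */
    if (server.repl_state == REPL_STATE_SEND_HANDSHAKE) {
        if (server.masterauth)
            sendCommand(conn,"AUTH",server.masterauth,NULL);        /* optional */
        sendCommand(conn,"REPLCONF","listening-port","6380",NULL);  /* "6380" is illustrative */
        sendCommand(conn,"REPLCONF","capa","eof","capa","psync2",NULL);
        server.repl_state = REPL_STATE_RECEIVE_AUTH_REPLY;
        return;                                        /* Wait for the master's replies. */
    }
    if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY && !server.masterauth)
        server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;          /* Nothing was sent: skip. */
    if (server.repl_state == REPL_STATE_RECEIVE_PORT_REPLY) {
        sds reply = receiveSynchronousResponse(conn);               /* One reply per state. */
        sdsfree(reply);
        server.repl_state = REPL_STATE_RECEIVE_IP_REPLY;
        return;
    }
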
diff --git a/src/scripting.c b/src/scripting.c
index 8dca84478..75604e4d8 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -366,10 +366,7 @@ void luaReplyToRedisReply(client *c, lua_State *lua) {
lua_gettable(lua,-2);
t = lua_type(lua,-1);
if (t == LUA_TSTRING) {
- sds err = sdsnew(lua_tostring(lua,-1));
- sdsmapchars(err,"\r\n"," ",2);
- addReplySds(c,sdscatprintf(sdsempty(),"-%s\r\n",err));
- sdsfree(err);
+ addReplyErrorFormat(c,"-%s",lua_tostring(lua,-1));
lua_pop(lua,2);
return;
}
@@ -410,9 +407,9 @@ void luaReplyToRedisReply(client *c, lua_State *lua) {
lua_pushnil(lua); /* Use nil to start iteration. */
while (lua_next(lua,-2)) {
/* Stack now: table, key, value */
- luaReplyToRedisReply(c, lua); /* Return value. */
- lua_pushvalue(lua,-1); /* Dup key before consuming. */
+ lua_pushvalue(lua,-2); /* Dup key before consuming. */
luaReplyToRedisReply(c, lua); /* Return key. */
+ luaReplyToRedisReply(c, lua); /* Return value. */
/* Stack now: table, key. */
maplen++;
}
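
The map-reply fix above follows lua_next()'s stack discipline: after lua_next() the stack is [... table, key, value], the key must be emitted before the value, and the original key has to stay on the stack so the next lua_next() call can continue from it. A self-contained sketch of that loop, where emit() stands in for luaReplyToRedisReply() and, like it, pops the element it converts:

    static void emitLuaMapSketch(lua_State *lua, void (*emit)(lua_State *lua)) {
        lua_pushnil(lua);               /* First "previous key" for lua_next().   */
        while (lua_next(lua,-2)) {
            /* Stack: table, key, value. */
            lua_pushvalue(lua,-2);      /* Duplicate the key...                   */
            emit(lua);                  /* ...emit the key (pops the copy)...     */
            emit(lua);                  /* ...then emit the value (pops it).      */
            /* Stack: table, key -- ready for the next lua_next(). */
        }
    }
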
@@ -694,11 +691,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,&error_code) !=
server.cluster->myself)
{
- if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
+ if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
luaPushError(lua,
"Lua script attempted to execute a write command while the "
"cluster is down and readonly");
- } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+ } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
luaPushError(lua,
"Lua script attempted to execute a command while the "
"cluster is down");
@@ -721,7 +718,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
server.lua_write_dirty &&
server.lua_repl != PROPAGATE_NONE)
{
- execCommandPropagateMulti(server.lua_caller);
+ execCommandPropagateMulti(server.lua_caller->db->id);
server.lua_multi_emitted = 1;
/* Now we are in the MULTI context, the lua_client should be
 * flagged as CLIENT_MULTI. */
@@ -1638,7 +1635,7 @@ void evalGenericCommand(client *c, int evalsha) {
if (server.lua_replicate_commands) {
preventCommandPropagation(c);
if (server.lua_multi_emitted) {
- execCommandPropagateExec(c);
+ execCommandPropagateExec(c->db->id);
}
}
@@ -1710,11 +1707,16 @@ void evalShaCommand(client *c) {
void scriptCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"DEBUG (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
-"EXISTS <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
-"FLUSH -- Flush the Lua scripts cache. Very dangerous on replicas.",
-"KILL -- Kill the currently executing Lua script.",
-"LOAD <script> -- Load a script into the scripts cache, without executing it.",
+"DEBUG (YES|SYNC|NO)",
+" Set the debug mode for subsequent scripts executed.",
+"EXISTS <sha1> [<sha1> ...]",
+" Return information about the existence of the scripts in the script cache.",
+"FLUSH",
+" Flush the Lua scripts cache. Very dangerous on replicas.",
+"KILL",
+" Kill the currently executing Lua script.",
+"LOAD <script>",
+" Load a script into the scripts cache without executing it.",
NULL
};
addReplyHelp(c, help);
@@ -1740,11 +1742,11 @@ NULL
forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) {
if (server.lua_caller == NULL) {
- addReplySds(c,sdsnew("-NOTBUSY No scripts in execution right now.\r\n"));
+ addReplyError(c,"-NOTBUSY No scripts in execution right now.");
} else if (server.lua_caller->flags & CLIENT_MASTER) {
- addReplySds(c,sdsnew("-UNKILLABLE The busy script was sent by a master instance in the context of replication and cannot be killed.\r\n"));
+ addReplyError(c,"-UNKILLABLE The busy script was sent by a master instance in the context of replication and cannot be killed.");
} else if (server.lua_write_dirty) {
- addReplySds(c,sdsnew("-UNKILLABLE Sorry the script already executed write commands against the dataset. You can either wait the script termination or kill the server in a hard way using the SHUTDOWN NOSAVE command.\r\n"));
+ addReplyError(c,"-UNKILLABLE Sorry the script already executed write commands against the dataset. You can either wait the script termination or kill the server in a hard way using the SHUTDOWN NOSAVE command.");
} else {
server.lua_kill = 1;
addReply(c,shared.ok);
@@ -1765,7 +1767,7 @@ NULL
addReply(c,shared.ok);
c->flags |= CLIENT_LUA_DEBUG_SYNC;
} else {
- addReplyError(c,"Use SCRIPT DEBUG yes/sync/no");
+ addReplyError(c,"Use SCRIPT DEBUG YES/SYNC/NO");
return;
}
} else {
diff --git a/src/sds.c b/src/sds.c
index 6a85eb4aa..f16114471 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -642,7 +642,7 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
/* To avoid continuous reallocations, let's start with a buffer that
* can hold at least two times the format string itself. It's not the
* best heuristic but seems to work in practice. */
- s = sdsMakeRoomFor(s, initlen + strlen(fmt)*2);
+ s = sdsMakeRoomFor(s, strlen(fmt)*2);
va_start(ap,fmt);
f = fmt; /* Next format specifier byte to process. */
i = initlen; /* Position of the next byte to write to dest str. */
@@ -1159,8 +1159,8 @@ void sds_free(void *ptr) { s_free(ptr); }
#ifdef REDIS_TEST
#include <stdio.h>
+#include <limits.h>
#include "testhelp.h"
-#include "limits.h"
#define UNUSED(x) (void)(x)
int sdsTest(int argc, char **argv) {
@@ -1171,12 +1171,12 @@ int sdsTest(int argc, char **argv) {
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
- sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0)
+ sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0);
sdsfree(x);
x = sdsnewlen("foo",2);
test_cond("Create a string with specified length",
- sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0)
+ sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0);
x = sdscat(x,"bar");
test_cond("Strings concatenation",
@@ -1184,22 +1184,22 @@ int sdsTest(int argc, char **argv) {
x = sdscpy(x,"a");
test_cond("sdscpy() against an originally longer string",
- sdslen(x) == 1 && memcmp(x,"a\0",2) == 0)
+ sdslen(x) == 1 && memcmp(x,"a\0",2) == 0);
x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
test_cond("sdscpy() against an originally shorter string",
sdslen(x) == 33 &&
- memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0)
+ memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0);
sdsfree(x);
x = sdscatprintf(sdsempty(),"%d",123);
test_cond("sdscatprintf() seems working in the base case",
- sdslen(x) == 3 && memcmp(x,"123\0",4) == 0)
+ sdslen(x) == 3 && memcmp(x,"123\0",4) == 0);
sdsfree(x);
x = sdscatprintf(sdsempty(),"a%cb",0);
test_cond("sdscatprintf() seems working with \\0 inside of result",
- sdslen(x) == 3 && memcmp(x,"a\0""b\0",4) == 0)
+ sdslen(x) == 3 && memcmp(x,"a\0""b\0",4) == 0);
{
sdsfree(x);
@@ -1209,7 +1209,7 @@ int sdsTest(int argc, char **argv) {
}
x = sdscatprintf(sdsempty(),"%0*d",(int)sizeof(etalon),0);
test_cond("sdscatprintf() can print 1MB",
- sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0)
+ sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0);
}
sdsfree(x);
@@ -1218,7 +1218,7 @@ int sdsTest(int argc, char **argv) {
test_cond("sdscatfmt() seems working in the base case",
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
- "9223372036854775807--",60) == 0)
+ "9223372036854775807--",60) == 0);
printf("[%s]\n",x);
sdsfree(x);
@@ -1226,85 +1226,85 @@ int sdsTest(int argc, char **argv) {
x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX);
test_cond("sdscatfmt() seems working with unsigned numbers",
sdslen(x) == 35 &&
- memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
+ memcmp(x,"--4294967295,18446744073709551615--",35) == 0);
sdsfree(x);
x = sdsnew(" x ");
sdstrim(x," x");
test_cond("sdstrim() works when all chars match",
- sdslen(x) == 0)
+ sdslen(x) == 0);
sdsfree(x);
x = sdsnew(" x ");
sdstrim(x," ");
test_cond("sdstrim() works when a single char remains",
- sdslen(x) == 1 && x[0] == 'x')
+ sdslen(x) == 1 && x[0] == 'x');
sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
- sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0)
+ sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0);
y = sdsdup(x);
sdsrange(y,1,1);
test_cond("sdsrange(...,1,1)",
- sdslen(y) == 1 && memcmp(y,"i\0",2) == 0)
+ sdslen(y) == 1 && memcmp(y,"i\0",2) == 0);
sdsfree(y);
y = sdsdup(x);
sdsrange(y,1,-1);
test_cond("sdsrange(...,1,-1)",
- sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+ sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0);
sdsfree(y);
y = sdsdup(x);
sdsrange(y,-2,-1);
test_cond("sdsrange(...,-2,-1)",
- sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0)
+ sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0);
sdsfree(y);
y = sdsdup(x);
sdsrange(y,2,1);
test_cond("sdsrange(...,2,1)",
- sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+ sdslen(y) == 0 && memcmp(y,"\0",1) == 0);
sdsfree(y);
y = sdsdup(x);
sdsrange(y,1,100);
test_cond("sdsrange(...,1,100)",
- sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+ sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0);
sdsfree(y);
y = sdsdup(x);
sdsrange(y,100,100);
test_cond("sdsrange(...,100,100)",
- sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+ sdslen(y) == 0 && memcmp(y,"\0",1) == 0);
sdsfree(y);
sdsfree(x);
x = sdsnew("foo");
y = sdsnew("foa");
- test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0)
+ test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0);
sdsfree(y);
sdsfree(x);
x = sdsnew("bar");
y = sdsnew("bar");
- test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0)
+ test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0);
sdsfree(y);
sdsfree(x);
x = sdsnew("aar");
y = sdsnew("bar");
- test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0)
+ test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0);
sdsfree(y);
sdsfree(x);
x = sdsnewlen("\a\n\0foo\r",7);
y = sdscatrepr(sdsempty(),x,sdslen(x));
test_cond("sdscatrepr(...data...)",
- memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
+ memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0);
{
unsigned int oldfree;
@@ -1343,7 +1343,7 @@ int sdsTest(int argc, char **argv) {
sdsfree(x);
}
}
- test_report()
+ test_report();
return 0;
}
#endif
diff --git a/src/sentinel.c b/src/sentinel.c
index 95cfa84ad..02260feb7 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -469,7 +469,7 @@ struct redisCommand sentinelcmds[] = {
{"client",clientCommand,-2,"admin random @connection",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"admin",0,NULL,0,0,0,0,0},
{"auth",authCommand,-2,"no-auth fast @connection",0,NULL,0,0,0,0,0},
- {"hello",helloCommand,-2,"no-auth fast @connection",0,NULL,0,0,0,0,0},
+ {"hello",helloCommand,-1,"no-auth fast @connection",0,NULL,0,0,0,0,0},
{"acl",aclCommand,-2,"admin",0,NULL,0,0,0,0,0,0},
{"command",commandCommand,-1, "random @connection", 0,NULL,0,0,0,0,0,0}
};
@@ -3090,25 +3090,45 @@ int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
void sentinelCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"MASTERS -- Show a list of monitored masters and their state.",
-"MASTER <master-name> -- Show the state and info of the specified master.",
-"REPLICAS <master-name> -- Show a list of replicas for this master and their state.",
-"SENTINELS <master-name> -- Show a list of Sentinel instances for this master and their state.",
-"MYID -- Show Current Sentinel Id",
-"IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid> -- Check if the master specified by ip:port is down from current Sentinel's point of view.",
-"GET-MASTER-ADDR-BY-NAME <master-name> -- Return the ip and port number of the master with that name.",
-"RESET <pattern> -- Reset masters for specific master name matching this pattern.",
-"FAILOVER <master-name> -- Manually failover a master node without asking for agreement from other Sentinels",
-"PENDING-SCRIPTS -- Get pending scripts information.",
-"MONITOR <name> <ip> <port> <quorum> -- Start monitoring a new master with the specified name, ip, port and quorum.",
-"FLUSHCONFIG -- Force Sentinel to rewrite its configuration on disk, including the current Sentinel state.",
-"REMOVE <master-name> -- Remove master from Sentinel's monitor list.",
-"CKQUORUM <master-name> -- Check if the current Sentinel configuration is able to reach the quorum needed to failover a master "
-"and the majority needed to authorize the failover.",
-"SET <master-name> <option> <value> -- Set configuration paramters for certain masters.",
-"INFO-CACHE <master-name> -- Return last cached INFO output from masters and all its replicas.",
-"SIMULATE-FAILURE (crash-after-election|crash-after-promotion|help) -- Simulate a Sentinel crash.",
-"HELP -- Prints this help.",
+"CKQUORUM <master-name>",
+" Check if the current Sentinel configuration is able to reach the quorum",
+" needed to failover a master and the majority needed to authorize the",
+" failover.",
+"GET-MASTER-ADDR-BY-NAME <master-name>",
+" Return the ip and port number of the master with that name.",
+"FAILOVER <master-name>",
+" Manually failover a master node without asking for agreement from other",
+" Sentinels",
+"FLUSHCONFIG",
+" Force Sentinel to rewrite its configuration on disk, including the current",
+" Sentinel state.",
+"INFO-CACHE <master-name>",
+" Return last cached INFO output from masters and all its replicas.",
+"IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>",
+" Check if the master specified by ip:port is down from current Sentinel's",
+" point of view.",
+"MASTER <master-name>",
+" Show the state and info of the specified master.",
+"MASTERS",
+" Show a list of monitored masters and their state.",
+"MONITOR <name> <ip> <port> <quorum>",
+" Start monitoring a new master with the specified name, ip, port and quorum.",
+"MYID",
+" Return the ID of the Sentinel instance.",
+"PENDING-SCRIPTS",
+" Get pending scripts information.",
+"REMOVE <master-name>",
+" Remove master from Sentinel's monitor list.",
+"REPLICAS <master-name>",
+" Show a list of replicas for this master and their state.",
+"RESET <pattern>",
+" Reset masters for specific master name matching this pattern.",
+"SENTINELS <master-name>",
+" Show a list of Sentinel instances for this master and their state.",
+"SET <master-name> <option> <value>",
+" Set configuration paramters for certain masters.",
+"SIMULATE-FAILURE (CRASH-AFTER-ELECTION|CRASH-AFTER-PROMOTION|HELP)",
+" Simulate a Sentinel crash.",
NULL
};
addReplyHelp(c, help);
@@ -3446,7 +3466,7 @@ numargserr:
/* SENTINEL INFO [section] */
void sentinelInfoCommand(client *c) {
if (c->argc > 2) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -3579,14 +3599,13 @@ void sentinelSetCommand(client *c) {
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
- return;
+ goto seterr;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Notification script seems non existing or non executable");
- if (changes) sentinelFlushConfig();
- return;
+ goto seterr;
}
sdsfree(ri->notification_script);
ri->notification_script = strlen(value) ? sdsnew(value) : NULL;
@@ -3599,15 +3618,14 @@ void sentinelSetCommand(client *c) {
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
- return;
+ goto seterr;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Client reconfiguration script seems non existing or "
"non executable");
- if (changes) sentinelFlushConfig();
- return;
+ goto seterr;
}
sdsfree(ri->client_reconfig_script);
ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL;
@@ -3657,8 +3675,7 @@ void sentinelSetCommand(client *c) {
} else {
addReplyErrorFormat(c,"Unknown option or number of arguments for "
"SENTINEL SET '%s'", option);
- if (changes) sentinelFlushConfig();
- return;
+ goto seterr;
}
/* Log the event. */
@@ -3684,9 +3701,11 @@ void sentinelSetCommand(client *c) {
return;
badfmt: /* Bad format errors */
- if (changes) sentinelFlushConfig();
addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
(char*)c->argv[badarg]->ptr,option);
+seterr:
+ if (changes) sentinelFlushConfig();
+ return;
}
/* Our fake PUBLISH command: it is actually useful only to receive hello messages
diff --git a/src/server.c b/src/server.c
index ac43c32e7..0551eb3e4 100644
--- a/src/server.c
+++ b/src/server.c
@@ -34,6 +34,7 @@
#include "bio.h"
#include "latency.h"
#include "atomicvar.h"
+#include "mt19937-64.h"
#include <time.h>
#include <signal.h>
@@ -58,6 +59,10 @@
#include <sys/socket.h>
#include <sys/resource.h>
+#ifdef __linux__
+#include <sys/mman.h>
+#endif
+
/* Our shared "common" objects */
struct sharedObjectsStruct shared;
@@ -115,9 +120,9 @@ struct redisServer server; /* Server global state */
*
* write: Write command (may modify the key space).
*
- * read-only: All the non special commands just reading from keys without
- * changing the content, or returning other information like
- * the TIME command. Special commands such administrative commands
+ * read-only: Commands just reading from keys without changing the content.
+ * Note that commands that don't read from the keyspace such as
+ * TIME, SELECT, INFO, administrative commands, and connection
* or transaction related commands (multi, exec, discard, ...)
* are not flagged as read-only commands, since they affect the
* server or the connection in other ways.
@@ -158,6 +163,13 @@ struct redisServer server; /* Server global state */
* delay its execution as long as the kernel scheduler is giving
* us time. Note that commands that may trigger a DEL as a side
* effect (like SET) are not fast commands.
+ *
+ * may-replicate: Command may produce replication traffic, but should be
+ * allowed under circumstances where write commands are disallowed.
+ *                Examples include PUBLISH, which replicates pubsub messages, and
+ *                EVAL, which may execute write commands that are then replicated,
+ *                or may just execute read commands. A command cannot be marked
+ *                both "write" and "may-replicate".
*
* The following additional flags are only used in order to put commands
* in a specific ACL category. Commands can have multiple ACL categories.
@@ -287,11 +299,11 @@ struct redisCommand redisCommandTable[] = {
"write use-memory @list",
0,NULL,1,1,1,0,0,0},
- {"rpop",rpopCommand,2,
+ {"rpop",rpopCommand,-2,
"write fast @list",
0,NULL,1,1,1,0,0,0},
- {"lpop",lpopCommand,2,
+ {"lpop",lpopCommand,-2,
"write fast @list",
0,NULL,1,1,1,0,0,0},
@@ -463,6 +475,10 @@ struct redisCommand redisCommandTable[] = {
"read-only @sortedset",
0,NULL,1,1,1,0,0,0},
+ {"zrangestore",zrangestoreCommand,-5,
+ "write use-memory @sortedset",
+ 0,NULL,1,2,1,0,0,0},
+
{"zrangebyscore",zrangebyscoreCommand,-4,
"read-only @sortedset",
0,NULL,1,1,1,0,0,0},
@@ -685,7 +701,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"echo",echoCommand,2,
- "read-only fast @connection",
+ "fast @connection",
0,NULL,0,0,0,0,0,0},
{"save",saveCommand,1,
@@ -705,7 +721,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"lastsave",lastsaveCommand,1,
- "read-only random fast ok-loading ok-stale @admin @dangerous",
+ "random fast ok-loading ok-stale @admin @dangerous",
0,NULL,0,0,0,0,0,0},
{"type",typeCommand,2,
@@ -781,7 +797,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"role",roleCommand,1,
- "ok-loading ok-stale no-script fast read-only @dangerous",
+ "ok-loading ok-stale no-script fast @dangerous",
0,NULL,0,0,0,0,0,0},
{"debug",debugCommand,-2,
@@ -809,7 +825,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"publish",publishCommand,3,
- "pub-sub ok-loading ok-stale fast",
+ "pub-sub ok-loading ok-stale fast may-replicate",
0,NULL,0,0,0,0,0,0},
{"pubsub",pubsubCommand,-2,
@@ -868,18 +884,18 @@ struct redisCommand redisCommandTable[] = {
"admin no-script random ok-loading ok-stale @connection",
0,NULL,0,0,0,0,0,0},
- {"hello",helloCommand,-2,
+ {"hello",helloCommand,-1,
"no-auth no-script fast no-monitor ok-loading ok-stale no-slowlog @connection",
0,NULL,0,0,0,0,0,0},
/* EVAL can modify the dataset, however it is not flagged as a write
* command since we do the check while running commands from Lua. */
{"eval",evalCommand,-3,
- "no-script @scripting",
+ "no-script may-replicate @scripting",
0,evalGetKeys,0,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,
- "no-script @scripting",
+ "no-script may-replicate @scripting",
0,evalGetKeys,0,0,0,0,0,0},
{"slowlog",slowlogCommand,-2,
@@ -887,11 +903,11 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"script",scriptCommand,-2,
- "no-script @scripting",
+ "no-script may-replicate @scripting",
0,NULL,0,0,0,0,0,0},
{"time",timeCommand,1,
- "read-only random fast ok-loading ok-stale",
+ "random fast ok-loading ok-stale",
0,NULL,0,0,0,0,0,0},
{"bitop",bitopCommand,-4,
@@ -968,16 +984,19 @@ struct redisCommand redisCommandTable[] = {
* we claim that the representation, even if accessible, is an internal
* affair, and the command is semantically read only. */
{"pfcount",pfcountCommand,-2,
- "read-only @hyperloglog",
+ "read-only may-replicate @hyperloglog",
0,NULL,1,-1,1,0,0,0},
{"pfmerge",pfmergeCommand,-2,
"write use-memory @hyperloglog",
0,NULL,1,-1,1,0,0,0},
+    /* Unlike PFCOUNT, which is considered a read-only command (although
+     * it changes a bit), PFDEBUG may change the entire key when converting
+     * from sparse to dense representation. */
{"pfdebug",pfdebugCommand,-3,
- "admin write",
- 0,NULL,0,0,0,0,0,0},
+ "admin write use-memory @hyperloglog",
+ 0,NULL,2,2,1,0,0,0},
{"xadd",xaddCommand,-5,
"write use-memory fast random @stream",
@@ -1023,6 +1042,10 @@ struct redisCommand redisCommandTable[] = {
"write random fast @stream",
0,NULL,1,1,1,0,0,0},
+ {"xautoclaim",xautoclaimCommand,-6,
+ "write random fast @stream",
+ 0,NULL,1,1,1,0,0,0},
+
{"xinfo",xinfoCommand,-2,
"read-only random @stream",
0,NULL,2,2,1,0,0,0},
@@ -1059,7 +1082,7 @@ struct redisCommand redisCommandTable[] = {
"read-only @string",
0,lcsGetKeys,0,0,0,0,0,0},
- {"reset",resetCommand,-1,
+ {"reset",resetCommand,1,
"no-script ok-stale ok-loading fast @connection",
0,NULL,0,0,0,0,0,0}
};
@@ -1547,12 +1570,33 @@ void updateDictResizePolicy(void) {
dictDisableResize();
}
+const char *strChildType(int type) {
+ switch(type) {
+ case CHILD_TYPE_RDB: return "RDB";
+ case CHILD_TYPE_AOF: return "AOF";
+ case CHILD_TYPE_LDB: return "LDB";
+ case CHILD_TYPE_MODULE: return "MODULE";
+ default: return "Unknown";
+ }
+}
+
/* Return true if there are active child processes doing RDB saving,
* AOF rewriting, or some side process spawned by a loaded module. */
int hasActiveChildProcess() {
- return server.rdb_child_pid != -1 ||
- server.aof_child_pid != -1 ||
- server.module_child_pid != -1;
+ return server.child_pid != -1;
+}
+
+void resetChildState() {
+ server.child_type = CHILD_TYPE_NONE;
+ server.child_pid = -1;
+ server.stat_current_cow_bytes = 0;
+ updateDictResizePolicy();
+ closeChildInfoPipe();
+}
+
+/* Return true if the given child type is mutually exclusive with other fork children. */
+int isMutuallyExclusiveChildType(int type) {
+ return type == CHILD_TYPE_RDB || type == CHILD_TYPE_AOF || type == CHILD_TYPE_MODULE;
}
/* Return true if this instance has persistence completely turned off:
@@ -1874,29 +1918,30 @@ void checkChildrenDone(void) {
if (pid == -1) {
serverLog(LL_WARNING,"wait3() returned an error: %s. "
- "rdb_child_pid = %d, aof_child_pid = %d, module_child_pid = %d",
+ "child_type: %s, child_pid = %d",
strerror(errno),
- (int) server.rdb_child_pid,
- (int) server.aof_child_pid,
- (int) server.module_child_pid);
- } else if (pid == server.rdb_child_pid) {
- backgroundSaveDoneHandler(exitcode,bysignal);
- if (!bysignal && exitcode == 0) receiveChildInfo();
- } else if (pid == server.aof_child_pid) {
- backgroundRewriteDoneHandler(exitcode,bysignal);
- if (!bysignal && exitcode == 0) receiveChildInfo();
- } else if (pid == server.module_child_pid) {
- ModuleForkDoneHandler(exitcode,bysignal);
+ strChildType(server.child_type),
+ (int) server.child_pid);
+ } else if (pid == server.child_pid) {
+ if (server.child_type == CHILD_TYPE_RDB) {
+ backgroundSaveDoneHandler(exitcode, bysignal);
+ } else if (server.child_type == CHILD_TYPE_AOF) {
+ backgroundRewriteDoneHandler(exitcode, bysignal);
+ } else if (server.child_type == CHILD_TYPE_MODULE) {
+ ModuleForkDoneHandler(exitcode, bysignal);
+ } else {
+ serverPanic("Unknown child type %d for child pid %d", server.child_type, server.child_pid);
+ exit(1);
+ }
if (!bysignal && exitcode == 0) receiveChildInfo();
+ resetChildState();
} else {
if (!ldbRemoveChild(pid)) {
serverLog(LL_WARNING,
- "Warning, detected child with unmatched pid: %ld",
- (long)pid);
+ "Warning, detected child with unmatched pid: %ld",
+ (long) pid);
}
}
- updateDictResizePolicy();
- closeChildInfoPipe();
/* start any pending forks immediately. */
replicationStartPendingFork();
@@ -2065,6 +2110,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Check if a background saving or AOF rewrite in progress terminated. */
if (hasActiveChildProcess() || ldbPendingChildren())
{
+ run_with_period(1000) receiveChildInfo();
checkChildrenDone();
} else {
/* If there is not a background saving/rewrite in progress check if
@@ -2124,8 +2170,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
flushAppendOnlyFile(0);
}
- /* Clear the paused clients flag if needed. */
- clientsArePaused(); /* Don't check return value, just use the side effect.*/
+ /* Clear the paused clients state if needed. */
+ checkClientPauseTimeoutAndReturnIfPaused();
/* Replication cron function -- used to reconnect to master,
* detect transfer failures, start background RDB transfers and so forth. */
@@ -2183,12 +2229,16 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
void blockingOperationStarts() {
- updateCachedTime(0);
- server.blocked_last_cron = server.mstime;
+ if(!server.blocking_op_nesting++){
+ updateCachedTime(0);
+ server.blocked_last_cron = server.mstime;
+ }
}
void blockingOperationEnds() {
- server.blocked_last_cron = 0;
+ if(!(--server.blocking_op_nesting)){
+ server.blocked_last_cron = 0;
+ }
}
/* This function fills in the role of serverCron during RDB or AOF loading, and
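
blockingOperationStarts()/blockingOperationEnds() are now reference counted, so a nested blocking section no longer resets blocked_last_cron while an outer section is still active. An illustrative call sequence (not taken from a real call site):

    blockingOperationStarts();   /* nesting 0 -> 1: snapshots server.mstime         */
    blockingOperationStarts();   /* nesting 1 -> 2: inner section, no new snapshot   */
    blockingOperationEnds();     /* nesting 2 -> 1: blocked_last_cron is preserved   */
    whileBlockedCron();          /* cron still sees the outer section's timestamp    */
    blockingOperationEnds();     /* nesting 1 -> 0: state is finally cleared         */
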
@@ -2225,6 +2275,7 @@ void whileBlockedCron() {
activeDefragCycle();
server.blocked_last_cron += hz_ms;
+
/* Increment cronloop so that run_with_period works. */
server.cronloops++;
}
@@ -2317,8 +2368,12 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
* during the previous event loop iteration. Note that we do this after
* processUnblockedClients(), so if there are multiple pipelined WAITs
* and the just unblocked WAIT gets blocked again, we don't have to wait
- * a server cron cycle in absence of other event loop events. See #6623. */
- if (server.get_ack_from_slaves) {
+ * a server cron cycle in absence of other event loop events. See #6623.
+ *
+ * We also don't send the ACKs while clients are paused, since it can
+ * increment the replication backlog, they'll be sent after the pause
+ * if we are still the master. */
+ if (server.get_ack_from_slaves && !checkClientPauseTimeoutAndReturnIfPaused()) {
robj *argv[3];
argv[0] = createStringObject("REPLCONF",8);
@@ -2376,9 +2431,9 @@ void afterSleep(struct aeEventLoop *eventLoop) {
void createSharedObjects(void) {
int j;
+ /* Shared command responses */
shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
- shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
@@ -2386,8 +2441,14 @@ void createSharedObjects(void) {
shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
+ shared.space = createObject(OBJ_STRING,sdsnew(" "));
+ shared.colon = createObject(OBJ_STRING,sdsnew(":"));
+ shared.plus = createObject(OBJ_STRING,sdsnew("+"));
+
+ /* Shared command error responses */
shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
"-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
+ shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
"-ERR no such key\r\n"));
shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
@@ -2418,9 +2479,6 @@ void createSharedObjects(void) {
"-NOREPLICAS Not enough good replicas to write.\r\n"));
shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
"-BUSYKEY Target key name already exists.\r\n"));
- shared.space = createObject(OBJ_STRING,sdsnew(" "));
- shared.colon = createObject(OBJ_STRING,sdsnew(":"));
- shared.plus = createObject(OBJ_STRING,sdsnew("+"));
/* The shared NULL depends on the protocol version. */
shared.null[0] = NULL;
@@ -2945,9 +3003,9 @@ void resetServerStats(void) {
atomicSet(server.stat_net_input_bytes, 0);
atomicSet(server.stat_net_output_bytes, 0);
server.stat_unexpected_error_replies = 0;
+ server.stat_total_error_replies = 0;
server.stat_dump_payload_sanitizations = 0;
server.aof_delayed_fsync = 0;
- server.blocked_last_cron = 0;
}
/* Make the thread killable at any time, so that kill threads functions
@@ -2978,6 +3036,7 @@ void initServer(void) {
server.in_fork_child = CHILD_TYPE_NONE;
server.main_thread_id = pthread_self();
server.current_client = NULL;
+ server.errors = raxNew();
server.fixed_time_expire = 0;
server.clients = listCreate();
server.clients_index = raxNew();
@@ -2992,9 +3051,12 @@ void initServer(void) {
server.ready_keys = listCreate();
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
- server.clients_paused = 0;
+ server.client_pause_type = 0;
+ server.paused_clients = listCreate();
server.events_processed_while_blocked = 0;
server.system_memory_size = zmalloc_get_memory_size();
+ server.blocked_last_cron = 0;
+ server.blocking_op_nesting = 0;
if ((server.tls_port || server.tls_replication || server.tls_cluster)
&& tlsConfigure(&server.tls_ctx_config) == C_ERR) {
@@ -3064,9 +3126,9 @@ void initServer(void) {
server.in_eval = 0;
server.in_exec = 0;
server.propagate_in_transaction = 0;
- server.rdb_child_pid = -1;
- server.aof_child_pid = -1;
- server.module_child_pid = -1;
+ server.client_pause_in_transaction = 0;
+ server.child_pid = -1;
+ server.child_type = CHILD_TYPE_NONE;
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_pipe_conns = NULL;
server.rdb_pipe_numconns = 0;
@@ -3076,7 +3138,7 @@ void initServer(void) {
server.rdb_bgsave_scheduled = 0;
server.child_info_pipe[0] = -1;
server.child_info_pipe[1] = -1;
- server.child_info_data.magic = 0;
+ server.child_info_nread = 0;
aofRewriteBufferReset();
server.aof_buf = sdsempty();
server.lastsave = time(NULL); /* At startup we consider the DB saved. */
@@ -3088,6 +3150,7 @@ void initServer(void) {
/* A few stats we don't want to reset: server startup time, and peak mem. */
server.stat_starttime = time(NULL);
server.stat_peak_memory = 0;
+ server.stat_current_cow_bytes = 0;
server.stat_rdb_cow_bytes = 0;
server.stat_aof_cow_bytes = 0;
server.stat_module_cow_bytes = 0;
@@ -3230,6 +3293,8 @@ int populateCommandTableParseFlags(struct redisCommand *c, char *strflags) {
c->flags |= CMD_FAST | CMD_CATEGORY_FAST;
} else if (!strcasecmp(flag,"no-auth")) {
c->flags |= CMD_NO_AUTH;
+ } else if (!strcasecmp(flag,"may-replicate")) {
+ c->flags |= CMD_MAY_REPLICATE;
} else {
/* Parse ACL categories here if the flag name starts with @. */
uint64_t catflag;
@@ -3284,11 +3349,18 @@ void resetCommandTableStats(void) {
c = (struct redisCommand *) dictGetVal(de);
c->microseconds = 0;
c->calls = 0;
+ c->rejected_calls = 0;
+ c->failed_calls = 0;
}
dictReleaseIterator(di);
}
+void resetErrorTableStats(void) {
+ raxFreeWithCallback(server.errors, zfree);
+ server.errors = raxNew();
+}
+
/* ========================== Redis OP Array API ============================ */
void redisOpArrayInit(redisOpArray *oa) {
@@ -3374,6 +3446,18 @@ struct redisCommand *lookupCommandOrOriginal(sds name) {
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
{
+ /* Propagate a MULTI request once we encounter the first command which
+ * is a write command.
+ * This way we'll deliver the MULTI/..../EXEC block as a whole and
+ * both the AOF and the replication link will have the same consistency
+ * and atomicity guarantees. */
+ if (server.in_exec && !server.propagate_in_transaction)
+ execCommandPropagateMulti(dbid);
+
+ /* This needs to be unreachable since the dataset should be fixed during
+     * client pause, otherwise data may be lost during a failover. */
+ serverAssert(!(areClientsPaused() && !server.client_pause_in_transaction));
+
if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)
feedAppendOnlyFile(cmd,dbid,argv,argc);
if (flags & PROPAGATE_REPL)
@@ -3412,6 +3496,7 @@ void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
* Redis command implementation in order to to force the propagation of a
* specific command execution into AOF / Replication. */
void forceCommandPropagation(client *c, int flags) {
+ serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE));
if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
}
@@ -3475,6 +3560,7 @@ void call(client *c, int flags) {
ustime_t start, duration;
int client_old_flags = c->flags;
struct redisCommand *real_cmd = c->cmd;
+ static long long prev_err_count;
server.fixed_time_expire++;
@@ -3495,6 +3581,7 @@ void call(client *c, int flags) {
/* Call the command. */
dirty = server.dirty;
+ prev_err_count = server.stat_total_error_replies;
updateCachedTime(0);
start = server.ustime;
c->cmd->proc(c);
@@ -3502,6 +3589,14 @@ void call(client *c, int flags) {
dirty = server.dirty-dirty;
if (dirty < 0) dirty = 0;
+ /* Update failed command calls if required.
+ * We leverage a static variable (prev_err_count) to retain
+ * the counter across nested function calls and avoid logging
+ * the same error twice. */
+ if ((server.stat_total_error_replies - prev_err_count) > 0) {
+ real_cmd->failed_calls++;
+ }
+
/* After executing command, we will close the client after writing entire
* reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */
if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) {
@@ -3604,7 +3699,7 @@ void call(client *c, int flags) {
!(c->flags & CLIENT_MULTI) &&
!(flags & CMD_CALL_NOWRAP))
{
- execCommandPropagateMulti(c);
+ execCommandPropagateMulti(c->db->id);
multi_emitted = 1;
}
@@ -3619,13 +3714,19 @@ void call(client *c, int flags) {
}
if (multi_emitted) {
- execCommandPropagateExec(c);
+ execCommandPropagateExec(c->db->id);
}
}
redisOpArrayFree(&server.also_propagate);
}
server.also_propagate = prev_also_propagate;
+ /* Client pause takes effect after a transaction has finished. This needs
+ * to be located after everything is propagated. */
+ if (!server.in_exec && server.client_pause_in_transaction) {
+ server.client_pause_in_transaction = 0;
+ }
+
/* If the client has keys tracking enabled for client side caching,
* make sure to remember the keys it fetched via this command. */
if (c->cmd->flags & CMD_READONLY) {
@@ -3640,6 +3741,7 @@ void call(client *c, int flags) {
server.fixed_time_expire--;
server.stat_numcommands++;
+ prev_err_count = server.stat_total_error_replies;
/* Record peak memory after each command and before the eviction that runs
* before the next command. */
@@ -3655,6 +3757,7 @@ void call(client *c, int flags) {
* Note: 'reply' is expected to end with \r\n */
void rejectCommand(client *c, robj *reply) {
flagTransaction(c);
+ if (c->cmd) c->cmd->rejected_calls++;
if (c->cmd && c->cmd->proc == execCommand) {
execCommandAbort(c, reply->ptr);
} else {
@@ -3664,6 +3767,7 @@ void rejectCommand(client *c, robj *reply) {
}
void rejectCommandFormat(client *c, const char *fmt, ...) {
+ if (c->cmd) c->cmd->rejected_calls++;
flagTransaction(c);
va_list ap;
va_start(ap,fmt);
@@ -3674,10 +3778,11 @@ void rejectCommandFormat(client *c, const char *fmt, ...) {
sdsmapchars(s, "\r\n", " ", 2);
if (c->cmd && c->cmd->proc == execCommand) {
execCommandAbort(c, s);
+ sdsfree(s);
} else {
+ /* The following frees 's'. */
addReplyErrorSds(c, s);
}
- sdsfree(s);
}
/* Returns 1 for commands that may have key names in their arguments, but have
@@ -3735,6 +3840,8 @@ int processCommand(client *c) {
(c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE));
int is_denyloading_command = !(c->cmd->flags & CMD_LOADING) ||
(c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING));
+ int is_may_replicate_command = (c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE)) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)));
/* Check if the user is authenticated. This check is skipped in case
* the default user is flagged as "nopass" and is active. */
@@ -3789,6 +3896,7 @@ int processCommand(client *c) {
flagTransaction(c);
}
clusterRedirectClient(c,n,hashslot,error_code);
+ c->cmd->rejected_calls++;
return C_OK;
}
}
@@ -3932,6 +4040,17 @@ int processCommand(client *c) {
return C_OK;
}
+ /* If the server is paused, block the client until
+ * the pause has ended. Replicas are never paused. */
+ if (!(c->flags & CLIENT_SLAVE) &&
+ ((server.client_pause_type == CLIENT_PAUSE_ALL) ||
+ (server.client_pause_type == CLIENT_PAUSE_WRITE && is_may_replicate_command)))
+ {
+ c->bpop.timeout = 0;
+ blockClient(c,BLOCKED_PAUSE);
+ return C_OK;
+ }
+
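
With this check in place, CLIENT PAUSE WRITE only parks commands that could end up in the replication stream: anything flagged write or may-replicate, plus EXEC when the queued transaction contains such a command, while plain reads keep flowing; CLIENT PAUSE ALL parks everything except replicas. A hedged sketch of the decision (pauseBlocksCommand is a made-up name; the real check is the inline condition above):

    /* Sketch only: would this command be parked under the current pause? */
    static int pauseBlocksCommand(uint64_t cmd_flags, int pause_type) {
        if (pause_type == CLIENT_PAUSE_ALL) return 1;    /* park every command */
        if (pause_type != CLIENT_PAUSE_WRITE) return 0;  /* no pause active    */
        /* e.g. SET, PUBLISH and EVAL are parked, GET keeps running. */
        return (cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)) != 0;
    }
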
/* Exec the command */
if (c->flags & CLIENT_MULTI &&
c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
@@ -3946,9 +4065,22 @@ int processCommand(client *c) {
if (listLength(server.ready_keys))
handleClientsBlockedOnKeys();
}
+
return C_OK;
}
+/* ====================== Error lookup and execution ===================== */
+
+void incrementErrorCount(const char *fullerr, size_t namelen) {
+ struct redisError *error = raxFind(server.errors,(unsigned char*)fullerr,namelen);
+ if (error == raxNotFound) {
+ error = zmalloc(sizeof(*error));
+ error->count = 0;
+ raxInsert(server.errors,(unsigned char*)fullerr,namelen,error,NULL);
+ }
+ error->count++;
+}
+
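
incrementErrorCount() keys the server.errors rax by the error-code prefix of a reply (the token before the first space, without the leading '-'), and the new errorstats section of INFO dumps those counters. A sketch of a plausible call site -- countErrorReplySketch is a made-up name, and the real callers live in the reply helpers outside this hunk:

    /* Sketch only: count an error reply such as "-WRONGTYPE Operation against ..." */
    void countErrorReplySketch(const char *reply) {
        if (reply[0] != '-') return;              /* not an error reply      */
        size_t namelen = 1;
        while (reply[namelen] && reply[namelen] != ' ') namelen++;
        incrementErrorCount(reply+1, namelen-1);  /* "WRONGTYPE", "ERR", ... */
    }
    /* INFO errorstats would then report, for example:
     *   errorstat_WRONGTYPE:count=3
     *   errorstat_ERR:count=12 */
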
/*================================== Shutdown =============================== */
/* Close listening sockets. Also unlink the unix domain socket if
@@ -3990,25 +4122,28 @@ int prepareForShutdown(int flags) {
/* Kill the saving child if there is a background saving in progress.
We want to avoid race conditions, for instance our saving child may
overwrite the synchronous saving did by SHUTDOWN. */
- if (server.rdb_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_RDB) {
serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
- /* Note that, in killRDBChild, we call rdbRemoveTempFile that will
- * do close fd(in order to unlink file actully) in background thread.
+ killRDBChild();
+        /* Note that normally killRDBChild is followed by backgroundSaveDoneHandler
+         * doing its cleanup, but in this case that code will not be reached,
+         * so we need to call rdbRemoveTempFile, which will close the fd (in order
+         * to actually unlink the file) in a background thread.
         * The temp rdb file fd may not be closed when redis exits quickly,
         * but the OS will close this fd when the process exits. */
- killRDBChild();
+ rdbRemoveTempFile(server.child_pid, 0);
}
/* Kill module child if there is one. */
- if (server.module_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_MODULE) {
serverLog(LL_WARNING,"There is a module fork child. Killing it!");
- TerminateModuleForkChild(server.module_child_pid,0);
+ TerminateModuleForkChild(server.child_pid,0);
}
if (server.aof_state != AOF_OFF) {
/* Kill the AOF saving child as the AOF we already have may be longer
* but contains the full dataset anyway. */
- if (server.aof_child_pid != -1) {
+ if (server.child_type == CHILD_TYPE_AOF) {
/* If we have AOF enabled but haven't written the AOF yet, don't
* shutdown or else the dataset will be lost. */
if (server.aof_state == AOF_WAIT_REWRITE) {
@@ -4170,6 +4305,7 @@ void addReplyCommand(client *c, struct redisCommand *cmd) {
flagcount += addReplyCommandFlag(c,cmd,CMD_ASKING, "asking");
flagcount += addReplyCommandFlag(c,cmd,CMD_FAST, "fast");
flagcount += addReplyCommandFlag(c,cmd,CMD_NO_AUTH, "no_auth");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_MAY_REPLICATE, "may_replicate");
if (cmdHasMovableKeys(cmd)) {
addReplyStatus(c, "movablekeys");
flagcount += 1;
@@ -4191,10 +4327,14 @@ void commandCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"(no subcommand) -- Return details about all Redis commands.",
-"COUNT -- Return the total number of commands in this Redis server.",
-"GETKEYS <full-command> -- Return the keys from a full Redis command.",
-"INFO [command-name ...] -- Return details about multiple Redis commands.",
+"(no subcommand)",
+" Return details about all Redis commands.",
+"COUNT",
+" Return the total number of commands in this Redis server.",
+"GETKEYS <full-command>",
+" Return the keys from a full Redis command.",
+"INFO [<command-name> ...]",
+" Return details about multiple Redis commands.",
NULL
};
addReplyHelp(c, help);
@@ -4524,6 +4664,7 @@ sds genRedisInfoString(const char *section) {
info = sdscatprintf(info,
"# Persistence\r\n"
"loading:%d\r\n"
+ "current_cow_size:%zu\r\n"
"rdb_changes_since_last_save:%lld\r\n"
"rdb_bgsave_in_progress:%d\r\n"
"rdb_last_save_time:%jd\r\n"
@@ -4542,24 +4683,25 @@ sds genRedisInfoString(const char *section) {
"module_fork_in_progress:%d\r\n"
"module_fork_last_cow_size:%zu\r\n",
server.loading,
+ server.stat_current_cow_bytes,
server.dirty,
- server.rdb_child_pid != -1,
+ server.child_type == CHILD_TYPE_RDB,
(intmax_t)server.lastsave,
(server.lastbgsave_status == C_OK) ? "ok" : "err",
(intmax_t)server.rdb_save_time_last,
- (intmax_t)((server.rdb_child_pid == -1) ?
+ (intmax_t)((server.child_type != CHILD_TYPE_RDB) ?
-1 : time(NULL)-server.rdb_save_time_start),
server.stat_rdb_cow_bytes,
server.aof_state != AOF_OFF,
- server.aof_child_pid != -1,
+ server.child_type == CHILD_TYPE_AOF,
server.aof_rewrite_scheduled,
(intmax_t)server.aof_rewrite_time_last,
- (intmax_t)((server.aof_child_pid == -1) ?
+ (intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
(server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
(server.aof_last_write_status == C_OK) ? "ok" : "err",
server.stat_aof_cow_bytes,
- server.module_child_pid != -1,
+ server.child_type == CHILD_TYPE_MODULE,
server.stat_module_cow_bytes);
if (server.aof_enabled) {
@@ -4665,6 +4807,7 @@ sds genRedisInfoString(const char *section) {
"tracking_total_items:%lld\r\n"
"tracking_total_prefixes:%lld\r\n"
"unexpected_error_replies:%lld\r\n"
+ "total_error_replies:%lld\r\n"
"dump_payload_sanitizations:%lld\r\n"
"total_reads_processed:%lld\r\n"
"total_writes_processed:%lld\r\n"
@@ -4702,6 +4845,7 @@ sds genRedisInfoString(const char *section) {
(unsigned long long) trackingGetTotalItems(),
(unsigned long long) trackingGetTotalPrefixes(),
server.stat_unexpected_error_replies,
+ server.stat_total_error_replies,
server.stat_dump_payload_sanitizations,
stat_total_reads_processed,
stat_total_writes_processed,
@@ -4892,14 +5036,33 @@ sds genRedisInfoString(const char *section) {
di = dictGetSafeIterator(server.commands);
while((de = dictNext(di)) != NULL) {
c = (struct redisCommand *) dictGetVal(de);
- if (!c->calls) continue;
+ if (!c->calls && !c->failed_calls && !c->rejected_calls)
+ continue;
info = sdscatprintf(info,
- "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
+ "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f"
+ ",rejected_calls=%lld,failed_calls=%lld\r\n",
c->name, c->calls, c->microseconds,
- (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
+ (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls),
+ c->rejected_calls, c->failed_calls);
}
dictReleaseIterator(di);
}
+ /* Error statistics */
+ if (allsections || defsections || !strcasecmp(section,"errorstats")) {
+ if (sections++) info = sdscat(info,"\r\n");
+ info = sdscat(info, "# Errorstats\r\n");
+ raxIterator ri;
+ raxStart(&ri,server.errors);
+ raxSeek(&ri,"^",NULL,0);
+ struct redisError *e;
+ while(raxNext(&ri)) {
+ e = (struct redisError *) ri.data;
+ info = sdscatprintf(info,
+ "errorstat_%.*s:count=%lld\r\n",
+ (int)ri.key_len, ri.key, e->count);
+ }
+ raxStop(&ri);
+ }
/* Cluster */
if (allsections || defsections || !strcasecmp(section,"cluster")) {
@@ -4944,7 +5107,7 @@ void infoCommand(client *c) {
char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
if (c->argc > 2) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
sds info = genRedisInfoString(section);
@@ -4971,6 +5134,21 @@ void monitorCommand(client *c) {
/* =================================== Main! ================================ */
+int checkIgnoreWarning(const char *warning) {
+ int argc, j;
+ sds *argv = sdssplitargs(server.ignore_warnings, &argc);
+ if (argv == NULL)
+ return 0;
+
+ for (j = 0; j < argc; j++) {
+ char *flag = argv[j];
+ if (!strcasecmp(flag, warning))
+ break;
+ }
+ sdsfreesplitres(argv,argc);
+ return j < argc;
+}
+
#ifdef __linux__
int linuxOvercommitMemoryValue(void) {
FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
@@ -4994,6 +5172,113 @@ void linuxMemoryWarnings(void) {
serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled (set to 'madvise' or 'never').");
}
}
+
+#ifdef __arm64__
+
+/* Get size in kilobytes of the Shared_Dirty pages of the calling process for the
+ * memory map corresponding to the provided address, or -1 on error. */
+static int smapsGetSharedDirty(unsigned long addr) {
+ int ret, in_mapping = 0, val = -1;
+ unsigned long from, to;
+ char buf[64];
+ FILE *f;
+
+ f = fopen("/proc/self/smaps", "r");
+ serverAssert(f);
+
+ while (1) {
+ if (!fgets(buf, sizeof(buf), f))
+ break;
+
+ ret = sscanf(buf, "%lx-%lx", &from, &to);
+ if (ret == 2)
+ in_mapping = from <= addr && addr < to;
+
+ if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) {
+ ret = sscanf(buf, "%*s %d", &val);
+ serverAssert(ret == 1);
+ break;
+ }
+ }
+
+ fclose(f);
+ return val;
+}
+
+/* Older arm64 Linux kernels have a bug that could lead to data corruption
+ * during background save in certain scenarios. This function checks if the
+ * kernel is affected.
+ * The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b
+ * titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()"
+ * Return 1 if the kernel seems to be affected, and 0 otherwise. */
+int linuxMadvFreeForkBugCheck(void) {
+ int ret, pipefd[2];
+ pid_t pid;
+ char *p, *q, bug_found = 0;
+ const long map_size = 3 * 4096;
+
+ /* Create a memory map that's in our full control (not one used by the allocator). */
+ p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ serverAssert(p != MAP_FAILED);
+
+ q = p + 4096;
+
+    /* Split the memory map into 3 pages by setting their protection as RO|RW|RO to prevent
+ * Linux from merging this memory map with adjacent VMAs. */
+ ret = mprotect(q, 4096, PROT_READ | PROT_WRITE);
+ serverAssert(!ret);
+
+ /* Write to the page once to make it resident */
+ *(volatile char*)q = 0;
+
+ /* Tell the kernel that this page is free to be reclaimed. */
+#ifndef MADV_FREE
+#define MADV_FREE 8
+#endif
+ ret = madvise(q, 4096, MADV_FREE);
+ serverAssert(!ret);
+
+    /* Write to the page after being marked for freeing; this is supposed to take
+ * ownership of that page again. */
+ *(volatile char*)q = 0;
+
+ /* Create a pipe for the child to return the info to the parent. */
+ ret = pipe(pipefd);
+ serverAssert(!ret);
+
+ /* Fork the process. */
+ pid = fork();
+ serverAssert(pid >= 0);
+ if (!pid) {
+        /* Child: check if the page is marked as dirty, expecting 4 (kB).
+ * A value of 0 means the kernel is affected by the bug. */
+ if (!smapsGetSharedDirty((unsigned long)q))
+ bug_found = 1;
+
+ ret = write(pipefd[1], &bug_found, 1);
+ serverAssert(ret == 1);
+
+ exit(0);
+ } else {
+ /* Read the result from the child. */
+ ret = read(pipefd[0], &bug_found, 1);
+ serverAssert(ret == 1);
+
+ /* Reap the child pid. */
+ serverAssert(waitpid(pid, NULL, 0) == pid);
+ }
+
+ /* Cleanup */
+ ret = close(pipefd[0]);
+ serverAssert(!ret);
+ ret = close(pipefd[1]);
+ serverAssert(!ret);
+ ret = munmap(p, map_size);
+ serverAssert(!ret);
+
+ return bug_found;
+}
+#endif /* __arm64__ */
#endif /* __linux__ */
void createPidFile(void) {
@@ -5189,10 +5474,22 @@ void closeClildUnusedResourceAfterFork() {
closeListeningSockets(0);
if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
close(server.cluster_config_file_lock_fd); /* don't care if this fails */
+
+    /* Clear server.pidfile; this is the parent's pidfile and should not
+     * be touched (or deleted) by the child (on exit / crash). */
+ zfree(server.pidfile);
+ server.pidfile = NULL;
}
/* purpose is one of CHILD_TYPE_ types */
int redisFork(int purpose) {
+ if (isMutuallyExclusiveChildType(purpose)) {
+ if (hasActiveChildProcess())
+ return -1;
+
+ openChildInfoPipe();
+ }
+
int childpid;
long long start = ustime();
if ((childpid = fork()) == 0) {
@@ -5208,23 +5505,38 @@ int redisFork(int purpose) {
server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
+ if (isMutuallyExclusiveChildType(purpose)) closeChildInfoPipe();
return -1;
}
+
+    /* The child_pid and child_type are only for mutually exclusive children.
+     * Other child types should handle and store their pids in dedicated variables.
+     *
+     * Today, we allow CHILD_TYPE_LDB to run in parallel with the other fork types:
+     * - it isn't used for production, so it will not make the server less efficient
+ * - used for debugging, and we don't want to block it from running while other
+ * forks are running (like RDB and AOF) */
+ if (isMutuallyExclusiveChildType(purpose)) {
+ server.child_pid = childpid;
+ server.child_type = purpose;
+ server.stat_current_cow_bytes = 0;
+ }
+
+ updateDictResizePolicy();
}
return childpid;
}
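
With child_pid/child_type unified, every background job follows the same shape: ask redisFork() for a mutually exclusive child, bail out if another one is already running, and have the child report copy-on-write usage before exiting. A hedged sketch of that caller shape -- backgroundJobSketch is a made-up name and the real RDB/AOF/module call sites live elsewhere:

    int backgroundJobSketch(void) {
        int childpid = redisFork(CHILD_TYPE_RDB);
        if (childpid == -1) return C_ERR;   /* another RDB/AOF/module child is active */
        if (childpid == 0) {
            /* Child: do the work, report COW usage, then exit. */
            /* ... produce the snapshot ... */
            sendChildCOWInfo(CHILD_TYPE_RDB, 1, "Background save");  /* on_exit = 1 */
            exitFromChild(0);
        }
        /* Parent: child_pid/child_type bookkeeping was already done in redisFork(). */
        return C_OK;
    }
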
-void sendChildCOWInfo(int ptype, char *pname) {
+void sendChildCOWInfo(int ptype, int on_exit, char *pname) {
size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- serverLog(LL_NOTICE,
+ serverLog(on_exit ? LL_NOTICE : LL_VERBOSE,
"%s: %zu MB of memory used by copy-on-write",
- pname, private_dirty/(1024*1024));
+ pname, private_dirty);
}
- server.child_info_data.cow_size = private_dirty;
- sendChildInfo(ptype);
+ sendChildInfo(ptype, on_exit, private_dirty);
}
void memtest(size_t megabytes, int passes);
@@ -5282,7 +5594,7 @@ void loadDataFromDisk(void) {
void redisOutOfMemoryHandler(size_t allocation_size) {
serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
allocation_size);
- serverPanic("Redis aborting for OUT OF MEMORY. Allocating %zu bytes!",
+ serverPanic("Redis aborting for OUT OF MEMORY. Allocating %zu bytes!",
allocation_size);
}
@@ -5436,6 +5748,7 @@ int main(int argc, char **argv) {
srand(time(NULL)^getpid());
srandom(time(NULL)^getpid());
gettimeofday(&tv,NULL);
+ init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
crc64_init();
uint8_t hashseed[16];
@@ -5564,7 +5877,16 @@ int main(int argc, char **argv) {
serverLog(LL_WARNING,"Server initialized");
#ifdef __linux__
linuxMemoryWarnings();
- #endif
+ #if defined (__arm64__)
+ if (linuxMadvFreeForkBugCheck()) {
+ serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. Please upgrade to the latest stable kernel.");
+ if (!checkIgnoreWarning("ARM64-COW-BUG")) {
+ serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
+ exit(1);
+ }
+ }
+ #endif /* __arm64__ */
+ #endif /* __linux__ */
moduleInitModulesSystemLast();
moduleLoadFromQueue();
ACLLoadUsersAtStartup();
diff --git a/src/server.h b/src/server.h
index 427d7c2f2..eb967a042 100644
--- a/src/server.h
+++ b/src/server.h
@@ -181,33 +181,34 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CMD_ASKING (1ULL<<13) /* "cluster-asking" flag */
#define CMD_FAST (1ULL<<14) /* "fast" flag */
#define CMD_NO_AUTH (1ULL<<15) /* "no-auth" flag */
+#define CMD_MAY_REPLICATE (1ULL<<16) /* "may-replicate" flag */
/* Command flags used by the module system. */
-#define CMD_MODULE_GETKEYS (1ULL<<16) /* Use the modules getkeys interface. */
-#define CMD_MODULE_NO_CLUSTER (1ULL<<17) /* Deny on Redis Cluster. */
+#define CMD_MODULE_GETKEYS (1ULL<<17) /* Use the modules getkeys interface. */
+#define CMD_MODULE_NO_CLUSTER (1ULL<<18) /* Deny on Redis Cluster. */
/* Command flags that describe ACLs categories. */
-#define CMD_CATEGORY_KEYSPACE (1ULL<<18)
-#define CMD_CATEGORY_READ (1ULL<<19)
-#define CMD_CATEGORY_WRITE (1ULL<<20)
-#define CMD_CATEGORY_SET (1ULL<<21)
-#define CMD_CATEGORY_SORTEDSET (1ULL<<22)
-#define CMD_CATEGORY_LIST (1ULL<<23)
-#define CMD_CATEGORY_HASH (1ULL<<24)
-#define CMD_CATEGORY_STRING (1ULL<<25)
-#define CMD_CATEGORY_BITMAP (1ULL<<26)
-#define CMD_CATEGORY_HYPERLOGLOG (1ULL<<27)
-#define CMD_CATEGORY_GEO (1ULL<<28)
-#define CMD_CATEGORY_STREAM (1ULL<<29)
-#define CMD_CATEGORY_PUBSUB (1ULL<<30)
-#define CMD_CATEGORY_ADMIN (1ULL<<31)
-#define CMD_CATEGORY_FAST (1ULL<<32)
-#define CMD_CATEGORY_SLOW (1ULL<<33)
-#define CMD_CATEGORY_BLOCKING (1ULL<<34)
-#define CMD_CATEGORY_DANGEROUS (1ULL<<35)
-#define CMD_CATEGORY_CONNECTION (1ULL<<36)
-#define CMD_CATEGORY_TRANSACTION (1ULL<<37)
-#define CMD_CATEGORY_SCRIPTING (1ULL<<38)
+#define CMD_CATEGORY_KEYSPACE (1ULL<<19)
+#define CMD_CATEGORY_READ (1ULL<<20)
+#define CMD_CATEGORY_WRITE (1ULL<<21)
+#define CMD_CATEGORY_SET (1ULL<<22)
+#define CMD_CATEGORY_SORTEDSET (1ULL<<23)
+#define CMD_CATEGORY_LIST (1ULL<<24)
+#define CMD_CATEGORY_HASH (1ULL<<25)
+#define CMD_CATEGORY_STRING (1ULL<<26)
+#define CMD_CATEGORY_BITMAP (1ULL<<27)
+#define CMD_CATEGORY_HYPERLOGLOG (1ULL<<28)
+#define CMD_CATEGORY_GEO (1ULL<<29)
+#define CMD_CATEGORY_STREAM (1ULL<<30)
+#define CMD_CATEGORY_PUBSUB (1ULL<<31)
+#define CMD_CATEGORY_ADMIN (1ULL<<32)
+#define CMD_CATEGORY_FAST (1ULL<<33)
+#define CMD_CATEGORY_SLOW (1ULL<<34)
+#define CMD_CATEGORY_BLOCKING (1ULL<<35)
+#define CMD_CATEGORY_DANGEROUS (1ULL<<36)
+#define CMD_CATEGORY_CONNECTION (1ULL<<37)
+#define CMD_CATEGORY_TRANSACTION (1ULL<<38)
+#define CMD_CATEGORY_SCRIPTING (1ULL<<39)
/* AOF states */
#define AOF_OFF 0 /* AOF is off */
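The new CMD_MAY_REPLICATE bit lands at position 16, which is why every later module flag and ACL-category bit above shifts up by one. A hedged sketch of how such a flag is typically tested, assuming the usual cmd->flags mask on struct redisCommand; the call site below is hypothetical and not taken from this patch:

    /* Hypothetical check: skip commands that may generate replication
     * traffic while replicas must stay quiet. */
    if (cmd->flags & CMD_MAY_REPLICATE) {
        /* ... reject or defer the command ... */
    }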
@@ -250,10 +251,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put
in the list of clients we can read
from. */
-#define CLIENT_PENDING_COMMAND (1<<30) /* Used in threaded I/O to signal after
- we return single threaded that the
- client has already pending commands
- to be executed. */
+#define CLIENT_PENDING_COMMAND (1<<30) /* Indicates the client has a fully
+ * parsed command ready for execution. */
#define CLIENT_TRACKING (1ULL<<31) /* Client enabled keys tracking in order to
perform client side caching. */
#define CLIENT_TRACKING_BROKEN_REDIR (1ULL<<32) /* Target client is invalid. */
@@ -280,7 +279,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define BLOCKED_MODULE 3 /* Blocked by a loadable module. */
#define BLOCKED_STREAM 4 /* XREAD. */
#define BLOCKED_ZSET 5 /* BZPOP et al. */
-#define BLOCKED_NUM 6 /* Number of blocked states. */
+#define BLOCKED_PAUSE 6 /* Blocked by CLIENT PAUSE */
+#define BLOCKED_NUM 7 /* Number of blocked states. */
/* Client request types */
#define PROTO_REQ_INLINE 1
@@ -299,24 +299,23 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
/* Slave replication state. Used in server.repl_state for slaves to remember
* what to do next. */
-#define REPL_STATE_NONE 0 /* No active replication */
-#define REPL_STATE_CONNECT 1 /* Must connect to master */
-#define REPL_STATE_CONNECTING 2 /* Connecting to master */
-/* --- Handshake states, must be ordered --- */
-#define REPL_STATE_RECEIVE_PONG 3 /* Wait for PING reply */
-#define REPL_STATE_SEND_AUTH 4 /* Send AUTH to master */
-#define REPL_STATE_RECEIVE_AUTH 5 /* Wait for AUTH reply */
-#define REPL_STATE_SEND_PORT 6 /* Send REPLCONF listening-port */
-#define REPL_STATE_RECEIVE_PORT 7 /* Wait for REPLCONF reply */
-#define REPL_STATE_SEND_IP 8 /* Send REPLCONF ip-address */
-#define REPL_STATE_RECEIVE_IP 9 /* Wait for REPLCONF reply */
-#define REPL_STATE_SEND_CAPA 10 /* Send REPLCONF capa */
-#define REPL_STATE_RECEIVE_CAPA 11 /* Wait for REPLCONF reply */
-#define REPL_STATE_SEND_PSYNC 12 /* Send PSYNC */
-#define REPL_STATE_RECEIVE_PSYNC 13 /* Wait for PSYNC reply */
-/* --- End of handshake states --- */
-#define REPL_STATE_TRANSFER 14 /* Receiving .rdb from master */
-#define REPL_STATE_CONNECTED 15 /* Connected to master */
+typedef enum {
+ REPL_STATE_NONE = 0, /* No active replication */
+ REPL_STATE_CONNECT, /* Must connect to master */
+ REPL_STATE_CONNECTING, /* Connecting to master */
+ /* --- Handshake states, must be ordered --- */
+ REPL_STATE_RECEIVE_PING_REPLY, /* Wait for PING reply */
+ REPL_STATE_SEND_HANDSHAKE, /* Send handshake sequence to master */
+ REPL_STATE_RECEIVE_AUTH_REPLY, /* Wait for AUTH reply */
+ REPL_STATE_RECEIVE_PORT_REPLY, /* Wait for REPLCONF reply */
+ REPL_STATE_RECEIVE_IP_REPLY, /* Wait for REPLCONF reply */
+ REPL_STATE_RECEIVE_CAPA_REPLY, /* Wait for REPLCONF reply */
+ REPL_STATE_SEND_PSYNC, /* Send PSYNC */
+ REPL_STATE_RECEIVE_PSYNC_REPLY, /* Wait for PSYNC reply */
+ /* --- End of handshake states --- */
+ REPL_STATE_TRANSFER, /* Receiving .rdb from master */
+ REPL_STATE_CONNECTED, /* Connected to master */
+} repl_state;
/* State of slaves from the POV of the master. Used in client->replstate.
* In SEND_BULK and ONLINE state the slave receives new updates
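Since the replica handshake states are now a single enum, the handshake driver can advance through them with a plain switch over server.repl_state (the field named in the comment above). A rough sketch only, not the actual syncWithMaster() code:

    switch (server.repl_state) {
    case REPL_STATE_SEND_HANDSHAKE:
        /* Write the whole AUTH/REPLCONF handshake in one batch, then move
         * on to the first RECEIVE_*_REPLY state and wait for the replies. */
        break;
    case REPL_STATE_RECEIVE_AUTH_REPLY:
    case REPL_STATE_RECEIVE_PORT_REPLY:
        /* Consume one reply per state and advance to the next state. */
        break;
    default:
        break;
    }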
@@ -437,6 +436,14 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define PROPAGATE_AOF 1
#define PROPAGATE_REPL 2
+/* Client pause types, larger types are more restrictive
+ * than smaller types. */
+typedef enum {
+ CLIENT_PAUSE_OFF = 0, /* Pause no commands */
+ CLIENT_PAUSE_WRITE, /* Pause write commands */
+ CLIENT_PAUSE_ALL /* Pause all commands */
+} pause_type;
+
/* RDB active child save type. */
#define RDB_CHILD_TYPE_NONE 0
#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
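The pause types above are ordered so that a larger value is strictly more restrictive, and the reworked pauseClients()/unpauseClients() prototypes further down in this header take the type explicitly. A hedged usage sketch (durations are milliseconds per the mstime_t argument; the surrounding logic is illustrative):

    /* Sketch: block only write commands for one second, then lift the pause. */
    pauseClients(1000, CLIENT_PAUSE_WRITE);
    /* ... perform the sensitive operation ... */
    unpauseClients();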
@@ -605,7 +612,7 @@ typedef struct RedisModuleIO {
iovar.ver = 0; \
iovar.key = keyptr; \
iovar.ctx = NULL; \
-} while(0);
+} while(0)
/* This is a structure used to export DEBUG DIGEST capabilities to Redis
* modules. We want to capture both the ordered and unordered elements of
@@ -621,7 +628,7 @@ typedef struct RedisModuleDigest {
#define moduleInitDigestContext(mdvar) do { \
memset(mdvar.o,0,sizeof(mdvar.o)); \
memset(mdvar.x,0,sizeof(mdvar.x)); \
-} while(0);
+} while(0)
/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
@@ -894,6 +901,7 @@ typedef struct client {
sds peerid; /* Cached peer ID. */
sds sockname; /* Cached connection target address. */
listNode *client_list_node; /* list node in client list */
+ listNode *paused_list_node; /* list node within the pause list */
RedisModuleUserChangedFunc auth_callback; /* Module callback to execute
* when the authenticated user
* changes. */
@@ -1099,7 +1107,6 @@ struct clusterState;
#undef hz
#endif
-#define CHILD_INFO_MAGIC 0xC17DDA7A12345678LL
#define CHILD_TYPE_NONE 0
#define CHILD_TYPE_RDB 1
#define CHILD_TYPE_AOF 2
@@ -1123,6 +1130,7 @@ struct redisServer {
dict *commands; /* Command table */
dict *orig_commands; /* Command table before command renaming. */
aeEventLoop *el;
+ rax *errors; /* Errors table */
redisAtomic unsigned int lruclock; /* Clock for LRU eviction */
volatile sig_atomic_t shutdown_asap; /* SHUTDOWN needed ASAP */
int activerehashing; /* Incremental rehash in serverCron() */
@@ -1137,6 +1145,8 @@ struct redisServer {
int in_eval; /* Are we inside EVAL? */
int in_exec; /* Are we inside EXEC? */
int propagate_in_transaction; /* Make sure we don't propagate nested MULTI/EXEC */
+ char *ignore_warnings; /* Config: warnings that should be ignored. */
+ int client_pause_in_transaction; /* Was a client pause executed during this Exec? */
/* Modules */
dict *moduleapi; /* Exported core APIs dictionary for modules. */
dict *sharedapi; /* Like moduleapi but containing the APIs that
@@ -1145,7 +1155,8 @@ struct redisServer {
int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a
client blocked on a module command needs
to be processed. */
- pid_t module_child_pid; /* PID of module child */
+ pid_t child_pid; /* PID of current child */
+ int child_type; /* Type of current child */
/* Networking */
int port; /* TCP listening port */
int tls_port; /* TLS listening port */
@@ -1170,8 +1181,9 @@ struct redisServer {
rax *clients_timeout_table; /* Radix tree for blocked clients timeouts. */
long fixed_time_expire; /* If > 0, expire keys against server.mstime. */
rax *clients_index; /* Active clients dictionary by client ID. */
- int clients_paused; /* True if clients are currently paused */
- mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
+ pause_type client_pause_type; /* The kind of pause currently in effect */
+ list *paused_clients; /* List of paused clients */
+ mstime_t client_pause_end_time; /* Time when we undo the pause */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
@@ -1227,11 +1239,13 @@ struct redisServer {
struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
+ size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */
size_t stat_module_cow_bytes; /* Copy on write bytes during module fork. */
uint64_t stat_clients_type_memory[CLIENT_TYPE_COUNT];/* Mem usage by type */
long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
+ long long stat_total_error_replies; /* Total number of issued error replies (command + rejected errors) */
long long stat_dump_payload_sanitizations; /* Number of deep dump payload integrity validations. */
long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */
long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */
@@ -1280,7 +1294,6 @@ struct redisServer {
off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */
int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
- pid_t aof_child_pid; /* PID if rewriting process */
list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
sds aof_buf; /* AOF buffer, written before entering the event loop */
int aof_fd; /* File descriptor of currently selected AOF file */
@@ -1310,7 +1323,6 @@ struct redisServer {
/* RDB persistence */
long long dirty; /* Changes to DB from the last save */
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
- pid_t rdb_child_pid; /* PID of RDB saving child */
struct saveparam *saveparams; /* Save points array for RDB */
int saveparamslen; /* Number of saving points */
char *rdb_filename; /* Name of RDB file */
@@ -1342,11 +1354,7 @@ struct redisServer {
* value means fractions of microseconds (on average). */
/* Pipe and data structures for child -> parent info sharing. */
int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
- struct {
- int process_type; /* AOF or RDB child? */
- size_t cow_size; /* Copy on write size. */
- unsigned long long magic; /* Magic value to make sure data is valid. */
- } child_info_data;
+ int child_info_nread; /* Num of bytes of the last read from pipe */
/* Propagation of commands in AOF / replication */
redisOpArray also_propagate; /* Additional command to propagate. */
/* Logging */
@@ -1386,7 +1394,7 @@ struct redisServer {
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
/* Replication (slave) */
char *masteruser; /* AUTH with this user and masterauth with master */
- char *masterauth; /* AUTH with this password with master */
+ sds masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
int repl_timeout; /* Timeout after N seconds of master idle */
@@ -1467,6 +1475,7 @@ struct redisServer {
int daylight_active; /* Currently in daylight saving time. */
mstime_t mstime; /* 'unixtime' in milliseconds. */
ustime_t ustime; /* 'unixtime' in microseconds. */
+ size_t blocking_op_nesting; /* Nesting level of blocking operation, used to reset blocked_last_cron. */
long long blocked_last_cron; /* Indicate the mstime of the last time we did cron jobs from a blocking operation */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
@@ -1580,7 +1589,7 @@ struct redisCommand {
int firstkey; /* The first argument that's a key (0 = no keys) */
int lastkey; /* The last argument that's a key */
int keystep; /* The step between first and last key */
- long long microseconds, calls;
+ long long microseconds, calls, rejected_calls, failed_calls;
int id; /* Command ID. This is a progressive ID starting from 0 that
is assigned at runtime, and is used in order to check
ACLs. A connection is able to execute a given command if
@@ -1588,6 +1597,10 @@ struct redisCommand {
bit set in the bitmap of allowed commands. */
};
+struct redisError {
+ long long count;
+};
+
struct redisFunctionSym {
char *name;
unsigned long pointer;
@@ -1753,6 +1766,7 @@ void addReplyBulkLongLong(client *c, long long ll);
void addReply(client *c, robj *obj);
void addReplySds(client *c, sds s);
void addReplyBulkSds(client *c, sds s);
+void setDeferredReplyBulkSds(client *c, void *node, sds s);
void addReplyErrorObject(client *c, robj *err);
void addReplyErrorSds(client *c, sds err);
void addReplyError(client *c, const char *err);
@@ -1791,8 +1805,10 @@ char *getClientTypeName(int class);
void flushSlavesOutputBuffers(void);
void disconnectSlaves(void);
int listenToPort(int port, int *fds, int *count);
-void pauseClients(mstime_t duration);
-int clientsArePaused(void);
+void pauseClients(mstime_t duration, pause_type type);
+void unpauseClients(void);
+int areClientsPaused(void);
+int checkClientPauseTimeoutAndReturnIfPaused(void);
void processEventsWhileBlocked(void);
void loadingCron(void);
void whileBlockedCron();
@@ -1826,12 +1842,14 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr
void disableTracking(client *c);
void trackingRememberKeys(client *c);
void trackingInvalidateKey(client *c, robj *keyobj);
-void trackingInvalidateKeysOnFlush(int dbid);
+void trackingInvalidateKeysOnFlush(int async);
+void freeTrackingRadixTreeAsync(rax *rt);
void trackingLimitUsedSlots(void);
uint64_t trackingGetTotalItems(void);
uint64_t trackingGetTotalKeys(void);
uint64_t trackingGetTotalPrefixes(void);
void trackingBroadcastInvalidationMessages(void);
+int checkPrefixCollisionsOrReply(client *c, robj **prefix, size_t numprefix);
/* List data type */
void listTypeTryConversion(robj *subject, robj *value);
@@ -1849,7 +1867,7 @@ void listTypeConvert(robj *subject, int enc);
robj *listTypeDup(robj *o);
void unblockClientWaitingData(client *c);
void popGenericCommand(client *c, int where);
-void listElementsRemoved(client *c, robj *key, int where, robj *o);
+void listElementsRemoved(client *c, robj *key, int where, robj *o, long count);
/* MULTI/EXEC/WATCH... */
void unwatchAllKeys(client *c);
@@ -1857,12 +1875,12 @@ void initClientMultiState(client *c);
void freeClientMultiState(client *c);
void queueMultiCommand(client *c);
void touchWatchedKey(redisDb *db, robj *key);
-void touchWatchedKeysOnFlush(int dbid);
+void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with);
void discardTransaction(client *c);
void flagTransaction(client *c);
void execCommandAbort(client *c, sds error);
-void execCommandPropagateMulti(client *c);
-void execCommandPropagateExec(client *c);
+void execCommandPropagateMulti(int dbid);
+void execCommandPropagateExec(int dbid);
void beforePropagateMultiOrExec(int multi);
/* Redis object implementation */
@@ -1993,13 +2011,15 @@ void restartAOFAfterSYNC();
/* Child info */
void openChildInfoPipe(void);
void closeChildInfoPipe(void);
-void sendChildInfo(int process_type);
+void sendChildInfo(int process_type, int on_exit, size_t cow_size);
void receiveChildInfo(void);
/* Fork helpers */
int redisFork(int type);
int hasActiveChildProcess();
-void sendChildCOWInfo(int ptype, char *pname);
+void resetChildState();
+int isMutuallyExclusiveChildType(int type);
+void sendChildCOWInfo(int ptype, int on_exit, char *pname);
/* acl.c -- Authentication related prototypes. */
extern rax *Users;
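With the child bookkeeping consolidated into a single child_pid/child_type pair, a fork site follows roughly the shape below. This is a sketch against the prototypes above (redisFork, sendChildCOWInfo) and not the exact rdb.c/aof.c code:

    int childpid = redisFork(CHILD_TYPE_RDB);
    if (childpid == 0) {
        /* Child: do the work, then report copy-on-write usage on exit. */
        sendChildCOWInfo(CHILD_TYPE_RDB, 1, "RDB");  /* on_exit = 1 */
        exit(0);
    } else if (childpid == -1) {
        /* Fork failed; the caller logs the error and bails out. */
    }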
@@ -2104,6 +2124,7 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
size_t freeMemoryGetNotCountedMemory();
int overMaxmemoryAfterAlloc(size_t moremem);
int processCommand(client *c);
+int processPendingCommandsAndResetClient(client *c);
void setupSignalHandlers(void);
void removeSignalHandlers(void);
struct redisCommand *lookupCommand(sds name);
@@ -2132,7 +2153,9 @@ void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
void populateCommandTable(void);
void resetCommandTableStats(void);
+void resetErrorTableStats(void);
void adjustOpenFilesLimit(void);
+void incrementErrorCount(const char *fullerr, size_t namelen);
void closeListeningSockets(int unlink_unix_socket);
void updateCachedTime(int update_daylight_info);
void resetServerStats(void);
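The new per-error statistics (the server.errors radix tree and stat_total_error_replies above) are fed through incrementErrorCount(), which takes the error name and its length. A hedged sketch of a call site, assuming the reply string begins with the error code and that <string.h> is available; the real call lives in the error-reply path:

    const char *err = "WRONGTYPE Operation against a key holding the wrong kind of value";
    const char *sp = strchr(err, ' ');
    size_t codelen = sp ? (size_t)(sp - err) : strlen(err);
    incrementErrorCount(err, codelen);   /* counts one "WRONGTYPE" reply */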
@@ -2260,7 +2283,7 @@ void discardDbBackup(dbBackup *buckup, int flags, void(callback)(void*));
int selectDb(client *c, int id);
void signalModifiedKey(client *c, redisDb *db, robj *key);
-void signalFlushedDb(int dbid);
+void signalFlushedDb(int dbid, int async);
unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
unsigned int countKeysInSlot(unsigned int hashslot);
unsigned int delKeysInSlot(unsigned int hashslot);
@@ -2507,6 +2530,7 @@ void zinterstoreCommand(client *c);
void zdiffstoreCommand(client *c);
void zunionCommand(client *c);
void zinterCommand(client *c);
+void zrangestoreCommand(client *c);
void zdiffCommand(client *c);
void zscanCommand(client *c);
void hkeysCommand(client *c);
@@ -2575,6 +2599,7 @@ void xsetidCommand(client *c);
void xackCommand(client *c);
void xpendingCommand(client *c);
void xclaimCommand(client *c);
+void xautoclaimCommand(client *c);
void xinfoCommand(client *c);
void xdelCommand(client *c);
void xtrimCommand(client *c);
diff --git a/src/slowlog.c b/src/slowlog.c
index 408456b14..b8c13f1cb 100644
--- a/src/slowlog.c
+++ b/src/slowlog.c
@@ -142,11 +142,15 @@ void slowlogReset(void) {
void slowlogCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
-"GET [count] -- Return top entries from the slowlog (default: 10)."
-" Entries are made of:",
-" id, timestamp, time in microseconds, arguments array, client IP and port, client name",
-"LEN -- Return the length of the slowlog.",
-"RESET -- Reset the slowlog.",
+"GET [<count>]",
+" Return top <count> entries from the slowlog (default: 10). Entries are",
+" made of:",
+" id, timestamp, time in microseconds, arguments array, client IP and port,",
+" client name",
+"LEN",
+" Return the length of the slowlog.",
+"RESET",
+" Reset the slowlog.",
NULL
};
addReplyHelp(c, help);
diff --git a/src/sort.c b/src/sort.c
index aeef53e6a..3b67cc639 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -256,7 +256,7 @@ void sortCommand(client *c) {
getop++;
j++;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
syntax_error++;
break;
}
@@ -270,7 +270,7 @@ void sortCommand(client *c) {
}
/* Lookup the key to sort. It must be of the right types */
- if (storekey)
+ if (!storekey)
sortval = lookupKeyRead(c->db,c->argv[1]);
else
sortval = lookupKeyWrite(c->db,c->argv[1]);
@@ -279,7 +279,7 @@ void sortCommand(client *c) {
sortval->type != OBJ_ZSET)
{
listRelease(operations);
- addReply(c,shared.wrongtypeerr);
+ addReplyErrorObject(c,shared.wrongtypeerr);
return;
}
diff --git a/src/t_hash.c b/src/t_hash.c
index ff9ac742e..51c7d6758 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -644,7 +644,7 @@ void hsetCommand(client *c) {
}
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
- server.dirty++;
+ server.dirty += (c->argc - 2)/2;
}
void hincrbyCommand(client *c) {
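Worked example for the dirty-counter change above: HSET key f1 v1 f2 v2 arrives with c->argc == 6, so (c->argc - 2)/2 == 2 and the dirty counter grows by the number of field/value pairs written rather than by one.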
diff --git a/src/t_list.c b/src/t_list.c
index 42b4f92df..f019a7ec0 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -206,7 +206,7 @@ robj *listTypeDup(robj *o) {
lobj->encoding = OBJ_ENCODING_QUICKLIST;
break;
default:
- serverPanic("Wrong encoding.");
+ serverPanic("Unknown list encoding");
break;
}
return lobj;
@@ -216,72 +216,58 @@ robj *listTypeDup(robj *o) {
* List Commands
*----------------------------------------------------------------------------*/
-void pushGenericCommand(client *c, int where) {
- int j, pushed = 0;
- robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
+/* Implements LPUSH/RPUSH/LPUSHX/RPUSHX.
+ * 'xx': push if key exists. */
+void pushGenericCommand(client *c, int where, int xx) {
+ int j;
- if (checkType(c,lobj,OBJ_LIST)) {
- return;
+ robj *lobj = lookupKeyWrite(c->db, c->argv[1]);
+ if (checkType(c,lobj,OBJ_LIST)) return;
+ if (!lobj) {
+ if (xx) {
+ addReply(c, shared.czero);
+ return;
+ }
+
+ lobj = createQuicklistObject();
+ quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+ dbAdd(c->db,c->argv[1],lobj);
}
for (j = 2; j < c->argc; j++) {
- if (!lobj) {
- lobj = createQuicklistObject();
- quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
- server.list_compress_depth);
- dbAdd(c->db,c->argv[1],lobj);
- }
listTypePush(lobj,c->argv[j],where);
- pushed++;
+ server.dirty++;
}
- addReplyLongLong(c, (lobj ? listTypeLength(lobj) : 0));
- if (pushed) {
- char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
- signalModifiedKey(c,c->db,c->argv[1]);
- notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
- }
- server.dirty += pushed;
+ addReplyLongLong(c, listTypeLength(lobj));
+
+ char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
+ signalModifiedKey(c,c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
}
+/* LPUSH <key> <element> [<element> ...] */
void lpushCommand(client *c) {
- pushGenericCommand(c,LIST_HEAD);
+ pushGenericCommand(c,LIST_HEAD,0);
}
+/* RPUSH <key> <element> [<element> ...] */
void rpushCommand(client *c) {
- pushGenericCommand(c,LIST_TAIL);
-}
-
-void pushxGenericCommand(client *c, int where) {
- int j, pushed = 0;
- robj *subject;
-
- if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,subject,OBJ_LIST)) return;
-
- for (j = 2; j < c->argc; j++) {
- listTypePush(subject,c->argv[j],where);
- pushed++;
- }
-
- addReplyLongLong(c,listTypeLength(subject));
-
- if (pushed) {
- char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
- signalModifiedKey(c,c->db,c->argv[1]);
- notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
- }
- server.dirty += pushed;
+ pushGenericCommand(c,LIST_TAIL,0);
}
+/* LPUSHX <key> <element> [<element> ...] */
void lpushxCommand(client *c) {
- pushxGenericCommand(c,LIST_HEAD);
+ pushGenericCommand(c,LIST_HEAD,1);
}
+/* RPUSHX <key> <element> [<element> ...] */
void rpushxCommand(client *c) {
- pushxGenericCommand(c,LIST_TAIL);
+ pushGenericCommand(c,LIST_TAIL,1);
}
+/* LINSERT <key> (BEFORE|AFTER) <pivot> <element> */
void linsertCommand(client *c) {
int where;
robj *subject;
@@ -294,7 +280,7 @@ void linsertCommand(client *c) {
} else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
where = LIST_HEAD;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -326,12 +312,14 @@ void linsertCommand(client *c) {
addReplyLongLong(c,listTypeLength(subject));
}
+/* LLEN <key> */
void llenCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
if (o == NULL || checkType(c,o,OBJ_LIST)) return;
addReplyLongLong(c,listTypeLength(o));
}
+/* LINDEX <key> <index> */
void lindexCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);
if (o == NULL || checkType(c,o,OBJ_LIST)) return;
@@ -359,6 +347,7 @@ void lindexCommand(client *c) {
}
}
+/* LSET <key> <index> <element> */
void lsetCommand(client *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
if (o == NULL || checkType(c,o,OBJ_LIST)) return;
@@ -373,7 +362,7 @@ void lsetCommand(client *c) {
int replaced = quicklistReplaceAtIndex(ql, index,
value->ptr, sdslen(value->ptr));
if (!replaced) {
- addReply(c,shared.outofrangeerr);
+ addReplyErrorObject(c,shared.outofrangeerr);
} else {
addReply(c,shared.ok);
signalModifiedKey(c,c->db,c->argv[1]);
@@ -385,7 +374,53 @@ void lsetCommand(client *c) {
}
}
-void listElementsRemoved(client *c, robj *key, int where, robj *o) {
+/* A helper for replying with a list's range between the inclusive start and end
+ * indexes as multi-bulk, with support for negative indexes. Note that start
+ * must be less than end or an empty array is returned. When the reverse
+ * argument is set to a non-zero value, the reply is reversed so that elements
+ * are returned from end to start. */
+void addListRangeReply(client *c, robj *o, long start, long end, int reverse) {
+ long rangelen, llen = listTypeLength(o);
+
+ /* Convert negative indexes. */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+
+ /* Invariant: start >= 0, so this test will be true when end < 0.
+ * The range is empty when start > end or start >= length. */
+ if (start > end || start >= llen) {
+ addReply(c,shared.emptyarray);
+ return;
+ }
+ if (end >= llen) end = llen-1;
+ rangelen = (end-start)+1;
+
+ /* Return the result in form of a multi-bulk reply */
+ addReplyArrayLen(c,rangelen);
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ int from = reverse ? end : start;
+ int direction = reverse ? LIST_HEAD : LIST_TAIL;
+ listTypeIterator *iter = listTypeInitIterator(o,from,direction);
+
+ while(rangelen--) {
+ listTypeEntry entry;
+ listTypeNext(iter, &entry);
+ quicklistEntry *qe = &entry.entry;
+ if (qe->value) {
+ addReplyBulkCBuffer(c,qe->value,qe->sz);
+ } else {
+ addReplyBulkLongLong(c,qe->longval);
+ }
+ }
+ listTypeReleaseIterator(iter);
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+}
+
+/* A housekeeping helper for list elements popping tasks. */
+void listElementsRemoved(client *c, robj *key, int where, robj *o, long count) {
char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
notifyKeyspaceEvent(NOTIFY_LIST, event, key, c->db->id);
@@ -394,78 +429,84 @@ void listElementsRemoved(client *c, robj *key, int where, robj *o) {
dbDelete(c->db, key);
}
signalModifiedKey(c, c->db, key);
- server.dirty++;
+ server.dirty += count;
}
+/* Implements the generic list pop operation for LPOP/RPOP.
+ * The where argument specifies which end of the list is operated on. An
+ * optional count may be provided as the third argument of the client's
+ * command. */
void popGenericCommand(client *c, int where) {
+ long count = 0;
+ robj *value;
+
+ if (c->argc > 3) {
+ addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
+ c->cmd->name);
+ return;
+ } else if (c->argc == 3) {
+ /* Parse the optional count argument. */
+ if (getPositiveLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK)
+ return;
+ if (count == 0) {
+ /* Fast exit path. */
+ addReplyNullArray(c);
+ return;
+ }
+ }
+
robj *o = lookupKeyWriteOrReply(c, c->argv[1], shared.null[c->resp]);
if (o == NULL || checkType(c, o, OBJ_LIST))
return;
- robj *value = listTypePop(o, where);
- if (value == NULL) {
- addReplyNull(c);
- } else {
+ if (!count) {
+ /* Pop a single element. This is POP's original behavior that replies
+ * with a bulk string. */
+ value = listTypePop(o,where);
+ serverAssert(value != NULL);
addReplyBulk(c,value);
decrRefCount(value);
- listElementsRemoved(c,c->argv[1],where,o);
+ listElementsRemoved(c,c->argv[1],where,o,1);
+ } else {
+ /* Pop a range of elements. An addition to the original POP command,
+ * which replies with a multi-bulk. */
+ long llen = listTypeLength(o);
+ long rangelen = (count > llen) ? llen : count;
+ long rangestart = (where == LIST_HEAD) ? 0 : -rangelen;
+ long rangeend = (where == LIST_HEAD) ? rangelen - 1 : -1;
+ int reverse = (where == LIST_HEAD) ? 0 : 1;
+
+ addListRangeReply(c,o,rangestart,rangeend,reverse);
+ quicklistDelRange(o->ptr,rangestart,rangelen);
+ listElementsRemoved(c,c->argv[1],where,o,rangelen);
}
}
+/* LPOP <key> [count] */
void lpopCommand(client *c) {
popGenericCommand(c,LIST_HEAD);
}
+/* RPOP <key> [count] */
void rpopCommand(client *c) {
popGenericCommand(c,LIST_TAIL);
}
+/* LRANGE <key> <start> <stop> */
void lrangeCommand(client *c) {
robj *o;
- long start, end, llen, rangelen;
+ long start, end;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray)) == NULL
|| checkType(c,o,OBJ_LIST)) return;
- llen = listTypeLength(o);
- /* convert negative indexes */
- if (start < 0) start = llen+start;
- if (end < 0) end = llen+end;
- if (start < 0) start = 0;
-
- /* Invariant: start >= 0, so this test will be true when end < 0.
- * The range is empty when start > end or start >= length. */
- if (start > end || start >= llen) {
- addReply(c,shared.emptyarray);
- return;
- }
- if (end >= llen) end = llen-1;
- rangelen = (end-start)+1;
-
- /* Return the result in form of a multi-bulk reply */
- addReplyArrayLen(c,rangelen);
- if (o->encoding == OBJ_ENCODING_QUICKLIST) {
- listTypeIterator *iter = listTypeInitIterator(o, start, LIST_TAIL);
-
- while(rangelen--) {
- listTypeEntry entry;
- listTypeNext(iter, &entry);
- quicklistEntry *qe = &entry.entry;
- if (qe->value) {
- addReplyBulkCBuffer(c,qe->value,qe->sz);
- } else {
- addReplyBulkLongLong(c,qe->longval);
- }
- }
- listTypeReleaseIterator(iter);
- } else {
- serverPanic("List encoding is not QUICKLIST!");
- }
+ addListRangeReply(c,o,start,end,0);
}
+/* LTRIM <key> <start> <stop> */
void ltrimCommand(client *c) {
robj *o;
long start, end, llen, ltrim, rtrim;
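Worked example for the count path in popGenericCommand above: RPOP mylist 3 on a five-element list gives rangelen = 3, rangestart = -3, rangeend = -1 and reverse = 1, so the reply lists the last three elements from tail to head before quicklistDelRange() removes them.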
@@ -508,7 +549,7 @@ void ltrimCommand(client *c) {
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
signalModifiedKey(c,c->db,c->argv[1]);
- server.dirty++;
+ server.dirty += (ltrim + rtrim);
addReply(c,shared.ok);
}
@@ -566,7 +607,7 @@ void lposCommand(client *c) {
return;
}
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -629,6 +670,7 @@ void lposCommand(client *c) {
}
}
+/* LREM <key> <count> <element> */
void lremCommand(client *c) {
robj *subject, *obj;
obj = c->argv[3];
@@ -698,7 +740,7 @@ int getListPositionFromObjectOrReply(client *c, robj *arg, int *position) {
} else if (strcasecmp(arg->ptr,"left") == 0) {
*position = LIST_HEAD;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return C_ERR;
}
return C_OK;
@@ -729,10 +771,6 @@ void lmoveGenericCommand(client *c, int wherefrom, int whereto) {
if (checkType(c,dobj,OBJ_LIST)) return;
value = listTypePop(sobj,wherefrom);
serverAssert(value); /* assertion for valgrind (avoid NPD) */
- /* We saved touched key, and protect it, since lmoveHandlePush
- * may change the client command argument vector (it does not
- * currently). */
- incrRefCount(touchedkey);
lmoveHandlePush(c,c->argv[2],dobj,value,whereto);
/* listTypePop returns an object with its refcount incremented */
@@ -749,7 +787,6 @@ void lmoveGenericCommand(client *c, int wherefrom, int whereto) {
touchedkey,c->db->id);
}
signalModifiedKey(c,c->db,touchedkey);
- decrRefCount(touchedkey);
server.dirty++;
if (c->cmd->proc == blmoveCommand) {
rewriteClientCommandVector(c,5,shared.lmove,
@@ -761,6 +798,7 @@ void lmoveGenericCommand(client *c, int wherefrom, int whereto) {
}
}
+/* LMOVE <source> <destination> (LEFT|RIGHT) (LEFT|RIGHT) */
void lmoveCommand(client *c) {
int wherefrom, whereto;
if (getListPositionFromObjectOrReply(c,c->argv[3],&wherefrom)
@@ -893,7 +931,7 @@ void blockingPopGenericCommand(client *c, int where) {
addReplyBulk(c,c->argv[j]);
addReplyBulk(c,value);
decrRefCount(value);
- listElementsRemoved(c,c->argv[j],where,o);
+ listElementsRemoved(c,c->argv[j],where,o,1);
/* Replicate it as an [LR]POP instead of B[LR]POP. */
rewriteClientCommandVector(c,2,
@@ -917,10 +955,12 @@ void blockingPopGenericCommand(client *c, int where) {
blockForKeys(c,BLOCKED_LIST,c->argv + 1,c->argc - 2,timeout,NULL,&pos,NULL);
}
+/* BLPOP <key> [<key> ...] <timeout> */
void blpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_HEAD);
}
+/* BRPOP <key> [<key> ...] <timeout> */
void brpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_TAIL);
}
@@ -947,6 +987,7 @@ void blmoveGenericCommand(client *c, int wherefrom, int whereto, mstime_t timeou
}
}
+/* BLMOVE <source> <destination> (LEFT|RIGHT) (LEFT|RIGHT) <timeout> */
void blmoveCommand(client *c) {
mstime_t timeout;
int wherefrom, whereto;
@@ -959,6 +1000,7 @@ void blmoveCommand(client *c) {
blmoveGenericCommand(c,wherefrom,whereto,timeout);
}
+/* BRPOPLPUSH <source> <destination> <timeout> */
void brpoplpushCommand(client *c) {
mstime_t timeout;
if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout,UNIT_SECONDS)
diff --git a/src/t_set.c b/src/t_set.c
index 7c71dfc2f..64bbbd3a0 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -476,7 +476,7 @@ void spopWithCountCommand(client *c) {
/* Generate an SPOP keyspace notification */
notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
- server.dirty += count;
+ server.dirty += (count >= size) ? size : count;
/* CASE 1:
* The number of requested elements is greater than or equal to
@@ -492,7 +492,6 @@ void spopWithCountCommand(client *c) {
/* Propagate this command as a DEL operation */
rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
signalModifiedKey(c,c->db,c->argv[1]);
- server.dirty++;
return;
}
@@ -594,7 +593,6 @@ void spopWithCountCommand(client *c) {
decrRefCount(propargv[0]);
preventCommandPropagation(c);
signalModifiedKey(c,c->db,c->argv[1]);
- server.dirty++;
}
void spopCommand(client *c) {
@@ -607,7 +605,7 @@ void spopCommand(client *c) {
spopWithCountCommand(c);
return;
} else if (c->argc > 3) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -803,7 +801,7 @@ void srandmemberCommand(client *c) {
srandmemberWithCountCommand(c);
return;
} else if (c->argc > 3) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
diff --git a/src/t_stream.c b/src/t_stream.c
index d61fb3eab..f991765eb 100644
--- a/src/t_stream.c
+++ b/src/t_stream.c
@@ -46,6 +46,8 @@
void streamFreeCG(streamCG *cg);
void streamFreeNACK(streamNACK *na);
size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
+int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq);
+int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq);
/* -----------------------------------------------------------------------
* Low level stream encoding: a radix tree of listpacks.
@@ -282,6 +284,65 @@ static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
#define lpGetInteger(ele) lpGetIntegerIfValid(ele, NULL)
+/* Get an edge streamID of a given listpack.
+ * 'master_id' is an input param, used to build the 'edge_id' output param */
+int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamID *edge_id)
+{
+ if (lp == NULL)
+ return 0;
+
+ unsigned char *lp_ele;
+
+ /* We need to seek either the first or the last entry depending
+ * on the direction of the iteration. */
+ if (first) {
+ /* Get the master fields count. */
+ lp_ele = lpFirst(lp); /* Seek items count */
+ lp_ele = lpNext(lp, lp_ele); /* Seek deleted count. */
+ lp_ele = lpNext(lp, lp_ele); /* Seek num fields. */
+ int64_t master_fields_count = lpGetInteger(lp_ele);
+ lp_ele = lpNext(lp, lp_ele); /* Seek first field. */
+
+ /* If we are iterating in normal order, skip the master fields
+ * to seek the first actual entry. */
+ for (int64_t i = 0; i < master_fields_count; i++)
+ lp_ele = lpNext(lp, lp_ele);
+
+ /* If we are going forward, skip the previous entry's
+ * lp-count field (or in case of the master entry, the zero
+ * term field) */
+ lp_ele = lpNext(lp, lp_ele);
+ if (lp_ele == NULL)
+ return 0;
+ } else {
+ /* If we are iterating in reverse direction, just seek the
+ * last part of the last entry in the listpack (that is, the
+ * fields count). */
+ lp_ele = lpLast(lp);
+
+ /* If we are going backward, read the number of elements this
+ * entry is composed of, and jump backward N times to seek
+ * its start. */
+ int64_t lp_count = lpGetInteger(lp_ele);
+ if (lp_count == 0) /* We reached the master entry. */
+ return 0;
+
+ while (lp_count--)
+ lp_ele = lpPrev(lp, lp_ele);
+ }
+
+ lp_ele = lpNext(lp, lp_ele); /* Seek ID (lp_ele currently points to 'flags'). */
+
+ /* Get the ID: it is encoded as difference between the master
+ * ID and this entry ID. */
+ streamID id = *master_id;
+ id.ms += lpGetInteger(lp_ele);
+ lp_ele = lpNext(lp, lp_ele);
+ id.seq += lpGetInteger(lp_ele);
+ *edge_id = id;
+ return 1;
+}
+
/* Debugging function to log the full content of a listpack. Useful
* for development and debugging. */
void streamLogListpackContent(unsigned char *lp) {
@@ -325,6 +386,39 @@ int streamCompareID(streamID *a, streamID *b) {
return 0;
}
+void streamGetEdgeID(stream *s, int first, streamID *edge_id)
+{
+ raxIterator ri;
+ raxStart(&ri, s->rax);
+ int empty;
+ if (first) {
+ raxSeek(&ri, "^", NULL, 0);
+ empty = !raxNext(&ri);
+ } else {
+ raxSeek(&ri, "$", NULL, 0);
+ empty = !raxPrev(&ri);
+ }
+
+ if (empty) {
+ /* Stream is empty, mark edge ID as lowest/highest possible. */
+ edge_id->ms = first ? UINT64_MAX : 0;
+ edge_id->seq = first ? UINT64_MAX : 0;
+ raxStop(&ri);
+ return;
+ }
+
+ unsigned char *lp = ri.data;
+
+ /* Read the master ID from the radix tree key. */
+ streamID master_id;
+ streamDecodeID(ri.key, &master_id);
+
+ /* Construct edge ID. */
+ lpGetEdgeStreamID(lp, first, &master_id, edge_id);
+
+ raxStop(&ri);
+}
+
/* Adds a new item into the stream 's' having the specified number of
* field-value pairs as specified in 'numfields' and stored into 'argv'.
* Returns the new entry ID populating the 'added_id' structure.
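A hedged usage sketch for streamGetEdgeID() above, fetching the lowest and highest IDs of a stream; the stream pointer 's' is assumed, and empty streams yield the sentinel values set inside the function:

    streamID first_id, last_id;
    streamGetEdgeID(s, 1, &first_id);  /* first=1: lowest ID, UINT64_MAX/UINT64_MAX if empty */
    streamGetEdgeID(s, 0, &last_id);   /* first=0: highest ID, 0-0 if empty */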
@@ -525,35 +619,96 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
return C_OK;
}
-/* Trim the stream 's' to have no more than maxlen elements, and return the
+typedef struct {
+ /* XADD options */
+ streamID id; /* User-provided ID, for XADD only. */
+ int id_given; /* Was an ID different than "*" specified? for XADD only. */
+ int no_mkstream; /* if set to 1 do not create new stream */
+
+ /* XADD + XTRIM common options */
+ int trim_strategy; /* TRIM_STRATEGY_* */
+ int trim_strategy_arg_idx; /* Index of the count in MAXLEN/MINID, for rewriting. */
+ int approx_trim; /* If 1 only delete whole radix tree nodes, so
+ * the trim argument is not applied verbatim. */
+ long long limit; /* Maximum amount of entries to trim. If 0, no limitation
+ * on the amount of trimming work is enforced. */
+ /* TRIM_STRATEGY_MAXLEN options */
+ long long maxlen; /* After trimming, leave stream at this length. */
+ /* TRIM_STRATEGY_MINID options */
+ streamID minid; /* Trim by ID (No stream entries with ID < 'minid' will remain) */
+} streamAddTrimArgs;
+
+#define TRIM_STRATEGY_NONE 0
+#define TRIM_STRATEGY_MAXLEN 1
+#define TRIM_STRATEGY_MINID 2
+
+/* Trim the stream 's' according to args->trim_strategy, and return the
* number of elements removed from the stream. The 'approx' option, if non-zero,
* specifies that the trimming must be performed in an approximated way in
* order to maximize performance. This means that the stream may contain
- * more elements than 'maxlen', and elements are only removed if we can remove
+ * entries with IDs < 'id' in case of MINID (or more elements than 'maxlen'
+ * in case of MAXLEN), and elements are only removed if we can remove
* a *whole* node of the radix tree. The elements are removed from the head
* of the stream (older elements).
*
* The function may return zero if:
*
- * 1) The stream is already shorter or equal to the specified max length.
- * 2) The 'approx' option is true and the head node had not enough elements
- * to be deleted, leaving the stream with a number of elements >= maxlen.
+ * 1) The minimal entry ID of the stream is already >= 'id' (MINID); or
+ * 2) The stream is already shorter or equal to the specified max length (MAXLEN); or
+ * 3) The 'approx' option is true and the head node did not have enough elements
+ * to be deleted.
+ *
+ * args->limit is the maximum number of entries to delete. The purpose is to
+ * prevent this function from taking too long.
+ * If 'limit' is 0 then we do not limit the number of deleted entries.
+ * Much like the 'approx', if 'limit' is smaller than the number of entries
+ * that should be trimmed, there is a chance we will still have entries with
+ * IDs < 'id' (or number of elements >= maxlen in case of MAXLEN).
*/
-int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
- if (s->length <= maxlen) return 0;
+int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
+ size_t maxlen = args->maxlen;
+ streamID *id = &args->minid;
+ int approx = args->approx_trim;
+ int64_t limit = args->limit;
+ int trim_strategy = args->trim_strategy;
+
+ if (trim_strategy == TRIM_STRATEGY_NONE)
+ return 0;
raxIterator ri;
raxStart(&ri,s->rax);
raxSeek(&ri,"^",NULL,0);
int64_t deleted = 0;
- while(s->length > maxlen && raxNext(&ri)) {
+ while (raxNext(&ri)) {
+ /* Check if we exceeded the amount of work we could do */
+ if (limit && deleted >= limit)
+ break;
+
+ if (trim_strategy == TRIM_STRATEGY_MAXLEN && s->length <= maxlen)
+ break;
+
unsigned char *lp = ri.data, *p = lpFirst(lp);
int64_t entries = lpGetInteger(p);
- /* Check if we can remove the whole node, and still have at
- * least maxlen elements. */
- if (s->length - entries >= maxlen) {
+ /* Check if we can remove the whole node. */
+ int remove_node;
+ streamID master_id = {0}; /* For MINID */
+ if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+ remove_node = s->length - entries >= maxlen;
+ } else {
+ /* Read the master ID from the radix tree key. */
+ streamDecodeID(ri.key, &master_id);
+
+ /* Read last ID. */
+ streamID last_id;
+ lpGetEdgeStreamID(lp, 0, &master_id, &last_id);
+
+ /* We can remove the entire node if its last ID < 'id' */
+ remove_node = streamCompareID(&last_id, id) < 0;
+ }
+
+ if (remove_node) {
lpFree(lp);
raxRemove(s->rax,ri.key,ri.key_len,NULL);
raxSeek(&ri,">=",ri.key,ri.key_len);
@@ -566,19 +721,15 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
* stop here. */
if (approx) break;
- /* Otherwise, we have to mark single entries inside the listpack
- * as deleted. We start by updating the entries/deleted counters. */
- int64_t to_delete = s->length - maxlen;
- serverAssert(to_delete < entries);
- lp = lpReplaceInteger(lp,&p,entries-to_delete);
- p = lpNext(lp,p); /* Seek deleted field. */
- int64_t marked_deleted = lpGetInteger(p);
- lp = lpReplaceInteger(lp,&p,marked_deleted+to_delete);
- p = lpNext(lp,p); /* Seek num-of-fields in the master entry. */
+ /* Now we have to trim entries from within 'lp' */
+ int64_t deleted_from_lp = 0;
+
+ p = lpNext(lp, p); /* Skip deleted field. */
+ p = lpNext(lp, p); /* Skip num-of-fields in the master entry. */
/* Skip all the master fields. */
int64_t master_fields_count = lpGetInteger(p);
- p = lpNext(lp,p); /* Seek the first field. */
+ p = lpNext(lp,p); /* Skip the first field. */
for (int64_t j = 0; j < master_fields_count; j++)
p = lpNext(lp,p); /* Skip all master fields. */
p = lpNext(lp,p); /* Skip the zero master entry terminator. */
@@ -586,37 +737,72 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
/* 'p' is now pointing to the first entry inside the listpack.
* We have to run entry after entry, marking entries as deleted
* if they are already not deleted. */
- while(p) {
+ while (p) {
+ /* We keep a copy of p (which points to the flags part) in order to
+ * update it after (and if) we actually remove the entry */
+ unsigned char *pcopy = p;
+
int flags = lpGetInteger(p);
+ p = lpNext(lp, p); /* Skip flags. */
int to_skip;
- /* Mark the entry as deleted. */
- if (!(flags & STREAM_ITEM_FLAG_DELETED)) {
- flags |= STREAM_ITEM_FLAG_DELETED;
- lp = lpReplaceInteger(lp,&p,flags);
- deleted++;
- s->length--;
- if (s->length <= maxlen) break; /* Enough entries deleted. */
+ int ms_delta = lpGetInteger(p);
+ p = lpNext(lp, p); /* Skip ID ms delta */
+ int seq_delta = lpGetInteger(p);
+ p = lpNext(lp, p); /* Skip ID seq delta */
+
+ streamID currid = {0}; /* For MINID */
+ if (trim_strategy == TRIM_STRATEGY_MINID) {
+ currid.ms = master_id.ms + ms_delta;
+ currid.seq = master_id.seq + seq_delta;
}
- p = lpNext(lp,p); /* Skip ID ms delta. */
- p = lpNext(lp,p); /* Skip ID seq delta. */
- p = lpNext(lp,p); /* Seek num-fields or values (if compressed). */
+ int stop;
+ if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+ stop = s->length <= maxlen;
+ } else {
+ /* Following IDs will definitely be greater because the rax
+ * tree is sorted, no point in continuing. */
+ stop = streamCompareID(&currid, id) >= 0;
+ }
+ if (stop)
+ break;
+
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
to_skip = master_fields_count;
} else {
- to_skip = lpGetInteger(p);
- to_skip = 1+(to_skip*2);
+ to_skip = lpGetInteger(p); /* Get num-fields. */
+ p = lpNext(lp,p); /* Skip num-fields. */
+ to_skip *= 2; /* Fields and values. */
}
while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */
p = lpNext(lp,p); /* Skip the final lp-count field. */
+
+ /* Mark the entry as deleted. */
+ if (!(flags & STREAM_ITEM_FLAG_DELETED)) {
+ intptr_t delta = p - lp;
+ flags |= STREAM_ITEM_FLAG_DELETED;
+ lp = lpReplaceInteger(lp, &pcopy, flags);
+ deleted_from_lp++;
+ s->length--;
+ p = lp + delta;
+ }
}
+ deleted += deleted_from_lp;
+
+ /* Now we update the entries/deleted counters. */
+ p = lpFirst(lp);
+ lp = lpReplaceInteger(lp,&p,entries-deleted_from_lp);
+ p = lpNext(lp,p); /* Skip deleted field. */
+ int64_t marked_deleted = lpGetInteger(p);
+ lp = lpReplaceInteger(lp,&p,marked_deleted+deleted_from_lp);
+ p = lpNext(lp,p); /* Skip num-of-fields in the master entry. */
/* Here we should perform garbage collection in case at this point
* there are too many entries deleted inside the listpack. */
- entries -= to_delete;
- marked_deleted += to_delete;
+ entries -= deleted_from_lp;
+ marked_deleted += deleted_from_lp;
if (entries + marked_deleted > 10 && marked_deleted > entries/2) {
/* TODO: perform a garbage collection. */
}
@@ -632,6 +818,142 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
return deleted;
}
+/* Parse the arguments of XADD/XTRIM.
+ *
+ * See streamAddTrimArgs for more details about the arguments handled.
+ *
+ * This function returns the position of the ID argument (relevant only to XADD).
+ * On error -1 is returned and a reply is sent. */
+static int streamParseAddOrTrimArgsOrReply(client *c, streamAddTrimArgs *args, int xadd) {
+ /* Initialize arguments to defaults */
+ memset(args, 0, sizeof(*args));
+
+ /* Parse options. */
+ int i = 2; /* This is the first argument position where we could
+ find an option, or the ID. */
+ int limit_given = 0;
+ for (; i < c->argc; i++) {
+ int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
+ char *opt = c->argv[i]->ptr;
+ if (xadd && opt[0] == '*' && opt[1] == '\0') {
+ /* This is just a fast path for the common case of auto-ID
+ * creation. */
+ break;
+ } else if (!strcasecmp(opt,"maxlen") && moreargs) {
+ if (args->trim_strategy != TRIM_STRATEGY_NONE) {
+ addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible");
+ return -1;
+ }
+ args->approx_trim = 0;
+ char *next = c->argv[i+1]->ptr;
+ /* Check for the form MAXLEN ~ <count>. */
+ if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+ args->approx_trim = 1;
+ i++;
+ } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
+ i++;
+ }
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->maxlen,NULL)
+ != C_OK) return -1;
+
+ if (args->maxlen < 0) {
+ addReplyError(c,"The MAXLEN argument must be >= 0.");
+ return -1;
+ }
+ i++;
+ args->trim_strategy = TRIM_STRATEGY_MAXLEN;
+ args->trim_strategy_arg_idx = i;
+ } else if (!strcasecmp(opt,"minid") && moreargs) {
+ if (args->trim_strategy != TRIM_STRATEGY_NONE) {
+ addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible");
+ return -1;
+ }
+ args->approx_trim = 0;
+ char *next = c->argv[i+1]->ptr;
+ /* Check for the form MINID ~ <id>|<age>. */
+ if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+ args->approx_trim = 1;
+ i++;
+ } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
+ i++;
+ }
+
+ if (streamParseStrictIDOrReply(c,c->argv[i+1],&args->minid,0) != C_OK)
+ return -1;
+
+ i++;
+ args->trim_strategy = TRIM_STRATEGY_MINID;
+ args->trim_strategy_arg_idx = i;
+ } else if (!strcasecmp(opt,"limit") && moreargs) {
+ /* Note about LIMIT: If it was not provided by the caller we set
+ * it to 100*server.stream_node_max_entries, and that's to prevent the
+ * trimming from taking too long, at the expense of not deleting entries
+ * that should be trimmed.
+ * If the user wanted exact trimming (i.e. no '~') we never limit the number
+ * of trimmed entries */
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->limit,NULL) != C_OK)
+ return -1;
+
+ if (args->limit < 0) {
+ addReplyError(c,"The LIMIT argument must be >= 0.");
+ return -1;
+ }
+ limit_given = 1;
+ i++;
+ } else if (xadd && !strcasecmp(opt,"nomkstream")) {
+ args->no_mkstream = 1;
+ } else if (xadd) {
+ /* If we are here is a syntax error or a valid ID. */
+ if (streamParseStrictIDOrReply(c,c->argv[i],&args->id,0) != C_OK)
+ return -1;
+ args->id_given = 1;
+ break;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return -1;
+ }
+ }
+
+ if (args->limit && args->trim_strategy == TRIM_STRATEGY_NONE) {
+ addReplyError(c,"syntax error, LIMIT cannot be used without specifying a trimming strategy");
+ return -1;
+ }
+
+ if (!xadd && args->trim_strategy == TRIM_STRATEGY_NONE) {
+ addReplyError(c,"syntax error, XTRIM must be called with a trimming strategy");
+ return -1;
+ }
+
+ if (c == server.master || c->id == CLIENT_ID_AOF) {
+ /* If command came from master or from AOF we must not enforce maxnodes
+ * (The maxlen/minid argument was re-written to make sure there's no
+ * inconsistency). */
+ args->limit = 0;
+ } else {
+ /* We need to set the limit (only if we got '~') */
+ if (limit_given) {
+ if (!args->approx_trim) {
+ /* LIMIT was provided without ~ */
+ addReplyError(c,"syntax error, LIMIT cannot be used without the special ~ option");
+ return -1;
+ }
+ } else {
+ /* User didn't provide LIMIT, we must set it. */
+
+ if (args->approx_trim) {
+ /* In order to prevent trimming from doing too much work and causing
+ * latency spikes we limit the amount of work it can do */
+ args->limit = 100 * server.stream_node_max_entries; /* Maximum 100 rax nodes. */
+ } else {
+ /* No LIMIT for exact trimming */
+ args->limit = 0;
+ }
+ }
+ }
+
+ return i;
+}
+
/* Initialize the stream iterator, so that we can call iterating functions
* to get the next items. This requires a corresponding streamIteratorStop()
* at the end. The 'rev' parameter controls the direction. If it's zero the
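A note on the LIMIT defaults produced by the parser above: approximate trimming ('~') without an explicit LIMIT is capped at 100 * server.stream_node_max_entries deleted entries per call, while exact trimming always gets limit = 0 (unbounded). With the default stream-node-max-entries of 100 that means, for example, that XADD mystream MAXLEN ~ 1000 * field value trims at most 10000 entries in one invocation.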
@@ -960,6 +1282,11 @@ void addReplyStreamID(client *c, streamID *id) {
addReplyBulkSds(c,replyid);
}
+void setDeferredReplyStreamID(client *c, void *dr, streamID *id) {
+ sds replyid = sdscatfmt(sdsempty(),"%U-%U",id->ms,id->seq);
+ setDeferredReplyBulkSds(c, dr, replyid);
+}
+
/* Similar to the above function, but just creates an object, usually useful
* for replication purposes to create arguments. */
robj *createObjectFromStreamID(streamID *id) {
@@ -1370,68 +1697,36 @@ int streamParseIntervalIDOrReply(client *c, robj *o, streamID *id, int *exclude,
return C_OK;
}
-/* We propagate MAXLEN ~ <count> as MAXLEN = <resulting-len-of-stream>
- * otherwise trimming is no longer determinsitic on replicas / AOF. */
-void streamRewriteApproxMaxlen(client *c, stream *s, int maxlen_arg_idx) {
- robj *maxlen_obj = createStringObjectFromLongLong(s->length);
+void streamRewriteApproxSpecifier(client *c, int idx) {
robj *equal_obj = createStringObject("=",1);
+ rewriteClientCommandArgument(c,idx,equal_obj);
+ decrRefCount(equal_obj);
+}
- rewriteClientCommandArgument(c,maxlen_arg_idx,maxlen_obj);
- rewriteClientCommandArgument(c,maxlen_arg_idx-1,equal_obj);
+/* We propagate MAXLEN/MINID ~ <count> as MAXLEN/MINID = <resulting-len-of-stream>
+ * otherwise trimming is no longer deterministic on replicas / AOF. */
+void streamRewriteTrimArgument(client *c, stream *s, int trim_strategy, int idx) {
+ robj *arg;
+ if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+ arg = createStringObjectFromLongLong(s->length);
+ } else {
+ streamID first_id;
+ streamGetEdgeID(s, 1, &first_id);
+ arg = createObjectFromStreamID(&first_id);
+ }
- decrRefCount(equal_obj);
- decrRefCount(maxlen_obj);
+ rewriteClientCommandArgument(c,idx,arg);
+ decrRefCount(arg);
}
-/* XADD key [MAXLEN [~|=] <count>] [NOMKSTREAM] <ID or *> [field value] [field value] ... */
+/* XADD key [(MAXLEN [~|=] <count> | MINID [~|=] <id>) [LIMIT <entries>]] [NOMKSTREAM] <ID or *> [field value] [field value] ... */
void xaddCommand(client *c) {
- streamID id;
- int id_given = 0; /* Was an ID different than "*" specified? */
- long long maxlen = -1; /* If left to -1 no trimming is performed. */
- int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
- the maximum length is not applied verbatim. */
- int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */
- int no_mkstream = 0; /* if set to 1 do not create new stream */
-
/* Parse options. */
- int i = 2; /* This is the first argument position where we could
- find an option, or the ID. */
- for (; i < c->argc; i++) {
- int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
- char *opt = c->argv[i]->ptr;
- if (opt[0] == '*' && opt[1] == '\0') {
- /* This is just a fast path for the common case of auto-ID
- * creation. */
- break;
- } else if (!strcasecmp(opt,"maxlen") && moreargs) {
- approx_maxlen = 0;
- char *next = c->argv[i+1]->ptr;
- /* Check for the form MAXLEN ~ <count>. */
- if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
- approx_maxlen = 1;
- i++;
- } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
- i++;
- }
- if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
- != C_OK) return;
-
- if (maxlen < 0) {
- addReplyError(c,"The MAXLEN argument must be >= 0.");
- return;
- }
- i++;
- maxlen_arg_idx = i;
- } else if (!strcasecmp(opt,"nomkstream")) {
- no_mkstream = 1;
- } else {
- /* If we are here is a syntax error or a valid ID. */
- if (streamParseStrictIDOrReply(c,c->argv[i],&id,0) != C_OK) return;
- id_given = 1;
- break;
- }
- }
- int field_pos = i+1;
+ streamAddTrimArgs parsed_args;
+ int idpos = streamParseAddOrTrimArgsOrReply(c, &parsed_args, 1);
+ if (idpos < 0)
+ return; /* streamParseAddOrTrimArgsOrReply already replied. */
+ int field_pos = idpos+1; /* The ID is always one argument before the first field */
/* Check arity. */
if ((c->argc - field_pos) < 2 || ((c->argc-field_pos) % 2) == 1) {
@@ -1442,7 +1737,9 @@ void xaddCommand(client *c) {
/* Return ASAP if minimal ID (0-0) was given so we avoid possibly creating
* a new stream and have streamAppendItem fail, leaving an empty key in the
* database. */
- if (id_given && id.ms == 0 && id.seq == 0) {
+ if (parsed_args.id_given &&
+ parsed_args.id.ms == 0 && parsed_args.id.seq == 0)
+ {
addReplyError(c,"The ID specified in XADD must be greater than 0-0");
return;
}
@@ -1450,7 +1747,7 @@ void xaddCommand(client *c) {
/* Lookup the stream at key. */
robj *o;
stream *s;
- if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1],no_mkstream)) == NULL) return;
+ if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1],parsed_args.no_mkstream)) == NULL) return;
s = o->ptr;
/* Return ASAP if the stream has reached the last possible ID */
@@ -1461,8 +1758,9 @@ void xaddCommand(client *c) {
}
/* Append using the low level function and return the ID. */
+ streamID id;
if (streamAppendItem(s,c->argv+field_pos,(c->argc-field_pos)/2,
- &id, id_given ? &id : NULL)
+ &id, parsed_args.id_given ? &parsed_args.id : NULL)
== C_ERR)
{
addReplyError(c,"The ID specified in XADD is equal or smaller than the "
@@ -1475,18 +1773,26 @@ void xaddCommand(client *c) {
notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id);
server.dirty++;
- if (maxlen >= 0) {
- /* Notify xtrim event if needed. */
- if (streamTrimByLength(s,maxlen,approx_maxlen)) {
+ /* Trim if needed. */
+ if (parsed_args.trim_strategy != TRIM_STRATEGY_NONE) {
+ if (streamTrim(s, &parsed_args)) {
notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
}
- if (approx_maxlen) streamRewriteApproxMaxlen(c,s,maxlen_arg_idx);
+ if (parsed_args.approx_trim) {
+ /* In case our trimming was limited (by LIMIT or by ~) we must
+ * re-write the relevant trim argument to make sure there will be
+ * no inconsistencies in AOF loading or in the replica.
+ * It's enough to check only args->approx because there is no
+ * way LIMIT is given without the ~ option. */
+ streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1);
+ streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx);
+ }
}
/* Let's rewrite the ID argument with the one actually generated for
* AOF/replication propagation. */
robj *idarg = createObjectFromStreamID(&id);
- rewriteClientCommandArgument(c,i,idarg);
+ rewriteClientCommandArgument(c,idpos,idarg);
decrRefCount(idarg);
/* We need to signal to blocked clients that there is new data on this
@@ -1534,7 +1840,7 @@ void xrangeGenericCommand(client *c, int rev) {
if (count < 0) count = 0;
j++; /* Consume additional arg. */
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -1643,14 +1949,14 @@ void xreadCommand(client *c) {
}
noack = 1;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
/* STREAMS option is mandatory. */
if (streams_arg == 0) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -1981,19 +2287,9 @@ uint64_t streamDelConsumer(streamCG *cg, sds name) {
/* XGROUP CREATE <key> <groupname> <id or $> [MKSTREAM]
* XGROUP SETID <key> <groupname> <id or $>
* XGROUP DESTROY <key> <groupname>
- * CREATECONSUMER <key> <groupname> <consumer>
+ * XGROUP CREATECONSUMER <key> <groupname> <consumer>
* XGROUP DELCONSUMER <key> <groupname> <consumername> */
void xgroupCommand(client *c) {
- const char *help[] = {
-"CREATE <key> <groupname> <id or $> [opt] -- Create a new consumer group.",
-" option MKSTREAM: create the empty stream if it does not exist.",
-"SETID <key> <groupname> <id or $> -- Set the current group ID.",
-"DESTROY <key> <groupname> -- Remove the specified group.",
-"CREATECONSUMER <key> <groupname> <consumer> -- Create new consumer in the specified group.",
-"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified consumer.",
-"HELP -- Prints this help.",
-NULL
- };
stream *s = NULL;
sds grpname = NULL;
streamCG *cg = NULL;
@@ -2047,7 +2343,24 @@ NULL
}
/* Dispatch the different subcommands. */
- if (!strcasecmp(opt,"CREATE") && (c->argc == 5 || c->argc == 6)) {
+ if (c->argc == 2 && !strcasecmp(opt,"HELP")) {
+ const char *help[] = {
+"CREATE <key> <groupname> <id|$> [option]",
+" Create a new consumer group. Options are:",
+" * MKSTREAM",
+" Create the empty stream if it does not exist.",
+"CREATECONSUMER <key> <groupname> <consumer>",
+" Create a new consumer in the specified group.",
+"DELCONSUMER <key> <groupname> <consumer>",
+" Remove the specified consumer.",
+"DESTROY <key> <groupname>"
+" Remove the specified group.",
+"SETID <key> <groupname> <id|$>",
+" Set the current group ID.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(opt,"CREATE") && (c->argc == 5 || c->argc == 6)) {
streamID id;
if (!strcmp(c->argv[4]->ptr,"$")) {
if (s) {
@@ -2076,8 +2389,7 @@ NULL
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-create",
c->argv[2],c->db->id);
} else {
- addReplySds(c,
- sdsnew("-BUSYGROUP Consumer Group name already exists\r\n"));
+ addReplyError(c,"-BUSYGROUP Consumer Group name already exists");
}
} else if (!strcasecmp(opt,"SETID") && c->argc == 5) {
streamID id;
@@ -2120,8 +2432,6 @@ NULL
server.dirty++;
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-delconsumer",
c->argv[2],c->db->id);
- } else if (c->argc == 2 && !strcasecmp(opt,"HELP")) {
- addReplyHelp(c, help);
} else {
addReplySubcommandSyntaxError(c);
}
@@ -2237,7 +2547,7 @@ void xpendingCommand(client *c) {
/* Start and stop, and the consumer, can be omitted. Also the IDLE modifier. */
if (c->argc != 3 && (c->argc < 6 || c->argc > 9)) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -2251,7 +2561,7 @@ void xpendingCommand(client *c) {
return;
if (c->argc < 8) {
/* If IDLE was provided we must have at least 'start end count' */
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
/* Search for rest of arguments after 'IDLE <idle>' */
@@ -2662,6 +2972,160 @@ cleanup:
if (ids != static_ids) zfree(ids);
}
+/* XAUTOCLAIM <key> <group> <consumer> <min-idle-time> <start> [COUNT <count>] [JUSTID]
+ *
+ * Gets ownership of one or multiple messages in the Pending Entries List
+ * of a given stream consumer group.
+ *
+ * For each PEL entry, if its idle time is greater than or equal to <min-idle-time>,
+ * then the message's new owner becomes the specified <consumer>.
+ * If the minimum idle time specified is zero, messages are claimed
+ * regardless of their idle time.
+ *
+ * This command creates the consumer as a side effect if it does not yet
+ * exist. Moreover, the command resets the idle time of the message to 0.
+ *
+ * The command returns an array of messages that the user
+ * successfully claimed, so that the caller is able to understand
+ * what messages it is now in charge of. */
+void xautoclaimCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ long long minidle; /* Minimum idle time argument, in milliseconds. */
+ long count = 100; /* Maximum entries to claim. */
+ streamID startid;
+ int startex;
+ int justid = 0;
+
+ /* Parse idle/start/end/count arguments ASAP if needed, in order to report
+ * syntax errors before any other error. */
+ if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle,"Invalid min-idle-time argument for XAUTOCLAIM") != C_OK)
+ return;
+ if (minidle < 0) minidle = 0;
+
+ if (streamParseIntervalIDOrReply(c,c->argv[5],&startid,&startex,0) != C_OK)
+ return;
+ if (startex && streamIncrID(&startid) != C_OK) {
+ addReplyError(c,"invalid start ID for the interval");
+ return;
+ }
+
+ int j = 6; /* options start at argv[6] */
+ while(j < c->argc) {
+ int moreargs = (c->argc-1) - j; /* Number of additional arguments. */
+ char *opt = c->argv[j]->ptr;
+ if (!strcasecmp(opt,"COUNT") && moreargs) {
+ if (getPositiveLongFromObjectOrReply(c,c->argv[j+1],&count,NULL) != C_OK)
+ return;
+ if (count == 0) {
+ addReplyError(c,"COUNT must be > 0");
+ return;
+ }
+ j++;
+ } else if (!strcasecmp(opt,"JUSTID")) {
+ justid = 1;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ j++;
+ }
+
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM))
+ return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Send an error given that the group creation
+ * is mandatory. */
+ if (o == NULL || group == NULL) {
+ addReplyErrorFormat(c,"-NOGROUP No such key '%s' or consumer group '%s'",
+ (char*)c->argv[1]->ptr,
+ (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ /* Do the actual claiming. */
+ streamConsumer *consumer = NULL;
+ long long attempts = count*10;
+
+ addReplyArrayLen(c, 2);
+ void *endidptr = addReplyDeferredLen(c);
+ void *arraylenptr = addReplyDeferredLen(c);
+
+ unsigned char startkey[sizeof(streamID)];
+ streamEncodeID(startkey,&startid);
+ raxIterator ri;
+ raxStart(&ri,group->pel);
+ raxSeek(&ri,">=",startkey,sizeof(startkey));
+ size_t arraylen = 0;
+ mstime_t now = mstime();
+ while (attempts-- && count && raxNext(&ri)) {
+ streamNACK *nack = ri.data;
+
+ if (minidle) {
+ mstime_t this_idle = now - nack->delivery_time;
+ if (this_idle < minidle)
+ continue;
+ }
+
+ streamID id;
+ streamDecodeID(ri.key, &id);
+
+ if (consumer == NULL)
+ consumer = streamLookupConsumer(group,c->argv[3]->ptr,SLC_NONE,NULL);
+ if (nack->consumer != consumer) {
+ /* Remove the entry from the old consumer.
+ * Note that nack->consumer is NULL if we created the
+ * NACK above because of the FORCE option. */
+ if (nack->consumer)
+ raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL);
+ }
+
+ /* Update the consumer and idle time. */
+ nack->delivery_time = now;
+ nack->delivery_count++;
+
+ if (nack->consumer != consumer) {
+ /* Add the entry in the new consumer local PEL. */
+ raxInsert(consumer->pel,ri.key,ri.key_len,nack,NULL);
+ nack->consumer = consumer;
+ }
+
+ /* Send the reply for this entry. */
+ if (justid) {
+ addReplyStreamID(c,&id);
+ } else {
+ size_t emitted =
+ streamReplyWithRange(c,o->ptr,&id,&id,1,0,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ if (!emitted)
+ addReplyNull(c);
+ }
+ arraylen++;
+ count--;
+
+ /* Propagate this change. */
+ robj *idstr = createObjectFromStreamID(&id);
+ streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack);
+ decrRefCount(idstr);
+ server.dirty++;
+ }
+
+ streamID endid;
+ if (raxEOF(&ri)) {
+ endid.ms = endid.seq = 0;
+ } else {
+ streamDecodeID(ri.key, &endid);
+ }
+ raxStop(&ri);
+
+ setDeferredArrayLen(c,arraylenptr,arraylen);
+ setDeferredReplyStreamID(c,endidptr,&endid);
+
+ preventCommandPropagation(c);
+}
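
For reference, a minimal client-side sketch (not part of this patch) of how a worker could drive the XAUTOCLAIM cursor implemented above, using hiredis. The stream, group and consumer names are made up; the reply layout relied on here (next cursor first, claimed entries second, a cursor of 0-0 meaning the scan completed) is the one built by the two deferred replies in xautoclaimCommand.

#include <stdio.h>
#include <string.h>
#include <hiredis/hiredis.h>

int main(void) {
    redisContext *ctx = redisConnect("127.0.0.1", 6379);
    if (ctx == NULL || ctx->err) {
        fprintf(stderr, "connection error\n");
        return 1;
    }

    char cursor[64] = "0-0"; /* Start scanning the PEL from the beginning. */
    do {
        /* Claim entries that have been idle for at least 60000 ms, 25 at a time. */
        redisReply *reply = redisCommand(ctx,
            "XAUTOCLAIM mystream mygroup consumer1 60000 %s COUNT 25", cursor);
        if (reply == NULL || reply->type != REDIS_REPLY_ARRAY || reply->elements < 2) {
            if (reply) freeReplyObject(reply);
            break;
        }
        /* element[0] is the cursor for the next call, element[1] the claimed entries. */
        snprintf(cursor, sizeof(cursor), "%s", reply->element[0]->str);
        printf("claimed %zu entries, next cursor %s\n",
               reply->element[1]->elements, cursor);
        freeReplyObject(reply);
    } while (strcmp(cursor, "0-0") != 0);

    redisFree(ctx);
    return 0;
}
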
/* XDEL <key> [<ID1> <ID2> ... <IDN>]
*
@@ -2708,14 +3172,25 @@ cleanup:
*
* List of options:
*
+ * Trim strategies:
+ *
* MAXLEN [~|=] <count> -- Trim so that the stream will be capped at
* the specified length. Use ~ before the
* count in order to demand approximated trimming
* (like XADD MAXLEN option).
+ * MINID [~|=] <id> -- Trim so that the stream will not contain entries
+ * with IDs smaller than 'id'. Use ~ before the
+ * count in order to demand approximated trimming
+ * (like XADD MINID option).
+ *
+ * Other options:
+ *
+ * LIMIT <entries> -- The maximum number of entries to trim.
+ * 0 means unlimited. Unless specified, it is set
+ * to a default of 100*server.stream_node_max_entries,
+ * and that's in order to keep the trimming time sane.
+ * Has meaning only if `~` was provided.
*/
-
-#define TRIM_STRATEGY_NONE 0
-#define TRIM_STRATEGY_MAXLEN 1
void xtrimCommand(client *c) {
robj *o;
@@ -2726,58 +3201,27 @@ void xtrimCommand(client *c) {
stream *s = o->ptr;
/* Argument parsing. */
- int trim_strategy = TRIM_STRATEGY_NONE;
- long long maxlen = -1; /* If left to -1 no trimming is performed. */
- int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
- the maxium length is not applied verbatim. */
- int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */
-
- /* Parse options. */
- int i = 2; /* Start of options. */
- for (; i < c->argc; i++) {
- int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
- char *opt = c->argv[i]->ptr;
- if (!strcasecmp(opt,"maxlen") && moreargs) {
- approx_maxlen = 0;
- trim_strategy = TRIM_STRATEGY_MAXLEN;
- char *next = c->argv[i+1]->ptr;
- /* Check for the form MAXLEN ~ <count>. */
- if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
- approx_maxlen = 1;
- i++;
- } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
- i++;
- }
- if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
- != C_OK) return;
-
- if (maxlen < 0) {
- addReplyError(c,"The MAXLEN argument must be >= 0.");
- return;
- }
- i++;
- maxlen_arg_idx = i;
- } else {
- addReply(c,shared.syntaxerr);
- return;
- }
- }
+ streamAddTrimArgs parsed_args;
+ if (streamParseAddOrTrimArgsOrReply(c, &parsed_args, 1) < 0)
+ return; /* streamParseAddOrTrimArgsOrReply already replied. */
/* Perform the trimming. */
- int64_t deleted = 0;
- if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
- deleted = streamTrimByLength(s,maxlen,approx_maxlen);
- } else {
- addReplyError(c,"XTRIM called without an option to trim the stream");
- return;
- }
-
- /* Propagate the write if needed. */
+ int64_t deleted = streamTrim(s, &parsed_args);
if (deleted) {
- signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
+ if (parsed_args.approx_trim) {
+ /* In case our trimming was limited (by LIMIT or by ~) we must
+ * re-write the relevant trim argument to make sure there will be
+ * no inconsistencies in AOF loading or in the replica.
+ * It's enough to check only args->approx because there is no
+ * way LIMIT is given without the ~ option. */
+ streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1);
+ streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx);
+ }
+
+ /* Propagate the write. */
+ signalModifiedKey(c, c->db,c->argv[1]);
server.dirty += deleted;
- if (approx_maxlen) streamRewriteApproxMaxlen(c,s,maxlen_arg_idx);
}
addReplyLongLong(c,deleted);
}
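
For reference, a hedged hiredis sketch (not part of this patch) exercising the trim strategies documented above; the key name and thresholds are invented. Per the option comment and the parser, LIMIT is only meaningful together with the '~' specifier.

#include <stdio.h>
#include <hiredis/hiredis.h>

int main(void) {
    redisContext *ctx = redisConnect("127.0.0.1", 6379);
    if (ctx == NULL || ctx->err) return 1;

    /* Exact trim: keep at most 1000 entries. */
    redisReply *r = redisCommand(ctx, "XTRIM mystream MAXLEN = 1000");
    if (r && r->type == REDIS_REPLY_INTEGER)
        printf("removed %lld entries\n", r->integer);
    if (r) freeReplyObject(r);

    /* Approximate trim by minimum ID, with an explicit effort cap. */
    r = redisCommand(ctx, "XTRIM mystream MINID ~ 1609459200000 LIMIT 3000");
    if (r && r->type == REDIS_REPLY_INTEGER)
        printf("removed %lld entries\n", r->integer);
    if (r) freeReplyObject(r);

    redisFree(ctx);
    return 0;
}
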
@@ -2972,27 +3416,25 @@ void xinfoReplyWithStreamInfo(client *c, stream *s) {
* XINFO STREAM <key> [FULL [COUNT <count>]]
* XINFO HELP. */
void xinfoCommand(client *c) {
- const char *help[] = {
-"CONSUMERS <key> <groupname> -- Show consumer groups of group <groupname>.",
-"GROUPS <key> -- Show the stream consumer groups.",
-"STREAM <key> [FULL [COUNT <count>]] -- Show information about the stream.",
-" FULL will return the full state of the stream,",
-" including all entries, groups, consumers and PELs.",
-" It's possible to show only the first stream/PEL entries",
-" by using the COUNT modifier (Default is 10)",
-"HELP -- Print this help.",
-NULL
- };
stream *s = NULL;
char *opt;
robj *key;
/* HELP is special. Handle it ASAP. */
if (!strcasecmp(c->argv[1]->ptr,"HELP")) {
+ const char *help[] = {
+"CONSUMERS <key> <groupname>",
+" Show consumers of <groupname>.",
+"GROUPS <key>",
+" Show the stream consumer groups.",
+"STREAM <key> [FULL [COUNT <count>]",
+" Show information about the stream.",
+NULL
+ };
addReplyHelp(c, help);
return;
} else if (c->argc < 3) {
- addReplyError(c,"syntax error, try 'XINFO HELP'");
+ addReplySubcommandSyntaxError(c);
return;
}
diff --git a/src/t_string.c b/src/t_string.c
index 3ecc473bd..2792f5557 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -153,7 +153,7 @@ void setCommand(client *c) {
expire = next;
j++;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -528,7 +528,7 @@ void stralgoCommand(client *c) {
if (!strcasecmp(c->argv[1]->ptr,"lcs")) {
stralgoLCS(c);
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
}
}
@@ -589,7 +589,7 @@ void stralgoLCS(client *c) {
b = objb->ptr;
j += 2;
} else {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
goto cleanup;
}
}
diff --git a/src/t_zset.c b/src/t_zset.c
index a9564828a..3d63c41c6 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1701,7 +1701,7 @@ void zaddGenericCommand(client *c, int flags) {
* we expect any number of score-element pairs. */
elements = c->argc-scoreidx;
if (elements % 2 || !elements) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
elements /= 2; /* Now this holds the number of score-element pairs. */
@@ -1822,11 +1822,15 @@ void zremCommand(client *c) {
addReplyLongLong(c,deleted);
}
+typedef enum {
+ ZRANGE_AUTO = 0,
+ ZRANGE_RANK,
+ ZRANGE_SCORE,
+ ZRANGE_LEX,
+} zrange_type;
+
/* Implements ZREMRANGEBYRANK, ZREMRANGEBYSCORE, ZREMRANGEBYLEX commands. */
-#define ZRANGE_RANK 0
-#define ZRANGE_SCORE 1
-#define ZRANGE_LEX 2
-void zremrangeGenericCommand(client *c, int rangetype) {
+void zremrangeGenericCommand(client *c, zrange_type rangetype) {
robj *key = c->argv[1];
robj *zobj;
int keyremoved = 0;
@@ -1834,22 +1838,28 @@ void zremrangeGenericCommand(client *c, int rangetype) {
zrangespec range;
zlexrangespec lexrange;
long start, end, llen;
+ char *notify_type = NULL;
/* Step 1: Parse the range. */
if (rangetype == ZRANGE_RANK) {
+ notify_type = "zremrangebyrank";
if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) ||
(getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK))
return;
} else if (rangetype == ZRANGE_SCORE) {
+ notify_type = "zremrangebyscore";
if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
} else if (rangetype == ZRANGE_LEX) {
+ notify_type = "zremrangebylex";
if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
+ } else {
+ serverPanic("unknown rangetype %d", (int)rangetype);
}
/* Step 2: Lookup & range sanity checks if needed. */
@@ -1875,6 +1885,7 @@ void zremrangeGenericCommand(client *c, int rangetype) {
/* Step 3: Perform the range deletion operation. */
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
switch(rangetype) {
+ case ZRANGE_AUTO:
case ZRANGE_RANK:
zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted);
break;
@@ -1892,6 +1903,7 @@ void zremrangeGenericCommand(client *c, int rangetype) {
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
switch(rangetype) {
+ case ZRANGE_AUTO:
case ZRANGE_RANK:
deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
break;
@@ -1913,9 +1925,8 @@ void zremrangeGenericCommand(client *c, int rangetype) {
/* Step 4: Notifications and reply. */
if (deleted) {
- char *event[3] = {"zremrangebyrank","zremrangebyscore","zremrangebylex"};
signalModifiedKey(c,c->db,key);
- notifyKeyspaceEvent(NOTIFY_ZSET,event[rangetype],key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_ZSET,notify_type,key,c->db->id);
if (keyremoved)
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
}
@@ -2525,7 +2536,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
/* test if the expected number of keys would overflow */
if (setnum > (c->argc-(numkeysIndex+1))) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
@@ -2536,7 +2547,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
if (obj != NULL) {
if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
zfree(src);
- addReply(c,shared.wrongtypeerr);
+ addReplyErrorObject(c,shared.wrongtypeerr);
return;
}
@@ -2582,7 +2593,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
aggregate = REDIS_AGGR_MAX;
} else {
zfree(src);
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
j++; remaining--;
@@ -2594,7 +2605,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
withscores = 1;
} else {
zfree(src);
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
@@ -2778,27 +2789,180 @@ void zdiffCommand(client *c) {
zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_DIFF);
}
-void zrangeGenericCommand(client *c, int reverse) {
- robj *key = c->argv[1];
- robj *zobj;
- int withscores = 0;
- long start;
- long end;
- long llen;
- long rangelen;
+typedef enum {
+ ZRANGE_DIRECTION_AUTO = 0,
+ ZRANGE_DIRECTION_FORWARD,
+ ZRANGE_DIRECTION_REVERSE
+} zrange_direction;
+
+typedef enum {
+ ZRANGE_CONSUMER_TYPE_CLIENT = 0,
+ ZRANGE_CONSUMER_TYPE_INTERNAL
+} zrange_consumer_type;
+
+typedef struct zrange_result_handler zrange_result_handler;
+
+typedef void (*zrangeResultBeginFunction)(zrange_result_handler *c);
+typedef void (*zrangeResultFinalizeFunction)(
+ zrange_result_handler *c, size_t result_count);
+typedef void (*zrangeResultEmitCBufferFunction)(
+ zrange_result_handler *c, const void *p, size_t len, double score);
+typedef void (*zrangeResultEmitLongLongFunction)(
+ zrange_result_handler *c, long long ll, double score);
+
+void zrangeGenericCommand (zrange_result_handler *handler, int argc_start, int store,
+ zrange_type rangetype, zrange_direction direction);
+
+/* Interface struct for ZRANGE/ZRANGESTORE generic implementation.
+ * There is one implementation of this interface that sends a RESP reply to clients,
+ * and one implementation that stores the range result into a zset object. */
+struct zrange_result_handler {
+ zrange_consumer_type type;
+ client *client;
+ robj *dstkey;
+ robj *dstobj;
+ void *userdata;
+ int withscores;
+ int should_emit_array_length;
+ zrangeResultBeginFunction beginResultEmission;
+ zrangeResultFinalizeFunction finalizeResultEmission;
+ zrangeResultEmitCBufferFunction emitResultFromCBuffer;
+ zrangeResultEmitLongLongFunction emitResultFromLongLong;
+};
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
+/* Result handler methods for responding the ZRANGE to clients. */
+static void zrangeResultBeginClient(zrange_result_handler *handler) {
+ handler->userdata = addReplyDeferredLen(handler->client);
+}
- if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
- withscores = 1;
- } else if (c->argc >= 5) {
- addReply(c,shared.syntaxerr);
- return;
+static void zrangeResultEmitCBufferToClient(zrange_result_handler *handler,
+ const void *value, size_t value_length_in_bytes, double score)
+{
+ if (handler->should_emit_array_length) {
+ addReplyArrayLen(handler->client, 2);
}
- if ((zobj = lookupKeyReadOrReply(c,key,shared.emptyarray)) == NULL
- || checkType(c,zobj,OBJ_ZSET)) return;
+ addReplyBulkCBuffer(handler->client, value, value_length_in_bytes);
+
+ if (handler->withscores) {
+ addReplyDouble(handler->client, score);
+ }
+}
+
+static void zrangeResultEmitLongLongToClient(zrange_result_handler *handler,
+ long long value, double score)
+{
+ if (handler->should_emit_array_length) {
+ addReplyArrayLen(handler->client, 2);
+ }
+
+ addReplyBulkLongLong(handler->client, value);
+
+ if (handler->withscores) {
+ addReplyDouble(handler->client, score);
+ }
+}
+
+static void zrangeResultFinalizeClient(zrange_result_handler *handler,
+ size_t result_count)
+{
+ if (handler->withscores && (handler->client->resp == 2)) {
+ result_count *= 2;
+ }
+
+ setDeferredArrayLen(handler->client, handler->userdata, result_count);
+}
+
+/* Result handler methods for storing the ZRANGESTORE to a zset. */
+static void zrangeResultBeginStore(zrange_result_handler *handler)
+{
+ handler->dstobj = createZsetZiplistObject();
+}
+
+static void zrangeResultEmitCBufferForStore(zrange_result_handler *handler,
+ const void *value, size_t value_length_in_bytes, double score)
+{
+ double newscore;
+ int retflags = 0;
+ sds ele = sdsnewlen(value, value_length_in_bytes);
+ int retval = zsetAdd(handler->dstobj, score, ele, &retflags, &newscore);
+ sdsfree(ele);
+ serverAssert(retval);
+}
+
+static void zrangeResultEmitLongLongForStore(zrange_result_handler *handler,
+ long long value, double score)
+{
+ double newscore;
+ int retflags = 0;
+ sds ele = sdsfromlonglong(value);
+ int retval = zsetAdd(handler->dstobj, score, ele, &retflags, &newscore);
+ sdsfree(ele);
+ serverAssert(retval);
+}
+
+static void zrangeResultFinalizeStore(zrange_result_handler *handler, size_t result_count)
+{
+ if (result_count) {
+ setKey(handler->client, handler->client->db, handler->dstkey, handler->dstobj);
+ addReplyLongLong(handler->client, result_count);
+ notifyKeyspaceEvent(NOTIFY_ZSET, "zrangestore", handler->dstkey, handler->client->db->id);
+ server.dirty++;
+ } else {
+ addReply(handler->client, shared.czero);
+ if (dbDelete(handler->client->db, handler->dstkey)) {
+ signalModifiedKey(handler->client, handler->client->db, handler->dstkey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", handler->dstkey, handler->client->db->id);
+ server.dirty++;
+ }
+ }
+ decrRefCount(handler->dstobj);
+}
+
+/* Initialize the consumer interface type with the requested type. */
+static void zrangeResultHandlerInit(zrange_result_handler *handler,
+ client *client, zrange_consumer_type type)
+{
+ memset(handler, 0, sizeof(*handler));
+
+ handler->client = client;
+
+ switch (type) {
+ case ZRANGE_CONSUMER_TYPE_CLIENT:
+ handler->beginResultEmission = zrangeResultBeginClient;
+ handler->finalizeResultEmission = zrangeResultFinalizeClient;
+ handler->emitResultFromCBuffer = zrangeResultEmitCBufferToClient;
+ handler->emitResultFromLongLong = zrangeResultEmitLongLongToClient;
+ break;
+
+ case ZRANGE_CONSUMER_TYPE_INTERNAL:
+ handler->beginResultEmission = zrangeResultBeginStore;
+ handler->finalizeResultEmission = zrangeResultFinalizeStore;
+ handler->emitResultFromCBuffer = zrangeResultEmitCBufferForStore;
+ handler->emitResultFromLongLong = zrangeResultEmitLongLongForStore;
+ break;
+ }
+}
+
+static void zrangeResultHandlerScoreEmissionEnable(zrange_result_handler *handler) {
+ handler->withscores = 1;
+ handler->should_emit_array_length = (handler->client->resp > 2);
+}
+
+static void zrangeResultHandlerDestinationKeySet (zrange_result_handler *handler,
+ robj *dstkey)
+{
+ handler->dstkey = dstkey;
+}
+
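
To illustrate the intent of the callback interface above, here is a hypothetical sketch (not part of this patch) of a third consumer type that merely counts the emitted elements, stashing the counter in the otherwise unused 'userdata' slot. To actually be usable it would also need its own value in zrange_consumer_type and a case in zrangeResultHandlerInit above.

static void zrangeResultBeginCount(zrange_result_handler *handler) {
    handler->userdata = zmalloc(sizeof(size_t));
    *(size_t *)handler->userdata = 0;
}

static void zrangeResultEmitCBufferCount(zrange_result_handler *handler,
    const void *value, size_t value_length_in_bytes, double score)
{
    UNUSED(value); UNUSED(value_length_in_bytes); UNUSED(score);
    (*(size_t *)handler->userdata)++;
}

static void zrangeResultEmitLongLongCount(zrange_result_handler *handler,
    long long value, double score)
{
    UNUSED(value); UNUSED(score);
    (*(size_t *)handler->userdata)++;
}

static void zrangeResultFinalizeCount(zrange_result_handler *handler,
    size_t result_count)
{
    /* The per-element callbacks and the finalizer must agree on the count. */
    serverAssert(*(size_t *)handler->userdata == result_count);
    addReplyLongLong(handler->client, result_count);
    zfree(handler->userdata);
}
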
+/* This command implements ZRANGE, ZREVRANGE. */
+void genericZrangebyrankCommand(zrange_result_handler *handler,
+ robj *zobj, long start, long end, int withscores, int reverse) {
+
+ client *c = handler->client;
+ long llen;
+ long rangelen;
+ size_t result_cardinality;
/* Sanitize indexes. */
llen = zsetLength(zobj);
@@ -2806,22 +2970,17 @@ void zrangeGenericCommand(client *c, int reverse) {
if (end < 0) end = llen+end;
if (start < 0) start = 0;
+ handler->beginResultEmission(handler);
+
/* Invariant: start >= 0, so this test will be true when end < 0.
* The range is empty when start > end or start >= length. */
if (start > end || start >= llen) {
- addReply(c,shared.emptyarray);
+ handler->finalizeResultEmission(handler, 0);
return;
}
if (end >= llen) end = llen-1;
rangelen = (end-start)+1;
-
- /* Return the result in form of a multi-bulk reply. RESP3 clients
- * will receive sub arrays with score->element, while RESP2 returned
- * a flat array. */
- if (withscores && c->resp == 2)
- addReplyArrayLen(c, rangelen*2);
- else
- addReplyArrayLen(c, rangelen);
+ result_cardinality = rangelen;
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
@@ -2829,6 +2988,7 @@ void zrangeGenericCommand(client *c, int reverse) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
+ double score = 0.0;
if (reverse)
eptr = ziplistIndex(zl,-2-(2*start));
@@ -2842,12 +3002,14 @@ void zrangeGenericCommand(client *c, int reverse) {
serverAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
- if (withscores && c->resp > 2) addReplyArrayLen(c,2);
- if (vstr == NULL)
- addReplyBulkLongLong(c,vlong);
- else
- addReplyBulkCBuffer(c,vstr,vlen);
- if (withscores) addReplyDouble(c,zzlGetScore(sptr));
+ if (withscores) /* don't bother to extract the score if it's gonna be ignored. */
+ score = zzlGetScore(sptr);
+
+ if (vstr == NULL) {
+ handler->emitResultFromLongLong(handler, vlong, score);
+ } else {
+ handler->emitResultFromCBuffer(handler, vstr, vlen, score);
+ }
if (reverse)
zzlPrev(zl,&eptr,&sptr);
@@ -2859,7 +3021,6 @@ void zrangeGenericCommand(client *c, int reverse) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
- sds ele;
/* Check if starting point is trivial, before doing log(N) lookup. */
if (reverse) {
@@ -2874,83 +3035,53 @@ void zrangeGenericCommand(client *c, int reverse) {
while(rangelen--) {
serverAssertWithInfo(c,zobj,ln != NULL);
- ele = ln->ele;
- if (withscores && c->resp > 2) addReplyArrayLen(c,2);
- addReplyBulkCBuffer(c,ele,sdslen(ele));
- if (withscores) addReplyDouble(c,ln->score);
+ sds ele = ln->ele;
+ handler->emitResultFromCBuffer(handler, ele, sdslen(ele), ln->score);
ln = reverse ? ln->backward : ln->level[0].forward;
}
} else {
serverPanic("Unknown sorted set encoding");
}
+
+ handler->finalizeResultEmission(handler, result_cardinality);
}
+/* ZRANGESTORE <dst> <src> <min> <max> [BYSCORE | BYLEX] [REV] [LIMIT offset count] */
+void zrangestoreCommand (client *c) {
+ robj *dstkey = c->argv[1];
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_INTERNAL);
+ zrangeResultHandlerDestinationKeySet(&handler, dstkey);
+ zrangeGenericCommand(&handler, 2, 1, ZRANGE_AUTO, ZRANGE_DIRECTION_AUTO);
+}
+
+/* ZRANGE <key> <min> <max> [BYSCORE | BYLEX] [REV] [WITHSCORES] [LIMIT offset count] */
void zrangeCommand(client *c) {
- zrangeGenericCommand(c,0);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_AUTO, ZRANGE_DIRECTION_AUTO);
}
+/* ZREVRANGE <key> <min> <max> [WITHSCORES] */
void zrevrangeCommand(client *c) {
- zrangeGenericCommand(c,1);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_RANK, ZRANGE_DIRECTION_REVERSE);
}
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */
-void genericZrangebyscoreCommand(client *c, int reverse) {
- zrangespec range;
- robj *key = c->argv[1];
- robj *zobj;
- long offset = 0, limit = -1;
- int withscores = 0;
- unsigned long rangelen = 0;
- void *replylen = NULL;
- int minidx, maxidx;
+void genericZrangebyscoreCommand(zrange_result_handler *handler,
+ zrangespec *range, robj *zobj, int withscores, long offset,
+ long limit, int reverse) {
- /* Parse the range arguments. */
- if (reverse) {
- /* Range is given as [max,min] */
- maxidx = 2; minidx = 3;
- } else {
- /* Range is given as [min,max] */
- minidx = 2; maxidx = 3;
- }
-
- if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
- addReplyError(c,"min or max is not a float");
- return;
- }
-
- /* Parse optional extra arguments. Note that ZCOUNT will exactly have
- * 4 arguments, so we'll never enter the following code path. */
- if (c->argc > 4) {
- int remaining = c->argc - 4;
- int pos = 4;
-
- while (remaining) {
- if (remaining >= 1 && !strcasecmp(c->argv[pos]->ptr,"withscores")) {
- pos++; remaining--;
- withscores = 1;
- } else if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL)
- != C_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL)
- != C_OK))
- {
- return;
- }
- pos += 3; remaining -= 3;
- } else {
- addReply(c,shared.syntaxerr);
- return;
- }
- }
- }
+ client *c = handler->client;
+ unsigned long rangelen = 0;
- /* Ok, lookup the key and get the range */
- if ((zobj = lookupKeyReadOrReply(c,key,shared.emptyarray)) == NULL ||
- checkType(c,zobj,OBJ_ZSET)) return;
+ handler->beginResultEmission(handler);
/* For invalid offset, return directly. */
if (offset > 0 && offset >= (long)zsetLength(zobj)) {
- addReply(c,shared.emptyarray);
+ handler->finalizeResultEmission(handler, 0);
return;
}
@@ -2960,29 +3091,17 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- double score;
/* If reversed, get the last node in range as starting point. */
if (reverse) {
- eptr = zzlLastInRange(zl,&range);
+ eptr = zzlLastInRange(zl,range);
} else {
- eptr = zzlFirstInRange(zl,&range);
- }
-
- /* No "first" element in the specified interval. */
- if (eptr == NULL) {
- addReply(c,shared.emptyarray);
- return;
+ eptr = zzlFirstInRange(zl,range);
}
/* Get score pointer for the first element. */
- serverAssertWithInfo(c,zobj,eptr != NULL);
- sptr = ziplistNext(zl,eptr);
-
- /* We don't know in advance how many matching elements there are in the
- * list, so we push this object that will represent the multi-bulk
- * length in the output buffer, and will "fix" it later */
- replylen = addReplyDeferredLen(c);
+ if (eptr)
+ sptr = ziplistNext(zl,eptr);
/* If there is an offset, just traverse the number of elements without
* checking the score because that is done in the next loop. */
@@ -2995,13 +3114,13 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
}
while (eptr && limit--) {
- score = zzlGetScore(sptr);
+ double score = zzlGetScore(sptr);
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zslValueGteMin(score,&range)) break;
+ if (!zslValueGteMin(score,range)) break;
} else {
- if (!zslValueLteMax(score,&range)) break;
+ if (!zslValueLteMax(score,range)) break;
}
/* We know the element exists, so ziplistGet should always
@@ -3009,13 +3128,11 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
rangelen++;
- if (withscores && c->resp > 2) addReplyArrayLen(c,2);
if (vstr == NULL) {
- addReplyBulkLongLong(c,vlong);
+ handler->emitResultFromLongLong(handler, vlong, score);
} else {
- addReplyBulkCBuffer(c,vstr,vlen);
+ handler->emitResultFromCBuffer(handler, vstr, vlen, score);
}
- if (withscores) addReplyDouble(c,score);
/* Move to next node */
if (reverse) {
@@ -3031,22 +3148,11 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
/* If reversed, get the last node in range as starting point. */
if (reverse) {
- ln = zslLastInRange(zsl,&range);
+ ln = zslLastInRange(zsl,range);
} else {
- ln = zslFirstInRange(zsl,&range);
+ ln = zslFirstInRange(zsl,range);
}
- /* No "first" element in the specified interval. */
- if (ln == NULL) {
- addReply(c,shared.emptyarray);
- return;
- }
-
- /* We don't know in advance how many matching elements there are in the
- * list, so we push this object that will represent the multi-bulk
- * length in the output buffer, and will "fix" it later */
- replylen = addReplyDeferredLen(c);
-
/* If there is an offset, just traverse the number of elements without
* checking the score because that is done in the next loop. */
while (ln && offset--) {
@@ -3060,15 +3166,14 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
while (ln && limit--) {
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zslValueGteMin(ln->score,&range)) break;
+ if (!zslValueGteMin(ln->score,range)) break;
} else {
- if (!zslValueLteMax(ln->score,&range)) break;
+ if (!zslValueLteMax(ln->score,range)) break;
}
rangelen++;
- if (withscores && c->resp > 2) addReplyArrayLen(c,2);
- addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
- if (withscores) addReplyDouble(c,ln->score);
+ handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele),
+ ((withscores) ? ln->score : ln->score));
/* Move to next node */
if (reverse) {
@@ -3081,16 +3186,21 @@ void genericZrangebyscoreCommand(client *c, int reverse) {
serverPanic("Unknown sorted set encoding");
}
- if (withscores && c->resp == 2) rangelen *= 2;
- setDeferredArrayLen(c, replylen, rangelen);
+ handler->finalizeResultEmission(handler, rangelen);
}
+/* ZRANGEBYSCORE <key> <min> <max> [WITHSCORES] [LIMIT offset count] */
void zrangebyscoreCommand(client *c) {
- genericZrangebyscoreCommand(c,0);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_SCORE, ZRANGE_DIRECTION_FORWARD);
}
+/* ZREVRANGEBYSCORE <key> <min> <max> [WITHSCORES] [LIMIT offset count] */
void zrevrangebyscoreCommand(client *c) {
- genericZrangebyscoreCommand(c,1);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_SCORE, ZRANGE_DIRECTION_REVERSE);
}
void zcountCommand(client *c) {
@@ -3250,58 +3360,14 @@ void zlexcountCommand(client *c) {
}
/* This command implements ZRANGEBYLEX, ZREVRANGEBYLEX. */
-void genericZrangebylexCommand(client *c, int reverse) {
- zlexrangespec range;
- robj *key = c->argv[1];
- robj *zobj;
- long offset = 0, limit = -1;
+void genericZrangebylexCommand(zrange_result_handler *handler,
+ zlexrangespec *range, robj *zobj, int withscores, long offset, long limit,
+ int reverse)
+{
+ client *c = handler->client;
unsigned long rangelen = 0;
- void *replylen = NULL;
- int minidx, maxidx;
-
- /* Parse the range arguments. */
- if (reverse) {
- /* Range is given as [max,min] */
- maxidx = 2; minidx = 3;
- } else {
- /* Range is given as [min,max] */
- minidx = 2; maxidx = 3;
- }
- if (zslParseLexRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
- addReplyError(c,"min or max not valid string range item");
- return;
- }
-
- /* Parse optional extra arguments. Note that ZCOUNT will exactly have
- * 4 arguments, so we'll never enter the following code path. */
- if (c->argc > 4) {
- int remaining = c->argc - 4;
- int pos = 4;
-
- while (remaining) {
- if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != C_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != C_OK)) {
- zslFreeLexRange(&range);
- return;
- }
- pos += 3; remaining -= 3;
- } else {
- zslFreeLexRange(&range);
- addReply(c,shared.syntaxerr);
- return;
- }
- }
- }
-
- /* Ok, lookup the key and get the range */
- if ((zobj = lookupKeyReadOrReply(c,key,shared.emptyarray)) == NULL ||
- checkType(c,zobj,OBJ_ZSET))
- {
- zslFreeLexRange(&range);
- return;
- }
+ handler->beginResultEmission(handler);
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
@@ -3312,26 +3378,14 @@ void genericZrangebylexCommand(client *c, int reverse) {
/* If reversed, get the last node in range as starting point. */
if (reverse) {
- eptr = zzlLastInLexRange(zl,&range);
+ eptr = zzlLastInLexRange(zl,range);
} else {
- eptr = zzlFirstInLexRange(zl,&range);
- }
-
- /* No "first" element in the specified interval. */
- if (eptr == NULL) {
- addReply(c,shared.emptyarray);
- zslFreeLexRange(&range);
- return;
+ eptr = zzlFirstInLexRange(zl,range);
}
/* Get score pointer for the first element. */
- serverAssertWithInfo(c,zobj,eptr != NULL);
- sptr = ziplistNext(zl,eptr);
-
- /* We don't know in advance how many matching elements there are in the
- * list, so we push this object that will represent the multi-bulk
- * length in the output buffer, and will "fix" it later */
- replylen = addReplyDeferredLen(c);
+ if (eptr)
+ sptr = ziplistNext(zl,eptr);
/* If there is an offset, just traverse the number of elements without
* checking the score because that is done in the next loop. */
@@ -3344,11 +3398,15 @@ void genericZrangebylexCommand(client *c, int reverse) {
}
while (eptr && limit--) {
+ double score = 0;
+ if (withscores) /* don't bother to extract the score if it's gonna be ignored. */
+ score = zzlGetScore(sptr);
+
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zzlLexValueGteMin(eptr,&range)) break;
+ if (!zzlLexValueGteMin(eptr,range)) break;
} else {
- if (!zzlLexValueLteMax(eptr,&range)) break;
+ if (!zzlLexValueLteMax(eptr,range)) break;
}
/* We know the element exists, so ziplistGet should always
@@ -3357,9 +3415,9 @@ void genericZrangebylexCommand(client *c, int reverse) {
rangelen++;
if (vstr == NULL) {
- addReplyBulkLongLong(c,vlong);
+ handler->emitResultFromLongLong(handler, vlong, score);
} else {
- addReplyBulkCBuffer(c,vstr,vlen);
+ handler->emitResultFromCBuffer(handler, vstr, vlen, score);
}
/* Move to next node */
@@ -3376,23 +3434,11 @@ void genericZrangebylexCommand(client *c, int reverse) {
/* If reversed, get the last node in range as starting point. */
if (reverse) {
- ln = zslLastInLexRange(zsl,&range);
+ ln = zslLastInLexRange(zsl,range);
} else {
- ln = zslFirstInLexRange(zsl,&range);
+ ln = zslFirstInLexRange(zsl,range);
}
- /* No "first" element in the specified interval. */
- if (ln == NULL) {
- addReply(c,shared.emptyarray);
- zslFreeLexRange(&range);
- return;
- }
-
- /* We don't know in advance how many matching elements there are in the
- * list, so we push this object that will represent the multi-bulk
- * length in the output buffer, and will "fix" it later */
- replylen = addReplyDeferredLen(c);
-
/* If there is an offset, just traverse the number of elements without
* checking the score because that is done in the next loop. */
while (ln && offset--) {
@@ -3406,13 +3452,13 @@ void genericZrangebylexCommand(client *c, int reverse) {
while (ln && limit--) {
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zslLexValueGteMin(ln->ele,&range)) break;
+ if (!zslLexValueGteMin(ln->ele,range)) break;
} else {
- if (!zslLexValueLteMax(ln->ele,&range)) break;
+ if (!zslLexValueLteMax(ln->ele,range)) break;
}
rangelen++;
- addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
+ handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score);
/* Move to next node */
if (reverse) {
@@ -3425,16 +3471,171 @@ void genericZrangebylexCommand(client *c, int reverse) {
serverPanic("Unknown sorted set encoding");
}
- zslFreeLexRange(&range);
- setDeferredArrayLen(c, replylen, rangelen);
+ handler->finalizeResultEmission(handler, rangelen);
}
+/* ZRANGEBYLEX <key> <min> <max> [LIMIT offset count] */
void zrangebylexCommand(client *c) {
- genericZrangebylexCommand(c,0);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_LEX, ZRANGE_DIRECTION_FORWARD);
}
+/* ZREVRANGEBYLEX <key> <min> <max> [LIMIT offset count] */
void zrevrangebylexCommand(client *c) {
- genericZrangebylexCommand(c,1);
+ zrange_result_handler handler;
+ zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
+ zrangeGenericCommand(&handler, 1, 0, ZRANGE_LEX, ZRANGE_DIRECTION_REVERSE);
+}
+
+/**
+ * This function handles ZRANGE and ZRANGESTORE, and also the deprecated
+ * ZREVRANGE, Z[REV]RANGEBYSCORE and Z[REV]RANGEBYLEX commands.
+ *
+ * The simple ZRANGE and ZRANGESTORE can take _AUTO in rangetype and direction;
+ * the other commands pass explicit values.
+ *
+ * argc_start points to the <src> key argument, so the syntax that follows is:
+ * <src> <min> <max> [BYSCORE | BYLEX] [REV] [WITHSCORES] [LIMIT offset count]
+ */
+void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int store,
+ zrange_type rangetype, zrange_direction direction)
+{
+ client *c = handler->client;
+ robj *key = c->argv[argc_start];
+ robj *zobj;
+ zrangespec range;
+ zlexrangespec lexrange;
+ int minidx = argc_start + 1;
+ int maxidx = argc_start + 2;
+
+ /* Options common to all */
+ long opt_start = 0;
+ long opt_end = 0;
+ int opt_withscores = 0;
+ long opt_offset = 0;
+ long opt_limit = -1;
+
+ /* Step 1: Skip the <src> <min> <max> args and parse remaining optional arguments. */
+ for (int j=argc_start + 3; j < c->argc; j++) {
+ int leftargs = c->argc-j-1;
+ if (!store && !strcasecmp(c->argv[j]->ptr,"withscores")) {
+ opt_withscores = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
+ if ((getLongFromObjectOrReply(c, c->argv[j+1], &opt_offset, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[j+2], &opt_limit, NULL) != C_OK))
+ {
+ return;
+ }
+ j += 2;
+ } else if (direction == ZRANGE_DIRECTION_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"rev"))
+ {
+ direction = ZRANGE_DIRECTION_REVERSE;
+ } else if (rangetype == ZRANGE_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"bylex"))
+ {
+ rangetype = ZRANGE_LEX;
+ } else if (rangetype == ZRANGE_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"byscore"))
+ {
+ rangetype = ZRANGE_SCORE;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+    /* Use defaults if not overridden by arguments. */
+ if (direction == ZRANGE_DIRECTION_AUTO)
+ direction = ZRANGE_DIRECTION_FORWARD;
+ if (rangetype == ZRANGE_AUTO)
+ rangetype = ZRANGE_RANK;
+
+ /* Check for conflicting arguments. */
+ if (opt_limit != -1 && rangetype == ZRANGE_RANK) {
+ addReplyError(c,"syntax error, LIMIT is only supported in combination with either BYSCORE or BYLEX");
+ return;
+ }
+ if (opt_withscores && rangetype == ZRANGE_LEX) {
+ addReplyError(c,"syntax error, WITHSCORES not supported in combination with BYLEX");
+ return;
+ }
+
+ if (direction == ZRANGE_DIRECTION_REVERSE &&
+ ((ZRANGE_SCORE == rangetype) || (ZRANGE_LEX == rangetype)))
+ {
+ /* Range is given as [max,min] */
+ int tmp = maxidx;
+ maxidx = minidx;
+ minidx = tmp;
+ }
+
+ /* Step 2: Parse the range. */
+ switch (rangetype) {
+ case ZRANGE_AUTO:
+ case ZRANGE_RANK:
+ /* Z[REV]RANGE, ZRANGESTORE [REV]RANGE */
+ if ((getLongFromObjectOrReply(c, c->argv[minidx], &opt_start,NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[maxidx], &opt_end,NULL) != C_OK))
+ {
+ return;
+ }
+ break;
+
+ case ZRANGE_SCORE:
+ /* Z[REV]RANGEBYSCORE, ZRANGESTORE [REV]RANGEBYSCORE */
+ if (zslParseRange(c->argv[minidx], c->argv[maxidx], &range) != C_OK) {
+ addReplyError(c, "min or max is not a float");
+ return;
+ }
+ break;
+
+ case ZRANGE_LEX:
+ /* Z[REV]RANGEBYLEX, ZRANGESTORE [REV]RANGEBYLEX */
+ if (zslParseLexRange(c->argv[minidx], c->argv[maxidx], &lexrange) != C_OK) {
+ addReplyError(c, "min or max not valid string range item");
+ return;
+ }
+ break;
+ }
+
+ if (opt_withscores || store) {
+ zrangeResultHandlerScoreEmissionEnable(handler);
+ }
+
+ /* Step 3: Lookup the key and get the range. */
+ if (((zobj = lookupKeyReadOrReply(c, key, shared.emptyarray)) == NULL)
+ || checkType(c, zobj, OBJ_ZSET)) {
+ goto cleanup;
+ }
+
+ /* Step 4: Pass this to the command-specific handler. */
+ switch (rangetype) {
+ case ZRANGE_AUTO:
+ case ZRANGE_RANK:
+ genericZrangebyrankCommand(handler, zobj, opt_start, opt_end,
+ opt_withscores || store, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+
+ case ZRANGE_SCORE:
+ genericZrangebyscoreCommand(handler, &range, zobj, opt_withscores || store,
+ opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+
+ case ZRANGE_LEX:
+ genericZrangebylexCommand(handler, &lexrange, zobj, opt_withscores || store,
+ opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+ }
+
+    /* Instead of returning here, we'll just fall through to the clean-up. */
+
+cleanup:
+
+ if (rangetype == ZRANGE_LEX) {
+ zslFreeLexRange(&lexrange);
+ }
}
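
For reference, a hedged hiredis sketch (not part of this patch) of the argument forms routed through zrangeGenericCommand above; the key names and ranges are invented.

#include <stdio.h>
#include <hiredis/hiredis.h>

int main(void) {
    redisContext *ctx = redisConnect("127.0.0.1", 6379);
    if (ctx == NULL || ctx->err) return 1;
    redisReply *r;

    /* Rank range (the classic ZRANGE); LIMIT is rejected for this form. */
    r = redisCommand(ctx, "ZRANGE board 0 9 WITHSCORES");
    if (r) freeReplyObject(r);

    /* Score range, reversed: with REV the range is given as <max> <min>. */
    r = redisCommand(ctx, "ZRANGE board 100 0 BYSCORE REV LIMIT 0 5");
    if (r) freeReplyObject(r);

    /* Lexicographic range; WITHSCORES is rejected together with BYLEX. */
    r = redisCommand(ctx, "ZRANGE board [a [z BYLEX");
    if (r) freeReplyObject(r);

    /* Store the top ten ranks into another key; the reply is the number
     * of elements stored by the ZRANGESTORE handler. */
    r = redisCommand(ctx, "ZRANGESTORE top10 board 0 9");
    if (r && r->type == REDIS_REPLY_INTEGER)
        printf("stored %lld elements\n", r->integer);
    if (r) freeReplyObject(r);

    redisFree(ctx);
    return 0;
}
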
void zcardCommand(client *c) {
@@ -3632,7 +3833,7 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
/* ZPOPMIN key [<count>] */
void zpopminCommand(client *c) {
if (c->argc > 3) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
genericZpopCommand(c,&c->argv[1],1,ZSET_MIN,0,
@@ -3642,7 +3843,7 @@ void zpopminCommand(client *c) {
/* ZMAXPOP key [<count>] */
void zpopmaxCommand(client *c) {
if (c->argc > 3) {
- addReply(c,shared.syntaxerr);
+ addReplyErrorObject(c,shared.syntaxerr);
return;
}
genericZpopCommand(c,&c->argv[1],1,ZSET_MAX,0,
diff --git a/src/testhelp.h b/src/testhelp.h
index 450334046..c6c1b55bf 100644
--- a/src/testhelp.h
+++ b/src/testhelp.h
@@ -44,7 +44,7 @@ int __test_num = 0;
#define test_cond(descr,_c) do { \
__test_num++; printf("%d - %s: ", __test_num, descr); \
if(_c) printf("PASSED\n"); else {printf("FAILED\n"); __failed_tests++;} \
-} while(0);
+} while(0)
#define test_report() do { \
printf("%d tests, %d passed, %d failed\n", __test_num, \
__test_num-__failed_tests, __failed_tests); \
@@ -52,6 +52,6 @@ int __test_num = 0;
printf("=== WARNING === We have failed tests here...\n"); \
exit(1); \
} \
-} while(0);
+} while(0)
#endif
diff --git a/src/tracking.c b/src/tracking.c
index 913577eab..1cf226e52 100644
--- a/src/tracking.c
+++ b/src/tracking.c
@@ -99,6 +99,57 @@ void disableTracking(client *c) {
}
}
+static int stringCheckPrefix(unsigned char *s1, size_t s1_len, unsigned char *s2, size_t s2_len) {
+ size_t min_length = s1_len < s2_len ? s1_len : s2_len;
+ return memcmp(s1,s2,min_length) == 0;
+}
+
+/* Check if any of the provided prefixes collide with one another or
+ * with an existing prefix for the client. A collision is defined as two
+ * prefixes that will emit an invalidation for the same key. If no prefix
+ * collision is found, -1 is returned; otherwise a non-negative value is
+ * returned and an error describing the collision is emitted to the client. */
+int checkPrefixCollisionsOrReply(client *c, robj **prefixes, size_t numprefix) {
+ for (size_t i = 0; i < numprefix; i++) {
+ /* Check input list has no overlap with existing prefixes. */
+ if (c->client_tracking_prefixes) {
+ raxIterator ri;
+ raxStart(&ri,c->client_tracking_prefixes);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ if (stringCheckPrefix(ri.key,ri.key_len,
+ prefixes[i]->ptr,sdslen(prefixes[i]->ptr)))
+ {
+ sds collision = sdsnewlen(ri.key,ri.key_len);
+ addReplyErrorFormat(c,
+ "Prefix '%s' overlaps with an existing prefix '%s'. "
+ "Prefixes for a single client must not overlap.",
+ (unsigned char *)prefixes[i]->ptr,
+ (unsigned char *)collision);
+ sdsfree(collision);
+ raxStop(&ri);
+ return 0;
+ }
+ }
+ raxStop(&ri);
+ }
+ /* Check input has no overlap with itself. */
+ for (size_t j = i + 1; j < numprefix; j++) {
+ if (stringCheckPrefix(prefixes[i]->ptr,sdslen(prefixes[i]->ptr),
+ prefixes[j]->ptr,sdslen(prefixes[j]->ptr)))
+ {
+ addReplyErrorFormat(c,
+ "Prefix '%s' overlaps with another provided prefix '%s'. "
+ "Prefixes for a single client must not overlap.",
+ (unsigned char *)prefixes[i]->ptr,
+ (unsigned char *)prefixes[j]->ptr);
+ return i;
+ }
+ }
+ }
+ return -1;
+}
+
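
The overlap rule used by stringCheckPrefix above is simply "one prefix is a prefix of the other", since both would then invalidate the same keys. A standalone illustration (not part of this patch), with made-up prefixes:

#include <stdio.h>
#include <string.h>

static int prefixes_collide(const char *a, const char *b) {
    size_t la = strlen(a), lb = strlen(b);
    size_t min_length = la < lb ? la : lb;
    return memcmp(a, b, min_length) == 0;
}

int main(void) {
    printf("%d\n", prefixes_collide("user:", "user:1000")); /* 1: "user:" covers every "user:1000*" key */
    printf("%d\n", prefixes_collide("user:", "session:"));  /* 0: disjoint key spaces */
    printf("%d\n", prefixes_collide("", "anything"));       /* 1: the empty prefix matches every key */
    return 0;
}
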
/* Set the client 'c' to track the prefix 'prefix'. If the client 'c' is
* already registered for the specified prefix, no operation is performed. */
void enableBcastTrackingForPrefix(client *c, char *prefix, size_t plen) {
@@ -350,19 +401,22 @@ void trackingInvalidateKey(client *c, robj *keyobj) {
}
/* This function is called when one or all the Redis databases are
- * flushed (dbid == -1 in case of FLUSHALL). Caching keys are not
- * specific for each DB but are global: currently what we do is send a
- * special notification to clients with tracking enabled, sending a
- * RESP NULL, which means, "all the keys", in order to avoid flooding
- * clients with many invalidation messages for all the keys they may
- * hold.
+ * flushed. Caching keys are not specific for each DB but are global:
+ * currently what we do is send a special notification to clients with
+ * tracking enabled, sending a RESP NULL, which means, "all the keys",
+ * in order to avoid flooding clients with many invalidation messages
+ * for all the keys they may hold.
*/
-void freeTrackingRadixTree(void *rt) {
+void freeTrackingRadixTreeCallback(void *rt) {
raxFree(rt);
}
+void freeTrackingRadixTree(rax *rt) {
+ raxFreeWithCallback(rt,freeTrackingRadixTreeCallback);
+}
+
/* A RESP NULL is sent to indicate that all keys are invalid */
-void trackingInvalidateKeysOnFlush(int dbid) {
+void trackingInvalidateKeysOnFlush(int async) {
if (server.tracking_clients) {
listNode *ln;
listIter li;
@@ -376,8 +430,12 @@ void trackingInvalidateKeysOnFlush(int dbid) {
}
/* In case of FLUSHALL, reclaim all the memory used by tracking. */
- if (dbid == -1 && TrackingTable) {
- raxFreeWithCallback(TrackingTable,freeTrackingRadixTree);
+ if (TrackingTable) {
+ if (async) {
+ freeTrackingRadixTreeAsync(TrackingTable);
+ } else {
+ freeTrackingRadixTree(TrackingTable);
+ }
TrackingTable = raxNew();
TrackingTableTotalItems = 0;
}
diff --git a/src/ziplist.c b/src/ziplist.c
index 866078613..a4f38c5e8 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -431,19 +431,21 @@ unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, uns
/* Encode the length of the previous entry and write it to "p". This only
* uses the larger encoding (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
+ uint32_t u32;
if (p != NULL) {
p[0] = ZIP_BIG_PREVLEN;
- memcpy(p+1,&len,sizeof(len));
+ u32 = len;
+ memcpy(p+1,&u32,sizeof(u32));
memrev32ifbe(p+1);
}
- return 1+sizeof(len);
+ return 1 + sizeof(uint32_t);
}
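
For reference, a standalone sketch (not part of this patch) of the 5-byte "large" prevlen encoding that zipStorePrevEntryLengthLarge above now writes through a fixed-width uint32_t, so the encoded size no longer depends on the width of unsigned int. The helper name is invented; the big-endian byte swap done by ziplist.c is omitted here.

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define ZIP_BIG_PREVLEN 254

static int store_prevlen_large(unsigned char *p, unsigned int len) {
    uint32_t u32 = len;
    if (p != NULL) {
        p[0] = ZIP_BIG_PREVLEN;        /* Marker byte: "a 4-byte length follows". */
        memcpy(p+1, &u32, sizeof(u32));
    }
    return 1 + sizeof(uint32_t);       /* Always 5 bytes. */
}

int main(void) {
    unsigned char buf[5];
    assert(store_prevlen_large(NULL, 300) == 5);
    assert(store_prevlen_large(buf, 300) == 5);
    assert(buf[0] == ZIP_BIG_PREVLEN);
    uint32_t stored;
    memcpy(&stored, buf+1, sizeof(stored));
    assert(stored == 300);
    return 0;
}
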
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
- return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
+ return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1;
} else {
if (len < ZIP_BIG_PREVLEN) {
p[0] = len;
@@ -1711,7 +1713,7 @@ int ziplistTest(int argc, char **argv) {
if (p == NULL) {
printf("No entry\n");
} else {
- printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", p-zl);
+ printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl));
return 1;
}
printf("\n");
@@ -1761,7 +1763,7 @@ int ziplistTest(int argc, char **argv) {
if (p == NULL) {
printf("No entry\n");
} else {
- printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", p-zl);
+ printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl));
return 1;
}
printf("\n");
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 86b15b0ee..eacce67bd 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -580,15 +580,18 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
#if defined(__APPLE__)
struct proc_regioninfo pri;
- if (proc_pidinfo(pid, PROC_PIDREGIONINFO, 0, &pri, PROC_PIDREGIONINFO_SIZE) ==
- PROC_PIDREGIONINFO_SIZE) {
- if (!strcmp(field, "Private_Dirty:")) {
- return (size_t)pri.pri_pages_dirtied * 4096;
- } else if (!strcmp(field, "Rss:")) {
- return (size_t)pri.pri_pages_resident * 4096;
- } else if (!strcmp(field, "AnonHugePages:")) {
+ if (pid == -1) pid = getpid();
+ if (proc_pidinfo(pid, PROC_PIDREGIONINFO, 0, &pri,
+ PROC_PIDREGIONINFO_SIZE) == PROC_PIDREGIONINFO_SIZE)
+ {
+ int pagesize = getpagesize();
+ if (!strcmp(field, "Private_Dirty:")) {
+ return (size_t)pri.pri_pages_dirtied * pagesize;
+ } else if (!strcmp(field, "Rss:")) {
+ return (size_t)pri.pri_pages_resident * pagesize;
+ } else if (!strcmp(field, "AnonHugePages:")) {
return 0;
- }
+ }
}
return 0;
#endif
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
index e894823de..ffb268561 100644
--- a/tests/cluster/cluster.tcl
+++ b/tests/cluster/cluster.tcl
@@ -57,6 +57,11 @@ proc CI {n field} {
get_info_field [R $n cluster info] $field
}
+# Return the value of the specified INFO field.
+proc s {n field} {
+ get_info_field [R $n info] $field
+}
+
 # Assuming nodes are reset, this function performs slots allocation.
# Only the first 'n' nodes are used.
proc cluster_allocate_slots {n} {
diff --git a/tests/cluster/tests/16-transactions-on-replica.tcl b/tests/cluster/tests/16-transactions-on-replica.tcl
index 41083f421..baed15fbc 100644
--- a/tests/cluster/tests/16-transactions-on-replica.tcl
+++ b/tests/cluster/tests/16-transactions-on-replica.tcl
@@ -15,6 +15,7 @@ set replica [Rn 1]
test "Cant read from replica without READONLY" {
$primary SET a 1
+ wait_for_ofs_sync $primary $replica
catch {$replica GET a} err
assert {[string range $err 0 4] eq {MOVED}}
}
@@ -28,6 +29,7 @@ test "Can preform HSET primary and HGET from replica" {
$primary HSET h a 1
$primary HSET h b 2
$primary HSET h c 3
+ wait_for_ofs_sync $primary $replica
assert {[$replica HGET h a] eq {1}}
assert {[$replica HGET h b] eq {2}}
assert {[$replica HGET h c] eq {3}}
diff --git a/tests/cluster/tests/17-diskless-load-swapdb.tcl b/tests/cluster/tests/17-diskless-load-swapdb.tcl
index a035be7be..612818cb7 100644
--- a/tests/cluster/tests/17-diskless-load-swapdb.tcl
+++ b/tests/cluster/tests/17-diskless-load-swapdb.tcl
@@ -22,6 +22,8 @@ test "Right to restore backups when fail to diskless load " {
$replica READONLY
$replica config set repl-diskless-load swapdb
+ $replica config set appendonly no
+ $replica config set save ""
$replica config rewrite
$master config set repl-backlog-size 1024
$master config set repl-diskless-sync yes
@@ -38,7 +40,8 @@ test "Right to restore backups when fail to diskless load " {
assert_equal {1} [$replica get $slot0_key]
assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1]
- # Kill the replica
+ # Save an RDB and kill the replica
+ $replica save
kill_instance redis $replica_id
# Delete the key from master
@@ -60,13 +63,12 @@ test "Right to restore backups when fail to diskless load " {
restart_instance redis $replica_id
$replica READONLY
- # Start full sync
+ # Start full sync, wait till after db is flushed (backed up)
wait_for_condition 500 10 {
- [string match "*sync*" [$replica role]]
+ [s $replica_id loading] eq 1
} else {
fail "Fail to full sync"
}
- after 100
# Kill master, abort full sync
kill_instance redis $master_id
@@ -74,4 +76,4 @@ test "Right to restore backups when fail to diskless load " {
# Replica keys and keys to slots map still both are right
assert_equal {1} [$replica get $slot0_key]
assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1]
-}
\ No newline at end of file
+}
diff --git a/tests/cluster/tests/18-info.tcl b/tests/cluster/tests/18-info.tcl
new file mode 100644
index 000000000..978d9d1da
--- /dev/null
+++ b/tests/cluster/tests/18-info.tcl
@@ -0,0 +1,45 @@
+# Check cluster info stats
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a primary with a replica" {
+ create_cluster 2 0
+}
+
+test "Cluster should start ok" {
+ assert_cluster_state ok
+}
+
+set primary1 [Rn 0]
+set primary2 [Rn 1]
+
+proc cmdstat {instance cmd} {
+    return [cmdrstat $cmd $instance]
+}
+
+proc errorstat {instance cmd} {
+    return [errorrstat $cmd $instance]
+}
+
+test "errorstats: rejected call due to MOVED Redirection" {
+ $primary1 config resetstat
+ $primary2 config resetstat
+ assert_match {} [errorstat $primary1 MOVED]
+ assert_match {} [errorstat $primary2 MOVED]
+ # we know that one will have a MOVED reply and one will succeed
+ catch {$primary1 set key b} replyP1
+ catch {$primary2 set key b} replyP2
+ # sort servers so we know which one failed
+ if {$replyP1 eq {OK}} {
+ assert_match {MOVED*} $replyP2
+ set pok $primary1
+ set perr $primary2
+ } else {
+ assert_match {MOVED*} $replyP1
+ set pok $primary2
+ set perr $primary1
+ }
+ assert_match {} [errorstat $pok MOVED]
+ assert_match {*count=1*} [errorstat $perr MOVED]
+ assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat $perr set]
+}
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index aadfe281f..99495b2b7 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -198,3 +198,94 @@ test {client freed during loading} {
exec kill [srv 0 pid]
}
}
+
+# Our COW metrics (Private_Dirty) work only on Linux
+set system_name [string tolower [exec uname -s]]
+if {$system_name eq {linux}} {
+
+start_server {overrides {save ""}} {
+ test {Test child sending COW info} {
+ # make sure that rdb_last_cow_size and current_cow_size are zero (the test uses a new server),
+ # so that the comparisons during the test will be valid
+ assert {[s current_cow_size] == 0}
+ assert {[s rdb_last_cow_size] == 0}
+
+ # using a 200us delay, the bgsave empirically takes about 10 seconds.
+ # we need it to take more than about 5 seconds, since redis only reports COW once a second.
+ r config set rdb-key-save-delay 200
+ r config set loglevel debug
+
+ # populate the db with 10k keys of 4k each
+ set rd [redis_deferring_client 0]
+ set size 4096
+ set cmd_count 10000
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ $rd set key$k [string repeat A $size]
+ }
+
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ catch { $rd read }
+ }
+
+ $rd close
+
+ # start background rdb save
+ r bgsave
+
+ # on each iteration, we will write some key to the server to trigger copy-on-write, and
+ # wait to see that it is reflected in INFO.
+ set iteration 1
+ while 1 {
+ # take a sample before writing new data to the server
+ set cow_size [s current_cow_size]
+ if {$::verbose} {
+ puts "COW info before copy-on-write: $cow_size"
+ }
+
+ # trigger copy-on-write
+ r setrange key$iteration 0 [string repeat B $size]
+
+ # wait to see that the current_cow_size value is updated (as long as the child is in progress)
+ wait_for_condition 80 100 {
+ [s rdb_bgsave_in_progress] == 0 ||
+ [s current_cow_size] >= $cow_size + $size
+ } else {
+ if {$::verbose} {
+ puts "COW info on fail: [s current_cow_size]"
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ fail "COW info wasn't reported"
+ }
+
+ # when not running in accurate mode, stop after 2 iterations
+ if {!$::accurate && $iteration == 2} {
+ break
+ }
+
+ # stop iterating if the bgsave completed
+ if { [s rdb_bgsave_in_progress] == 0 } {
+ break
+ }
+
+ incr iteration 1
+ }
+
+ # make sure we saw a report of current_cow_size
+ if {$iteration < 2 && $::verbose} {
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ assert_morethan_equal $iteration 2
+
+ # if bgsave completed, check that rdb_last_cow_size (fork exit report)
+ # is at least 90% of the last reported current_cow_size.
+ if { [s rdb_bgsave_in_progress] == 0 } {
+ set final_cow [s rdb_last_cow_size]
+ set cow_size [expr $cow_size * 0.9]
+ if {$final_cow < $cow_size && $::verbose} {
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ assert_morethan_equal $final_cow $cow_size
+ }
+ }
+}
+} ;# system_name
diff --git a/tests/modules/propagate.c b/tests/modules/propagate.c
index 13277b19d..70cddacbd 100644
--- a/tests/modules/propagate.c
+++ b/tests/modules/propagate.c
@@ -51,18 +51,31 @@ void timerHandler(RedisModuleCtx *ctx, void *data) {
RedisModule_Replicate(ctx,"INCR","c","timer");
times++;
- if (times < 10)
+ if (times < 3)
RedisModule_CreateTimer(ctx,100,timerHandler,NULL);
else
times = 0;
}
+int propagateTestTimerCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModuleTimerID timer_id =
+ RedisModule_CreateTimer(ctx,100,timerHandler,NULL);
+ REDISMODULE_NOT_USED(timer_id);
+
+ RedisModule_ReplyWithSimpleString(ctx,"OK");
+ return REDISMODULE_OK;
+}
+
/* The thread entry point. */
void *threadMain(void *arg) {
REDISMODULE_NOT_USED(arg);
RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(NULL);
RedisModule_SelectDb(ctx,9); /* Tests ran in database number 9. */
- for (int i = 0; i < 10; i++) {
+ for (int i = 0; i < 3; i++) {
RedisModule_ThreadSafeContextLock(ctx);
RedisModule_Replicate(ctx,"INCR","c","a-from-thread");
RedisModule_Replicate(ctx,"INCR","c","b-from-thread");
@@ -72,15 +85,11 @@ void *threadMain(void *arg) {
return NULL;
}
-int propagateTestCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+int propagateTestThreadCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
{
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
- RedisModuleTimerID timer_id =
- RedisModule_CreateTimer(ctx,100,timerHandler,NULL);
- REDISMODULE_NOT_USED(timer_id);
-
pthread_t tid;
if (pthread_create(&tid,NULL,threadMain,NULL) != 0)
return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
@@ -90,7 +99,7 @@ int propagateTestCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc
return REDISMODULE_OK;
}
-int propagateTest2Command(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+int propagateTestSimpleCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
{
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
@@ -102,7 +111,7 @@ int propagateTest2Command(RedisModuleCtx *ctx, RedisModuleString **argv, int arg
return REDISMODULE_OK;
}
-int propagateTest3Command(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+int propagateTestMixedCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
{
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
@@ -129,18 +138,23 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_Init(ctx,"propagate-test",1,REDISMODULE_APIVER_1)
== REDISMODULE_ERR) return REDISMODULE_ERR;
- if (RedisModule_CreateCommand(ctx,"propagate-test",
- propagateTestCommand,
+ if (RedisModule_CreateCommand(ctx,"propagate-test.timer",
+ propagateTestTimerCommand,
+ "",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"propagate-test.thread",
+ propagateTestThreadCommand,
"",1,1,1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
- if (RedisModule_CreateCommand(ctx,"propagate-test-2",
- propagateTest2Command,
+ if (RedisModule_CreateCommand(ctx,"propagate-test.simple",
+ propagateTestSimpleCommand,
"",1,1,1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
- if (RedisModule_CreateCommand(ctx,"propagate-test-3",
- propagateTest3Command,
+ if (RedisModule_CreateCommand(ctx,"propagate-test.mixed",
+ propagateTestMixedCommand,
"",1,1,1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl
index 9eb5b94e2..8eca2ac32 100644
--- a/tests/support/redis.tcl
+++ b/tests/support/redis.tcl
@@ -214,20 +214,19 @@ proc ::redis::redis_multi_bulk_read {id fd} {
proc ::redis::redis_read_map {id fd} {
set count [redis_read_line $fd]
if {$count == -1} return {}
- set l {}
+ set d {}
set err {}
for {set i 0} {$i < $count} {incr i} {
if {[catch {
- set t {}
- lappend t [redis_read_reply $id $fd] ; # key
- lappend t [redis_read_reply $id $fd] ; # value
- lappend l $t
+ set k [redis_read_reply $id $fd] ; # key
+ set v [redis_read_reply $id $fd] ; # value
+ dict set d $k $v
} e] && $err eq {}} {
set err $e
}
}
if {$err ne {}} {return -code error $err}
- return $l
+ return $d
}
proc ::redis::redis_read_line fd {
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 1cddb7068..77ba31d84 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -50,11 +50,17 @@ proc kill_server config {
tags {"leaks"} {
test "Check for memory leaks (pid $pid)" {
set output {0 leaks}
- catch {exec leaks $pid} output
- if {[string match {*process does not exist*} $output] ||
- [string match {*cannot examine*} $output]} {
- # In a few tests we kill the server process.
- set output "0 leaks"
+ catch {exec leaks $pid} output option
+ # In a few tests we kill the server process, so leaks will not find it.
+ # It'll exit with an exit code >1 on error, so we ignore these cases.
+ if {[dict exists $option -errorcode]} {
+ set details [dict get $option -errorcode]
+ if {[lindex $details 0] eq "CHILDSTATUS"} {
+ set status [lindex $details 2]
+ if {$status > 1} {
+ set output "0 leaks"
+ }
+ }
}
set output
} {*0 leaks*}
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index 23015b3a7..39aebe156 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -31,36 +31,48 @@ proc assert_match {pattern value} {
}
}
+proc assert_failed {expected_err detail} {
+ if {$detail ne ""} {
+ set detail "(detail: $detail)"
+ } else {
+ set detail "(context: [info frame -2])"
+ }
+ error "assertion:$expected_err $detail"
+}
+
proc assert_equal {value expected {detail ""}} {
if {$expected ne $value} {
- if {$detail ne ""} {
- set detail "(detail: $detail)"
- } else {
- set detail "(context: [info frame -1])"
- }
- error "assertion:Expected '$value' to be equal to '$expected' $detail"
+ assert_failed "Expected '$value' to be equal to '$expected'" $detail
}
}
proc assert_lessthan {value expected {detail ""}} {
if {!($value < $expected)} {
- if {$detail ne ""} {
- set detail "(detail: $detail)"
- } else {
- set detail "(context: [info frame -1])"
- }
- error "assertion:Expected '$value' to be lessthan to '$expected' $detail"
+ assert_failed "Expected '$value' to be less than '$expected'" $detail
+ }
+}
+
+proc assert_lessthan_equal {value expected {detail ""}} {
+ if {!($value <= $expected)} {
+ assert_failed "Expected '$value' to be less than or equal to '$expected'" $detail
+ }
+}
+
+proc assert_morethan {value expected {detail ""}} {
+ if {!($value > $expected)} {
+ assert_failed "Expected '$value' to be more than '$expected'" $detail
+ }
+}
+
+proc assert_morethan_equal {value expected {detail ""}} {
+ if {!($value >= $expected)} {
+ assert_failed "Expected '$value' to be more than or equal to '$expected'" $detail
}
}
proc assert_range {value min max {detail ""}} {
if {!($value <= $max && $value >= $min)} {
- if {$detail ne ""} {
- set detail "(detail: $detail)"
- } else {
- set detail "(context: [info frame -1])"
- }
- error "assertion:Expected '$value' to be between to '$min' and '$max' $detail"
+ assert_failed "Expected '$value' to be between '$min' and '$max'" $detail
}
}
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index e4b70ed20..86f2753c2 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -561,6 +561,12 @@ proc cmdrstat {cmd r} {
}
}
+proc errorrstat {cmd r} {
+ if {[regexp "\r\nerrorstat_$cmd:(.*?)\r\n" [$r info errorstats] _ value]} {
+ set _ $value
+ }
+}
+
proc generate_fuzzy_traffic_on_key {key duration} {
# Commands per type, blocking commands removed
# TODO: extract these from help.h or elsewhere, and improve to include other types
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 3b8dc16da..4bef921ff 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -18,6 +18,7 @@ set ::all_tests {
unit/protocol
unit/keyspace
unit/scan
+ unit/info
unit/type/string
unit/type/incr
unit/type/list
diff --git a/tests/unit/auth.tcl b/tests/unit/auth.tcl
index 9080d4bf7..b63cf0126 100644
--- a/tests/unit/auth.tcl
+++ b/tests/unit/auth.tcl
@@ -25,3 +25,44 @@ start_server {tags {"auth"} overrides {requirepass foobar}} {
r incr foo
} {101}
}
+
+start_server {tags {"auth_binary_password"}} {
+ test {AUTH fails when binary password is wrong} {
+ r config set requirepass "abc\x00def"
+ catch {r auth abc} err
+ set _ $err
+ } {WRONGPASS*}
+
+ test {AUTH succeeds when binary password is correct} {
+ r config set requirepass "abc\x00def"
+ r auth "abc\x00def"
+ } {OK}
+
+ start_server {tags {"masterauth"}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ test {MASTERAUTH test with binary password} {
+ $master config set requirepass "abc\x00def"
+
+ # Configure the replica with masterauth
+ set loglines [count_log_lines 0]
+ $slave slaveof $master_host $master_port
+ $slave config set masterauth "abc"
+
+ # Verify replica is not able to sync with master
+ wait_for_log_messages 0 {"*Unable to AUTH to MASTER*"} $loglines 1000 10
+ assert_equal {down} [s 0 master_link_status]
+
+ # Test replica with the correct masterauth
+ $slave config set masterauth "abc\x00def"
+ wait_for_condition 50 100 {
+ [s 0 master_link_status] eq {up}
+ } else {
+ fail "Can't turn the instance into a replica"
+ }
+ }
+ }
+}
diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl
index 119d2bd64..0ac95f7c5 100644
--- a/tests/unit/geo.tcl
+++ b/tests/unit/geo.tcl
@@ -74,6 +74,49 @@ start_server {tags {"geo"}} {
r geoadd nyc -73.9454966 40.747533 "lic market"
} {0}
+ test {GEOADD update with CH option} {
+ assert_equal 1 [r geoadd nyc CH 40.747533 -73.9454966 "lic market"]
+ lassign [lindex [r geopos nyc "lic market"] 0] x1 y1
+ assert {abs($x1) - 40.747 < 0.001}
+ assert {abs($y1) - 73.945 < 0.001}
+ } {}
+
+ test {GEOADD update with NX option} {
+ assert_equal 0 [r geoadd nyc NX -73.9454966 40.747533 "lic market"]
+ lassign [lindex [r geopos nyc "lic market"] 0] x1 y1
+ assert {abs($x1) - 40.747 < 0.001}
+ assert {abs($y1) - 73.945 < 0.001}
+ } {}
+
+ test {GEOADD update with XX option} {
+ assert_equal 0 [r geoadd nyc XX -83.9454966 40.747533 "lic market"]
+ lassign [lindex [r geopos nyc "lic market"] 0] x1 y1
+ assert {abs($x1) - 83.945 < 0.001}
+ assert {abs($y1) - 40.747 < 0.001}
+ } {}
+
+ test {GEOADD update with CH NX option} {
+ r geoadd nyc CH NX -73.9454966 40.747533 "lic market"
+ } {0}
+
+ test {GEOADD update with CH XX option} {
+ r geoadd nyc CH XX -73.9454966 40.747533 "lic market"
+ } {1}
+
+ test {GEOADD update with XX NX option will return syntax error} {
+ catch {
+ r geoadd nyc xx nx -73.9454966 40.747533 "lic market"
+ } err
+ set err
+ } {ERR*syntax*}
+
+ test {GEOADD update with invalid option} {
+ catch {
+ r geoadd nyc ch xx foo -73.9454966 40.747533 "lic market"
+ } err
+ set err
+ } {ERR*syntax*}
+
test {GEOADD invalid coordinates} {
catch {
r geoadd nyc -73.9454966 40.747533 "lic market" \
@@ -135,6 +178,19 @@ start_server {tags {"geo"}} {
r georadius nyc -73.9798091 40.7598464 10 km COUNT 3
} {{central park n/q/r} 4545 {union square}}
+ test {GEORADIUS with ANY not sorted by default} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 3 ANY
+ } {{wtc one} {union square} {central park n/q/r}}
+
+ test {GEORADIUS with ANY sorted by ASC} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 3 ANY ASC
+ } {{central park n/q/r} {union square} {wtc one}}
+
+ test {GEORADIUS with ANY but no COUNT} {
+ catch {r georadius nyc -73.9798091 40.7598464 10 km ANY ASC} e
+ set e
+ } {ERR*ANY*requires*COUNT*}
+
test {GEORADIUS with COUNT but missing integer argument} {
catch {r georadius nyc -73.9798091 40.7598464 10 km COUNT} e
set e
diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl
new file mode 100644
index 000000000..5a44c0647
--- /dev/null
+++ b/tests/unit/info.tcl
@@ -0,0 +1,153 @@
+proc cmdstat {cmd} {
+ return [cmdrstat $cmd r]
+}
+
+proc errorstat {cmd} {
+ return [errorrstat $cmd r]
+}
+
+start_server {tags {"info"}} {
+ start_server {} {
+
+ test {errorstats: failed call authentication error} {
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ assert_equal [s total_error_replies] 0
+ catch {r auth k} e
+ assert_match {ERR AUTH*} $e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
+ assert_equal [s total_error_replies] 1
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ }
+
+ test {errorstats: failed call within MULTI/EXEC} {
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ assert_equal [s total_error_replies] 0
+ r multi
+ r set a b
+ r auth a
+ catch {r exec} e
+ assert_match {ERR AUTH*} $e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat set]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat exec]
+ assert_equal [s total_error_replies] 1
+
+ # MULTI/EXEC command errors should still be attributed to the command itself
+ catch {r exec} e
+ assert_match {ERR EXEC without MULTI} $e
+ assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat exec]
+ assert_match {*count=2*} [errorstat ERR]
+ assert_equal [s total_error_replies] 2
+ }
+
+ test {errorstats: failed call within LUA} {
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ assert_equal [s total_error_replies] 0
+ catch {r eval {redis.pcall('XGROUP', 'CREATECONSUMER', 's1', 'mygroup', 'consumer') return } 0} e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat eval]
+
+ # EVAL command errors should still be attributed to the command itself
+ catch {r eval a} e
+ assert_match {ERR wrong*} $e
+ assert_match {*calls=1,*,rejected_calls=1,failed_calls=0} [cmdstat eval]
+ assert_match {*count=2*} [errorstat ERR]
+ assert_equal [s total_error_replies] 2
+ }
+
+ test {errorstats: failed call NOGROUP error} {
+ r config resetstat
+ assert_match {} [errorstat NOGROUP]
+ r del mystream
+ r XADD mystream * f v
+ catch {r XGROUP CREATECONSUMER mystream mygroup consumer} e
+ assert_match {NOGROUP*} $e
+ assert_match {*count=1*} [errorstat NOGROUP]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup]
+ r config resetstat
+ assert_match {} [errorstat NOGROUP]
+ }
+
+ test {errorstats: rejected call unknown command} {
+ r config resetstat
+ assert_equal [s total_error_replies] 0
+ assert_match {} [errorstat ERR]
+ catch {r asdf} e
+ assert_match {ERR unknown*} $e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_equal [s total_error_replies] 1
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ }
+
+ test {errorstats: rejected call within MULTI/EXEC} {
+ r config resetstat
+ assert_equal [s total_error_replies] 0
+ assert_match {} [errorstat ERR]
+ r multi
+ catch {r set} e
+ assert_match {ERR wrong number of arguments*} $e
+ catch {r exec} e
+ assert_match {EXECABORT*} $e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_equal [s total_error_replies] 1
+ assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat multi]
+ assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat exec]
+ assert_equal [s total_error_replies] 1
+ r config resetstat
+ assert_match {} [errorstat ERR]
+ }
+
+ test {errorstats: rejected call due to wrong arity} {
+ r config resetstat
+ assert_equal [s total_error_replies] 0
+ assert_match {} [errorstat ERR]
+ catch {r set k} e
+ assert_match {ERR wrong number of arguments*} $e
+ assert_match {*count=1*} [errorstat ERR]
+ assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
+ # ensure that after a rejected command, valid ones are counted properly
+ r set k1 v1
+ r set k2 v2
+ assert_match {calls=2,*,rejected_calls=1,failed_calls=0} [cmdstat set]
+ assert_equal [s total_error_replies] 1
+ }
+
+ test {errorstats: rejected call by OOM error} {
+ r config resetstat
+ assert_equal [s total_error_replies] 0
+ assert_match {} [errorstat OOM]
+ r config set maxmemory 1
+ catch {r set a b} e
+ assert_match {OOM*} $e
+ assert_match {*count=1*} [errorstat OOM]
+ assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
+ assert_equal [s total_error_replies] 1
+ r config resetstat
+ assert_match {} [errorstat OOM]
+ }
+
+ test {errorstats: rejected call by authorization error} {
+ r config resetstat
+ assert_equal [s total_error_replies] 0
+ assert_match {} [errorstat NOPERM]
+ r ACL SETUSER alice on >p1pp0 ~cached:* +get +info +config
+ r auth alice p1pp0
+ catch {r set a b} e
+ assert_match {NOPERM*} $e
+ assert_match {*count=1*} [errorstat NOPERM]
+ assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
+ assert_equal [s total_error_replies] 1
+ r config resetstat
+ assert_match {} [errorstat NOPERM]
+ }
+ }
+}
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 357089c8f..88fcd6f51 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -38,7 +38,7 @@ start_server {tags {"memefficiency"}} {
run_solo {defrag} {
start_server {tags {"defrag"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save ""}} {
- if {[string match {*jemalloc*} [s mem_allocator]]} {
+ if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} {
test "Active defrag" {
r config set hz 100
r config set activedefrag no
diff --git a/tests/unit/moduleapi/propagate.tcl b/tests/unit/moduleapi/propagate.tcl
index aa0f55e5e..adebd37a6 100644
--- a/tests/unit/moduleapi/propagate.tcl
+++ b/tests/unit/moduleapi/propagate.tcl
@@ -14,25 +14,103 @@ tags "modules" {
# Start the replication process...
$replica replicaof $master_host $master_port
wait_for_sync $replica
-
after 1000
- $master propagate-test
- wait_for_condition 5000 10 {
- ([$replica get timer] eq "10") && \
- ([$replica get a-from-thread] eq "10")
- } else {
- fail "The two counters don't match the expected value."
+ test {module propagates from timer} {
+ set repl [attach_to_replication_stream]
+
+ $master propagate-test.timer
+
+ wait_for_condition 5000 10 {
+ [$replica get timer] eq "3"
+ } else {
+ fail "The timer counter doesn't match the expected value."
+ }
+
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {incr timer}
+ {exec}
+ {multi}
+ {incr timer}
+ {exec}
+ {multi}
+ {incr timer}
+ {exec}
+ }
+ close_replication_stream $repl
}
- $master propagate-test-2
- $master propagate-test-3
- $master multi
- $master propagate-test-2
- $master propagate-test-3
- $master exec
- wait_for_ofs_sync $master $replica
+ test {module propagates from thread} {
+ set repl [attach_to_replication_stream]
+
+ $master propagate-test.thread
+
+ wait_for_condition 5000 10 {
+ [$replica get a-from-thread] eq "3"
+ } else {
+ fail "The a-from-thread counter doesn't match the expected value."
+ }
+
+ assert_replication_stream $repl {
+ {select *}
+ {incr a-from-thread}
+ {incr b-from-thread}
+ {incr a-from-thread}
+ {incr b-from-thread}
+ {incr a-from-thread}
+ {incr b-from-thread}
+ }
+ close_replication_stream $repl
+ }
+ test {module propagates from command} {
+ set repl [attach_to_replication_stream]
+
+ $master propagate-test.simple
+ $master propagate-test.mixed
+
+ # Note the 'after-call' propagation below is out of order (known limitation)
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {incr counter-1}
+ {incr counter-2}
+ {exec}
+ {multi}
+ {incr using-call}
+ {incr after-call}
+ {incr counter-1}
+ {incr counter-2}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
+
+ test {module propagates from multi-exec} {
+ set repl [attach_to_replication_stream]
+
+ $master multi
+ $master propagate-test.simple
+ $master propagate-test.mixed
+ $master exec
+ wait_for_ofs_sync $master $replica
+
+ # Note the 'after-call' propagation below is out of order (known limitation)
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {incr counter-1}
+ {incr counter-2}
+ {incr using-call}
+ {incr after-call}
+ {incr counter-1}
+ {incr counter-2}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
assert_equal [s -1 unexpected_error_replies] 0
}
}
@@ -47,11 +125,11 @@ tags "modules aof" {
r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite.
waitForBgrewriteaof r
- r propagate-test-2
- r propagate-test-3
+ r propagate-test.simple
+ r propagate-test.mixed
r multi
- r propagate-test-2
- r propagate-test-3
+ r propagate-test.simple
+ r propagate-test.mixed
r exec
# Load the AOF
diff --git a/tests/unit/multi.tcl b/tests/unit/multi.tcl
index 43259b1c0..e22b6d43d 100644
--- a/tests/unit/multi.tcl
+++ b/tests/unit/multi.tcl
@@ -196,6 +196,29 @@ start_server {tags {"multi"}} {
r exec
} {PONG}
+ test {SWAPDB is able to touch the watched keys that exist} {
+ r flushall
+ r select 0
+ r set x 30
+ r watch x ;# make sure x (set to 30) doesn't change (SWAPDB will "delete" it)
+ r swapdb 0 1
+ r multi
+ r ping
+ r exec
+ } {}
+
+ test {SWAPDB is able to touch the watched keys that do not exist} {
+ r flushall
+ r select 1
+ r set x 30
+ r select 0
+ r watch x ;# make sure the key x (currently missing) doesn't change (SWAPDB will create it)
+ r swapdb 0 1
+ r multi
+ r ping
+ r exec
+ } {}
+
test {WATCH is able to remember the DB a key belongs to} {
r select 5
r set x 30
@@ -299,10 +322,14 @@ start_server {tags {"multi"}} {
r multi
r del foo
r exec
+
+ # add another command so that when we see it we know multi-exec wasn't
+ # propagated
+ r incr foo
+
assert_replication_stream $repl {
{select *}
- {multi}
- {exec}
+ {incr foo}
}
close_replication_stream $repl
}
@@ -521,4 +548,83 @@ start_server {tags {"multi"}} {
list $m $res
} {OK {{} {} {} {} {} {} {} {}}}
+
+ test {MULTI propagation of PUBLISH} {
+ set repl [attach_to_replication_stream]
+
+ # make sure that PUBLISH inside MULTI is propagated in a transaction
+ r multi
+ r publish bla bla
+ r exec
+
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {publish bla bla}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
+
+ test {MULTI propagation of SCRIPT LOAD} {
+ set repl [attach_to_replication_stream]
+
+ # make sure that SCRIPT LOAD inside MULTI is propagated in a transaction
+ r multi
+ r script load {redis.call('set', KEYS[1], 'foo')}
+ set res [r exec]
+ set sha [lindex $res 0]
+
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {script load *}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
+
+ test {MULTI propagation of EVAL} {
+ set repl [attach_to_replication_stream]
+
+ # make sure that EVAL inside MULTI is propagated in a transaction
+ r config set lua-replicate-commands no
+ r multi
+ r eval {redis.call('set', KEYS[1], 'bar')} 1 bar
+ r exec
+
+ assert_replication_stream $repl {
+ {select *}
+ {multi}
+ {eval *}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
+
+ tags {"stream"} {
+ test {MULTI propagation of XREADGROUP} {
+ # stream is a special case because it calls propagate() directly for XREADGROUP
+ set repl [attach_to_replication_stream]
+
+ r XADD mystream * foo bar
+ r XGROUP CREATE mystream mygroup 0
+
+ # make sure the XCLAIM (propagated by XREADGROUP) is indeed inside MULTI/EXEC
+ r multi
+ r XREADGROUP GROUP mygroup consumer1 STREAMS mystream ">"
+ r exec
+
+ assert_replication_stream $repl {
+ {select *}
+ {xadd *}
+ {xgroup CREATE *}
+ {multi}
+ {xclaim *}
+ {exec}
+ }
+ close_replication_stream $repl
+ }
+ }
+
}
diff --git a/tests/unit/pause.tcl b/tests/unit/pause.tcl
new file mode 100644
index 000000000..9f5cfd607
--- /dev/null
+++ b/tests/unit/pause.tcl
@@ -0,0 +1,200 @@
+start_server {tags {"pause"}} {
+ test "Test read commands are not blocked by client pause" {
+ r client PAUSE 100000000 WRITE
+ set rd [redis_deferring_client]
+ $rd GET FOO
+ $rd PING
+ $rd INFO
+ assert_equal [s 0 blocked_clients] 0
+ r client unpause
+ $rd close
+ }
+
+ test "Test write commands are paused by RO" {
+ r client PAUSE 100000000 WRITE
+
+ set rd [redis_deferring_client]
+ $rd SET FOO BAR
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Clients are not blocked"
+ }
+
+ r client unpause
+ assert_match "OK" [$rd read]
+ $rd close
+ }
+
+ test "Test special commands are paused by RO" {
+ r PFADD pause-hll test
+ r client PAUSE 100000000 WRITE
+
+ # Test that pfcount, which can replicate, is also blocked
+ set rd [redis_deferring_client]
+ $rd PFCOUNT pause-hll
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Clients are not blocked"
+ }
+
+ # Test that publish, which adds the message to the replication
+ # stream, is blocked.
+ set rd2 [redis_deferring_client]
+ $rd2 publish foo bar
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {2}
+ } else {
+ fail "Clients are not blocked"
+ }
+
+ # Test that SCRIPT LOAD, which is replicated, is also blocked.
+ set rd3 [redis_deferring_client]
+ $rd3 script load "return 1"
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {3}
+ } else {
+ fail "Clients are not blocked"
+ }
+
+ r client unpause
+ assert_match "1" [$rd read]
+ assert_match "0" [$rd2 read]
+ assert_match "*" [$rd3 read]
+ $rd close
+ $rd2 close
+ $rd3 close
+ }
+
+ test "Test read/admin multi-execs are not blocked by pause RO" {
+ r SET FOO BAR
+ r client PAUSE 100000000 WRITE
+ set rd [redis_deferring_client]
+ $rd MULTI
+ assert_equal [$rd read] "OK"
+ $rd PING
+ assert_equal [$rd read] "QUEUED"
+ $rd GET FOO
+ assert_equal [$rd read] "QUEUED"
+ $rd EXEC
+ assert_equal [s 0 blocked_clients] 0
+ r client unpause
+ assert_match "PONG BAR" [$rd read]
+ $rd close
+ }
+
+ test "Test write multi-execs are blocked by pause RO" {
+ set rd [redis_deferring_client]
+ $rd MULTI
+ assert_equal [$rd read] "OK"
+ $rd SET FOO BAR
+ r client PAUSE 100000000 WRITE
+ assert_equal [$rd read] "QUEUED"
+ $rd EXEC
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Clients are not blocked"
+ }
+ r client unpause
+ assert_match "OK" [$rd read]
+ $rd close
+ }
+
+ test "Test scripts are blocked by pause RO" {
+ r client PAUSE 100000000 WRITE
+ set rd [redis_deferring_client]
+ $rd EVAL "return 1" 0
+
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Clients are not blocked"
+ }
+ r client unpause
+ assert_match "1" [$rd read]
+ $rd close
+ }
+
+ test "Test multiple clients can be queued up and unblocked" {
+ r client PAUSE 100000000 WRITE
+ set clients [list [redis_deferring_client] [redis_deferring_client] [redis_deferring_client]]
+ foreach client $clients {
+ $client SET FOO BAR
+ }
+
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {3}
+ } else {
+ fail "Clients are not blocked"
+ }
+ r client unpause
+ foreach client $clients {
+ assert_match "OK" [$client read]
+ $client close
+ }
+ }
+
+ test "Test clients with syntax errors will get responses immediately" {
+ r client PAUSE 100000000 WRITE
+ catch {r set FOO} err
+ assert_match "ERR wrong number of arguments for *" $err
+ r client unpause
+ }
+
+ test "Test both active and passive expiries are skipped during client pause" {
+ set expired_keys [s 0 expired_keys]
+ r multi
+ r set foo bar PX 10
+ r set bar foo PX 10
+ r client PAUSE 100000000 WRITE
+ r exec
+
+ wait_for_condition 10 100 {
+ [r get foo] == {} && [r get bar] == {}
+ } else {
+ fail "Keys were never logically expired"
+ }
+
+ # No keys should actually have been expired
+ assert_match $expired_keys [s 0 expired_keys]
+
+ r client unpause
+
+ # Force the keys to expire
+ r get foo
+ r get bar
+
+ # Now that clients have been unpaused, expires should go through
+ assert_match [expr $expired_keys + 2] [s 0 expired_keys]
+ }
+
+ test "Test that client pause starts at the end of a transaction" {
+ r MULTI
+ r SET FOO1 BAR
+ r client PAUSE 100000000 WRITE
+ r SET FOO2 BAR
+ r exec
+
+ set rd [redis_deferring_client]
+ $rd SET FOO3 BAR
+
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Clients are not blocked"
+ }
+
+ assert_match "BAR" [r GET FOO1]
+ assert_match "BAR" [r GET FOO2]
+ assert_match "" [r GET FOO3]
+
+ r client unpause
+ assert_match "OK" [$rd read]
+ $rd close
+ }
+
+ # Make sure we unpause at the end
+ r client unpause
+}
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 0da1eda9f..6fd152594 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -563,6 +563,30 @@ start_server {tags {"scripting"}} {
} e
set e
} {*wrong number*}
+
+ test {Script with RESP3 map} {
+ set expected_dict [dict create field value]
+ set expected_list [list field value]
+
+ # Sanity test for RESP3 without scripts
+ r HELLO 3
+ r hset hash field value
+ set res [r hgetall hash]
+ assert_equal $res $expected_dict
+
+ # Test RESP3 client with script in both RESP2 and RESP3 modes
+ set res [r eval {redis.setresp(3); return redis.call('hgetall', KEYS[1])} 1 hash]
+ assert_equal $res $expected_dict
+ set res [r eval {redis.setresp(2); return redis.call('hgetall', KEYS[1])} 1 hash]
+ assert_equal $res $expected_list
+
+ # Test RESP2 client with script in both RESP2 and RESP3 modes
+ r HELLO 2
+ set res [r eval {redis.setresp(3); return redis.call('hgetall', KEYS[1])} 1 hash]
+ assert_equal $res $expected_list
+ set res [r eval {redis.setresp(2); return redis.call('hgetall', KEYS[1])} 1 hash]
+ assert_equal $res $expected_list
+ }
}
# Start a new server since the last test in this stanza will kill the
diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl
index fc2800791..88cf9dc42 100644
--- a/tests/unit/tracking.tcl
+++ b/tests/unit/tracking.tcl
@@ -132,7 +132,24 @@ start_server {tags {"tracking"}} {
test {HELLO 3 reply is correct} {
set reply [r HELLO 3]
- assert {[lindex $reply 2] eq {proto 3}}
+ assert_equal [dict get $reply proto] 3
+ }
+
+ test {HELLO without protover} {
+ set reply [r HELLO 3]
+ assert_equal [dict get $reply proto] 3
+
+ set reply [r HELLO]
+ assert_equal [dict get $reply proto] 3
+
+ set reply [r HELLO 2]
+ assert_equal [dict get $reply proto] 2
+
+ set reply [r HELLO]
+ assert_equal [dict get $reply proto] 2
+
+ # restore RESP3 for next test
+ r HELLO 3
}
test {RESP3 based basic invalidation} {
@@ -306,7 +323,25 @@ start_server {tags {"tracking"}} {
set ping_reply [$rd read]
assert {$inv_msg eq {invalidate key1}}
assert {$ping_reply eq {PONG}}
- }
+ }
+
+ test {BCAST with prefix collisions throw errors} {
+ set r [redis_client]
+ catch {$r CLIENT TRACKING ON BCAST PREFIX FOOBAR PREFIX FOO} output
+ assert_match {ERR Prefix 'FOOBAR'*'FOO'*} $output
+
+ catch {$r CLIENT TRACKING ON BCAST PREFIX FOO PREFIX FOOBAR} output
+ assert_match {ERR Prefix 'FOO'*'FOOBAR'*} $output
+
+ $r CLIENT TRACKING ON BCAST PREFIX FOO PREFIX BAR
+ catch {$r CLIENT TRACKING ON BCAST PREFIX FO} output
+ assert_match {ERR Prefix 'FO'*'FOO'*} $output
+
+ catch {$r CLIENT TRACKING ON BCAST PREFIX BARB} output
+ assert_match {ERR Prefix 'BARB'*'BAR'*} $output
+
+ $r CLIENT TRACKING OFF
+ }
test {Tracking gets notification on tracking table key eviction} {
$rd_redirection HELLO 2
@@ -398,6 +433,85 @@ start_server {tags {"tracking"}} {
assert {$total_prefixes == 1}
}
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking off} {
+ r CLIENT TRACKING off
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {off} $flags
+ set redirect [dict get $res redirect]
+ assert_equal {-1} $redirect
+ set prefixes [dict get $res prefixes]
+ assert_equal {} $prefixes
+ }
+
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking on} {
+ r CLIENT TRACKING on
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on} $flags
+ set redirect [dict get $res redirect]
+ assert_equal {0} $redirect
+ set prefixes [dict get $res prefixes]
+ assert_equal {} $prefixes
+ }
+
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking on with options} {
+ r CLIENT TRACKING on REDIRECT $redir_id noloop
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on noloop} $flags
+ set redirect [dict get $res redirect]
+ assert_equal $redir_id $redirect
+ set prefixes [dict get $res prefixes]
+ assert_equal {} $prefixes
+ }
+
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking optin} {
+ r CLIENT TRACKING off
+ r CLIENT TRACKING on optin
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on optin} $flags
+ set redirect [dict get $res redirect]
+ assert_equal {0} $redirect
+ set prefixes [dict get $res prefixes]
+ assert_equal {} $prefixes
+
+ r CLIENT CACHING yes
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on optin caching-yes} $flags
+ }
+
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking optout} {
+ r CLIENT TRACKING off
+ r CLIENT TRACKING on optout
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on optout} $flags
+ set redirect [dict get $res redirect]
+ assert_equal {0} $redirect
+ set prefixes [dict get $res prefixes]
+ assert_equal {} $prefixes
+
+ r CLIENT CACHING no
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on optout caching-no} $flags
+ }
+
+ test {CLIENT TRACKINGINFO provides reasonable results when tracking bcast mode} {
+ r CLIENT TRACKING off
+ r CLIENT TRACKING on BCAST PREFIX foo PREFIX bar
+ set res [r client trackinginfo]
+ set flags [dict get $res flags]
+ assert_equal {on bcast} $flags
+ set redirect [dict get $res redirect]
+ assert_equal {0} $redirect
+ set prefixes [lsort [dict get $res prefixes]]
+ assert_equal {bar foo} $prefixes
+ }
+
$rd_redirection close
$rd close
}
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index 61ca23377..9be5dd93b 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -123,6 +123,17 @@ start_server {
test {R/LPOP against empty list} {
r lpop non-existing-list
} {}
+
+ test {R/LPOP with the optional count argument} {
+ assert_equal 7 [r lpush listcount aa bb cc dd ee ff gg]
+ assert_equal {} [r lpop listcount 0]
+ assert_equal {gg} [r lpop listcount 1]
+ assert_equal {ff ee} [r lpop listcount 2]
+ assert_equal {aa bb} [r rpop listcount 2]
+ assert_equal {cc} [r rpop listcount 1]
+ assert_equal {dd} [r rpop listcount 123]
+ assert_error "*ERR*range*" {r lpop forbarqaz -123}
+ }
test {Variadic RPUSH/LPUSH} {
r del mylist
@@ -947,6 +958,12 @@ start_server {
assert_equal {} [r lrange nosuchkey 0 1]
}
+ test {LRANGE with start > end yields an empty array for backward compatibility} {
+ create_list mylist "1 2 3"
+ assert_equal {} [r lrange mylist 1 0]
+ assert_equal {} [r lrange mylist -1 -2]
+ }
+
foreach {type large} [array get largevalue] {
proc trim_list {type min max} {
upvar 1 large large
diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl
index 91dc2245e..f8de0741d 100644
--- a/tests/unit/type/stream-cgroups.tcl
+++ b/tests/unit/type/stream-cgroups.tcl
@@ -27,7 +27,7 @@ start_server {
# and not the element "foo bar" which was pre existing in the
# stream (see previous test)
set reply [
- r XREADGROUP GROUP mygroup client-1 STREAMS mystream ">"
+ r XREADGROUP GROUP mygroup consumer-1 STREAMS mystream ">"
]
assert {[llength [lindex $reply 0 1]] == 2}
lindex $reply 0 1 0 1
@@ -39,13 +39,13 @@ start_server {
r XADD mystream * d 4
# Read a few elements using a different consumer name
set reply [
- r XREADGROUP GROUP mygroup client-2 STREAMS mystream ">"
+ r XREADGROUP GROUP mygroup consumer-2 STREAMS mystream ">"
]
assert {[llength [lindex $reply 0 1]] == 2}
assert {[lindex $reply 0 1 0 1] eq {c 3}}
- set r1 [r XREADGROUP GROUP mygroup client-1 COUNT 10 STREAMS mystream 0]
- set r2 [r XREADGROUP GROUP mygroup client-2 COUNT 10 STREAMS mystream 0]
+ set r1 [r XREADGROUP GROUP mygroup consumer-1 COUNT 10 STREAMS mystream 0]
+ set r2 [r XREADGROUP GROUP mygroup consumer-2 COUNT 10 STREAMS mystream 0]
assert {[lindex $r1 0 1 0 1] eq {a 1}}
assert {[lindex $r2 0 1 0 1] eq {c 3}}
}
@@ -56,9 +56,9 @@ start_server {
for {set j 0} {$j < 4} {incr j} {
set item [lindex $pending $j]
if {$j < 2} {
- set owner client-1
+ set owner consumer-1
} else {
- set owner client-2
+ set owner consumer-2
}
assert {[lindex $item 1] eq $owner}
assert {[lindex $item 1] eq $owner}
@@ -66,7 +66,7 @@ start_server {
}
test {XPENDING can return single consumer items} {
- set pending [r XPENDING mystream mygroup - + 10 client-1]
+ set pending [r XPENDING mystream mygroup - + 10 consumer-1]
assert {[llength $pending] == 2}
}
@@ -77,9 +77,9 @@ start_server {
test {XPENDING with IDLE} {
after 20
- set pending [r XPENDING mystream mygroup IDLE 99999999 - + 10 client-1]
+ set pending [r XPENDING mystream mygroup IDLE 99999999 - + 10 consumer-1]
assert {[llength $pending] == 0}
- set pending [r XPENDING mystream mygroup IDLE 1 - + 10 client-1]
+ set pending [r XPENDING mystream mygroup IDLE 1 - + 10 consumer-1]
assert {[llength $pending] == 2}
set pending [r XPENDING mystream mygroup IDLE 99999999 - + 10]
assert {[llength $pending] == 0}
@@ -101,12 +101,12 @@ start_server {
}
}
- test {XACK is able to remove items from the client/group PEL} {
- set pending [r XPENDING mystream mygroup - + 10 client-1]
+ test {XACK is able to remove items from the consumer/group PEL} {
+ set pending [r XPENDING mystream mygroup - + 10 consumer-1]
set id1 [lindex $pending 0 0]
set id2 [lindex $pending 1 0]
assert {[r XACK mystream mygroup $id1] eq 1}
- set pending [r XPENDING mystream mygroup - + 10 client-1]
+ set pending [r XPENDING mystream mygroup - + 10 consumer-1]
assert {[llength $pending] == 1}
set id [lindex $pending 0 0]
assert {$id eq $id2}
@@ -242,52 +242,52 @@ start_server {
set id3 [r XADD mystream * c 3]
r XGROUP CREATE mystream mygroup 0
- # Client 1 reads item 1 from the stream without acknowledgements.
- # Client 2 then claims pending item 1 from the PEL of client 1
+ # Consumer 1 reads item 1 from the stream without acknowledgements.
+ # Consumer 2 then claims pending item 1 from the PEL of consumer 1
set reply [
- r XREADGROUP GROUP mygroup client1 count 1 STREAMS mystream >
+ r XREADGROUP GROUP mygroup consumer1 count 1 STREAMS mystream >
]
assert {[llength [lindex $reply 0 1 0 1]] == 2}
assert {[lindex $reply 0 1 0 1] eq {a 1}}
# make sure the entry is present in both the gorup, and the right consumer
assert {[llength [r XPENDING mystream mygroup - + 10]] == 1}
- assert {[llength [r XPENDING mystream mygroup - + 10 client1]] == 1}
- assert {[llength [r XPENDING mystream mygroup - + 10 client2]] == 0}
+ assert {[llength [r XPENDING mystream mygroup - + 10 consumer1]] == 1}
+ assert {[llength [r XPENDING mystream mygroup - + 10 consumer2]] == 0}
- r debug sleep 0.2
+ after 200
set reply [
- r XCLAIM mystream mygroup client2 10 $id1
+ r XCLAIM mystream mygroup consumer2 10 $id1
]
assert {[llength [lindex $reply 0 1]] == 2}
assert {[lindex $reply 0 1] eq {a 1}}
# make sure the entry is present in both the gorup, and the right consumer
assert {[llength [r XPENDING mystream mygroup - + 10]] == 1}
- assert {[llength [r XPENDING mystream mygroup - + 10 client1]] == 0}
- assert {[llength [r XPENDING mystream mygroup - + 10 client2]] == 1}
+ assert {[llength [r XPENDING mystream mygroup - + 10 consumer1]] == 0}
+ assert {[llength [r XPENDING mystream mygroup - + 10 consumer2]] == 1}
- # Client 1 reads another 2 items from stream
- r XREADGROUP GROUP mygroup client1 count 2 STREAMS mystream >
- r debug sleep 0.2
+ # Consumer 1 reads another 2 items from stream
+ r XREADGROUP GROUP mygroup consumer1 count 2 STREAMS mystream >
+ after 200
- # Delete item 2 from the stream. Now client 1 has PEL that contains
- # only item 3. Try to use client 2 to claim the deleted item 2
- # from the PEL of client 1, this should return nil
+ # Delete item 2 from the stream. Now consumer 1 has a PEL that contains
+ # only item 3. Try to use consumer 2 to claim the deleted item 2
+ # from the PEL of consumer 1; this should return nil
r XDEL mystream $id2
set reply [
- r XCLAIM mystream mygroup client2 10 $id2
+ r XCLAIM mystream mygroup consumer2 10 $id2
]
assert {[llength $reply] == 1}
assert_equal "" [lindex $reply 0]
- # Delete item 3 from the stream. Now client 1 has PEL that is empty.
- # Try to use client 2 to claim the deleted item 3 from the PEL
- # of client 1, this should return nil
- r debug sleep 0.2
+ # Delete item 3 from the stream. Now consumer 1 has an empty PEL.
+ # Try to use consumer 2 to claim the deleted item 3 from the PEL
+ # of consumer 1; this should return nil
+ after 200
r XDEL mystream $id3
set reply [
- r XCLAIM mystream mygroup client2 10 $id3
+ r XCLAIM mystream mygroup consumer2 10 $id3
]
assert {[llength $reply] == 1}
assert_equal "" [lindex $reply 0]
@@ -301,16 +301,16 @@ start_server {
set id3 [r XADD mystream * c 3]
r XGROUP CREATE mystream mygroup 0
- # Client 1 reads item 1 from the stream without acknowledgements.
- # Client 2 then claims pending item 1 from the PEL of client 1
+ # Consumer 1 reads item 1 from the stream without acknowledgements.
+ # Consumer 2 then claims pending item 1 from the PEL of consumer 1
set reply [
- r XREADGROUP GROUP mygroup client1 count 1 STREAMS mystream >
+ r XREADGROUP GROUP mygroup consumer1 count 1 STREAMS mystream >
]
assert {[llength [lindex $reply 0 1 0 1]] == 2}
assert {[lindex $reply 0 1 0 1] eq {a 1}}
- r debug sleep 0.2
+ after 200
set reply [
- r XCLAIM mystream mygroup client2 10 $id1
+ r XCLAIM mystream mygroup consumer2 10 $id1
]
assert {[llength [lindex $reply 0 1]] == 2}
assert {[lindex $reply 0 1] eq {a 1}}
@@ -321,10 +321,10 @@ start_server {
assert {[llength [lindex $reply 0]] == 4}
assert {[lindex $reply 0 3] == 2}
- # Client 3 then claims pending item 1 from the PEL of client 2 using JUSTID
- r debug sleep 0.2
+ # Consumer 3 then claims pending item 1 from the PEL of consumer 2 using JUSTID
+ after 200
set reply [
- r XCLAIM mystream mygroup client3 10 $id1 JUSTID
+ r XCLAIM mystream mygroup consumer3 10 $id1 JUSTID
]
assert {[llength $reply] == 1}
assert {[lindex $reply 0] eq $id1}
@@ -344,17 +344,122 @@ start_server {
set id3 [r XADD mystream * c 3]
r XGROUP CREATE mystream mygroup 0
- set reply [r XREADGROUP GROUP mygroup client1 count 1 STREAMS mystream >]
+ set reply [r XREADGROUP GROUP mygroup consumer1 count 1 STREAMS mystream >]
assert {[llength [lindex $reply 0 1 0 1]] == 2}
assert {[lindex $reply 0 1 0 1] eq {a 1}}
- r debug sleep 0.2
+ after 200
# re-claim with the same consumer that already has it
- assert {[llength [r XCLAIM mystream mygroup client1 10 $id1]] == 1}
+ assert {[llength [r XCLAIM mystream mygroup consumer1 10 $id1]] == 1}
# make sure the entry is still in the PEL
set reply [r XPENDING mystream mygroup - + 10]
assert {[llength $reply] == 1}
- assert {[lindex $reply 0 1] eq {client1}}
+ assert {[lindex $reply 0 1] eq {consumer1}}
+ }
+
+ test {XAUTOCLAIM can claim PEL items from another consumer} {
+ # Add 3 items into the stream, and create a consumer group
+ r del mystream
+ set id1 [r XADD mystream * a 1]
+ set id2 [r XADD mystream * b 2]
+ set id3 [r XADD mystream * c 3]
+ r XGROUP CREATE mystream mygroup 0
+
+ # Consumer 1 reads item 1 from the stream without acknowledgements.
+ # Consumer 2 then claims pending item 1 from the PEL of consumer 1
+ set reply [r XREADGROUP GROUP mygroup consumer1 count 1 STREAMS mystream >]
+ assert_equal [llength [lindex $reply 0 1 0 1]] 2
+ assert_equal [lindex $reply 0 1 0 1] {a 1}
+ after 200
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 - COUNT 1]
+ assert_equal [llength $reply] 2
+ assert_equal [lindex $reply 0] $id1
+ assert_equal [llength [lindex $reply 1]] 1
+ assert_equal [llength [lindex $reply 1 0]] 2
+ assert_equal [llength [lindex $reply 1 0 1]] 2
+ assert_equal [lindex $reply 1 0 1] {a 1}
+
+ # Consumer 1 reads another 2 items from stream
+ r XREADGROUP GROUP mygroup consumer1 count 2 STREAMS mystream >
+
+ # For min-idle-time
+ after 200
+
+ # Delete item 2 from the stream. Now consumer 1 has a PEL that contains
+ # only item 3. Try to use consumer 2 to claim the deleted item 2
+ # from the PEL of consumer 1; this should return nil
+ r XDEL mystream $id2
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 - COUNT 2]
+ # id1 is self-claimed here but not id2 ('count' was set to 2)
+ assert_equal [llength $reply] 2
+ assert_equal [lindex $reply 0] $id2
+ assert_equal [llength [lindex $reply 1]] 2
+ assert_equal [llength [lindex $reply 1 0]] 2
+ assert_equal [llength [lindex $reply 1 0 1]] 2
+ assert_equal [lindex $reply 1 0 1] {a 1}
+ assert_equal [lindex $reply 1 1] ""
+
+ # Delete item 3 from the stream. Now consumer 1 has an empty PEL.
+ # Try to use consumer 2 to claim the deleted item 3 from the PEL
+ # of consumer 1; this should return nil
+ after 200
+ r XDEL mystream $id3
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 - JUSTID]
+ # id1 is self-claimed here but not id2 and id3 ('count' is default 100)
+
+ # we also test the JUSTID modifier here. note that, when using JUSTID,
+ # deleted entries are returned in reply (consistent with XCLAIM).
+
+ assert_equal [llength $reply] 2
+ assert_equal [lindex $reply 0] "0-0"
+ assert_equal [llength [lindex $reply 1]] 3
+ assert_equal [lindex $reply 1 0] $id1
+ assert_equal [lindex $reply 1 1] $id2
+ assert_equal [lindex $reply 1 2] $id3
+ }
+
+ test {XAUTOCLAIM as an iterator} {
+ # Add 5 items into the stream, and create a consumer group
+ r del mystream
+ set id1 [r XADD mystream * a 1]
+ set id2 [r XADD mystream * b 2]
+ set id3 [r XADD mystream * c 3]
+ set id4 [r XADD mystream * d 4]
+ set id5 [r XADD mystream * e 5]
+ r XGROUP CREATE mystream mygroup 0
+
+ # Read 5 messages into consumer1
+ r XREADGROUP GROUP mygroup consumer1 count 90 STREAMS mystream >
+
+ # For min-idle-time
+ after 200
+
+ # Claim 2 entries
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 - COUNT 2]
+ assert_equal [llength $reply] 2
+ set cursor [lindex $reply 0]
+ assert_equal $cursor $id2
+ assert_equal [llength [lindex $reply 1]] 2
+ assert_equal [llength [lindex $reply 1 0 1]] 2
+ assert_equal [lindex $reply 1 0 1] {a 1}
+
+ # Claim 2 more entries
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 ($cursor COUNT 2]
+ assert_equal [llength $reply] 2
+ set cursor [lindex $reply 0]
+ assert_equal $cursor $id4
+ assert_equal [llength [lindex $reply 1]] 2
+ assert_equal [llength [lindex $reply 1 0 1]] 2
+ assert_equal [lindex $reply 1 0 1] {c 3}
+
+ # Claim last entry
+ set reply [r XAUTOCLAIM mystream mygroup consumer2 10 ($cursor COUNT 2]
+ assert_equal [llength $reply] 2
+ set cursor [lindex $reply 0]
+ assert_equal $cursor {0-0}
+ assert_equal [llength [lindex $reply 1]] 1
+ assert_equal [llength [lindex $reply 1 0 1]] 2
+ assert_equal [lindex $reply 1 0 1] {e 5}
}
test {XINFO FULL output} {
@@ -477,7 +582,7 @@ start_server {
assert {$curr_grpinfo == $grpinfo}
set n_consumers [lindex $grpinfo 3]
- # Bob should be created only when there will be new data for this client
+ # Bob should be created only when there will be new data for this consumer
assert_equal $n_consumers 2
set reply [r xinfo consumers mystream mygroup]
set consumer_info [lindex $reply 0]
diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl
index 63cc697c2..a89a65299 100644
--- a/tests/unit/type/stream.tcl
+++ b/tests/unit/type/stream.tcl
@@ -94,6 +94,7 @@ start_server {
r XADD mystream MAXLEN 5 * yitem $j
}
}
+ assert {[r xlen mystream] == 5}
set res [r xrange mystream - +]
set expected 995
foreach r $res {
@@ -138,6 +139,39 @@ start_server {
assert_equal [lindex $items 1 1] {item 2 value b}
}
+ test {XADD with MINID option} {
+ r DEL mystream
+ for {set j 1} {$j < 1001} {incr j} {
+ set minid 1000
+ if {$j >= 5} {
+ set minid [expr {$j-5}]
+ }
+ if {rand() < 0.9} {
+ r XADD mystream MINID $minid $j xitem $j
+ } else {
+ r XADD mystream MINID $minid $j yitem $j
+ }
+ }
+ assert {[r xlen mystream] == 6}
+ set res [r xrange mystream - +]
+ set expected 995
+ foreach r $res {
+ assert {[lindex $r 1 1] == $expected}
+ incr expected
+ }
+ }
+
+ test {XTRIM with MINID option} {
+ r DEL mystream
+ r XADD mystream 1-0 f v
+ r XADD mystream 2-0 f v
+ r XADD mystream 3-0 f v
+ r XADD mystream 4-0 f v
+ r XADD mystream 5-0 f v
+ r XTRIM mystream MINID = 3-0
+ assert_equal [r XRANGE mystream - +] {{3-0 {f v}} {4-0 {f v}} {5-0 {f v}}}
+ }
+
test {XADD mass insertion and XLEN} {
r DEL mystream
r multi
@@ -448,7 +482,49 @@ start_server {
assert {[r XLEN mystream] == 400}
}
-
+ test {XADD with LIMIT consecutive calls} {
+ r del mystream
+ r config set stream-node-max-entries 10
+ for {set j 0} {$j < 100} {incr j} {
+ r XADD mystream * xitem v
+ }
+ r XADD mystream MAXLEN ~ 55 LIMIT 30 * xitem v
+ assert {[r xlen mystream] == 71}
+ r XADD mystream MAXLEN ~ 55 LIMIT 30 * xitem v
+ assert {[r xlen mystream] == 62}
+ r config set stream-node-max-entries 100
+ }
+
+ test {XTRIM with ~ is limited} {
+ r del mystream
+ r config set stream-node-max-entries 1
+ for {set j 0} {$j < 102} {incr j} {
+ r XADD mystream * xitem v
+ }
+ r XTRIM mystream MAXLEN ~ 1
+ assert {[r xlen mystream] == 2}
+ r config set stream-node-max-entries 100
+ }
+
+ test {XTRIM without ~ is not limited} {
+ r del mystream
+ r config set stream-node-max-entries 1
+ for {set j 0} {$j < 102} {incr j} {
+ r XADD mystream * xitem v
+ }
+ r XTRIM mystream MAXLEN 1
+ assert {[r xlen mystream] == 1}
+ r config set stream-node-max-entries 100
+ }
+
+ test {XTRIM without ~ and with LIMIT} {
+ r del mystream
+ r config set stream-node-max-entries 1
+ for {set j 0} {$j < 102} {incr j} {
+ r XADD mystream * xitem v
+ }
+ assert_error ERR* {r XTRIM mystream MAXLEN 1 LIMIT 30}
+ }
}
start_server {tags {"stream"} overrides {appendonly yes}} {
@@ -467,6 +543,22 @@ start_server {tags {"stream"} overrides {appendonly yes}} {
}
start_server {tags {"stream"} overrides {appendonly yes}} {
+ test {XADD with MINID > lastid can propagate correctly} {
+ for {set j 0} {$j < 100} {incr j} {
+ set id [expr {$j+1}]
+ r XADD mystream $id xitem v
+ }
+ r XADD mystream MINID 1 * xitem v
+ incr j
+ assert {[r xlen mystream] == $j}
+ r debug loadaof
+ r XADD mystream * xitem v
+ incr j
+ assert {[r xlen mystream] == $j}
+ }
+}
+
+start_server {tags {"stream"} overrides {appendonly yes}} {
test {XADD with ~ MAXLEN can propagate correctly} {
for {set j 0} {$j < 100} {incr j} {
r XADD mystream * xitem v
@@ -483,6 +575,52 @@ start_server {tags {"stream"} overrides {appendonly yes}} {
}
start_server {tags {"stream"} overrides {appendonly yes stream-node-max-entries 10}} {
+ test {XADD with ~ MAXLEN and LIMIT can propagate correctly} {
+ for {set j 0} {$j < 100} {incr j} {
+ r XADD mystream * xitem v
+ }
+ r XADD mystream MAXLEN ~ 55 LIMIT 30 * xitem v
+ assert {[r xlen mystream] == 71}
+ r config set stream-node-max-entries 1
+ r debug loadaof
+ r XADD mystream * xitem v
+ assert {[r xlen mystream] == 72}
+ }
+}
+
+start_server {tags {"stream"} overrides {appendonly yes}} {
+ test {XADD with ~ MINID can propagate correctly} {
+ for {set j 0} {$j < 100} {incr j} {
+ set id [expr {$j+1}]
+ r XADD mystream $id xitem v
+ }
+ r XADD mystream MINID ~ $j * xitem v
+ incr j
+ assert {[r xlen mystream] == $j}
+ r config set stream-node-max-entries 1
+ r debug loadaof
+ r XADD mystream * xitem v
+ incr j
+ assert {[r xlen mystream] == $j}
+ }
+}
+
+start_server {tags {"stream"} overrides {appendonly yes stream-node-max-entries 10}} {
+ test {XADD with ~ MINID and LIMIT can propagate correctly} {
+ for {set j 0} {$j < 100} {incr j} {
+ set id [expr {$j+1}]
+ r XADD mystream $id xitem v
+ }
+ r XADD mystream MINID ~ 55 LIMIT 30 * xitem v
+ assert {[r xlen mystream] == 71}
+ r config set stream-node-max-entries 1
+ r debug loadaof
+ r XADD mystream * xitem v
+ assert {[r xlen mystream] == 72}
+ }
+}
+
+start_server {tags {"stream"} overrides {appendonly yes stream-node-max-entries 10}} {
test {XTRIM with ~ MAXLEN can propagate correctly} {
for {set j 0} {$j < 100} {incr j} {
r XADD mystream * xitem v
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index c1406797b..8318ebb63 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -1472,4 +1472,86 @@ start_server {tags {"zset"}} {
}
r config set zset-max-ziplist-entries $original_max
}
+
+ test {ZRANGESTORE basic} {
+ r flushall
+ r zadd z1 1 a 2 b 3 c 4 d
+ set res [r zrangestore z2 z1 0 -1]
+ assert_equal $res 4
+ r zrange z2 0 -1 withscores
+ } {a 1 b 2 c 3 d 4}
+
+ test {ZRANGESTORE range} {
+ set res [r zrangestore z2 z1 1 2]
+ assert_equal $res 2
+ r zrange z2 0 -1 withscores
+ } {b 2 c 3}
+
+ test {ZRANGESTORE BYLEX} {
+ set res [r zrangestore z2 z1 \[b \[c BYLEX]
+ assert_equal $res 2
+ r zrange z2 0 -1 withscores
+ } {b 2 c 3}
+
+ test {ZRANGESTORE BYSCORE} {
+ set res [r zrangestore z2 z1 1 2 BYSCORE]
+ assert_equal $res 2
+ r zrange z2 0 -1 withscores
+ } {a 1 b 2}
+
+ test {ZRANGESTORE BYSCORE LIMIT} {
+ set res [r zrangestore z2 z1 0 5 BYSCORE LIMIT 0 2]
+ assert_equal $res 2
+ r zrange z2 0 -1 withscores
+ } {a 1 b 2}
+
+ test {ZRANGESTORE BYSCORE REV LIMIT} {
+ set res [r zrangestore z2 z1 5 0 BYSCORE REV LIMIT 0 2]
+ assert_equal $res 2
+ r zrange z2 0 -1 withscores
+ } {c 3 d 4}
+
+ test {ZRANGE BYSCORE REV LIMIT} {
+ r zrange z1 5 0 BYSCORE REV LIMIT 0 2 WITHSCORES
+ } {d 4 c 3}
+
+ test {ZRANGESTORE - empty range} {
+ set res [r zrangestore z2 z1 5 6]
+ assert_equal $res 0
+ r exists z2
+ } {0}
+
+ test {ZRANGESTORE BYLEX - empty range} {
+ set res [r zrangestore z2 z1 \[f \[g BYLEX]
+ assert_equal $res 0
+ r exists z2
+ } {0}
+
+ test {ZRANGESTORE BYSCORE - empty range} {
+ set res [r zrangestore z2 z1 5 6 BYSCORE]
+ assert_equal $res 0
+ r exists z2
+ } {0}
+
+ test {ZRANGE BYLEX} {
+ r zrange z1 \[b \[c BYLEX
+ } {b c}
+
+ test {ZRANGESTORE invalid syntax} {
+ catch {r zrangestore z2 z1 0 -1 limit 1 2} err
+ assert_match "*syntax*" $err
+ catch {r zrangestore z2 z1 0 -1 WITHSCORES} err
+ assert_match "*syntax*" $err
+ }
+
+ test {ZRANGE invalid syntax} {
+ catch {r zrange z1 0 -1 limit 1 2} err
+ assert_match "*syntax*" $err
+ catch {r zrange z1 0 -1 BYLEX WITHSCORES} err
+ assert_match "*syntax*" $err
+ catch {r zrevrange z1 0 -1 BYSCORE} err
+ assert_match "*syntax*" $err
+ catch {r zrangebyscore z1 0 -1 REV} err
+ assert_match "*syntax*" $err
+ }
}
diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl
index b1500cff8..0a4965c20 100644
--- a/tests/unit/wait.tcl
+++ b/tests/unit/wait.tcl
@@ -5,6 +5,7 @@ start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
+ set slave_pid [srv 0 pid]
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
@@ -33,13 +34,25 @@ start_server {} {
}
test {WAIT should not acknowledge 1 additional copy if slave is blocked} {
- set cmd [rediscli $slave_host $slave_port "debug sleep 5"]
- exec {*}$cmd > /dev/null 2> /dev/null &
- after 1000 ;# Give redis-cli the time to execute the command.
+ exec kill -SIGSTOP $slave_pid
$master set foo 0
$master incr foo
$master incr foo
$master incr foo
- assert {[$master wait 1 3000] == 0}
+ assert {[$master wait 1 1000] == 0}
+ exec kill -SIGCONT $slave_pid
+ assert {[$master wait 1 1000] == 1}
+ }
+
+ test {WAIT implicitly blocks on client pause since ACKs aren't sent} {
+ exec kill -SIGSTOP $slave_pid
+ $master multi
+ $master incr foo
+ $master client pause 10000 write
+ $master exec
+ assert {[$master wait 1 1000] == 0}
+ $master client unpause
+ exec kill -SIGCONT $slave_pid
+ assert {[$master wait 1 1000] == 1}
}
}}