author    Oran Agra <oran@redislabs.com>    2021-02-01 20:11:42 +0200
committer GitHub <noreply@github.com>       2021-02-01 20:11:42 +0200
commit    2dba1e391d3772a8da182d95bde050ffa9d01e4d (patch)
tree      3664bcd3ede605643a18668624f41c846b5e43ab
parent    ec2d180739aa3877a45ec54438c68a7659be5159 (diff)
parent    95338f9cc41fdfd050f122789187db75fda1fe3c (diff)
download  redis-6.2-rc3.tar.gz
Merge 6.2 RC3 (tag: 6.2-rc3)
-rw-r--r--  .github/workflows/ci.yml | 11
-rw-r--r--  .github/workflows/daily.yml | 34
-rw-r--r--  00-RELEASENOTES | 68
-rw-r--r--  redis.conf | 62
-rwxr-xr-x  runtest-moduleapi | 2
-rw-r--r--  sentinel.conf | 18
-rw-r--r--  src/acl.c | 10
-rw-r--r--  src/ae.c | 1
-rw-r--r--  src/ae_epoll.c | 1
-rw-r--r--  src/ae_evport.c | 1
-rw-r--r--  src/ae_kqueue.c | 1
-rw-r--r--  src/anet.c | 43
-rw-r--r--  src/anet.h | 4
-rw-r--r--  src/aof.c | 96
-rw-r--r--  src/blocked.c | 43
-rw-r--r--  src/cluster.c | 94
-rw-r--r--  src/cluster.h | 1
-rw-r--r--  src/config.c | 112
-rw-r--r--  src/db.c | 22
-rw-r--r--  src/defrag.c | 2
-rw-r--r--  src/expire.c | 6
-rw-r--r--  src/help.h | 28
-rw-r--r--  src/lazyfree.c | 18
-rw-r--r--  src/module.c | 1340
-rw-r--r--  src/modules/gendoc.rb | 23
-rw-r--r--  src/networking.c | 35
-rw-r--r--  src/redis-cli.c | 8
-rw-r--r--  src/redismodule.h | 53
-rw-r--r--  src/replication.c | 393
-rw-r--r--  src/scripting.c | 32
-rw-r--r--  src/sds.c | 92
-rw-r--r--  src/sds.h | 8
-rw-r--r--  src/sentinel.c | 608
-rw-r--r--  src/server.c | 292
-rw-r--r--  src/server.h | 64
-rw-r--r--  src/stream.h | 7
-rw-r--r--  src/t_hash.c | 253
-rw-r--r--  src/t_set.c | 36
-rw-r--r--  src/t_stream.c | 29
-rw-r--r--  src/t_string.c | 340
-rw-r--r--  src/t_zset.c | 299
-rw-r--r--  src/util.c | 4
-rw-r--r--  src/util.h | 2
-rw-r--r--  src/version.h | 4
-rw-r--r--  src/ziplist.c | 83
-rw-r--r--  src/ziplist.h | 11
-rw-r--r--  tests/cluster/tests/18-cluster-nodes-slots.tcl | 62
-rw-r--r--  tests/instances.tcl | 46
-rw-r--r--  tests/integration/aof.tcl | 11
-rw-r--r--  tests/integration/corrupt-dump.tcl | 11
-rw-r--r--  tests/integration/failover.tcl | 290
-rw-r--r--  tests/integration/rdb.tcl | 4
-rw-r--r--  tests/integration/redis-benchmark.tcl | 2
-rw-r--r--  tests/integration/replication-4.tcl | 2
-rw-r--r--  tests/integration/replication.tcl | 2
-rw-r--r--  tests/modules/Makefile | 4
-rw-r--r--  tests/modules/blockonbackground.c | 220
-rw-r--r--  tests/modules/blockonkeys.c | 167
-rw-r--r--  tests/modules/stream.c | 258
-rw-r--r--  tests/sentinel/run.tcl | 2
-rw-r--r--  tests/sentinel/tests/00-base.tcl | 2
-rw-r--r--  tests/sentinel/tests/08-hostname-conf.tcl | 67
-rw-r--r--  tests/sentinel/tests/09-acl-support.tcl | 50
-rw-r--r--  tests/sentinel/tests/includes/init-tests.tcl | 8
-rwxr-xr-x  tests/sentinel/tests/includes/notify.sh | 21
-rw-r--r--  tests/sentinel/tests/includes/sentinel.conf | 11
-rw-r--r--  tests/sentinel/tests/includes/start-init-tests.tcl | 18
-rw-r--r--  tests/support/redis.tcl | 1
-rw-r--r--  tests/support/server.tcl | 56
-rw-r--r--  tests/support/util.tcl | 20
-rw-r--r--  tests/test_helper.tcl | 3
-rw-r--r--  tests/unit/dump.tcl | 6
-rw-r--r--  tests/unit/expire.tcl | 128
-rw-r--r--  tests/unit/introspection.tcl | 1
-rw-r--r--  tests/unit/limits.tcl | 2
-rw-r--r--  tests/unit/moduleapi/blockonbackground.tcl | 67
-rw-r--r--  tests/unit/moduleapi/blockonkeys.tcl | 53
-rw-r--r--  tests/unit/moduleapi/stream.tcl | 155
-rw-r--r--  tests/unit/oom-score-adj.tcl | 2
-rw-r--r--  tests/unit/other.tcl | 44
-rw-r--r--  tests/unit/pause.tcl | 2
-rw-r--r--  tests/unit/protocol.tcl | 2
-rw-r--r--  tests/unit/pubsub.tcl | 2
-rw-r--r--  tests/unit/scan.tcl | 2
-rw-r--r--  tests/unit/scripting.tcl | 9
-rw-r--r--  tests/unit/tracking.tcl | 2
-rw-r--r--  tests/unit/type/hash.tcl | 175
-rw-r--r--  tests/unit/type/set.tcl | 2
-rw-r--r--  tests/unit/type/string.tcl | 96
-rw-r--r--  tests/unit/type/zset.tcl | 188
-rw-r--r--  tests/unit/wait.tcl | 2
91 files changed, 6051 insertions, 921 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2582c53a4..2e1e7865c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -58,3 +58,14 @@ jobs:
run: |
yum -y install gcc make
make REDIS_CFLAGS='-Werror'
+
+ build-freebsd:
+ runs-on: macos-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: make
+ uses: vmactions/freebsd-vm@v0.1.0
+ with:
+ usesh: true
+ prepare: pkg install -y gmake
+ run: gmake
diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index 028c44e0c..8fb23bac4 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -99,6 +99,23 @@ jobs:
./runtest-cluster --tls
./runtest-cluster
+ test-ubuntu-io-threads:
+ runs-on: ubuntu-latest
+ if: github.repository == 'redis/redis'
+ timeout-minutes: 14400
+ steps:
+ - uses: actions/checkout@v2
+ - name: make
+ run: |
+ make
+ - name: test
+ run: |
+ sudo apt-get install tcl8.5 tcl-tls
+ ./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network
+ - name: cluster tests
+ run: |
+ ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes
+
test-valgrind:
runs-on: ubuntu-latest
if: github.repository == 'redis/redis'
@@ -186,3 +203,20 @@ jobs:
- name: cluster tests
run: ./runtest-cluster
+ test-freebsd:
+ runs-on: macos-latest
+ if: github.repository == 'redis/redis'
+ timeout-minutes: 14400
+ steps:
+ - uses: actions/checkout@v2
+ - name: test
+ uses: vmactions/freebsd-vm@v0.1.0
+ with:
+ usesh: true
+ prepare: pkg install -y gmake lang/tcl85
+ run: |
+ gmake
+ ./runtest --accurate --verbose --no-latency
+ MAKE=gmake ./runtest-moduleapi --verbose
+ ./runtest-sentinel
+ ./runtest-cluster
diff --git a/00-RELEASENOTES b/00-RELEASENOTES
index 3434d4838..f52a22ed9 100644
--- a/00-RELEASENOTES
+++ b/00-RELEASENOTES
@@ -1,3 +1,53 @@
+Redis 6.2 RC3 Released Tue Feb 1 14:00:00 IST 2021
+================================================================================
+
+Upgrade urgency LOW: This is the third Release Candidate of Redis 6.2.
+
+Here is a comprehensive list of changes in this release compared to 6.2 RC2,
+each one includes the PR number that added it, so you can get more details
+at https://github.com/redis/redis/pull/<number>
+
+New commands / args:
+* Add HRANDFIELD and ZRANDMEMBER commands (#8297)
+* Add FAILOVER command (#8315)
+* Add GETEX, GETDEL commands (#8327)
+* Add PXAT/EXAT arguments to SET command (#8327)
+* Add SYNC arg to FLUSHALL and FLUSHDB, and ASYNC/SYNC arg to SCRIPT FLUSH (#8258)
+
+Sentinel:
+* Add hostname support to Sentinel (#8282)
+* Prevent file descriptors from leaking into Sentinel scripts (#8242)
+* Fix config file line order dependency and config rewrite sequence (#8271)
+
+New configuration options:
+* Add set-proc-title config option to disable changes to the process title (#3623)
+* Add proc-title-template option to control what's shown in the process title (#8397)
+* Add lazyfree-lazy-user-flush config option to control FLUSHALL, FLUSHDB and SCRIPT FLUSH (#8258)
+
+Bug fixes:
+* AOF: recover from last write error by turning on/off appendonly config (#8030)
+* Exit on fsync error when the AOF fsync policy is 'always' (#8347)
+* Avoid assertions (on older kernels) when testing arm64 CoW bug (#8405)
+* CONFIG REWRITE should honor umask settings (#8371)
+* Fix firstkey,lastkey,step in COMMAND command for some commands (#8367)
+
+Special considerations:
+* Fix misleading description of the save configuration directive (#8337)
+
+Improvements:
+* A way to get RDB file via replication without excessive replication buffers (#8303)
+* Optimize performance of clusterGenNodesDescription for large clusters (#8182)
+
+Info fields and introspection changes:
+* SLOWLOG and LATENCY monitor include unblocking time of blocked commands (#7491)
+
+Modules:
+* Add modules API for streams (#8288)
+* Add event for fork child birth and termination (#8289)
+* Add RM_BlockedClientMeasureTime* etc, to track background processing in commandstats (#7491)
+* Fix bug in v6.2, wrong value passed to the new unlink callback (#8381)
+* Fix bug in v6.2, modules blocked on keys unblock on commands like LPUSH (#8356)
+
================================================================================
Redis 6.2 RC2 Released Tue Jan 12 16:17:20 IST 2021
================================================================================
@@ -255,35 +305,39 @@ and we don't get reports of serious issues for a while.
A special thank you for the amount of work put into this release by:
- Oran Agra
- Yossi Gottlieb
-- Itamar Haber
-- Guy Benoish
- Filipe Oliveira
+- Viktor Söderqvist
+- Guy Benoish
+- Itamar Haber
+- Yang Bodong
- Madelyn Olson
- Wang Yuan
- Felipe Machado
-- Yang Bodong
+- Wen Hui
- Tatsuya Arisawa
- Jonah H. Harris
+- Raghav Muddur
- Jim Brunner
- Yaacov Hazan
-- Wen Hui
+- Allen Farris
- Chen Yang
- Nitai Caro
- Meir Shpilraien
- maohuazhu
- Valentino Geron
-- Qu Chen
+- Zhao Zhao
- sundb
+- Qu Chen
- George Prekas
-- Zhao Zhao
- Tyson Andre
- Michael Grunder
- alexronke-channeladvisor
+- Andy Pan
- Wu Yunlong
- Wei Kukey
- Yoav Steinberg
-- Uri Shachar
- Greg Femec
+- Uri Shachar
- Nykolas Laurentino de Lima
- xhe
- zhenwei pi
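
The release notes above introduce GETEX, GETDEL, the EXAT/PXAT arguments to SET, and the SYNC/ASYNC flags for FLUSHALL. Below is a minimal sketch using the hiredis client to exercise them against a local server; the key name, TTLs, and connection address are hypothetical, and error handling is reduced to the bare minimum.

#include <stdio.h>
#include <hiredis/hiredis.h>

int main(void) {
    redisContext *c = redisConnect("127.0.0.1", 6379);
    if (c == NULL || c->err) { fprintf(stderr, "connect failed\n"); return 1; }

    /* SET with an absolute millisecond expiration (PXAT). */
    redisReply *r = redisCommand(c, "SET mykey somevalue PXAT 4102444800000");
    if (r) freeReplyObject(r);

    /* GETEX reads the value and refreshes the TTL in one round trip. */
    r = redisCommand(c, "GETEX mykey EX 60");
    if (r && r->type == REDIS_REPLY_STRING) printf("GETEX -> %s\n", r->str);
    if (r) freeReplyObject(r);

    /* GETDEL reads and deletes the key atomically. */
    r = redisCommand(c, "GETDEL mykey");
    if (r) freeReplyObject(r);

    /* FLUSHALL SYNC forces a synchronous flush regardless of the
     * lazyfree-lazy-user-flush setting. */
    r = redisCommand(c, "FLUSHALL SYNC");
    if (r) freeReplyObject(r);

    redisFree(c);
    return 0;
}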
diff --git a/redis.conf b/redis.conf
index a5062fda9..465d56fc0 100644
--- a/redis.conf
+++ b/redis.conf
@@ -325,31 +325,52 @@ databases 16
# ASCII art logo in startup logs by setting the following option to yes.
always-show-logo no
+# By default, Redis modifies the process title (as seen in 'top' and 'ps') to
+# provide some runtime information. It is possible to disable this and leave
+# the process name as executed by setting the following to no.
+set-proc-title yes
+
+# When changing the process title, Redis uses the following template to construct
+# the modified title.
+#
+# Template variables are specified in curly brackets. The following variables are
+# supported:
+#
+# {title} Name of process as executed if parent, or type of child process.
+# {listen-addr} Bind address or '*' followed by TCP or TLS port listening on, or
+# Unix socket if only that's available.
+# {server-mode} Special mode, i.e. "[sentinel]" or "[cluster]".
+# {port} TCP port listening on, or 0.
+# {tls-port} TLS port listening on, or 0.
+# {unixsocket} Unix domain socket listening on, or "".
+# {config-file} Name of configuration file used.
+#
+proc-title-template "{title} {listen-addr} {server-mode}"
+
################################ SNAPSHOTTING ################################
+
+# Save the DB to disk.
#
-# Save the DB on disk:
+# save <seconds> <changes>
#
-# save <seconds> <changes>
+# Redis will save the DB if both the given number of seconds and the given
+# number of write operations against the DB occurred.
#
-# Will save the DB if both the given number of seconds and the given
-# number of write operations against the DB occurred.
+# Snapshotting can be completely disabled with a single empty string argument
+# as in the following example:
#
-# In the example below the behavior will be to save:
-# after 900 sec (15 min) if at least 1 key changed
-# after 300 sec (5 min) if at least 10 keys changed
-# after 60 sec if at least 10000 keys changed
+# save ""
#
-# Note: you can disable saving completely by commenting out all "save" lines.
+# Unless specified otherwise, by default Redis will save the DB:
+# * After 3600 seconds (an hour) if at least 1 key changed
+# * After 300 seconds (5 minutes) if at least 100 keys changed
+# * After 60 seconds if at least 10000 keys changed
#
-# It is also possible to remove all the previously configured save
-# points by adding a save directive with a single empty string argument
-# like in the following example:
+# You can set these explicitly by uncommenting the three following lines.
#
-# save ""
-
-save 900 1
-save 300 10
-save 60 10000
+# save 3600 1
+# save 300 100
+# save 60 10000
# By default Redis will stop accepting writes if RDB snapshots are enabled
# (at least one save point) and the latest background save failed.
@@ -1089,6 +1110,13 @@ replica-lazy-flush no
lazyfree-lazy-user-del no
+# FLUSHDB, FLUSHALL, and SCRIPT FLUSH support both asynchronous and synchronous
+# deletion, which can be controlled by passing the [SYNC|ASYNC] flags into the
+# commands. When neither flag is passed, this directive will be used to determine
+# if the data should be deleted asynchronously.
+
+lazyfree-lazy-user-flush no
+
################################ THREADED I/O #################################
# Redis is mostly single threaded, however there are certain threaded
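
The proc-title-template directive documented above substitutes {variable} placeholders into the process title. The following is a toy sketch of that kind of curly-brace expansion, with a hard-coded lookup table standing in for the real server state; it illustrates the idea only and is not Redis's implementation.

#include <stdio.h>
#include <string.h>

/* Hypothetical variable table; real values come from the server state. */
static const char *lookup(const char *name, size_t len) {
    if (len == 5  && !strncmp(name, "title", len)) return "redis-server";
    if (len == 11 && !strncmp(name, "listen-addr", len)) return "*:6379";
    if (len == 11 && !strncmp(name, "server-mode", len)) return "";
    return NULL; /* unknown variable: treat the template as invalid */
}

/* Expand "{var}" occurrences of tmpl into out; returns 0 on success. */
static int expand(const char *tmpl, char *out, size_t outlen) {
    size_t o = 0;
    for (const char *p = tmpl; *p; ) {
        if (*p == '{') {
            const char *end = strchr(p, '}');
            if (!end) return -1;                    /* unbalanced brace */
            const char *val = lookup(p + 1, (size_t)(end - p - 1));
            if (!val) return -1;                    /* unknown variable */
            size_t vlen = strlen(val);
            if (o + vlen >= outlen) return -1;
            memcpy(out + o, val, vlen);
            o += vlen;
            p = end + 1;
        } else {
            if (o + 1 >= outlen) return -1;
            out[o++] = *p++;
        }
    }
    out[o] = '\0';
    return 0;
}

int main(void) {
    char title[128];
    if (expand("{title} {listen-addr} {server-mode}", title, sizeof(title)) == 0)
        printf("%s\n", title);
    return 0;
}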
diff --git a/runtest-moduleapi b/runtest-moduleapi
index 9a48867d2..e554226c1 100755
--- a/runtest-moduleapi
+++ b/runtest-moduleapi
@@ -23,6 +23,7 @@ $TCLSH tests/test_helper.tcl \
--single unit/moduleapi/hooks \
--single unit/moduleapi/misc \
--single unit/moduleapi/blockonkeys \
+--single unit/moduleapi/blockonbackground \
--single unit/moduleapi/scan \
--single unit/moduleapi/datatype \
--single unit/moduleapi/auth \
@@ -31,4 +32,5 @@ $TCLSH tests/test_helper.tcl \
--single unit/moduleapi/getkeys \
--single unit/moduleapi/test_lazyfree \
--single unit/moduleapi/defrag \
+--single unit/moduleapi/stream \
"${@}"
diff --git a/sentinel.conf b/sentinel.conf
index 39d6929e7..8647379d8 100644
--- a/sentinel.conf
+++ b/sentinel.conf
@@ -321,3 +321,21 @@ sentinel deny-scripts-reconfig yes
# is possible to just rename a command to itself:
#
# SENTINEL rename-command mymaster CONFIG CONFIG
+
+# HOSTNAMES SUPPORT
+#
+# Normally Sentinel uses only IP addresses and requires SENTINEL MONITOR
+# to specify an IP address. Also, it requires the Redis replica-announce-ip
+# keyword to specify only IP addresses.
+#
+# You may enable hostnames support by enabling resolve-hostnames. Note
+# that you must make sure your DNS is configured properly and that DNS
+# resolution does not introduce very long delays.
+#
+SENTINEL resolve-hostnames no
+
+# When resolve-hostnames is enabled, Sentinel still uses IP addresses
+# when exposing instances to users, configuration files, etc. If you want
+# to retain the hostnames when announced, enable announce-hostnames below.
+#
+SENTINEL announce-hostnames no
diff --git a/src/acl.c b/src/acl.c
index 14d023cc3..1f07c292f 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -1024,8 +1024,8 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) {
/* Return a description of the error that occurred in ACLSetUser() according to
* the errno value set by the function on error. */
-char *ACLSetUserStringError(void) {
- char *errmsg = "Wrong format";
+const char *ACLSetUserStringError(void) {
+ const char *errmsg = "Wrong format";
if (errno == ENOENT)
errmsg = "Unknown command or category name in ACL";
else if (errno == EINVAL)
@@ -1454,7 +1454,7 @@ int ACLLoadConfiguredUsers(void) {
/* Load every rule defined for this user. */
for (int j = 1; aclrules[j]; j++) {
if (ACLSetUser(u,aclrules[j],sdslen(aclrules[j])) != C_OK) {
- char *errmsg = ACLSetUserStringError();
+ const char *errmsg = ACLSetUserStringError();
serverLog(LL_WARNING,"Error loading ACL rule '%s' for "
"the user named '%s': %s",
aclrules[j],aclrules[0],errmsg);
@@ -1587,7 +1587,7 @@ sds ACLLoadFromFile(const char *filename) {
for (j = 2; j < argc; j++) {
argv[j] = sdstrim(argv[j],"\t\r\n");
if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) {
- char *errmsg = ACLSetUserStringError();
+ const char *errmsg = ACLSetUserStringError();
errors = sdscatprintf(errors,
"%s:%d: %s. ",
server.acl_filename, linenum, errmsg);
@@ -1908,7 +1908,7 @@ void aclCommand(client *c) {
for (int j = 3; j < c->argc; j++) {
if (ACLSetUser(tempu,c->argv[j]->ptr,sdslen(c->argv[j]->ptr)) != C_OK) {
- char *errmsg = ACLSetUserStringError();
+ const char *errmsg = ACLSetUserStringError();
addReplyErrorFormat(c,
"Error in ACL SETUSER modifier '%s': %s",
(char*)c->argv[j]->ptr, errmsg);
diff --git a/src/ae.c b/src/ae.c
index 1c3a4e091..283f51438 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -31,6 +31,7 @@
*/
#include "ae.h"
+#include "anet.h"
#include <stdio.h>
#include <sys/time.h>
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
index fa197297e..07ca8ca41 100644
--- a/src/ae_epoll.c
+++ b/src/ae_epoll.c
@@ -51,6 +51,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
+ anetCloexec(state->epfd);
eventLoop->apidata = state;
return 0;
}
diff --git a/src/ae_evport.c b/src/ae_evport.c
index 4e254b602..7a0b03aea 100644
--- a/src/ae_evport.c
+++ b/src/ae_evport.c
@@ -82,6 +82,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
+ anetCloexec(state->portfd);
state->npending = 0;
diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c
index 6796f4ceb..b146f2519 100644
--- a/src/ae_kqueue.c
+++ b/src/ae_kqueue.c
@@ -53,6 +53,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
+ anetCloexec(state->kqfd);
eventLoop->apidata = state;
return 0;
}
diff --git a/src/anet.c b/src/anet.c
index 7a0a1b1ed..0bfa575f5 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -69,6 +69,11 @@ int anetSetBlock(char *err, int fd, int non_block) {
return ANET_ERR;
}
+ /* If O_NONBLOCK already matches the requested state, there is
+ * no need to call fcntl() again to set or unset it. */
+ if (!!(flags & O_NONBLOCK) == !!non_block)
+ return ANET_OK;
+
if (non_block)
flags |= O_NONBLOCK;
else
@@ -89,6 +94,29 @@ int anetBlock(char *err, int fd) {
return anetSetBlock(err,fd,0);
}
+/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
+ * This function should be invoked for fd's on specific places
+ * where fork + execve system calls are called. */
+int anetCloexec(int fd) {
+ int r;
+ int flags;
+
+ do {
+ r = fcntl(fd, F_GETFD);
+ } while (r == -1 && errno == EINTR);
+
+ if (r == -1 || (r & FD_CLOEXEC))
+ return r;
+
+ flags = r | FD_CLOEXEC;
+
+ do {
+ r = fcntl(fd, F_SETFD, flags);
+ } while (r == -1 && errno == EINTR);
+
+ return r;
+}
+
/* Set TCP keep alive option to detect dead peers. The interval option
* is only used for Linux as we are using Linux-specific APIs to set
* the probe send time, interval, and count. */
@@ -207,14 +235,13 @@ int anetRecvTimeout(char *err, int fd, long long ms) {
return ANET_OK;
}
-/* anetGenericResolve() is called by anetResolve() and anetResolveIP() to
- * do the actual work. It resolves the hostname "host" and set the string
- * representation of the IP address into the buffer pointed by "ipbuf".
+/* Resolve the hostname "host" and set the string representation of the
+ * IP address into the buffer pointed by "ipbuf".
*
* If flags is set to ANET_IP_ONLY the function only resolves hostnames
* that are actually already IPv4 or IPv6 addresses. This turns the function
* into a validating / normalizing function. */
-int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
+int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
int flags)
{
struct addrinfo hints, *info;
@@ -241,14 +268,6 @@ int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
return ANET_OK;
}
-int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len) {
- return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_NONE);
-}
-
-int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len) {
- return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_IP_ONLY);
-}
-
static int anetSetReuseAddr(char *err, int fd) {
int yes = 1;
/* Make sure connection-intensive things like the redis benchmark
diff --git a/src/anet.h b/src/anet.h
index fbf41cd17..5da2f3b46 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -60,8 +60,7 @@ int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
int anetUnixConnect(char *err, const char *path);
int anetUnixNonBlockConnect(char *err, const char *path);
int anetRead(int fd, char *buf, int count);
-int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len);
-int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len);
+int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags);
int anetTcpServer(char *err, int port, char *bindaddr, int backlog);
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog);
int anetUnixServer(char *err, char *path, mode_t perm, int backlog);
@@ -70,6 +69,7 @@ int anetUnixAccept(char *err, int serversock);
int anetWrite(int fd, char *buf, int count);
int anetNonBlock(char *err, int fd);
int anetBlock(char *err, int fd);
+int anetCloexec(int fd);
int anetEnableTcpNoDelay(char *err, int fd);
int anetDisableTcpNoDelay(char *err, int fd);
int anetTcpKeepAlive(char *err, int fd);
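
The new anetCloexec() helper added above sets FD_CLOEXEC, retries fcntl() on EINTR, and skips the write when the flag is already present. Here is a standalone sketch of the same pattern using only POSIX calls; it does not use any Redis headers, and the pipe exists only to provide descriptors to work on.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Set FD_CLOEXEC on fd, retrying on EINTR; returns -1 on error. */
static int set_cloexec(int fd) {
    int flags;
    do {
        flags = fcntl(fd, F_GETFD);
    } while (flags == -1 && errno == EINTR);
    if (flags == -1 || (flags & FD_CLOEXEC)) return flags; /* error or already set */

    int r;
    do {
        r = fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
    } while (r == -1 && errno == EINTR);
    return r;
}

int main(void) {
    int fds[2];
    if (pipe(fds) == -1) return 1;
    if (set_cloexec(fds[0]) == -1 || set_cloexec(fds[1]) == -1)
        perror("fcntl");
    else
        printf("both pipe ends are now close-on-exec\n");
    close(fds[0]);
    close(fds[1]);
    return 0;
}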
diff --git a/src/aof.c b/src/aof.c
index d3191277f..6753e8bcc 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -235,6 +235,8 @@ void stopAppendOnly(void) {
serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
redis_fsync(server.aof_fd);
+ server.aof_fsync_offset = server.aof_current_size;
+ server.aof_last_fsync = server.unixtime;
close(server.aof_fd);
server.aof_fd = -1;
@@ -242,6 +244,8 @@ void stopAppendOnly(void) {
server.aof_state = AOF_OFF;
server.aof_rewrite_scheduled = 0;
killAppendOnlyChild();
+ sdsfree(server.aof_buf);
+ server.aof_buf = sdsempty();
}
/* Called when the user switches from "appendonly no" to "appendonly yes"
@@ -285,6 +289,12 @@ int startAppendOnly(void) {
server.aof_state = AOF_WAIT_REWRITE;
server.aof_last_fsync = server.unixtime;
server.aof_fd = newfd;
+
+ /* If AOF was in error state, we just ignore it and log the event. */
+ if (server.aof_last_write_status == C_ERR) {
+ serverLog(LL_WARNING,"AOF reopen, just ignore the last error.");
+ server.aof_last_write_status = C_OK;
+ }
return C_OK;
}
@@ -451,10 +461,11 @@ void flushAppendOnlyFile(int force) {
/* Handle the AOF write error. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
- /* We can't recover when the fsync policy is ALWAYS since the
- * reply for the client is already in the output buffers, and we
- * have the contract with the user that on acknowledged write data
- * is synced on disk. */
+ /* We can't recover when the fsync policy is ALWAYS since the reply
+ * for the client is already in the output buffers (both writes and
+ * reads), and the changes to the db can't be rolled back. Since we
+ * have a contract with the user that acknowledged or observed
+ * writes are synced on disk, we must exit. */
serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
exit(1);
} else {
@@ -502,7 +513,14 @@ try_fsync:
/* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
- redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
+ /* Let's try to get this data on the disk. To guarantee data safety when
+ * the AOF fsync policy is 'always', we should exit if we fail to fsync
+ * the AOF (see the comment next to the exit(1) after the write error above). */
+ if (redis_fsync(server.aof_fd) == -1) {
+ serverLog(LL_WARNING,"Can't persist AOF for fsync error when the "
+ "AOF fsync policy is 'always': %s. Exiting...", strerror(errno));
+ exit(1);
+ }
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_fsync_offset = server.aof_current_size;
@@ -581,8 +599,6 @@ sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, r
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
sds buf = sdsempty();
- robj *tmpargv[3];
-
/* The DB this command was targeting is not the same as the last command
* we appended. To issue a SELECT command is needed. */
if (dictid != server.aof_selected_db) {
@@ -598,32 +614,31 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
cmd->proc == expireatCommand) {
/* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */
buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
- } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
- /* Translate SETEX/PSETEX to SET and PEXPIREAT */
- tmpargv[0] = createStringObject("SET",3);
- tmpargv[1] = argv[1];
- tmpargv[2] = argv[3];
- buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
- decrRefCount(tmpargv[0]);
- buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
} else if (cmd->proc == setCommand && argc > 3) {
- int i;
- robj *exarg = NULL, *pxarg = NULL;
- for (i = 3; i < argc; i ++) {
- if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1];
- if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1];
+ robj *pxarg = NULL;
+ /* When SET is used with the EX/PX argument, setGenericCommand propagates it as a PX millisecond argument.
+ * Since the command arguments are rewritten there, we can rely here on the index of PX being 3. */
+ if (!strcasecmp(argv[3]->ptr, "px")) {
+ pxarg = argv[4];
}
- serverAssert(!(exarg && pxarg));
-
- if (exarg || pxarg) {
- /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */
- buf = catAppendOnlyGenericCommand(buf,3,argv);
- if (exarg)
- buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],
- exarg);
- if (pxarg)
- buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],
- pxarg);
+ /* For the AOF we convert SET with a relative expiration in milliseconds into SET with an absolute
+ * expiration timestamp in milliseconds (PXAT). Whenever this branch is taken, the original SET has
+ * already been transformed into SET ... PX <milliseconds>, so we do not need to worry about the unit here. */
+ if (pxarg) {
+ robj *millisecond = getDecodedObject(pxarg);
+ long long when = strtoll(millisecond->ptr,NULL,10);
+ when += mstime();
+
+ decrRefCount(millisecond);
+
+ robj *newargs[5];
+ newargs[0] = argv[0];
+ newargs[1] = argv[1];
+ newargs[2] = argv[2];
+ newargs[3] = shared.pxat;
+ newargs[4] = createStringObjectFromLongLong(when);
+ buf = catAppendOnlyGenericCommand(buf,5,newargs);
+ decrRefCount(newargs[4]);
} else {
buf = catAppendOnlyGenericCommand(buf,argc,argv);
}
@@ -1852,6 +1867,20 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency);
+
+ if (server.aof_fsync == AOF_FSYNC_EVERYSEC) {
+ aof_background_fsync(newfd);
+ } else if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
+ latencyStartMonitor(latency);
+ if (redis_fsync(newfd) == -1) {
+ serverLog(LL_WARNING,
+ "Error trying to fsync the parent diff to the rewritten AOF: %s", strerror(errno));
+ close(newfd);
+ goto cleanup;
+ }
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("aof-rewrite-done-fsync",latency);
+ }
serverLog(LL_NOTICE,
"Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024));
@@ -1919,14 +1948,11 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
/* AOF enabled, replace the old fd with the new one. */
oldfd = server.aof_fd;
server.aof_fd = newfd;
- if (server.aof_fsync == AOF_FSYNC_ALWAYS)
- redis_fsync(newfd);
- else if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
- aof_background_fsync(newfd);
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
aofUpdateCurrentSize();
server.aof_rewrite_base_size = server.aof_current_size;
server.aof_fsync_offset = server.aof_current_size;
+ server.aof_last_fsync = server.unixtime;
/* Clear regular AOF buffer since its contents was just written to
* the new AOF from the background rewrite buffer. */
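
The AOF translation above rewrites SET ... PX <relative-ms> into SET ... PXAT <absolute-ms> by adding the current time in milliseconds. A minimal sketch of that conversion follows; gettimeofday() stands in for Redis's mstime(), and the 10000 ms TTL is just an example value.

#include <stdio.h>
#include <sys/time.h>

/* Current wall-clock time in milliseconds, similar in spirit to mstime(). */
static long long now_ms(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (long long)tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

int main(void) {
    long long px = 10000;               /* relative TTL: 10 seconds, in ms */
    long long pxat = now_ms() + px;     /* absolute expiration timestamp in ms */
    printf("SET key value PX %lld  ->  SET key value PXAT %lld\n", px, pxat);
    return 0;
}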
diff --git a/src/blocked.c b/src/blocked.c
index 46935c79f..09e17213c 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -61,6 +61,9 @@
*/
#include "server.h"
+#include "slowlog.h"
+#include "latency.h"
+#include "monotonic.h"
int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int wherefrom, int whereto);
int getListPositionFromObjectOrReply(client *c, robj *arg, int *position);
@@ -97,6 +100,20 @@ void blockClient(client *c, int btype) {
}
}
+/* This function is called after a client has finished a blocking operation
+ * in order to update the total command duration, log the command into
+ * the Slow log if needed, and log the reply duration event if needed. */
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){
+ const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
+ c->lastcmd->microseconds += total_cmd_duration;
+ /* Log the command into the Slow log if needed. */
+ if (!(c->lastcmd->flags & CMD_SKIP_SLOWLOG)) {
+ slowlogPushEntryIfNeeded(c,c->argv,c->argc,total_cmd_duration);
+ /* Log the reply duration event. */
+ latencyAddSampleIfNeeded("command-unblocking",reply_us/1000);
+ }
+}
+
/* This function is called in the beforeSleep() function of the event loop
* in order to process the pending input buffer of clients that were
* unblocked after a blocking operation. */
@@ -264,6 +281,8 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
if (dstkey) incrRefCount(dstkey);
unblockClient(receiver);
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
if (serveClientBlockedOnList(receiver,
rl->key,dstkey,rl->db,value,
wherefrom, whereto) == C_ERR)
@@ -272,6 +291,7 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
* to also undo the POP operation. */
listTypePush(o,value,wherefrom);
}
+ updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
if (dstkey) decrRefCount(dstkey);
decrRefCount(value);
@@ -316,7 +336,10 @@ void serveClientsBlockedOnSortedSetKey(robj *o, readyList *rl) {
receiver->lastcmd->proc == bzpopminCommand)
? ZSET_MIN : ZSET_MAX;
unblockClient(receiver);
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
genericZpopCommand(receiver,&rl->key,1,where,1,NULL);
+ updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
zcard--;
/* Replicate the command. */
@@ -406,6 +429,8 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
}
}
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
/* Emit the two elements sub-array consisting of
* the name of the stream and the data we
* extracted from it. Wrapped in a single-item
@@ -425,6 +450,7 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
streamReplyWithRange(receiver,s,&start,NULL,
receiver->bpop.xread_count,
0, group, consumer, noack, &pi);
+ updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
/* Note that after we unblock the client, 'gt'
* and other receiver->bpop stuff are no longer
@@ -471,7 +497,10 @@ void serveClientsBlockedOnKeyByModule(readyList *rl) {
* different modules with different triggers to consider if a key
* is ready or not. This means we can't exit the loop but need
* to continue after the first failure. */
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
if (!moduleTryServeClientBlockedOnKey(receiver, rl->key)) continue;
+ updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
moduleUnblockClient(receiver);
}
@@ -684,10 +713,20 @@ static int getBlockedTypeByType(int type) {
void signalKeyAsReady(redisDb *db, robj *key, int type) {
readyList *rl;
- /* If no clients are blocked on this type, just return */
+ /* Quick returns. */
int btype = getBlockedTypeByType(type);
- if (btype == BLOCKED_NONE || !server.blocked_clients_by_type[btype])
+ if (btype == BLOCKED_NONE) {
+ /* The type can never block. */
return;
+ }
+ if (!server.blocked_clients_by_type[btype] &&
+ !server.blocked_clients_by_type[BLOCKED_MODULE]) {
+ /* No clients block on this type. Note: Blocked modules are represented
+ * by BLOCKED_MODULE, even if the intention is to wake up by normal
+ * types (list, zset, stream), so we need to check that there are no
+ * blocked modules before we do a quick return here. */
+ return;
+ }
/* No clients blocking for this key? No need to queue it. */
if (dictFind(db->blocking_keys,key) == NULL) return;
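
The updateStatsOnUnblock() change above wraps the reply-serving code in elapsedStart()/elapsedUs() so the reply time can be attributed to the blocked command. Below is a self-contained sketch of the same monotonic-clock measurement using clock_gettime(); it shows the general technique, not Redis's monotonic.h API, and usleep() merely stands in for serving the blocked client.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Monotonic timestamp in microseconds. */
static long long mono_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

int main(void) {
    long long start = mono_us();
    usleep(25000);                      /* stand-in for serving the blocked client */
    long long reply_us = mono_us() - start;
    printf("reply served in %lld us\n", reply_us);
    return 0;
}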
diff --git a/src/cluster.c b/src/cluster.c
index 78c36e8d1..97a25b0b3 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -398,7 +398,7 @@ int clusterLockConfig(char *filename) {
/* To lock it, we need to open the file in a way it is created if
* it does not exist, otherwise there is a race condition with other
* processes. */
- int fd = open(filename,O_WRONLY|O_CREAT,0644);
+ int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644);
if (fd == -1) {
serverLog(LL_WARNING,
"Can't open %s in order to acquire a lock: %s",
@@ -509,8 +509,7 @@ void clusterInit(void) {
serverLog(LL_WARNING, "Redis port number too high. "
"Cluster communication port is 10,000 port "
"numbers higher than your Redis port. "
- "Your Redis port number must be "
- "lower than 55535.");
+ "Your Redis port number must be 55535 or less.");
exit(1);
}
if (listenToPort(port+CLUSTER_PORT_INCR,
@@ -779,6 +778,7 @@ clusterNode *createClusterNode(char *nodename, int flags) {
node->configEpoch = 0;
node->flags = flags;
memset(node->slots,0,sizeof(node->slots));
+ node->slots_info = NULL;
node->numslots = 0;
node->numslaves = 0;
node->slaves = NULL;
@@ -4144,8 +4144,8 @@ sds clusterGenNodeDescription(clusterNode *node) {
sds ci;
/* Node coordinates */
- ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ",
- node->name,
+ ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN);
+ ci = sdscatfmt(ci," %s:%i@%i ",
node->ip,
node->port,
node->cport);
@@ -4154,40 +4154,46 @@ sds clusterGenNodeDescription(clusterNode *node) {
ci = representClusterNodeFlags(ci, node->flags);
/* Slave of... or just "-" */
+ ci = sdscatlen(ci," ",1);
if (node->slaveof)
- ci = sdscatprintf(ci," %.40s ",node->slaveof->name);
+ ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN);
else
- ci = sdscatlen(ci," - ",3);
+ ci = sdscatlen(ci,"-",1);
unsigned long long nodeEpoch = node->configEpoch;
if (nodeIsSlave(node) && node->slaveof) {
nodeEpoch = node->slaveof->configEpoch;
}
/* Latency from the POV of this node, config epoch, link status */
- ci = sdscatprintf(ci,"%lld %lld %llu %s",
+ ci = sdscatfmt(ci," %I %I %U %s",
(long long) node->ping_sent,
(long long) node->pong_received,
nodeEpoch,
(node->link || node->flags & CLUSTER_NODE_MYSELF) ?
"connected" : "disconnected");
- /* Slots served by this instance */
- start = -1;
- for (j = 0; j < CLUSTER_SLOTS; j++) {
- int bit;
+ /* Slots served by this instance. If we already have the slots info,
+ * append it directly; otherwise generate it only if the node serves any slots. */
+ if (node->slots_info) {
+ ci = sdscatsds(ci, node->slots_info);
+ } else if (node->numslots > 0) {
+ start = -1;
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ int bit;
- if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
- if (start == -1) start = j;
- }
- if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
- if (bit && j == CLUSTER_SLOTS-1) j++;
+ if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
+ if (start == -1) start = j;
+ }
+ if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
+ if (bit && j == CLUSTER_SLOTS-1) j++;
- if (start == j-1) {
- ci = sdscatprintf(ci," %d",start);
- } else {
- ci = sdscatprintf(ci," %d-%d",start,j-1);
+ if (start == j-1) {
+ ci = sdscatfmt(ci," %i",start);
+ } else {
+ ci = sdscatfmt(ci," %i-%i",start,j-1);
+ }
+ start = -1;
}
- start = -1;
}
}
@@ -4208,6 +4214,41 @@ sds clusterGenNodeDescription(clusterNode *node) {
return ci;
}
+/* Generate the slot topology for all nodes and store the string representation
+ * in the slots_info struct on the node. This is used to improve the efficiency
+ * of clusterGenNodesDescription(): it avoids looping over the entire slot space
+ * once per node just to generate each node's slot info. */
+void clusterGenNodesSlotsInfo(int filter) {
+ clusterNode *n = NULL;
+ int start = -1;
+
+ for (int i = 0; i <= CLUSTER_SLOTS; i++) {
+ /* Find start node and slot id. */
+ if (n == NULL) {
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ continue;
+ }
+
+ /* Generate the slots info when we reach a node different from the
+ * start node, or the end of the slot space. */
+ if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) {
+ if (!(n->flags & filter)) {
+ if (n->slots_info == NULL) n->slots_info = sdsempty();
+ if (start == i-1) {
+ n->slots_info = sdscatfmt(n->slots_info," %i",start);
+ } else {
+ n->slots_info = sdscatfmt(n->slots_info," %i-%i",start,i-1);
+ }
+ }
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ }
+ }
+}
+
/* Generate a csv-alike representation of the nodes we are aware of,
* including the "myself" node, and return an SDS string containing the
* representation (it is up to the caller to free it).
@@ -4225,6 +4266,9 @@ sds clusterGenNodesDescription(int filter) {
dictIterator *di;
dictEntry *de;
+ /* Generate the slots info for all nodes first. */
+ clusterGenNodesSlotsInfo(filter);
+
di = dictGetSafeIterator(server.cluster->nodes);
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
@@ -4234,6 +4278,12 @@ sds clusterGenNodesDescription(int filter) {
ci = sdscatsds(ci,ni);
sdsfree(ni);
ci = sdscatlen(ci,"\n",1);
+
+ /* Release slots info. */
+ if (node->slots_info) {
+ sdsfree(node->slots_info);
+ node->slots_info = NULL;
+ }
}
dictReleaseIterator(di);
return ci;
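
clusterGenNodesSlotsInfo() above walks the slot array once and emits contiguous ranges per owner, instead of rescanning all 16384 slots for every node. A toy sketch of that single-pass range compression over a plain array of owner ids follows; the data is hypothetical and no cluster structures are involved.

#include <stdio.h>

#define SLOTS 16

int main(void) {
    /* owner[i] is the id of the node serving slot i (-1 = unassigned). */
    int owner[SLOTS] = {0,0,0,1,1,-1,-1,2,2,2,2,1,1,0,0,0};
    int cur = owner[0], start = 0;

    for (int i = 1; i <= SLOTS; i++) {
        /* Close the current range when the owner changes or we hit the end. */
        if (i == SLOTS || owner[i] != cur) {
            if (cur != -1) {
                if (start == i - 1)
                    printf("node %d: slot %d\n", cur, start);
                else
                    printf("node %d: slots %d-%d\n", cur, start, i - 1);
            }
            if (i < SLOTS) { cur = owner[i]; start = i; }
        }
    }
    return 0;
}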
diff --git a/src/cluster.h b/src/cluster.h
index d58f350ce..716c0d49c 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -118,6 +118,7 @@ typedef struct clusterNode {
int flags; /* CLUSTER_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */
+ sds slots_info; /* Slots info represented by string. */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */
diff --git a/src/config.c b/src/config.c
index 2e109dbae..0bd89c2b9 100644
--- a/src/config.c
+++ b/src/config.c
@@ -153,15 +153,15 @@ int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 };
typedef struct boolConfigData {
int *config; /* The pointer to the server config this value is stored in */
const int default_value; /* The default value of the config on rewrite */
- int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */
- int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} boolConfigData;
typedef struct stringConfigData {
char **config; /* Pointer to the server config this value is stored in. */
const char *default_value; /* Default value of the config on rewrite. */
- int (*is_valid_fn)(char* val, char **err); /* Optional function to check validity of new value (generic doc above) */
- int (*update_fn)(char* val, char* prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int (*is_valid_fn)(char* val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(char* val, char* prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
int convert_empty_to_null; /* Boolean indicating if empty strings should
be stored as a NULL value. */
} stringConfigData;
@@ -169,8 +169,8 @@ typedef struct stringConfigData {
typedef struct sdsConfigData {
sds *config; /* Pointer to the server config this value is stored in. */
const char *default_value; /* Default value of the config on rewrite. */
- int (*is_valid_fn)(sds val, char **err); /* Optional function to check validity of new value (generic doc above) */
- int (*update_fn)(sds val, sds prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int (*is_valid_fn)(sds val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(sds val, sds prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
int convert_empty_to_null; /* Boolean indicating if empty SDS strings should
be stored as a NULL value. */
} sdsConfigData;
@@ -179,8 +179,8 @@ typedef struct enumConfigData {
int *config; /* The pointer to the server config this value is stored in */
configEnum *enum_value; /* The underlying enum type this data represents */
const int default_value; /* The default value of the config on rewrite */
- int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */
- int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} enumConfigData;
typedef enum numericType {
@@ -214,8 +214,8 @@ typedef struct numericConfigData {
long long lower_bound; /* The lower bound of this numeric value */
long long upper_bound; /* The upper bound of this numeric value */
const long long default_value; /* The default value of the config on rewrite */
- int (*is_valid_fn)(long long val, char **err); /* Optional function to check validity of new value (generic doc above) */
- int (*update_fn)(long long val, long long prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
+ int (*is_valid_fn)(long long val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int (*update_fn)(long long val, long long prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} numericConfigData;
typedef union typeData {
@@ -230,10 +230,10 @@ typedef struct typeInterface {
/* Called on server start, to init the server with default value */
void (*init)(typeData data);
/* Called on server start, should return 1 on success, 0 on error and should set err */
- int (*load)(typeData data, sds *argc, int argv, char **err);
+ int (*load)(typeData data, sds *argc, int argv, const char **err);
/* Called on server startup and CONFIG SET, returns 1 on success, 0 on error
* and can set a verbose err string, update is true when called from CONFIG SET */
- int (*set)(typeData data, sds value, int update, char **err);
+ int (*set)(typeData data, sds value, int update, const char **err);
/* Called on CONFIG GET, required to add output to the client */
void (*get)(client *c, typeData data);
/* Called on CONFIG REWRITE, required to rewrite the config state */
@@ -325,7 +325,7 @@ void queueLoadModule(sds path, sds *argv, int argc) {
* server.oom_score_adj_values if valid.
*/
-static int updateOOMScoreAdjValues(sds *args, char **err, int apply) {
+static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) {
int i;
int values[CONFIG_OOM_COUNT];
@@ -385,7 +385,7 @@ void initConfigValues() {
}
void loadServerConfigFromString(char *config) {
- char *err = NULL;
+ const char *err = NULL;
int linenum = 0, totlines, i;
int slaveof_linenum = 0;
sds *lines;
@@ -608,7 +608,7 @@ void loadServerConfigFromString(char *config) {
int argc_err;
if (ACLAppendUserForLoading(argv,argc,&argc_err) == C_ERR) {
char buf[1024];
- char *errmsg = ACLSetUserStringError();
+ const char *errmsg = ACLSetUserStringError();
snprintf(buf,sizeof(buf),"Error in user declaration '%s': %s",
argv[argc_err],errmsg);
err = buf;
@@ -624,8 +624,7 @@ void loadServerConfigFromString(char *config) {
err = "sentinel directive while not in sentinel mode";
goto loaderr;
}
- err = sentinelHandleConfiguration(argv+1,argc-1);
- if (err) goto loaderr;
+ queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
}
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
@@ -730,7 +729,7 @@ void configSetCommand(client *c) {
robj *o;
long long ll;
int err;
- char *errstr = NULL;
+ const char *errstr = NULL;
serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
@@ -1221,7 +1220,16 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
sdsfree(argv[0]);
argv[0] = alt;
}
- rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
+ /* If this is a sentinel config, we use "sentinel <config-name>" as the option
+ name to avoid messing up the rewrite sequence. */
+ if (server.sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) {
+ sds sentinelOption = sdsempty();
+ sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]);
+ rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum);
+ sdsfree(sentinelOption);
+ } else {
+ rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
+ }
sdsfreesplitres(argv,argc);
}
fclose(fp);
@@ -1683,7 +1691,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
if (fsync(fd))
serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno));
- else if (fchmod(fd, 0644) == -1)
+ else if (fchmod(fd, 0644 & ~server.umask) == -1)
serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno));
else if (rename(tmp_conffile, configfile) == -1)
serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno));
@@ -1795,7 +1803,7 @@ static void boolConfigInit(typeData data) {
*data.yesno.config = data.yesno.default_value;
}
-static int boolConfigSet(typeData data, sds value, int update, char **err) {
+static int boolConfigSet(typeData data, sds value, int update, const char **err) {
int yn = yesnotoi(value);
if (yn == -1) {
*err = "argument must be 'yes' or 'no'";
@@ -1836,7 +1844,7 @@ static void stringConfigInit(typeData data) {
*data.string.config = (data.string.convert_empty_to_null && !data.string.default_value) ? NULL : zstrdup(data.string.default_value);
}
-static int stringConfigSet(typeData data, sds value, int update, char **err) {
+static int stringConfigSet(typeData data, sds value, int update, const char **err) {
if (data.string.is_valid_fn && !data.string.is_valid_fn(value, err))
return 0;
char *prev = *data.string.config;
@@ -1863,7 +1871,7 @@ static void sdsConfigInit(typeData data) {
*data.sds.config = (data.sds.convert_empty_to_null && !data.sds.default_value) ? NULL: sdsnew(data.sds.default_value);
}
-static int sdsConfigSet(typeData data, sds value, int update, char **err) {
+static int sdsConfigSet(typeData data, sds value, int update, const char **err) {
if (data.sds.is_valid_fn && !data.sds.is_valid_fn(value, err))
return 0;
sds prev = *data.sds.config;
@@ -1922,7 +1930,7 @@ static void enumConfigInit(typeData data) {
*data.enumd.config = data.enumd.default_value;
}
-static int enumConfigSet(typeData data, sds value, int update, char **err) {
+static int enumConfigSet(typeData data, sds value, int update, const char **err) {
int enumval = configEnumGetValue(data.enumd.enum_value, value);
if (enumval == INT_MIN) {
sds enumerr = sdsnew("argument must be one of the following: ");
@@ -2028,7 +2036,7 @@ static void numericConfigInit(typeData data) {
SET_NUMERIC_TYPE(data.numeric.default_value)
}
-static int numericBoundaryCheck(typeData data, long long ll, char **err) {
+static int numericBoundaryCheck(typeData data, long long ll, const char **err) {
if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG ||
data.numeric.numeric_type == NUMERIC_TYPE_UINT ||
data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) {
@@ -2058,7 +2066,7 @@ static int numericBoundaryCheck(typeData data, long long ll, char **err) {
return 1;
}
-static int numericConfigSet(typeData data, sds value, int update, char **err) {
+static int numericConfigSet(typeData data, sds value, int update, const char **err) {
long long ll, prev = 0;
if (data.numeric.is_memory) {
int memerr;
@@ -2196,7 +2204,7 @@ static void numericConfigRewrite(typeData data, const char *name, struct rewrite
} \
}
-static int isValidActiveDefrag(int val, char **err) {
+static int isValidActiveDefrag(int val, const char **err) {
#ifndef HAVE_DEFRAG
if (val) {
*err = "Active defragmentation cannot be enabled: it "
@@ -2212,7 +2220,7 @@ static int isValidActiveDefrag(int val, char **err) {
return 1;
}
-static int isValidDBfilename(char *val, char **err) {
+static int isValidDBfilename(char *val, const char **err) {
if (!pathIsBaseName(val)) {
*err = "dbfilename can't be a path, just a filename";
return 0;
@@ -2220,7 +2228,7 @@ static int isValidDBfilename(char *val, char **err) {
return 1;
}
-static int isValidAOFfilename(char *val, char **err) {
+static int isValidAOFfilename(char *val, const char **err) {
if (!pathIsBaseName(val)) {
*err = "appendfilename can't be a path, just a filename";
return 0;
@@ -2228,7 +2236,26 @@ static int isValidAOFfilename(char *val, char **err) {
return 1;
}
-static int updateHZ(long long val, long long prev, char **err) {
+/* Validate specified string is a valid proc-title-template */
+static int isValidProcTitleTemplate(char *val, const char **err) {
+ if (!validateProcTitleTemplate(val)) {
+ *err = "template format is invalid or contains unknown variables";
+ return 0;
+ }
+ return 1;
+}
+
+static int updateProcTitleTemplate(char *val, char *prev, const char **err) {
+ UNUSED(val);
+ UNUSED(prev);
+ if (redisSetProcTitle(NULL) == C_ERR) {
+ *err = "failed to set process title";
+ return 0;
+ }
+ return 1;
+}
+
+static int updateHZ(long long val, long long prev, const char **err) {
UNUSED(prev);
UNUSED(err);
/* Hz is more a hint from the user, so we accept values out of range
@@ -2240,14 +2267,14 @@ static int updateHZ(long long val, long long prev, char **err) {
return 1;
}
-static int updateJemallocBgThread(int val, int prev, char **err) {
+static int updateJemallocBgThread(int val, int prev, const char **err) {
UNUSED(prev);
UNUSED(err);
set_jemalloc_bg_thread(val);
return 1;
}
-static int updateReplBacklogSize(long long val, long long prev, char **err) {
+static int updateReplBacklogSize(long long val, long long prev, const char **err) {
/* resizeReplicationBacklog sets server.repl_backlog_size, and relies on
* being able to tell when the size changes, so restore prev before calling it. */
UNUSED(err);
@@ -2256,7 +2283,7 @@ static int updateReplBacklogSize(long long val, long long prev, char **err) {
return 1;
}
-static int updateMaxmemory(long long val, long long prev, char **err) {
+static int updateMaxmemory(long long val, long long prev, const char **err) {
UNUSED(prev);
UNUSED(err);
if (val) {
@@ -2269,7 +2296,7 @@ static int updateMaxmemory(long long val, long long prev, char **err) {
return 1;
}
-static int updateGoodSlaves(long long val, long long prev, char **err) {
+static int updateGoodSlaves(long long val, long long prev, const char **err) {
UNUSED(val);
UNUSED(prev);
UNUSED(err);
@@ -2277,7 +2304,7 @@ static int updateGoodSlaves(long long val, long long prev, char **err) {
return 1;
}
-static int updateAppendonly(int val, int prev, char **err) {
+static int updateAppendonly(int val, int prev, const char **err) {
UNUSED(prev);
if (val == 0 && server.aof_state != AOF_OFF) {
stopAppendOnly();
@@ -2290,7 +2317,7 @@ static int updateAppendonly(int val, int prev, char **err) {
return 1;
}
-static int updateSighandlerEnabled(int val, int prev, char **err) {
+static int updateSighandlerEnabled(int val, int prev, const char **err) {
UNUSED(err);
UNUSED(prev);
if (val)
@@ -2300,7 +2327,7 @@ static int updateSighandlerEnabled(int val, int prev, char **err) {
return 1;
}
-static int updateMaxclients(long long val, long long prev, char **err) {
+static int updateMaxclients(long long val, long long prev, const char **err) {
/* Try to check if the OS is capable of supporting so many FDs. */
if (val > prev) {
adjustOpenFilesLimit();
@@ -2328,7 +2355,7 @@ static int updateMaxclients(long long val, long long prev, char **err) {
return 1;
}
-static int updateOOMScoreAdj(int val, int prev, char **err) {
+static int updateOOMScoreAdj(int val, int prev, const char **err) {
UNUSED(prev);
if (val) {
@@ -2342,7 +2369,7 @@ static int updateOOMScoreAdj(int val, int prev, char **err) {
}
#ifdef USE_OPENSSL
-static int updateTlsCfg(char *val, char *prev, char **err) {
+static int updateTlsCfg(char *val, char *prev, const char **err) {
UNUSED(val);
UNUSED(prev);
UNUSED(err);
@@ -2355,13 +2382,13 @@ static int updateTlsCfg(char *val, char *prev, char **err) {
}
return 1;
}
-static int updateTlsCfgBool(int val, int prev, char **err) {
+static int updateTlsCfgBool(int val, int prev, const char **err) {
UNUSED(val);
UNUSED(prev);
return updateTlsCfg(NULL, NULL, err);
}
-static int updateTlsCfgInt(long long val, long long prev, char **err) {
+static int updateTlsCfgInt(long long val, long long prev, const char **err) {
UNUSED(val);
UNUSED(prev);
return updateTlsCfg(NULL, NULL, err);
@@ -2380,11 +2407,13 @@ standardConfig configs[] = {
createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, server.rdb_del_sync_files, 0, NULL, NULL),
createBoolConfig("activerehashing", NULL, MODIFIABLE_CONFIG, server.activerehashing, 1, NULL, NULL),
createBoolConfig("stop-writes-on-bgsave-error", NULL, MODIFIABLE_CONFIG, server.stop_writes_on_bgsave_err, 1, NULL, NULL),
+ createBoolConfig("set-proc-title", NULL, IMMUTABLE_CONFIG, server.set_proc_title, 1, NULL, NULL), /* Should setproctitle be used? */
createBoolConfig("dynamic-hz", NULL, MODIFIABLE_CONFIG, server.dynamic_hz, 1, NULL, NULL), /* Adapt hz to # of clients.*/
createBoolConfig("lazyfree-lazy-eviction", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_eviction, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-expire", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_expire, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-server-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_server_del, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-user-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_del , 0, NULL, NULL),
+ createBoolConfig("lazyfree-lazy-user-flush", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
createBoolConfig("repl-diskless-sync", NULL, MODIFIABLE_CONFIG, server.repl_diskless_sync, 0, NULL, NULL),
createBoolConfig("gopher-enabled", NULL, MODIFIABLE_CONFIG, server.gopher_enabled, 0, NULL, NULL),
@@ -2425,6 +2454,7 @@ standardConfig configs[] = {
createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL),
createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL),
createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL),
+ createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate),
/* SDS Configs */
createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),
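
The CONFIG REWRITE change above honors the umask by masking the chmod mode with ~server.umask; Redis captures the process umask at startup. The sketch below shows the usual capture-and-restore idiom for reading the umask without changing it, as a general POSIX pattern rather than necessarily how server.umask is populated.

#include <stdio.h>
#include <sys/stat.h>

int main(void) {
    /* umask() can only be read by setting it, so set-and-restore. */
    mode_t saved = umask(0);
    umask(saved);

    mode_t mode = 0644 & ~saved;    /* what fchmod() would receive */
    printf("umask=%04o  effective mode=%04o\n", (unsigned)saved, (unsigned)mode);
    return 0;
}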
diff --git a/src/db.c b/src/db.c
index 5d63566a7..3871753dd 100644
--- a/src/db.c
+++ b/src/db.c
@@ -226,7 +226,7 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) {
/* Although the key is not really deleted from the database, we regard
overwrite as two steps of unlink+add, so we still need to call the unlink
callback of the module. */
- moduleNotifyKeyUnlink(key,val);
+ moduleNotifyKeyUnlink(key,old);
dictSetVal(db->dict, de, val);
if (server.lazyfree_lazy_server_del) {
@@ -595,21 +595,23 @@ void signalFlushedDb(int dbid, int async) {
/* Return the set of flags to use for the emptyDb() call for FLUSHALL
* and FLUSHDB commands.
*
- * Currently the command just attempts to parse the "ASYNC" option. It
- * also checks if the command arity is wrong.
+ * sync: flushes the database in a synchronous manner.
+ * async: flushes the database in an asynchronous manner.
+ * no option: sync or async is determined by the value of lazyfree-lazy-user-flush.
*
* On success C_OK is returned and the flags are stored in *flags, otherwise
* C_ERR is returned and the function sends an error to the client. */
int getFlushCommandFlags(client *c, int *flags) {
/* Parse the optional ASYNC option. */
- if (c->argc > 1) {
- if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
- addReplyErrorObject(c,shared.syntaxerr);
- return C_ERR;
- }
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"sync")) {
+ *flags = EMPTYDB_NO_FLAGS;
+ } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"async")) {
*flags = EMPTYDB_ASYNC;
+ } else if (c->argc == 1) {
+ *flags = server.lazyfree_lazy_user_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS;
} else {
- *flags = EMPTYDB_NO_FLAGS;
+ addReplyErrorObject(c,shared.syntaxerr);
+ return C_ERR;
}
return C_OK;
}
@@ -951,7 +953,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
int filter = 0;
/* Filter element if it does not match the pattern. */
- if (!filter && use_pattern) {
+ if (use_pattern) {
if (sdsEncodedObject(kobj)) {
if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
filter = 1;
diff --git a/src/defrag.c b/src/defrag.c
index e189deddd..db797711e 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -367,7 +367,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) {
} else if (dict_val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
void *newptr, *ptr = dictGetVal(de);
if ((newptr = activeDefragAlloc(ptr)))
- ln->value = newptr, defragged++;
+ de->v.val = newptr, defragged++;
}
defragged += dictIterDefragEntry(di);
}
diff --git a/src/expire.c b/src/expire.c
index 275a735a7..f79510817 100644
--- a/src/expire.c
+++ b/src/expire.c
@@ -53,15 +53,19 @@
* to the function to avoid too many gettimeofday() syscalls. */
int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
long long t = dictGetSignedIntegerVal(de);
+ mstime_t expire_latency;
if (now > t) {
sds key = dictGetKey(de);
robj *keyobj = createStringObject(key,sdslen(key));
propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
+ latencyStartMonitor(expire_latency);
if (server.lazyfree_lazy_expire)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
+ latencyEndMonitor(expire_latency);
+ latencyAddSampleIfNeeded("expire-del",expire_latency);
notifyKeyspaceEvent(NOTIFY_EXPIRED,
"expired",keyobj,db->id);
signalModifiedKey(NULL, db, keyobj);
@@ -224,7 +228,7 @@ void activeExpireCycle(int type) {
/* When there are less than 1% filled slots, sampling the key
* space is expensive, so stop here waiting for better times...
* The dictionary will be resized asap. */
- if (num && slots > DICT_HT_INITIAL_SIZE &&
+ if (slots > DICT_HT_INITIAL_SIZE &&
(num*100/slots < 1)) break;
/* The main collection cycle. Sample random keys among keys
diff --git a/src/help.h b/src/help.h
index edd15a3c9..b8b1efb95 100644
--- a/src/help.h
+++ b/src/help.h
@@ -459,12 +459,12 @@ struct commandHelp {
0,
"1.2.0" },
{ "FLUSHALL",
- "[ASYNC]",
+ "[ASYNC|SYNC]",
"Remove all keys from all databases",
9,
"1.0.0" },
{ "FLUSHDB",
- "[ASYNC]",
+ "[ASYNC|SYNC]",
"Remove all keys from the current database",
9,
"1.0.0" },
@@ -518,6 +518,16 @@ struct commandHelp {
"Returns the bit value at offset in the string value stored at key",
1,
"2.2.0" },
+ { "GETDEL",
+ "key",
+ "Get the value of a key and delete the key",
+ 1,
+ "6.2.0" },
+ { "GETEX",
+ "key [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|PERSIST]",
+ "Get the value of a key and optionally set its expiration",
+ 1,
+ "6.2.0" },
{ "GETRANGE",
"key start end",
"Get a substring of the string stored at a key",
@@ -583,6 +593,11 @@ struct commandHelp {
"Set multiple hash fields to multiple values",
5,
"2.0.0" },
+ { "HRANDFIELD",
+ "key [count [WITHVALUES]]",
+ "Get one or multiple random fields from a hash",
+ 5,
+ "6.2.0" },
{ "HSCAN",
"key cursor [MATCH pattern] [COUNT count]",
"Incrementally iterate hash fields and associated values",
@@ -989,7 +1004,7 @@ struct commandHelp {
10,
"2.6.0" },
{ "SCRIPT FLUSH",
- "-",
+ "[ASYNC|SYNC]",
"Remove all the scripts from the script cache.",
10,
"2.6.0" },
@@ -1019,7 +1034,7 @@ struct commandHelp {
8,
"1.0.0" },
{ "SET",
- "key value [EX seconds|PX milliseconds|KEEPTTL] [NX|XX] [GET]",
+ "key value [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|KEEPTTL] [NX|XX] [GET]",
"Set the string value of a key",
1,
"1.0.0" },
@@ -1323,6 +1338,11 @@ struct commandHelp {
"Remove and return members with the lowest scores in a sorted set",
4,
"5.0.0" },
+ { "ZRANDMEMBER",
+ "key [count [WITHSCORES]]",
+ "Get one or multiple random elements from a sorted set",
+ 4,
+ "6.2.0" },
{ "ZRANGE",
"key min max [BYSCORE|BYLEX] [REV] [LIMIT offset count] [WITHSCORES]",
"Return a range of members in a sorted set",
diff --git a/src/lazyfree.c b/src/lazyfree.c
index 8b9f0e2dc..f18b2027f 100644
--- a/src/lazyfree.c
+++ b/src/lazyfree.c
@@ -49,6 +49,14 @@ void lazyFreeTrackingTable(void *args[]) {
atomicIncr(lazyfreed_objects,len);
}
+void lazyFreeLuaScripts(void *args[]) {
+ dict *lua_scripts = args[0];
+ long long len = dictSize(lua_scripts);
+ dictRelease(lua_scripts);
+ atomicDecr(lazyfree_objects,len);
+ atomicIncr(lazyfreed_objects,len);
+}
+
/* Return the number of currently pending objects to free. */
size_t lazyfreeGetPendingObjectsCount(void) {
size_t aux;
@@ -212,3 +220,13 @@ void freeTrackingRadixTreeAsync(rax *tracking) {
atomicIncr(lazyfree_objects,tracking->numele);
bioCreateLazyFreeJob(lazyFreeTrackingTable,1,tracking);
}
+
+/* Free the lua_scripts dict; if the dict is large enough, free it asynchronously. */
+void freeLuaScriptsAsync(dict *lua_scripts) {
+ if (dictSize(lua_scripts) > LAZYFREE_THRESHOLD) {
+ atomicIncr(lazyfree_objects,dictSize(lua_scripts));
+ bioCreateLazyFreeJob(lazyFreeLuaScripts,1,lua_scripts);
+ } else {
+ dictRelease(lua_scripts);
+ }
+}
diff --git a/src/module.c b/src/module.c
index bf186f8b7..b04595801 100644
--- a/src/module.c
+++ b/src/module.c
@@ -29,7 +29,9 @@
#include "server.h"
#include "cluster.h"
+#include "slowlog.h"
#include "rdb.h"
+#include "monotonic.h"
#include <dlfcn.h>
#include <sys/stat.h>
#include <sys/wait.h>
@@ -177,15 +179,25 @@ struct RedisModuleKey {
void *iter; /* Iterator. */
int mode; /* Opening mode. */
- /* Zset iterator. */
- uint32_t ztype; /* REDISMODULE_ZSET_RANGE_* */
- zrangespec zrs; /* Score range. */
- zlexrangespec zlrs; /* Lex range. */
- uint32_t zstart; /* Start pos for positional ranges. */
- uint32_t zend; /* End pos for positional ranges. */
- void *zcurrent; /* Zset iterator current node. */
- int zer; /* Zset iterator end reached flag
- (true if end was reached). */
+ union {
+ struct {
+ /* Zset iterator, use only if value->type == OBJ_ZSET */
+ uint32_t type; /* REDISMODULE_ZSET_RANGE_* */
+ zrangespec rs; /* Score range. */
+ zlexrangespec lrs; /* Lex range. */
+ uint32_t start; /* Start pos for positional ranges. */
+ uint32_t end; /* End pos for positional ranges. */
+ void *current; /* Zset iterator current node. */
+ int er; /* Zset iterator end reached flag
+ (true if end was reached). */
+ } zset;
+ struct {
+ /* Stream, use only if value->type == OBJ_STREAM */
+ streamID currentid; /* Current entry while iterating. */
+ int64_t numfieldsleft; /* Fields left to fetch for current entry. */
+ int signalready; /* Flag that signalKeyAsReady() is needed. */
+ } stream;
+ } u;
};
typedef struct RedisModuleKey RedisModuleKey;
@@ -252,6 +264,9 @@ typedef struct RedisModuleBlockedClient {
int dbid; /* Database number selected by the original client. */
int blocked_on_keys; /* If blocked via RM_BlockClientOnKeys(). */
int unblocked; /* Already on the moduleUnblocked list. */
+ monotime background_timer; /* Timer tracking the start of background work */
+ uint64_t background_duration; /* Current command background time duration.
+ Used for measuring latency of blocking cmds */
} RedisModuleBlockedClient;
static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER;
@@ -376,6 +391,7 @@ robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int
void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx);
void RM_ZsetRangeStop(RedisModuleKey *kp);
static void zsetKeyReset(RedisModuleKey *key);
+static void moduleInitKeyTypeSpecific(RedisModuleKey *key);
void RM_FreeDict(RedisModuleCtx *ctx, RedisModuleDict *d);
void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data);
@@ -478,17 +494,17 @@ void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes) {
* Helpers for modules API implementation
* -------------------------------------------------------------------------- */
-/* Create an empty key of the specified type. 'kp' must point to a key object
- * opened for writing where the .value member is set to NULL because the
+/* Create an empty key of the specified type. `key` must point to a key object
+ * opened for writing where the `.value` member is set to NULL because the
* key was found to be non existing.
*
* On success REDISMODULE_OK is returned and the key is populated with
* the value of the specified type. The function fails and returns
* REDISMODULE_ERR if:
*
- * 1) The key is not open for writing.
- * 2) The key is not empty.
- * 3) The specified type is unknown.
+ * 1. The key is not open for writing.
+ * 2. The key is not empty.
+ * 3. The specified type is unknown.
*/
int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
robj *obj;
@@ -509,10 +525,14 @@ int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
case REDISMODULE_KEYTYPE_HASH:
obj = createHashObject();
break;
+ case REDISMODULE_KEYTYPE_STREAM:
+ obj = createStreamObject();
+ break;
default: return REDISMODULE_ERR;
}
dbAdd(key->db,key->key,obj);
key->value = obj;
+ moduleInitKeyTypeSpecific(key);
return REDISMODULE_OK;
}
@@ -900,6 +920,30 @@ long long RM_Milliseconds(void) {
return mstime();
}
+/* Mark a point in time that will be used as the start time to calculate
+ * the elapsed execution time when RM_BlockedClientMeasureTimeEnd() is called.
+ * Within the same command, you can call RM_BlockedClientMeasureTimeStart()
+ * and RM_BlockedClientMeasureTimeEnd() multiple times to accumulate
+ * independent time intervals into the background duration.
+ * This method always returns REDISMODULE_OK. */
+int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) {
+ elapsedStart(&(bc->background_timer));
+ return REDISMODULE_OK;
+}
+
+/* Mark a point in time that will be used as the end time
+ * to calculate the elapsed execution time.
+ * On success REDISMODULE_OK is returned.
+ * This method only returns REDISMODULE_ERR if no start time was
+ * previously defined (meaning RM_BlockedClientMeasureTimeStart() was not called).
+int RM_BlockedClientMeasureTimeEnd(RedisModuleBlockedClient *bc) {
+ // If the counter is 0 then we haven't called RM_BlockedClientMeasureTimeStart
+ if (!bc->background_timer)
+ return REDISMODULE_ERR;
+ bc->background_duration += elapsedUs(bc->background_timer);
+ return REDISMODULE_OK;
+}
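A minimal sketch of the intended usage, assuming a module that has blocked a client and does its work in a separate thread (the worker name is hypothetical; the RedisModule_* calls are the API introduced above):

    /* Background worker for a blocked client: the time between Start and End
     * is accumulated into the command's reported duration. */
    void *Example_WorkerThread(void *arg) {
        RedisModuleBlockedClient *bc = arg;
        RedisModule_BlockedClientMeasureTimeStart(bc);
        /* ... slow background computation ... */
        RedisModule_BlockedClientMeasureTimeEnd(bc);
        RedisModule_UnblockClient(bc, NULL);
        return NULL;
    }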
+
/* Set flags defining capabilities or behavior bit flags.
*
* REDISMODULE_OPTIONS_HANDLE_IO_ERRORS:
@@ -933,9 +977,9 @@ int RM_SignalModifiedKey(RedisModuleCtx *ctx, RedisModuleString *keyname) {
* keys, call replies and Redis string objects once the command returns. In most
* cases this eliminates the need of calling the following functions:
*
- * 1) RedisModule_CloseKey()
- * 2) RedisModule_FreeCallReply()
- * 3) RedisModule_FreeString()
+ * 1. RedisModule_CloseKey()
+ * 2. RedisModule_FreeCallReply()
+ * 3. RedisModule_FreeString()
*
* These functions can still be used with automatic memory management enabled,
* to optimize loops that make numerous allocations for example. */
@@ -1113,6 +1157,18 @@ RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisMod
return o;
}
+/* Creates a string from a stream ID. The returned string must be released with
+ * RedisModule_FreeString(), unless automatic memory is enabled.
+ *
+ * The passed context `ctx` may be NULL if necessary. See the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromStreamID(RedisModuleCtx *ctx, const RedisModuleStreamID *id) {
+ streamID streamid = {id->ms, id->seq};
+ RedisModuleString *o = createObjectFromStreamID(&streamid);
+ if (ctx != NULL) autoMemoryAdd(ctx, REDISMODULE_AM_STRING, o);
+ return o;
+}
+
/* Free a module string object obtained with one of the Redis modules API calls
* that return new string objects.
*
@@ -1139,9 +1195,9 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
* Normally you want to call this function when, at the same time
* the following conditions are true:
*
- * 1) You have automatic memory management enabled.
- * 2) You want to create string objects.
- * 3) Those string objects you create need to live *after* the callback
+ * 1. You have automatic memory management enabled.
+ * 2. You want to create string objects.
+ * 3. Those string objects you create need to live *after* the callback
* function(for example a command implementation) creating them returns.
*
* Usually you want this in order to store the created string object
@@ -1188,7 +1244,7 @@ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
* returned RedisModuleString.
*
* It is possible to call this function with a NULL context.
- */
+*/
RedisModuleString* RM_HoldString(RedisModuleCtx *ctx, RedisModuleString *str) {
if (str->refcount == OBJ_STATIC_REFCOUNT) {
return RM_CreateStringFromString(ctx, str);
@@ -1270,6 +1326,30 @@ int RM_StringToLongDouble(const RedisModuleString *str, long double *ld) {
return retval ? REDISMODULE_OK : REDISMODULE_ERR;
}
+/* Convert the string into a stream ID, storing it at `*id`.
+ * Returns REDISMODULE_OK on success and returns REDISMODULE_ERR if the string
+ * is not a valid string representation of a stream ID. The special IDs "+" and
+ * "-" are allowed.
+ *
+ * RedisModuleStreamID is a struct with two 64-bit fields, which is used in
+ * stream functions and defined as
+ *
+ * typedef struct RedisModuleStreamID {
+ * uint64_t ms;
+ * uint64_t seq;
+ * } RedisModuleStreamID;
+ */
+int RM_StringToStreamID(const RedisModuleString *str, RedisModuleStreamID *id) {
+ streamID streamid;
+ if (streamParseID(str, &streamid) == C_OK) {
+ id->ms = streamid.ms;
+ id->seq = streamid.seq;
+ return REDISMODULE_OK;
+ } else {
+ return REDISMODULE_ERR;
+ }
+}
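A hedged round-trip example combining the two new helpers; `ctx` and `argv[1]` are assumed to come from an ordinary module command handler:

    RedisModuleStreamID id;
    if (RedisModule_StringToStreamID(argv[1], &id) != REDISMODULE_OK)
        return RedisModule_ReplyWithError(ctx, "ERR invalid stream ID");
    RedisModuleString *reply = RedisModule_CreateStringFromStreamID(ctx, &id);
    return RedisModule_ReplyWithString(ctx, reply);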
+
/* Compare two string objects, returning -1, 0 or 1 respectively if
* a < b, a == b, a > b. Strings are compared byte by byte as two
* binary blobs without any encoding care / collation attempt. */
@@ -1322,7 +1402,7 @@ int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const cha
* -------------------------------------------------------------------------- */
/* Send an error about the number of arguments given to the command,
- * citing the command name in the error message.
+ * citing the command name in the error message. Returns REDISMODULE_OK.
*
* Example:
*
@@ -1394,7 +1474,7 @@ int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
return REDISMODULE_OK;
}
-/* Reply with a simple string (+... \r\n in RESP protocol). This replies
+/* Reply with a simple string (`+... \r\n` in RESP protocol). These replies
* are suitable only when sending a small non-binary string with small
* overhead, like "OK" or similar replies.
*
@@ -1742,7 +1822,7 @@ int RM_ReplicateVerbatim(RedisModuleCtx *ctx) {
* 2. The ID increases monotonically. Clients connecting to the server later
* are guaranteed to get IDs greater than any past ID previously seen.
*
- * Valid IDs are from 1 to 2^64-1. If 0 is returned it means there is no way
+ * Valid IDs are from 1 to 2^64 - 1. If 0 is returned it means there is no way
* to fetch the ID in the context the function was currently called.
*
* After obtaining the ID, it is possible to check if the command execution
@@ -2072,7 +2152,15 @@ static void moduleInitKey(RedisModuleKey *kp, RedisModuleCtx *ctx, robj *keyname
kp->value = value;
kp->iter = NULL;
kp->mode = mode;
- zsetKeyReset(kp);
+ if (kp->value) moduleInitKeyTypeSpecific(kp);
+}
+
+/* Initialize the type-specific part of the key. Only called when the key has a value. */
+static void moduleInitKeyTypeSpecific(RedisModuleKey *key) {
+ switch (key->value->type) {
+ case OBJ_ZSET: zsetKeyReset(key); break;
+ case OBJ_STREAM: key->u.stream.signalready = 0; break;
+ }
}
/* Return an handle representing a Redis key, so that it is possible
@@ -2115,8 +2203,13 @@ static void moduleCloseKey(RedisModuleKey *key) {
int signal = SHOULD_SIGNAL_MODIFIED_KEYS(key->ctx);
if ((key->mode & REDISMODULE_WRITE) && signal)
signalModifiedKey(key->ctx->client,key->db,key->key);
- /* TODO: if (key->iter) RM_KeyIteratorStop(kp); */
+ if (key->iter) zfree(key->iter);
RM_ZsetRangeStop(key);
+ if (key && key->value && key->value->type == OBJ_STREAM &&
+ key->u.stream.signalready) {
+ /* One or more RM_StreamAdd() calls have been done. */
+ signalKeyAsReady(key->db, key->key, OBJ_STREAM);
+ }
decrRefCount(key->key);
}
@@ -2376,9 +2469,10 @@ int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele) {
* that the user should be free with RM_FreeString() or by enabling
* automatic memory. 'where' specifies if the element should be popped from
* head or tail. The command returns NULL if:
- * 1) The list is empty.
- * 2) The key was not open for writing.
- * 3) The key is not a list. */
+ *
+ * 1. The list is empty.
+ * 2. The key was not open for writing.
+ * 3. The key is not a list. */
RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) {
if (!(key->mode & REDISMODULE_WRITE) ||
key->value == NULL ||
@@ -2398,7 +2492,7 @@ RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) {
/* Conversion from/to public flags of the Modules API and our private flags,
* so that we have everything decoupled. */
-int RM_ZsetAddFlagsToCoreFlags(int flags) {
+int moduleZsetAddFlagsToCoreFlags(int flags) {
int retflags = 0;
if (flags & REDISMODULE_ZADD_XX) retflags |= ZADD_XX;
if (flags & REDISMODULE_ZADD_NX) retflags |= ZADD_NX;
@@ -2408,7 +2502,7 @@ int RM_ZsetAddFlagsToCoreFlags(int flags) {
}
/* See previous function comment. */
-int RM_ZsetAddFlagsFromCoreFlags(int flags) {
+int moduleZsetAddFlagsFromCoreFlags(int flags) {
int retflags = 0;
if (flags & ZADD_ADDED) retflags |= REDISMODULE_ZADD_ADDED;
if (flags & ZADD_UPDATED) retflags |= REDISMODULE_ZADD_UPDATED;
@@ -2453,12 +2547,12 @@ int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *f
if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
- if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+ if (flagsptr) flags = moduleZsetAddFlagsToCoreFlags(*flagsptr);
if (zsetAdd(key->value,score,ele->ptr,&flags,NULL) == 0) {
if (flagsptr) *flagsptr = 0;
return REDISMODULE_ERR;
}
- if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+ if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(flags);
return REDISMODULE_OK;
}
@@ -2480,7 +2574,7 @@ int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int
if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
- if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+ if (flagsptr) flags = moduleZsetAddFlagsToCoreFlags(*flagsptr);
flags |= ZADD_INCR;
if (zsetAdd(key->value,score,ele->ptr,&flags,newscore) == 0) {
if (flagsptr) *flagsptr = 0;
@@ -2491,7 +2585,7 @@ int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int
*flagsptr = 0;
return REDISMODULE_ERR;
}
- if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+ if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(flags);
return REDISMODULE_OK;
}
@@ -2544,16 +2638,17 @@ int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) {
* -------------------------------------------------------------------------- */
void zsetKeyReset(RedisModuleKey *key) {
- key->ztype = REDISMODULE_ZSET_RANGE_NONE;
- key->zcurrent = NULL;
- key->zer = 1;
+ key->u.zset.type = REDISMODULE_ZSET_RANGE_NONE;
+ key->u.zset.current = NULL;
+ key->u.zset.er = 1;
}
/* Stop a sorted set iteration. */
void RM_ZsetRangeStop(RedisModuleKey *key) {
+ if (!key->value || key->value->type != OBJ_ZSET) return;
/* Free resources if needed. */
- if (key->ztype == REDISMODULE_ZSET_RANGE_LEX)
- zslFreeLexRange(&key->zlrs);
+ if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX)
+ zslFreeLexRange(&key->u.zset.lrs);
/* Setup sensible values so that misused iteration API calls when an
* iterator is not active will result into something more sensible
* than crashing. */
@@ -2562,7 +2657,7 @@ void RM_ZsetRangeStop(RedisModuleKey *key) {
/* Return the "End of range" flag value to signal the end of the iteration. */
int RM_ZsetRangeEndReached(RedisModuleKey *key) {
- return key->zer;
+ return key->u.zset.er;
}
/* Helper function for RM_ZsetFirstInScoreRange() and RM_ZsetLastInScoreRange().
@@ -2575,29 +2670,29 @@ int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, i
if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
RM_ZsetRangeStop(key);
- key->ztype = REDISMODULE_ZSET_RANGE_SCORE;
- key->zer = 0;
+ key->u.zset.type = REDISMODULE_ZSET_RANGE_SCORE;
+ key->u.zset.er = 0;
/* Setup the range structure used by the sorted set core implementation
* in order to seek at the specified element. */
- zrangespec *zrs = &key->zrs;
+ zrangespec *zrs = &key->u.zset.rs;
zrs->min = min;
zrs->max = max;
zrs->minex = minex;
zrs->maxex = maxex;
if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
- key->zcurrent = first ? zzlFirstInRange(key->value->ptr,zrs) :
- zzlLastInRange(key->value->ptr,zrs);
+ key->u.zset.current = first ? zzlFirstInRange(key->value->ptr,zrs) :
+ zzlLastInRange(key->value->ptr,zrs);
} else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = key->value->ptr;
zskiplist *zsl = zs->zsl;
- key->zcurrent = first ? zslFirstInRange(zsl,zrs) :
- zslLastInRange(zsl,zrs);
+ key->u.zset.current = first ? zslFirstInRange(zsl,zrs) :
+ zslLastInRange(zsl,zrs);
} else {
serverPanic("Unsupported zset encoding");
}
- if (key->zcurrent == NULL) key->zer = 1;
+ if (key->u.zset.current == NULL) key->u.zset.er = 1;
return REDISMODULE_OK;
}
@@ -2610,8 +2705,8 @@ int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, i
* The range is specified according to the two double values 'min' and 'max'.
* Both can be infinite using the following two macros:
*
- * REDISMODULE_POSITIVE_INFINITE for positive infinite value
- * REDISMODULE_NEGATIVE_INFINITE for negative infinite value
+ * * REDISMODULE_POSITIVE_INFINITE for positive infinite value
+ * * REDISMODULE_NEGATIVE_INFINITE for negative infinite value
*
* 'minex' and 'maxex' parameters, if true, respectively setup a range
* where the min and max value are exclusive (not included) instead of
@@ -2639,29 +2734,29 @@ int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleStr
if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
RM_ZsetRangeStop(key);
- key->zer = 0;
+ key->u.zset.er = 0;
/* Setup the range structure used by the sorted set core implementation
* in order to seek at the specified element. */
- zlexrangespec *zlrs = &key->zlrs;
+ zlexrangespec *zlrs = &key->u.zset.lrs;
if (zslParseLexRange(min, max, zlrs) == C_ERR) return REDISMODULE_ERR;
/* Set the range type to lex only after successfully parsing the range,
* otherwise we don't want the zlexrangespec to be freed. */
- key->ztype = REDISMODULE_ZSET_RANGE_LEX;
+ key->u.zset.type = REDISMODULE_ZSET_RANGE_LEX;
if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
- key->zcurrent = first ? zzlFirstInLexRange(key->value->ptr,zlrs) :
- zzlLastInLexRange(key->value->ptr,zlrs);
+ key->u.zset.current = first ? zzlFirstInLexRange(key->value->ptr,zlrs) :
+ zzlLastInLexRange(key->value->ptr,zlrs);
} else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = key->value->ptr;
zskiplist *zsl = zs->zsl;
- key->zcurrent = first ? zslFirstInLexRange(zsl,zlrs) :
- zslLastInLexRange(zsl,zlrs);
+ key->u.zset.current = first ? zslFirstInLexRange(zsl,zlrs) :
+ zslLastInLexRange(zsl,zlrs);
} else {
serverPanic("Unsupported zset encoding");
}
- if (key->zcurrent == NULL) key->zer = 1;
+ if (key->u.zset.current == NULL) key->u.zset.er = 1;
return REDISMODULE_OK;
}
@@ -2694,10 +2789,11 @@ int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModu
RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score) {
RedisModuleString *str;
- if (key->zcurrent == NULL) return NULL;
+ if (!key->value || key->value->type != OBJ_ZSET) return NULL;
+ if (key->u.zset.current == NULL) return NULL;
if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *eptr, *sptr;
- eptr = key->zcurrent;
+ eptr = key->u.zset.current;
sds ele = ziplistGetObject(eptr);
if (score) {
sptr = ziplistNext(key->value->ptr,eptr);
@@ -2705,7 +2801,7 @@ RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score
}
str = createObject(OBJ_STRING,ele);
} else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
- zskiplistNode *ln = key->zcurrent;
+ zskiplistNode *ln = key->u.zset.current;
if (score) *score = ln->score;
str = createStringObject(ln->ele,sdslen(ln->ele));
} else {
@@ -2719,58 +2815,59 @@ RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score
* a next element, 0 if we are already at the latest element or the range
* does not include any item at all. */
int RM_ZsetRangeNext(RedisModuleKey *key) {
- if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+ if (!key->value || key->value->type != OBJ_ZSET) return 0;
+ if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */
if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = key->value->ptr;
- unsigned char *eptr = key->zcurrent;
+ unsigned char *eptr = key->u.zset.current;
unsigned char *next;
next = ziplistNext(zl,eptr); /* Skip element. */
if (next) next = ziplistNext(zl,next); /* Skip score. */
if (next == NULL) {
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
} else {
/* Are we still within the range? */
- if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+ if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) {
/* Fetch the next element score for the
* range check. */
unsigned char *saved_next = next;
next = ziplistNext(zl,next); /* Skip next element. */
double score = zzlGetScore(next); /* Obtain the next score. */
- if (!zslValueLteMax(score,&key->zrs)) {
- key->zer = 1;
+ if (!zslValueLteMax(score,&key->u.zset.rs)) {
+ key->u.zset.er = 1;
return 0;
}
next = saved_next;
- } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
- if (!zzlLexValueLteMax(next,&key->zlrs)) {
- key->zer = 1;
+ } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zzlLexValueLteMax(next,&key->u.zset.lrs)) {
+ key->u.zset.er = 1;
return 0;
}
}
- key->zcurrent = next;
+ key->u.zset.current = next;
return 1;
}
} else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
- zskiplistNode *ln = key->zcurrent, *next = ln->level[0].forward;
+ zskiplistNode *ln = key->u.zset.current, *next = ln->level[0].forward;
if (next == NULL) {
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
} else {
/* Are we still within the range? */
- if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
- !zslValueLteMax(next->score,&key->zrs))
+ if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE &&
+ !zslValueLteMax(next->score,&key->u.zset.rs))
{
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
- } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
- if (!zslLexValueLteMax(next->ele,&key->zlrs)) {
- key->zer = 1;
+ } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zslLexValueLteMax(next->ele,&key->u.zset.lrs)) {
+ key->u.zset.er = 1;
return 0;
}
}
- key->zcurrent = next;
+ key->u.zset.current = next;
return 1;
}
} else {
@@ -2782,58 +2879,59 @@ int RM_ZsetRangeNext(RedisModuleKey *key) {
* a previous element, 0 if we are already at the first element or the range
* does not include any item at all. */
int RM_ZsetRangePrev(RedisModuleKey *key) {
- if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+ if (!key->value || key->value->type != OBJ_ZSET) return 0;
+ if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */
if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = key->value->ptr;
- unsigned char *eptr = key->zcurrent;
+ unsigned char *eptr = key->u.zset.current;
unsigned char *prev;
prev = ziplistPrev(zl,eptr); /* Go back to previous score. */
if (prev) prev = ziplistPrev(zl,prev); /* Back to previous ele. */
if (prev == NULL) {
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
} else {
/* Are we still within the range? */
- if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+ if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) {
/* Fetch the previous element score for the
* range check. */
unsigned char *saved_prev = prev;
prev = ziplistNext(zl,prev); /* Skip element to get the score.*/
double score = zzlGetScore(prev); /* Obtain the prev score. */
- if (!zslValueGteMin(score,&key->zrs)) {
- key->zer = 1;
+ if (!zslValueGteMin(score,&key->u.zset.rs)) {
+ key->u.zset.er = 1;
return 0;
}
prev = saved_prev;
- } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
- if (!zzlLexValueGteMin(prev,&key->zlrs)) {
- key->zer = 1;
+ } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zzlLexValueGteMin(prev,&key->u.zset.lrs)) {
+ key->u.zset.er = 1;
return 0;
}
}
- key->zcurrent = prev;
+ key->u.zset.current = prev;
return 1;
}
} else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
- zskiplistNode *ln = key->zcurrent, *prev = ln->backward;
+ zskiplistNode *ln = key->u.zset.current, *prev = ln->backward;
if (prev == NULL) {
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
} else {
/* Are we still within the range? */
- if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
- !zslValueGteMin(prev->score,&key->zrs))
+ if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE &&
+ !zslValueGteMin(prev->score,&key->u.zset.rs))
{
- key->zer = 1;
+ key->u.zset.er = 1;
return 0;
- } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
- if (!zslLexValueGteMin(prev->ele,&key->zlrs)) {
- key->zer = 1;
+ } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zslLexValueGteMin(prev->ele,&key->u.zset.lrs)) {
+ key->u.zset.er = 1;
return 0;
}
}
- key->zcurrent = prev;
+ key->u.zset.current = prev;
return 1;
}
} else {
@@ -2970,7 +3068,7 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
*
* RedisModuleString *first, *second;
* RedisModule_HashGet(mykey,REDISMODULE_HASH_NONE,argv[1],&first,
- * argv[2],&second,NULL);
+ * argv[2],&second,NULL);
*
* As with RedisModule_HashSet() the behavior of the command can be specified
* passing flags different than REDISMODULE_HASH_NONE:
@@ -3049,6 +3147,455 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
}
/* --------------------------------------------------------------------------
+ * Key API for the stream type.
+ * -------------------------------------------------------------------------- */
+
+/* Adds an entry to a stream. Like XADD without trimming.
+ *
+ * - `key`: The key where the stream is (or will be) stored
+ * - `flags`: A bit field of
+ * - `REDISMODULE_STREAM_ADD_AUTOID`: Assign a stream ID automatically, like
+ * `*` in the XADD command.
+ * - `id`: If the `AUTOID` flag is set, this is where the assigned ID is
+ * returned. Can be NULL if `AUTOID` is set and you don't care to receive the
+ * ID. If `AUTOID` is not set, this is the requested ID.
+ * - `argv`: A pointer to an array of size `numfields * 2` containing the
+ * fields and values.
+ * - `numfields`: The number of field-value pairs in `argv`.
+ *
+ * Returns REDISMODULE_OK if an entry has been added. On failure,
+ * REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream
+ * - EBADF if the key was not opened for writing
+ * - EDOM if the given ID was 0-0 or not greater than all other IDs in the
+ * stream (only if the AUTOID flag is unset)
+ * - EFBIG if the stream has reached the last possible ID
+ */
+int RM_StreamAdd(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, long numfields) {
+ /* Validate args */
+ if (!key || (numfields != 0 && !argv) || /* invalid key or argv */
+ (flags & ~(REDISMODULE_STREAM_ADD_AUTOID)) || /* invalid flags */
+ (!(flags & REDISMODULE_STREAM_ADD_AUTOID) && !id)) { /* id required */
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (key->value && key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP; /* wrong type */
+ return REDISMODULE_ERR;
+ } else if (!(key->mode & REDISMODULE_WRITE)) {
+ errno = EBADF; /* key not open for writing */
+ return REDISMODULE_ERR;
+ } else if (!(flags & REDISMODULE_STREAM_ADD_AUTOID) &&
+ id->ms == 0 && id->seq == 0) {
+ errno = EDOM; /* ID out of range */
+ return REDISMODULE_ERR;
+ }
+
+ /* Create the key if necessary */
+ int created = 0;
+ if (key->value == NULL) {
+ moduleCreateEmptyKey(key, REDISMODULE_KEYTYPE_STREAM);
+ created = 1;
+ }
+
+ stream *s = key->value->ptr;
+ if (s->last_id.ms == UINT64_MAX && s->last_id.seq == UINT64_MAX) {
+ /* The stream has reached the last possible ID */
+ errno = EFBIG;
+ return REDISMODULE_ERR;
+ }
+
+ streamID added_id;
+ streamID use_id;
+ streamID *use_id_ptr = NULL;
+ if (!(flags & REDISMODULE_STREAM_ADD_AUTOID)) {
+ use_id.ms = id->ms;
+ use_id.seq = id->seq;
+ use_id_ptr = &use_id;
+ }
+ if (streamAppendItem(s, argv, numfields, &added_id, use_id_ptr) == C_ERR) {
+ /* ID not greater than all existing IDs in the stream */
+ errno = EDOM;
+ return REDISMODULE_ERR;
+ }
+ /* Postponed signalKeyAsReady(). Done implicitly by moduleCreateEmptyKey()
+ * so not needed if the stream has just been created. */
+ if (!created) key->u.stream.signalready = 1;
+
+ if (id != NULL) {
+ id->ms = added_id.ms;
+ id->seq = added_id.seq;
+ }
+
+ return REDISMODULE_OK;
+}
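A minimal sketch of adding one entry with an auto-generated ID; the key name and the field/value pair are assumed to come from the command's argv, and error handling is abbreviated:

    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    RedisModuleStreamID id;
    RedisModuleString *fv[2] = {argv[2], argv[3]}; /* one field-value pair */
    if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, &id, fv, 1)
        != REDISMODULE_OK)
    {
        RedisModule_CloseKey(key);
        return RedisModule_ReplyWithError(ctx, "ERR stream insert failed");
    }
    RedisModule_CloseKey(key);
    /* 'id' now holds the ID that was assigned. */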
+
+/* Deletes an entry from a stream.
+ *
+ * - `key`: A key opened for writing, with no stream iterator started.
+ * - `id`: The stream ID of the entry to delete.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if a stream iterator is
+ * associated with the key
+ * - ENOENT if no entry with the given stream ID exists
+ *
+ * See also RM_StreamIteratorDelete() for deleting the current entry while
+ * iterating using a stream iterator.
+ */
+int RM_StreamDelete(RedisModuleKey *key, RedisModuleStreamID *id) {
+ if (!key || !id) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP; /* wrong type */
+ return REDISMODULE_ERR;
+ } else if (!(key->mode & REDISMODULE_WRITE) ||
+ key->iter != NULL) {
+ errno = EBADF; /* key not opened for writing or iterator started */
+ return REDISMODULE_ERR;
+ }
+ stream *s = key->value->ptr;
+ streamID streamid = {id->ms, id->seq};
+ if (streamDeleteItem(s, &streamid)) {
+ return REDISMODULE_OK;
+ } else {
+ errno = ENOENT; /* no entry with this id */
+ return REDISMODULE_ERR;
+ }
+}
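A short hedged sketch, assuming the ID to delete arrives as a string argument of the module command:

    RedisModuleStreamID id;
    if (RedisModule_StringToStreamID(argv[2], &id) != REDISMODULE_OK)
        return RedisModule_ReplyWithError(ctx, "ERR invalid stream ID");
    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    if (RedisModule_StreamDelete(key, &id) == REDISMODULE_OK)
        RedisModule_ReplyWithLongLong(ctx, 1);  /* entry removed */
    else
        RedisModule_ReplyWithLongLong(ctx, 0);  /* no such entry */
    RedisModule_CloseKey(key);
    return REDISMODULE_OK;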
+
+/* Sets up a stream iterator.
+ *
+ * - `key`: The stream key opened for reading using RedisModule_OpenKey().
+ * - `flags`:
+ * - `REDISMODULE_STREAM_ITERATOR_EXCLUSIVE`: Don't include `start` and `end`
+ * in the iterated range.
+ * - `REDISMODULE_STREAM_ITERATOR_REVERSE`: Iterate in reverse order, starting
+ * from the `end` of the range.
+ * - `start`: The lower bound of the range. Use NULL for the beginning of the
+ * stream.
+ * - `end`: The upper bound of the range. Use NULL for the end of the stream.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if a stream iterator is
+ * already associated with the key
+ * - EDOM if `start` or `end` is outside the valid range
+ *
+ * The stream IDs are retrieved using RedisModule_StreamIteratorNextID() and
+ * for each stream ID, the fields and values are retrieved using
+ * RedisModule_StreamIteratorNextField(). The iterator is freed by calling
+ * RedisModule_StreamIteratorStop().
+ *
+ * Example (error handling omitted):
+ *
+ * RedisModule_StreamIteratorStart(key, 0, startid_ptr, endid_ptr);
+ * RedisModuleStreamID id;
+ * long numfields;
+ * while (RedisModule_StreamIteratorNextID(key, &id, &numfields) ==
+ * REDISMODULE_OK) {
+ * RedisModuleString *field, *value;
+ * while (RedisModule_StreamIteratorNextField(key, &field, &value) ==
+ * REDISMODULE_OK) {
+ * //
+ * // ... Do stuff ...
+ * //
+ * RedisModule_Free(field);
+ * RedisModule_Free(value);
+ * }
+ * }
+ * RedisModule_StreamIteratorStop(key);
+ */
+int RM_StreamIteratorStart(RedisModuleKey *key, int flags, RedisModuleStreamID *start, RedisModuleStreamID *end) {
+ /* check args */
+ if (!key ||
+ (flags & ~(REDISMODULE_STREAM_ITERATOR_EXCLUSIVE |
+ REDISMODULE_STREAM_ITERATOR_REVERSE))) {
+ errno = EINVAL; /* key missing or invalid flags */
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR; /* not a stream */
+ } else if (key->iter) {
+ errno = EBADF; /* iterator already started */
+ return REDISMODULE_ERR;
+ }
+
+ /* define range for streamIteratorStart() */
+ streamID lower, upper;
+ if (start) lower = (streamID){start->ms, start->seq};
+ if (end) upper = (streamID){end->ms, end->seq};
+ if (flags & REDISMODULE_STREAM_ITERATOR_EXCLUSIVE) {
+ if ((start && streamIncrID(&lower) != C_OK) ||
+ (end && streamDecrID(&upper) != C_OK)) {
+ errno = EDOM; /* end is 0-0 or start is MAX-MAX? */
+ return REDISMODULE_ERR;
+ }
+ }
+
+ /* create iterator */
+ stream *s = key->value->ptr;
+ int rev = flags & REDISMODULE_STREAM_ITERATOR_REVERSE;
+ streamIterator *si = zmalloc(sizeof(*si));
+ streamIteratorStart(si, s, start ? &lower : NULL, end ? &upper : NULL, rev);
+ key->iter = si;
+ key->u.stream.currentid.ms = 0; /* for RM_StreamIteratorDelete() */
+ key->u.stream.currentid.seq = 0;
+ key->u.stream.numfieldsleft = 0; /* for RM_StreamIteratorNextField() */
+ return REDISMODULE_OK;
+}
+
+/* Stops a stream iterator created using RedisModule_StreamIteratorStart() and
+ * reclaims its memory.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if no stream iterator is
+ * associated with the key
+ */
+int RM_StreamIteratorStop(RedisModuleKey *key) {
+ if (!key) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ } else if (!key->iter) {
+ errno = EBADF;
+ return REDISMODULE_ERR;
+ }
+ zfree(key->iter);
+ key->iter = NULL;
+ return REDISMODULE_OK;
+}
+
+/* Finds the next stream entry and returns its stream ID and the number of
+ * fields.
+ *
+ * - `key`: Key for which a stream iterator has been started using
+ * RedisModule_StreamIteratorStart().
+ * - `id`: The stream ID returned. NULL if you don't care.
+ * - `numfields`: The number of fields in the found stream entry. NULL if you
+ * don't care.
+ *
+ * Returns REDISMODULE_OK and sets `*id` and `*numfields` if an entry was found.
+ * On failure, REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if no stream iterator is associated with the key
+ * - ENOENT if there are no more entries in the range of the iterator
+ *
+ * In practice, if RM_StreamIteratorNextID() is called after a successful call
+ * to RM_StreamIteratorStart() and with the same key, it is safe to assume that
+ * a REDISMODULE_ERR return value means that there are no more entries.
+ *
+ * Use RedisModule_StreamIteratorNextField() to retrieve the fields and values.
+ * See the example at RedisModule_StreamIteratorStart().
+ */
+int RM_StreamIteratorNextID(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) {
+ if (!key) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ } else if (!key->iter) {
+ errno = EBADF;
+ return REDISMODULE_ERR;
+ }
+ streamIterator *si = key->iter;
+ int64_t *num_ptr = &key->u.stream.numfieldsleft;
+ streamID *streamid_ptr = &key->u.stream.currentid;
+ if (streamIteratorGetID(si, streamid_ptr, num_ptr)) {
+ if (id) {
+ id->ms = streamid_ptr->ms;
+ id->seq = streamid_ptr->seq;
+ }
+ if (numfields) *numfields = *num_ptr;
+ return REDISMODULE_OK;
+ } else {
+ /* No entry found. */
+ key->u.stream.currentid.ms = 0; /* for RM_StreamIteratorDelete() */
+ key->u.stream.currentid.seq = 0;
+ key->u.stream.numfieldsleft = 0; /* for RM_StreamIteratorNextField() */
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+}
+
+/* Retrieves the next field of the current stream ID and its corresponding value
+ * in a stream iteration. This function should be called repeatedly after calling
+ * RedisModule_StreamIteratorNextID() to fetch each field-value pair.
+ *
+ * - `key`: Key where a stream iterator has been started.
+ * - `field_ptr`: This is where the field is returned.
+ * - `value_ptr`: This is where the value is returned.
+ *
+ * Returns REDISMODULE_OK and points `*field_ptr` and `*value_ptr` to freshly
+ * allocated RedisModuleString objects. The string objects are freed
+ * automatically when the callback finishes if automatic memory is enabled. On
+ * failure, REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if no stream iterator is associated with the key
+ * - ENOENT if there are no more fields in the current stream entry
+ *
+ * In practice, if RM_StreamIteratorNextField() is called after a successful
+ * call to RM_StreamIteratorNextID() and with the same key, it is safe to assume
+ * that a REDISMODULE_ERR return value means that there are no more fields.
+ *
+ * See the example at RedisModule_StreamIteratorStart().
+ */
+int RM_StreamIteratorNextField(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) {
+ if (!key) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ } else if (!key->iter) {
+ errno = EBADF;
+ return REDISMODULE_ERR;
+ } else if (key->u.stream.numfieldsleft <= 0) {
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+ streamIterator *si = key->iter;
+ unsigned char *field, *value;
+ int64_t field_len, value_len;
+ streamIteratorGetField(si, &field, &value, &field_len, &value_len);
+ if (field_ptr) {
+ *field_ptr = createRawStringObject((char *)field, field_len);
+ autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *field_ptr);
+ }
+ if (value_ptr) {
+ *value_ptr = createRawStringObject((char *)value, value_len);
+ autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *value_ptr);
+ }
+ key->u.stream.numfieldsleft--;
+ return REDISMODULE_OK;
+}
+
+/* Deletes the current stream entry while iterating.
+ *
+ * This function can be called after RM_StreamIteratorNextID() or after any
+ * calls to RM_StreamIteratorNextField().
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if key is NULL
+ * - ENOTSUP if the key is empty or of a type other than stream
+ * - EBADF if the key is not opened for writing or if no iterator has been started
+ * - ENOENT if the iterator has no current stream entry
+ */
+int RM_StreamIteratorDelete(RedisModuleKey *key) {
+ if (!key) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ } else if (!(key->mode & REDISMODULE_WRITE) || !key->iter) {
+ errno = EBADF;
+ return REDISMODULE_ERR;
+ } else if (key->u.stream.currentid.ms == 0 &&
+ key->u.stream.currentid.seq == 0) {
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+ streamIterator *si = key->iter;
+ streamIteratorRemoveEntry(si, &key->u.stream.currentid);
+ key->u.stream.currentid.ms = 0; /* Make sure repeated Delete() fails */
+ key->u.stream.currentid.seq = 0;
+ key->u.stream.numfieldsleft = 0; /* Make sure NextField() fails */
+ return REDISMODULE_OK;
+}
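Building on the iteration example shown at RM_StreamIteratorStart(), a hedged sketch that deletes matching entries in place (the "no fields" predicate is purely illustrative):

    RedisModule_StreamIteratorStart(key, 0, NULL, NULL); /* iterate the whole stream */
    RedisModuleStreamID id;
    long numfields;
    while (RedisModule_StreamIteratorNextID(key, &id, &numfields) == REDISMODULE_OK) {
        if (numfields == 0)
            RedisModule_StreamIteratorDelete(key); /* drop the current entry */
    }
    RedisModule_StreamIteratorStop(key);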
+
+/* Trim a stream by length, similar to XTRIM with MAXLEN.
+ *
+ * - `key`: Key opened for writing.
+ * - `flags`: A bitfield of
+ * - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance,
+ * like XTRIM with `~`.
+ * - `length`: The number of stream entries to keep after trimming.
+ *
+ * Returns the number of entries deleted. On failure, a negative value is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key is empty or of a type other than stream
+ * - EBADF if the key is not opened for writing
+ */
+long long RM_StreamTrimByLength(RedisModuleKey *key, int flags, long long length) {
+ if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || length < 0) {
+ errno = EINVAL;
+ return -1;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return -1;
+ } else if (!(key->mode & REDISMODULE_WRITE)) {
+ errno = EBADF;
+ return -1;
+ }
+ int approx = flags & REDISMODULE_STREAM_TRIM_APPROX ? 1 : 0;
+ return streamTrimByLength((stream *)key->value->ptr, length, approx);
+}
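For example (a hedged one-liner on a key opened for writing; the length of 1000 is arbitrary):

    /* Keep roughly the newest 1000 entries; returns how many were removed. */
    long long deleted = RedisModule_StreamTrimByLength(key, REDISMODULE_STREAM_TRIM_APPROX, 1000);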
+
+/* Trim a stream by ID, similar to XTRIM with MINID.
+ *
+ * - `key`: Key opened for writing.
+ * - `flags`: A bitfield of
+ * - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance,
+ * like XTRIM with `~`.
+ * - `id`: The smallest stream ID to keep after trimming.
+ *
+ * Returns the number of entries deleted. On failure, a negative value is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key is empty or of a type other than stream
+ * - EBADF if the key is not opened for writing
+ */
+long long RM_StreamTrimByID(RedisModuleKey *key, int flags, RedisModuleStreamID *id) {
+ if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || !id) {
+ errno = EINVAL;
+ return -1;
+ } else if (!key->value || key->value->type != OBJ_STREAM) {
+ errno = ENOTSUP;
+ return -1;
+ } else if (!(key->mode & REDISMODULE_WRITE)) {
+ errno = EBADF;
+ return -1;
+ }
+ int approx = flags & REDISMODULE_STREAM_TRIM_APPROX ? 1 : 0;
+ streamID minid = (streamID){id->ms, id->seq};
+ return streamTrimByID((stream *)key->value->ptr, minid, approx);
+}
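And the ID-based counterpart (the millisecond timestamp is an arbitrary example value):

    /* Drop entries older than the given ID, allowing approximate trimming. */
    RedisModuleStreamID minid = { .ms = 1609459200000, .seq = 0 };
    long long deleted = RedisModule_StreamTrimByID(key, REDISMODULE_STREAM_TRIM_APPROX, &minid);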
+
+/* --------------------------------------------------------------------------
* Redis <-> Modules generic Call() API
* -------------------------------------------------------------------------- */
@@ -3162,9 +3709,8 @@ void moduleParseCallReply_Array(RedisModuleCallReply *reply) {
reply->type = REDISMODULE_REPLY_ARRAY;
}
-/* Free a Call reply and all the nested replies it contains if it's an
- * array. */
-void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){
+/* Recursive free reply function. */
+void moduleFreeCallReplyRec(RedisModuleCallReply *reply, int freenested){
/* Don't free nested replies by default: the user must always free the
* toplevel reply. However be gentle and don't crash if the module
* misuses the API. */
@@ -3174,7 +3720,7 @@ void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){
if (reply->type == REDISMODULE_REPLY_ARRAY) {
size_t j;
for (j = 0; j < reply->len; j++)
- RM_FreeCallReply_Rec(reply->val.array+j,1);
+ moduleFreeCallReplyRec(reply->val.array+j,1);
zfree(reply->val.array);
}
}
@@ -3189,13 +3735,14 @@ void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){
}
}
-/* Wrapper for the recursive free reply function. This is needed in order
- * to have the first level function to return on nested replies, but only
- * if called by the module API. */
+/* Free a Call reply and all the nested replies it contains if it's an
+ * array. */
void RM_FreeCallReply(RedisModuleCallReply *reply) {
-
+ /* This is a wrapper for the recursive free reply function. This is needed
+ * in order to have the first-level function return on nested replies,
+ * but only if called by the module API. */
RedisModuleCtx *ctx = reply->ctx;
- RM_FreeCallReply_Rec(reply,0);
+ moduleFreeCallReplyRec(reply,0);
autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply);
}
@@ -3347,30 +3894,31 @@ fmterr:
*
* * **cmdname**: The Redis command to call.
* * **fmt**: A format specifier string for the command's arguments. Each
- * of the arguments should be specified by a valid type specification:
- * b The argument is a buffer and is immediately followed by another
- * argument that is the buffer's length.
- * c The argument is a pointer to a plain C string (null-terminated).
- * l The argument is long long integer.
- * s The argument is a RedisModuleString.
- * v The argument(s) is a vector of RedisModuleString.
- *
- * The format specifier can also include modifiers:
- * ! Sends the Redis command and its arguments to replicas and AOF.
- * A Suppress AOF propagation, send only to replicas (requires `!`).
- * R Suppress replicas propagation, send only to AOF (requires `!`).
+ * of the arguments should be specified by a valid type specification. The
+ * format specifier can also contain the modifiers `!`, `A` and `R` which
+ * don't have a corresponding argument.
+ *
+ * * `b` -- The argument is a buffer and is immediately followed by another
+ * argument that is the buffer's length.
+ * * `c` -- The argument is a pointer to a plain C string (null-terminated).
+ * `l` -- The argument is a long long integer.
+ * * `s` -- The argument is a RedisModuleString.
+ * * `v` -- The argument(s) is a vector of RedisModuleString.
+ * * `!` -- Sends the Redis command and its arguments to replicas and AOF.
+ * * `A` -- Suppress AOF propagation, send only to replicas (requires `!`).
+ * * `R` -- Suppress replicas propagation, send only to AOF (requires `!`).
* * **...**: The actual arguments to the Redis command.
*
* On success a RedisModuleCallReply object is returned, otherwise
* NULL is returned and errno is set to the following values:
*
- * EBADF: wrong format specifier.
- * EINVAL: wrong command arity.
- * ENOENT: command does not exist.
- * EPERM: operation in Cluster instance with key in non local slot.
- * EROFS: operation in Cluster instance when a write command is sent
- * in a readonly state.
- * ENETDOWN: operation in Cluster instance when cluster is down.
+ * * EBADF: wrong format specifier.
+ * * EINVAL: wrong command arity.
+ * * ENOENT: command does not exist.
+ * * EPERM: operation in Cluster instance with key in non local slot.
+ * * EROFS: operation in Cluster instance when a write command is sent
+ * in a readonly state.
+ * * ENETDOWN: operation in Cluster instance when cluster is down.
*
* Example code fragment:
*
@@ -3682,27 +4230,28 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, robj *value) {
* still load old data produced by an older version if the rdb_load
* callback is able to check the encver value and act accordingly.
* The encver must be a positive value between 0 and 1023.
+ *
* * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure
* that should be populated with the methods callbacks and structure
* version, like in the following example:
*
- * RedisModuleTypeMethods tm = {
- * .version = REDISMODULE_TYPE_METHOD_VERSION,
- * .rdb_load = myType_RDBLoadCallBack,
- * .rdb_save = myType_RDBSaveCallBack,
- * .aof_rewrite = myType_AOFRewriteCallBack,
- * .free = myType_FreeCallBack,
- *
- * // Optional fields
- * .digest = myType_DigestCallBack,
- * .mem_usage = myType_MemUsageCallBack,
- * .aux_load = myType_AuxRDBLoadCallBack,
- * .aux_save = myType_AuxRDBSaveCallBack,
- * .free_effort = myType_FreeEffortCallBack,
- * .unlink = myType_UnlinkCallBack,
- * .copy = myType_CopyCallback,
- * .defrag = myType_DefragCallback
- * }
+ * RedisModuleTypeMethods tm = {
+ * .version = REDISMODULE_TYPE_METHOD_VERSION,
+ * .rdb_load = myType_RDBLoadCallBack,
+ * .rdb_save = myType_RDBSaveCallBack,
+ * .aof_rewrite = myType_AOFRewriteCallBack,
+ * .free = myType_FreeCallBack,
+ *
+ * // Optional fields
+ * .digest = myType_DigestCallBack,
+ * .mem_usage = myType_MemUsageCallBack,
+ * .aux_load = myType_AuxRDBLoadCallBack,
+ * .aux_save = myType_AuxRDBSaveCallBack,
+ * .free_effort = myType_FreeEffortCallBack,
+ * .unlink = myType_UnlinkCallBack,
+ * .copy = myType_CopyCallback,
+ * .defrag = myType_DefragCallback
+ * }
*
* * **rdb_load**: A callback function pointer that loads data from RDB files.
* * **rdb_save**: A callback function pointer that saves data to RDB files.
@@ -3740,7 +4289,7 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, robj *value) {
* a time limit and provides cursor support is used only for keys that are determined
* to have significant internal complexity. To determine this, the defrag mechanism
* uses the free_effort callback and the 'active-defrag-max-scan-fields' config directive.
- * NOTE: The value is passed as a void** and the function is expected to update the
+ * NOTE: The value is passed as a `void**` and the function is expected to update the
* pointer if the top-level value pointer is defragmented and consequentially changes.
*
* Note: the module name "AAAAAAAAA" is reserved and produces an error, it
@@ -3900,7 +4449,7 @@ int moduleAllDatatypesHandleErrors() {
}
/* Returns true if any previous IO API failed.
- * for Load* APIs the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag must be set with
+ * for `Load*` APIs the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag must be set with
* RedisModule_SetModuleOptions first. */
int RM_IsIOError(RedisModuleIO *io) {
return io->error;
@@ -3926,7 +4475,7 @@ saveerr:
}
/* Load an unsigned 64 bit value from the RDB file. This function should only
- * be called in the context of the rdb_load method of modules implementing
+ * be called in the context of the `rdb_load` method of modules implementing
* new data types. */
uint64_t RM_LoadUnsigned(RedisModuleIO *io) {
if (io->error) return 0;
@@ -4242,7 +4791,6 @@ void RM_DigestEndSequence(RedisModuleDigest *md) {
* If this is NOT done, Redis will handle corrupted (or just truncated) serialized
* data by producing an error message and terminating the process.
*/
-
void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType *mt) {
rio payload;
RedisModuleIO io;
@@ -4270,7 +4818,6 @@ void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType *
* implement in order to allow a module to arbitrarily serialize/de-serialize
* keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented.
*/
-
RedisModuleString *RM_SaveDataTypeToString(RedisModuleCtx *ctx, void *data, const moduleType *mt) {
rio payload;
RedisModuleIO io;
@@ -4368,7 +4915,7 @@ const RedisModuleString *RM_GetKeyNameFromIO(RedisModuleIO *io) {
return io->key;
}
-/* Returns a RedisModuleString with the name of the key from RedisModuleKey */
+/* Returns a RedisModuleString with the name of the key from RedisModuleKey. */
const RedisModuleString *RM_GetKeyNameFromModuleKey(RedisModuleKey *key) {
return key ? key->key : NULL;
}
@@ -4383,7 +4930,7 @@ const RedisModuleString *RM_GetKeyNameFromModuleKey(RedisModuleKey *key) {
* RM_LogIOError()
*
*/
-void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) {
+void moduleLogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) {
char msg[LOG_MAX_LEN];
size_t name_len;
int level;
@@ -4422,7 +4969,7 @@ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_li
void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
- RM_LogRaw(ctx? ctx->module: NULL,levelstr,fmt,ap);
+ moduleLogRaw(ctx? ctx->module: NULL,levelstr,fmt,ap);
va_end(ap);
}
@@ -4434,12 +4981,15 @@ void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
- RM_LogRaw(io->type->module,levelstr,fmt,ap);
+ moduleLogRaw(io->type->module,levelstr,fmt,ap);
va_end(ap);
}
/* Redis-like assert function.
*
+ * The macro `RedisModule_Assert(expression)` is recommended, rather than
+ * calling this function directly.
+ *
* A failed assertion will shut down the server and produce logging information
* that looks identical to information generated by Redis itself.
*/
@@ -4570,6 +5120,7 @@ RedisModuleBlockedClient *moduleBlockClient(RedisModuleCtx *ctx, RedisModuleCmdF
bc->dbid = c->db->id;
bc->blocked_on_keys = keys != NULL;
bc->unblocked = 0;
+ bc->background_duration = 0;
c->bpop.timeout = timeout;
if (islua || ismulti) {
@@ -4643,6 +5194,11 @@ int moduleTryServeClientBlockedOnKey(client *c, robj *key) {
*
* In these cases, a call to RedisModule_BlockClient() will **not** block the
* client, but instead produce a specific error reply.
+ *
+ * Measuring background time: By default the time spent in the blocked command
+ * is not accounted for in the total command duration. To include such time you
+ * should use RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd()
+ * one or more times within the blocking command background work.
*/
RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms) {
return moduleBlockClient(ctx,reply_callback,timeout_callback,free_privdata,timeout_ms, NULL,0,NULL);
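A sketch of a background thread using the time-measurement calls; BlockingWorker and do_heavy_computation() are hypothetical names, and the blocked client handle is assumed to be passed as the thread argument:

    void *BlockingWorker(void *arg) {
        RedisModuleBlockedClient *bc = arg;
        RedisModule_BlockedClientMeasureTimeStart(bc);
        void *result = do_heavy_computation();  /* Hypothetical helper. */
        RedisModule_BlockedClientMeasureTimeEnd(bc);
        RedisModule_UnblockClient(bc, result);  /* result becomes the reply privdata. */
        return NULL;
    }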
@@ -4673,7 +5229,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc
* key, or a client in queue before this one can be served, modifying the key
* as well and making it empty again. So when a client is blocked with
* RedisModule_BlockClientOnKeys() the reply callback is not called after
- * RM_UnblockCLient() is called, but every time a key is signaled as ready:
+ * RM_UnblockClient() is called, but every time a key is signaled as ready:
* if the reply callback can serve the client, it returns REDISMODULE_OK
* and the client is unblocked, otherwise it will return REDISMODULE_ERR
* and we'll try again later.
@@ -4837,6 +5393,7 @@ void moduleHandleBlockedClients(void) {
* was blocked on keys (RM_BlockClientOnKeys()), because we already
* called such callback in moduleTryServeClientBlockedOnKey() when
* the key was signaled as ready. */
+ uint64_t reply_us = 0;
if (c && !bc->blocked_on_keys && bc->reply_callback) {
RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY;
@@ -4845,9 +5402,19 @@ void moduleHandleBlockedClients(void) {
ctx.module = bc->module;
ctx.client = bc->client;
ctx.blocked_client = bc;
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
bc->reply_callback(&ctx,(void**)c->argv,c->argc);
+ reply_us = elapsedUs(replyTimer);
moduleFreeContext(&ctx);
}
+ /* Update stats now that we've finished the blocking operation.
+ * This needs to be done outside the reply callback above, given that a
+ * module might not define any callback and still do blocking ops.
+ */
+ if (c && !bc->blocked_on_keys) {
+ updateStatsOnUnblock(c, bc->background_duration, reply_us);
+ }
/* Free privdata if any. */
if (bc->privdata && bc->free_privdata) {
@@ -4911,6 +5478,9 @@ void moduleBlockedClientTimedOut(client *c) {
ctx.blocked_privdata = bc->privdata;
bc->timeout_callback(&ctx,(void**)c->argv,c->argc);
moduleFreeContext(&ctx);
+ if (!bc->blocked_on_keys) {
+ updateStatsOnUnblock(c, bc->background_duration, 0);
+ }
/* For timeout events, we do not want to call the disconnect callback,
* because the blocked client will be automatically disconnected in
* this case, and the user can still hook using the timeout callback. */
@@ -5103,9 +5673,9 @@ void moduleReleaseGIL(void) {
*
* The subscriber signature is:
*
- * int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type,
- * const char *event,
- * RedisModuleString *key);
+ * int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type,
+ * const char *event,
+ * RedisModuleString *key);
*
* `type` is the event type bit, that must match the mask given at registration
* time. The event string is the actual command being executed, and key is the
@@ -5369,28 +5939,27 @@ size_t RM_GetClusterSize(void) {
return dictSize(server.cluster->nodes);
}
+clusterNode *clusterLookupNode(const char *name); /* We need access to internals */
+
/* Populate the specified info for the node having as ID the specified 'id',
* then returns REDISMODULE_OK. Otherwise if the node ID does not exist from
* the POV of this local node, REDISMODULE_ERR is returned.
*
- * The arguments ip, master_id, port and flags can be NULL in case we don't
- * need to populate back certain info. If an ip and master_id (only populated
+ * The arguments `ip`, `master_id`, `port` and `flags` can be NULL in case we don't
+ * need to populate back certain info. If an `ip` and `master_id` (only populated
* if the instance is a slave) are specified, they point to buffers holding
- * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as ip
- * and master_id are not null terminated.
+ * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as `ip`
+ * and `master_id` are not null terminated.
*
* The list of flags reported is the following:
*
- * * REDISMODULE_NODE_MYSELF This node
- * * REDISMODULE_NODE_MASTER The node is a master
- * * REDISMODULE_NODE_SLAVE The node is a replica
- * * REDISMODULE_NODE_PFAIL We see the node as failing
- * * REDISMODULE_NODE_FAIL The cluster agrees the node is failing
- * * REDISMODULE_NODE_NOFAILOVER The slave is configured to never failover
+ * * REDISMODULE_NODE_MYSELF: This node
+ * * REDISMODULE_NODE_MASTER: The node is a master
+ * * REDISMODULE_NODE_SLAVE: The node is a replica
+ * * REDISMODULE_NODE_PFAIL: We see the node as failing
+ * * REDISMODULE_NODE_FAIL: The cluster agrees the node is failing
+ * * REDISMODULE_NODE_NOFAILOVER: The slave is configured to never failover
*/
-
-clusterNode *clusterLookupNode(const char *name); /* We need access to internals */
-
int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) {
UNUSED(ctx);
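A brief sketch of querying information about our own node, passing NULL for the fields that are not needed; it assumes cluster mode is enabled:

    void logMyClusterRole(RedisModuleCtx *ctx) {
        const char *myid = RedisModule_GetMyClusterID();
        if (myid == NULL) return;                /* Cluster mode is disabled. */
        int port, flags;
        if (RedisModule_GetClusterNodeInfo(ctx, myid, NULL, NULL, &port, &flags)
            == REDISMODULE_OK)
        {
            RedisModule_Log(ctx, "notice", "Node port %d, master: %s",
                            port, (flags & REDISMODULE_NODE_MASTER) ? "yes" : "no");
        }
    }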
@@ -5434,18 +6003,18 @@ int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *m
* a different distributed system, but still want to use the Redis Cluster
* message bus. Flags that can be set:
*
- * CLUSTER_MODULE_FLAG_NO_FAILOVER
- * CLUSTER_MODULE_FLAG_NO_REDIRECTION
+ * * CLUSTER_MODULE_FLAG_NO_FAILOVER
+ * * CLUSTER_MODULE_FLAG_NO_REDIRECTION
*
* With the following effects:
*
- * NO_FAILOVER: prevent Redis Cluster slaves to failover a failing master.
- * Also disables the replica migration feature.
+ * * NO_FAILOVER: prevent Redis Cluster slaves from failing over a failing master.
+ * Also disables the replica migration feature.
*
- * NO_REDIRECTION: Every node will accept any key, without trying to perform
- * partitioning according to the user Redis Cluster algorithm.
- * Slots informations will still be propagated across the
- * cluster, but without effects. */
+ * * NO_REDIRECTION: Every node will accept any key, without trying to perform
+ * partitioning according to the user Redis Cluster algorithm.
+ * Slot information will still be propagated across the
+ * cluster, but without effect. */
void RM_SetClusterFlags(RedisModuleCtx *ctx, uint64_t flags) {
UNUSED(ctx);
if (flags & REDISMODULE_CLUSTER_FLAG_NO_FAILOVER)
@@ -5964,15 +6533,15 @@ int RM_DictDel(RedisModuleDict *d, RedisModuleString *key, void *oldval) {
* comparison operator to use in order to seek the first element. The
* operators available are:
*
- * "^" -- Seek the first (lexicographically smaller) key.
- * "$" -- Seek the last (lexicographically biffer) key.
- * ">" -- Seek the first element greater than the specified key.
- * ">=" -- Seek the first element greater or equal than the specified key.
- * "<" -- Seek the first element smaller than the specified key.
- * "<=" -- Seek the first element smaller or equal than the specified key.
- * "==" -- Seek the first element matching exactly the specified key.
+ * * `^` -- Seek the first (lexicographically smaller) key.
+ * * `$` -- Seek the last (lexicographically bigger) key.
+ * * `>` -- Seek the first element greater than the specified key.
+ * * `>=` -- Seek the first element greater than or equal to the specified key.
+ * * `<` -- Seek the first element smaller than the specified key.
+ * * `<=` -- Seek the first element smaller than or equal to the specified key.
+ * * `==` -- Seek the first element matching exactly the specified key.
*
- * Note that for "^" and "$" the passed key is not used, and the user may
+ * Note that for `^` and `$` the passed key is not used, and the user may
* just pass NULL with a length of 0.
*
* If the element to start the iteration cannot be seeked based on the
@@ -6017,11 +6586,11 @@ int RM_DictIteratorReseek(RedisModuleDictIter *di, const char *op, RedisModuleSt
return RM_DictIteratorReseekC(di,op,key->ptr,sdslen(key->ptr));
}
-/* Return the current item of the dictionary iterator 'di' and steps to the
+/* Return the current item of the dictionary iterator `di` and steps to the
* next element. If the iterator already yield the last element and there
* are no other elements to return, NULL is returned, otherwise a pointer
- * to a string representing the key is provided, and the '*keylen' length
- * is set by reference (if keylen is not NULL). The '*dataptr', if not NULL
+ * to a string representing the key is provided, and the `*keylen` length
+ * is set by reference (if keylen is not NULL). The `*dataptr`, if not NULL
* is set to the value of the pointer stored at the returned key as auxiliary
* data (as set by the RedisModule_DictSet API).
*
@@ -6035,7 +6604,7 @@ int RM_DictIteratorReseek(RedisModuleDictIter *di, const char *op, RedisModuleSt
* }
*
* The returned pointer is of type void because sometimes it makes sense
- * to cast it to a char* sometimes to an unsigned char* depending on the
+ * to cast it to a `char*`, sometimes to an `unsigned char*`, depending on the
* fact it contains or not binary data, so this API ends being more
* comfortable to use.
*
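A sketch of seeking and iterating keys that share a prefix; the dict, the "user:" prefix and the early-exit check are illustrative, and memcmp() comes from <string.h>:

    void logUserKeys(RedisModuleCtx *ctx, RedisModuleDict *d) {
        RedisModuleDictIter *iter =
            RedisModule_DictIteratorStartC(d, ">=", "user:", 5);
        void *key, *data;
        size_t keylen;
        while ((key = RedisModule_DictNextC(iter, &keylen, &data)) != NULL) {
            if (keylen < 5 || memcmp(key, "user:", 5) != 0) break;
            RedisModule_Log(ctx, "notice", "dict key: %.*s", (int)keylen, (char*)key);
        }
        RedisModule_DictIteratorStop(iter);
    }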
@@ -6119,8 +6688,8 @@ int RM_DictCompare(RedisModuleDictIter *di, const char *op, RedisModuleString *k
int RM_InfoEndDictField(RedisModuleInfoCtx *ctx);
/* Used to start a new section, before adding any fields. the section name will
- * be prefixed by "<modulename>_" and must only include A-Z,a-z,0-9.
- * NULL or empty string indicates the default section (only <modulename>) is used.
+ * be prefixed by `<modulename>_` and must only include A-Z,a-z,0-9.
+ * NULL or empty string indicates the default section (only `<modulename>`) is used.
* When return value is REDISMODULE_ERR, the section should and will be skipped. */
int RM_InfoAddSection(RedisModuleInfoCtx *ctx, char *name) {
sds full_name = sdsdup(ctx->module->name);
@@ -6180,8 +6749,8 @@ int RM_InfoEndDictField(RedisModuleInfoCtx *ctx) {
}
/* Used by RedisModuleInfoFunc to add info fields.
- * Each field will be automatically prefixed by "<modulename>_".
- * Field names or values must not include \r\n of ":" */
+ * Each field will be automatically prefixed by `<modulename>_`.
+ * Field names or values must not include `\r\n` or `:`. */
int RM_InfoAddFieldString(RedisModuleInfoCtx *ctx, char *field, RedisModuleString *value) {
if (!ctx->in_section)
return REDISMODULE_ERR;
@@ -6200,6 +6769,7 @@ int RM_InfoAddFieldString(RedisModuleInfoCtx *ctx, char *field, RedisModuleStrin
return REDISMODULE_OK;
}
+/* See RedisModule_InfoAddFieldString(). */
int RM_InfoAddFieldCString(RedisModuleInfoCtx *ctx, char *field, char *value) {
if (!ctx->in_section)
return REDISMODULE_ERR;
@@ -6218,6 +6788,7 @@ int RM_InfoAddFieldCString(RedisModuleInfoCtx *ctx, char *field, char *value) {
return REDISMODULE_OK;
}
+/* See RedisModule_InfoAddFieldString(). */
int RM_InfoAddFieldDouble(RedisModuleInfoCtx *ctx, char *field, double value) {
if (!ctx->in_section)
return REDISMODULE_ERR;
@@ -6236,6 +6807,7 @@ int RM_InfoAddFieldDouble(RedisModuleInfoCtx *ctx, char *field, double value) {
return REDISMODULE_OK;
}
+/* See RedisModule_InfoAddFieldString(). */
int RM_InfoAddFieldLongLong(RedisModuleInfoCtx *ctx, char *field, long long value) {
if (!ctx->in_section)
return REDISMODULE_ERR;
@@ -6254,6 +6826,7 @@ int RM_InfoAddFieldLongLong(RedisModuleInfoCtx *ctx, char *field, long long valu
return REDISMODULE_OK;
}
+/* See RedisModule_InfoAddFieldString(). */
int RM_InfoAddFieldULongLong(RedisModuleInfoCtx *ctx, char *field, unsigned long long value) {
if (!ctx->in_section)
return REDISMODULE_ERR;
@@ -6272,6 +6845,8 @@ int RM_InfoAddFieldULongLong(RedisModuleInfoCtx *ctx, char *field, unsigned long
return REDISMODULE_OK;
}
+/* Registers a callback for the INFO command. The callback should add INFO fields
+ * by calling the `RedisModule_InfoAddField*()` functions. */
int RM_RegisterInfoFunc(RedisModuleCtx *ctx, RedisModuleInfoFunc cb) {
ctx->module->info_cb = cb;
return REDISMODULE_OK;
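A minimal sketch of an INFO callback for a hypothetical module named "mymod"; the fields end up as `mymod_queries` and `mymod_hit_ratio` inside a `mymod_stats` section:

    void MyModInfoFunc(RedisModuleInfoCtx *ctx, int for_crash_report) {
        REDISMODULE_NOT_USED(for_crash_report);
        if (RedisModule_InfoAddSection(ctx, "stats") == REDISMODULE_ERR) return;
        RedisModule_InfoAddFieldLongLong(ctx, "queries", 1234);
        RedisModule_InfoAddFieldDouble(ctx, "hit_ratio", 0.95);
    }

    /* Registered from RedisModule_OnLoad() with:
     *     RedisModule_RegisterInfoFunc(ctx, MyModInfoFunc); */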
@@ -6711,7 +7286,6 @@ const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fct
* after the filter context is destroyed, so it must not be auto-memory
* allocated, freed or used elsewhere.
*/
-
int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg)
{
int i;
@@ -6733,7 +7307,6 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisM
* filter context is destroyed, so it must not be auto-memory allocated, freed
* or used elsewhere.
*/
-
int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg)
{
if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR;
@@ -6774,10 +7347,10 @@ size_t RM_MallocSize(void* ptr){
/* Return a number between 0 and 1 indicating the amount of memory
* currently used, relative to the Redis "maxmemory" configuration.
*
- * 0 - No memory limit configured.
- * Between 0 and 1 - The percentage of the memory used normalized in 0-1 range.
- * Exactly 1 - Memory limit reached.
- * Greater 1 - More memory used than the configured limit.
+ * * 0 - No memory limit configured.
+ * * Between 0 and 1 - The percentage of the memory used normalized in 0-1 range.
+ * * Exactly 1 - Memory limit reached.
+ * * Greater 1 - More memory used than the configured limit.
*/
float RM_GetUsedMemoryRatio(){
float level;
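For instance, a module could use the ratio to skip an optional memory-hungry code path; the 0.9 threshold below is arbitrary and only for illustration:

    int canUseMemoryHungryPath(void) {
        float level = RedisModule_GetUsedMemoryRatio();
        return level == 0 || level < 0.9;   /* 0 means no maxmemory limit is set. */
    }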
@@ -6840,21 +7413,22 @@ void RM_ScanCursorDestroy(RedisModuleScanCursor *cursor) {
* the selected db.
*
* Callback for scan implementation.
- * void scan_callback(RedisModuleCtx *ctx, RedisModuleString *keyname,
- * RedisModuleKey *key, void *privdata);
- * ctx - the redis module context provided to for the scan.
- * keyname - owned by the caller and need to be retained if used after this
- * function.
*
- * key - holds info on the key and value, it is provided as best effort, in
- * some cases it might be NULL, in which case the user should (can) use
- * RedisModule_OpenKey (and CloseKey too).
- * when it is provided, it is owned by the caller and will be free when the
- * callback returns.
+ * void scan_callback(RedisModuleCtx *ctx, RedisModuleString *keyname,
+ * RedisModuleKey *key, void *privdata);
*
- * privdata - the user data provided to RedisModule_Scan.
+ * - `ctx`: the redis module context provided for the scan.
+ * - `keyname`: owned by the caller and needs to be retained if used after this
+ * function.
+ * - `key`: holds info on the key and value, it is provided as best effort, in
+ * some cases it might be NULL, in which case the user should (can) use
+ * RedisModule_OpenKey() (and CloseKey too).
+ * When it is provided, it is owned by the caller and will be freed when the
+ * callback returns.
+ * - `privdata`: the user data provided to RedisModule_Scan().
*
* The way it should be used:
+ *
* RedisModuleCursor *c = RedisModule_ScanCursorCreate();
* while(RedisModule_Scan(ctx, c, callback, privateData));
* RedisModule_ScanCursorDestroy(c);
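A sketch of a callback matching the signature above, counting keys into a counter passed as privdata and meant to be used with the RedisModule_Scan() loop shown above:

    void countKeysCallback(RedisModuleCtx *ctx, RedisModuleString *keyname,
                           RedisModuleKey *key, void *privdata) {
        REDISMODULE_NOT_USED(ctx);
        REDISMODULE_NOT_USED(keyname);
        REDISMODULE_NOT_USED(key);
        size_t *count = privdata;
        (*count)++;
    }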
@@ -6938,7 +7512,9 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
/* Scan api that allows a module to scan the elements in a hash, set or sorted set key
*
* Callback for scan implementation.
- * void scan_callback(RedisModuleKey *key, RedisModuleString* field, RedisModuleString* value, void *privdata);
+ *
+ * void scan_callback(RedisModuleKey *key, RedisModuleString* field, RedisModuleString* value, void *privdata);
+ *
* - key - the redis key context provided to for the scan.
* - field - field name, owned by the caller and need to be retained if used
* after this function.
@@ -6947,6 +7523,7 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
* - privdata - the user data provided to RedisModule_ScanKey.
*
* The way it should be used:
+ *
* RedisModuleCursor *c = RedisModule_ScanCursorCreate();
* RedisModuleKey *key = RedisModule_OpenKey(...)
* while(RedisModule_ScanKey(key, c, callback, privateData));
@@ -6955,6 +7532,7 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
*
* It is also possible to use this API from another thread while the lock is acquired during
* the actuall call to RM_ScanKey, and re-opening the key each time:
+ *
* RedisModuleCursor *c = RedisModule_ScanCursorCreate();
* RedisModule_ThreadSafeContextLock(ctx);
* RedisModuleKey *key = RedisModule_OpenKey(...)
@@ -7159,10 +7737,10 @@ void ModuleForkDoneHandler(int exitcode, int bysignal) {
*
* The callback must be of this type:
*
- * int (*RedisModuleEventCallback)(RedisModuleCtx *ctx,
- * RedisModuleEvent eid,
- * uint64_t subevent,
- * void *data);
+ * int (*RedisModuleEventCallback)(RedisModuleCtx *ctx,
+ * RedisModuleEvent eid,
+ * uint64_t subevent,
+ * void *data);
*
* The 'ctx' is a normal Redis module context that the callback can use in
* order to call other modules APIs. The 'eid' is the event itself, this
@@ -7176,201 +7754,207 @@ void ModuleForkDoneHandler(int exitcode, int bysignal) {
*
* Here is a list of events you can use as 'eid' and related sub events:
*
- * RedisModuleEvent_ReplicationRoleChanged
+ * * RedisModuleEvent_ReplicationRoleChanged:
+ *
+ * This event is called when the instance switches from master
+ * to replica or the other way around, however the event is
+ * also called when the replica remains a replica but starts to
+ * replicate with a different master.
*
- * This event is called when the instance switches from master
- * to replica or the other way around, however the event is
- * also called when the replica remains a replica but starts to
- * replicate with a different master.
+ * The following sub events are available:
*
- * The following sub events are available:
+ * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_MASTER`
+ * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_REPLICA`
*
- * REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_MASTER
- * REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_REPLICA
+ * The 'data' field can be casted by the callback to a
+ * `RedisModuleReplicationInfo` structure with the following fields:
*
- * The 'data' field can be casted by the callback to a
- * RedisModuleReplicationInfo structure with the following fields:
+ * int master; // true if master, false if replica
+ * char *masterhost; // master instance hostname for NOW_REPLICA
+ * int masterport; // master instance port for NOW_REPLICA
+ * char *replid1; // Main replication ID
+ * char *replid2; // Secondary replication ID
+ * uint64_t repl1_offset; // Main replication offset
+ * uint64_t repl2_offset; // Offset of replid2 validity
*
- * int master; // true if master, false if replica
- * char *masterhost; // master instance hostname for NOW_REPLICA
- * int masterport; // master instance port for NOW_REPLICA
- * char *replid1; // Main replication ID
- * char *replid2; // Secondary replication ID
- * uint64_t repl1_offset; // Main replication offset
- * uint64_t repl2_offset; // Offset of replid2 validity
+ * * RedisModuleEvent_Persistence
*
- * RedisModuleEvent_Persistence
+ * This event is called when RDB saving or AOF rewriting starts
+ * and ends. The following sub events are available:
*
- * This event is called when RDB saving or AOF rewriting starts
- * and ends. The following sub events are available:
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_ENDED`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_FAILED`
*
- * REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START
- * REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START
- * REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START
- * REDISMODULE_SUBEVENT_PERSISTENCE_ENDED
- * REDISMODULE_SUBEVENT_PERSISTENCE_FAILED
+ * The above events are triggered not just when the user calls the
+ * relevant commands like BGSAVE, but also when a saving operation
+ * or AOF rewriting occurs because of internal server triggers.
+ * The SYNC_RDB_START sub events are happening in the foreground due to
+ * SAVE command, FLUSHALL, or server shutdown, and the other RDB and
+ * AOF sub events are executed in a background fork child, so any
+ * action the module takes can only affect the generated AOF or RDB,
+ * but will not be reflected in the parent process and affect connected
+ * clients and commands. Also note that the AOF_START sub event may end
+ * up saving RDB content in case of an AOF with rdb-preamble.
*
- * The above events are triggered not just when the user calls the
- * relevant commands like BGSAVE, but also when a saving operation
- * or AOF rewriting occurs because of internal server triggers.
- * The SYNC_RDB_START sub events are happening in the forground due to
- * SAVE command, FLUSHALL, or server shutdown, and the other RDB and
- * AOF sub events are executed in a background fork child, so any
- * action the module takes can only affect the generated AOF or RDB,
- * but will not be reflected in the parent process and affect connected
- * clients and commands. Also note that the AOF_START sub event may end
- * up saving RDB content in case of an AOF with rdb-preamble.
+ * * RedisModuleEvent_FlushDB
*
- * RedisModuleEvent_FlushDB
+ * The FLUSHALL, FLUSHDB or an internal flush (for instance
+ * because of replication, after the replica synchronization)
+ * happened. The following sub events are available:
*
- * The FLUSHALL, FLUSHDB or an internal flush (for instance
- * because of replication, after the replica synchronization)
- * happened. The following sub events are available:
+ * * `REDISMODULE_SUBEVENT_FLUSHDB_START`
+ * * `REDISMODULE_SUBEVENT_FLUSHDB_END`
*
- * REDISMODULE_SUBEVENT_FLUSHDB_START
- * REDISMODULE_SUBEVENT_FLUSHDB_END
+ * The data pointer can be casted to a RedisModuleFlushInfo
+ * structure with the following fields:
*
- * The data pointer can be casted to a RedisModuleFlushInfo
- * structure with the following fields:
+ * int32_t async; // True if the flush is done in a thread.
+ * // See for instance FLUSHALL ASYNC.
+ * // In this case the END callback is invoked
+ * // immediately after the database is put
+ * // in the free list of the thread.
+ * int32_t dbnum; // Flushed database number, -1 for all the DBs
+ * // in the case of the FLUSHALL operation.
*
- * int32_t async; // True if the flush is done in a thread.
- * See for instance FLUSHALL ASYNC.
- * In this case the END callback is invoked
- * immediately after the database is put
- * in the free list of the thread.
- * int32_t dbnum; // Flushed database number, -1 for all the DBs
- * in the case of the FLUSHALL operation.
+ * The start event is called *before* the operation is initiated, thus
+ * allowing the callback to call DBSIZE or other operation on the
+ * yet-to-free keyspace.
*
- * The start event is called *before* the operation is initated, thus
- * allowing the callback to call DBSIZE or other operation on the
- * yet-to-free keyspace.
+ * * RedisModuleEvent_Loading
*
- * RedisModuleEvent_Loading
+ * Called on loading operations: at startup when the server is
+ * started, but also after a first synchronization when the
+ * replica is loading the RDB file from the master.
+ * The following sub events are available:
*
- * Called on loading operations: at startup when the server is
- * started, but also after a first synchronization when the
- * replica is loading the RDB file from the master.
- * The following sub events are available:
+ * * `REDISMODULE_SUBEVENT_LOADING_RDB_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_AOF_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_REPL_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_ENDED`
+ * * `REDISMODULE_SUBEVENT_LOADING_FAILED`
*
- * REDISMODULE_SUBEVENT_LOADING_RDB_START
- * REDISMODULE_SUBEVENT_LOADING_AOF_START
- * REDISMODULE_SUBEVENT_LOADING_REPL_START
- * REDISMODULE_SUBEVENT_LOADING_ENDED
- * REDISMODULE_SUBEVENT_LOADING_FAILED
+ * Note that AOF loading may start with RDB data in case of
+ * rdb-preamble, in which case you'll only receive an AOF_START event.
*
- * Note that AOF loading may start with an RDB data in case of
- * rdb-preamble, in which case you'll only receive an AOF_START event.
+ * * RedisModuleEvent_ClientChange
*
+ * Called when a client connects or disconnects.
+ * The data pointer can be casted to a RedisModuleClientInfo
+ * structure, documented in RedisModule_GetClientInfoById().
+ * The following sub events are available:
*
- * RedisModuleEvent_ClientChange
+ * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED`
+ * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED`
*
- * Called when a client connects or disconnects.
- * The data pointer can be casted to a RedisModuleClientInfo
- * structure, documented in RedisModule_GetClientInfoById().
- * The following sub events are available:
+ * * RedisModuleEvent_Shutdown
*
- * REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED
- * REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED
+ * The server is shutting down. No subevents are available.
*
- * RedisModuleEvent_Shutdown
+ * * RedisModuleEvent_ReplicaChange
*
- * The server is shutting down. No subevents are available.
+ * This event is called when the instance (that can be either a
+ * master or a replica) gets a new online replica, or loses a
+ * replica because it gets disconnected.
+ * The following sub events are available:
*
- * RedisModuleEvent_ReplicaChange
+ * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE`
+ * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE`
*
- * This event is called when the instance (that can be both a
- * master or a replica) get a new online replica, or lose a
- * replica since it gets disconnected.
- * The following sub events are available:
+ * No additional information is available so far: future versions
+ * of Redis will have an API in order to enumerate the replicas
+ * connected and their state.
*
- * REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE
- * REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE
+ * * RedisModuleEvent_CronLoop
*
- * No additional information is available so far: future versions
- * of Redis will have an API in order to enumerate the replicas
- * connected and their state.
+ * This event is called every time Redis calls the serverCron()
+ * function in order to do certain bookkeeping. Modules that are
+ * required to do operations from time to time may use this callback.
+ * Normally Redis calls this function 10 times per second, but
+ * this changes depending on the "hz" configuration.
+ * No sub events are available.
*
- * RedisModuleEvent_CronLoop
+ * The data pointer can be casted to a RedisModuleCronLoop
+ * structure with the following fields:
*
- * This event is called every time Redis calls the serverCron()
- * function in order to do certain bookkeeping. Modules that are
- * required to do operations from time to time may use this callback.
- * Normally Redis calls this function 10 times per second, but
- * this changes depending on the "hz" configuration.
- * No sub events are available.
+ * int32_t hz; // Approximate number of events per second.
*
- * The data pointer can be casted to a RedisModuleCronLoop
- * structure with the following fields:
+ * * RedisModuleEvent_MasterLinkChange
*
- * int32_t hz; // Approximate number of events per second.
+ * This is called for replicas in order to notify when the
+ * replication link becomes functional (up) with our master,
+ * or when it goes down. Note that the link is not considered
+ * up when we just connected to the master, but only if the
+ * replication is happening correctly.
+ * The following sub events are available:
*
- * RedisModuleEvent_MasterLinkChange
+ * * `REDISMODULE_SUBEVENT_MASTER_LINK_UP`
+ * * `REDISMODULE_SUBEVENT_MASTER_LINK_DOWN`
*
- * This is called for replicas in order to notify when the
- * replication link becomes functional (up) with our master,
- * or when it goes down. Note that the link is not considered
- * up when we just connected to the master, but only if the
- * replication is happening correctly.
- * The following sub events are available:
+ * * RedisModuleEvent_ModuleChange
*
- * REDISMODULE_SUBEVENT_MASTER_LINK_UP
- * REDISMODULE_SUBEVENT_MASTER_LINK_DOWN
+ * This event is called when a new module is loaded or one is unloaded.
+ * The following sub events are available:
*
- * RedisModuleEvent_ModuleChange
+ * * `REDISMODULE_SUBEVENT_MODULE_LOADED`
+ * * `REDISMODULE_SUBEVENT_MODULE_UNLOADED`
*
- * This event is called when a new module is loaded or one is unloaded.
- * The following sub events are available:
+ * The data pointer can be casted to a RedisModuleModuleChange
+ * structure with the following fields:
*
- * REDISMODULE_SUBEVENT_MODULE_LOADED
- * REDISMODULE_SUBEVENT_MODULE_UNLOADED
+ * const char* module_name; // Name of module loaded or unloaded.
+ * int32_t module_version; // Module version.
*
- * The data pointer can be casted to a RedisModuleModuleChange
- * structure with the following fields:
+ * * RedisModuleEvent_LoadingProgress
*
- * const char* module_name; // Name of module loaded or unloaded.
- * int32_t module_version; // Module version.
+ * This event is called repeatedly while an RDB or AOF file
+ * is being loaded.
+ * The following sub events are available:
*
- * RedisModuleEvent_LoadingProgress
+ * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB`
+ * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF`
*
- * This event is called repeatedly called while an RDB or AOF file
- * is being loaded.
- * The following sub events are availble:
+ * The data pointer can be casted to a RedisModuleLoadingProgress
+ * structure with the following fields:
*
- * REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB
- * REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF
+ * int32_t hz; // Approximate number of events per second.
+ * int32_t progress; // Approximate progress between 0 and 1024,
+ * // or -1 if unknown.
*
- * The data pointer can be casted to a RedisModuleLoadingProgress
- * structure with the following fields:
+ * * RedisModuleEvent_SwapDB
*
- * int32_t hz; // Approximate number of events per second.
- * int32_t progress; // Approximate progress between 0 and 1024,
- * or -1 if unknown.
+ * This event is called when a SWAPDB command has been successfully
+ * executed.
+ * For this event there are currently no sub events available.
*
- * RedisModuleEvent_SwapDB
+ * The data pointer can be casted to a RedisModuleSwapDbInfo
+ * structure with the following fields:
*
- * This event is called when a SWAPDB command has been successfully
- * Executed.
- * For this event call currently there is no subevents available.
+ * int32_t dbnum_first; // Swap Db first dbnum
+ * int32_t dbnum_second; // Swap Db second dbnum
*
- * The data pointer can be casted to a RedisModuleSwapDbInfo
- * structure with the following fields:
+ * * RedisModuleEvent_ReplBackup
*
- * int32_t dbnum_first; // Swap Db first dbnum
- * int32_t dbnum_second; // Swap Db second dbnum
+ * Called when the diskless-repl-load config is set to swapdb,
+ * and Redis needs to back up the current database so that it can
+ * possibly be restored later. A module with global data and
+ * maybe with aux_load and aux_save callbacks may need to use this
+ * notification to backup / restore / discard its globals.
+ * The following sub events are available:
*
- * RedisModuleEvent_ReplBackup
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE`
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE`
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD`
*
- * Called when diskless-repl-load config is set to swapdb,
- * And redis needs to backup the the current database for the
- * possibility to be restored later. A module with global data and
- * maybe with aux_load and aux_save callbacks may need to use this
- * notification to backup / restore / discard its globals.
- * The following sub events are available:
+ * * RedisModuleEvent_ForkChild
*
- * REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE
- * REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE
- * REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD
+ * Called when a fork child (AOFRW, RDBSAVE, module fork...) is born or dies.
+ * The following sub events are available:
*
+ * * `REDISMODULE_SUBEVENT_FORK_CHILD_BORN`
+ * * `REDISMODULE_SUBEVENT_FORK_CHILD_DIED`
*
* The function returns REDISMODULE_OK if the module was successfully subscribed
* for the specified event. If the API is called from a wrong context or unsupported event
@@ -7444,6 +8028,8 @@ int RM_IsSubEventSupported(RedisModuleEvent event, int64_t subevent) {
return subevent < _REDISMODULE_SUBEVENT_SWAPDB_NEXT;
case REDISMODULE_EVENT_REPL_BACKUP:
return subevent < _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT;
+ case REDISMODULE_EVENT_FORK_CHILD:
+ return subevent < _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT;
default:
break;
}
@@ -7659,6 +8245,11 @@ void moduleInitModulesSystem(void) {
anetNonBlock(NULL,server.module_blocked_pipe[0]);
anetNonBlock(NULL,server.module_blocked_pipe[1]);
+ /* Enable the close-on-exec flag on the pipes, in case of fork-exec system
+ * calls in sentinels or redis servers. */
+ anetCloexec(server.module_blocked_pipe[0]);
+ anetCloexec(server.module_blocked_pipe[1]);
+
/* Create the timers radix tree. */
Timers = raxNew();
@@ -8064,7 +8655,8 @@ int RM_GetLFU(RedisModuleKey *key, long long *lfu_freq) {
* the module can check if a certain set of flags are supported
* by the redis server version in use.
* Example:
- * int supportedFlags = RM_GetContextFlagsAll()
+ *
+ * int supportedFlags = RM_GetContextFlagsAll();
* if (supportedFlags & REDISMODULE_CTX_FLAGS_MULTI) {
* // REDISMODULE_CTX_FLAGS_MULTI is supported
* } else{
@@ -8080,7 +8672,8 @@ int RM_GetContextFlagsAll() {
* the module can check if a certain set of flags are supported
* by the redis server version in use.
* Example:
- * int supportedFlags = RM_GetKeyspaceNotificationFlagsAll()
+ *
+ * int supportedFlags = RM_GetKeyspaceNotificationFlagsAll();
* if (supportedFlags & REDISMODULE_NOTIFY_LOADED) {
* // REDISMODULE_NOTIFY_LOADED is supported
* } else{
@@ -8150,8 +8743,8 @@ int RM_ModuleTypeReplaceValue(RedisModuleKey *key, moduleType *mt, void *new_val
* an error condition. Error conditions are indicated by setting errno
* as folllows:
*
- * ENOENT: Specified command does not exist.
- * EINVAL: Invalid command arity specified.
+ * * ENOENT: Specified command does not exist.
+ * * EINVAL: Invalid command arity specified.
*
* NOTE: The returned array is not a Redis Module object so it does not
* get automatically freed even when auto-memory is used. The caller
@@ -8247,11 +8840,11 @@ int RM_DefragShouldStop(RedisModuleDefragCtx *ctx) {
* data type.
*
* This behavior is reserved to cases where late defrag is performed. Late
- * defrag is selected for keys that implement the free_effort callback and
- * return a free_effort value that is larger than the defrag
+ * defrag is selected for keys that implement the `free_effort` callback and
+ * return a `free_effort` value that is larger than the defrag
* 'active-defrag-max-scan-fields' configuration directive.
*
- * Smaller keys, keys that do not implement free_effort or the global
+ * Smaller keys, keys that do not implement `free_effort` or the global
* defrag callback are not called in late-defrag mode. In those cases, a
* call to this function will return REDISMODULE_ERR.
*
@@ -8273,7 +8866,7 @@ int RM_DefragCursorSet(RedisModuleDefragCtx *ctx, unsigned long cursor) {
/* Fetch a cursor value that has been previously stored using RM_DefragCursorSet().
*
* If not called for a late defrag operation, REDISMODULE_ERR will be returned and
- * the cursor should be ignored. See DM_DefragCursorSet() for more details on
+ * the cursor should be ignored. See RM_DefragCursorSet() for more details on
* defrag cursors.
*/
int RM_DefragCursorGet(RedisModuleDefragCtx *ctx, unsigned long *cursor) {
@@ -8445,6 +9038,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(StringToLongLong);
REGISTER_API(StringToDouble);
REGISTER_API(StringToLongDouble);
+ REGISTER_API(StringToStreamID);
REGISTER_API(Call);
REGISTER_API(CallReplyProto);
REGISTER_API(FreeCallReply);
@@ -8459,6 +9053,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(CreateStringFromDouble);
REGISTER_API(CreateStringFromLongDouble);
REGISTER_API(CreateStringFromString);
+ REGISTER_API(CreateStringFromStreamID);
REGISTER_API(CreateStringPrintf);
REGISTER_API(FreeString);
REGISTER_API(StringPtrLen);
@@ -8490,6 +9085,15 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(ZsetRangeEndReached);
REGISTER_API(HashSet);
REGISTER_API(HashGet);
+ REGISTER_API(StreamAdd);
+ REGISTER_API(StreamDelete);
+ REGISTER_API(StreamIteratorStart);
+ REGISTER_API(StreamIteratorStop);
+ REGISTER_API(StreamIteratorNextID);
+ REGISTER_API(StreamIteratorNextField);
+ REGISTER_API(StreamIteratorDelete);
+ REGISTER_API(StreamTrimByLength);
+ REGISTER_API(StreamTrimByID);
REGISTER_API(IsKeysPositionRequest);
REGISTER_API(KeyAtPos);
REGISTER_API(GetClientId);
@@ -8539,6 +9143,8 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(GetBlockedClientPrivateData);
REGISTER_API(AbortBlock);
REGISTER_API(Milliseconds);
+ REGISTER_API(BlockedClientMeasureTimeStart);
+ REGISTER_API(BlockedClientMeasureTimeEnd);
REGISTER_API(GetThreadSafeContext);
REGISTER_API(GetDetachedThreadSafeContext);
REGISTER_API(FreeThreadSafeContext);
diff --git a/src/modules/gendoc.rb b/src/modules/gendoc.rb
index ee6572884..2fd2ec5d7 100644
--- a/src/modules/gendoc.rb
+++ b/src/modules/gendoc.rb
@@ -4,16 +4,26 @@
# Convert the C comment to markdown
def markdown(s)
s = s.gsub(/\*\/$/,"")
- s = s.gsub(/^ \* {0,1}/,"")
- s = s.gsub(/^\/\* /,"")
+ s = s.gsub(/^ ?\* ?/,"")
+ s = s.gsub(/^\/\*\*? ?/,"")
s.chop! while s[-1] == "\n" || s[-1] == " "
lines = s.split("\n")
newlines = []
+ # Fix some markdown, except in code blocks indented by 4 spaces.
lines.each{|l|
- if l[0] != ' '
- l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"}
- l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"}
- l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"}
+ if not l.start_with?(' ')
+ # Rewrite RM_Xyz() to `RedisModule_Xyz()`. The () suffix is
+ # optional. Even RM_Xyz*() with * as wildcard is handled.
+ l = l.gsub(/(?<!`)RM_([A-z]+(?:\*?\(\))?)/, '`RedisModule_\1`')
+ # Add backquotes around RedisModule functions and types where missing.
+ l = l.gsub(/(?<!`)RedisModule[A-z]+(?:\*?\(\))?/){|x| "`#{x}`"}
+ # Add backquotes around c functions like malloc() where missing.
+ l = l.gsub(/(?<![`A-z])[a-z_]+\(\)/, '`\0`')
+ # Add backquotes around macro and var names containing underscores.
+ l = l.gsub(/(?<![`A-z\*])[A-Za-z]+_[A-Za-z0-9_]+/){|x| "`#{x}`"}
+ # Link URLs preceded by space (i.e. when not already linked)
+ l = l.gsub(/ (https?:\/\/[A-Za-z0-9_\/\.\-]+[A-Za-z0-9\/])/,
+ ' [\1](\1)')
end
newlines << l
}
@@ -41,6 +51,7 @@ def docufy(src,i)
end
puts "# Modules API reference\n\n"
+puts "<!-- This file is generated from module.c using gendoc.rb -->\n\n"
src = File.open("../module.c").to_a
src.each_with_index{|line,i|
if line =~ /RM_/ && line[0] != ' ' && line[0] != '#' && line[0] != '/'
diff --git a/src/networking.c b/src/networking.c
index e624dd8f9..da611675c 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -1104,6 +1104,7 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
+ anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedSocket(cfd),0,cip);
}
@@ -1124,6 +1125,7 @@ void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
+ anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedTLS(cfd, server.tls_auth_clients),0,cip);
}
@@ -1143,6 +1145,7 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
+ anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
acceptCommonHandler(connCreateAcceptedSocket(cfd),CLIENT_UNIX_SOCKET,NULL);
}
@@ -1707,7 +1710,7 @@ int processInlineBuffer(client *c) {
}
/* Handle the \r\n case. */
- if (newline && newline != c->querybuf+c->qb_pos && *(newline-1) == '\r')
+ if (newline != c->querybuf+c->qb_pos && *(newline-1) == '\r')
newline--, linefeed_chars++;
/* Split the input buffer up to the \r\n */
@@ -2436,8 +2439,10 @@ void clientCommand(client *c) {
" Kill connection made from <ip:port>.",
"KILL <option> <value> [<option> <value> [...]]",
" Kill connections. Options are:",
-" * ADDR <ip:port>",
-" Kill connection made from <ip:port>",
+" * ADDR (<ip:port>|<unixsocket>:0)",
+" Kill connections made from the specified address",
+" * LADDR (<ip:port>|<unixsocket>:0)",
+" Kill connections made to specified local address",
" * TYPE (normal|master|replica|pubsub)",
" Kill connections by type.",
" * USER <username>",
@@ -2675,7 +2680,7 @@ NULL
c->argc == 4))
{
/* CLIENT PAUSE TIMEOUT [WRITE|ALL] */
- long long duration;
+ mstime_t end;
int type = CLIENT_PAUSE_ALL;
if (c->argc == 4) {
if (!strcasecmp(c->argv[3]->ptr,"write")) {
@@ -2689,9 +2694,9 @@ NULL
}
}
- if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,
+ if (getTimeoutFromObjectOrReply(c,c->argv[2],&end,
UNIT_MILLISECONDS) != C_OK) return;
- pauseClients(duration, type);
+ pauseClients(end, type);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"tracking") && c->argc >= 3) {
/* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first]
@@ -3355,8 +3360,6 @@ void processEventsWhileBlocked(void) {
* Threaded I/O
* ========================================================================== */
-int tio_debug = 0;
-
#define IO_THREADS_MAX_NUM 128
#define IO_THREADS_OP_READ 0
#define IO_THREADS_OP_WRITE 1
@@ -3407,8 +3410,6 @@ void *IOThreadMain(void *myid) {
serverAssert(getIOPendingCount(id) != 0);
- if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id]));
-
/* Process: note that the main thread will never touch our list
* before we drop the pending count to 0. */
listIter li;
@@ -3426,8 +3427,6 @@ void *IOThreadMain(void *myid) {
}
listEmpty(io_threads_list[id]);
setIOPendingCount(id, 0);
-
- if (tio_debug) printf("[%ld] Done\n", id);
}
}
@@ -3482,8 +3481,6 @@ void killIOThreads(void) {
}
void startThreadedIO(void) {
- if (tio_debug) { printf("S"); fflush(stdout); }
- if (tio_debug) printf("--- STARTING THREADED IO ---\n");
serverAssert(server.io_threads_active == 0);
for (int j = 1; j < server.io_threads_num; j++)
pthread_mutex_unlock(&io_threads_mutex[j]);
@@ -3494,10 +3491,6 @@ void stopThreadedIO(void) {
/* We may have still clients with pending reads when this function
* is called: handle them before stopping the threads. */
handleClientsWithPendingReadsUsingThreads();
- if (tio_debug) { printf("E"); fflush(stdout); }
- if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n",
- (int) listLength(server.clients_pending_read),
- (int) listLength(server.clients_pending_write));
serverAssert(server.io_threads_active == 1);
for (int j = 1; j < server.io_threads_num; j++)
pthread_mutex_lock(&io_threads_mutex[j]);
@@ -3540,8 +3533,6 @@ int handleClientsWithPendingWritesUsingThreads(void) {
/* Start threads if needed. */
if (!server.io_threads_active) startThreadedIO();
- if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed);
-
/* Distribute the clients across N different lists. */
listIter li;
listNode *ln;
@@ -3586,7 +3577,6 @@ int handleClientsWithPendingWritesUsingThreads(void) {
pending += getIOPendingCount(j);
if (pending == 0) break;
}
- if (tio_debug) printf("I/O WRITE All threads finshed\n");
/* Run the list of clients again to install the write handler where
* needed. */
@@ -3639,8 +3629,6 @@ int handleClientsWithPendingReadsUsingThreads(void) {
int processed = listLength(server.clients_pending_read);
if (processed == 0) return 0;
- if (tio_debug) printf("%d TOTAL READ pending clients\n", processed);
-
/* Distribute the clients across N different lists. */
listIter li;
listNode *ln;
@@ -3676,7 +3664,6 @@ int handleClientsWithPendingReadsUsingThreads(void) {
pending += getIOPendingCount(j);
if (pending == 0) break;
}
- if (tio_debug) printf("I/O READ All threads finshed\n");
/* Run the list of clients again to process the new buffers. */
while(listLength(server.clients_pending_read)) {
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 31d2360c9..ed3075317 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -5301,7 +5301,7 @@ static clusterManagerNode *clusterNodeForResharding(char *id,
clusterManagerLogErr(invalid_node_msg, id);
*raise_err = 1;
return NULL;
- } else if (node != NULL && target != NULL) {
+ } else if (target != NULL) {
if (!strcmp(node->name, target->name)) {
clusterManagerLogErr( "*** It is not possible to use "
"the target node as "
@@ -6940,6 +6940,10 @@ void sendCapa() {
sendReplconf("capa", "eof");
}
+void sendRdbOnly(void) {
+ sendReplconf("rdb-only", "1");
+}
+
/* Read raw bytes through a redisContext. The read operation is not greedy
* and may not fill the buffer entirely.
*/
@@ -7137,7 +7141,6 @@ static void getRDB(clusterManagerNode *node) {
node->context = NULL;
fsync(fd);
close(fd);
- fprintf(stderr,"Transfer finished with success.\n");
if (node) {
sdsfree(filename);
return;
@@ -8258,6 +8261,7 @@ int main(int argc, char **argv) {
if (config.getrdb_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
sendCapa();
+ sendRdbOnly();
getRDB(NULL);
}
diff --git a/src/redismodule.h b/src/redismodule.h
index 36c566bb3..9d8c6c5ea 100644
--- a/src/redismodule.h
+++ b/src/redismodule.h
@@ -69,6 +69,20 @@
#define REDISMODULE_HASH_CFIELDS (1<<2)
#define REDISMODULE_HASH_EXISTS (1<<3)
+/* StreamID type. */
+typedef struct RedisModuleStreamID {
+ uint64_t ms;
+ uint64_t seq;
+} RedisModuleStreamID;
+
+/* StreamAdd() flags. */
+#define REDISMODULE_STREAM_ADD_AUTOID (1<<0)
+/* StreamIteratorStart() flags. */
+#define REDISMODULE_STREAM_ITERATOR_EXCLUSIVE (1<<0)
+#define REDISMODULE_STREAM_ITERATOR_REVERSE (1<<1)
+/* StreamIteratorTrim*() flags. */
+#define REDISMODULE_STREAM_TRIM_APPROX (1<<0)
+
/* Context Flags: Info about the current context returned by
* RM_GetContextFlags(). */
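As a rough sketch of how the stream API declared above can be used, a hypothetical module command could append one field-value pair with an auto-generated ID and reply with that ID; StreamAppendExample and the argv[2]/argv[3] layout are assumptions:

    static int StreamAppendExample(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        if (argc != 4) return RedisModule_WrongArity(ctx);
        RedisModule_AutoMemory(ctx);
        RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
        RedisModuleStreamID id;
        if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, &id,
                                  &argv[2], 1) != REDISMODULE_OK) {
            RedisModule_CloseKey(key);
            return RedisModule_ReplyWithError(ctx, "ERR could not append to stream");
        }
        RedisModule_CloseKey(key);
        return RedisModule_ReplyWithString(ctx,
            RedisModule_CreateStringFromStreamID(ctx, &id));
    }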
@@ -216,9 +230,8 @@ typedef uint64_t RedisModuleTimerID;
#define REDISMODULE_EVENT_LOADING_PROGRESS 10
#define REDISMODULE_EVENT_SWAPDB 11
#define REDISMODULE_EVENT_REPL_BACKUP 12
-
-/* Next event flag, should be updated if a new event added. */
-#define _REDISMODULE_EVENT_NEXT 13
+#define REDISMODULE_EVENT_FORK_CHILD 13
+#define _REDISMODULE_EVENT_NEXT 14 /* Next event flag, should be updated if a new event added. */
typedef struct RedisModuleEvent {
uint64_t id; /* REDISMODULE_EVENT_... defines. */
@@ -281,6 +294,10 @@ static const RedisModuleEvent
RedisModuleEvent_ReplBackup = {
REDISMODULE_EVENT_REPL_BACKUP,
1
+ },
+ RedisModuleEvent_ForkChild = {
+ REDISMODULE_EVENT_FORK_CHILD,
+ 1
};
/* Those are values that are used for the 'subevent' callback argument. */
@@ -331,6 +348,10 @@ static const RedisModuleEvent
#define REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD 2
#define _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT 3
+#define REDISMODULE_SUBEVENT_FORK_CHILD_BORN 0
+#define REDISMODULE_SUBEVENT_FORK_CHILD_DIED 1
+#define _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT 2
+
#define _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT 0
#define _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT 0
#define _REDISMODULE_SUBEVENT_SWAPDB_NEXT 0
@@ -578,6 +599,7 @@ REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongLong)(Redi
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongDouble)(RedisModuleCtx *ctx, long double ld, int humanfriendly) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromStreamID)(RedisModuleCtx *ctx, const RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...) REDISMODULE_ATTR_PRINTF(2,3) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
REDISMODULE_API const char * (*RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len) REDISMODULE_ATTR;
@@ -599,6 +621,7 @@ REDISMODULE_API int (*RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, Redis
REDISMODULE_API int (*RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StringToDouble)(const RedisModuleString *str, double *d) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StringToLongDouble)(const RedisModuleString *str, long double *d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToStreamID)(const RedisModuleString *str, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_AutoMemory)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@@ -629,6 +652,15 @@ REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODUL
REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorStop)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorNextID)(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorNextField)(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorDelete)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_StreamTrimByLength)(RedisModuleKey *key, int flags, long long length) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_StreamTrimByID)(RedisModuleKey *key, int flags, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR;
REDISMODULE_API unsigned long long (*RedisModule_GetClientId)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@@ -744,6 +776,8 @@ REDISMODULE_API int (*RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx)
REDISMODULE_API void * (*RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_AbortBlock)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeStart)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeEnd)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleCtx * (*RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleCtx * (*RedisModule_GetDetachedThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@@ -842,6 +876,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(StringToLongLong);
REDISMODULE_GET_API(StringToDouble);
REDISMODULE_GET_API(StringToLongDouble);
+ REDISMODULE_GET_API(StringToStreamID);
REDISMODULE_GET_API(Call);
REDISMODULE_GET_API(CallReplyProto);
REDISMODULE_GET_API(FreeCallReply);
@@ -856,6 +891,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(CreateStringFromDouble);
REDISMODULE_GET_API(CreateStringFromLongDouble);
REDISMODULE_GET_API(CreateStringFromString);
+ REDISMODULE_GET_API(CreateStringFromStreamID);
REDISMODULE_GET_API(CreateStringPrintf);
REDISMODULE_GET_API(FreeString);
REDISMODULE_GET_API(StringPtrLen);
@@ -887,6 +923,15 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(ZsetRangeEndReached);
REDISMODULE_GET_API(HashSet);
REDISMODULE_GET_API(HashGet);
+ REDISMODULE_GET_API(StreamAdd);
+ REDISMODULE_GET_API(StreamDelete);
+ REDISMODULE_GET_API(StreamIteratorStart);
+ REDISMODULE_GET_API(StreamIteratorStop);
+ REDISMODULE_GET_API(StreamIteratorNextID);
+ REDISMODULE_GET_API(StreamIteratorNextField);
+ REDISMODULE_GET_API(StreamIteratorDelete);
+ REDISMODULE_GET_API(StreamTrimByLength);
+ REDISMODULE_GET_API(StreamTrimByID);
REDISMODULE_GET_API(IsKeysPositionRequest);
REDISMODULE_GET_API(KeyAtPos);
REDISMODULE_GET_API(GetClientId);
@@ -1006,6 +1051,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(GetBlockedClientPrivateData);
REDISMODULE_GET_API(GetBlockedClientHandle);
REDISMODULE_GET_API(AbortBlock);
+ REDISMODULE_GET_API(BlockedClientMeasureTimeStart);
+ REDISMODULE_GET_API(BlockedClientMeasureTimeEnd);
REDISMODULE_GET_API(SetDisconnectCallback);
REDISMODULE_GET_API(SubscribeToKeyspaceEvents);
REDISMODULE_GET_API(NotifyKeyspaceEvent);
diff --git a/src/replication.c b/src/replication.c
index 9fb19eaca..f23fcb6de 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -200,6 +200,16 @@ void feedReplicationBacklogWithObject(robj *o) {
feedReplicationBacklog(p,len);
}
+int canFeedReplicaReplBuffer(client *replica) {
+ /* Don't feed replicas that only want the RDB. */
+ if (replica->flags & CLIENT_REPL_RDBONLY) return 0;
+
+ /* Don't feed replicas that are still waiting for BGSAVE to start. */
+ if (replica->replstate == SLAVE_STATE_WAIT_BGSAVE_START) return 0;
+
+ return 1;
+}
+
/* Propagate write commands to slaves, and populate the replication backlog
* as well. This function is used if the instance is a master: we use
* the commands received by our clients in order to create the replication
@@ -249,7 +259,8 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listRewind(slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
- if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
+
+ if (!canFeedReplicaReplBuffer(slave)) continue;
addReply(slave,selectcmd);
}
@@ -290,8 +301,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
while((ln = listNext(&li))) {
client *slave = ln->value;
- /* Don't feed slaves that are still waiting for BGSAVE to start. */
- if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
+ if (!canFeedReplicaReplBuffer(slave)) continue;
/* Feed slaves that are waiting for the initial SYNC (so these commands
* are queued in the output buffer until the initial SYNC completes),
@@ -363,8 +373,7 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle
while((ln = listNext(&li))) {
client *slave = ln->value;
- /* Don't feed slaves that are still waiting for BGSAVE to start. */
- if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
+ if (!canFeedReplicaReplBuffer(slave)) continue;
addReplyProto(slave,buf,buflen);
}
}
@@ -712,6 +721,36 @@ void syncCommand(client *c) {
/* ignore SYNC if already slave or in monitor mode */
if (c->flags & CLIENT_SLAVE) return;
+ /* Check if this is a failover request to a replica with the same replid and
+ * become a master if so. */
+ if (c->argc > 3 && !strcasecmp(c->argv[0]->ptr,"psync") &&
+ !strcasecmp(c->argv[3]->ptr,"failover"))
+ {
+ serverLog(LL_WARNING, "Failover request received for replid %s.",
+ (unsigned char *)c->argv[1]->ptr);
+ if (!server.masterhost) {
+ addReplyError(c, "PSYNC FAILOVER can't be sent to a master.");
+ return;
+ }
+
+ if (!strcasecmp(c->argv[1]->ptr,server.replid)) {
+ replicationUnsetMaster();
+ sds client = catClientInfoString(sdsempty(),c);
+ serverLog(LL_NOTICE,
+ "MASTER MODE enabled (failover request from '%s')",client);
+ sdsfree(client);
+ } else {
+ addReplyError(c, "PSYNC FAILOVER replid must match my replid.");
+ return;
+ }
+ }
+
+ /* Don't let replicas sync with us while we're failing over */
+ if (server.failover_state != NO_FAILOVER) {
+ addReplyError(c,"-NOMASTERLINK Can't SYNC while failing over");
+ return;
+ }
+
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
@@ -799,14 +838,20 @@ void syncCommand(client *c) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
- if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break;
+ /* If the client needs a buffer of commands, we can't use
+ * a replica without a replication buffer. */
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+ (!(slave->flags & CLIENT_REPL_RDBONLY) ||
+ (c->flags & CLIENT_REPL_RDBONLY)))
+ break;
}
/* To attach this slave, we check that it has at least all the
* capabilities of the slave that triggered the current BGSAVE. */
if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) {
/* Perfect, the server is already registering differences for
- * another slave. Set the right state, and copy the buffer. */
- copyClientOutputBuffer(c,slave);
+ * another slave. Set the right state, and copy the buffer.
+ * We don't copy the buffer if the client doesn't want it (RDB-only). */
+ if (!(c->flags & CLIENT_REPL_RDBONLY)) copyClientOutputBuffer(c,slave);
replicationSetupSlaveForFullResync(c,slave->psync_initial_offset);
serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
@@ -925,6 +970,15 @@ void replconfCommand(client *c) {
* to the slave. */
if (server.masterhost && server.master) replicationSendAck();
return;
+ } else if (!strcasecmp(c->argv[j]->ptr,"rdb-only")) {
+ /* REPLCONF RDB-ONLY is used to identify that the client only wants
+ * the RDB snapshot, without the replication buffer. */
+ long rdb_only = 0;
+ if (getRangeLongFromObjectOrReply(c,c->argv[j+1],
+ 0,1,&rdb_only,NULL) != C_OK)
+ return;
+ if (rdb_only == 1) c->flags |= CLIENT_REPL_RDBONLY;
+ else c->flags &= ~CLIENT_REPL_RDBONLY;
} else {
addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
(char*)c->argv[j]->ptr);
@@ -939,19 +993,28 @@ void replconfCommand(client *c) {
* we are finally ready to send the incremental stream of commands.
*
* It does a few things:
- *
- * 1) Put the slave in ONLINE state. Note that the function may also be called
+ * 1) Close the replica's connection asynchronously if it doesn't need the
+ * replication command stream (RDB-only client), since it isn't a real replica.
+ * 2) Put the slave in ONLINE state. Note that the function may also be called
* for a replicas that are already in ONLINE state, but having the flag
* repl_put_online_on_ack set to true: we still have to install the write
* handler in that case. This function will take care of that.
- * 2) Make sure the writable event is re-installed, since calling the SYNC
+ * 3) Make sure the writable event is re-installed, since calling the SYNC
* command disables it, so that we can accumulate output buffer without
* sending it to the replica.
- * 3) Update the count of "good replicas". */
+ * 4) Update the count of "good replicas". */
void putSlaveOnline(client *slave) {
slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 0;
slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
+
+ if (slave->flags & CLIENT_REPL_RDBONLY) {
+ serverLog(LL_NOTICE,
+ "Close the connection with replica %s as RDB transfer is complete",
+ replicationGetSlaveName(slave));
+ freeClientAsync(slave);
+ return;
+ }
if (connSetWriteHandler(slave->conn, sendReplyToClient) == C_ERR) {
serverLog(LL_WARNING,"Unable to register writable event for replica bulk transfer: %s", strerror(errno));
freeClient(slave);
@@ -1998,8 +2061,15 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
memcpy(psync_offset,"-1",3);
}
- /* Issue the PSYNC command */
- reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
+ /* Issue the PSYNC command: if this is a master with a failover in
+ * progress, send the FAILOVER argument to the replica to cause it
+ * to become a master. */
+ if (server.failover_state == FAILOVER_IN_PROGRESS) {
+ reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,"FAILOVER",NULL);
+ } else {
+ reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
+ }
+
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
@@ -2323,6 +2393,7 @@ void syncWithMaster(connection *conn) {
if (server.repl_state == REPL_STATE_SEND_PSYNC) {
if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
err = sdsnew("Write error sending the PSYNC command.");
+ abortFailover("Write error to failover target");
goto write_error;
}
server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY;
@@ -2340,6 +2411,18 @@ void syncWithMaster(connection *conn) {
psync_result = slaveTryPartialResynchronization(conn,1);
if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
+ /* Check the status of the planned failover. We expect PSYNC_CONTINUE,
+ * but there is nothing technically wrong with a full resync which
+ * could happen in edge cases. */
+ if (server.failover_state == FAILOVER_IN_PROGRESS) {
+ if (psync_result == PSYNC_CONTINUE || psync_result == PSYNC_FULLRESYNC) {
+ clearFailoverState();
+ } else {
+ abortFailover("Failover target rejected psync request");
+ return;
+ }
+ }
+
/* If the master is in an transient error, we should try to PSYNC
* from scratch later, so go to the error path. This happens when
* the server is loading the dataset or is not connected with its
@@ -2645,6 +2728,11 @@ void replicaofCommand(client *c) {
return;
}
+ if (server.failover_state != NO_FAILOVER) {
+ addReplyError(c,"REPLICAOF not allowed while failing over.");
+ return;
+ }
+
/* The special host/port combination "NO" "ONE" turns the instance
* into a master. Otherwise the new master address is set. */
if (!strcasecmp(c->argv[1]->ptr,"no") &&
@@ -3178,6 +3266,10 @@ long long replicationGetSlaveOffset(void) {
void replicationCron(void) {
static long long replication_cron_loops = 0;
+ /* Check failover status first, to see if we need to start
+ * handling the failover. */
+ updateFailoverStatus();
+
/* Non blocking connection timeout? */
if (server.masterhost &&
(server.repl_state == REPL_STATE_CONNECTING ||
@@ -3235,8 +3327,9 @@ void replicationCron(void) {
* alter the replication offsets of master and slave, and will no longer
* match the one stored into 'mf_master_offset' state. */
int manual_failover_in_progress =
- server.cluster_enabled &&
- server.cluster->mf_end &&
+ ((server.cluster_enabled &&
+ server.cluster->mf_end) ||
+ server.failover_end_time) &&
checkClientPauseTimeoutAndReturnIfPaused();
if (!manual_failover_in_progress) {
@@ -3390,3 +3483,271 @@ void replicationStartPendingFork(void) {
}
}
}
+
+/* Find the replica at IP:PORT in the replica list. */
+static client *findReplica(char *host, int port) {
+ listIter li;
+ listNode *ln;
+ client *replica;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ replica = ln->value;
+ char ip[NET_IP_STR_LEN], *replicaip = replica->slave_ip;
+
+ if (replicaip[0] == '\0') {
+ if (connPeerToString(replica->conn, ip, sizeof(ip), NULL) == -1)
+ continue;
+ replicaip = ip;
+ }
+
+ if (!strcasecmp(host, replicaip) &&
+ (port == replica->slave_listening_port))
+ return replica;
+ }
+
+ return NULL;
+}
+
+const char *getFailoverStateString() {
+ switch(server.failover_state) {
+ case NO_FAILOVER: return "no-failover";
+ case FAILOVER_IN_PROGRESS: return "failover-in-progress";
+ case FAILOVER_WAIT_FOR_SYNC: return "waiting-for-sync";
+ default: return "unknown";
+ }
+}
+
+/* Resets the internal failover configuration; this needs to be called
+ * after a failover either succeeds or fails, as it also unpauses the
+ * clients. */
+void clearFailoverState() {
+ server.failover_end_time = 0;
+ server.force_failover = 0;
+ zfree(server.target_replica_host);
+ server.target_replica_host = NULL;
+ server.target_replica_port = 0;
+ server.failover_state = NO_FAILOVER;
+ unpauseClients();
+}
+
+/* Abort the ongoing failover, if any is in progress. */
+void abortFailover(const char *err) {
+ if (server.failover_state == NO_FAILOVER) return;
+
+ if (server.target_replica_host) {
+ serverLog(LL_NOTICE,"FAILOVER to %s:%d aborted: %s",
+ server.target_replica_host,server.target_replica_port,err);
+ } else {
+ serverLog(LL_NOTICE,"FAILOVER to any replica aborted: %s",err);
+ }
+ if (server.failover_state == FAILOVER_IN_PROGRESS) {
+ replicationUnsetMaster();
+ }
+ clearFailoverState();
+}
+
+/*
+ * FAILOVER [TO <HOST> <PORT> [FORCE]] [ABORT] [TIMEOUT <timeout>]
+ *
+ * This command will coordinate a failover between the master and one
+ * of its replicas. The happy path contains the following steps:
+ * 1) The master will initiate a pause on client writes, to stop replication
+ * traffic.
+ * 2) The master will periodically check if any of its replicas has
+ * consumed the entire replication stream through acks.
+ * 3) Once any replica has caught up, the master will itself become a replica.
+ * 4) The master will send a PSYNC FAILOVER request to the target replica, which
+ * if accepted will cause the replica to become the new master and start a sync.
+ *
+ * FAILOVER ABORT is the only way to abort a failover command, as replicaof
+ * will be disabled. This may be needed if the failover is unable to progress.
+ *
+ * The optional arguments [TO <HOST> <PORT>] allow designating a specific replica
+ * to be failed over to.
+ *
+ * The FORCE flag indicates that the failover should proceed even if the
+ * target replica is not caught up. It must be specified together with a
+ * TIMEOUT and a target HOST and PORT.
+ *
+ * TIMEOUT <timeout> indicates how long the primary should wait for a
+ * replica to sync up before aborting. If not specified, the failover will
+ * be attempted indefinitely and must be aborted manually.
+ */
+void failoverCommand(client *c) {
+ if (server.cluster_enabled) {
+ addReplyError(c,"FAILOVER not allowed in cluster mode. "
+ "Use CLUSTER FAILOVER command instead.");
+ return;
+ }
+
+ /* Handle special case for abort */
+ if ((c->argc == 2) && !strcasecmp(c->argv[1]->ptr,"abort")) {
+ if (server.failover_state == NO_FAILOVER) {
+ addReplyError(c, "No failover in progress.");
+ return;
+ }
+
+ abortFailover("Failover manually aborted");
+ addReply(c,shared.ok);
+ return;
+ }
+
+ long timeout_in_ms = 0;
+ int force_flag = 0;
+ long port = 0;
+ char *host = NULL;
+
+ /* Parse the command for syntax and arguments. */
+ for (int j = 1; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"timeout") && (j + 1 < c->argc) &&
+ timeout_in_ms == 0)
+ {
+ if (getLongFromObjectOrReply(c,c->argv[j + 1],
+ &timeout_in_ms,NULL) != C_OK) return;
+ if (timeout_in_ms <= 0) {
+ addReplyError(c,"FAILOVER timeout must be greater than 0");
+ return;
+ }
+ j++;
+ } else if (!strcasecmp(c->argv[j]->ptr,"to") && (j + 2 < c->argc) &&
+ !host)
+ {
+ if (getLongFromObjectOrReply(c,c->argv[j + 2],&port,NULL) != C_OK)
+ return;
+ host = c->argv[j + 1]->ptr;
+ j += 2;
+ } else if (!strcasecmp(c->argv[j]->ptr,"force") && !force_flag) {
+ force_flag = 1;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ if (server.failover_state != NO_FAILOVER) {
+ addReplyError(c,"FAILOVER already in progress.");
+ return;
+ }
+
+ if (server.masterhost) {
+ addReplyError(c,"FAILOVER is not valid when server is a replica.");
+ return;
+ }
+
+ if (listLength(server.slaves) == 0) {
+ addReplyError(c,"FAILOVER requires connected replicas.");
+ return;
+ }
+
+ if (force_flag && (!timeout_in_ms || !host)) {
+ addReplyError(c,"FAILOVER with force option requires both a timeout "
+ "and target HOST and IP.");
+ return;
+ }
+
+ /* If a replica address was provided, validate that it is connected. */
+ if (host) {
+ client *replica = findReplica(host, port);
+
+ if (replica == NULL) {
+ addReplyError(c,"FAILOVER target HOST and IP is not "
+ "a replica.");
+ return;
+ }
+
+ /* Check if requested replica is online */
+ if (replica->replstate != SLAVE_STATE_ONLINE) {
+ addReplyError(c,"FAILOVER target replica is not online.");
+ return;
+ }
+
+ server.target_replica_host = zstrdup(host);
+ server.target_replica_port = port;
+ serverLog(LL_NOTICE,"FAILOVER requested to %s:%ld.",host,port);
+ } else {
+ serverLog(LL_NOTICE,"FAILOVER requested to any replica.");
+ }
+
+ mstime_t now = mstime();
+ if (timeout_in_ms) {
+ server.failover_end_time = now + timeout_in_ms;
+ }
+
+ server.force_failover = force_flag;
+ server.failover_state = FAILOVER_WAIT_FOR_SYNC;
+ /* Writes remain paused until the failover completes or is aborted. */
+ pauseClients(LLONG_MAX,CLIENT_PAUSE_WRITE);
+ addReply(c,shared.ok);
+}
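
A minimal sketch of driving the new FAILOVER command from a client, assuming the hiredis client library; the addresses and the 60-second timeout are invented for illustration and are not part of the patch:

#include <stdio.h>
#include <hiredis/hiredis.h>

int main(void) {
    /* Connect to the current master (address is illustrative). */
    redisContext *c = redisConnect("127.0.0.1", 6379);
    if (c == NULL || c->err) return 1;

    /* Ask the master to fail over to a specific replica, giving up after
     * 60 seconds if that replica never catches up on the replication offset. */
    redisReply *r = redisCommand(c, "FAILOVER TO 10.0.0.2 6380 TIMEOUT 60000");
    if (r) {
        printf("FAILOVER: %s\n", r->str ? r->str : "(no status)");
        freeReplyObject(r);
    }

    /* If the failover stalls, FAILOVER ABORT is the only way to cancel it. */
    r = redisCommand(c, "FAILOVER ABORT");
    if (r) freeReplyObject(r);

    redisFree(c);
    return 0;
}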
+
+/* Failover cron function: checks the coordinated failover state.
+ *
+ * Implementation note: the current implementation calls replicationSetMaster()
+ * to start the failover request. This has some unintended side effects if the
+ * failover doesn't work: blocked clients will be unblocked and replicas will
+ * be disconnected. This could be optimized further.
+ */
+void updateFailoverStatus(void) {
+ if (server.failover_state != FAILOVER_WAIT_FOR_SYNC) return;
+ mstime_t now = server.mstime;
+
+ /* Check if failover operation has timed out */
+ if (server.failover_end_time && server.failover_end_time <= now) {
+ if (server.force_failover) {
+ serverLog(LL_NOTICE,
+ "FAILOVER to %s:%d time out exceeded, failing over.",
+ server.target_replica_host, server.target_replica_port);
+ server.failover_state = FAILOVER_IN_PROGRESS;
+ /* The timeout has expired and FORCE was requested: fail over anyway. */
+ replicationSetMaster(server.target_replica_host,
+ server.target_replica_port);
+ return;
+ } else {
+ /* FORCE was not requested, so abort due to the timeout. */
+ abortFailover("Replica never caught up before timeout");
+ return;
+ }
+ }
+
+ /* Check to see if the replica has caught up so failover can start */
+ client *replica = NULL;
+ if (server.target_replica_host) {
+ replica = findReplica(server.target_replica_host,
+ server.target_replica_port);
+ } else {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ /* Find any replica that has matched our repl_offset */
+ while((ln = listNext(&li))) {
+ replica = ln->value;
+ if (replica->repl_ack_off == server.master_repl_offset) {
+ char ip[NET_IP_STR_LEN], *replicaip = replica->slave_ip;
+
+ if (replicaip[0] == '\0') {
+ if (connPeerToString(replica->conn,ip,sizeof(ip),NULL) == -1)
+ continue;
+ replicaip = ip;
+ }
+
+ /* We are now failing over to this specific node */
+ server.target_replica_host = zstrdup(replicaip);
+ server.target_replica_port = replica->slave_listening_port;
+ break;
+ }
+ }
+ }
+
+ /* We've found a replica that is caught up */
+ if (replica && (replica->repl_ack_off == server.master_repl_offset)) {
+ server.failover_state = FAILOVER_IN_PROGRESS;
+ serverLog(LL_NOTICE,
+ "Failover target %s:%d is synced, failing over.",
+ server.target_replica_host, server.target_replica_port);
+ /* Designated replica is caught up, failover to it. */
+ replicationSetMaster(server.target_replica_host,
+ server.target_replica_port);
+ }
+}
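
For orientation, the coordinated failover implemented above moves through the states below (the enum values come from the server.h changes in this merge, not shown in this hunk). This is a summary sketch, not additional code in the patch:

/* NO_FAILOVER
 *   -- FAILOVER command accepted (client writes paused) -->
 * FAILOVER_WAIT_FOR_SYNC
 *   -- a replica acks master_repl_offset, or FORCE + timeout expired -->
 * FAILOVER_IN_PROGRESS      (replicationSetMaster() towards the target)
 *   -- target accepts PSYNC (CONTINUE or FULLRESYNC) --> clearFailoverState()
 *   -- target rejects PSYNC, or write error --> abortFailover()
 *
 * Both clearFailoverState() and abortFailover() return to NO_FAILOVER and
 * unpause clients; abortFailover() additionally undoes replicationSetMaster()
 * when the state was already FAILOVER_IN_PROGRESS. */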
diff --git a/src/scripting.c b/src/scripting.c
index 75604e4d8..41469ee2e 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -1282,14 +1282,17 @@ void scriptingInit(int setup) {
/* Release resources related to Lua scripting.
* This function is used in order to reset the scripting environment. */
-void scriptingRelease(void) {
- dictRelease(server.lua_scripts);
+void scriptingRelease(int async) {
+ if (async)
+ freeLuaScriptsAsync(server.lua_scripts);
+ else
+ dictRelease(server.lua_scripts);
server.lua_scripts_mem = 0;
lua_close(server.lua);
}
-void scriptingReset(void) {
- scriptingRelease();
+void scriptingReset(int async) {
+ scriptingRelease(async);
scriptingInit(0);
}
@@ -1711,8 +1714,12 @@ void scriptCommand(client *c) {
" Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...]",
" Return information about the existence of the scripts in the script cache.",
-"FLUSH",
+"FLUSH [ASYNC|SYNC]",
" Flush the Lua scripts cache. Very dangerous on replicas.",
+" When called without the optional mode argument, the behavior is determined by the",
+" lazyfree-lazy-user-flush configuration directive. Valid modes are:",
+" * ASYNC: Asynchronously flush the scripts cache.",
+" * SYNC: Synchronously flush the scripts cache.",
"KILL",
" Kill the currently executing Lua script.",
"LOAD <script>",
@@ -1720,8 +1727,19 @@ void scriptCommand(client *c) {
NULL
};
addReplyHelp(c, help);
- } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
- scriptingReset();
+ } else if (c->argc >= 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
+ int async = 0;
+ if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"sync")) {
+ async = 0;
+ } else if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"async")) {
+ async = 1;
+ } else if (c->argc == 2) {
+ async = server.lazyfree_lazy_user_flush ? 1 : 0;
+ } else {
+ addReplyError(c,"SCRIPT FLUSH only support SYNC|ASYNC option");
+ return;
+ }
+ scriptingReset(async);
addReply(c,shared.ok);
replicationScriptCacheFlush();
server.dirty++; /* Propagating this command is a good idea. */
diff --git a/src/sds.c b/src/sds.c
index f16114471..ad30e2ad4 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -1157,12 +1157,80 @@ void *sds_malloc(size_t size) { return s_malloc(size); }
void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
void sds_free(void *ptr) { s_free(ptr); }
+/* Perform expansion of a template string and return the result as a newly
+ * allocated sds.
+ *
+ * Template variables are specified using curly brackets, e.g. {variable}.
+ * An opening bracket can be quoted by repeating it twice.
+ */
+sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg)
+{
+ sds res = sdsempty();
+ const char *p = template;
+
+ while (*p) {
+ /* Find next variable, copy everything until there */
+ const char *sv = strchr(p, '{');
+ if (!sv) {
+ /* Not found: copy the rest of the template and stop */
+ res = sdscat(res, p);
+ break;
+ } else if (sv > p) {
+ /* Found: copy anything up to the beginning of the variable */
+ res = sdscatlen(res, p, sv - p);
+ }
+
+ /* Skip into variable name, handle premature end or quoting */
+ sv++;
+ if (!*sv) goto error; /* Premature end of template */
+ if (*sv == '{') {
+ /* Quoted '{' */
+ p = sv + 1;
+ res = sdscat(res, "{");
+ continue;
+ }
+
+ /* Find end of variable name, handle premature end of template */
+ const char *ev = strchr(sv, '}');
+ if (!ev) goto error;
+
+ /* Pass variable name to callback and obtain value. If callback failed,
+ * abort. */
+ sds varname = sdsnewlen(sv, ev - sv);
+ sds value = cb_func(varname, cb_arg);
+ sdsfree(varname);
+ if (!value) goto error;
+
+ /* Append value to result and continue */
+ res = sdscat(res, value);
+ sdsfree(value);
+ p = ev + 1;
+ }
+
+ return res;
+
+error:
+ sdsfree(res);
+ return NULL;
+}
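
A self-contained usage sketch for the new sdstemplate() API (illustration only; the variable names and values are invented):

#include <stdio.h>
#include <string.h>
#include "sds.h"

/* Hypothetical callback: expands {host} and {port}, rejects anything else. */
static sds demo_cb(const sds var, void *arg) {
    (void)arg;
    if (!strcmp(var, "host")) return sdsnew("127.0.0.1");
    if (!strcmp(var, "port")) return sdsnew("6379");
    return NULL; /* unknown variable: sdstemplate() returns NULL */
}

int main(void) {
    sds s = sdstemplate("addr={host}:{port} literal={{", demo_cb, NULL);
    if (s) {
        printf("%s\n", s);   /* prints: addr=127.0.0.1:6379 literal={ */
        sdsfree(s);
    }
    return 0;
}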
+
#ifdef REDIS_TEST
#include <stdio.h>
#include <limits.h>
#include "testhelp.h"
#define UNUSED(x) (void)(x)
+
+static sds sdsTestTemplateCallback(sds varname, void *arg) {
+ UNUSED(arg);
+ static const char *_var1 = "variable1";
+ static const char *_var2 = "variable2";
+
+ if (!strcmp(varname, _var1)) return sdsnew("value1");
+ else if (!strcmp(varname, _var2)) return sdsnew("value2");
+ else return NULL;
+}
+
int sdsTest(int argc, char **argv) {
UNUSED(argc);
UNUSED(argv);
@@ -1342,6 +1410,30 @@ int sdsTest(int argc, char **argv) {
sdsfree(x);
}
+
+ /* Simple template */
+ x = sdstemplate("v1={variable1} v2={variable2}", sdsTestTemplateCallback, NULL);
+ test_cond("sdstemplate() normal flow",
+ memcmp(x,"v1=value1 v2=value2",19) == 0);
+ sdsfree(x);
+
+ /* Template with callback error */
+ x = sdstemplate("v1={variable1} v3={doesnotexist}", sdsTestTemplateCallback, NULL);
+ test_cond("sdstemplate() with callback error", x == NULL);
+
+ /* Template with empty var name */
+ x = sdstemplate("v1={", sdsTestTemplateCallback, NULL);
+ test_cond("sdstemplate() with empty var name", x == NULL);
+
+ /* Template with truncated var name */
+ x = sdstemplate("v1={start", sdsTestTemplateCallback, NULL);
+ test_cond("sdstemplate() with truncated var name", x == NULL);
+
+ /* Template with quoting */
+ x = sdstemplate("v1={{{variable1}} {{} v2={variable2}", sdsTestTemplateCallback, NULL);
+ test_cond("sdstemplate() with quoting",
+ memcmp(x,"v1={value1} {} v2=value2",24) == 0);
+ sdsfree(x);
}
test_report();
return 0;
diff --git a/src/sds.h b/src/sds.h
index 3a9e4cefe..85dc0b680 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -253,6 +253,14 @@ sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
+/* Callback for sdstemplate. The function gets called by sdstemplate
+ * every time a variable needs to be expanded. The variable name is
+ * provided as variable, and the callback is expected to return a
+ * substitution value. Returning a NULL indicates an error.
+ */
+typedef sds (*sdstemplate_callback_t)(const sds variable, void *arg);
+sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg);
+
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, ssize_t incr);
diff --git a/src/sentinel.c b/src/sentinel.c
index 02260feb7..a87766ebe 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -55,7 +55,8 @@ extern SSL_CTX *redis_tls_client_ctx;
/* Address object, used to describe an ip:port pair. */
typedef struct sentinelAddr {
- char *ip;
+ char *hostname; /* Hostname OR address, as specified */
+ char *ip; /* Always a resolved address */
int port;
} sentinelAddr;
@@ -94,6 +95,8 @@ typedef struct sentinelAddr {
#define SENTINEL_ELECTION_TIMEOUT 10000
#define SENTINEL_MAX_DESYNC 1000
#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1
+#define SENTINEL_DEFAULT_RESOLVE_HOSTNAMES 0
+#define SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES 0
/* Failover machine different states. */
#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
@@ -260,6 +263,8 @@ struct sentinelState {
paths at runtime? */
char *sentinel_auth_pass; /* Password to use for AUTH against other sentinel */
char *sentinel_auth_user; /* Username for ACLs AUTH against other sentinel. */
+ int resolve_hostnames; /* Support use of hostnames, assuming DNS is well configured. */
+ int announce_hostnames; /* Announce hostnames instead of IPs when we have them. */
} sentinel;
/* A script execution job. */
@@ -387,7 +392,7 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master);
void sentinelScheduleScriptExecution(char *path, ...);
void sentinelStartFailover(sentinelRedisInstance *master);
void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata);
-int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port);
+int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr);
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
void sentinelFlushConfig(void);
void sentinelGenerateInitialMonitorEvents(void);
@@ -455,6 +460,8 @@ void sentinelInfoCommand(client *c);
void sentinelSetCommand(client *c);
void sentinelPublishCommand(client *c);
void sentinelRoleCommand(client *c);
+void sentinelConfigGetCommand(client *c);
+void sentinelConfigSetCommand(client *c);
struct redisCommand sentinelcmds[] = {
{"ping",pingCommand,1,"fast @connection",0,NULL,0,0,0,0,0},
@@ -474,6 +481,20 @@ struct redisCommand sentinelcmds[] = {
{"command",commandCommand,-1, "random @connection", 0,NULL,0,0,0,0,0,0}
};
+/* This array is used for Sentinel config lookups; these options need to be
+ * loaded before the master monitoring config to avoid dependency issues. */
+const char *preMonitorCfgName[] = {
+ "announce-ip",
+ "announce-port",
+ "deny-scripts-reconfig",
+ "sentinel-user",
+ "sentinel-pass",
+ "current-epoch",
+ "myid",
+ "resolve-hostnames",
+ "announce-hostnames"
+};
+
/* This function overwrites a few normal Redis config default with Sentinel
* specific defaults. */
void initSentinelConfig(void) {
@@ -481,6 +502,8 @@ void initSentinelConfig(void) {
server.protected_mode = 0; /* Sentinel must be exposed. */
}
+void freeSentinelLoadQueueEntry(void *item);
+
/* Perform the Sentinel mode initialization. */
void initSentinel(void) {
unsigned int j;
@@ -519,7 +542,10 @@ void initSentinel(void) {
sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;
sentinel.sentinel_auth_pass = NULL;
sentinel.sentinel_auth_user = NULL;
+ sentinel.resolve_hostnames = SENTINEL_DEFAULT_RESOLVE_HOSTNAMES;
+ sentinel.announce_hostnames = SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES;
memset(sentinel.myid,0,sizeof(sentinel.myid));
+ server.sentinel_config = NULL;
}
/* This function gets called when the server is in Sentinel mode, started,
@@ -573,11 +599,13 @@ sentinelAddr *createSentinelAddr(char *hostname, int port) {
errno = EINVAL;
return NULL;
}
- if (anetResolve(NULL,hostname,ip,sizeof(ip)) == ANET_ERR) {
+ if (anetResolve(NULL,hostname,ip,sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
errno = ENOENT;
return NULL;
}
sa = zmalloc(sizeof(*sa));
+ sa->hostname = sdsnew(hostname);
sa->ip = sdsnew(ip);
sa->port = port;
return sa;
@@ -588,6 +616,7 @@ sentinelAddr *dupSentinelAddr(sentinelAddr *src) {
sentinelAddr *sa;
sa = zmalloc(sizeof(*sa));
+ sa->hostname = sdsnew(src->hostname);
sa->ip = sdsnew(src->ip);
sa->port = src->port;
return sa;
@@ -595,6 +624,7 @@ sentinelAddr *dupSentinelAddr(sentinelAddr *src) {
/* Free a Sentinel address. Can't fail. */
void releaseSentinelAddr(sentinelAddr *sa) {
+ sdsfree(sa->hostname);
sdsfree(sa->ip);
zfree(sa);
}
@@ -604,6 +634,21 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
return a->port == b->port && !strcasecmp(a->ip,b->ip);
}
+/* Return non-zero if a hostname matches an address. */
+int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) {
+ char ip[NET_IP_STR_LEN];
+
+ /* We always resolve the hostname and compare it to the address */
+ if (anetResolve(NULL, hostname, ip, sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR)
+ return 0;
+ return !strcasecmp(a->ip, ip);
+}
+
+const char *announceSentinelAddr(const sentinelAddr *a) {
+ return sentinel.announce_hostnames ? a->hostname : a->ip;
+}
+
/* =========================== Events notification ========================== */
/* Send an event to log, pub/sub, user notification script.
@@ -644,12 +689,12 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
if (master) {
snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d",
sentinelRedisInstanceTypeStr(ri),
- ri->name, ri->addr->ip, ri->addr->port,
- master->name, master->addr->ip, master->addr->port);
+ ri->name, announceSentinelAddr(ri->addr), ri->addr->port,
+ master->name, announceSentinelAddr(master->addr), master->addr->port);
} else {
snprintf(msg, sizeof(msg), "%s %s %s %d",
sentinelRedisInstanceTypeStr(ri),
- ri->name, ri->addr->ip, ri->addr->port);
+ ri->name, announceSentinelAddr(ri->addr), ri->addr->port);
}
fmt += 2;
} else {
@@ -971,7 +1016,8 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
sentinelScheduleScriptExecution(master->client_reconfig_script,
master->name,
(role == SENTINEL_LEADER) ? "leader" : "observer",
- state, from->ip, fromport, to->ip, toport, NULL);
+ state, announceSentinelAddr(from), fromport,
+ announceSentinelAddr(to), toport, NULL);
}
/* =============================== instanceLink ============================= */
@@ -1097,6 +1143,35 @@ int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
return C_ERR;
}
+/* Drop all connections to other sentinels. Returns the number of connections
+ * dropped. */
+int sentinelDropConnections(void) {
+ dictIterator *di;
+ dictEntry *de;
+ int dropped = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while ((de = dictNext(di)) != NULL) {
+ dictIterator *sdi;
+ dictEntry *sde;
+
+ sentinelRedisInstance *ri = dictGetVal(de);
+ sdi = dictGetIterator(ri->sentinels);
+ while ((sde = dictNext(sdi)) != NULL) {
+ sentinelRedisInstance *si = dictGetVal(sde);
+ if (!si->link->disconnected) {
+ instanceLinkCloseConnection(si->link, si->link->pc);
+ instanceLinkCloseConnection(si->link, si->link->cc);
+ dropped++;
+ }
+ }
+ dictReleaseIterator(sdi);
+ }
+ dictReleaseIterator(di);
+
+ return dropped;
+}
+
/* When we detect a Sentinel to switch address (reporting a different IP/port
* pair in Hello messages), let's update all the matching Sentinels in the
* context of other masters as well and disconnect the links, so that everybody
@@ -1209,7 +1284,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
/* For slaves use ip:port as name. */
if (flags & SRI_SLAVE) {
- anetFormatAddr(slavename, sizeof(slavename), hostname, port);
+ anetFormatAddr(slavename, sizeof(slavename), addr->ip, port);
name = slavename;
}
@@ -1320,14 +1395,25 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
/* Lookup a slave in a master Redis instance, by ip and port. */
sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
- sentinelRedisInstance *ri, char *ip, int port)
+ sentinelRedisInstance *ri, char *slave_addr, int port)
{
sds key;
sentinelRedisInstance *slave;
char buf[NET_ADDR_STR_LEN];
+ sentinelAddr *addr;
serverAssert(ri->flags & SRI_MASTER);
- anetFormatAddr(buf,sizeof(buf),ip,port);
+
+ /* We need to handle a slave_addr that is potentially a hostname.
+ * If that is the case, depending on configuration we either resolve
+ * it and use the IP address, or fail.
+ */
+ addr = createSentinelAddr(slave_addr, port);
+ if (!addr) return NULL;
+
+ anetFormatAddr(buf,sizeof(buf),addr->ip,addr->port);
+ releaseSentinelAddr(addr);
+
key = sdsnew(buf);
slave = dictFetchValue(ri->slaves,key);
sdsfree(key);
@@ -1377,21 +1463,27 @@ int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid)
* of instances. Return NULL if not found, otherwise return the instance
* pointer.
*
- * runid or ip can be NULL. In such a case the search is performed only
+ * runid or addr can be NULL. In such a case the search is performed only
* by the non-NULL field. */
-sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid) {
+sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) {
dictIterator *di;
dictEntry *de;
sentinelRedisInstance *instance = NULL;
+ sentinelAddr *ri_addr = NULL;
- serverAssert(ip || runid); /* User must pass at least one search param. */
+ serverAssert(addr || runid); /* User must pass at least one search param. */
+ if (addr != NULL) {
+ /* Resolve addr, we use the IP as a key even if a hostname is used */
+ ri_addr = createSentinelAddr(addr, port);
+ if (!ri_addr) return NULL;
+ }
di = dictGetIterator(instances);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
if (runid && !ri->runid) continue;
if ((runid == NULL || strcmp(ri->runid, runid) == 0) &&
- (ip == NULL || (strcmp(ri->addr->ip, ip) == 0 &&
+ (addr == NULL || (strcmp(ri->addr->ip, ri_addr->ip) == 0 &&
ri->addr->port == port)))
{
instance = ri;
@@ -1399,6 +1491,9 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
}
}
dictReleaseIterator(di);
+ if (ri_addr != NULL)
+ releaseSentinelAddr(ri_addr);
+
return instance;
}
@@ -1513,26 +1608,28 @@ int sentinelResetMastersByPattern(char *pattern, int flags) {
*
* The function returns C_ERR if the address can't be resolved for some
* reason. Otherwise C_OK is returned. */
-int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, int port) {
+int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hostname, int port) {
sentinelAddr *oldaddr, *newaddr;
sentinelAddr **slaves = NULL;
int numslaves = 0, j;
dictIterator *di;
dictEntry *de;
- newaddr = createSentinelAddr(ip,port);
+ newaddr = createSentinelAddr(hostname,port);
if (newaddr == NULL) return C_ERR;
- /* Make a list of slaves to add back after the reset.
- * Don't include the one having the address we are switching to. */
+ /* At most one slave can already have the new address, and the old master
+ * may be added back as one more slave, so we allocate room for
+ * dictSize(master->slaves) + 1 entries. */
+ slaves = zmalloc(sizeof(sentinelAddr*)*(dictSize(master->slaves) + 1));
+
+ /* Don't include the one having the address we are switching to. */
di = dictGetIterator(master->slaves);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *slave = dictGetVal(de);
if (sentinelAddrIsEqual(slave->addr,newaddr)) continue;
- slaves = zrealloc(slaves,sizeof(sentinelAddr*)*(numslaves+1));
- slaves[numslaves++] = createSentinelAddr(slave->addr->ip,
- slave->addr->port);
+ slaves[numslaves++] = dupSentinelAddr(slave->addr);
}
dictReleaseIterator(di);
@@ -1540,9 +1637,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
* as a slave as well, so that we'll be able to sense / reconfigure
* the old master. */
if (!sentinelAddrIsEqual(newaddr,master->addr)) {
- slaves = zrealloc(slaves,sizeof(sentinelAddr*)*(numslaves+1));
- slaves[numslaves++] = createSentinelAddr(master->addr->ip,
- master->addr->port);
+ slaves[numslaves++] = dupSentinelAddr(master->addr);
}
/* Reset and switch address. */
@@ -1556,7 +1651,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
for (j = 0; j < numslaves; j++) {
sentinelRedisInstance *slave;
- slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->ip,
+ slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->hostname,
slaves[j]->port, master->quorum, master);
releaseSentinelAddr(slaves[j]);
if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
@@ -1640,7 +1735,164 @@ char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
}
/* ============================ Config handling ============================= */
-char *sentinelHandleConfiguration(char **argv, int argc) {
+
+/* Generalized handling of instance creation errors. Use SRI_MASTER, SRI_SLAVE
+ * or SRI_SENTINEL as the role value. */
+const char *sentinelCheckCreateInstanceErrors(int role) {
+ switch(errno) {
+ case EBUSY:
+ switch (role) {
+ case SRI_MASTER:
+ return "Duplicate master name.";
+ case SRI_SLAVE:
+ return "Duplicate hostname and port for replica.";
+ case SRI_SENTINEL:
+ return "Duplicate runid for sentinel.";
+ default:
+ serverAssert(0);
+ break;
+ }
+ break;
+ case ENOENT:
+ return "Can't resolve instance hostname.";
+ case EINVAL:
+ return "Invalid port number.";
+ default:
+ return "Unknown Error for creating instances.";
+ }
+}
+
+/* init function for server.sentinel_config */
+void initializeSentinelConfig() {
+ server.sentinel_config = zmalloc(sizeof(struct sentinelConfig));
+ server.sentinel_config->monitor_cfg = listCreate();
+ server.sentinel_config->pre_monitor_cfg = listCreate();
+ server.sentinel_config->post_monitor_cfg = listCreate();
+ listSetFreeMethod(server.sentinel_config->monitor_cfg,freeSentinelLoadQueueEntry);
+ listSetFreeMethod(server.sentinel_config->pre_monitor_cfg,freeSentinelLoadQueueEntry);
+ listSetFreeMethod(server.sentinel_config->post_monitor_cfg,freeSentinelLoadQueueEntry);
+}
+
+/* destroy function for server.sentinel_config */
+void freeSentinelConfig() {
+ /* Release these three config queues since we will not use them anymore. */
+ listRelease(server.sentinel_config->pre_monitor_cfg);
+ listRelease(server.sentinel_config->monitor_cfg);
+ listRelease(server.sentinel_config->post_monitor_cfg);
+ zfree(server.sentinel_config);
+ server.sentinel_config = NULL;
+}
+
+/* Search config name in pre monitor config name array, return 1 if found,
+ * 0 if not found. */
+int searchPreMonitorCfgName(const char *name) {
+ for (unsigned int i = 0; i < sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]); i++) {
+ if (!strcasecmp(preMonitorCfgName[i],name)) return 1;
+ }
+ return 0;
+}
+
+/* Free method for sentinelLoadQueueEntry, used when releasing the lists. */
+void freeSentinelLoadQueueEntry(void *item) {
+ struct sentinelLoadQueueEntry *entry = item;
+ sdsfreesplitres(entry->argv,entry->argc);
+ sdsfree(entry->line);
+ zfree(entry);
+}
+
+/* This function queues Sentinel configuration lines; its main purpose is
+ * to delay parsing of the Sentinel config options in order to avoid
+ * order-dependency issues in the config file. */
+void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) {
+ int i;
+ struct sentinelLoadQueueEntry *entry;
+
+ /* initialize sentinel_config for the first call */
+ if (server.sentinel_config == NULL) initializeSentinelConfig();
+
+ entry = zmalloc(sizeof(struct sentinelLoadQueueEntry));
+ entry->argv = zmalloc(sizeof(char*)*argc);
+ entry->argc = argc;
+ entry->linenum = linenum;
+ entry->line = sdsdup(line);
+ for (i = 0; i < argc; i++) {
+ entry->argv[i] = sdsdup(argv[i]);
+ }
+ /* Separate config lines into pre-monitor, monitor and post-monitor
+ * queues, so that config dependencies are parsed in the correct
+ * order. */
+ if (!strcasecmp(argv[0],"monitor")) {
+ listAddNodeTail(server.sentinel_config->monitor_cfg,entry);
+ } else if (searchPreMonitorCfgName(argv[0])) {
+ listAddNodeTail(server.sentinel_config->pre_monitor_cfg,entry);
+ } else {
+ listAddNodeTail(server.sentinel_config->post_monitor_cfg,entry);
+ }
+}
+
+/* This function loads the Sentinel configuration from the pre_monitor_cfg,
+ * monitor_cfg and post_monitor_cfg lists. */
+void loadSentinelConfigFromQueue(void) {
+ const char *err = NULL;
+ listIter li;
+ listNode *ln;
+ int linenum = 0;
+ sds line = NULL;
+
+ /* if there is no sentinel_config entry, we can return immediately */
+ if (server.sentinel_config == NULL) return;
+
+ /* loading from pre monitor config queue first to avoid dependency issues */
+ listRewind(server.sentinel_config->pre_monitor_cfg,&li);
+ while((ln = listNext(&li))) {
+ struct sentinelLoadQueueEntry *entry = ln->value;
+ err = sentinelHandleConfiguration(entry->argv,entry->argc);
+ if (err) {
+ linenum = entry->linenum;
+ line = entry->line;
+ goto loaderr;
+ }
+ }
+
+ /* loading from monitor config queue */
+ listRewind(server.sentinel_config->monitor_cfg,&li);
+ while((ln = listNext(&li))) {
+ struct sentinelLoadQueueEntry *entry = ln->value;
+ err = sentinelHandleConfiguration(entry->argv,entry->argc);
+ if (err) {
+ linenum = entry->linenum;
+ line = entry->line;
+ goto loaderr;
+ }
+ }
+
+ /* loading from the post monitor config queue */
+ listRewind(server.sentinel_config->post_monitor_cfg,&li);
+ while((ln = listNext(&li))) {
+ struct sentinelLoadQueueEntry *entry = ln->value;
+ err = sentinelHandleConfiguration(entry->argv,entry->argc);
+ if (err) {
+ linenum = entry->linenum;
+ line = entry->line;
+ goto loaderr;
+ }
+ }
+
+ /* free sentinel_config when config loading is finished */
+ freeSentinelConfig();
+ return;
+
+loaderr:
+ fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n",
+ REDIS_VERSION);
+ fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
+ fprintf(stderr, ">>> '%s'\n", line);
+ fprintf(stderr, "%s\n", err);
+ exit(1);
+}
+
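
To make the motivation for this queuing concrete, here is the kind of ordering problem it avoids, sketched as a comment (the hostname is hypothetical):

/* Without the queue, a sentinel.conf written as:
 *
 *   sentinel monitor mymaster redis-primary.example.internal 6379 2
 *   sentinel resolve-hostnames yes
 *
 * would fail to load: "monitor" is parsed before "resolve-hostnames" takes
 * effect, so createSentinelAddr() cannot resolve the hostname (ENOENT).
 * Queuing lets every option listed in preMonitorCfgName[] be applied first,
 * regardless of its position in the file. */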
+const char *sentinelHandleConfiguration(char **argv, int argc) {
+
sentinelRedisInstance *ri;
if (!strcasecmp(argv[0],"monitor") && argc == 5) {
@@ -1651,11 +1903,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2],
atoi(argv[3]),quorum,NULL) == NULL)
{
- switch(errno) {
- case EBUSY: return "Duplicated master name.";
- case ENOENT: return "Can't resolve master instance hostname.";
- case EINVAL: return "Invalid port number";
- }
+ return sentinelCheckCreateInstanceErrors(SRI_MASTER);
}
} else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) {
/* down-after-milliseconds <name> <milliseconds> */
@@ -1737,7 +1985,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,argv[2],
atoi(argv[3]), ri->quorum, ri)) == NULL)
{
- return "Wrong hostname or port for replica.";
+ return sentinelCheckCreateInstanceErrors(SRI_SLAVE);
}
} else if (!strcasecmp(argv[0],"known-sentinel") &&
(argc == 4 || argc == 5)) {
@@ -1750,7 +1998,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2],
atoi(argv[3]), ri->quorum, ri)) == NULL)
{
- return "Wrong hostname or port for sentinel.";
+ return sentinelCheckCreateInstanceErrors(SRI_SENTINEL);
}
si->runid = sdsnew(argv[4]);
sentinelTryConnectionSharing(si);
@@ -1787,6 +2035,16 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
/* sentinel-pass <password> */
if (strlen(argv[1]))
sentinel.sentinel_auth_pass = sdsnew(argv[1]);
+ } else if (!strcasecmp(argv[0],"resolve-hostnames") && argc == 2) {
+ /* resolve-hostnames <yes|no> */
+ if ((sentinel.resolve_hostnames = yesnotoi(argv[1])) == -1) {
+ return "Please specify yes or not for the resolve-hostnames option.";
+ }
+ } else if (!strcasecmp(argv[0],"announce-hostnames") && argc == 2) {
+ /* announce-hostnames <yes|no> */
+ if ((sentinel.announce_hostnames = yesnotoi(argv[1])) == -1) {
+ return "Please specify yes or not for the announce-hostnames option.";
+ }
} else {
return "Unrecognized sentinel configuration statement.";
}
@@ -1805,14 +2063,29 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
/* sentinel unique ID. */
line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel myid",line,1);
/* sentinel deny-scripts-reconfig. */
line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
sentinel.deny_scripts_reconfig ? "yes" : "no");
- rewriteConfigRewriteLine(state,"sentinel",line,
+ rewriteConfigRewriteLine(state,"sentinel deny-scripts-reconfig",line,
sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
+ /* sentinel resolve-hostnames.
+ * This must be included early in the file so it is already in effect
+ * when reading the file.
+ */
+ line = sdscatprintf(sdsempty(), "sentinel resolve-hostnames %s",
+ sentinel.resolve_hostnames ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel",line,
+ sentinel.resolve_hostnames != SENTINEL_DEFAULT_RESOLVE_HOSTNAMES);
+
+ /* sentinel announce-hostnames. */
+ line = sdscatprintf(sdsempty(), "sentinel announce-hostnames %s",
+ sentinel.announce_hostnames ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel",line,
+ sentinel.announce_hostnames != SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES);
+
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -1823,16 +2096,18 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
master = dictGetVal(de);
master_addr = sentinelGetCurrentMasterAddress(master);
line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d",
- master->name, master_addr->ip, master_addr->port,
+ master->name, announceSentinelAddr(master_addr), master_addr->port,
master->quorum);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel monitor",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
/* sentinel down-after-milliseconds */
if (master->down_after_period != SENTINEL_DEFAULT_DOWN_AFTER) {
line = sdscatprintf(sdsempty(),
"sentinel down-after-milliseconds %s %ld",
master->name, (long) master->down_after_period);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel failover-timeout */
@@ -1840,7 +2115,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel failover-timeout %s %ld",
master->name, (long) master->failover_timeout);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
}
/* sentinel parallel-syncs */
@@ -1848,7 +2125,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel parallel-syncs %s %d",
master->name, master->parallel_syncs);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel notification-script */
@@ -1856,7 +2134,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel notification-script %s %s",
master->name, master->notification_script);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel notification-script",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel client-reconfig-script */
@@ -1864,7 +2143,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel client-reconfig-script %s %s",
master->name, master->client_reconfig_script);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel auth-pass & auth-user */
@@ -1872,27 +2152,32 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel auth-pass %s %s",
master->name, master->auth_pass);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
if (master->auth_user) {
line = sdscatprintf(sdsempty(),
"sentinel auth-user %s %s",
master->name, master->auth_user);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel auth-user",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel config-epoch */
line = sdscatprintf(sdsempty(),
"sentinel config-epoch %s %llu",
master->name, (unsigned long long) master->config_epoch);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
/* sentinel leader-epoch */
line = sdscatprintf(sdsempty(),
"sentinel leader-epoch %s %llu",
master->name, (unsigned long long) master->leader_epoch);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
/* sentinel known-slave */
di2 = dictGetIterator(master->slaves);
@@ -1911,8 +2196,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
slave_addr = master->addr;
line = sdscatprintf(sdsempty(),
"sentinel known-replica %s %s %d",
- master->name, slave_addr->ip, slave_addr->port);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ master->name, announceSentinelAddr(slave_addr), slave_addr->port);
+ rewriteConfigRewriteLine(state,"sentinel known-replica",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
@@ -1923,8 +2209,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
if (ri->runid == NULL) continue;
line = sdscatprintf(sdsempty(),
"sentinel known-sentinel %s %s %d %s",
- master->name, ri->addr->ip, ri->addr->port, ri->runid);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ master->name, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid);
+ rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
@@ -1936,7 +2223,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel rename-command %s %s %s",
master->name, oldname, newname);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel rename-command",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
}
@@ -1944,36 +2232,62 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
/* sentinel current-epoch is a global state valid for all the masters. */
line = sdscatprintf(sdsempty(),
"sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel current-epoch",line,1);
/* sentinel announce-ip. */
if (sentinel.announce_ip) {
line = sdsnew("sentinel announce-ip ");
line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip));
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel announce-ip",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel announce-ip");
}
/* sentinel announce-port. */
if (sentinel.announce_port) {
line = sdscatprintf(sdsempty(),"sentinel announce-port %d",
sentinel.announce_port);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel announce-port",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel announce-port");
}
/* sentinel sentinel-user. */
if (sentinel.sentinel_auth_user) {
line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user");
}
/* sentinel sentinel-pass. */
if (sentinel.sentinel_auth_pass) {
line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass);
- rewriteConfigRewriteLine(state,"sentinel",line,1);
+ rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass");
}
-
dictReleaseIterator(di);
+
+ /* NOTE: if, because of a state change, the config rewrite above did not
+ emit some of these options even though they were previously present in the
+ config file, rewriteConfigMarkAsProcessed must be called here to mark them
+ as processed, so that the stale entries are removed from the old config.
+ */
+ rewriteConfigMarkAsProcessed(state,"sentinel monitor");
+ rewriteConfigMarkAsProcessed(state,"sentinel down-after-milliseconds");
+ rewriteConfigMarkAsProcessed(state,"sentinel failover-timeout");
+ rewriteConfigMarkAsProcessed(state,"sentinel parallel-syncs");
+ rewriteConfigMarkAsProcessed(state,"sentinel notification-script");
+ rewriteConfigMarkAsProcessed(state,"sentinel client-reconfig-script");
+ rewriteConfigMarkAsProcessed(state,"sentinel auth-pass");
+ rewriteConfigMarkAsProcessed(state,"sentinel auth-user");
+ rewriteConfigMarkAsProcessed(state,"sentinel config-epoch");
+ rewriteConfigMarkAsProcessed(state,"sentinel leader-epoch");
+ rewriteConfigMarkAsProcessed(state,"sentinel known-replica");
+ rewriteConfigMarkAsProcessed(state,"sentinel known-sentinel");
+ rewriteConfigMarkAsProcessed(state,"sentinel rename-command");
}
/* This function uses the config rewriting Redis engine in order to persist
@@ -2029,7 +2343,7 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
auth_user = ri->master->auth_user;
} else if (ri->flags & SRI_SENTINEL) {
/* If sentinel_auth_user is NULL, AUTH will use default user
- with sentinel_auth_pass to autenticate */
+ with sentinel_auth_pass to authenticate */
if (sentinel.sentinel_auth_pass) {
auth_pass = sentinel.sentinel_auth_pass;
auth_user = sentinel.sentinel_auth_user;
@@ -2101,6 +2415,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
/* Commands connection. */
if (link->cc == NULL) {
link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (!link->cc->err) anetCloexec(link->cc->c.fd);
if (!link->cc->err && server.tls_replication &&
(instanceLinkNegotiateTLS(link->cc) == C_ERR)) {
sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to initialize TLS");
@@ -2128,6 +2443,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
/* Pub / Sub */
if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (!link->pc->err) anetCloexec(link->pc->c.fd);
if (!link->pc->err && server.tls_replication &&
(instanceLinkNegotiateTLS(link->pc) == C_ERR)) {
sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #Failed to initialize TLS");
@@ -2137,7 +2453,6 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
instanceLinkCloseConnection(link,link->pc);
} else {
int retval;
-
link->pc_conn_time = mstime();
link->pc->data = link;
redisAeAttach(server.el,link->pc);
@@ -2375,9 +2690,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
sentinelRedisInstanceNoDownFor(ri,wait_time) &&
mstime() - ri->role_reported_time > wait_time)
{
- int retval = sentinelSendSlaveOf(ri,
- ri->master->addr->ip,
- ri->master->addr->port);
+ int retval = sentinelSendSlaveOf(ri,ri->master->addr);
if (retval == C_OK)
sentinelEvent(LL_NOTICE,"+convert-to-slave",ri,"%@");
}
@@ -2388,7 +2701,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
if ((ri->flags & SRI_SLAVE) &&
role == SRI_SLAVE &&
(ri->slave_master_port != ri->master->addr->port ||
- strcasecmp(ri->slave_master_host,ri->master->addr->ip)))
+ !sentinelAddrEqualsHostname(ri->master->addr, ri->slave_master_host)))
{
mstime_t wait_time = ri->master->failover_timeout;
@@ -2398,9 +2711,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
sentinelRedisInstanceNoDownFor(ri,wait_time) &&
mstime() - ri->slave_conf_change_time > wait_time)
{
- int retval = sentinelSendSlaveOf(ri,
- ri->master->addr->ip,
- ri->master->addr->port);
+ int retval = sentinelSendSlaveOf(ri,ri->master->addr);
if (retval == C_OK)
sentinelEvent(LL_NOTICE,"+fix-slave-config",ri,"%@");
}
@@ -2414,8 +2725,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
/* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */
if ((ri->flags & SRI_RECONF_SENT) &&
ri->slave_master_host &&
- strcmp(ri->slave_master_host,
- ri->master->promoted_slave->addr->ip) == 0 &&
+ sentinelAddrEqualsHostname(ri->master->promoted_slave->addr,
+ ri->slave_master_host) &&
ri->slave_master_port == ri->master->promoted_slave->addr->port)
{
ri->flags &= ~SRI_RECONF_SENT;
@@ -2592,7 +2903,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (si && master->config_epoch < master_config_epoch) {
master->config_epoch = master_config_epoch;
if (master_port != master->addr->port ||
- strcmp(master->addr->ip, token[5]))
+ !sentinelAddrEqualsHostname(master->addr, token[5]))
{
sentinelAddr *old_addr;
@@ -2600,7 +2911,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
sentinelEvent(LL_WARNING,"+switch-master",
master,"%s %s %d %s %d",
master->name,
- master->addr->ip, master->addr->port,
+ announceSentinelAddr(master->addr), master->addr->port,
token[5], master_port);
old_addr = dupSentinelAddr(master->addr);
@@ -2693,7 +3004,7 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
announce_ip, announce_port, sentinel.myid,
(unsigned long long) sentinel.current_epoch,
/* --- */
- master->name,master_addr->ip,master_addr->port,
+ master->name,announceSentinelAddr(master_addr),master_addr->port,
(unsigned long long) master->config_epoch);
retval = redisAsyncCommand(ri->link->cc,
sentinelPublishReplyCallback, ri, "%s %s %s",
@@ -2827,6 +3138,101 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
/* =========================== SENTINEL command ============================= */
+/* SENTINEL CONFIG SET <option> <value> */
+void sentinelConfigSetCommand(client *c) {
+ robj *o = c->argv[3];
+ robj *val = c->argv[4];
+ long long numval;
+ int drop_conns = 0;
+
+ if (!strcasecmp(o->ptr, "resolve-hostnames")) {
+ if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt;
+ sentinel.resolve_hostnames = numval;
+ } else if (!strcasecmp(o->ptr, "announce-hostnames")) {
+ if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt;
+ sentinel.announce_hostnames = numval;
+ } else if (!strcasecmp(o->ptr, "announce-ip")) {
+ if (sentinel.announce_ip) sdsfree(sentinel.announce_ip);
+ sentinel.announce_ip = sdsnew(val->ptr);
+ } else if (!strcasecmp(o->ptr, "announce-port")) {
+ if (getLongLongFromObject(val, &numval) == C_ERR ||
+ numval < 0 || numval > 65535)
+ goto badfmt;
+ sentinel.announce_port = numval;
+ } else if (!strcasecmp(o->ptr, "sentinel-user")) {
+ sdsfree(sentinel.sentinel_auth_user);
+ sentinel.sentinel_auth_user = sdsnew(val->ptr);
+ drop_conns = 1;
+ } else if (!strcasecmp(o->ptr, "sentinel-pass")) {
+ sdsfree(sentinel.sentinel_auth_pass);
+ sentinel.sentinel_auth_pass = sdsnew(val->ptr);
+ drop_conns = 1;
+ } else {
+ addReplyErrorFormat(c, "Invalid argument '%s' to SENTINEL CONFIG SET",
+ (char *) o->ptr);
+ return;
+ }
+
+ sentinelFlushConfig();
+ addReply(c, shared.ok);
+
+ /* Drop Sentinel connections to initiate a reconnect if needed. */
+ if (drop_conns)
+ sentinelDropConnections();
+
+ return;
+
+badfmt:
+ addReplyErrorFormat(c, "Invalid value '%s' to SENTINEL CONFIG SET '%s'",
+ (char *) val->ptr, (char *) o->ptr);
+}
+
+/* SENTINEL CONFIG GET <option> */
+void sentinelConfigGetCommand(client *c) {
+ robj *o = c->argv[3];
+ const char *pattern = o->ptr;
+ void *replylen = addReplyDeferredLen(c);
+ int matches = 0;
+
+ if (stringmatch(pattern,"resolve-hostnames",1)) {
+ addReplyBulkCString(c,"resolve-hostnames");
+ addReplyBulkCString(c,sentinel.resolve_hostnames ? "yes" : "no");
+ matches++;
+ }
+
+ if (stringmatch(pattern, "announce-hostnames", 1)) {
+ addReplyBulkCString(c,"announce-hostnames");
+ addReplyBulkCString(c,sentinel.announce_hostnames ? "yes" : "no");
+ matches++;
+ }
+
+ if (stringmatch(pattern, "announce-ip", 1)) {
+ addReplyBulkCString(c,"announce-ip");
+ addReplyBulkCString(c,sentinel.announce_ip ? sentinel.announce_ip : "");
+ matches++;
+ }
+
+ if (stringmatch(pattern, "announce-port", 1)) {
+ addReplyBulkCString(c, "announce-port");
+ addReplyBulkLongLong(c, sentinel.announce_port);
+ matches++;
+ }
+
+ if (stringmatch(pattern, "sentinel-user", 1)) {
+ addReplyBulkCString(c, "sentinel-user");
+ addReplyBulkCString(c, sentinel.sentinel_auth_user ? sentinel.sentinel_auth_user : "");
+ matches++;
+ }
+
+ if (stringmatch(pattern, "sentinel-pass", 1)) {
+ addReplyBulkCString(c, "sentinel-pass");
+ addReplyBulkCString(c, sentinel.sentinel_auth_pass ? sentinel.sentinel_auth_pass : "");
+ matches++;
+ }
+
+ setDeferredMapLen(c, replylen, matches);
+}
+
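Editor's note: the GET handler above uses the deferred-length reply pattern, since the map length is only known after all patterns have been matched. A minimal sketch of that same pattern, using only the reply helpers that already appear in this hunk (the option name and wrapper function below are hypothetical):

    void exampleDeferredMapReply(client *c, const char *pattern) {
        void *replylen = addReplyDeferredLen(c);      /* placeholder; length unknown yet */
        int matches = 0;
        if (stringmatch(pattern, "example-option", 1)) {
            addReplyBulkCString(c, "example-option"); /* key */
            addReplyBulkCString(c, "example-value");  /* value */
            matches++;
        }
        setDeferredMapLen(c, replylen, matches);      /* patch the real map length in */
    }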
const char *sentinelFailoverStateStr(int state) {
switch(state) {
case SENTINEL_FAILOVER_STATE_NONE: return "none";
@@ -2853,7 +3259,7 @@ void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
fields++;
addReplyBulkCString(c,"ip");
- addReplyBulkCString(c,ri->addr->ip);
+ addReplyBulkCString(c,announceSentinelAddr(ri->addr));
fields++;
addReplyBulkCString(c,"port");
@@ -3094,6 +3500,10 @@ void sentinelCommand(client *c) {
" Check if the current Sentinel configuration is able to reach the quorum",
" needed to failover a master and the majority needed to authorize the",
" failover.",
+"CONFIG SET <param> <value>",
+" Set a global Sentinel configuration parameter.",
+"CONFIG GET <param>",
+" Get global Sentinel configuration parameter.",
"GET-MASTER-ADDR-BY-NAME <master-name>",
" Return the ip and port number of the master with that name.",
"FAILOVER <master-name>",
@@ -3235,7 +3645,7 @@ NULL
sentinelAddr *addr = sentinelGetCurrentMasterAddress(ri);
addReplyArrayLen(c,2);
- addReplyBulkCString(c,addr->ip);
+ addReplyBulkCString(c,announceSentinelAddr(addr));
addReplyBulkLongLong(c,addr->port);
}
} else if (!strcasecmp(c->argv[1]->ptr,"failover")) {
@@ -3280,11 +3690,12 @@ NULL
return;
}
- /* Make sure the IP field is actually a valid IP before passing it
- * to createSentinelRedisInstance(), otherwise we may trigger a
- * DNS lookup at runtime. */
- if (anetResolveIP(NULL,c->argv[3]->ptr,ip,sizeof(ip)) == ANET_ERR) {
- addReplyError(c,"Invalid IP address specified");
+ /* If resolve-hostnames is enabled, actual DNS resolution may take place.
+ * Otherwise just validate the address.
+ */
+ if (anetResolve(NULL,c->argv[3]->ptr,ip,sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
+ addReplyError(c, "Invalid IP address or hostname specified");
return;
}
@@ -3354,6 +3765,14 @@ NULL
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3) goto numargserr;
sentinelSetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"config")) {
+ if (c->argc < 3) goto numargserr;
+ if (!strcasecmp(c->argv[2]->ptr,"set") && c->argc == 5)
+ sentinelConfigSetCommand(c);
+ else if (!strcasecmp(c->argv[2]->ptr,"get") && c->argc == 4)
+ sentinelConfigGetCommand(c);
+ else
+ addReplyError(c, "Only SENTINEL CONFIG GET <option> / SET <option> <value> are supported.");
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
/* SENTINEL INFO-CACHE <name> */
if (c->argc < 2) goto numargserr;
@@ -3517,7 +3936,7 @@ void sentinelInfoCommand(client *c) {
"master%d:name=%s,status=%s,address=%s:%d,"
"slaves=%lu,sentinels=%lu\r\n",
master_id++, ri->name, status,
- ri->addr->ip, ri->addr->port,
+ announceSentinelAddr(ri->addr), ri->addr->port,
dictSize(ri->slaves),
dictSize(ri->sentinels)+1);
}
@@ -3913,7 +4332,7 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
sentinelReceiveIsMasterDownReply, ri,
"%s is-master-down-by-addr %s %s %llu %s",
sentinelInstanceMapCommand(ri,"SENTINEL"),
- master->addr->ip, port,
+ announceSentinelAddr(master->addr), port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
sentinel.myid : "*");
@@ -4067,17 +4486,19 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
* The command returns C_OK if the SLAVEOF command was accepted for
* (later) delivery otherwise C_ERR. The command replies are just
* discarded. */
-int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
+int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
char portstr[32];
+ const char *host;
int retval;
- ll2string(portstr,sizeof(portstr),port);
-
/* If host is NULL we send SLAVEOF NO ONE that will turn the instance
- * into a master. */
- if (host == NULL) {
+ * into a master. */
+ if (!addr) {
host = "NO";
memcpy(portstr,"ONE",4);
+ } else {
+ host = announceSentinelAddr(addr);
+ ll2string(portstr,sizeof(portstr),addr->port);
}
/* In order to send SLAVEOF in a safe way, we send a transaction performing
@@ -4362,7 +4783,7 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
* We actually register a generic callback for this command as we don't
* really care about the reply. We check if it worked indirectly observing
* if INFO returns a different role (master instead of slave). */
- retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
+ retval = sentinelSendSlaveOf(ri->promoted_slave,NULL);
if (retval != C_OK) return;
sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
ri->promoted_slave,"%@");
@@ -4432,9 +4853,7 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE|SRI_RECONF_SENT)) continue;
if (slave->link->disconnected) continue;
- retval = sentinelSendSlaveOf(slave,
- master->promoted_slave->addr->ip,
- master->promoted_slave->addr->port);
+ retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
if (retval == C_OK) {
sentinelEvent(LL_NOTICE,"+slave-reconf-sent-be",slave,"%@");
slave->flags |= SRI_RECONF_SENT;
@@ -4489,9 +4908,7 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
if (slave->link->disconnected) continue;
/* Send SLAVEOF <new master>. */
- retval = sentinelSendSlaveOf(slave,
- master->promoted_slave->addr->ip,
- master->promoted_slave->addr->port);
+ retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
if (retval == C_OK) {
slave->flags |= SRI_RECONF_SENT;
slave->slave_reconf_sent_time = mstime();
@@ -4513,10 +4930,10 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
master->promoted_slave : master;
sentinelEvent(LL_WARNING,"+switch-master",master,"%s %s %d %s %d",
- master->name, master->addr->ip, master->addr->port,
- ref->addr->ip, ref->addr->port);
+ master->name, announceSentinelAddr(master->addr), master->addr->port,
+ announceSentinelAddr(ref->addr), ref->addr->port);
- sentinelResetMasterAndChangeAddress(master,ref->addr->ip,ref->addr->port);
+ sentinelResetMasterAndChangeAddress(master,ref->addr->hostname,ref->addr->port);
}
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
@@ -4673,4 +5090,3 @@ void sentinelTimer(void) {
* election because of split brain voting). */
server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ;
}
-
diff --git a/src/server.c b/src/server.c
index 0551eb3e4..faaca7215 100644
--- a/src/server.c
+++ b/src/server.c
@@ -201,6 +201,14 @@ struct redisCommand redisCommandTable[] = {
"read-only fast @string",
0,NULL,1,1,1,0,0,0},
+ {"getex",getexCommand,-2,
+ "write fast @string",
+ 0,NULL,1,1,1,0,0,0},
+
+ {"getdel",getdelCommand,2,
+ "write fast @string",
+ 0,NULL,1,1,1,0,0,0},
+
/* Note that we can't flag set as fast, since it may perform an
* implicit DEL of a large key. */
{"set",setCommand,-3,
@@ -449,15 +457,15 @@ struct redisCommand redisCommandTable[] = {
{"zunionstore",zunionstoreCommand,-4,
"write use-memory @sortedset",
- 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
+ 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zinterstore",zinterstoreCommand,-4,
"write use-memory @sortedset",
- 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
+ 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zdiffstore",zdiffstoreCommand,-4,
"write use-memory @sortedset",
- 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
+ 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zunion",zunionCommand,-3,
"read-only @sortedset",
@@ -547,6 +555,10 @@ struct redisCommand redisCommandTable[] = {
"write no-script fast @sortedset @blocking",
0,NULL,1,-2,1,0,0,0},
+ {"zrandmember",zrandmemberCommand,-2,
+ "read-only random @sortedset",
+ 0,NULL,1,1,1,0,0,0},
+
{"hset",hsetCommand,-4,
"write use-memory fast @hash",
0,NULL,1,1,1,0,0,0},
@@ -603,6 +615,10 @@ struct redisCommand redisCommandTable[] = {
"read-only fast @hash",
0,NULL,1,1,1,0,0,0},
+ {"hrandfield",hrandfieldCommand,-2,
+ "read-only random @hash",
+ 0,NULL,1,1,1,0,0,0},
+
{"hscan",hscanCommand,-3,
"read-only random @hash",
0,NULL,1,1,1,0,0,0},
@@ -744,7 +760,7 @@ struct redisCommand redisCommandTable[] = {
"admin no-script",
0,NULL,0,0,0,0,0,0},
- {"psync",syncCommand,3,
+ {"psync",syncCommand,-3,
"admin no-script",
0,NULL,0,0,0,0,0,0},
@@ -941,7 +957,7 @@ struct redisCommand redisCommandTable[] = {
{"georadius_ro",georadiusroCommand,-6,
"read-only @geo",
- 0,georadiusGetKeys,1,1,1,0,0,0},
+ 0,NULL,1,1,1,0,0,0},
{"georadiusbymember",georadiusbymemberCommand,-5,
"write use-memory @geo",
@@ -949,7 +965,7 @@ struct redisCommand redisCommandTable[] = {
{"georadiusbymember_ro",georadiusbymemberroCommand,-5,
"read-only @geo",
- 0,georadiusGetKeys,1,1,1,0,0,0},
+ 0,NULL,1,1,1,0,0,0},
{"geohash",geohashCommand,-2,
"read-only @geo",
@@ -1016,11 +1032,11 @@ struct redisCommand redisCommandTable[] = {
{"xread",xreadCommand,-4,
"read-only @stream @blocking",
- 0,xreadGetKeys,1,1,1,0,0,0},
+ 0,xreadGetKeys,0,0,0,0,0,0},
{"xreadgroup",xreadCommand,-7,
"write @stream @blocking",
- 0,xreadGetKeys,1,1,1,0,0,0},
+ 0,xreadGetKeys,0,0,0,0,0,0},
{"xgroup",xgroupCommand,-2,
"write use-memory @stream",
@@ -1084,6 +1100,10 @@ struct redisCommand redisCommandTable[] = {
{"reset",resetCommand,1,
"no-script ok-stale ok-loading fast @connection",
+ 0,NULL,0,0,0,0,0,0},
+
+ {"failover",failoverCommand,-1,
+ "admin no-script ok-stale",
0,NULL,0,0,0,0,0,0}
};
@@ -1444,6 +1464,17 @@ dictType hashDictType = {
NULL /* allow to expand */
};
+/* Dict type without destructor */
+dictType sdsReplyDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
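Editor's note: a short sketch of why a dict type without destructors is useful (names below are illustrative). When the sds keys are handed off elsewhere, for example appended to the client reply, the dict must not free them again on release:

    static void exampleReplyDictUsage(void) {
        dict *d = dictCreate(&sdsReplyDictType, NULL);
        sds field = sdsnew("example-field");
        if (dictAdd(d, field, NULL) != DICT_OK)
            sdsfree(field);  /* duplicate key: we still own this copy */
        /* ...hand the stored keys to the reply, which takes ownership... */
        dictRelease(d);      /* frees the table only; the sds keys are not freed */
    }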
/* Keylist hash table type has unencoded redis objects as keys and
* lists as values. It's used for blocking operations (BLPOP) and to
* map swapped keys to a list of clients waiting for this keys to be loaded. */
@@ -1592,6 +1623,9 @@ void resetChildState() {
server.stat_current_cow_bytes = 0;
updateDictResizePolicy();
closeChildInfoPipe();
+ moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
+ REDISMODULE_SUBEVENT_FORK_CHILD_DIED,
+ NULL);
}
/* Return if child type is mutual exclusive with other fork children */
@@ -2159,14 +2193,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* AOF postponed flush: Try at every cron cycle if the slow fsync
* completed. */
- if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
+ if (server.aof_state == AOF_ON && server.aof_flush_postponed_start)
+ flushAppendOnlyFile(0);
/* AOF write errors: in this case we have a buffer to flush as well and
* clear the AOF error in case of success to make the DB writable again,
* however to try every second is enough in case of 'hz' is set to
* a higher frequency. */
run_with_period(1000) {
- if (server.aof_last_write_status == C_ERR)
+ if (server.aof_state == AOF_ON && server.aof_last_write_status == C_ERR)
flushAppendOnlyFile(0);
}
@@ -2174,8 +2209,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
checkClientPauseTimeoutAndReturnIfPaused();
/* Replication cron function -- used to reconnect to master,
- * detect transfer failures, start background RDB transfers and so forth. */
- run_with_period(1000) replicationCron();
+ * detect transfer failures, start background RDB transfers and so forth.
+ *
+ * If Redis is trying to failover then run the replication cron faster so
+ * progress on the handshake happens more quickly. */
+ if (server.failover_state != NO_FAILOVER) {
+ run_with_period(100) replicationCron();
+ } else {
+ run_with_period(1000) replicationCron();
+ }
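Editor's note: run_with_period() is the cron gating construct used throughout serverCron(); the change above simply switches the failover path to a 100 ms period instead of 1000 ms. As a rough illustration of the idea behind such a gate (the helper below is hypothetical, not the actual macro):

    /* Hypothetical helper, for illustration only. */
    static int shouldRunEveryMs(long long period_ms, int hz, long long cronloops) {
        long long tick_ms = 1000 / hz;       /* time between serverCron() calls */
        if (period_ms <= tick_ms) return 1;  /* period shorter than a tick: run every time */
        return (cronloops % (period_ms / tick_ms)) == 0;
    }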
/* Run the Redis Cluster cron. */
run_with_period(100) {
@@ -2386,12 +2428,18 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
server.get_ack_from_slaves = 0;
}
+ /* We may have received updates from clients about their current offset. NOTE:
+ * this can't be done where the ACK is received since failover will disconnect
+ * our clients. */
+ updateFailoverStatus();
+
/* Send the invalidation messages to clients participating to the
* client side caching protocol in broadcasting (BCAST) mode. */
trackingBroadcastInvalidationMessages();
/* Write the AOF buffer on disk */
- flushAppendOnlyFile(0);
+ if (server.aof_state == AOF_ON)
+ flushAppendOnlyFile(0);
/* Handle writes with pending output buffers. */
handleClientsWithPendingWritesUsingThreads();
@@ -2532,6 +2580,12 @@ void createSharedObjects(void) {
/* Used in the LMOVE/BLMOVE commands */
shared.left = createStringObject("left",4);
shared.right = createStringObject("right",5);
+ shared.pexpireat = createStringObject("PEXPIREAT",9);
+ shared.pexpire = createStringObject("PEXPIRE",7);
+ shared.persist = createStringObject("PERSIST",7);
+ shared.set = createStringObject("SET",3);
+ shared.pxat = createStringObject("PXAT", 4);
+ shared.px = createStringObject("PX",2);
for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
shared.integers[j] =
makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
@@ -2634,6 +2688,13 @@ void initServerConfig(void) {
server.repl_backlog_off = 0;
server.repl_no_slaves_since = time(NULL);
+ /* Failover related */
+ server.failover_end_time = 0;
+ server.force_failover = 0;
+ server.target_replica_host = NULL;
+ server.target_replica_port = 0;
+ server.failover_state = NO_FAILOVER;
+
/* Client output buffer limits */
for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
@@ -2957,6 +3018,7 @@ int listenToPort(int port, int *fds, int *count) {
return C_ERR;
}
anetNonBlock(NULL,fds[*count]);
+ anetCloexec(fds[*count]);
(*count)++;
}
return C_OK;
@@ -3095,6 +3157,7 @@ void initServer(void) {
exit(1);
}
anetNonBlock(NULL,server.sofd);
+ anetCloexec(server.sofd);
}
/* Abort if there are no listening sockets at all. */
@@ -3557,7 +3620,7 @@ void preventCommandReplication(client *c) {
*/
void call(client *c, int flags) {
long long dirty;
- ustime_t start, duration;
+ monotime call_timer;
int client_old_flags = c->flags;
struct redisCommand *real_cmd = c->cmd;
static long long prev_err_count;
@@ -3583,9 +3646,10 @@ void call(client *c, int flags) {
dirty = server.dirty;
prev_err_count = server.stat_total_error_replies;
updateCachedTime(0);
- start = server.ustime;
+ elapsedStart(&call_timer);
c->cmd->proc(c);
- duration = ustime()-start;
+ const long duration = elapsedUs(call_timer);
+ c->duration = duration;
dirty = server.dirty-dirty;
if (dirty < 0) dirty = 0;
@@ -3629,7 +3693,10 @@ void call(client *c, int flags) {
* arguments. */
robj **argv = c->original_argv ? c->original_argv : c->argv;
int argc = c->original_argv ? c->original_argc : c->argc;
- slowlogPushEntryIfNeeded(c,argv,argc,duration);
+ /* If the client is blocked we will handle slowlog when it is unblocked. */
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ slowlogPushEntryIfNeeded(c,argv,argc,duration);
+ }
}
freeClientOriginalArgv(c);
@@ -4682,7 +4749,7 @@ sds genRedisInfoString(const char *section) {
"aof_last_cow_size:%zu\r\n"
"module_fork_in_progress:%d\r\n"
"module_fork_last_cow_size:%zu\r\n",
- server.loading,
+ (int)server.loading,
server.stat_current_cow_bytes,
server.dirty,
server.child_type == CHILD_TYPE_RDB,
@@ -4972,6 +5039,7 @@ sds genRedisInfoString(const char *section) {
}
}
info = sdscatprintf(info,
+ "master_failover_state:%s\r\n"
"master_replid:%s\r\n"
"master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
@@ -4980,6 +5048,7 @@ sds genRedisInfoString(const char *section) {
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
+ getFailoverStateString(),
server.replid,
server.replid2,
server.master_repl_offset,
@@ -5184,7 +5253,7 @@ static int smapsGetSharedDirty(unsigned long addr) {
FILE *f;
f = fopen("/proc/self/smaps", "r");
- serverAssert(f);
+ if (!f) return -1;
while (1) {
if (!fgets(buf, sizeof(buf), f))
@@ -5195,8 +5264,8 @@ static int smapsGetSharedDirty(unsigned long addr) {
in_mapping = from <= addr && addr < to;
if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) {
- ret = sscanf(buf, "%*s %d", &val);
- serverAssert(ret == 1);
+ sscanf(buf, "%*s %d", &val);
+ /* If parsing fails, val remains -1 */
break;
}
}
@@ -5210,23 +5279,33 @@ static int smapsGetSharedDirty(unsigned long addr) {
* kernel is affected.
* The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b
* titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()"
- * Return 1 if the kernel seems to be affected, and 0 otherwise. */
+ * Return -1 on unexpected test failure, 1 if the kernel seems to be affected,
+ * and 0 otherwise. */
int linuxMadvFreeForkBugCheck(void) {
- int ret, pipefd[2];
+ int ret, pipefd[2] = { -1, -1 };
pid_t pid;
- char *p, *q, bug_found = 0;
- const long map_size = 3 * 4096;
+ char *p = NULL, *q;
+ int bug_found = 0;
+ long page_size = sysconf(_SC_PAGESIZE);
+ long map_size = 3 * page_size;
/* Create a memory map that's in our full control (not one used by the allocator). */
p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- serverAssert(p != MAP_FAILED);
+ if (p == MAP_FAILED) {
+ serverLog(LL_WARNING, "Failed to mmap(): %s", strerror(errno));
+ return -1;
+ }
- q = p + 4096;
+ q = p + page_size;
/* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent
* Linux from merging this memory map with adjacent VMAs. */
- ret = mprotect(q, 4096, PROT_READ | PROT_WRITE);
- serverAssert(!ret);
+ ret = mprotect(q, page_size, PROT_READ | PROT_WRITE);
+ if (ret < 0) {
+ serverLog(LL_WARNING, "Failed to mprotect(): %s", strerror(errno));
+ bug_found = -1;
+ goto exit;
+ }
/* Write to the page once to make it resident */
*(volatile char*)q = 0;
@@ -5235,8 +5314,16 @@ int linuxMadvFreeForkBugCheck(void) {
#ifndef MADV_FREE
#define MADV_FREE 8
#endif
- ret = madvise(q, 4096, MADV_FREE);
- serverAssert(!ret);
+ ret = madvise(q, page_size, MADV_FREE);
+ if (ret < 0) {
+ /* MADV_FREE is not available on older kernels that are presumably
+ * not affected. */
+ if (errno == EINVAL) goto exit;
+
+ serverLog(LL_WARNING, "Failed to madvise(): %s", strerror(errno));
+ bug_found = -1;
+ goto exit;
+ }
/* Write to the page after being marked for freeing, this is supposed to take
* ownership of that page again. */
@@ -5244,37 +5331,47 @@ int linuxMadvFreeForkBugCheck(void) {
/* Create a pipe for the child to return the info to the parent. */
ret = pipe(pipefd);
- serverAssert(!ret);
+ if (ret < 0) {
+ serverLog(LL_WARNING, "Failed to create pipe: %s", strerror(errno));
+ bug_found = -1;
+ goto exit;
+ }
/* Fork the process. */
pid = fork();
- serverAssert(pid >= 0);
- if (!pid) {
- /* Child: check if the page is marked as dirty, expecing 4 (kB).
+ if (pid < 0) {
+ serverLog(LL_WARNING, "Failed to fork: %s", strerror(errno));
+ bug_found = -1;
+ goto exit;
+ } else if (!pid) {
+ /* Child: check if the page is marked as dirty, expecting page_size (in kB).
* A value of 0 means the kernel is affected by the bug. */
- if (!smapsGetSharedDirty((unsigned long)q))
+ ret = smapsGetSharedDirty((unsigned long) q);
+ if (!ret)
bug_found = 1;
+ else if (ret == -1) /* Failed to read */
+ bug_found = -1;
- ret = write(pipefd[1], &bug_found, 1);
- serverAssert(ret == 1);
-
+ if (write(pipefd[1], &bug_found, sizeof(bug_found)) < 0)
+ serverLog(LL_WARNING, "Failed to write to parent: %s", strerror(errno));
exit(0);
} else {
/* Read the result from the child. */
- ret = read(pipefd[0], &bug_found, 1);
- serverAssert(ret == 1);
+ ret = read(pipefd[0], &bug_found, sizeof(bug_found));
+ if (ret < 0) {
+ serverLog(LL_WARNING, "Failed to read from child: %s", strerror(errno));
+ bug_found = -1;
+ }
/* Reap the child pid. */
- serverAssert(waitpid(pid, NULL, 0) == pid);
+ waitpid(pid, NULL, 0);
}
+exit:
/* Cleanup */
- ret = close(pipefd[0]);
- serverAssert(!ret);
- ret = close(pipefd[1]);
- serverAssert(!ret);
- ret = munmap(p, map_size);
- serverAssert(!ret);
+ if (pipefd[0] != -1) close(pipefd[0]);
+ if (pipefd[1] != -1) close(pipefd[1]);
+ if (p != NULL) munmap(p, map_size);
return bug_found;
}
@@ -5470,7 +5567,7 @@ void setupChildSignalHandlers(void) {
* of the parent process, e.g. fd(socket or flock) etc.
* should close the resources not used by the child process, so that if the
* parent restarts it can bind/lock despite the child possibly still running. */
-void closeClildUnusedResourceAfterFork() {
+void closeChildUnusedResourceAfterFork() {
closeListeningSockets(0);
if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
close(server.cluster_config_file_lock_fd); /* don't care if this fails */
@@ -5497,7 +5594,7 @@ int redisFork(int purpose) {
server.in_fork_child = purpose;
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
setupChildSignalHandlers();
- closeClildUnusedResourceAfterFork();
+ closeChildUnusedResourceAfterFork();
} else {
/* Parent */
server.stat_total_forks++;
@@ -5523,6 +5620,9 @@ int redisFork(int purpose) {
}
updateDictResizePolicy();
+ moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
+ REDISMODULE_SUBEVENT_FORK_CHILD_BORN,
+ NULL);
}
return childpid;
}
@@ -5533,7 +5633,7 @@ void sendChildCOWInfo(int ptype, int on_exit, char *pname) {
if (private_dirty) {
serverLog(on_exit ? LL_NOTICE : LL_VERBOSE,
"%s: %zu MB of memory used by copy-on-write",
- pname, private_dirty);
+ pname, private_dirty/(1024*1024));
}
sendChildInfo(ptype, on_exit, private_dirty);
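/* Editor's note (illustrative, not part of the patch): private_dirty is in
 * bytes, so the division above makes the "%zu MB" log line accurate; for
 * example, 52428800 bytes is now reported as 50 MB rather than 52428800 MB. */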
@@ -5598,20 +5698,68 @@ void redisOutOfMemoryHandler(size_t allocation_size) {
allocation_size);
}
-void redisSetProcTitle(char *title) {
+/* Callback for sdstemplate on proc-title-template. See redis.conf for
+ * supported variables.
+ */
+static sds redisProcTitleGetVariable(const sds varname, void *arg)
+{
+ if (!strcmp(varname, "title")) {
+ return sdsnew(arg);
+ } else if (!strcmp(varname, "listen-addr")) {
+ if (server.port || server.tls_port)
+ return sdscatprintf(sdsempty(), "%s:%u",
+ server.bindaddr_count ? server.bindaddr[0] : "*",
+ server.port ? server.port : server.tls_port);
+ else
+ return sdscatprintf(sdsempty(), "unixsocket:%s", server.unixsocket);
+ } else if (!strcmp(varname, "server-mode")) {
+ if (server.cluster_enabled) return sdsnew("[cluster]");
+ else if (server.sentinel_mode) return sdsnew("[sentinel]");
+ else return sdsempty();
+ } else if (!strcmp(varname, "config-file")) {
+ return sdsnew(server.configfile ? server.configfile : "-");
+ } else if (!strcmp(varname, "port")) {
+ return sdscatprintf(sdsempty(), "%u", server.port);
+ } else if (!strcmp(varname, "tls-port")) {
+ return sdscatprintf(sdsempty(), "%u", server.tls_port);
+ } else if (!strcmp(varname, "unixsocket")) {
+ return sdsnew(server.unixsocket);
+ } else
+ return NULL; /* Unknown variable name */
+}
+
+/* Expand the specified proc-title-template string and return a newly
+ * allocated sds, or NULL. */
+static sds expandProcTitleTemplate(const char *template, const char *title) {
+ sds res = sdstemplate(template, redisProcTitleGetVariable, (void *) title);
+ if (!res)
+ return NULL;
+ return sdstrim(res, " ");
+}
+/* Validate the specified template, returns 1 if valid or 0 otherwise. */
+int validateProcTitleTemplate(const char *template) {
+ int ok = 1;
+ sds res = expandProcTitleTemplate(template, "");
+ if (!res)
+ return 0;
+ if (sdslen(res) == 0) ok = 0;
+ sdsfree(res);
+ return ok;
+}
+
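Editor's note: for reference, this is roughly how the callback above is driven. sdstemplate() walks the template and invokes the callback for each {variable}; the wrapper below is only a sketch, the template string is the default from server.h, and with a standalone server bound to port 6379 it would presumably expand to something like "redis-server *:6379".

    static sds exampleRenderTitle(const char *title) {
        /* Same call shape as expandProcTitleTemplate() above. */
        return sdstemplate("{title} {listen-addr} {server-mode}",
                           redisProcTitleGetVariable, (void *) title);
    }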
+int redisSetProcTitle(char *title) {
#ifdef USE_SETPROCTITLE
- char *server_mode = "";
- if (server.cluster_enabled) server_mode = " [cluster]";
- else if (server.sentinel_mode) server_mode = " [sentinel]";
-
- setproctitle("%s %s:%d%s",
- title,
- server.bindaddr_count ? server.bindaddr[0] : "*",
- server.port ? server.port : server.tls_port,
- server_mode);
+ if (!title) title = server.exec_argv[0];
+ sds proc_title = expandProcTitleTemplate(server.proc_title_template, title);
+ if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */
+
+ setproctitle("%s", proc_title);
+ sdsfree(proc_title);
#else
UNUSED(title);
#endif
+
+ return C_OK;
}
void redisSetCpuAffinity(const char *cpulist) {
@@ -5751,6 +5899,12 @@ int main(int argc, char **argv) {
init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
crc64_init();
+ /* Store umask value. Because umask(2) only offers a set-and-get API we have
+ * to reset it and restore it. We do this early to avoid a potential
+ * race condition with threads that could be creating files or directories.
+ */
+ umask(server.umask = umask(0777));
+
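Editor's note: the one-liner above relies on umask(2) being a set-and-get call, since there is no way to read the mask without writing one. Spelled out as a small sketch (the helper name is illustrative):

    #include <sys/stat.h>

    static mode_t readUmask(void) {
        mode_t old = umask(0777);  /* set a throwaway mask, get the current one back */
        umask(old);                /* immediately restore the original mask */
        return old;
    }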
uint8_t hashseed[16];
getRandomBytes(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed(hashseed);
@@ -5843,6 +5997,7 @@ int main(int argc, char **argv) {
exit(1);
}
loadServerConfig(server.configfile, config_from_stdin, options);
+ if (server.sentinel_mode) loadSentinelConfigFromQueue();
sdsfree(options);
}
@@ -5868,7 +6023,7 @@ int main(int argc, char **argv) {
readOOMScoreAdj();
initServer();
if (background || server.pidfile) createPidFile();
- redisSetProcTitle(argv[0]);
+ if (server.set_proc_title) redisSetProcTitle(NULL);
redisAsciiArt();
checkTcpBacklogSettings();
@@ -5878,10 +6033,17 @@ int main(int argc, char **argv) {
#ifdef __linux__
linuxMemoryWarnings();
#if defined (__arm64__)
- if (linuxMadvFreeForkBugCheck()) {
- serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. Please upgrade to the latest stable kernel.");
+ int ret;
+ if ((ret = linuxMadvFreeForkBugCheck())) {
+ if (ret == 1)
+ serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. "
+ "Please upgrade to the latest stable kernel.");
+ else
+ serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
+ "Your system could be affected, please report this error.");
if (!checkIgnoreWarning("ARM64-COW-BUG")) {
- serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
+ serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
+ "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
exit(1);
}
}
diff --git a/src/server.h b/src/server.h
index eb967a042..b293afcee 100644
--- a/src/server.h
+++ b/src/server.h
@@ -115,6 +115,7 @@ typedef long long ustime_t; /* microsecond time type. */
#define NET_ADDR_STR_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
#define CONFIG_BINDADDR_MAX 16
#define CONFIG_MIN_RESERVED_FDS 32
+#define CONFIG_DEFAULT_PROC_TITLE_TEMPLATE "{title} {listen-addr} {server-mode}"
#define ACTIVE_EXPIRE_CYCLE_SLOW 0
#define ACTIVE_EXPIRE_CYCLE_FAST 1
@@ -270,6 +271,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_DENY_BLOCKING (1ULL<<41) /* Indicate that the client should not be blocked.
currently, turned on inside MULTI, Lua, RM_Call,
and AOF client */
+#define CLIENT_REPL_RDBONLY (1ULL<<42) /* This client is a replica that only wants
+ RDB without replication buffer. */
/* Client block type (btype field in client structure)
* if CLIENT_BLOCKED flag is set. */
@@ -317,6 +320,14 @@ typedef enum {
REPL_STATE_CONNECTED, /* Connected to master */
} repl_state;
+/* The state of an in-progress coordinated failover */
+typedef enum {
+ NO_FAILOVER = 0, /* No failover in progress */
+ FAILOVER_WAIT_FOR_SYNC, /* Waiting for target replica to catch up */
+ FAILOVER_IN_PROGRESS /* Waiting for target replica to accept
+ * PSYNC FAILOVER request. */
+} failover_state;
+
/* State of slaves from the POV of the master. Used in client->replstate.
* In SEND_BULK and ONLINE state the slave receives new updates
* in its output queue. In the WAIT_BGSAVE states instead the server is waiting
@@ -870,6 +881,7 @@ typedef struct client {
size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time. */
+ long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
time_t lastinteraction; /* Time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
uint64_t flags; /* Client flags: CLIENT_* macros. */
@@ -942,6 +954,19 @@ struct moduleLoadQueueEntry {
robj **argv;
};
+struct sentinelLoadQueueEntry {
+ int argc;
+ sds *argv;
+ int linenum;
+ sds line;
+};
+
+struct sentinelConfig {
+ list *pre_monitor_cfg;
+ list *monitor_cfg;
+ list *post_monitor_cfg;
+};
+
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
*colon, *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
@@ -951,7 +976,8 @@ struct sharedObjectsStruct {
*busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
*unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
*rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax,
- *emptyscan, *multi, *exec, *left, *right,
+ *emptyscan, *multi, *exec, *left, *right, *persist, *set, *pexpireat,
+ *pexpire, *pxat, *px,
*select[PROTO_SHARED_SELECT_CMDS],
*integers[OBJ_SHARED_INTEGERS],
*mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
@@ -1124,6 +1150,7 @@ struct redisServer {
int config_hz; /* Configured HZ value. May be different than
the actual 'hz' field value if dynamic-hz
is enabled. */
+ mode_t umask; /* The umask value of the process on startup */
int hz; /* serverCron() calls frequency in hertz */
int in_fork_child; /* indication that this is a fork child */
redisDb *db;
@@ -1280,6 +1307,8 @@ struct redisServer {
int supervised; /* 1 if supervised, 0 otherwise. */
int supervised_mode; /* See SUPERVISED_* */
int daemonize; /* True if running as a daemon */
+ int set_proc_title; /* True if the process title should be changed */
+ char *proc_title_template; /* Process title template format */
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
/* AOF persistence */
int aof_enabled; /* AOF configuration */
@@ -1530,6 +1559,7 @@ struct redisServer {
int lazyfree_lazy_expire;
int lazyfree_lazy_server_del;
int lazyfree_lazy_user_del;
+ int lazyfree_lazy_user_flush;
/* Latency monitor */
long long latency_monitor_threshold;
dict *latency_events;
@@ -1554,6 +1584,16 @@ struct redisServer {
char *bio_cpulist; /* cpu affinity list of bio thread. */
char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */
char *bgsave_cpulist; /* cpu affinity list of bgsave process. */
+ /* Sentinel config */
+ struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */
+ /* Coordinated failover info */
+ mstime_t failover_end_time; /* Deadline for failover command. */
+ int force_failover; /* If true then failover will be forced at the
+ * deadline, otherwise failover is aborted. */
+ char *target_replica_host; /* Failover target host. If null during a
+ * failover then any replica can be used. */
+ int target_replica_port; /* Failover target port */
+ int failover_state; /* Failover state */
};
typedef struct pubsubPattern {
@@ -1679,6 +1719,7 @@ extern dictType hashDictType;
extern dictType replScriptCacheDictType;
extern dictType dbExpiresDictType;
extern dictType modulesDictType;
+extern dictType sdsReplyDictType;
/*-----------------------------------------------------------------------------
* Functions prototypes
@@ -1728,7 +1769,8 @@ void getRandomBytes(unsigned char *p, size_t len);
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
void exitFromChild(int retcode);
size_t redisPopcount(void *s, long count);
-void redisSetProcTitle(char *title);
+int redisSetProcTitle(char *title);
+int validateProcTitleTemplate(const char *template);
int redisCommunicateSystemd(const char *sd_notify_msg);
void redisSetCpuAffinity(const char *cpulist);
@@ -1973,6 +2015,10 @@ void feedReplicationBacklog(void *ptr, size_t len);
void showLatestBacklog(void);
void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
+void clearFailoverState(void);
+void updateFailoverStatus(void);
+void abortFailover(const char *err);
+const char *getFailoverStateString();
/* Generic persistence functions */
void startLoadingFile(FILE* fp, char* filename, int rdbflags);
@@ -2042,7 +2088,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen);
sds ACLDefaultUserFirstPassword(void);
uint64_t ACLGetCommandCategoryFlagByName(const char *name);
int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err);
-char *ACLSetUserStringError(void);
+const char *ACLSetUserStringError(void);
int ACLLoadConfiguredUsers(void);
sds ACLDescribeUser(user *u);
void ACLLoadUsersAtStartup(void);
@@ -2236,6 +2282,7 @@ void appendServerSaveParams(time_t seconds, int changes);
void resetServerSaveParams(void);
struct rewriteConfigState; /* Forward declaration to export API. */
void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option);
int rewriteConfig(char *path, int force_all);
void initConfigValues();
@@ -2330,7 +2377,9 @@ int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uin
void initSentinelConfig(void);
void initSentinel(void);
void sentinelTimer(void);
-char *sentinelHandleConfiguration(char **argv, int argc);
+const char *sentinelHandleConfiguration(char **argv, int argc);
+void queueSentinelConfig(sds *argv, int argc, int linenum, sds line);
+void loadSentinelConfigFromQueue(void);
void sentinelIsRunning(void);
/* redis-check-rdb & aof */
@@ -2344,6 +2393,7 @@ int ldbRemoveChild(pid_t pid);
void ldbKillForkedSessions(void);
int ldbPendingChildren(void);
sds luaCreateFunction(client *c, lua_State *lua, robj *body);
+void freeLuaScriptsAsync(dict *lua_scripts);
/* Blocked clients */
void processUnblockedClients(void);
@@ -2356,6 +2406,7 @@ void disconnectAllBlockedClients(void);
void handleClientsBlockedOnKeys(void);
void signalKeyAsReady(redisDb *db, robj *key, int type);
void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, struct listPos *listpos, streamID *ids);
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us);
/* timeout.c -- Blocked clients timeout and connections timeout. */
void addClientToTimeoutTable(client *c);
@@ -2403,6 +2454,8 @@ void setnxCommand(client *c);
void setexCommand(client *c);
void psetexCommand(client *c);
void getCommand(client *c);
+void getexCommand(client *c);
+void getdelCommand(client *c);
void delCommand(client *c);
void unlinkCommand(client *c);
void existsCommand(client *c);
@@ -2505,6 +2558,7 @@ void zpopminCommand(client *c);
void zpopmaxCommand(client *c);
void bzpopminCommand(client *c);
void bzpopmaxCommand(client *c);
+void zrandmemberCommand(client *c);
void multiCommand(client *c);
void execCommand(client *c);
void discardCommand(client *c);
@@ -2538,6 +2592,7 @@ void hvalsCommand(client *c);
void hgetallCommand(client *c);
void hexistsCommand(client *c);
void hscanCommand(client *c);
+void hrandfieldCommand(client *c);
void configCommand(client *c);
void hincrbyCommand(client *c);
void hincrbyfloatCommand(client *c);
@@ -2607,6 +2662,7 @@ void lolwutCommand(client *c);
void aclCommand(client *c);
void stralgoCommand(client *c);
void resetCommand(client *c);
+void failoverCommand(client *c);
#if defined(__GNUC__)
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
diff --git a/src/stream.h b/src/stream.h
index c7acee719..1f2132365 100644
--- a/src/stream.h
+++ b/src/stream.h
@@ -108,6 +108,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev);
int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields);
void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen);
+void streamIteratorRemoveEntry(streamIterator *si, streamID *current);
void streamIteratorStop(streamIterator *si);
streamCG *streamLookupCG(stream *s, sds groupname);
streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int flags, int *created);
@@ -121,5 +122,11 @@ int streamDecrID(streamID *id);
void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername);
robj *streamDup(robj *o);
int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep);
+int streamParseID(const robj *o, streamID *id);
+robj *createObjectFromStreamID(streamID *id);
+int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id);
+int streamDeleteItem(stream *s, streamID *id);
+int64_t streamTrimByLength(stream *s, long long maxlen, int approx);
+int64_t streamTrimByID(stream *s, streamID minid, int approx);
#endif
diff --git a/src/t_hash.c b/src/t_hash.c
index 51c7d6758..9f7540a72 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -598,6 +598,42 @@ int hashZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) {
return ret;
}
+/* Create a new sds string from the ziplist entry. */
+sds hashSdsFromZiplistEntry(ziplistEntry *e) {
+ return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval);
+}
+
+/* Reply with bulk string from the ziplist entry. */
+void hashReplyFromZiplistEntry(client *c, ziplistEntry *e) {
+ if (e->sval)
+ addReplyBulkCBuffer(c, e->sval, e->slen);
+ else
+ addReplyBulkLongLong(c, e->lval);
+}
+
+/* Return a random element from a non-empty hash.
+ * 'key' and 'val' will be set to hold the element.
+ * The memory in them is not to be freed or modified by the caller.
+ * 'val' can be NULL in which case it's not extracted. */
+void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, ziplistEntry *key, ziplistEntry *val) {
+ if (hashobj->encoding == OBJ_ENCODING_HT) {
+ dictEntry *de = dictGetFairRandomKey(hashobj->ptr);
+ sds s = dictGetKey(de);
+ key->sval = (unsigned char*)s;
+ key->slen = sdslen(s);
+ if (val) {
+ sds s = dictGetVal(de);
+ val->sval = (unsigned char*)s;
+ val->slen = sdslen(s);
+ }
+ } else if (hashobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ ziplistRandomPair(hashobj->ptr, hashsize, key, val);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+}
+
+
/*-----------------------------------------------------------------------------
* Hash type commands
*----------------------------------------------------------------------------*/
@@ -922,3 +958,220 @@ void hscanCommand(client *c) {
checkType(c,o,OBJ_HASH)) return;
scanGenericCommand(c,o,cursor);
}
+
+/* How many times bigger should the hash be, compared to the requested size,
+ * for us not to use the "remove elements" strategy? Read later in the
+ * implementation for more info. */
+#define HRANDFIELD_SUB_STRATEGY_MUL 3
+
+void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
+ unsigned long count, size;
+ int uniq = 1;
+ robj *hash;
+
+ if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
+ == NULL || checkType(c,hash,OBJ_HASH)) return;
+ size = hashTypeLength(hash);
+
+ if (l >= 0) {
+ count = (unsigned long) l;
+ } else {
+ count = -l;
+ uniq = 0;
+ }
+
+ /* If count is zero, serve it ASAP to avoid special cases later. */
+ if (count == 0) {
+ addReply(c,shared.emptyarray);
+ return;
+ }
+
+ /* CASE 1: The count was negative, so the extraction method is just:
+ * "return N random elements" sampling the whole set every time.
+ * This case is trivial and can be served without auxiliary data
+ * structures. This case is the only one that also needs to return the
+ * elements in random order. */
+ if (!uniq || count == 1) {
+ if (withvalues && c->resp == 2)
+ addReplyArrayLen(c, count*2);
+ else
+ addReplyArrayLen(c, count);
+ if (hash->encoding == OBJ_ENCODING_HT) {
+ sds key, value;
+ while (count--) {
+ dictEntry *de = dictGetRandomKey(hash->ptr);
+ key = dictGetKey(de);
+ value = dictGetVal(de);
+ if (withvalues && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addReplyBulkCBuffer(c, key, sdslen(key));
+ if (withvalues)
+ addReplyBulkCBuffer(c, value, sdslen(value));
+ }
+ } else if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
+ ziplistEntry *keys, *vals = NULL;
+ keys = zmalloc(sizeof(ziplistEntry)*count);
+ if (withvalues)
+ vals = zmalloc(sizeof(ziplistEntry)*count);
+ ziplistRandomPairs(hash->ptr, count, keys, vals);
+ for (unsigned long i = 0; i < count; i++) {
+ if (withvalues && c->resp > 2)
+ addReplyArrayLen(c,2);
+ if (keys[i].sval)
+ addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+ else
+ addReplyBulkLongLong(c, keys[i].lval);
+ if (withvalues) {
+ if (vals[i].sval)
+ addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
+ else
+ addReplyBulkLongLong(c, vals[i].lval);
+ }
+ }
+ zfree(keys);
+ zfree(vals);
+ }
+ return;
+ }
+
+ /* Emit the reply length: RESP3 responds with a nested array, RESP2 with a flat one. */
+ long reply_size = count < size ? count : size;
+ if (withvalues && c->resp == 2)
+ addReplyArrayLen(c, reply_size*2);
+ else
+ addReplyArrayLen(c, reply_size);
+
+ /* CASE 2:
+ * The number of requested elements is greater than the number of
+ * elements inside the hash: simply return the whole hash. */
+ if (count >= size) {
+ hashTypeIterator *hi = hashTypeInitIterator(hash);
+ while (hashTypeNext(hi) != C_ERR) {
+ if (withvalues && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
+ if (withvalues)
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
+ }
+ hashTypeReleaseIterator(hi);
+ return;
+ }
+
+ /* CASE 3:
+ * The number of elements inside the hash is not greater than
+ * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements.
+ * In this case we create a hash from scratch with all the elements, and
+ * subtract random elements to reach the requested number of elements.
+ *
+ * This is done because if the number of requested elements is just
+ * a bit less than the number of elements in the hash, the natural approach
+ * used in CASE 4 is highly inefficient. */
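/* Editor's note (illustrative, not part of the patch): with
 * HRANDFIELD_SUB_STRATEGY_MUL == 3 and a 100-field hash, a request for 40
 * unique fields takes the branch right below (40*3 = 120 > 100): copy all
 * 100 fields and evict 60 at random. A request for 20 falls through to
 * CASE 4 instead (20*3 = 60 <= 100) and samples random fields until 20
 * unique ones have been collected. */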
+ if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) {
+ dict *d = dictCreate(&sdsReplyDictType, NULL);
+ hashTypeIterator *hi = hashTypeInitIterator(hash);
+
+ /* Add all the elements into the temporary dictionary. */
+ while ((hashTypeNext(hi)) != C_ERR) {
+ int ret = DICT_ERR;
+ sds key, value = NULL;
+
+ key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ if (withvalues)
+ value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ ret = dictAdd(d, key, value);
+
+ serverAssert(ret == DICT_OK);
+ }
+ serverAssert(dictSize(d) == size);
+ hashTypeReleaseIterator(hi);
+
+ /* Remove random elements to reach the right count. */
+ while (size > count) {
+ dictEntry *de;
+ de = dictGetRandomKey(d);
+ dictUnlink(d,dictGetKey(de));
+ sdsfree(dictGetKey(de));
+ sdsfree(dictGetVal(de));
+ dictFreeUnlinkedEntry(d,de);
+ size--;
+ }
+
+ /* Reply with what's in the dict and release memory */
+ dictIterator *di;
+ dictEntry *de;
+ di = dictGetIterator(d);
+ while ((de = dictNext(di)) != NULL) {
+ sds key = dictGetKey(de);
+ sds value = dictGetVal(de);
+ if (withvalues && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addReplyBulkSds(c, key);
+ if (withvalues)
+ addReplyBulkSds(c, value);
+ }
+
+ dictReleaseIterator(di);
+ dictRelease(d);
+ }
+
+ /* CASE 4: We have a big hash compared to the requested number of elements.
+ * In this case we can simply get random elements from the hash and add
+ * to the temporary hash, trying to eventually get enough unique elements
+ * to reach the specified count. */
+ else {
+ unsigned long added = 0;
+ ziplistEntry key, value;
+ dict *d = dictCreate(&hashDictType, NULL);
+ while (added < count) {
+ hashTypeRandomElement(hash, size, &key, withvalues? &value : NULL);
+
+ /* Try to add the object to the dictionary. If it already exists
+ * free it, otherwise increment the number of objects we have
+ * in the result dictionary. */
+ sds skey = hashSdsFromZiplistEntry(&key);
+ if (dictAdd(d,skey,NULL) != DICT_OK) {
+ sdsfree(skey);
+ continue;
+ }
+ added++;
+
+ /* We can reply right away, so that we don't need to store the value in the dict. */
+ if (withvalues && c->resp > 2)
+ addReplyArrayLen(c,2);
+ hashReplyFromZiplistEntry(c, &key);
+ if (withvalues)
+ hashReplyFromZiplistEntry(c, &value);
+ }
+
+ /* Release memory */
+ dictRelease(d);
+ }
+}
+
+/* HRANDFIELD key [<count> [WITHVALUES]] */
+void hrandfieldCommand(client *c) {
+ long l;
+ int withvalues = 0;
+ robj *hash;
+ ziplistEntry ele;
+
+ if (c->argc >= 3) {
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
+ if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withvalues"))) {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ } else if (c->argc == 4)
+ withvalues = 1;
+ hrandfieldWithCountCommand(c, l, withvalues);
+ return;
+ }
+
+ /* Handle variant without <count> argument. Reply with a simple bulk string. */
+ if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL ||
+ checkType(c,hash,OBJ_HASH)) {
+ return;
+ }
+
+ hashTypeRandomElement(hash,hashTypeLength(hash),&ele,NULL);
+ hashReplyFromZiplistEntry(c, &ele);
+}
diff --git a/src/t_set.c b/src/t_set.c
index 64bbbd3a0..de0a9f954 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -690,8 +690,9 @@ void srandmemberWithCountCommand(client *c) {
/* CASE 1: The count was negative, so the extraction method is just:
* "return N random elements" sampling the whole set every time.
* This case is trivial and can be served without auxiliary data
- * structures. */
- if (!uniq) {
+ * structures. This case is the only one that also needs to return the
+ * elements in random order. */
+ if (!uniq || count == 1) {
addReplySetLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
@@ -713,7 +714,7 @@ void srandmemberWithCountCommand(client *c) {
}
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
- d = dictCreate(&objectKeyPointerValueDictType,NULL);
+ d = dictCreate(&sdsReplyDictType,NULL);
/* CASE 3:
* The number of elements inside the set is not greater than
@@ -729,13 +730,13 @@ void srandmemberWithCountCommand(client *c) {
/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
- while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
+ while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval = DICT_ERR;
if (encoding == OBJ_ENCODING_INTSET) {
- retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
+ retval = dictAdd(d,sdsfromlonglong(llele),NULL);
} else {
- retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
+ retval = dictAdd(d,sdsdup(ele),NULL);
}
serverAssert(retval == DICT_OK);
}
@@ -743,11 +744,12 @@ void srandmemberWithCountCommand(client *c) {
serverAssert(dictSize(d) == size);
/* Remove random elements to reach the right count. */
- while(size > count) {
+ while (size > count) {
dictEntry *de;
-
de = dictGetRandomKey(d);
- dictDelete(d,dictGetKey(de));
+ dictUnlink(d,dictGetKey(de));
+ sdsfree(dictGetKey(de));
+ dictFreeUnlinkedEntry(d,de);
size--;
}
}
@@ -758,22 +760,22 @@ void srandmemberWithCountCommand(client *c) {
* to reach the specified count. */
else {
unsigned long added = 0;
- robj *objele;
+ sds sdsele;
- while(added < count) {
+ while (added < count) {
encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == OBJ_ENCODING_INTSET) {
- objele = createStringObjectFromLongLong(llele);
+ sdsele = sdsfromlonglong(llele);
} else {
- objele = createStringObject(ele,sdslen(ele));
+ sdsele = sdsdup(ele);
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
* in the result dictionary. */
- if (dictAdd(d,objele,NULL) == DICT_OK)
+ if (dictAdd(d,sdsele,NULL) == DICT_OK)
added++;
else
- decrRefCount(objele);
+ sdsfree(sdsele);
}
}
@@ -785,12 +787,13 @@ void srandmemberWithCountCommand(client *c) {
addReplySetLen(c,count);
di = dictGetIterator(d);
while((de = dictNext(di)) != NULL)
- addReplyBulk(c,dictGetKey(de));
+ addReplyBulkSds(c,dictGetKey(de));
dictReleaseIterator(di);
dictRelease(d);
}
}
+/* SRANDMEMBER key [<count>] */
void srandmemberCommand(client *c) {
robj *set;
sds ele;
@@ -805,6 +808,7 @@ void srandmemberCommand(client *c) {
return;
}
+ /* Handle variant without <count> argument. Reply with a simple bulk string. */
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
diff --git a/src/t_stream.c b/src/t_stream.c
index f991765eb..197b7d4f7 100644
--- a/src/t_stream.c
+++ b/src/t_stream.c
@@ -818,6 +818,28 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
return deleted;
}
+/* Trims a stream by length. Returns the number of deleted items. */
+int64_t streamTrimByLength(stream *s, long long maxlen, int approx) {
+ streamAddTrimArgs args = {
+ .trim_strategy = TRIM_STRATEGY_MAXLEN,
+ .approx_trim = approx,
+ .limit = approx ? 100 * server.stream_node_max_entries : 0,
+ .maxlen = maxlen
+ };
+ return streamTrim(s, &args);
+}
+
+/* Trims a stream by minimum ID. Returns the number of deleted items. */
+int64_t streamTrimByID(stream *s, streamID minid, int approx) {
+ streamAddTrimArgs args = {
+ .trim_strategy = TRIM_STRATEGY_MINID,
+ .approx_trim = approx,
+ .limit = approx ? 100 * server.stream_node_max_entries : 0,
+ .minid = minid
+ };
+ return streamTrim(s, &args);
+}
+
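Editor's note: a minimal usage sketch of the wrappers above (the stream pointer and the literal values are illustrative). Passing a non-zero approx flag mirrors XTRIM's '~' option and may leave slightly more entries than requested for efficiency:

    static void exampleStreamTrim(stream *s) {
        /* Trim down to roughly 1000 entries (approximate, like XTRIM ... MAXLEN ~ 1000). */
        int64_t deleted = streamTrimByLength(s, 1000, 1);

        /* Or drop every entry with an ID below a given one (exact trim). */
        streamID minid = { .ms = 1620000000000ULL, .seq = 0 };
        deleted += streamTrimByID(s, minid, 0);
        (void) deleted;
    }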
/* Parse the arguments of XADD/XTRIM.
*
* See streamAddTrimArgs for more details about the arguments handled.
@@ -1625,7 +1647,7 @@ robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) {
* treated as an invalid ID.
*
* If 'c' is set to NULL, no reply is sent to the client. */
-int streamGenericParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int strict) {
+int streamGenericParseIDOrReply(client *c, const robj *o, streamID *id, uint64_t missing_seq, int strict) {
char buf[128];
if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid;
memcpy(buf,o->ptr,sdslen(o->ptr)+1);
@@ -1661,6 +1683,11 @@ invalid:
return C_ERR;
}
+/* Wrapper for streamGenericParseIDOrReply() used by module API. */
+int streamParseID(const robj *o, streamID *id) {
+ return streamGenericParseIDOrReply(NULL, o, id, 0, 0);
+}
+
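Editor's note: a short usage sketch for the module-facing wrapper above (variable names are illustrative). It accepts the same ID formats as the reply-producing variant but never talks to a client:

    static int exampleParseStreamId(robj *idobj) {
        streamID id;
        if (streamParseID(idobj, &id) != C_OK)
            return C_ERR;          /* malformed ID; no error reply is produced */
        /* ... use id.ms / id.seq ... */
        return C_OK;
    }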
/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
* 0, to be used when - and + are acceptable IDs. */
int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) {
diff --git a/src/t_string.c b/src/t_string.c
index 2792f5557..de67484fc 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -61,13 +61,16 @@ static int checkStringLength(client *c, long long size) {
* If ok_reply is NULL "+OK" is used.
* If abort_reply is NULL, "$-1" is used. */
-#define OBJ_SET_NO_FLAGS 0
+#define OBJ_NO_FLAGS 0
#define OBJ_SET_NX (1<<0) /* Set if key not exists. */
#define OBJ_SET_XX (1<<1) /* Set if key exists. */
-#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given */
-#define OBJ_SET_PX (1<<3) /* Set if time in ms in given */
-#define OBJ_SET_KEEPTTL (1<<4) /* Set and keep the ttl */
+#define OBJ_EX (1<<2) /* Set if time in seconds is given */
+#define OBJ_PX (1<<3) /* Set if time in ms in given */
+#define OBJ_KEEPTTL (1<<4) /* Set and keep the ttl */
#define OBJ_SET_GET (1<<5) /* Set if want to get key before set */
+#define OBJ_EXAT (1<<6) /* Set if timestamp in second is given */
+#define OBJ_PXAT (1<<7) /* Set if timestamp in ms is given */
+#define OBJ_PERSIST (1<<8) /* Set if we need to remove the ttl */
void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmless warning */
@@ -93,91 +96,172 @@ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire,
if (getGenericCommand(c) == C_ERR) return;
}
- genericSetKey(c,c->db,key,val,flags & OBJ_SET_KEEPTTL,1);
+ genericSetKey(c,c->db,key, val,flags & OBJ_KEEPTTL,1);
server.dirty++;
- if (expire) setExpire(c,c->db,key,mstime()+milliseconds);
notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
- if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC,
- "expire",key,c->db->id);
+ if (expire) {
+ robj *exp = shared.pxat;
+
+ if ((flags & OBJ_PX) || (flags & OBJ_EX)) {
+ setExpire(c,c->db,key,milliseconds + mstime());
+ exp = shared.px;
+ } else {
+ setExpire(c,c->db,key,milliseconds);
+ }
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
+
+ /* Propagate as SET Key Value PXAT millisecond-timestamp if there is an EXAT/PXAT flag,
+ * or propagate as SET Key Value PX milliseconds if there is an EX/PX flag.
+ *
+ * Additionally, when we propagate the SET with PX (relative milliseconds) we translate
+ * it again to SET with PXAT for the AOF.
+ *
+ * Additional care is required while modifying the argument order: the AOF relies on the
+ * exp argument being at index 3 (see feedAppendOnlyFile).
+ */
+ robj *millisecondObj = createStringObjectFromLongLong(milliseconds);
+ rewriteClientCommandVector(c,5,shared.set,key,val,exp,millisecondObj);
+ decrRefCount(millisecondObj);
+ }
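/* Editor's note (illustrative, not part of the patch): per the comment above,
 * "SET k v EX 10" is rewritten for propagation as "SET k v PX 10000"
 * (relative), which the AOF layer then translates to "SET k v PXAT <now+10000>",
 * while "SET k v EXAT 1700000000" is rewritten directly as
 * "SET k v PXAT 1700000000000". */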
if (!(flags & OBJ_SET_GET)) {
addReply(c, ok_reply ? ok_reply : shared.ok);
}
+
+ /* Propagate without the GET argument (it isn't needed if we had an expire, since in that case we completely rewrote the command argv). */
+ if ((flags & OBJ_SET_GET) && !expire) {
+ int argc = 0;
+ int j;
+ robj **argv = zmalloc((c->argc-1)*sizeof(robj*));
+ for (j=0; j < c->argc; j++) {
+ char *a = c->argv[j]->ptr;
+ /* Skip GET which may be repeated multiple times. */
+ if (j >= 3 &&
+ (a[0] == 'g' || a[0] == 'G') &&
+ (a[1] == 'e' || a[1] == 'E') &&
+ (a[2] == 't' || a[2] == 'T') && a[3] == '\0')
+ continue;
+ argv[argc++] = c->argv[j];
+ incrRefCount(c->argv[j]);
+ }
+ replaceClientCommandVector(c, argc, argv);
+ }
}
-/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>] */
-void setCommand(client *c) {
- int j;
- robj *expire = NULL;
- int unit = UNIT_SECONDS;
- int flags = OBJ_SET_NO_FLAGS;
+#define COMMAND_GET 0
+#define COMMAND_SET 1
+/*
+ * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended
+ * string arguments used in the SET and GET commands.
+ *
+ * GET-specific options - PERSIST/DEL
+ * SET-specific options - XX/NX/GET
+ * Common options - EX/EXAT/PX/PXAT/KEEPTTL
+ *
+ * The function takes pointers to the client, flags, unit, a pointer to a pointer of the expire
+ * object (if it needs to be determined), and command_type, which can be COMMAND_GET or COMMAND_SET.
+ *
+ * If there are any syntax violations, C_ERR is returned; otherwise C_OK is returned.
+ *
+ * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any
+ * EX/EXAT/PX/PXAT arguments. Unit is updated to milliseconds if PX/PXAT is set.
+ */
+int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, int command_type) {
- for (j = 3; j < c->argc; j++) {
- char *a = c->argv[j]->ptr;
+ int j = command_type == COMMAND_GET ? 2 : 3;
+ for (; j < c->argc; j++) {
+ char *opt = c->argv[j]->ptr;
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
- if ((a[0] == 'n' || a[0] == 'N') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
- !(flags & OBJ_SET_XX) && !(flags & OBJ_SET_GET))
+ if ((opt[0] == 'n' || opt[0] == 'N') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_SET_XX) && !(*flags & OBJ_SET_GET) && (command_type == COMMAND_SET))
+ {
+ *flags |= OBJ_SET_NX;
+ } else if ((opt[0] == 'x' || opt[0] == 'X') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET))
+ {
+ *flags |= OBJ_SET_XX;
+ } else if ((opt[0] == 'g' || opt[0] == 'G') &&
+ (opt[1] == 'e' || opt[1] == 'E') &&
+ (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' &&
+ !(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET))
+ {
+ *flags |= OBJ_SET_GET;
+ } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET))
+ {
+ *flags |= OBJ_KEEPTTL;
+ } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) &&
+ !(*flags & OBJ_KEEPTTL))
{
- flags |= OBJ_SET_NX;
- } else if ((a[0] == 'x' || a[0] == 'X') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
- !(flags & OBJ_SET_NX))
+ *flags |= OBJ_PERSIST;
+ } else if ((opt[0] == 'e' || opt[0] == 'E') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) &&
+ !(*flags & OBJ_PXAT) && next)
{
- flags |= OBJ_SET_XX;
- } else if ((a[0] == 'g' || a[0] == 'G') &&
- (a[1] == 'e' || a[1] == 'E') &&
- (a[2] == 't' || a[2] == 'T') && a[3] == '\0' &&
- !(flags & OBJ_SET_NX)) {
- flags |= OBJ_SET_GET;
- } else if (!strcasecmp(c->argv[j]->ptr,"KEEPTTL") &&
- !(flags & OBJ_SET_EX) && !(flags & OBJ_SET_PX))
+ *flags |= OBJ_EX;
+ *expire = next;
+ j++;
+ } else if ((opt[0] == 'p' || opt[0] == 'P') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PXAT) && next)
{
- flags |= OBJ_SET_KEEPTTL;
- } else if ((a[0] == 'e' || a[0] == 'E') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
- !(flags & OBJ_SET_KEEPTTL) &&
- !(flags & OBJ_SET_PX) && next)
+ *flags |= OBJ_PX;
+ *unit = UNIT_MILLISECONDS;
+ *expire = next;
+ j++;
+ } else if ((opt[0] == 'e' || opt[0] == 'E') &&
+ (opt[1] == 'x' || opt[1] == 'X') &&
+ (opt[2] == 'a' || opt[2] == 'A') &&
+ (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_PX) &&
+ !(*flags & OBJ_PXAT) && next)
{
- flags |= OBJ_SET_EX;
- unit = UNIT_SECONDS;
- expire = next;
+ *flags |= OBJ_EXAT;
+ *expire = next;
j++;
- } else if ((a[0] == 'p' || a[0] == 'P') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
- !(flags & OBJ_SET_KEEPTTL) &&
- !(flags & OBJ_SET_EX) && next)
+ } else if ((opt[0] == 'p' || opt[0] == 'P') &&
+ (opt[1] == 'x' || opt[1] == 'X') &&
+ (opt[2] == 'a' || opt[2] == 'A') &&
+ (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && next)
{
- flags |= OBJ_SET_PX;
- unit = UNIT_MILLISECONDS;
- expire = next;
+ *flags |= OBJ_PXAT;
+ *unit = UNIT_MILLISECONDS;
+ *expire = next;
j++;
} else {
addReplyErrorObject(c,shared.syntaxerr);
- return;
+ return C_ERR;
}
}
+ return C_OK;
+}
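
As a quick illustration of the mutual-exclusion checks above, both of the following would be
rejected with a syntax error (hypothetical redis-cli exchange):

    127.0.0.1:6379> SET k v EX 10 KEEPTTL
    (error) ERR syntax error
    127.0.0.1:6379> SET k v NX XX
    (error) ERR syntax error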
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
+/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>]
+ * [EXAT <seconds-timestamp>] [PXAT <milliseconds-timestamp>] */
+void setCommand(client *c) {
+ robj *expire = NULL;
+ int unit = UNIT_SECONDS;
+ int flags = OBJ_NO_FLAGS;
- /* Propagate without the GET argument */
- if (flags & OBJ_SET_GET) {
- int argc = 0;
- robj **argv = zmalloc((c->argc-1)*sizeof(robj*));
- for (j=0; j < c->argc; j++) {
- char *a = c->argv[j]->ptr;
- /* Skip GET which may be repeated multiple times. */
- if (j >= 3 &&
- (a[0] == 'g' || a[0] == 'G') &&
- (a[1] == 'e' || a[1] == 'E') &&
- (a[2] == 't' || a[2] == 'T') && a[3] == '\0')
- continue;
- argv[argc++] = c->argv[j];
- incrRefCount(c->argv[j]);
- }
- replaceClientCommandVector(c, argc, argv);
+ if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_SET) != C_OK) {
+ return;
}
+
+ c->argv[2] = tryObjectEncoding(c->argv[2]);
+ setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
}
void setnxCommand(client *c) {
@@ -187,12 +271,12 @@ void setnxCommand(client *c) {
void setexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_EX,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
}
void psetexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_PX,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
}
int getGenericCommand(client *c) {
@@ -213,6 +297,112 @@ void getCommand(client *c) {
getGenericCommand(c);
}
+/*
+ * GETEX <key> [PERSIST] [EX seconds] [PX milliseconds] [EXAT seconds-timestamp] [PXAT milliseconds-timestamp]
+ *
+ * The getexCommand() function implements extended options and variants of the GET command.
+ * Unlike the GET command, this command is not read-only.
+ *
+ * The default behavior when no options are specified is the same as GET and does not alter any TTL.
+ *
+ * Only one of the options below can be used at a given time.
+ *
+ * 1. PERSIST removes any TTL associated with the key.
+ * 2. EX Set expiry TTL in seconds.
+ * 3. PX Set expiry TTL in milliseconds.
+ * 4. EXAT Same as EX, but instead of specifying the number of seconds representing the TTL
+ * (time to live), it takes an absolute Unix timestamp in seconds.
+ * 5. PXAT Same as PX, but instead of specifying the number of milliseconds representing the TTL
+ * (time to live), it takes an absolute Unix timestamp in milliseconds.
+ *
+ * The command returns either a bulk string, an error, or nil.
+ */
+void getexCommand(client *c) {
+ robj *expire = NULL;
+ int unit = UNIT_SECONDS;
+ int flags = OBJ_NO_FLAGS;
+
+ if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_GET) != C_OK) {
+ return;
+ }
+
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL)
+ return;
+
+ if (checkType(c,o,OBJ_STRING)) {
+ return;
+ }
+
+ long long milliseconds = 0;
+
+ /* Validate the expiration time value first */
+ if (expire) {
+ if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK)
+ return;
+ if (milliseconds <= 0) {
+ addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name);
+ return;
+ }
+ if (unit == UNIT_SECONDS) milliseconds *= 1000;
+ }
+
+ /* We need to do this before we expire the key or delete it */
+ addReplyBulk(c,o);
+
+ /* This command is never propagated as is. It is either propagated as PEXPIRE[AT], DEL, UNLINK or PERSIST.
+ * That is why it doesn't need special handling in feedAppendOnlyFile to convert a relative expire time to an absolute one. */
+ if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) {
+ /* When a PXAT/EXAT absolute timestamp is specified, there is a chance that the timestamp
+ * has already elapsed, so delete the key in that case. */
+ int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db, c->argv[1]) :
+ dbSyncDelete(c->db, c->argv[1]);
+ serverAssert(deleted);
+ robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+ rewriteClientCommandVector(c,2,aux,c->argv[1]);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+ server.dirty++;
+ } else if (expire) {
+ robj *exp = shared.pexpireat;
+ if ((flags & OBJ_PX) || (flags & OBJ_EX)) {
+ setExpire(c,c->db,c->argv[1],milliseconds + mstime());
+ exp = shared.pexpire;
+ } else {
+ setExpire(c,c->db,c->argv[1],milliseconds);
+ }
+
+ robj* millisecondObj = createStringObjectFromLongLong(milliseconds);
+ rewriteClientCommandVector(c,3,exp,c->argv[1],millisecondObj);
+ decrRefCount(millisecondObj);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",c->argv[1],c->db->id);
+ server.dirty++;
+ } else if (flags & OBJ_PERSIST) {
+ if (removeExpire(c->db, c->argv[1])) {
+ signalModifiedKey(c, c->db, c->argv[1]);
+ rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id);
+ server.dirty++;
+ }
+ }
+}
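
An illustrative sketch of how the branches above translate into propagation (key and values
are hypothetical; <past-ts> stands for an already elapsed Unix timestamp):

    GETEX mykey EX 100         -> propagated as: PEXPIRE mykey 100000
    GETEX mykey PERSIST        -> propagated as: PERSIST mykey
    GETEX mykey EXAT <past-ts> -> propagated as: DEL mykey (UNLINK with lazyfree-lazy-expire)
    GETEX mykey                -> no TTL change, nothing is propagated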
+
+void getdelCommand(client *c) {
+ if (getGenericCommand(c) == C_ERR) return;
+ int deleted = server.lazyfree_lazy_user_del ? dbAsyncDelete(c->db, c->argv[1]) :
+ dbSyncDelete(c->db, c->argv[1]);
+ if (deleted) {
+ /* Propagate as DEL/UNLINK command */
+ robj *aux = server.lazyfree_lazy_user_del ? shared.unlink : shared.del;
+ rewriteClientCommandVector(c,2,aux,c->argv[1]);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+ server.dirty++;
+ }
+}
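
For example, a hypothetical GETDEL exchange would look like this (illustrative values):

    127.0.0.1:6379> SET k hello
    OK
    127.0.0.1:6379> GETDEL k
    "hello"
    127.0.0.1:6379> EXISTS k
    (integer) 0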
+
void getsetCommand(client *c) {
if (getGenericCommand(c) == C_ERR) return;
c->argv[2] = tryObjectEncoding(c->argv[2]);
@@ -221,9 +411,7 @@ void getsetCommand(client *c) {
server.dirty++;
/* Propagate as SET command */
- robj *setcmd = createStringObject("SET",3);
- rewriteClientCommandArgument(c,0,setcmd);
- decrRefCount(setcmd);
+ rewriteClientCommandArgument(c,0,shared.set);
}
void setrangeCommand(client *c) {
@@ -443,7 +631,7 @@ void decrbyCommand(client *c) {
void incrbyfloatCommand(client *c) {
long double incr, value;
- robj *o, *new, *aux1, *aux2;
+ robj *o, *new, *aux;
o = lookupKeyWrite(c->db,c->argv[1]);
if (checkType(c,o,OBJ_STRING)) return;
@@ -469,13 +657,11 @@ void incrbyfloatCommand(client *c) {
/* Always replicate INCRBYFLOAT as a SET command with the final value
* in order to make sure that differences in float precision or formatting
* will not create differences in replicas or after an AOF restart. */
- aux1 = createStringObject("SET",3);
- rewriteClientCommandArgument(c,0,aux1);
- decrRefCount(aux1);
+ rewriteClientCommandArgument(c,0,shared.set);
rewriteClientCommandArgument(c,2,new);
- aux2 = createStringObject("KEEPTTL",7);
- rewriteClientCommandArgument(c,3,aux2);
- decrRefCount(aux2);
+ aux = createStringObject("KEEPTTL",7);
+ rewriteClientCommandArgument(c,3,aux);
+ decrRefCount(aux);
}
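
Concretely, assuming a key currently holding 10.5 (hypothetical values), the rewrite above
means replicas and the AOF receive the deterministic final value:

    INCRBYFLOAT k 0.1   -> replicated/persisted as: SET k 10.6 KEEPTTL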
void appendCommand(client *c) {
diff --git a/src/t_zset.c b/src/t_zset.c
index 3d63c41c6..b55fc169e 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -721,20 +721,26 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
* Ziplist-backed sorted set API
*----------------------------------------------------------------------------*/
+double zzlStrtod(unsigned char *vstr, unsigned int vlen) {
+ char buf[128];
+ if (vlen > sizeof(buf) - 1)
+ vlen = sizeof(buf) - 1;
+ memcpy(buf,vstr,vlen);
+ buf[vlen] = '\0';
+ return strtod(buf,NULL);
+}
+
double zzlGetScore(unsigned char *sptr) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- char buf[128];
double score;
serverAssert(sptr != NULL);
serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
- memcpy(buf,vstr,vlen);
- buf[vlen] = '\0';
- score = strtod(buf,NULL);
+ score = zzlStrtod(vstr,vlen);
} else {
score = vlong;
}
@@ -1653,6 +1659,48 @@ int zsetZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) {
return ret;
}
+/* Create a new sds string from the ziplist entry. */
+sds zsetSdsFromZiplistEntry(ziplistEntry *e) {
+ return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval);
+}
+
+/* Reply with bulk string from the ziplist entry. */
+void zsetReplyFromZiplistEntry(client *c, ziplistEntry *e) {
+ if (e->sval)
+ addReplyBulkCBuffer(c, e->sval, e->slen);
+ else
+ addReplyBulkLongLong(c, e->lval);
+}
+
+
+/* Return a random element from a non-empty zset.
+ * 'key' and 'val' will be set to hold the element.
+ * The memory in `key` is not to be freed or modified by the caller.
+ * 'score' can be NULL in which case it's not extracted. */
+void zsetTypeRandomElement(robj *zsetobj, unsigned long zsetsize, ziplistEntry *key, double *score) {
+ if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zsetobj->ptr;
+ dictEntry *de = dictGetFairRandomKey(zs->dict);
+ sds s = dictGetKey(de);
+ key->sval = (unsigned char*)s;
+ key->slen = sdslen(s);
+ if (score)
+ *score = *(double*)dictGetVal(de);
+ } else if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ ziplistEntry val;
+ ziplistRandomPair(zsetobj->ptr, zsetsize, key, &val);
+ if (score) {
+ if (val.sval) {
+ *score = zzlStrtod(val.sval,val.slen);
+ } else {
+ *score = (double)val.lval;
+ }
+ }
+ } else {
+ serverPanic("Unknown zset encoding");
+ }
+}
+
/*-----------------------------------------------------------------------------
* Sorted set commands
*----------------------------------------------------------------------------*/
@@ -2543,7 +2591,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
/* read keys to be used for input */
src = zcalloc(sizeof(zsetopsrc) * setnum);
for (i = 0, j = numkeysIndex+1; i < setnum; i++, j++) {
- robj *obj = lookupKeyWrite(c->db,c->argv[j]);
+ robj *obj = dstkey ?
+ lookupKeyWrite(c->db,c->argv[j]) :
+ lookupKeyRead(c->db,c->argv[j]);
if (obj != NULL) {
if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
zfree(src);
@@ -2749,6 +2799,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
unsigned long length = dstzset->zsl->length;
zskiplist *zsl = dstzset->zsl;
zskiplistNode *zn = zsl->header->level[0].forward;
+ /* In case of WITHSCORES, respond with a single flat array in RESP2, and
+ * nested arrays in RESP3. We can't use a map response type since the
+ * client library needs to preserve the order of the elements. */
if (withscores && c->resp == 2)
addReplyArrayLen(c, length*2);
else
@@ -2866,6 +2919,9 @@ static void zrangeResultEmitLongLongToClient(zrange_result_handler *handler,
static void zrangeResultFinalizeClient(zrange_result_handler *handler,
size_t result_count)
{
+ /* In case of WITHSCORES, respond with a single flat array in RESP2, and
+ * nested arrays in RESP3. We can't use a map response type since the
+ * client library needs to preserve the order of the elements. */
if (handler->withscores && (handler->client->resp == 2)) {
result_count *= 2;
}
@@ -3071,8 +3127,8 @@ void zrevrangeCommand(client *c) {
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */
void genericZrangebyscoreCommand(zrange_result_handler *handler,
- zrangespec *range, robj *zobj, int withscores, long offset,
- long limit, int reverse) {
+ zrangespec *range, robj *zobj, long offset, long limit,
+ int reverse) {
client *c = handler->client;
unsigned long rangelen = 0;
@@ -3172,8 +3228,7 @@ void genericZrangebyscoreCommand(zrange_result_handler *handler,
}
rangelen++;
- handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele),
- ((withscores) ? ln->score : ln->score));
+ handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score);
/* Move to next node */
if (reverse) {
@@ -3605,11 +3660,16 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st
}
/* Step 3: Lookup the key and get the range. */
- if (((zobj = lookupKeyReadOrReply(c, key, shared.emptyarray)) == NULL)
- || checkType(c, zobj, OBJ_ZSET)) {
+ zobj = handler->dstkey ?
+ lookupKeyWrite(c->db,key) :
+ lookupKeyRead(c->db,key);
+ if (zobj == NULL) {
+ addReply(c,shared.emptyarray);
goto cleanup;
}
+ if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
+
/* Step 4: Pass this to the command-specific handler. */
switch (rangetype) {
case ZRANGE_AUTO:
@@ -3619,8 +3679,8 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st
break;
case ZRANGE_SCORE:
- genericZrangebyscoreCommand(handler, &range, zobj, opt_withscores || store,
- opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
+ genericZrangebyscoreCommand(handler, &range, zobj, opt_offset,
+ opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
break;
case ZRANGE_LEX:
@@ -3895,3 +3955,216 @@ void bzpopminCommand(client *c) {
void bzpopmaxCommand(client *c) {
blockingGenericZpopCommand(c,ZSET_MAX);
}
+
+/* How many times bigger should the zset be, compared to the requested size,
+ * for us not to use the "remove elements" strategy? See the
+ * implementation below for more info. */
+#define ZRANDMEMBER_SUB_STRATEGY_MUL 3
+
+void zrandmemberWithCountCommand(client *c, long l, int withscores) {
+ unsigned long count, size;
+ int uniq = 1;
+ robj *zsetobj;
+
+ if ((zsetobj = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp]))
+ == NULL || checkType(c, zsetobj, OBJ_ZSET)) return;
+ size = zsetLength(zsetobj);
+
+ if(l >= 0) {
+ count = (unsigned long) l;
+ } else {
+ count = -l;
+ uniq = 0;
+ }
+
+ /* If count is zero, serve it ASAP to avoid special cases later. */
+ if (count == 0) {
+ addReply(c,shared.emptyarray);
+ return;
+ }
+
+ /* CASE 1: The count was negative, so the extraction method is just:
+ * "return N random elements" sampling the whole set every time.
+ * This case is trivial and can be served without auxiliary data
+ * structures. This case is the only one that also needs to return the
+ * elements in random order. */
+ if (!uniq || count == 1) {
+ if (withscores && c->resp == 2)
+ addReplyArrayLen(c, count*2);
+ else
+ addReplyArrayLen(c, count);
+ if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zsetobj->ptr;
+ while (count--) {
+ dictEntry *de = dictGetFairRandomKey(zs->dict);
+ sds key = dictGetKey(de);
+ if (withscores && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addReplyBulkCBuffer(c, key, sdslen(key));
+ if (withscores)
+ addReplyDouble(c, dictGetDoubleVal(de));
+ }
+ } else if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ ziplistEntry *keys, *vals = NULL;
+ keys = zmalloc(sizeof(ziplistEntry)*count);
+ if (withscores)
+ vals = zmalloc(sizeof(ziplistEntry)*count);
+ ziplistRandomPairs(zsetobj->ptr, count, keys, vals);
+ for (unsigned long i = 0; i < count; i++) {
+ if (withscores && c->resp > 2)
+ addReplyArrayLen(c,2);
+ if (keys[i].sval)
+ addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+ else
+ addReplyBulkLongLong(c, keys[i].lval);
+ if (withscores) {
+ if (vals[i].sval) {
+ addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
+ } else
+ addReplyDouble(c, vals[i].lval);
+ }
+ }
+ zfree(keys);
+ zfree(vals);
+ }
+ return;
+ }
+
+ zsetopsrc src;
+ zsetopval zval;
+ src.subject = zsetobj;
+ src.type = zsetobj->type;
+ src.encoding = zsetobj->encoding;
+ zuiInitIterator(&src);
+ memset(&zval, 0, sizeof(zval));
+
+ /* Initialize the reply length: RESP3 responds with nested arrays, RESP2 with a flat one. */
+ long reply_size = count < size ? count : size;
+ if (withscores && c->resp == 2)
+ addReplyArrayLen(c, reply_size*2);
+ else
+ addReplyArrayLen(c, reply_size);
+
+ /* CASE 2:
+ * The number of requested elements is greater than the number of
+ * elements inside the zset: simply return the whole zset. */
+ if (count >= size) {
+ while (zuiNext(&src, &zval)) {
+ if (withscores && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addReplyBulkSds(c, zuiNewSdsFromValue(&zval));
+ if (withscores)
+ addReplyDouble(c, zval.score);
+ }
+ return;
+ }
+
+ /* CASE 3:
+ * The number of elements inside the zset is not greater than
+ * ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
+ * In this case we create a dict from scratch with all the elements, and
+ * subtract random elements to reach the requested number of elements.
+ *
+ * This is done because if the number of requested elements is just
+ * a bit less than the number of elements in the set, the natural approach
+ * used in CASE 4 is highly inefficient. */
+ if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) {
+ dict *d = dictCreate(&sdsReplyDictType, NULL);
+ /* Add all the elements into the temporary dictionary. */
+ while (zuiNext(&src, &zval)) {
+ sds key = zuiNewSdsFromValue(&zval);
+ dictEntry *de = dictAddRaw(d, key, NULL);
+ serverAssert(de);
+ if (withscores)
+ dictSetDoubleVal(de, zval.score);
+ }
+ serverAssert(dictSize(d) == size);
+
+ /* Remove random elements to reach the right count. */
+ while (size > count) {
+ dictEntry *de;
+ de = dictGetRandomKey(d);
+ dictUnlink(d,dictGetKey(de));
+ sdsfree(dictGetKey(de));
+ dictFreeUnlinkedEntry(d,de);
+ size--;
+ }
+
+ /* Reply with what's in the dict and release memory */
+ dictIterator *di;
+ dictEntry *de;
+ di = dictGetIterator(d);
+ while ((de = dictNext(di)) != NULL) {
+ if (withscores && c->resp > 2)
+ addReplyArrayLen(c,2);
+ addReplyBulkSds(c, dictGetKey(de));
+ if (withscores)
+ addReplyDouble(c, dictGetDoubleVal(de));
+ }
+
+ dictReleaseIterator(di);
+ dictRelease(d);
+ }
+
+ /* CASE 4: We have a big zset compared to the requested number of elements.
+ * In this case we can simply get random elements from the zset and add
+ * to the temporary set, trying to eventually get enough unique elements
+ * to reach the specified count. */
+ else {
+ unsigned long added = 0;
+ dict *d = dictCreate(&hashDictType, NULL);
+
+ while (added < count) {
+ ziplistEntry key;
+ double score;
+ zsetTypeRandomElement(zsetobj, size, &key, withscores ? &score: NULL);
+
+ /* Try to add the object to the dictionary. If it already exists
+ * free it, otherwise increment the number of objects we have
+ * in the result dictionary. */
+ sds skey = zsetSdsFromZiplistEntry(&key);
+ if (dictAdd(d,skey,NULL) != DICT_OK) {
+ sdsfree(skey);
+ continue;
+ }
+ added++;
+
+ if (withscores && c->resp > 2)
+ addReplyArrayLen(c,2);
+ zsetReplyFromZiplistEntry(c, &key);
+ if (withscores)
+ addReplyDouble(c, score);
+ }
+
+ /* Release memory */
+ dictRelease(d);
+ }
+}
+
+/* ZRANDMEMBER key [<count> [WITHSCORES]] */
+void zrandmemberCommand(client *c) {
+ long l;
+ int withscores = 0;
+ robj *zset;
+ ziplistEntry ele;
+
+ if (c->argc >= 3) {
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
+ if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withscores"))) {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ } else if (c->argc == 4)
+ withscores = 1;
+ zrandmemberWithCountCommand(c, l, withscores);
+ return;
+ }
+
+ /* Handle variant without <count> argument. Reply with simple bulk string */
+ if ((zset = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL ||
+ checkType(c,zset,OBJ_ZSET)) {
+ return;
+ }
+
+ zsetTypeRandomElement(zset, zsetLength(zset), &ele,NULL);
+ zsetReplyFromZiplistEntry(c,&ele);
+}
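
A hypothetical session showing the count semantics implemented above (replies are
illustrative, since the selection is random):

    127.0.0.1:6379> ZADD z 1 a 2 b 3 c
    (integer) 3
    127.0.0.1:6379> ZRANDMEMBER z
    "b"
    127.0.0.1:6379> ZRANDMEMBER z 5
    1) "a"
    2) "c"
    3) "b"
    127.0.0.1:6379> ZRANDMEMBER z -5
    1) "c"
    2) "a"
    3) "c"
    4) "b"
    5) "a"

A positive count returns unique elements (capped at the zset size); a negative count allows
repetitions and always returns exactly |count| elements.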
diff --git a/src/util.c b/src/util.c
index eca212e57..3243fa51e 100644
--- a/src/util.c
+++ b/src/util.c
@@ -749,7 +749,7 @@ sds getAbsolutePath(char *filename) {
* Gets the proper timezone in a more portable fashion
* i.e timezone variables are linux specific.
*/
-unsigned long getTimeZone(void) {
+long getTimeZone(void) {
#if defined(__linux__) || defined(__sun)
return timezone;
#else
@@ -758,7 +758,7 @@ unsigned long getTimeZone(void) {
gettimeofday(&tv, &tz);
- return tz.tz_minuteswest * 60UL;
+ return tz.tz_minuteswest * 60L;
#endif
}
diff --git a/src/util.h b/src/util.h
index e9ad0ee4d..feaa82924 100644
--- a/src/util.h
+++ b/src/util.h
@@ -60,7 +60,7 @@ int string2d(const char *s, size_t slen, double *dp);
int d2string(char *buf, size_t len, double value);
int ld2string(char *buf, size_t len, long double value, ld2string_mode mode);
sds getAbsolutePath(char *filename);
-unsigned long getTimeZone(void);
+long getTimeZone(void);
int pathIsBaseName(char *path);
#ifdef REDIS_TEST
diff --git a/src/version.h b/src/version.h
index d408dd3e8..1c8c1f2a6 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,2 +1,2 @@
-#define REDIS_VERSION "6.1.241"
-#define REDIS_VERSION_NUM 0x000601f1
+#define REDIS_VERSION "6.1.242"
+#define REDIS_VERSION_NUM 0x000601f2
diff --git a/src/ziplist.c b/src/ziplist.c
index a4f38c5e8..0cd20630a 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -1498,6 +1498,89 @@ int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
return 1;
}
+/* Randomly select a pair of key and value.
+ * total_count is a pre-computed length/2 of the ziplist (to avoid calls to ziplistLen)
+ * 'key' and 'val' are used to store the result key value pair.
+ * 'val' can be NULL if the value is not needed. */
+void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) {
+ int ret;
+ unsigned char *p;
+
+ /* Avoid div by zero on corrupt ziplist */
+ assert(total_count);
+
+ /* Generate even indexes, because the ziplist stores key-value pairs */
+ int r = (rand() % total_count) * 2;
+ p = ziplistIndex(zl, r);
+ ret = ziplistGet(p, &key->sval, &key->slen, &key->lval);
+ assert(ret != 0);
+
+ if (!val)
+ return;
+ p = ziplistNext(zl, p);
+ ret = ziplistGet(p, &val->sval, &val->slen, &val->lval);
+ assert(ret != 0);
+}
+
+/* int compare for qsort */
+int intCompare(const void *a, const void *b) {
+ return (*(int *) a - *(int *) b);
+}
+
+/* Helper to store a string from 'val'/'len' or an integer from 'lval' into 'dest' */
+static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) {
+ dest->sval = val;
+ dest->slen = len;
+ dest->lval = lval;
+}
+
+/* Randomly select 'count' key/value pairs and store them into the 'keys' and
+ * 'vals' args. Picks may repeat, and the order of the picked entries is random.
+ * The 'vals' arg can be NULL, in which case the values are skipped. */
+void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) {
+ unsigned char *p, *key, *value;
+ unsigned int klen, vlen;
+ long long klval, vlval;
+ typedef struct {
+ int index;
+ int order;
+ } rand_pick;
+ rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
+ unsigned long total_size = ziplistLen(zl)/2;
+
+ /* Avoid div by zero on corrupt ziplist */
+ assert(total_size);
+
+ /* create a pool of random indexes (some may be duplicate). */
+ for (int i = 0; i < count; i++) {
+ picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
+ /* keep track of the order we picked them */
+ picks[i].order = i;
+ }
+
+ /* sort by indexes. */
+ qsort(picks, count, sizeof(rand_pick), intCompare);
+
+ /* Fetch the elements from the ziplist into the output array, respecting the original pick order. */
+ int zipindex = 0, pickindex = 0;
+ p = ziplistIndex(zl, 0);
+ while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
+ p = ziplistNext(zl, p);
+ ziplistGet(p, &value, &vlen, &vlval);
+ while (pickindex < count && zipindex == picks[pickindex].index) {
+ int storeorder = picks[pickindex].order;
+ ziplistSaveValue(key, klen, klval, &keys[storeorder]);
+ if (vals)
+ ziplistSaveValue(value, vlen, vlval, &vals[storeorder]);
+ pickindex++;
+ }
+ zipindex += 2;
+ p = ziplistNext(zl, p);
+ }
+
+ zfree(picks);
+}
+
#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"
diff --git a/src/ziplist.h b/src/ziplist.h
index 5153951dc..5fb8fd46a 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -34,6 +34,15 @@
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
+/* Each entry in the ziplist is either a string or an integer. */
+typedef struct {
+ /* When string is used, it is provided with the length (slen). */
+ unsigned char *sval;
+ unsigned int slen;
+ /* When integer is used, 'sval' is NULL, and lval holds the value. */
+ long long lval;
+} ziplistEntry;
+
unsigned char *ziplistNew(void);
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
@@ -52,6 +61,8 @@ void ziplistRepr(unsigned char *zl);
typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
ziplistValidateEntryCB entry_cb, void *cb_userdata);
+void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
+void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals);
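
A minimal C sketch of how the new sampling API could be driven, assuming a ziplist that
stores alternating key/value entries as the zset and hash encodings do (test-style code,
not part of this patch):

    #include "ziplist.h"

    unsigned char *zl = ziplistNew();
    zl = ziplistPush(zl, (unsigned char*)"k1", 2, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"v1", 2, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"k2", 2, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"v2", 2, ZIPLIST_TAIL);

    ziplistEntry key, val;
    ziplistRandomPair(zl, 2, &key, &val);  /* total_count is the number of pairs (len/2) */

    ziplistEntry keys[2], vals[2];
    ziplistRandomPairs(zl, 2, keys, vals); /* picks may repeat; pass NULL to skip values */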
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);
diff --git a/tests/cluster/tests/18-cluster-nodes-slots.tcl b/tests/cluster/tests/18-cluster-nodes-slots.tcl
new file mode 100644
index 000000000..ca0b3ce0d
--- /dev/null
+++ b/tests/cluster/tests/18-cluster-nodes-slots.tcl
@@ -0,0 +1,62 @@
+# Optimize the CLUSTER NODES command by generating the slot topology of all nodes first
+
+source "../tests/includes/init-tests.tcl"
+
+proc cluster_allocate_with_continuous_slots {n} {
+ set slot 16383
+ set avg [expr ($slot+1) / $n]
+ while {$slot >= 0} {
+ set node [expr $slot/$avg >= $n ? $n-1 : $slot/$avg]
+ lappend slots_$node $slot
+ incr slot -1
+ }
+ for {set j 0} {$j < $n} {incr j} {
+ R $j cluster addslots {*}[set slots_${j}]
+ }
+}
+
+proc cluster_create_with_continuous_slots {masters slaves} {
+ cluster_allocate_with_continuous_slots $masters
+ if {$slaves} {
+ cluster_allocate_slaves $masters $slaves
+ }
+ assert_cluster_state ok
+}
+
+test "Create a 2 nodes cluster" {
+ cluster_create_with_continuous_slots 2 2
+}
+
+test "Cluster should start ok" {
+ assert_cluster_state ok
+}
+
+set master1 [Rn 0]
+set master2 [Rn 1]
+
+test "Continuous slots distribution" {
+ assert_match "* 0-8191*" [$master1 CLUSTER NODES]
+ assert_match "* 8192-16383*" [$master2 CLUSTER NODES]
+
+ $master1 CLUSTER DELSLOTS 4096
+ assert_match "* 0-4095 4097-8191*" [$master1 CLUSTER NODES]
+
+ $master2 CLUSTER DELSLOTS 12288
+ assert_match "* 8192-12287 12289-16383*" [$master2 CLUSTER NODES]
+}
+
+test "Discontinuous slots distribution" {
+ # Remove middle slots
+ $master1 CLUSTER DELSLOTS 4092 4094
+ assert_match "* 0-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
+ $master2 CLUSTER DELSLOTS 12284 12286
+ assert_match "* 8192-12283 12285 12287 12289-16383*" [$master2 CLUSTER NODES]
+
+ # Remove head slots
+ $master1 CLUSTER DELSLOTS 0 2
+ assert_match "* 1 3-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
+
+ # Remove tail slots
+ $master2 CLUSTER DELSLOTS 16380 16382 16383
+ assert_match "* 8192-12283 12285 12287 12289-16379 16381*" [$master2 CLUSTER NODES]
+}
diff --git a/tests/instances.tcl b/tests/instances.tcl
index a9cc01008..8cb616ae8 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -24,9 +24,11 @@ set ::simulate_error 0
set ::failed 0
set ::sentinel_instances {}
set ::redis_instances {}
+set ::global_config {}
set ::sentinel_base_port 20000
set ::redis_base_port 30000
set ::redis_port_count 1024
+set ::host "127.0.0.1"
set ::pids {} ; # We kill everything at exit
set ::dirs {} ; # We remove all the temp dirs at exit
set ::run_matching {} ; # If non empty, only tests matching pattern are run.
@@ -58,10 +60,9 @@ proc exec_instance {type dirname cfgfile} {
}
# Spawn a redis or sentinel instance, depending on 'type'.
-proc spawn_instance {type base_port count {conf {}}} {
+proc spawn_instance {type base_port count {conf {}} {base_conf_file ""}} {
for {set j 0} {$j < $count} {incr j} {
set port [find_available_port $base_port $::redis_port_count]
-
# Create a directory for this instance.
set dirname "${type}_${j}"
lappend ::dirs $dirname
@@ -70,7 +71,13 @@ proc spawn_instance {type base_port count {conf {}}} {
# Write the instance config file.
set cfgfile [file join $dirname $type.conf]
- set cfg [open $cfgfile w]
+ if {$base_conf_file ne ""} {
+ file copy -- $base_conf_file $cfgfile
+ set cfg [open $cfgfile a+]
+ } else {
+ set cfg [open $cfgfile w]
+ }
+
if {$::tls} {
puts $cfg "tls-port $port"
puts $cfg "tls-replication yes"
@@ -92,6 +99,9 @@ proc spawn_instance {type base_port count {conf {}}} {
foreach directive $conf {
puts $cfg $directive
}
+ dict for {name val} $::global_config {
+ puts $cfg "$name $val"
+ }
close $cfg
# Finally exec it and remember the pid for later cleanup.
@@ -119,18 +129,18 @@ proc spawn_instance {type base_port count {conf {}}} {
}
# Check availability finally
- if {[server_is_up 127.0.0.1 $port 100] == 0} {
+ if {[server_is_up $::host $port 100] == 0} {
set logfile [file join $dirname log.txt]
puts [exec tail $logfile]
abort_sentinel_test "Problems starting $type #$j: ping timeout, maybe server start failed, check $logfile"
}
# Push the instance into the right list
- set link [redis 127.0.0.1 $port 0 $::tls]
+ set link [redis $::host $port 0 $::tls]
$link reconnect 1
lappend ::${type}_instances [list \
pid $pid \
- host 127.0.0.1 \
+ host $::host \
port $port \
link $link \
]
@@ -232,6 +242,9 @@ proc parse_options {} {
set ::simulate_error 1
} elseif {$opt eq {--valgrind}} {
set ::valgrind 1
+ } elseif {$opt eq {--host}} {
+ incr j
+ set ::host ${val}
} elseif {$opt eq {--tls}} {
package require tls 1.6
::tls::init \
@@ -239,6 +252,10 @@ proc parse_options {} {
-certfile "$::tlsdir/client.crt" \
-keyfile "$::tlsdir/client.key"
set ::tls 1
+ } elseif {$opt eq {--config}} {
+ set val2 [lindex $::argv [expr $j+2]]
+ dict set ::global_config $val $val2
+ incr j 2
} elseif {$opt eq "--help"} {
puts "--single <pattern> Only runs tests specified by pattern."
puts "--dont-clean Keep log files on exit."
@@ -246,6 +263,8 @@ proc parse_options {} {
puts "--fail Simulate a test failure."
puts "--valgrind Run with valgrind."
puts "--tls Run tests in TLS mode."
+ puts "--host <host> Use hostname instead of 127.0.0.1."
+ puts "--config <k> <v> Extra config argument(s)."
puts "--help Shows this help."
exit 0
} else {
@@ -391,6 +410,11 @@ proc check_leaks instance_types {
# Execute all the units inside the 'tests' directory.
proc run_tests {} {
+ set sentinel_fd_leaks_file "sentinel_fd_leaks"
+ if { [file exists $sentinel_fd_leaks_file] } {
+ file delete $sentinel_fd_leaks_file
+ }
+
set tests [lsort [glob ../tests/*]]
foreach test $tests {
if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
@@ -405,7 +429,15 @@ proc run_tests {} {
# Print a message and exists with 0 / 1 according to zero or more failures.
proc end_tests {} {
- if {$::failed == 0} {
+ set sentinel_fd_leaks_file "sentinel_fd_leaks"
+ if { [file exists $sentinel_fd_leaks_file] } {
+ # temporarily disabling this error from failing the tests until leaks are fixed.
+ #puts [colorstr red "WARNING: sentinel test(s) failed, there are leaked fds in sentinel:"]
+ #puts [exec cat $sentinel_fd_leaks_file]
+ #exit 1
+ }
+
+ if {$::failed == 0 } {
puts "GOOD! No errors."
exit 0
} else {
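
For example, the new options could be combined like this when invoking the instance-based
test runners (the runtest-sentinel wrapper and the placeholders shown are assumptions, not
part of this patch):

    ./runtest-sentinel --host <hostname-or-ip>
    ./runtest-sentinel --config <directive> <value> --config <directive2> <value2>

Each --config pair is appended verbatim to every generated instance config file, and --host
replaces the default 127.0.0.1 when checking availability and building client links.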
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index d81521374..e64e2022a 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -272,4 +272,15 @@ tags {"aof"} {
}
}
}
+
+ start_server {overrides {appendonly {yes} appendfilename {appendonly.aof}}} {
+ test {GETEX should not append to AOF} {
+ set aof [file join [lindex [r config get dir] 1] appendonly.aof]
+ r set foo bar
+ set before [file size $aof]
+ r getex foo
+ set after [file size $aof]
+ assert_equal $before $after
+ }
+ }
}
diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl
index cc597bb4d..f5079e5ed 100644
--- a/tests/integration/corrupt-dump.tcl
+++ b/tests/integration/corrupt-dump.tcl
@@ -507,5 +507,16 @@ test {corrupt payload: fuzzer findings - valgrind invalid read} {
}
}
+test {corrupt payload: fuzzer findings - HRANDFIELD on bad ziplist} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ r RESTORE _int 0 "\x04\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"
+ catch {r HRANDFIELD _int}
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
} ;# tags
diff --git a/tests/integration/failover.tcl b/tests/integration/failover.tcl
new file mode 100644
index 000000000..c6818700d
--- /dev/null
+++ b/tests/integration/failover.tcl
@@ -0,0 +1,290 @@
+start_server {tags {"failover"}} {
+start_server {} {
+start_server {} {
+ set node_0 [srv 0 client]
+ set node_0_host [srv 0 host]
+ set node_0_port [srv 0 port]
+ set node_0_pid [srv 0 pid]
+
+ set node_1 [srv -1 client]
+ set node_1_host [srv -1 host]
+ set node_1_port [srv -1 port]
+ set node_1_pid [srv -1 pid]
+
+ set node_2 [srv -2 client]
+ set node_2_host [srv -2 host]
+ set node_2_port [srv -2 port]
+ set node_2_pid [srv -2 pid]
+
+ proc assert_digests_match {n1 n2 n3} {
+ assert_equal [$n1 debug digest] [$n2 debug digest]
+ assert_equal [$n2 debug digest] [$n3 debug digest]
+ }
+
+ test {failover command fails without connected replica} {
+ catch { $node_0 failover to $node_1_host $node_1_port } err
+ if {! [string match "ERR*" $err]} {
+ fail "failover command succeeded when replica not connected"
+ }
+ }
+
+ test {setup replication for following tests} {
+ $node_1 replicaof $node_0_host $node_0_port
+ $node_2 replicaof $node_0_host $node_0_port
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+ }
+
+ test {failover command fails with invalid host} {
+ catch { $node_0 failover to invalidhost $node_1_port } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails with invalid port} {
+ catch { $node_0 failover to $node_1_host invalidport } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails with just force and timeout} {
+ catch { $node_0 FAILOVER FORCE TIMEOUT 100} err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails when sent to a replica} {
+ catch { $node_1 failover to $node_1_host $node_1_port } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails with force without timeout} {
+ catch { $node_0 failover to $node_1_host $node_1_port FORCE } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command to specific replica works} {
+ set initial_psyncs [s -1 sync_partial_ok]
+ set initial_syncs [s -1 sync_full]
+
+ # Generate a delta between primary and replica
+ set load_handler [start_write_load $node_0_host $node_0_port 5]
+ exec kill -SIGSTOP [srv -1 pid]
+ wait_for_condition 50 100 {
+ [s 0 total_commands_processed] > 100
+ } else {
+ fail "Node 0 did not accept writes"
+ }
+ exec kill -SIGCONT [srv -1 pid]
+
+ # Execute the failover
+ $node_0 failover to $node_1_host $node_1_port
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 0 to node 1 did not finish"
+ }
+ stop_write_load $load_handler
+ $node_2 replicaof $node_1_host $node_1_port
+ wait_for_sync $node_0
+ wait_for_sync $node_2
+
+ assert_match *slave* [$node_0 role]
+ assert_match *master* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # We should accept psyncs from both nodes
+ assert_equal [expr [s -1 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ test {failover command to any replica works} {
+ set initial_psyncs [s -2 sync_partial_ok]
+ set initial_syncs [s -2 sync_full]
+
+ wait_for_ofs_sync $node_1 $node_2
+ # We stop node 0 to make sure node 2 is selected
+ exec kill -SIGSTOP $node_0_pid
+ $node_1 set CASE 1
+ $node_1 FAILOVER
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s -1 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 1 to node 2 did not finish"
+ }
+ exec kill -SIGCONT $node_0_pid
+ $node_0 replicaof $node_2_host $node_2_port
+
+ wait_for_sync $node_0
+ wait_for_sync $node_1
+
+ assert_match *slave* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *master* [$node_2 role]
+
+ # We should accept Psyncs from both nodes
+ assert_equal [expr [s -2 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ test {failover to a replica with force works} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ exec kill -SIGSTOP $node_0_pid
+ # node 0 will never acknowledge this write
+ $node_2 set case 2
+ $node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE
+
+ # Wait for node 0 to give up on sync attempt and start failover
+ wait_for_condition 50 100 {
+ [s -2 master_failover_state] == "failover-in-progress"
+ } else {
+ fail "Failover from node 2 to node 0 did not timeout"
+ }
+
+ # Quick check that everyone is a replica, we never want a
+ # state where there are two masters.
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ exec kill -SIGCONT $node_0_pid
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s -2 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 2 to node 0 did not finish"
+ }
+ $node_1 replicaof $node_0_host $node_0_port
+
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ assert_equal [count_log_message -2 "time out exceeded, failing over."] 1
+
+ # We should accept both psyncs, although this is the one case where we might
+ # not, since we didn't catch up.
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ test {failover with timeout aborts if replica never catches up} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # Stop replica so it never catches up
+ exec kill -SIGSTOP [srv -1 pid]
+ $node_0 SET CASE 1
+
+ $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500
+ # Wait for failover to end
+ wait_for_condition 50 20 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node_0 to replica did not finish"
+ }
+
+ exec kill -SIGCONT [srv -1 pid]
+
+ # We need to make sure the nodes actually sync back up
+ wait_for_ofs_sync $node_0 $node_1
+ wait_for_ofs_sync $node_0 $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # Since we never caught up, there should be no syncs
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ test {failovers can be aborted} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # Stop replica so it never catches up
+ exec kill -SIGSTOP [srv -1 pid]
+ $node_0 SET CASE 2
+
+ $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000
+ assert_match [s 0 master_failover_state] "waiting-for-sync"
+
+ # Sanity check that read commands are still accepted
+ $node_0 GET CASE
+
+ $node_0 failover abort
+ assert_match [s 0 master_failover_state] "no-failover"
+
+ exec kill -SIGCONT [srv -1 pid]
+
+ # Just make sure everything is still synced
+ wait_for_ofs_sync $node_0 $node_1
+ wait_for_ofs_sync $node_0 $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # Since we never caught up, there should be no syncs
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ test {failover aborts if target rejects sync request} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # We block psync, so the failover will fail
+ $node_1 acl setuser default -psync
+
+ # We pause the target long enough to send a write command
+ # during the pause. This write will not be interrupted.
+ exec kill -SIGSTOP [srv -1 pid]
+ set rd [redis_deferring_client]
+ $rd SET FOO BAR
+ $node_0 failover to $node_1_host $node_1_port
+ exec kill -SIGCONT [srv -1 pid]
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node_0 to replica did not finish"
+ }
+
+ assert_equal [$rd read] "OK"
+ $rd close
+
+ # restore access to psync
+ $node_1 acl setuser default +psync
+
+ # We need to make sure the nodes actually sync back up
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # We will cycle all of our replicas here and force a psync.
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+
+ assert_equal [count_log_message 0 "Failover target rejected psync request"] 1
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+}
+}
+}
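
Taken together, these tests exercise a command of roughly the following shape (a summary
inferred from the test calls; the authoritative grammar lives in the replication changes
outside this excerpt):

    FAILOVER [TO <host> <port> [FORCE]] [ABORT] [TIMEOUT <milliseconds>]

with the failover progress observable through the master_failover_state field of INFO
(e.g. waiting-for-sync, failover-in-progress, no-failover).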
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index 99495b2b7..a89221197 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -1,3 +1,5 @@
+tags {"rdb"} {
+
set server_path [tmpdir "server.rdb-encoding-test"]
# Copy RDB with different encodings in server path
@@ -289,3 +291,5 @@ start_server {overrides {save ""}} {
}
}
} ;# system_name
+
+} ;# tags
diff --git a/tests/integration/redis-benchmark.tcl b/tests/integration/redis-benchmark.tcl
index 5a4f09952..3684d7c3b 100644
--- a/tests/integration/redis-benchmark.tcl
+++ b/tests/integration/redis-benchmark.tcl
@@ -5,7 +5,7 @@ proc cmdstat {cmd} {
return [cmdrstat $cmd r]
}
-start_server {tags {"benchmark"}} {
+start_server {tags {"benchmark network"}} {
start_server {} {
set master_host [srv 0 host]
set master_port [srv 0 port]
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index 8071c4f97..c867001b8 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"repl"}} {
+start_server {tags {"repl network"}} {
start_server {} {
set master [srv -1 client]
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index 6c437ba71..8d09c68c1 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -5,7 +5,7 @@ proc log_file_matches {log pattern} {
string match $pattern $content
}
-start_server {tags {"repl"}} {
+start_server {tags {"repl network"}} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
diff --git a/tests/modules/Makefile b/tests/modules/Makefile
index 7363c98bc..93b4b022f 100644
--- a/tests/modules/Makefile
+++ b/tests/modules/Makefile
@@ -19,6 +19,7 @@ TEST_MODULES = \
misc.so \
hooks.so \
blockonkeys.so \
+ blockonbackground.so \
scan.so \
datatype.so \
auth.so \
@@ -27,7 +28,8 @@ TEST_MODULES = \
getkeys.so \
test_lazyfree.so \
timer.so \
- defragtest.so
+ defragtest.so \
+ stream.so
.PHONY: all
diff --git a/tests/modules/blockonbackground.c b/tests/modules/blockonbackground.c
new file mode 100644
index 000000000..cf7e9c7c1
--- /dev/null
+++ b/tests/modules/blockonbackground.c
@@ -0,0 +1,220 @@
+#define REDISMODULE_EXPERIMENTAL_API
+#include "redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <time.h>
+#include "assert.h"
+
+#define UNUSED(x) (void)(x)
+
+/* Reply callback for blocking command BLOCK.DEBUG */
+int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ UNUSED(argv);
+ UNUSED(argc);
+ int *myint = RedisModule_GetBlockedClientPrivateData(ctx);
+ return RedisModule_ReplyWithLongLong(ctx,*myint);
+}
+
+/* Timeout callback for blocking command BLOCK.DEBUG */
+int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ UNUSED(argv);
+ UNUSED(argc);
+ RedisModuleBlockedClient *bc = RedisModule_GetBlockedClientHandle(ctx);
+ assert(RedisModule_BlockedClientMeasureTimeEnd(bc)==REDISMODULE_OK);
+ return RedisModule_ReplyWithSimpleString(ctx,"Request timedout");
+}
+
+/* Private data freeing callback for BLOCK.DEBUG command. */
+void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) {
+ UNUSED(ctx);
+ RedisModule_Free(privdata);
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command BLOCK.DEBUG. */
+void *BlockDebug_ThreadMain(void *arg) {
+ void **targ = arg;
+ RedisModuleBlockedClient *bc = targ[0];
+ long long delay = (unsigned long)targ[1];
+ long long enable_time_track = (unsigned long)targ[2];
+ if (enable_time_track)
+ assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK);
+ RedisModule_Free(targ);
+
+ struct timespec ts;
+ ts.tv_sec = delay / 1000;
+ ts.tv_nsec = (delay % 1000) * 1000000;
+ nanosleep(&ts, NULL);
+ int *r = RedisModule_Alloc(sizeof(int));
+ *r = rand();
+ if (enable_time_track)
+ assert(RedisModule_BlockedClientMeasureTimeEnd(bc)==REDISMODULE_OK);
+ RedisModule_UnblockClient(bc,r);
+ return NULL;
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command BLOCK.DEBUG. */
+void *DoubleBlock_ThreadMain(void *arg) {
+ void **targ = arg;
+ RedisModuleBlockedClient *bc = targ[0];
+ long long delay = (unsigned long)targ[1];
+ assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK);
+ RedisModule_Free(targ);
+ struct timespec ts;
+ ts.tv_sec = delay / 1000;
+ ts.tv_nsec = (delay % 1000) * 1000000;
+ nanosleep(&ts, NULL);
+ int *r = RedisModule_Alloc(sizeof(int));
+ *r = rand();
+ RedisModule_BlockedClientMeasureTimeEnd(bc);
+ /* call again RedisModule_BlockedClientMeasureTimeStart() and
+ * RedisModule_BlockedClientMeasureTimeEnd and ensure that the
+ * total execution time is 2x the delay. */
+ assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK);
+ nanosleep(&ts, NULL);
+ RedisModule_BlockedClientMeasureTimeEnd(bc);
+
+ RedisModule_UnblockClient(bc,r);
+ return NULL;
+}
+
+void HelloBlock_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) {
+ RedisModule_Log(ctx,"warning","Blocked client %p disconnected!",
+ (void*)bc);
+}
+
+/* BLOCK.DEBUG <delay_ms> <timeout_ms> -- Block for <delay_ms> milliseconds, then reply with
+ * a random number. Timeout is the command timeout, so that you can test
+ * what happens when the delay is greater than the timeout. */
+int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout;
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Here we set a disconnection handler, however since this module will
+ * block in sleep() in a thread, there is not much we can do in the
+ * callback, so this is just to show you the API. */
+ RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*3);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+ // pass 1 as flag to enable time tracking
+ targ[2] = (void*)(unsigned long) 1;
+
+ if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* BLOCK.DEBUG_NO_TRACK <delay_ms> <timeout_ms> -- Block for <delay_ms> milliseconds, then reply with
+ * a random number. Timeout is the command timeout, so that you can test
+ * what happens when the delay is greater than the timeout.
+ * This command does not track background time, so the background time should not appear in the stats. */
+int HelloBlockNoTracking_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout;
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Here we set a disconnection handler, however since this module will
+ * block in sleep() in a thread, there is not much we can do in the
+ * callback, so this is just to show you the API. */
+ RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*3);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+ // pass 0 as flag to enable time tracking
+ targ[2] = (void*)(unsigned long) 0;
+
+ if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* BLOCK.DOUBLE_DEBUG <delay_ms> -- Block for 2 x <delay_ms> milliseconds,
+ * then reply with a random number.
+ * This command is used to test multiple calls to RedisModule_BlockedClientMeasureTimeStart()
+ * and RedisModule_BlockedClientMeasureTimeEnd() within the same execution. */
+int HelloDoubleBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout = 0; /* 0 = no command timeout; the thread always unblocks the client */
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*2);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+
+ if (pthread_create(&tid,NULL,DoubleBlock_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ UNUSED(argv);
+ UNUSED(argc);
+
+ if (RedisModule_Init(ctx,"block",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"block.debug",
+ HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"block.double_debug",
+ HelloDoubleBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"block.debug_no_track",
+ HelloBlockNoTracking_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
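
A hypothetical manual session with this test module (module path and timings illustrative):

    redis-server --loadmodule ./tests/modules/blockonbackground.so
    redis-cli BLOCK.DEBUG 100 1000        # blocks ~100 ms, then replies with a random integer
    redis-cli BLOCK.DEBUG_NO_TRACK 100 1000
    redis-cli BLOCK.DOUBLE_DEBUG 100      # blocks ~200 ms across two measured intervals

The NO_TRACK variant is identical except that it never calls the measure-time API, so no
background time is recorded for it.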
diff --git a/tests/modules/blockonkeys.c b/tests/modules/blockonkeys.c
index 94f31d455..b7ab977e9 100644
--- a/tests/modules/blockonkeys.c
+++ b/tests/modules/blockonkeys.c
@@ -2,6 +2,7 @@
#include "redismodule.h"
#include <string.h>
+#include <strings.h>
#include <assert.h>
#include <unistd.h>
@@ -65,6 +66,8 @@ int get_fsl(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode, int creat
RedisModule_CloseKey(key);
if (reply_on_failure)
RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
+ RedisModuleCallReply *reply = RedisModule_Call(ctx, "INCR", "c", "fsl_wrong_type");
+ RedisModule_FreeCallReply(reply);
return 0;
}
@@ -298,6 +301,154 @@ int fsl_getall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
return REDISMODULE_OK;
}
+/* Callback for blockonkeys_popall */
+int blockonkeys_popall_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argc);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST) {
+ RedisModuleString *elem;
+ long len = 0;
+ RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
+ while ((elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD)) != NULL) {
+ len++;
+ RedisModule_ReplyWithString(ctx, elem);
+ RedisModule_FreeString(ctx, elem);
+ }
+ RedisModule_ReplySetArrayLength(ctx, len);
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR Not a list");
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+int blockonkeys_popall_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithError(ctx, "ERR Timeout");
+}
+
+/* BLOCKONKEYS.POPALL key
+ *
+ * Blocks on an empty key for up to 3 seconds. When unblocked by a list
+ * operation like LPUSH, all the elements are popped and returned. Fails with an
+ * error on timeout. */
+int blockonkeys_popall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2)
+ return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ);
+ if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY) {
+ RedisModule_BlockClientOnKeys(ctx, blockonkeys_popall_reply_callback,
+ blockonkeys_popall_timeout_callback,
+ NULL, 3000, &argv[1], 1, NULL);
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR Key not empty");
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+/* BLOCKONKEYS.LPUSH key val [val ..]
+ * BLOCKONKEYS.LPUSH_UNBLOCK key val [val ..]
+ *
+ * A module equivalent of LPUSH. If the name LPUSH_UNBLOCK is used,
+ * RM_SignalKeyAsReady() is also called. */
+int blockonkeys_lpush(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc < 3)
+ return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST) {
+ RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
+ } else {
+ for (int i = 2; i < argc; i++) {
+ if (RedisModule_ListPush(key, REDISMODULE_LIST_HEAD,
+ argv[i]) != REDISMODULE_OK) {
+ RedisModule_CloseKey(key);
+ return RedisModule_ReplyWithError(ctx, "ERR Push failed");
+ }
+ }
+ }
+ RedisModule_CloseKey(key);
+
+ /* signal key as ready if the command is lpush_unblock */
+ size_t len;
+ const char *str = RedisModule_StringPtrLen(argv[0], &len);
+ if (!strncasecmp(str, "blockonkeys.lpush_unblock", len)) {
+ RedisModule_SignalKeyAsReady(ctx, argv[1]);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* Callback for the BLOCKONKEYS.BLPOPN command */
+int blockonkeys_blpopn_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argc);
+ long long n;
+ RedisModule_StringToLongLong(argv[2], &n);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ int result;
+ if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_ValueLength(key) >= (size_t)n) {
+ RedisModule_ReplyWithArray(ctx, n);
+ for (long i = 0; i < n; i++) {
+ RedisModuleString *elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
+ RedisModule_ReplyWithString(ctx, elem);
+ RedisModule_FreeString(ctx, elem);
+ }
+ result = REDISMODULE_OK;
+ } else if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST ||
+ RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY) {
+ /* continue blocking */
+ result = REDISMODULE_ERR;
+ } else {
+ result = RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+ RedisModule_CloseKey(key);
+ return result;
+}
+
+int blockonkeys_blpopn_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithError(ctx, "ERR Timeout");
+}
+
+/* BLOCKONKEYS.BLPOPN key N
+ *
+ * Blocks until key has N elements and then pops them or fails after 3 seconds.
+ */
+int blockonkeys_blpopn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc < 3) return RedisModule_WrongArity(ctx);
+
+ long long n;
+ if (RedisModule_StringToLongLong(argv[2], &n) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx, "ERR Invalid N");
+ }
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ int keytype = RedisModule_KeyType(key);
+ if (keytype != REDISMODULE_KEYTYPE_EMPTY &&
+ keytype != REDISMODULE_KEYTYPE_LIST) {
+ RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
+ } else if (keytype == REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_ValueLength(key) >= (size_t)n) {
+ RedisModule_ReplyWithArray(ctx, n);
+ for (long i = 0; i < n; i++) {
+ RedisModuleString *elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
+ RedisModule_ReplyWithString(ctx, elem);
+ RedisModule_FreeString(ctx, elem);
+ }
+ } else {
+ RedisModule_BlockClientOnKeys(ctx, blockonkeys_blpopn_reply_callback,
+ blockonkeys_blpopn_timeout_callback,
+ NULL, 3000, &argv[1], 1, NULL);
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
@@ -334,5 +485,21 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_CreateCommand(ctx,"fsl.getall",fsl_getall,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx, "blockonkeys.popall", blockonkeys_popall,
+ "", 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush", blockonkeys_lpush,
+ "", 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush_unblock", blockonkeys_lpush,
+ "", 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx, "blockonkeys.blpopn", blockonkeys_blpopn,
+ "", 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
return REDISMODULE_OK;
}
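
The three blockonkeys commands above share one contract worth spelling out: a command either serves the request immediately or calls RedisModule_BlockClientOnKeys(); after that, the reply callback runs every time one of the watched keys is signalled (implicitly by core commands such as LPUSH, or explicitly via RedisModule_SignalKeyAsReady()), and it returns REDISMODULE_ERR to stay blocked or REDISMODULE_OK once it has written a reply. A condensed, hypothetical sketch of a reply callback in that shape:

    /* Sketch only: pop one element if available, otherwise keep the client blocked. */
    int my_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argc);
        RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
        int status = REDISMODULE_ERR;            /* ERR = re-block and wait for more data */
        if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST &&
            RedisModule_ValueLength(key) > 0) {
            RedisModuleString *elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
            RedisModule_ReplyWithString(ctx, elem);
            RedisModule_FreeString(ctx, elem);
            status = REDISMODULE_OK;             /* reply sent, client is unblocked */
        }
        RedisModule_CloseKey(key);
        return status;
    }

The timeout callback only runs if no key signal satisfied the reply callback within the timeout passed to RedisModule_BlockClientOnKeys() (3000 ms in the commands above).
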
diff --git a/tests/modules/stream.c b/tests/modules/stream.c
new file mode 100644
index 000000000..abfbb1faf
--- /dev/null
+++ b/tests/modules/stream.c
@@ -0,0 +1,258 @@
+#include "redismodule.h"
+
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <unistd.h>
+#include <errno.h>
+
+/* Command which adds a stream entry with automatic ID, like XADD *.
+ *
+ * Syntax: STREAM.ADD key field1 value1 [ field2 value2 ... ]
+ *
+ * The response is the ID of the added stream entry or an error message.
+ */
+int stream_add(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc < 2 || argc % 2 != 0) {
+ RedisModule_WrongArity(ctx);
+ return REDISMODULE_OK;
+ }
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ RedisModuleStreamID id;
+ if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, &id,
+ &argv[2], (argc-2)/2) == REDISMODULE_OK) {
+ RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id);
+ RedisModule_ReplyWithString(ctx, id_str);
+ RedisModule_FreeString(ctx, id_str);
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR StreamAdd failed");
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+/* Command which adds a stream entry N times.
+ *
+ * Syntax: STREAM.ADD key N field1 value1 [ field2 value2 ... ]
+ *
+ * Returns the number of successfully added entries.
+ */
+int stream_addn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc < 3 || argc % 2 == 0) {
+ RedisModule_WrongArity(ctx);
+ return REDISMODULE_OK;
+ }
+
+ long long n, i;
+ if (RedisModule_StringToLongLong(argv[2], &n) == REDISMODULE_ERR) {
+ RedisModule_ReplyWithError(ctx, "N must be a number");
+ return REDISMODULE_OK;
+ }
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ for (i = 0; i < n; i++) {
+ if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, NULL,
+ &argv[3], (argc-3)/2) == REDISMODULE_ERR)
+ break;
+ }
+ RedisModule_ReplyWithLongLong(ctx, i);
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
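
STREAM.ADD and STREAM.ADDN both use REDISMODULE_STREAM_ADD_AUTOID, so the ID is generated by the server and returned through the id pointer. For contrast, here is a hedged sketch of adding an entry with a caller-chosen ID; it assumes that when the AUTOID flag is not passed, RedisModule_StreamAdd() reads the id argument as the ID to insert (the module-API counterpart of XADD with an explicit ID) and fails if that ID is not greater than the last one in the stream. The command name and arity check are illustrative only.

    /* Sketch only: STREAM.ADDAT key id field value [field value ...] */
    int stream_add_at(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        if (argc < 5 || argc % 2 == 0) return RedisModule_WrongArity(ctx);

        RedisModuleStreamID id;
        if (RedisModule_StringToStreamID(argv[2], &id) != REDISMODULE_OK)
            return RedisModule_ReplyWithError(ctx, "ERR Invalid stream ID");

        RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
        /* No AUTOID flag: 'id' is taken as the ID to add (assumption, see above). */
        if (RedisModule_StreamAdd(key, 0, &id, &argv[3], (argc-3)/2) == REDISMODULE_OK)
            RedisModule_ReplyWithSimpleString(ctx, "OK");
        else
            RedisModule_ReplyWithError(ctx, "ERR StreamAdd failed");
        RedisModule_CloseKey(key);
        return REDISMODULE_OK;
    }
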
+
+/* STREAM.DELETE key stream-id */
+int stream_delete(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ RedisModuleStreamID id;
+ if (RedisModule_StringToStreamID(argv[2], &id) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx, "Invalid stream ID");
+ }
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ if (RedisModule_StreamDelete(key, &id) == REDISMODULE_OK) {
+ RedisModule_ReplyWithSimpleString(ctx, "OK");
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR StreamDelete failed");
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+/* STREAM.RANGE key start-id end-id
+ *
+ * Returns an array of stream items. Each item is an array of the form
+ * [stream-id, [field1, value1, field2, value2, ...]].
+ *
+ * A funny side-effect used for testing RM_StreamIteratorDelete() is that if any
+ * entry has a field named "selfdestruct", the stream entry is deleted. It is
+ * however included in the results of this command.
+ */
+int stream_range(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) {
+ RedisModule_WrongArity(ctx);
+ return REDISMODULE_OK;
+ }
+
+ RedisModuleStreamID startid, endid;
+ if (RedisModule_StringToStreamID(argv[2], &startid) != REDISMODULE_OK ||
+ RedisModule_StringToStreamID(argv[3], &endid) != REDISMODULE_OK) {
+ RedisModule_ReplyWithError(ctx, "Invalid stream ID");
+ return REDISMODULE_OK;
+ }
+
+ /* If startid > endid, we swap and set the reverse flag. */
+ int flags = 0;
+ if (startid.ms > endid.ms ||
+ (startid.ms == endid.ms && startid.seq > endid.seq)) {
+ RedisModuleStreamID tmp = startid;
+ startid = endid;
+ endid = tmp;
+ flags |= REDISMODULE_STREAM_ITERATOR_REVERSE;
+ }
+
+ /* Open key and start iterator. */
+ int openflags = REDISMODULE_READ | REDISMODULE_WRITE;
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], openflags);
+ if (RedisModule_StreamIteratorStart(key, flags,
+ &startid, &endid) != REDISMODULE_OK) {
+ /* Key is not a stream, etc. */
+ RedisModule_ReplyWithError(ctx, "ERR StreamIteratorStart failed");
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+ }
+
+ /* Check error handling: Delete current entry when no current entry. */
+ assert(RedisModule_StreamIteratorDelete(key) ==
+ REDISMODULE_ERR);
+ assert(errno == ENOENT);
+
+ /* Check error handling: Fetch fields when no current entry. */
+ assert(RedisModule_StreamIteratorNextField(key, NULL, NULL) ==
+ REDISMODULE_ERR);
+ assert(errno == ENOENT);
+
+ /* Return array. */
+ RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
+ RedisModule_AutoMemory(ctx);
+ RedisModuleStreamID id;
+ long numfields;
+ long len = 0;
+ while (RedisModule_StreamIteratorNextID(key, &id,
+ &numfields) == REDISMODULE_OK) {
+ RedisModule_ReplyWithArray(ctx, 2);
+ RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id);
+ RedisModule_ReplyWithString(ctx, id_str);
+ RedisModule_ReplyWithArray(ctx, numfields * 2);
+ int delete = 0;
+ RedisModuleString *field, *value;
+ for (long i = 0; i < numfields; i++) {
+ assert(RedisModule_StreamIteratorNextField(key, &field, &value) ==
+ REDISMODULE_OK);
+ RedisModule_ReplyWithString(ctx, field);
+ RedisModule_ReplyWithString(ctx, value);
+ /* check if this is a "selfdestruct" field */
+ size_t field_len;
+ const char *field_str = RedisModule_StringPtrLen(field, &field_len);
+ if (!strncmp(field_str, "selfdestruct", field_len)) delete = 1;
+ }
+ if (delete) {
+ assert(RedisModule_StreamIteratorDelete(key) == REDISMODULE_OK);
+ }
+ /* check error handling: no more fields to fetch */
+ assert(RedisModule_StreamIteratorNextField(key, &field, &value) ==
+ REDISMODULE_ERR);
+ assert(errno == ENOENT);
+ len++;
+ }
+ RedisModule_ReplySetArrayLength(ctx, len);
+ RedisModule_StreamIteratorStop(key);
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+/*
+ * STREAM.TRIM key (MAXLEN (=|~) length | MINID (=|~) id)
+ */
+int stream_trim(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 5) {
+ RedisModule_WrongArity(ctx);
+ return REDISMODULE_OK;
+ }
+
+ /* Parse args */
+ int trim_by_id = 0; /* 0 = maxlen, 1 = minid */
+ long long maxlen;
+ RedisModuleStreamID minid;
+ size_t arg_len;
+ const char *arg = RedisModule_StringPtrLen(argv[2], &arg_len);
+ if (!strcasecmp(arg, "minid")) {
+ trim_by_id = 1;
+ if (RedisModule_StringToStreamID(argv[4], &minid) != REDISMODULE_OK) {
+ RedisModule_ReplyWithError(ctx, "ERR Invalid stream ID");
+ return REDISMODULE_OK;
+ }
+ } else if (!strcasecmp(arg, "maxlen")) {
+ if (RedisModule_StringToLongLong(argv[4], &maxlen) == REDISMODULE_ERR) {
+ RedisModule_ReplyWithError(ctx, "ERR Maxlen must be a number");
+ return REDISMODULE_OK;
+ }
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR Invalid arguments");
+ return REDISMODULE_OK;
+ }
+
+ /* Approx or exact */
+ int flags;
+ arg = RedisModule_StringPtrLen(argv[3], &arg_len);
+ if (arg_len == 1 && arg[0] == '~') {
+ flags = REDISMODULE_STREAM_TRIM_APPROX;
+ } else if (arg_len == 1 && arg[0] == '=') {
+ flags = 0;
+ } else {
+ RedisModule_ReplyWithError(ctx, "ERR Invalid approx-or-exact mark");
+ return REDISMODULE_OK;
+ }
+
+ /* Trim */
+ RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+ long long trimmed;
+ if (trim_by_id) {
+ trimmed = RedisModule_StreamTrimByID(key, flags, &minid);
+ } else {
+ trimmed = RedisModule_StreamTrimByLength(key, flags, maxlen);
+ }
+
+ /* Return result */
+ if (trimmed < 0) {
+ RedisModule_ReplyWithError(ctx, "ERR Trimming failed");
+ } else {
+ RedisModule_ReplyWithLongLong(ctx, trimmed);
+ }
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
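
STREAM.TRIM exercises both trimming entry points in isolation; in a real module the more common shape is to cap a stream right after appending to it, the way a single XADD ... MAXLEN ~ N does. A minimal sketch of that pattern with the same APIs (the 1000-entry cap and the helper name are illustrative):

    /* Sketch only: append an entry, then keep roughly the newest 1000 entries.
     * The APPROX flag lets the trim stop at node boundaries, which is cheaper. */
    static int stream_add_capped(RedisModuleKey *key, RedisModuleString **fieldvals,
                                 long numfields) {
        if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, NULL,
                                  fieldvals, numfields) != REDISMODULE_OK)
            return REDISMODULE_ERR;
        long long trimmed = RedisModule_StreamTrimByLength(
            key, REDISMODULE_STREAM_TRIM_APPROX, 1000);
        return trimmed < 0 ? REDISMODULE_ERR : REDISMODULE_OK; /* negative = error */
    }
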
+
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ if (RedisModule_Init(ctx, "stream", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx, "stream.add", stream_add, "",
+ 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx, "stream.addn", stream_addn, "",
+ 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx, "stream.delete", stream_delete, "",
+ 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx, "stream.range", stream_range, "",
+ 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx, "stream.trim", stream_trim, "",
+ 1, 1, 1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
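
One more iterator idiom that falls out of STREAM.RANGE but is not covered above: the REVERSE flag makes it cheap to look only at the newest entry, by iterating the full ID range backwards and stopping after the first NextID. This is a sketch under the assumptions that RedisModuleStreamID can be filled in directly through its ms/seq fields (as stream_range does above) and that <stdint.h> is included for UINT64_MAX; the command name is hypothetical.

    /* Sketch only: STREAM.LASTID key -- reply with the newest entry ID, or null. */
    int stream_last_id(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        if (argc != 2) return RedisModule_WrongArity(ctx);

        RedisModuleStreamID startid = {0, 0};
        RedisModuleStreamID endid = {UINT64_MAX, UINT64_MAX};
        RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ);
        if (RedisModule_StreamIteratorStart(key, REDISMODULE_STREAM_ITERATOR_REVERSE,
                                            &startid, &endid) != REDISMODULE_OK) {
            RedisModule_CloseKey(key);
            return RedisModule_ReplyWithError(ctx, "ERR StreamIteratorStart failed");
        }
        RedisModuleStreamID id;
        long numfields;
        if (RedisModule_StreamIteratorNextID(key, &id, &numfields) == REDISMODULE_OK) {
            RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id);
            RedisModule_ReplyWithString(ctx, id_str);
            RedisModule_FreeString(ctx, id_str);
        } else {
            RedisModule_ReplyWithNull(ctx);       /* stream is empty */
        }
        RedisModule_StreamIteratorStop(key);
        RedisModule_CloseKey(key);
        return REDISMODULE_OK;
    }
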
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
index 996af906a..c275aa762 100644
--- a/tests/sentinel/run.tcl
+++ b/tests/sentinel/run.tcl
@@ -10,7 +10,7 @@ set ::tlsdir "../../tls"
proc main {} {
parse_options
- spawn_instance sentinel $::sentinel_base_port $::instances_count
+ spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"] "../tests/includes/sentinel.conf"
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup
diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
index 7fb1a8bef..75baf9817 100644
--- a/tests/sentinel/tests/00-base.tcl
+++ b/tests/sentinel/tests/00-base.tcl
@@ -1,5 +1,5 @@
# Check the basic monitoring and failover capabilities.
-
+source "../tests/includes/start-init-tests.tcl"
source "../tests/includes/init-tests.tcl"
if {$::simulate_error} {
diff --git a/tests/sentinel/tests/08-hostname-conf.tcl b/tests/sentinel/tests/08-hostname-conf.tcl
new file mode 100644
index 000000000..be6e42cb0
--- /dev/null
+++ b/tests/sentinel/tests/08-hostname-conf.tcl
@@ -0,0 +1,67 @@
+proc set_redis_announce_ip {addr} {
+ foreach_redis_id id {
+ R $id config set replica-announce-ip $addr
+ }
+}
+
+proc set_sentinel_config {keyword value} {
+ foreach_sentinel_id id {
+ S $id sentinel config set $keyword $value
+ }
+}
+
+proc set_all_instances_hostname {hostname} {
+ foreach_sentinel_id id {
+ set_instance_attrib sentinel $id host $hostname
+ }
+ foreach_redis_id id {
+ set_instance_attrib redis $id host $hostname
+ }
+}
+
+test "(pre-init) Configure instances and sentinel for hostname use" {
+ set ::host "localhost"
+ restart_killed_instances
+ set_all_instances_hostname $::host
+ set_redis_announce_ip $::host
+ set_sentinel_config resolve-hostnames yes
+ set_sentinel_config announce-hostnames yes
+}
+
+source "../tests/includes/init-tests.tcl"
+
+proc verify_hostname_announced {hostname} {
+ foreach_sentinel_id id {
+ # Master is reported with its hostname
+ if {![string equal [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 0] $hostname]} {
+ return 0
+ }
+
+ # Replicas are reported with their hostnames
+ foreach replica [S $id SENTINEL REPLICAS mymaster] {
+ if {![string equal [dict get $replica ip] $hostname]} {
+ return 0
+ }
+ }
+ }
+ return 1
+}
+
+test "Sentinel announces hostnames" {
+ # Check initial state
+ verify_hostname_announced $::host
+
+ # Disable announce-hostnames and confirm IPs are used
+ set_sentinel_config announce-hostnames no
+ assert {[verify_hostname_announced "127.0.0.1"] || [verify_hostname_announced "::1"]}
+}
+
+# We need to revert any special configuration because all tests currently
+# share the same instances.
+test "(post-cleanup) Configure instances and sentinel for IPs" {
+ set ::host "127.0.0.1"
+ set_all_instances_hostname $::host
+ set_redis_announce_ip $::host
+ set_sentinel_config resolve-hostnames no
+ set_sentinel_config announce-hostnames no
+} \ No newline at end of file
diff --git a/tests/sentinel/tests/09-acl-support.tcl b/tests/sentinel/tests/09-acl-support.tcl
new file mode 100644
index 000000000..1366fc4d5
--- /dev/null
+++ b/tests/sentinel/tests/09-acl-support.tcl
@@ -0,0 +1,50 @@
+
+source "../tests/includes/init-tests.tcl"
+
+set ::user "testuser"
+set ::password "secret"
+
+proc setup_acl {} {
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id ACL SETUSER $::user >$::password +@all on]
+ assert_equal {OK} [S $id ACL SETUSER default off]
+
+ S $id CLIENT KILL USER default SKIPME no
+ assert_equal {OK} [S $id AUTH $::user $::password]
+ }
+}
+
+proc teardown_acl {} {
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id ACL SETUSER default on]
+ assert_equal {1} [S $id ACL DELUSER $::user]
+
+ S $id SENTINEL CONFIG SET sentinel-user ""
+ S $id SENTINEL CONFIG SET sentinel-pass ""
+ }
+}
+
+test "(post-init) Set up ACL configuration" {
+ setup_acl
+ assert_equal $::user [S 1 ACL WHOAMI]
+}
+
+test "SENTINEL CONFIG SET handles on-the-fly credentials reconfiguration" {
+ # Make sure we're starting with a broken state...
+ after 5000
+ catch {S 1 SENTINEL CKQUORUM mymaster} err
+ assert_match {*NOQUORUM*} $err
+
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-user $::user]
+ assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-pass $::password]
+ }
+
+ after 5000
+ assert_match {*OK*} [S 1 SENTINEL CKQUORUM mymaster]
+}
+
+test "(post-cleanup) Tear down ACL configuration" {
+ teardown_acl
+}
+
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
index 234f9c589..b4626caed 100644
--- a/tests/sentinel/tests/includes/init-tests.tcl
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -1,6 +1,6 @@
# Initialization tests -- most units will start including this.
-test "(init) Restart killed instances" {
+proc restart_killed_instances {} {
foreach type {redis sentinel} {
foreach_${type}_id id {
if {[get_instance_attrib $type $id pid] == -1} {
@@ -12,6 +12,10 @@ test "(init) Restart killed instances" {
}
}
+test "(init) Restart killed instances" {
+ restart_killed_instances
+}
+
test "(init) Remove old master entry from sentinels" {
foreach_sentinel_id id {
catch {S $id SENTINEL REMOVE mymaster}
@@ -37,6 +41,8 @@ test "(init) Sentinels can start monitoring a master" {
S $id SENTINEL SET mymaster down-after-milliseconds 2000
S $id SENTINEL SET mymaster failover-timeout 20000
S $id SENTINEL SET mymaster parallel-syncs 10
+ S $id SENTINEL SET mymaster notification-script ../../tests/includes/notify.sh
+ S $id SENTINEL SET mymaster client-reconfig-script ../../tests/includes/notify.sh
}
}
diff --git a/tests/sentinel/tests/includes/notify.sh b/tests/sentinel/tests/includes/notify.sh
new file mode 100755
index 000000000..5de0eaf76
--- /dev/null
+++ b/tests/sentinel/tests/includes/notify.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+OS=`uname -s`
+if [ ${OS} != "Linux" ]
+then
+ exit 0
+fi
+
+# fd 3 is excluded to account for the access to /proc/self/fd itself:
+# if the sentinel leaks an fd it may well land on 3, but then the access
+# to /proc will consume yet another fd, and we'll catch that one.
+leaked_fd_count=`ls /proc/self/fd | grep -vE '^[0123]$' | wc -l`
+if [ $leaked_fd_count -gt 0 ]
+then
+ sentinel_fd_leaks_file="../sentinel_fd_leaks"
+ if [ ! -f $sentinel_fd_leaks_file ]
+ then
+ ls -l /proc/self/fd | cat >> $sentinel_fd_leaks_file
+ lsof -p $$ | cat >> $sentinel_fd_leaks_file
+ fi
+fi
diff --git a/tests/sentinel/tests/includes/sentinel.conf b/tests/sentinel/tests/includes/sentinel.conf
new file mode 100644
index 000000000..94f2804a4
--- /dev/null
+++ b/tests/sentinel/tests/includes/sentinel.conf
@@ -0,0 +1,11 @@
+# assume master is down after being unresponsive for 20s
+sentinel down-after-milliseconds setmaster 20000
+# reconfigure up to two replicas at a time
+sentinel parallel-syncs setmaster 2
+# wait for 4m before assuming failover went wrong
+sentinel failover-timeout setmaster 240000
+# monitoring set
+sentinel monitor setmaster 10.0.0.1 30000 2
+
+
+
diff --git a/tests/sentinel/tests/includes/start-init-tests.tcl b/tests/sentinel/tests/includes/start-init-tests.tcl
new file mode 100644
index 000000000..b0523506a
--- /dev/null
+++ b/tests/sentinel/tests/includes/start-init-tests.tcl
@@ -0,0 +1,18 @@
+test "(start-init) Flush config and compare rewrite config file lines" {
+ foreach_sentinel_id id {
+ assert_match "OK" [S $id SENTINEL FLUSHCONFIG]
+ set file1 ../tests/includes/sentinel.conf
+ set file2 [file join "sentinel_${id}" "sentinel.conf"]
+ set fh1 [open $file1 r]
+ set fh2 [open $file2 r]
+ while {[gets $fh1 line1]} {
+ if {[gets $fh2 line2]} {
+ assert [string equal $line1 $line2]
+ } else {
+ fail "sentinel config file rewrite sequence changed"
+ }
+ }
+ close $fh1
+ close $fh2
+ }
+} \ No newline at end of file
diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl
index 8eca2ac32..54b49920d 100644
--- a/tests/support/redis.tcl
+++ b/tests/support/redis.tcl
@@ -244,6 +244,7 @@ proc ::redis::redis_read_reply {id fd} {
_ {redis_read_null $fd}
: -
+ {redis_read_line $fd}
+ , {expr {double([redis_read_line $fd])}}
- {return -code error [redis_read_line $fd]}
$ {redis_bulk_read $fd}
> -
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 77ba31d84..0d36d46be 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -152,20 +152,48 @@ proc server_is_up {host port retrynum} {
return 0
}
+# Check if current ::tags match requested tags. If ::allowtags are used,
+# there must be some intersection. If ::denytags are used, no intersection
+# is allowed. Returns 1 if the tags are acceptable, or 0 otherwise, in
+# which case the variable named by err_return is set to the message to log.
+proc tags_acceptable {err_return} {
+ upvar $err_return err
+
+    # If tags are whitelisted, make sure there's a match
+ if {[llength $::allowtags] > 0} {
+ set matched 0
+ foreach tag $::allowtags {
+ if {[lsearch $::tags $tag] >= 0} {
+ incr matched
+ }
+ }
+ if {$matched < 1} {
+ set err "Tag: none of the tags allowed"
+ return 0
+ }
+ }
+
+ foreach tag $::denytags {
+ if {[lsearch $::tags $tag] >= 0} {
+ set err "Tag: $tag denied"
+ return 0
+ }
+ }
+
+ return 1
+}
+
# doesn't really belong here, but highly coupled to code in start_server
proc tags {tags code} {
# If the 'tags' argument contains multiple tags, quoted and separated by spaces,
# we want to get rid of the quotes in order to have a proper list
set tags [string map { \" "" } $tags]
set ::tags [concat $::tags $tags]
- # We skip unwanted tags
- foreach tag $::denytags {
- if {[lsearch $::tags $tag] >= 0} {
- incr ::num_aborted
- send_data_packet $::test_server_fd ignore "Tag: $tag"
- set ::tags [lrange $::tags 0 end-[llength $tags]]
- return
- }
+ if {![tags_acceptable err]} {
+ incr ::num_aborted
+ send_data_packet $::test_server_fd ignore $err
+ set ::tags [lrange $::tags 0 end-[llength $tags]]
+ return
}
uplevel 1 $code
set ::tags [lrange $::tags 0 end-[llength $tags]]
@@ -267,13 +295,11 @@ proc start_server {options {code undefined}} {
}
# We skip unwanted tags
- foreach tag $::denytags {
- if {[lsearch $::tags $tag] >= 0} {
- incr ::num_aborted
- send_data_packet $::test_server_fd ignore "Tag: $tag"
- set ::tags [lrange $::tags 0 end-[llength $tags]]
- return
- }
+ if {![tags_acceptable err]} {
+ incr ::num_aborted
+ send_data_packet $::test_server_fd ignore $err
+ set ::tags [lrange $::tags 0 end-[llength $tags]]
+ return
}
# If we are running against an external server, we just push the
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 86f2753c2..80f8598ce 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -12,7 +12,11 @@ proc randstring {min max {type binary}} {
set maxval 52
}
while {$len} {
- append output [format "%c" [expr {$minval+int(rand()*($maxval-$minval+1))}]]
+ set rr [expr {$minval+int(rand()*($maxval-$minval+1))}]
+ if {$type eq {alpha} && $rr eq 92} {
+ set rr 90; # avoid putting '\' char in the string, it can mess up TCL processing
+ }
+ append output [format "%c" $rr]
incr len -1
}
return $output
@@ -86,12 +90,10 @@ proc waitForBgrewriteaof r {
}
proc wait_for_sync r {
- while 1 {
- if {[status $r master_link_status] eq "down"} {
- after 10
- } else {
- break
- }
+ wait_for_condition 50 100 {
+ [status $r master_link_status] eq "up"
+ } else {
+ fail "replica didn't sync in time"
}
}
@@ -571,8 +573,8 @@ proc generate_fuzzy_traffic_on_key {key duration} {
# Commands per type, blocking commands removed
# TODO: extract these from help.h or elsewhere, and improve to include other types
set string_commands {APPEND BITCOUNT BITFIELD BITOP BITPOS DECR DECRBY GET GETBIT GETRANGE GETSET INCR INCRBY INCRBYFLOAT MGET MSET MSETNX PSETEX SET SETBIT SETEX SETNX SETRANGE STRALGO STRLEN}
- set hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS}
- set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE}
+ set hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS HRANDFIELD}
+ set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE ZRANDMEMBER}
set list_commands {LINDEX LINSERT LLEN LPOP LPOS LPUSH LPUSHX LRANGE LREM LSET LTRIM RPOP RPOPLPUSH RPUSH RPUSHX}
set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE}
set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM}
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 4bef921ff..2b7854780 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -52,6 +52,7 @@ set ::all_tests {
integration/psync2
integration/psync2-reg
integration/psync2-pingoff
+ integration/failover
integration/redis-cli
integration/redis-benchmark
unit/pubsub
@@ -717,6 +718,7 @@ if {[llength $filtered_tests] < [llength $::all_tests]} {
}
proc attach_to_replication_stream {} {
+ r config set repl-ping-replica-period 3600
if {$::tls} {
set s [::tls::socket [srv 0 "host"] [srv 0 "port"]]
} else {
@@ -774,6 +776,7 @@ proc assert_replication_stream {s patterns} {
proc close_replication_stream {s} {
close $s
+ r config set repl-ping-replica-period 10
}
# With the parallel test running multiple Redis instances at the same time
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index affce65e5..d43820ae3 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -12,7 +12,7 @@ start_server {tags {"dump"}} {
r del foo
r restore foo 5000 $encoded
set ttl [r pttl foo]
- assert {$ttl >= 3000 && $ttl <= 5000}
+ assert_range $ttl 3000 5000
r get foo
} {bar}
@@ -22,7 +22,7 @@ start_server {tags {"dump"}} {
r del foo
r restore foo 2569591501 $encoded
set ttl [r pttl foo]
- assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501}
+ assert_range $ttl (2569591501-3000) 2569591501
r get foo
} {bar}
@@ -33,7 +33,7 @@ start_server {tags {"dump"}} {
set now [clock milliseconds]
r restore foo [expr $now+3000] $encoded absttl
set ttl [r pttl foo]
- assert {$ttl >= 2900 && $ttl <= 3100}
+ assert_range $ttl 2000 3100
r get foo
} {bar}
diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl
index 8bcdc16b7..9bde4809f 100644
--- a/tests/unit/expire.tcl
+++ b/tests/unit/expire.tcl
@@ -209,19 +209,101 @@ start_server {tags {"expire"}} {
set e
} {*not an integer*}
- test {SET - use EX/PX option, TTL should not be reseted after loadaof} {
+ test {EXPIRE and SET/GETEX EX/PX/EXAT/PXAT option, TTL should not be reset after loadaof} {
+ # This test makes sure that expire times are propagated as absolute
+ # times to the AOF file and not as relative time, so that when the AOF
+ # is reloaded the TTLs are not being shifted forward to the future.
+ # We want the time to logically pass when the server is restarted!
+
r config set appendonly yes
- r set foo bar EX 100
- after 2000
- r debug loadaof
- set ttl [r ttl foo]
- assert {$ttl <= 98 && $ttl > 90}
+ r set foo1 bar EX 100
+ r set foo2 bar PX 100000
+ r set foo3 bar
+ r set foo4 bar
+ r expire foo3 100
+ r pexpire foo4 100000
+ r setex foo5 100 bar
+ r psetex foo6 100000 bar
+ r set foo7 bar EXAT [expr [clock seconds] + 100]
+ r set foo8 bar PXAT [expr [clock milliseconds] + 100000]
+ r set foo9 bar
+ r getex foo9 EX 100
+ r set foo10 bar
+ r getex foo10 PX 100000
+ r set foo11 bar
+ r getex foo11 EXAT [expr [clock seconds] + 100]
+ r set foo12 bar
+ r getex foo12 PXAT [expr [clock milliseconds] + 100000]
- r set foo bar PX 100000
after 2000
r debug loadaof
- set ttl [r ttl foo]
- assert {$ttl <= 98 && $ttl > 90}
+ assert_range [r ttl foo1] 90 98
+ assert_range [r ttl foo2] 90 98
+ assert_range [r ttl foo3] 90 98
+ assert_range [r ttl foo4] 90 98
+ assert_range [r ttl foo5] 90 98
+ assert_range [r ttl foo6] 90 98
+ assert_range [r ttl foo7] 90 98
+ assert_range [r ttl foo8] 90 98
+ assert_range [r ttl foo9] 90 98
+ assert_range [r ttl foo10] 90 98
+ assert_range [r ttl foo11] 90 98
+ assert_range [r ttl foo12] 90 98
+ }
+
+ test {EXPIRE relative and absolute propagation to replicas} {
+ # Make sure that relative and absolute expire commands are propagated
+ # "as is" to replicas.
+ # We want replicas to honor the same high level contract of expires that
+ # the master has, that is, we want the time to be counted logically
+ # starting from the moment the write was received. This usually provides
+ # the most coherent behavior from the point of view of the external
+ # users, with TTLs that are similar from the POV of the external observer.
+ #
+ # This test is here to stop some innocent / eager optimization or cleanup
+ # from doing the wrong thing without proper discussion, see:
+ # https://github.com/redis/redis/pull/5171#issuecomment-409553266
+
+ set repl [attach_to_replication_stream]
+ r set foo1 bar ex 200
+ r set foo1 bar px 100000
+ r set foo1 bar exat [expr [clock seconds]+100]
+ r set foo1 bar pxat [expr [clock milliseconds]+10000]
+ r setex foo1 100 bar
+ r psetex foo1 100000 bar
+ r set foo2 bar
+ r expire foo2 100
+ r pexpire foo2 100000
+ r set foo3 bar
+ r expireat foo3 [expr [clock seconds]+100]
+ r pexpireat foo3 [expr [clock seconds]*1000+100000]
+ r expireat foo3 [expr [clock seconds]-100]
+ r set foo4 bar
+ r getex foo4 ex 200
+ r getex foo4 px 200000
+ r getex foo4 exat [expr [clock seconds]+100]
+ r getex foo4 pxat [expr [clock milliseconds]+10000]
+ assert_replication_stream $repl {
+ {select *}
+ {set foo1 bar PX 200000}
+ {set foo1 bar PX 100000}
+ {set foo1 bar PXAT *}
+ {set foo1 bar PXAT *}
+ {set foo1 bar PX 100000}
+ {set foo1 bar PX 100000}
+ {set foo2 bar}
+ {expire foo2 100}
+ {pexpire foo2 100000}
+ {set foo3 bar}
+ {expireat foo3 *}
+ {pexpireat foo3 *}
+ {del foo3}
+ {set foo4 bar}
+ {pexpire foo4 200000}
+ {pexpire foo4 200000}
+ {pexpireat foo4 *}
+ {pexpireat foo4 *}
+ }
}
test {SET command will remove expire} {
@@ -246,4 +328,32 @@ start_server {tags {"expire"}} {
set ttl [r ttl foo]
assert {$ttl <= 98 && $ttl > 90}
}
+
+ test {GETEX use of PERSIST option should remove TTL} {
+ r set foo bar EX 100
+ r getex foo PERSIST
+ r ttl foo
+ } {-1}
+
+ test {GETEX use of PERSIST option should remove TTL after loadaof} {
+ r set foo bar EX 100
+ r getex foo PERSIST
+ after 2000
+ r debug loadaof
+ r ttl foo
+ } {-1}
+
+    test {GETEX propagates to replica as PERSIST, DEL, or nothing} {
+ set repl [attach_to_replication_stream]
+ r set foo bar EX 100
+ r getex foo PERSIST
+ r getex foo
+ r getex foo exat [expr [clock seconds]-100]
+ assert_replication_stream $repl {
+ {select *}
+ {set foo bar PX 100000}
+ {persist foo}
+ {del foo}
+ }
+ }
}
diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl
index 0a7f7a9c9..ba28341ff 100644
--- a/tests/unit/introspection.tcl
+++ b/tests/unit/introspection.tcl
@@ -112,6 +112,7 @@ start_server {tags {"introspection"}} {
bio_cpulist
aof_rewrite_cpulist
bgsave_cpulist
+ set-proc-title
}
if {!$::tls} {
diff --git a/tests/unit/limits.tcl b/tests/unit/limits.tcl
index 38ba76208..51122e8f5 100644
--- a/tests/unit/limits.tcl
+++ b/tests/unit/limits.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"limits"} overrides {maxclients 10}} {
+start_server {tags {"limits network"} overrides {maxclients 10}} {
if {$::tls} {
set expected_code "*I/O error*"
} else {
diff --git a/tests/unit/moduleapi/blockonbackground.tcl b/tests/unit/moduleapi/blockonbackground.tcl
new file mode 100644
index 000000000..23111ab73
--- /dev/null
+++ b/tests/unit/moduleapi/blockonbackground.tcl
@@ -0,0 +1,67 @@
+set testmodule [file normalize tests/modules/blockonbackground.so]
+
+source tests/support/util.tcl
+
+start_server {tags {"modules"}} {
+ r module load $testmodule
+
+ test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time} {
+ r slowlog reset
+ r config set slowlog-log-slower-than 200000
+ assert_equal [r slowlog len] 0
+ r block.debug 0 10000
+ assert_equal [r slowlog len] 0
+ r config resetstat
+ r block.debug 200 10000
+ assert_equal [r slowlog len] 1
+
+ set cmdstatline [cmdrstat block.debug r]
+
+ regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
+ assert {$usec >= 100000}
+ assert {$usec_per_call >= 100000}
+ }
+
+ test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } {
+ r slowlog reset
+ r config set slowlog-log-slower-than 200000
+ assert_equal [r slowlog len] 0
+ r block.debug 0 20000
+ assert_equal [r slowlog len] 0
+ r config resetstat
+ r block.debug 20000 200
+ assert_equal [r slowlog len] 1
+
+ set cmdstatline [cmdrstat block.debug r]
+
+ regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
+ assert {$usec >= 100000}
+ assert {$usec_per_call >= 100000}
+ }
+
+ test { blocked clients time tracking - check blocked command with multiple calls RedisModule_BlockedClientMeasureTimeStart() is tracking the total background time } {
+ r slowlog reset
+ r config set slowlog-log-slower-than 200000
+ assert_equal [r slowlog len] 0
+ r block.double_debug 0
+ assert_equal [r slowlog len] 0
+ r config resetstat
+ r block.double_debug 100
+ assert_equal [r slowlog len] 1
+
+ set cmdstatline [cmdrstat block.double_debug r]
+
+ regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
+ assert {$usec >= 60000}
+ assert {$usec_per_call >= 60000}
+ }
+
+ test { blocked clients time tracking - check blocked command without calling RedisModule_BlockedClientMeasureTimeStart() is not reporting background time } {
+ r slowlog reset
+ r config set slowlog-log-slower-than 200000
+ assert_equal [r slowlog len] 0
+ r block.debug_no_track 200 1000
+ # ensure slowlog is still empty
+ assert_equal [r slowlog len] 0
+ }
+}
diff --git a/tests/unit/moduleapi/blockonkeys.tcl b/tests/unit/moduleapi/blockonkeys.tcl
index 5e5d93da3..75191b3c7 100644
--- a/tests/unit/moduleapi/blockonkeys.tcl
+++ b/tests/unit/moduleapi/blockonkeys.tcl
@@ -168,7 +168,7 @@ start_server {tags {"modules"}} {
assert_error "*unblocked*" {$rd read}
}
- test {Module client blocked on keys does not wake up on wrong type} {
+    test {Module client re-blocked on keys after being woken up on wrong type} {
r del k
set rd [redis_deferring_client]
$rd fsl.bpop k 0
@@ -184,5 +184,56 @@ start_server {tags {"modules"}} {
r del k
r fsl.push k 34
assert_equal {34} [$rd read]
+ assert_equal {1} [r get fsl_wrong_type] ;# first lpush caused one wrong-type wake-up
+ }
+
+ test {Module client blocked on keys woken up by LPUSH} {
+ r del k
+ set rd [redis_deferring_client]
+ $rd blockonkeys.popall k
+ # wait until client is actually blocked
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Client is not blocked"
+ }
+ r lpush k 42 squirrel banana
+ assert_equal {banana squirrel 42} [$rd read]
+ $rd close
+ }
+
+ test {Module client unblocks BLPOP} {
+ r del k
+ set rd [redis_deferring_client]
+ $rd blpop k 3
+ # wait until client is actually blocked
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Client is not blocked"
+ }
+ r blockonkeys.lpush k 42
+ assert_equal {k 42} [$rd read]
+ $rd close
+ }
+
+ test {Module unblocks module blocked on non-empty list} {
+ r del k
+ r lpush k aa
+ # Module client blocks to pop 5 elements from list
+ set rd [redis_deferring_client]
+ $rd blockonkeys.blpopn k 5
+ # Wait until client is actually blocked
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Client is not blocked"
+ }
+ # Check that RM_SignalKeyAsReady() can wake up BLPOPN
+ r blockonkeys.lpush_unblock k bb cc ;# Not enough elements for BLPOPN
+ r lpush k dd ee ff ;# Doesn't unblock module
+ r blockonkeys.lpush_unblock k gg ;# Unblocks module
+ assert_equal {gg ff ee dd cc} [$rd read]
+ $rd close
}
}
diff --git a/tests/unit/moduleapi/stream.tcl b/tests/unit/moduleapi/stream.tcl
new file mode 100644
index 000000000..15e97c183
--- /dev/null
+++ b/tests/unit/moduleapi/stream.tcl
@@ -0,0 +1,155 @@
+set testmodule [file normalize tests/modules/stream.so]
+
+start_server {tags {"modules"}} {
+ r module load $testmodule
+
+ test {Module stream add and delete} {
+ r del mystream
+ # add to empty key
+ set streamid1 [r stream.add mystream item 1 value a]
+ # add to existing stream
+ set streamid2 [r stream.add mystream item 2 value b]
+ # check result
+ assert { [string match "*-*" $streamid1] }
+ set items [r XRANGE mystream - +]
+ assert_equal $items \
+ "{$streamid1 {item 1 value a}} {$streamid2 {item 2 value b}}"
+ # delete one of them and try deleting non-existing ID
+ assert_equal OK [r stream.delete mystream $streamid1]
+ assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456}
+ assert_error "Invalid stream ID*" {r stream.delete mystream foo}
+ assert_equal "{$streamid2 {item 2 value b}}" [r XRANGE mystream - +]
+ # check error condition: wrong type
+ r del mystream
+ r set mystream mystring
+ assert_error "ERR StreamAdd*" {r stream.add mystream item 1 value a}
+ assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456}
+ }
+
+ test {Module stream add unblocks blocking xread} {
+ r del mystream
+
+ # Blocking XREAD on an empty key
+ set rd1 [redis_deferring_client]
+ $rd1 XREAD BLOCK 3000 STREAMS mystream $
+ # wait until client is actually blocked
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Client is not blocked"
+ }
+ set id [r stream.add mystream field 1 value a]
+ assert_equal "{mystream {{$id {field 1 value a}}}}" [$rd1 read]
+
+ # Blocking XREAD on an existing stream
+ set rd2 [redis_deferring_client]
+ $rd2 XREAD BLOCK 3000 STREAMS mystream $
+ # wait until client is actually blocked
+ wait_for_condition 50 100 {
+ [s 0 blocked_clients] eq {1}
+ } else {
+ fail "Client is not blocked"
+ }
+ set id [r stream.add mystream field 2 value b]
+ assert_equal "{mystream {{$id {field 2 value b}}}}" [$rd2 read]
+ }
+
+ test {Module stream add benchmark (1M stream add)} {
+ set n 1000000
+ r del mystream
+ set result [r stream.addn mystream $n field value]
+ assert_equal $result $n
+ }
+
+ test {Module stream iterator} {
+ r del mystream
+ set streamid1 [r xadd mystream * item 1 value a]
+ set streamid2 [r xadd mystream * item 2 value b]
+ # range result
+ set result1 [r stream.range mystream "-" "+"]
+ set expect1 [r xrange mystream "-" "+"]
+ assert_equal $result1 $expect1
+ # reverse range
+ set result_rev [r stream.range mystream "+" "-"]
+ set expect_rev [r xrevrange mystream "+" "-"]
+ assert_equal $result_rev $expect_rev
+
+        # only one item: range from the beginning up to the first ID
+ set result2 [r stream.range mystream "-" $streamid1]
+ assert_equal $result2 "{$streamid1 {item 1 value a}}"
+ assert_equal $result2 [list [list $streamid1 {item 1 value a}]]
+ # only one item: range with startid = endid
+ set result3 [r stream.range mystream $streamid2 $streamid2]
+ assert_equal $result3 "{$streamid2 {item 2 value b}}"
+ assert_equal $result3 [list [list $streamid2 {item 2 value b}]]
+ }
+
+ test {Module stream iterator delete} {
+ r del mystream
+ set id1 [r xadd mystream * normal item]
+ set id2 [r xadd mystream * selfdestruct yes]
+ set id3 [r xadd mystream * another item]
+ # stream.range deletes the "selfdestruct" item after returning it
+ assert_equal \
+ "{$id1 {normal item}} {$id2 {selfdestruct yes}} {$id3 {another item}}" \
+ [r stream.range mystream - +]
+ # now, the "selfdestruct" item is gone
+ assert_equal \
+ "{$id1 {normal item}} {$id3 {another item}}" \
+ [r stream.range mystream - +]
+ }
+
+ test {Module stream trim by length} {
+ r del mystream
+ # exact maxlen
+ r xadd mystream * item 1 value a
+ r xadd mystream * item 2 value b
+ r xadd mystream * item 3 value c
+ assert_equal 3 [r xlen mystream]
+ assert_equal 0 [r stream.trim mystream maxlen = 5]
+ assert_equal 3 [r xlen mystream]
+ assert_equal 2 [r stream.trim mystream maxlen = 1]
+ assert_equal 1 [r xlen mystream]
+ assert_equal 1 [r stream.trim mystream maxlen = 0]
+ # check that there is no limit for exact maxlen
+ r stream.addn mystream 20000 item x value y
+ assert_equal 20000 [r stream.trim mystream maxlen = 0]
+ # approx maxlen (100 items per node implies default limit 10K items)
+ r stream.addn mystream 20000 item x value y
+ assert_equal 20000 [r xlen mystream]
+ assert_equal 10000 [r stream.trim mystream maxlen ~ 2]
+ assert_equal 9900 [r stream.trim mystream maxlen ~ 2]
+ assert_equal 0 [r stream.trim mystream maxlen ~ 2]
+ assert_equal 100 [r xlen mystream]
+ assert_equal 100 [r stream.trim mystream maxlen ~ 0]
+ assert_equal 0 [r xlen mystream]
+ }
+
+ test {Module stream trim by ID} {
+ r del mystream
+ # exact minid
+ r xadd mystream * item 1 value a
+ r xadd mystream * item 2 value b
+ set minid [r xadd mystream * item 3 value c]
+ assert_equal 3 [r xlen mystream]
+ assert_equal 0 [r stream.trim mystream minid = -]
+ assert_equal 3 [r xlen mystream]
+ assert_equal 2 [r stream.trim mystream minid = $minid]
+ assert_equal 1 [r xlen mystream]
+ assert_equal 1 [r stream.trim mystream minid = +]
+ # check that there is no limit for exact minid
+ r stream.addn mystream 20000 item x value y
+ assert_equal 20000 [r stream.trim mystream minid = +]
+ # approx minid (100 items per node implies default limit 10K items)
+ r stream.addn mystream 19980 item x value y
+ set minid [r xadd mystream * item x value y]
+ r stream.addn mystream 19 item x value y
+ assert_equal 20000 [r xlen mystream]
+ assert_equal 10000 [r stream.trim mystream minid ~ $minid]
+ assert_equal 9900 [r stream.trim mystream minid ~ $minid]
+ assert_equal 0 [r stream.trim mystream minid ~ $minid]
+ assert_equal 100 [r xlen mystream]
+ assert_equal 100 [r stream.trim mystream minid ~ +]
+ assert_equal 0 [r xlen mystream]
+ }
+}
diff --git a/tests/unit/oom-score-adj.tcl b/tests/unit/oom-score-adj.tcl
index cf671fe6a..efa61b759 100644
--- a/tests/unit/oom-score-adj.tcl
+++ b/tests/unit/oom-score-adj.tcl
@@ -39,7 +39,7 @@ if {$system_name eq {linux}} {
r bgsave
set child_pid [get_child_pid 0]
- assert {[get_oom_score_adj $child_pid] == [expr $base + 30]}
+ assert_equal [get_oom_score_adj $child_pid] [expr $base + 30]
}
# Failed oom-score-adj tests can only run unprivileged
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index d98dc1bd4..a35ac1752 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -321,3 +321,47 @@ start_server {tags {"other"}} {
assert_match "*table size: 8192*" [r debug HTSTATS 9]
}
}
+
+proc read_proc_title {pid} {
+ set fd [open "/proc/$pid/cmdline" "r"]
+ set cmdline [read $fd 1024]
+ close $fd
+
+ return $cmdline
+}
+
+start_server {tags {"other"}} {
+ test {Process title set as expected} {
+ # Test only on Linux where it's easy to get cmdline without relying on tools.
+ # Skip valgrind as it messes up the arguments.
+ set os [exec uname]
+ if {$os == "Linux" && !$::valgrind} {
+ # Set a custom template
+ r config set "proc-title-template" "TEST {title} {listen-addr} {port} {tls-port} {unixsocket} {config-file}"
+ set cmdline [read_proc_title [srv 0 pid]]
+
+ assert_equal "TEST" [lindex $cmdline 0]
+ assert_match "*/redis-server" [lindex $cmdline 1]
+
+ if {$::tls} {
+ set expect_port 0
+ set expect_tls_port [srv 0 port]
+ } else {
+ set expect_port [srv 0 port]
+ set expect_tls_port 0
+ }
+ set port [srv 0 port]
+
+ assert_equal "$::host:$port" [lindex $cmdline 2]
+ assert_equal $expect_port [lindex $cmdline 3]
+ assert_equal $expect_tls_port [lindex $cmdline 4]
+ assert_match "*/tests/tmp/server.*/socket" [lindex $cmdline 5]
+ assert_match "*/tests/tmp/redis.conf.*" [lindex $cmdline 6]
+
+ # Try setting a bad template
+ catch {r config set "proc-title-template" "{invalid-var}"} err
+ assert_match {*template format is invalid*} $err
+ }
+ }
+}
+
diff --git a/tests/unit/pause.tcl b/tests/unit/pause.tcl
index 9f5cfd607..67b684d36 100644
--- a/tests/unit/pause.tcl
+++ b/tests/unit/pause.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"pause"}} {
+start_server {tags {"pause network"}} {
test "Test read commands are not blocked by client pause" {
r client PAUSE 100000000 WRITE
set rd [redis_deferring_client]
diff --git a/tests/unit/protocol.tcl b/tests/unit/protocol.tcl
index 4dfdc6f59..442c23de6 100644
--- a/tests/unit/protocol.tcl
+++ b/tests/unit/protocol.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"protocol"}} {
+start_server {tags {"protocol network"}} {
test "Handle an empty query" {
reconnect
r write "\r\n"
diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl
index 9c7a43bf0..966565ae1 100644
--- a/tests/unit/pubsub.tcl
+++ b/tests/unit/pubsub.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"pubsub"}} {
+start_server {tags {"pubsub network"}} {
proc __consume_subscribe_messages {client type channels} {
set numsub -1
set counts {}
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 9f9ff4df2..3981a2234 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"scan"}} {
+start_server {tags {"scan network"}} {
test "SCAN basic" {
r flushdb
r debug populate 1000
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 6fd152594..3aa3c0fba 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -330,6 +330,15 @@ start_server {tags {"scripting"}} {
set e
} {NOSCRIPT*}
+ test {SCRIPTING FLUSH ASYNC} {
+ for {set j 0} {$j < 100} {incr j} {
+ r script load "return $j"
+ }
+ assert { [string match "*number_of_cached_scripts:100*" [r info Memory]] }
+ r script flush async
+ assert { [string match "*number_of_cached_scripts:0*" [r info Memory]] }
+ }
+
test {SCRIPT EXISTS - can detect already defined scripts?} {
r eval "return 1+1" 0
r script exists a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bd9 a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bda
diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl
index 88cf9dc42..7aaca47ca 100644
--- a/tests/unit/tracking.tcl
+++ b/tests/unit/tracking.tcl
@@ -1,4 +1,4 @@
-start_server {tags {"tracking"}} {
+start_server {tags {"tracking network"}} {
# Create a deferred client we'll use to redirect invalidation
# messages to.
set rd_redirection [redis_deferring_client]
diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl
index 79e58301a..2f3ea37c2 100644
--- a/tests/unit/type/hash.tcl
+++ b/tests/unit/type/hash.tcl
@@ -18,6 +18,181 @@ start_server {tags {"hash"}} {
assert_encoding ziplist smallhash
}
+ proc create_hash {key entries} {
+ r del $key
+ foreach entry $entries {
+ r hset $key [lindex $entry 0] [lindex $entry 1]
+ }
+ }
+
+ proc get_keys {l} {
+ set res {}
+ foreach entry $l {
+ set key [lindex $entry 0]
+ lappend res $key
+ }
+ return $res
+ }
+
+ foreach {type contents} "ziplist {{a 1} {b 2} {c 3}} hashtable {{a 1} {b 2} {[randstring 70 90 alpha] 3}}" {
+ set original_max_value [lindex [r config get hash-max-ziplist-value] 1]
+ r config set hash-max-ziplist-value 10
+ create_hash myhash $contents
+ assert_encoding $type myhash
+
+ test "HRANDFIELD - $type" {
+ unset -nocomplain myhash
+ array set myhash {}
+ for {set i 0} {$i < 100} {incr i} {
+ set key [r hrandfield myhash]
+ set myhash($key) 1
+ }
+ assert_equal [lsort [get_keys $contents]] [lsort [array names myhash]]
+ }
+ r config set hash-max-ziplist-value $original_max_value
+ }
+
+ test "HRANDFIELD with RESP3" {
+ r hello 3
+ set res [r hrandfield myhash 3 withvalues]
+ assert_equal [llength $res] 3
+ assert_equal [llength [lindex $res 1]] 2
+
+ set res [r hrandfield myhash 3]
+ assert_equal [llength $res] 3
+ assert_equal [llength [lindex $res 1]] 1
+ }
+ r hello 2
+
+ test "HRANDFIELD count of 0 is handled correctly" {
+ r hrandfield myhash 0
+ } {}
+
+ test "HRANDFIELD with <count> against non existing key" {
+ r hrandfield nonexisting_key 100
+ } {}
+
+ foreach {type contents} "
+ hashtable {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {[randstring 70 90 alpha] 10}}
+ ziplist {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {10 j}} " {
+ test "HRANDFIELD with <count> - $type" {
+ set original_max_value [lindex [r config get hash-max-ziplist-value] 1]
+ r config set hash-max-ziplist-value 10
+ create_hash myhash $contents
+ assert_encoding $type myhash
+
+ # create a dict for easy lookup
+ unset -nocomplain mydict
+ foreach {k v} [r hgetall myhash] {
+ dict append mydict $k $v
+ }
+
+ # We'll stress different parts of the code, see the implementation
+ # of HRANDFIELD for more information, but basically there are
+ # four different code paths.
+
+ # PATH 1: Use negative count.
+
+ # 1) Check that it returns repeated elements with and without values.
+ set res [r hrandfield myhash -20]
+ assert_equal [llength $res] 20
+ # again with WITHVALUES
+ set res [r hrandfield myhash -20 withvalues]
+ assert_equal [llength $res] 40
+
+ # 2) Check that all the elements actually belong to the original hash.
+ foreach {key val} $res {
+ assert {[dict exists $mydict $key]}
+ }
+
+ # 3) Check that eventually all the elements are returned.
+ # Use both WITHVALUES and without
+ unset -nocomplain auxset
+ set iterations 1000
+ while {$iterations != 0} {
+ incr iterations -1
+ if {[expr {$iterations % 2}] == 0} {
+ set res [r hrandfield myhash -3 withvalues]
+ foreach {key val} $res {
+ dict append auxset $key $val
+ }
+ } else {
+ set res [r hrandfield myhash -3]
+ foreach key $res {
+                        dict append auxset $key
+ }
+ }
+ if {[lsort [dict keys $mydict]] eq
+ [lsort [dict keys $auxset]]} {
+ break;
+ }
+ }
+ assert {$iterations != 0}
+
+ # PATH 2: positive count (unique behavior) with requested size
+ # equal or greater than set size.
+ foreach size {10 20} {
+ set res [r hrandfield myhash $size]
+ assert_equal [llength $res] 10
+ assert_equal [lsort $res] [lsort [dict keys $mydict]]
+
+ # again with WITHVALUES
+ set res [r hrandfield myhash $size withvalues]
+ assert_equal [llength $res] 20
+ assert_equal [lsort $res] [lsort $mydict]
+ }
+
+            # PATH 3: Ask for almost as many elements as there are in the hash.
+            # In this case the implementation will duplicate the original
+            # hash and will remove random elements up to the requested size.
+            #
+            # PATH 4: Ask for a number of elements definitely smaller than
+            # the hash size.
+            #
+            # We can test both code paths with the same code, just by
+            # changing the requested size.
+ foreach size {8 2} {
+ set res [r hrandfield myhash $size]
+ assert_equal [llength $res] $size
+ # again with WITHVALUES
+ set res [r hrandfield myhash $size withvalues]
+ assert_equal [llength $res] [expr {$size * 2}]
+
+ # 1) Check that all the elements actually belong to the
+                # original hash.
+ foreach ele [dict keys $res] {
+ assert {[dict exists $mydict $ele]}
+ }
+
+ # 2) Check that eventually all the elements are returned.
+ # Use both WITHVALUES and without
+ unset -nocomplain auxset
+ set iterations 1000
+ while {$iterations != 0} {
+ incr iterations -1
+ if {[expr {$iterations % 2}] == 0} {
+ set res [r hrandfield myhash $size withvalues]
+ foreach {key value} $res {
+ dict append auxset $key $value
+ }
+ } else {
+ set res [r hrandfield myhash $size]
+ foreach key $res {
+ dict append auxset $key
+ }
+ }
+ if {[lsort [dict keys $mydict]] eq
+ [lsort [dict keys $auxset]]} {
+ break;
+ }
+ }
+ assert {$iterations != 0}
+ }
+ }
+ r config set hash-max-ziplist-value $original_max_value
+ }
+
+
test {HSET/HLEN - Big hash creation} {
array set bighash {}
for {set i 0} {$i < 1024} {incr i} {
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 84c31c4e4..091ef7f0f 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -501,7 +501,7 @@ start_server {
set iterations 1000
while {$iterations != 0} {
incr iterations -1
- set res [r srandmember myset -10]
+ set res [r srandmember myset $size]
foreach ele $res {
set auxset($ele) 1
}
diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl
index 16e961623..43968b26b 100644
--- a/tests/unit/type/string.tcl
+++ b/tests/unit/type/string.tcl
@@ -102,6 +102,91 @@ start_server {tags {"string"}} {
assert_equal 20 [r get x]
}
+ test "GETEX EX option" {
+ r del foo
+ r set foo bar
+ r getex foo ex 10
+ assert_range [r ttl foo] 5 10
+ }
+
+ test "GETEX PX option" {
+ r del foo
+ r set foo bar
+ r getex foo px 10000
+ assert_range [r pttl foo] 5000 10000
+ }
+
+ test "GETEX EXAT option" {
+ r del foo
+ r set foo bar
+ r getex foo exat [expr [clock seconds] + 10]
+ assert_range [r ttl foo] 5 10
+ }
+
+ test "GETEX PXAT option" {
+ r del foo
+ r set foo bar
+ r getex foo pxat [expr [clock milliseconds] + 10000]
+ assert_range [r pttl foo] 5000 10000
+ }
+
+ test "GETEX PERSIST option" {
+ r del foo
+ r set foo bar ex 10
+ assert_range [r ttl foo] 5 10
+ r getex foo persist
+ assert_equal -1 [r ttl foo]
+ }
+
+ test "GETEX no option" {
+ r del foo
+ r set foo bar
+ r getex foo
+ assert_equal bar [r getex foo]
+ }
+
+ test "GETEX syntax errors" {
+ set ex {}
+ catch {r getex foo non-existent-option} ex
+ set ex
+ } {*syntax*}
+
+ test "GETEX no arguments" {
+ set ex {}
+ catch {r getex} ex
+ set ex
+ } {*wrong number of arguments*}
+
+ test "GETDEL command" {
+ r del foo
+ r set foo bar
+ assert_equal bar [r getdel foo ]
+ assert_equal {} [r getdel foo ]
+ }
+
+ test {GETDEL propagate as DEL command to replica} {
+ set repl [attach_to_replication_stream]
+ r set foo bar
+ r getdel foo
+ assert_replication_stream $repl {
+ {select *}
+ {set foo bar}
+ {del foo}
+ }
+ }
+
+ test {GETEX without argument does not propagate to replica} {
+ set repl [attach_to_replication_stream]
+ r set foo bar
+ r getex foo
+ r del foo
+ assert_replication_stream $repl {
+ {select *}
+ {set foo bar}
+ {del foo}
+ }
+ }
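+
+ # A minimal sketch of a possible companion test, assuming that GETEX
+ # with an expiry option replicates its side effect as PEXPIREAT; the
+ # propagated command name is an assumption here, not something this
+ # suite verifies elsewhere.
+ test {GETEX with EX option propagates as PEXPIREAT to replica} {
+ set repl [attach_to_replication_stream]
+ r set foo bar
+ r getex foo ex 100
+ assert_replication_stream $repl {
+ {select *}
+ {set foo bar}
+ {pexpireat foo *}
+ }
+ }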
+
test {MGET} {
r flushdb
r set foo BAR
@@ -437,6 +522,17 @@ start_server {tags {"string"}} {
assert {$ttl <= 10 && $ttl > 5}
}
+ test "Extended SET EXAT option" {
+ r del foo
+ r set foo bar exat [expr [clock seconds] + 10]
+ assert_range [r ttl foo] 5 10
+ }
+
+ test "Extended SET PXAT option" {
+ r del foo
+ r set foo bar pxat [expr [clock milliseconds] + 10000]
+ assert_range [r ttl foo] 5 10
+ }
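+ # EXAT and PXAT take an absolute Unix time (in seconds and milliseconds
+ # respectively), so "now + 10 seconds" above should leave a relative TTL
+ # within the asserted 5..10 second window.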
test {Extended SET using multiple options at once} {
r set foo val
assert {[r set foo bar xx px 10000] eq {OK}}
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index 8318ebb63..c657c1e4e 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -7,6 +7,8 @@ start_server {tags {"zset"}} {
}
proc basics {encoding} {
+ set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1]
+ set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
if {$encoding == "ziplist"} {
r config set zset-max-ziplist-entries 128
r config set zset-max-ziplist-value 64
@@ -713,6 +715,12 @@ start_server {tags {"zset"}} {
assert_equal {b 3 c 5} [r zinter 2 zseta zsetb withscores]
}
+ test "ZINTER RESP3 - $encoding" {
+ r hello 3
+ assert_equal {{b 3.0} {c 5.0}} [r zinter 2 zseta zsetb withscores]
+ }
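+ # (Under RESP3 each member is grouped with its score and the score is
+ # rendered as a double, hence {b 3.0} {c 5.0} above instead of the flat
+ # {b 3 c 5} reply seen with RESP2.)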
+ r hello 2
+
test "ZINTERSTORE with weights - $encoding" {
assert_equal 2 [r zinterstore zsetc 2 zseta zsetb weights 2 3]
assert_equal {b 7 c 12} [r zrange zsetc 0 -1 withscores]
@@ -919,6 +927,9 @@ start_server {tags {"zset"}} {
assert_equal 0 [r zcard z1]
assert_equal 1 [r zcard z2]
}
+
+ r config set zset-max-ziplist-entries $original_max_entries
+ r config set zset-max-ziplist-value $original_max_value
}
basics ziplist
@@ -1016,6 +1027,8 @@ start_server {tags {"zset"}} {
}
proc stressers {encoding} {
+ set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1]
+ set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
if {$encoding == "ziplist"} {
# Little extra to allow proper fuzzing in the sorting stresser
r config set zset-max-ziplist-entries 256
@@ -1440,6 +1453,8 @@ start_server {tags {"zset"}} {
r zadd zset 0 foo
assert_equal {zset foo 0} [$rd read]
}
+ r config set zset-max-ziplist-entries $original_max_entries
+ r config set zset-max-ziplist-value $original_max_value
}
tags {"slow"} {
@@ -1481,6 +1496,12 @@ start_server {tags {"zset"}} {
r zrange z2 0 -1 withscores
} {a 1 b 2 c 3 d 4}
+ test {ZRANGESTORE RESP3} {
+ r hello 3
+ r zrange z2 0 -1 withscores
+ } {{a 1.0} {b 2.0} {c 3.0} {d 4.0}}
+ r hello 2
+
test {ZRANGESTORE range} {
set res [r zrangestore z2 z1 1 2]
assert_equal $res 2
@@ -1554,4 +1575,171 @@ start_server {tags {"zset"}} {
catch {r zrangebyscore z1 0 -1 REV} err
assert_match "*syntax*" $err
}
+
+ proc get_keys {l} {
+ set res {}
+ foreach {score key} $l {
+ lappend res $key
+ }
+ return $res
+ }
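+ # For example, get_keys {1 a 2 b 3 c} returns {a b c}: it walks the
+ # score/member pairs that create_zset consumes and keeps only the members.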
+
+ foreach {type contents} "ziplist {1 a 2 b 3 c} skiplist {1 a 2 b 3 [randstring 70 90 alpha]}" {
+ set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
+ r config set zset-max-ziplist-value 10
+ create_zset myzset $contents
+ assert_encoding $type myzset
+
+ test "ZRANDMEMBER - $type" {
+ unset -nocomplain myzset
+ array set myzset {}
+ for {set i 0} {$i < 100} {incr i} {
+ set key [r zrandmember myzset]
+ set myzset($key) 1
+ }
+ assert_equal [lsort [get_keys $contents]] [lsort [array names myzset]]
+ }
+ r config set zset-max-ziplist-value $original_max_value
+ }
+
+ test "ZRANDMEMBER with RESP3" {
+ r hello 3
+ set res [r zrandmember myzset 3 withscores]
+ assert_equal [llength $res] 3
+ assert_equal [llength [lindex $res 1]] 2
+
+ set res [r zrandmember myzset 3]
+ assert_equal [llength $res] 3
+ assert_equal [llength [lindex $res 1]] 1
+ }
+ r hello 2
+
+ test "ZRANDMEMBER count of 0 is handled correctly" {
+ r zrandmember myzset 0
+ } {}
+
+ test "ZRANDMEMBER with <count> against non existing key" {
+ r zrandmember nonexisting_key 100
+ } {}
+
+ foreach {type contents} "
+ skiplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 [randstring 70 90 alpha]}
+ ziplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 j} " {
+ test "ZRANDMEMBER with <count> - $type" {
+ set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
+ r config set zset-max-ziplist-value 10
+ create_zset myzset $contents
+ assert_encoding $type myzset
+
+ # create a dict for easy lookup
+ unset -nocomplain mydict
+ foreach {k v} [r zrange myzset 0 -1 withscores] {
+ dict append mydict $k $v
+ }
+
+ # We'll stress different parts of the code, see the implementation
+ # of ZRANDMEMBER for more information, but basically there are
+ # four different code paths.
+
+ # PATH 1: Use negative count.
+
+ # 1) Check that it returns repeated elements with and without scores.
+ set res [r zrandmember myzset -20]
+ assert_equal [llength $res] 20
+ # again with WITHSCORES
+ set res [r zrandmember myzset -20 withscores]
+ assert_equal [llength $res] 40
+
+ # 2) Check that all the elements actually belong to the original zset.
+ foreach {key val} $res {
+ assert {[dict exists $mydict $key]}
+ }
+
+ # 3) Check that eventually all the elements are returned.
+ # Use both WITHSCORES and without
+ unset -nocomplain auxset
+ set iterations 1000
+ while {$iterations != 0} {
+ incr iterations -1
+ if {[expr {$iterations % 2}] == 0} {
+ set res [r zrandmember myzset -3 withscores]
+ foreach {key val} $res {
+ dict append auxset $key $val
+ }
+ } else {
+ set res [r zrandmember myzset -3]
+ foreach key $res {
+ dict append auxset $key
+ }
+ }
+ if {[lsort [dict keys $mydict]] eq
+ [lsort [dict keys $auxset]]} {
+ break;
+ }
+ }
+ assert {$iterations != 0}
+
+ # PATH 2: positive count (unique behavior) with a requested size
+ # equal to or greater than the set size.
+ foreach size {10 20} {
+ set res [r zrandmember myzset $size]
+ assert_equal [llength $res] 10
+ assert_equal [lsort $res] [lsort [dict keys $mydict]]
+
+ # again with WITHSCORES
+ set res [r zrandmember myzset $size withscores]
+ assert_equal [llength $res] 20
+ assert_equal [lsort $res] [lsort $mydict]
+ }
+
+ # PATH 3: Ask for almost as many elements as there are in the set.
+ # In this case the implementation will duplicate the original
+ # set and will remove random elements up to the requested size.
+ #
+ # PATH 4: Ask for a number of elements definitely smaller than
+ # the set size.
+ #
+ # We can test both code paths just by changing the size, while
+ # using the same code.
+ foreach size {8 2} {
+ set res [r zrandmember myzset $size]
+ assert_equal [llength $res] $size
+ # again with WITHSCORES
+ set res [r zrandmember myzset $size withscores]
+ assert_equal [llength $res] [expr {$size * 2}]
+
+ # 1) Check that all the elements actually belong to the
+ # original set.
+ foreach ele [dict keys $res] {
+ assert {[dict exists $mydict $ele]}
+ }
+
+ # 2) Check that eventually all the elements are returned.
+ # Use both WITHSCORES and without
+ unset -nocomplain auxset
+ set iterations 1000
+ while {$iterations != 0} {
+ incr iterations -1
+ if {[expr {$iterations % 2}] == 0} {
+ set res [r zrandmember myzset $size withscores]
+ foreach {key value} $res {
+ dict append auxset $key $value
+ }
+ } else {
+ set res [r zrandmember myzset $size]
+ foreach key $res {
+ dict append auxset $key
+ }
+ }
+ if {[lsort [dict keys $mydict]] eq
+ [lsort [dict keys $auxset]]} {
+ break;
+ }
+ }
+ assert {$iterations != 0}
+ }
+ }
+ r config set zset-max-ziplist-value $original_max_value
+ }
+
}
diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl
index 0a4965c20..78c3d8202 100644
--- a/tests/unit/wait.tcl
+++ b/tests/unit/wait.tcl
@@ -1,6 +1,6 @@
source tests/support/cli.tcl
-start_server {tags {"wait"}} {
+start_server {tags {"wait network"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]