diff options
91 files changed, 6051 insertions, 921 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2582c53a4..2e1e7865c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,3 +58,14 @@ jobs: run: | yum -y install gcc make make REDIS_CFLAGS='-Werror' + + build-freebsd: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: make + uses: vmactions/freebsd-vm@v0.1.0 + with: + usesh: true + prepare: pkg install -y gmake + run: gmake diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 028c44e0c..8fb23bac4 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -99,6 +99,23 @@ jobs: ./runtest-cluster --tls ./runtest-cluster + test-ubuntu-io-threads: + runs-on: ubuntu-latest + if: github.repository == 'redis/redis' + timeout-minutes: 14400 + steps: + - uses: actions/checkout@v2 + - name: make + run: | + make + - name: test + run: | + sudo apt-get install tcl8.5 tcl-tls + ./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network + - name: cluster tests + run: | + ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes + test-valgrind: runs-on: ubuntu-latest if: github.repository == 'redis/redis' @@ -186,3 +203,20 @@ jobs: - name: cluster tests run: ./runtest-cluster + test-freebsd: + runs-on: macos-latest + if: github.repository == 'redis/redis' + timeout-minutes: 14400 + steps: + - uses: actions/checkout@v2 + - name: test + uses: vmactions/freebsd-vm@v0.1.0 + with: + usesh: true + prepare: pkg install -y gmake lang/tcl85 + run: | + gmake + ./runtest --accurate --verbose --no-latency + MAKE=gmake ./runtest-moduleapi --verbose + ./runtest-sentinel + ./runtest-cluster diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 3434d4838..f52a22ed9 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -1,3 +1,53 @@ +Redis 6.2 RC3 Released Tue Feb 1 14:00:00 IST 2021 +================================================================================ + +Upgrade urgency 
LOW: This is the third Release Candidate of Redis 6.2. + +Here is a comprehensive list of changes in this release compared to 6.2 RC2, +each one includes the PR number that added it, so you can get more details +at https://github.com/redis/redis/pull/<number> + +New commands / args: +* Add HRANDFIELD and ZRANDMEMBER commands (#8297) +* Add FAILOVER command (#8315) +* Add GETEX, GETDEL commands (#8327) +* Add PXAT/EXAT arguments to SET command (#8327) +* Add SYNC arg to FLUSHALL and FLUSHDB, and ASYNC/SYNC arg to SCRIPT FLUSH (#8258) + +Sentinel: +* Add hostname support to Sentinel (#8282) +* Prevent file descriptors from leaking into Sentinel scripts (#8242) +* Fix config file line order dependency and config rewrite sequence (#8271) + +New configuration options: +* Add set-proc-title config option to disable changes to the process title (#3623) +* Add proc-title-template option to control what's shown in the process title (#8397) +* Add lazyfree-lazy-user-flush config option to control FLUSHALL, FLUSHDB and SCRIPT FLUSH (#8258) + +Bug fixes: +* AOF: recover from last write error by turning on/off appendonly config (#8030) +* Exit on fsync error when the AOF fsync policy is 'always' (#8347) +* Avoid assertions (on older kernels) when testing arm64 CoW bug (#8405) +* CONFIG REWRITE should honor umask settings (#8371) +* Fix firstkey,lastkey,step in COMMAND command for some commands (#8367) + +Special considerations: +* Fix misleading description of the save configuration directive (#8337) + +Improvements: +* A way to get RDB file via replication without excessive replication buffers (#8303) +* Optimize performance of clusterGenNodesDescription for large clusters (#8182) + +Info fields and introspection changes: +* SLOWLOG and LATENCY monitor include unblocking time of blocked commands (#7491) + +Modules: +* Add modules API for streams (#8288) +* Add event for fork child birth and termination (#8289) +* Add RM_BlockedClientMeasureTime* etc, to track background 
processing in commandstats (#7491) +* Fix bug in v6.2, wrong value passed to the new unlink callback (#8381) +* Fix bug in v6.2, modules blocked on keys unblock on commands like LPUSH (#8356) + ================================================================================ Redis 6.2 RC2 Released Tue Jan 12 16:17:20 IST 2021 ================================================================================ @@ -255,35 +305,39 @@ and we don't get reports of serious issues for a while. A special thank you for the amount of work put into this release by: - Oran Agra - Yossi Gottlieb -- Itamar Haber -- Guy Benoish - Filipe Oliveira +- Viktor Söderqvist +- Guy Benoish +- Itamar Haber +- Yang Bodong - Madelyn Olson - Wang Yuan - Felipe Machado -- Yang Bodong +- Wen Hui - Tatsuya Arisawa - Jonah H. Harris +- Raghav Muddur - Jim Brunner - Yaacov Hazan -- Wen Hui +- Allen Farris - Chen Yang - Nitai Caro - Meir Shpilraien - maohuazhu - Valentino Geron -- Qu Chen +- Zhao Zhao - sundb +- Qu Chen - George Prekas -- Zhao Zhao - Tyson Andre - Michael Grunder - alexronke-channeladvisor +- Andy Pan - Wu Yunlong - Wei Kukey - Yoav Steinberg -- Uri Shachar - Greg Femec +- Uri Shachar - Nykolas Laurentino de Lima - xhe - zhenwei pi diff --git a/redis.conf b/redis.conf index a5062fda9..465d56fc0 100644 --- a/redis.conf +++ b/redis.conf @@ -325,31 +325,52 @@ databases 16 # ASCII art logo in startup logs by setting the following option to yes. always-show-logo no +# By default, Redis modifies the process title (as seen in 'top' and 'ps') to +# provide some runtime information. It is possible to disable this and leave +# the process name as executed by setting the following to no. +set-proc-title yes + +# When changing the process title, Redis uses the following template to construct +# the modified title. +# +# Template variables are specified in curly brackets. The following variables are +# supported: +# +# {title} Name of process as executed if parent, or type of child process. 
+# {listen-addr} Bind address or '*' followed by TCP or TLS port listening on, or +# Unix socket if only that's available. +# {server-mode} Special mode, i.e. "[sentinel]" or "[cluster]". +# {port} TCP port listening on, or 0. +# {tls-port} TLS port listening on, or 0. +# {unixsocket} Unix domain socket listening on, or "". +# {config-file} Name of configuration file used. +# +proc-title-template "{title} {listen-addr} {server-mode}" + ################################ SNAPSHOTTING ################################ + +# Save the DB to disk. # -# Save the DB on disk: +# save <seconds> <changes> # -# save <seconds> <changes> +# Redis will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. # -# Will save the DB if both the given number of seconds and the given -# number of write operations against the DB occurred. +# Snapshotting can be completely disabled with a single empty string argument +# as in following example: # -# In the example below the behavior will be to save: -# after 900 sec (15 min) if at least 1 key changed -# after 300 sec (5 min) if at least 10 keys changed -# after 60 sec if at least 10000 keys changed +# save "" # -# Note: you can disable saving completely by commenting out all "save" lines. +# Unless specified otherwise, by default Redis will save the DB: +# * After 3600 seconds (an hour) if at least 1 key changed +# * After 300 seconds (5 minutes) if at least 100 keys changed +# * After 60 seconds if at least 10000 keys changed # -# It is also possible to remove all the previously configured save -# points by adding a save directive with a single empty string argument -# like in the following example: +# You can set these explicitly by uncommenting the three following lines. 
# -# save "" - -save 900 1 -save 300 10 -save 60 10000 +# save 3600 1 +# save 300 100 +# save 60 10000 # By default Redis will stop accepting writes if RDB snapshots are enabled # (at least one save point) and the latest background save failed. @@ -1089,6 +1110,13 @@ replica-lazy-flush no lazyfree-lazy-user-del no +# FLUSHDB, FLUSHALL, and SCRIPT FLUSH support both asynchronous and synchronous +# deletion, which can be controlled by passing the [SYNC|ASYNC] flags into the +# commands. When neither flag is passed, this directive will be used to determine +# if the data should be deleted asynchronously. + +lazyfree-lazy-user-flush no + ################################ THREADED I/O ################################# # Redis is mostly single threaded, however there are certain threaded diff --git a/runtest-moduleapi b/runtest-moduleapi index 9a48867d2..e554226c1 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -23,6 +23,7 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/hooks \ --single unit/moduleapi/misc \ --single unit/moduleapi/blockonkeys \ +--single unit/moduleapi/blockonbackground \ --single unit/moduleapi/scan \ --single unit/moduleapi/datatype \ --single unit/moduleapi/auth \ @@ -31,4 +32,5 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/getkeys \ --single unit/moduleapi/test_lazyfree \ --single unit/moduleapi/defrag \ +--single unit/moduleapi/stream \ "${@}" diff --git a/sentinel.conf b/sentinel.conf index 39d6929e7..8647379d8 100644 --- a/sentinel.conf +++ b/sentinel.conf @@ -321,3 +321,21 @@ sentinel deny-scripts-reconfig yes # is possible to just rename a command to itself: # # SENTINEL rename-command mymaster CONFIG CONFIG + +# HOSTNAMES SUPPORT +# +# Normally Sentinel uses only IP addresses and requires SENTINEL MONITOR +# to specify an IP address. Also, it requires the Redis replica-announce-ip +# keyword to specify only IP addresses. +# +# You may enable hostnames support by enabling resolve-hostnames. 
Note +# that you must make sure your DNS is configured properly and that DNS +# resolution does not introduce very long delays. +# +SENTINEL resolve-hostnames no + +# When resolve-hostnames is enabled, Sentinel still uses IP addresses +# when exposing instances to users, configuration files, etc. If you want +# to retain the hostnames when announced, enable announce-hostnames below. +# +SENTINEL announce-hostnames no @@ -1024,8 +1024,8 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { /* Return a description of the error that occurred in ACLSetUser() according to * the errno value set by the function on error. */ -char *ACLSetUserStringError(void) { - char *errmsg = "Wrong format"; +const char *ACLSetUserStringError(void) { + const char *errmsg = "Wrong format"; if (errno == ENOENT) errmsg = "Unknown command or category name in ACL"; else if (errno == EINVAL) @@ -1454,7 +1454,7 @@ int ACLLoadConfiguredUsers(void) { /* Load every rule defined for this user. */ for (int j = 1; aclrules[j]; j++) { if (ACLSetUser(u,aclrules[j],sdslen(aclrules[j])) != C_OK) { - char *errmsg = ACLSetUserStringError(); + const char *errmsg = ACLSetUserStringError(); serverLog(LL_WARNING,"Error loading ACL rule '%s' for " "the user named '%s': %s", aclrules[j],aclrules[0],errmsg); @@ -1587,7 +1587,7 @@ sds ACLLoadFromFile(const char *filename) { for (j = 2; j < argc; j++) { argv[j] = sdstrim(argv[j],"\t\r\n"); if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) { - char *errmsg = ACLSetUserStringError(); + const char *errmsg = ACLSetUserStringError(); errors = sdscatprintf(errors, "%s:%d: %s. 
", server.acl_filename, linenum, errmsg); @@ -1908,7 +1908,7 @@ void aclCommand(client *c) { for (int j = 3; j < c->argc; j++) { if (ACLSetUser(tempu,c->argv[j]->ptr,sdslen(c->argv[j]->ptr)) != C_OK) { - char *errmsg = ACLSetUserStringError(); + const char *errmsg = ACLSetUserStringError(); addReplyErrorFormat(c, "Error in ACL SETUSER modifier '%s': %s", (char*)c->argv[j]->ptr, errmsg); @@ -31,6 +31,7 @@ */ #include "ae.h" +#include "anet.h" #include <stdio.h> #include <sys/time.h> diff --git a/src/ae_epoll.c b/src/ae_epoll.c index fa197297e..07ca8ca41 100644 --- a/src/ae_epoll.c +++ b/src/ae_epoll.c @@ -51,6 +51,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) { zfree(state); return -1; } + anetCloexec(state->epfd); eventLoop->apidata = state; return 0; } diff --git a/src/ae_evport.c b/src/ae_evport.c index 4e254b602..7a0b03aea 100644 --- a/src/ae_evport.c +++ b/src/ae_evport.c @@ -82,6 +82,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) { zfree(state); return -1; } + anetCloexec(state->portfd); state->npending = 0; diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c index 6796f4ceb..b146f2519 100644 --- a/src/ae_kqueue.c +++ b/src/ae_kqueue.c @@ -53,6 +53,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) { zfree(state); return -1; } + anetCloexec(state->kqfd); eventLoop->apidata = state; return 0; } diff --git a/src/anet.c b/src/anet.c index 7a0a1b1ed..0bfa575f5 100644 --- a/src/anet.c +++ b/src/anet.c @@ -69,6 +69,11 @@ int anetSetBlock(char *err, int fd, int non_block) { return ANET_ERR; } + /* Check if this flag has been set or unset, if so, + * then there is no need to call fcntl to set/unset it again. */ + if (!!(flags & O_NONBLOCK) == !!non_block) + return ANET_OK; + if (non_block) flags |= O_NONBLOCK; else @@ -89,6 +94,29 @@ int anetBlock(char *err, int fd) { return anetSetBlock(err,fd,0); } +/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks. 
+ * This function should be invoked for fd's on specific places + * where fork + execve system calls are called. */ +int anetCloexec(int fd) { + int r; + int flags; + + do { + r = fcntl(fd, F_GETFD); + } while (r == -1 && errno == EINTR); + + if (r == -1 || (r & FD_CLOEXEC)) + return r; + + flags = r | FD_CLOEXEC; + + do { + r = fcntl(fd, F_SETFD, flags); + } while (r == -1 && errno == EINTR); + + return r; +} + /* Set TCP keep alive option to detect dead peers. The interval option * is only used for Linux as we are using Linux-specific APIs to set * the probe send time, interval, and count. */ @@ -207,14 +235,13 @@ int anetRecvTimeout(char *err, int fd, long long ms) { return ANET_OK; } -/* anetGenericResolve() is called by anetResolve() and anetResolveIP() to - * do the actual work. It resolves the hostname "host" and set the string - * representation of the IP address into the buffer pointed by "ipbuf". +/* Resolve the hostname "host" and set the string representation of the + * IP address into the buffer pointed by "ipbuf". * * If flags is set to ANET_IP_ONLY the function only resolves hostnames * that are actually already IPv4 or IPv6 addresses. This turns the function * into a validating / normalizing function. 
*/ -int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, +int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags) { struct addrinfo hints, *info; @@ -241,14 +268,6 @@ int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, return ANET_OK; } -int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len) { - return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_NONE); -} - -int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len) { - return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_IP_ONLY); -} - static int anetSetReuseAddr(char *err, int fd) { int yes = 1; /* Make sure connection-intensive things like the redis benchmark diff --git a/src/anet.h b/src/anet.h index fbf41cd17..5da2f3b46 100644 --- a/src/anet.h +++ b/src/anet.h @@ -60,8 +60,7 @@ int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port, int anetUnixConnect(char *err, const char *path); int anetUnixNonBlockConnect(char *err, const char *path); int anetRead(int fd, char *buf, int count); -int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len); -int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len); +int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags); int anetTcpServer(char *err, int port, char *bindaddr, int backlog); int anetTcp6Server(char *err, int port, char *bindaddr, int backlog); int anetUnixServer(char *err, char *path, mode_t perm, int backlog); @@ -70,6 +69,7 @@ int anetUnixAccept(char *err, int serversock); int anetWrite(int fd, char *buf, int count); int anetNonBlock(char *err, int fd); int anetBlock(char *err, int fd); +int anetCloexec(int fd); int anetEnableTcpNoDelay(char *err, int fd); int anetDisableTcpNoDelay(char *err, int fd); int anetTcpKeepAlive(char *err, int fd); @@ -235,6 +235,8 @@ void stopAppendOnly(void) { serverAssert(server.aof_state != AOF_OFF); flushAppendOnlyFile(1); 
redis_fsync(server.aof_fd); + server.aof_fsync_offset = server.aof_current_size; + server.aof_last_fsync = server.unixtime; close(server.aof_fd); server.aof_fd = -1; @@ -242,6 +244,8 @@ void stopAppendOnly(void) { server.aof_state = AOF_OFF; server.aof_rewrite_scheduled = 0; killAppendOnlyChild(); + sdsfree(server.aof_buf); + server.aof_buf = sdsempty(); } /* Called when the user switches from "appendonly no" to "appendonly yes" @@ -285,6 +289,12 @@ int startAppendOnly(void) { server.aof_state = AOF_WAIT_REWRITE; server.aof_last_fsync = server.unixtime; server.aof_fd = newfd; + + /* If AOF was in error state, we just ignore it and log the event. */ + if (server.aof_last_write_status == C_ERR) { + serverLog(LL_WARNING,"AOF reopen, just ignore the last error."); + server.aof_last_write_status = C_OK; + } return C_OK; } @@ -451,10 +461,11 @@ void flushAppendOnlyFile(int force) { /* Handle the AOF write error. */ if (server.aof_fsync == AOF_FSYNC_ALWAYS) { - /* We can't recover when the fsync policy is ALWAYS since the - * reply for the client is already in the output buffers, and we - * have the contract with the user that on acknowledged write data - * is synced on disk. */ + /* We can't recover when the fsync policy is ALWAYS since the reply + * for the client is already in the output buffers (both writes and + * reads), and the changes to the db can't be rolled back. Since we + * have a contract with the user that on acknowledged or observed + * writes are is synced on disk, we must exit. */ serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting..."); exit(1); } else { @@ -502,7 +513,14 @@ try_fsync: /* redis_fsync is defined as fdatasync() for Linux in order to avoid * flushing metadata. */ latencyStartMonitor(latency); - redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */ + /* Let's try to get this data on the disk. 
To guarantee data safe when + * the AOF fsync policy is 'always', we should exit if failed to fsync + * AOF (see comment next to the exit(1) after write error above). */ + if (redis_fsync(server.aof_fd) == -1) { + serverLog(LL_WARNING,"Can't persist AOF for fsync error when the " + "AOF fsync policy is 'always': %s. Exiting...", strerror(errno)); + exit(1); + } latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); server.aof_fsync_offset = server.aof_current_size; @@ -581,8 +599,6 @@ sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, r void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) { sds buf = sdsempty(); - robj *tmpargv[3]; - /* The DB this command was targeting is not the same as the last command * we appended. To issue a SELECT command is needed. */ if (dictid != server.aof_selected_db) { @@ -598,32 +614,31 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a cmd->proc == expireatCommand) { /* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */ buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); - } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) { - /* Translate SETEX/PSETEX to SET and PEXPIREAT */ - tmpargv[0] = createStringObject("SET",3); - tmpargv[1] = argv[1]; - tmpargv[2] = argv[3]; - buf = catAppendOnlyGenericCommand(buf,3,tmpargv); - decrRefCount(tmpargv[0]); - buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); } else if (cmd->proc == setCommand && argc > 3) { - int i; - robj *exarg = NULL, *pxarg = NULL; - for (i = 3; i < argc; i ++) { - if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1]; - if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1]; + robj *pxarg = NULL; + /* When SET is used with EX/PX argument setGenericCommand propagates them with PX millisecond argument. + * So since the command arguments are re-written there, we can rely here on the index of PX being 3. 
*/ + if (!strcasecmp(argv[3]->ptr, "px")) { + pxarg = argv[4]; } - serverAssert(!(exarg && pxarg)); - - if (exarg || pxarg) { - /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */ - buf = catAppendOnlyGenericCommand(buf,3,argv); - if (exarg) - buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1], - exarg); - if (pxarg) - buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1], - pxarg); + /* For AOF we convert SET key value relative time in milliseconds to SET key value absolute time in + * millisecond. Whenever the condition is true it implies that original SET has been transformed + * to SET PX with millisecond time argument so we do not need to worry about unit here.*/ + if (pxarg) { + robj *millisecond = getDecodedObject(pxarg); + long long when = strtoll(millisecond->ptr,NULL,10); + when += mstime(); + + decrRefCount(millisecond); + + robj *newargs[5]; + newargs[0] = argv[0]; + newargs[1] = argv[1]; + newargs[2] = argv[2]; + newargs[3] = shared.pxat; + newargs[4] = createStringObjectFromLongLong(when); + buf = catAppendOnlyGenericCommand(buf,5,newargs); + decrRefCount(newargs[4]); } else { buf = catAppendOnlyGenericCommand(buf,argc,argv); } @@ -1852,6 +1867,20 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { } latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency); + + if (server.aof_fsync == AOF_FSYNC_EVERYSEC) { + aof_background_fsync(newfd); + } else if (server.aof_fsync == AOF_FSYNC_ALWAYS) { + latencyStartMonitor(latency); + if (redis_fsync(newfd) == -1) { + serverLog(LL_WARNING, + "Error trying to fsync the parent diff to the rewritten AOF: %s", strerror(errno)); + close(newfd); + goto cleanup; + } + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("aof-rewrite-done-fsync",latency); + } serverLog(LL_NOTICE, "Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024)); @@ -1919,14 +1948,11 @@ 
void backgroundRewriteDoneHandler(int exitcode, int bysignal) { /* AOF enabled, replace the old fd with the new one. */ oldfd = server.aof_fd; server.aof_fd = newfd; - if (server.aof_fsync == AOF_FSYNC_ALWAYS) - redis_fsync(newfd); - else if (server.aof_fsync == AOF_FSYNC_EVERYSEC) - aof_background_fsync(newfd); server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; server.aof_fsync_offset = server.aof_current_size; + server.aof_last_fsync = server.unixtime; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ diff --git a/src/blocked.c b/src/blocked.c index 46935c79f..09e17213c 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -61,6 +61,9 @@ */ #include "server.h" +#include "slowlog.h" +#include "latency.h" +#include "monotonic.h" int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int wherefrom, int whereto); int getListPositionFromObjectOrReply(client *c, robj *arg, int *position); @@ -97,6 +100,20 @@ void blockClient(client *c, int btype) { } } +/* This function is called after a client has finished a blocking operation + * in order to update the total command duration, log the command into + * the Slow log if needed, and log the reply duration event if needed. */ +void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){ + const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us; + c->lastcmd->microseconds += total_cmd_duration; + /* Log the command into the Slow log if needed. */ + if (!(c->lastcmd->flags & CMD_SKIP_SLOWLOG)) { + slowlogPushEntryIfNeeded(c,c->argv,c->argc,total_cmd_duration); + /* Log the reply duration event. 
*/ + latencyAddSampleIfNeeded("command-unblocking",reply_us/1000); + } +} + /* This function is called in the beforeSleep() function of the event loop * in order to process the pending input buffer of clients that were * unblocked after a blocking operation. */ @@ -264,6 +281,8 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) { if (dstkey) incrRefCount(dstkey); unblockClient(receiver); + monotime replyTimer; + elapsedStart(&replyTimer); if (serveClientBlockedOnList(receiver, rl->key,dstkey,rl->db,value, wherefrom, whereto) == C_ERR) @@ -272,6 +291,7 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) { * to also undo the POP operation. */ listTypePush(o,value,wherefrom); } + updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer)); if (dstkey) decrRefCount(dstkey); decrRefCount(value); @@ -316,7 +336,10 @@ void serveClientsBlockedOnSortedSetKey(robj *o, readyList *rl) { receiver->lastcmd->proc == bzpopminCommand) ? ZSET_MIN : ZSET_MAX; unblockClient(receiver); + monotime replyTimer; + elapsedStart(&replyTimer); genericZpopCommand(receiver,&rl->key,1,where,1,NULL); + updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer)); zcard--; /* Replicate the command. */ @@ -406,6 +429,8 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) { } } + monotime replyTimer; + elapsedStart(&replyTimer); /* Emit the two elements sub-array consisting of * the name of the stream and the data we * extracted from it. Wrapped in a single-item @@ -425,6 +450,7 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) { streamReplyWithRange(receiver,s,&start,NULL, receiver->bpop.xread_count, 0, group, consumer, noack, &pi); + updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer)); /* Note that after we unblock the client, 'gt' * and other receiver->bpop stuff are no longer @@ -471,7 +497,10 @@ void serveClientsBlockedOnKeyByModule(readyList *rl) { * different modules with different triggers to consider if a key * is ready or not. 
This means we can't exit the loop but need * to continue after the first failure. */ + monotime replyTimer; + elapsedStart(&replyTimer); if (!moduleTryServeClientBlockedOnKey(receiver, rl->key)) continue; + updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer)); moduleUnblockClient(receiver); } @@ -684,10 +713,20 @@ static int getBlockedTypeByType(int type) { void signalKeyAsReady(redisDb *db, robj *key, int type) { readyList *rl; - /* If no clients are blocked on this type, just return */ + /* Quick returns. */ int btype = getBlockedTypeByType(type); - if (btype == BLOCKED_NONE || !server.blocked_clients_by_type[btype]) + if (btype == BLOCKED_NONE) { + /* The type can never block. */ return; + } + if (!server.blocked_clients_by_type[btype] && + !server.blocked_clients_by_type[BLOCKED_MODULE]) { + /* No clients block on this type. Note: Blocked modules are represented + * by BLOCKED_MODULE, even if the intention is to wake up by normal + * types (list, zset, stream), so we need to check that there are no + * blocked modules before we do a quick return here. */ + return; + } /* No clients blocking for this key? No need to queue it. */ if (dictFind(db->blocking_keys,key) == NULL) return; diff --git a/src/cluster.c b/src/cluster.c index 78c36e8d1..97a25b0b3 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -398,7 +398,7 @@ int clusterLockConfig(char *filename) { /* To lock it, we need to open the file in a way it is created if * it does not exist, otherwise there is a race condition with other * processes. */ - int fd = open(filename,O_WRONLY|O_CREAT,0644); + int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644); if (fd == -1) { serverLog(LL_WARNING, "Can't open %s in order to acquire a lock: %s", @@ -509,8 +509,7 @@ void clusterInit(void) { serverLog(LL_WARNING, "Redis port number too high. " "Cluster communication port is 10,000 port " "numbers higher than your Redis port. 
" - "Your Redis port number must be " - "lower than 55535."); + "Your Redis port number must be 55535 or less."); exit(1); } if (listenToPort(port+CLUSTER_PORT_INCR, @@ -779,6 +778,7 @@ clusterNode *createClusterNode(char *nodename, int flags) { node->configEpoch = 0; node->flags = flags; memset(node->slots,0,sizeof(node->slots)); + node->slots_info = NULL; node->numslots = 0; node->numslaves = 0; node->slaves = NULL; @@ -4144,8 +4144,8 @@ sds clusterGenNodeDescription(clusterNode *node) { sds ci; /* Node coordinates */ - ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ", - node->name, + ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN); + ci = sdscatfmt(ci," %s:%i@%i ", node->ip, node->port, node->cport); @@ -4154,40 +4154,46 @@ sds clusterGenNodeDescription(clusterNode *node) { ci = representClusterNodeFlags(ci, node->flags); /* Slave of... or just "-" */ + ci = sdscatlen(ci," ",1); if (node->slaveof) - ci = sdscatprintf(ci," %.40s ",node->slaveof->name); + ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN); else - ci = sdscatlen(ci," - ",3); + ci = sdscatlen(ci,"-",1); unsigned long long nodeEpoch = node->configEpoch; if (nodeIsSlave(node) && node->slaveof) { nodeEpoch = node->slaveof->configEpoch; } /* Latency from the POV of this node, config epoch, link status */ - ci = sdscatprintf(ci,"%lld %lld %llu %s", + ci = sdscatfmt(ci," %I %I %U %s", (long long) node->ping_sent, (long long) node->pong_received, nodeEpoch, (node->link || node->flags & CLUSTER_NODE_MYSELF) ? "connected" : "disconnected"); - /* Slots served by this instance */ - start = -1; - for (j = 0; j < CLUSTER_SLOTS; j++) { - int bit; + /* Slots served by this instance. If we already have slots info, + * append it diretly, otherwise, generate slots only if it has. 
*/ + if (node->slots_info) { + ci = sdscatsds(ci, node->slots_info); + } else if (node->numslots > 0) { + start = -1; + for (j = 0; j < CLUSTER_SLOTS; j++) { + int bit; - if ((bit = clusterNodeGetSlotBit(node,j)) != 0) { - if (start == -1) start = j; - } - if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { - if (bit && j == CLUSTER_SLOTS-1) j++; + if ((bit = clusterNodeGetSlotBit(node,j)) != 0) { + if (start == -1) start = j; + } + if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { + if (bit && j == CLUSTER_SLOTS-1) j++; - if (start == j-1) { - ci = sdscatprintf(ci," %d",start); - } else { - ci = sdscatprintf(ci," %d-%d",start,j-1); + if (start == j-1) { + ci = sdscatfmt(ci," %i",start); + } else { + ci = sdscatfmt(ci," %i-%i",start,j-1); + } + start = -1; } - start = -1; } } @@ -4208,6 +4214,41 @@ sds clusterGenNodeDescription(clusterNode *node) { return ci; } +/* Generate the slot topology for all nodes and store the string representation + * in the slots_info struct on the node. This is used to improve the efficiency + * of clusterGenNodesDescription() because it removes looping of the slot space + * for generating the slot info for each node individually. */ +void clusterGenNodesSlotsInfo(int filter) { + clusterNode *n = NULL; + int start = -1; + + for (int i = 0; i <= CLUSTER_SLOTS; i++) { + /* Find start node and slot id. */ + if (n == NULL) { + if (i == CLUSTER_SLOTS) break; + n = server.cluster->slots[i]; + start = i; + continue; + } + + /* Generate slots info when occur different node with start + * or end of slot. 
*/ + if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) { + if (!(n->flags & filter)) { + if (n->slots_info == NULL) n->slots_info = sdsempty(); + if (start == i-1) { + n->slots_info = sdscatfmt(n->slots_info," %i",start); + } else { + n->slots_info = sdscatfmt(n->slots_info," %i-%i",start,i-1); + } + } + if (i == CLUSTER_SLOTS) break; + n = server.cluster->slots[i]; + start = i; + } + } +} + /* Generate a csv-alike representation of the nodes we are aware of, * including the "myself" node, and return an SDS string containing the * representation (it is up to the caller to free it). @@ -4225,6 +4266,9 @@ sds clusterGenNodesDescription(int filter) { dictIterator *di; dictEntry *de; + /* Generate all nodes slots info firstly. */ + clusterGenNodesSlotsInfo(filter); + di = dictGetSafeIterator(server.cluster->nodes); while((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); @@ -4234,6 +4278,12 @@ sds clusterGenNodesDescription(int filter) { ci = sdscatsds(ci,ni); sdsfree(ni); ci = sdscatlen(ci,"\n",1); + + /* Release slots info. */ + if (node->slots_info) { + sdsfree(node->slots_info); + node->slots_info = NULL; + } } dictReleaseIterator(di); return ci; diff --git a/src/cluster.h b/src/cluster.h index d58f350ce..716c0d49c 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -118,6 +118,7 @@ typedef struct clusterNode { int flags; /* CLUSTER_NODE_... */ uint64_t configEpoch; /* Last configEpoch observed for this node */ unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */ + sds slots_info; /* Slots info represented by string. 
*/ int numslots; /* Number of slots handled by this node */ int numslaves; /* Number of slave nodes, if this is a master */ struct clusterNode **slaves; /* pointers to slave nodes */ diff --git a/src/config.c b/src/config.c index 2e109dbae..0bd89c2b9 100644 --- a/src/config.c +++ b/src/config.c @@ -153,15 +153,15 @@ int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 }; typedef struct boolConfigData { int *config; /* The pointer to the server config this value is stored in */ const int default_value; /* The default value of the config on rewrite */ - int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */ - int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */ + int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */ + int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */ } boolConfigData; typedef struct stringConfigData { char **config; /* Pointer to the server config this value is stored in. */ const char *default_value; /* Default value of the config on rewrite. */ - int (*is_valid_fn)(char* val, char **err); /* Optional function to check validity of new value (generic doc above) */ - int (*update_fn)(char* val, char* prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */ + int (*is_valid_fn)(char* val, const char **err); /* Optional function to check validity of new value (generic doc above) */ + int (*update_fn)(char* val, char* prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */ int convert_empty_to_null; /* Boolean indicating if empty strings should be stored as a NULL value. 
*/ } stringConfigData; @@ -169,8 +169,8 @@ typedef struct stringConfigData { typedef struct sdsConfigData { sds *config; /* Pointer to the server config this value is stored in. */ const char *default_value; /* Default value of the config on rewrite. */ - int (*is_valid_fn)(sds val, char **err); /* Optional function to check validity of new value (generic doc above) */ - int (*update_fn)(sds val, sds prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */ + int (*is_valid_fn)(sds val, const char **err); /* Optional function to check validity of new value (generic doc above) */ + int (*update_fn)(sds val, sds prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */ int convert_empty_to_null; /* Boolean indicating if empty SDS strings should be stored as a NULL value. */ } sdsConfigData; @@ -179,8 +179,8 @@ typedef struct enumConfigData { int *config; /* The pointer to the server config this value is stored in */ configEnum *enum_value; /* The underlying enum type this data represents */ const int default_value; /* The default value of the config on rewrite */ - int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */ - int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */ + int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */ + int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */ } enumConfigData; typedef enum numericType { @@ -214,8 +214,8 @@ typedef struct numericConfigData { long long lower_bound; /* The lower bound of this numeric value */ long long upper_bound; /* The upper bound of this numeric value */ const long long default_value; /* The default value of the config on rewrite */ - int (*is_valid_fn)(long long 
val, char **err); /* Optional function to check validity of new value (generic doc above) */ - int (*update_fn)(long long val, long long prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */ + int (*is_valid_fn)(long long val, const char **err); /* Optional function to check validity of new value (generic doc above) */ + int (*update_fn)(long long val, long long prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */ } numericConfigData; typedef union typeData { @@ -230,10 +230,10 @@ typedef struct typeInterface { /* Called on server start, to init the server with default value */ void (*init)(typeData data); /* Called on server start, should return 1 on success, 0 on error and should set err */ - int (*load)(typeData data, sds *argc, int argv, char **err); + int (*load)(typeData data, sds *argc, int argv, const char **err); /* Called on server startup and CONFIG SET, returns 1 on success, 0 on error * and can set a verbose err string, update is true when called from CONFIG SET */ - int (*set)(typeData data, sds value, int update, char **err); + int (*set)(typeData data, sds value, int update, const char **err); /* Called on CONFIG GET, required to add output to the client */ void (*get)(client *c, typeData data); /* Called on CONFIG REWRITE, required to rewrite the config state */ @@ -325,7 +325,7 @@ void queueLoadModule(sds path, sds *argv, int argc) { * server.oom_score_adj_values if valid. 
*/ -static int updateOOMScoreAdjValues(sds *args, char **err, int apply) { +static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) { int i; int values[CONFIG_OOM_COUNT]; @@ -385,7 +385,7 @@ void initConfigValues() { } void loadServerConfigFromString(char *config) { - char *err = NULL; + const char *err = NULL; int linenum = 0, totlines, i; int slaveof_linenum = 0; sds *lines; @@ -608,7 +608,7 @@ void loadServerConfigFromString(char *config) { int argc_err; if (ACLAppendUserForLoading(argv,argc,&argc_err) == C_ERR) { char buf[1024]; - char *errmsg = ACLSetUserStringError(); + const char *errmsg = ACLSetUserStringError(); snprintf(buf,sizeof(buf),"Error in user declaration '%s': %s", argv[argc_err],errmsg); err = buf; @@ -624,8 +624,7 @@ void loadServerConfigFromString(char *config) { err = "sentinel directive while not in sentinel mode"; goto loaderr; } - err = sentinelHandleConfiguration(argv+1,argc-1); - if (err) goto loaderr; + queueSentinelConfig(argv+1,argc-1,linenum,lines[i]); } } else { err = "Bad directive or wrong number of arguments"; goto loaderr; @@ -730,7 +729,7 @@ void configSetCommand(client *c) { robj *o; long long ll; int err; - char *errstr = NULL; + const char *errstr = NULL; serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2])); serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3])); o = c->argv[3]; @@ -1221,7 +1220,16 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { sdsfree(argv[0]); argv[0] = alt; } - rewriteConfigAddLineNumberToOption(state,argv[0],linenum); + /* If this is sentinel config, we use sentinel "sentinel <config>" as option + to avoid messing up the sequence. 
*/ + if (server.sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) { + sds sentinelOption = sdsempty(); + sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]); + rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum); + sdsfree(sentinelOption); + } else { + rewriteConfigAddLineNumberToOption(state,argv[0],linenum); + } sdsfreesplitres(argv,argc); } fclose(fp); @@ -1683,7 +1691,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) { if (fsync(fd)) serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno)); - else if (fchmod(fd, 0644) == -1) + else if (fchmod(fd, 0644 & ~server.umask) == -1) serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno)); else if (rename(tmp_conffile, configfile) == -1) serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno)); @@ -1795,7 +1803,7 @@ static void boolConfigInit(typeData data) { *data.yesno.config = data.yesno.default_value; } -static int boolConfigSet(typeData data, sds value, int update, char **err) { +static int boolConfigSet(typeData data, sds value, int update, const char **err) { int yn = yesnotoi(value); if (yn == -1) { *err = "argument must be 'yes' or 'no'"; @@ -1836,7 +1844,7 @@ static void stringConfigInit(typeData data) { *data.string.config = (data.string.convert_empty_to_null && !data.string.default_value) ? NULL : zstrdup(data.string.default_value); } -static int stringConfigSet(typeData data, sds value, int update, char **err) { +static int stringConfigSet(typeData data, sds value, int update, const char **err) { if (data.string.is_valid_fn && !data.string.is_valid_fn(value, err)) return 0; char *prev = *data.string.config; @@ -1863,7 +1871,7 @@ static void sdsConfigInit(typeData data) { *data.sds.config = (data.sds.convert_empty_to_null && !data.sds.default_value) ? 
NULL: sdsnew(data.sds.default_value); } -static int sdsConfigSet(typeData data, sds value, int update, char **err) { +static int sdsConfigSet(typeData data, sds value, int update, const char **err) { if (data.sds.is_valid_fn && !data.sds.is_valid_fn(value, err)) return 0; sds prev = *data.sds.config; @@ -1922,7 +1930,7 @@ static void enumConfigInit(typeData data) { *data.enumd.config = data.enumd.default_value; } -static int enumConfigSet(typeData data, sds value, int update, char **err) { +static int enumConfigSet(typeData data, sds value, int update, const char **err) { int enumval = configEnumGetValue(data.enumd.enum_value, value); if (enumval == INT_MIN) { sds enumerr = sdsnew("argument must be one of the following: "); @@ -2028,7 +2036,7 @@ static void numericConfigInit(typeData data) { SET_NUMERIC_TYPE(data.numeric.default_value) } -static int numericBoundaryCheck(typeData data, long long ll, char **err) { +static int numericBoundaryCheck(typeData data, long long ll, const char **err) { if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG || data.numeric.numeric_type == NUMERIC_TYPE_UINT || data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { @@ -2058,7 +2066,7 @@ static int numericBoundaryCheck(typeData data, long long ll, char **err) { return 1; } -static int numericConfigSet(typeData data, sds value, int update, char **err) { +static int numericConfigSet(typeData data, sds value, int update, const char **err) { long long ll, prev = 0; if (data.numeric.is_memory) { int memerr; @@ -2196,7 +2204,7 @@ static void numericConfigRewrite(typeData data, const char *name, struct rewrite } \ } -static int isValidActiveDefrag(int val, char **err) { +static int isValidActiveDefrag(int val, const char **err) { #ifndef HAVE_DEFRAG if (val) { *err = "Active defragmentation cannot be enabled: it " @@ -2212,7 +2220,7 @@ static int isValidActiveDefrag(int val, char **err) { return 1; } -static int isValidDBfilename(char *val, char **err) { +static int 
isValidDBfilename(char *val, const char **err) { if (!pathIsBaseName(val)) { *err = "dbfilename can't be a path, just a filename"; return 0; @@ -2220,7 +2228,7 @@ static int isValidDBfilename(char *val, char **err) { return 1; } -static int isValidAOFfilename(char *val, char **err) { +static int isValidAOFfilename(char *val, const char **err) { if (!pathIsBaseName(val)) { *err = "appendfilename can't be a path, just a filename"; return 0; @@ -2228,7 +2236,26 @@ static int isValidAOFfilename(char *val, char **err) { return 1; } -static int updateHZ(long long val, long long prev, char **err) { +/* Validate specified string is a valid proc-title-template */ +static int isValidProcTitleTemplate(char *val, const char **err) { + if (!validateProcTitleTemplate(val)) { + *err = "template format is invalid or contains unknown variables"; + return 0; + } + return 1; +} + +static int updateProcTitleTemplate(char *val, char *prev, const char **err) { + UNUSED(val); + UNUSED(prev); + if (redisSetProcTitle(NULL) == C_ERR) { + *err = "failed to set process title"; + return 0; + } + return 1; +} + +static int updateHZ(long long val, long long prev, const char **err) { UNUSED(prev); UNUSED(err); /* Hz is more a hint from the user, so we accept values out of range @@ -2240,14 +2267,14 @@ static int updateHZ(long long val, long long prev, char **err) { return 1; } -static int updateJemallocBgThread(int val, int prev, char **err) { +static int updateJemallocBgThread(int val, int prev, const char **err) { UNUSED(prev); UNUSED(err); set_jemalloc_bg_thread(val); return 1; } -static int updateReplBacklogSize(long long val, long long prev, char **err) { +static int updateReplBacklogSize(long long val, long long prev, const char **err) { /* resizeReplicationBacklog sets server.repl_backlog_size, and relies on * being able to tell when the size changes, so restore prev before calling it. 
*/ UNUSED(err); @@ -2256,7 +2283,7 @@ static int updateReplBacklogSize(long long val, long long prev, char **err) { return 1; } -static int updateMaxmemory(long long val, long long prev, char **err) { +static int updateMaxmemory(long long val, long long prev, const char **err) { UNUSED(prev); UNUSED(err); if (val) { @@ -2269,7 +2296,7 @@ static int updateMaxmemory(long long val, long long prev, char **err) { return 1; } -static int updateGoodSlaves(long long val, long long prev, char **err) { +static int updateGoodSlaves(long long val, long long prev, const char **err) { UNUSED(val); UNUSED(prev); UNUSED(err); @@ -2277,7 +2304,7 @@ static int updateGoodSlaves(long long val, long long prev, char **err) { return 1; } -static int updateAppendonly(int val, int prev, char **err) { +static int updateAppendonly(int val, int prev, const char **err) { UNUSED(prev); if (val == 0 && server.aof_state != AOF_OFF) { stopAppendOnly(); @@ -2290,7 +2317,7 @@ static int updateAppendonly(int val, int prev, char **err) { return 1; } -static int updateSighandlerEnabled(int val, int prev, char **err) { +static int updateSighandlerEnabled(int val, int prev, const char **err) { UNUSED(err); UNUSED(prev); if (val) @@ -2300,7 +2327,7 @@ static int updateSighandlerEnabled(int val, int prev, char **err) { return 1; } -static int updateMaxclients(long long val, long long prev, char **err) { +static int updateMaxclients(long long val, long long prev, const char **err) { /* Try to check if the OS is capable of supporting so many FDs. 
*/ if (val > prev) { adjustOpenFilesLimit(); @@ -2328,7 +2355,7 @@ static int updateMaxclients(long long val, long long prev, char **err) { return 1; } -static int updateOOMScoreAdj(int val, int prev, char **err) { +static int updateOOMScoreAdj(int val, int prev, const char **err) { UNUSED(prev); if (val) { @@ -2342,7 +2369,7 @@ static int updateOOMScoreAdj(int val, int prev, char **err) { } #ifdef USE_OPENSSL -static int updateTlsCfg(char *val, char *prev, char **err) { +static int updateTlsCfg(char *val, char *prev, const char **err) { UNUSED(val); UNUSED(prev); UNUSED(err); @@ -2355,13 +2382,13 @@ static int updateTlsCfg(char *val, char *prev, char **err) { } return 1; } -static int updateTlsCfgBool(int val, int prev, char **err) { +static int updateTlsCfgBool(int val, int prev, const char **err) { UNUSED(val); UNUSED(prev); return updateTlsCfg(NULL, NULL, err); } -static int updateTlsCfgInt(long long val, long long prev, char **err) { +static int updateTlsCfgInt(long long val, long long prev, const char **err) { UNUSED(val); UNUSED(prev); return updateTlsCfg(NULL, NULL, err); @@ -2380,11 +2407,13 @@ standardConfig configs[] = { createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, server.rdb_del_sync_files, 0, NULL, NULL), createBoolConfig("activerehashing", NULL, MODIFIABLE_CONFIG, server.activerehashing, 1, NULL, NULL), createBoolConfig("stop-writes-on-bgsave-error", NULL, MODIFIABLE_CONFIG, server.stop_writes_on_bgsave_err, 1, NULL, NULL), + createBoolConfig("set-proc-title", NULL, IMMUTABLE_CONFIG, server.set_proc_title, 1, NULL, NULL), /* Should setproctitle be used? 
*/ createBoolConfig("dynamic-hz", NULL, MODIFIABLE_CONFIG, server.dynamic_hz, 1, NULL, NULL), /* Adapt hz to # of clients.*/ createBoolConfig("lazyfree-lazy-eviction", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_eviction, 0, NULL, NULL), createBoolConfig("lazyfree-lazy-expire", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_expire, 0, NULL, NULL), createBoolConfig("lazyfree-lazy-server-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_server_del, 0, NULL, NULL), createBoolConfig("lazyfree-lazy-user-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_del , 0, NULL, NULL), + createBoolConfig("lazyfree-lazy-user-flush", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL), createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL), createBoolConfig("repl-diskless-sync", NULL, MODIFIABLE_CONFIG, server.repl_diskless_sync, 0, NULL, NULL), createBoolConfig("gopher-enabled", NULL, MODIFIABLE_CONFIG, server.gopher_enabled, 0, NULL, NULL), @@ -2425,6 +2454,7 @@ standardConfig configs[] = { createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL), createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL), createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL), + createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate), /* SDS Configs */ createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL), @@ -226,7 +226,7 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) { /* Although the key is not really deleted from the database, we regard overwrite as two steps of unlink+add, so we 
still need to call the unlink callback of the module. */ - moduleNotifyKeyUnlink(key,val); + moduleNotifyKeyUnlink(key,old); dictSetVal(db->dict, de, val); if (server.lazyfree_lazy_server_del) { @@ -595,21 +595,23 @@ void signalFlushedDb(int dbid, int async) { /* Return the set of flags to use for the emptyDb() call for FLUSHALL * and FLUSHDB commands. * - * Currently the command just attempts to parse the "ASYNC" option. It - * also checks if the command arity is wrong. + * sync: flushes the database in an sync manner. + * async: flushes the database in an async manner. + * no option: determine sync or async according to the value of lazyfree-lazy-user-flush. * * On success C_OK is returned and the flags are stored in *flags, otherwise * C_ERR is returned and the function sends an error to the client. */ int getFlushCommandFlags(client *c, int *flags) { /* Parse the optional ASYNC option. */ - if (c->argc > 1) { - if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) { - addReplyErrorObject(c,shared.syntaxerr); - return C_ERR; - } + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"sync")) { + *flags = EMPTYDB_NO_FLAGS; + } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"async")) { *flags = EMPTYDB_ASYNC; + } else if (c->argc == 1) { + *flags = server.lazyfree_lazy_user_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; } else { - *flags = EMPTYDB_NO_FLAGS; + addReplyErrorObject(c,shared.syntaxerr); + return C_ERR; } return C_OK; } @@ -951,7 +953,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { int filter = 0; /* Filter element if it does not match the pattern. 
*/ - if (!filter && use_pattern) { + if (use_pattern) { if (sdsEncodedObject(kobj)) { if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0)) filter = 1; diff --git a/src/defrag.c b/src/defrag.c index e189deddd..db797711e 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -367,7 +367,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { } else if (dict_val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) { void *newptr, *ptr = dictGetVal(de); if ((newptr = activeDefragAlloc(ptr))) - ln->value = newptr, defragged++; + de->v.val = newptr, defragged++; } defragged += dictIterDefragEntry(di); } diff --git a/src/expire.c b/src/expire.c index 275a735a7..f79510817 100644 --- a/src/expire.c +++ b/src/expire.c @@ -53,15 +53,19 @@ * to the function to avoid too many gettimeofday() syscalls. */ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { long long t = dictGetSignedIntegerVal(de); + mstime_t expire_latency; if (now > t) { sds key = dictGetKey(de); robj *keyobj = createStringObject(key,sdslen(key)); propagateExpire(db,keyobj,server.lazyfree_lazy_expire); + latencyStartMonitor(expire_latency); if (server.lazyfree_lazy_expire) dbAsyncDelete(db,keyobj); else dbSyncDelete(db,keyobj); + latencyEndMonitor(expire_latency); + latencyAddSampleIfNeeded("expire-del",expire_latency); notifyKeyspaceEvent(NOTIFY_EXPIRED, "expired",keyobj,db->id); signalModifiedKey(NULL, db, keyobj); @@ -224,7 +228,7 @@ void activeExpireCycle(int type) { /* When there are less than 1% filled slots, sampling the key * space is expensive, so stop here waiting for better times... * The dictionary will be resized asap. */ - if (num && slots > DICT_HT_INITIAL_SIZE && + if (slots > DICT_HT_INITIAL_SIZE && (num*100/slots < 1)) break; /* The main collection cycle. 
Sample random keys among keys diff --git a/src/help.h b/src/help.h index edd15a3c9..b8b1efb95 100644 --- a/src/help.h +++ b/src/help.h @@ -459,12 +459,12 @@ struct commandHelp { 0, "1.2.0" }, { "FLUSHALL", - "[ASYNC]", + "[ASYNC|SYNC]", "Remove all keys from all databases", 9, "1.0.0" }, { "FLUSHDB", - "[ASYNC]", + "[ASYNC|SYNC]", "Remove all keys from the current database", 9, "1.0.0" }, @@ -518,6 +518,16 @@ struct commandHelp { "Returns the bit value at offset in the string value stored at key", 1, "2.2.0" }, + { "GETDEL", + "key", + "Get the value of a key and delete the key", + 1, + "6.2.0" }, + { "GETEX", + "key [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|PERSIST]", + "Get the value of a key and optionally set its expiration", + 1, + "6.2.0" }, { "GETRANGE", "key start end", "Get a substring of the string stored at a key", @@ -583,6 +593,11 @@ struct commandHelp { "Set multiple hash fields to multiple values", 5, "2.0.0" }, + { "HRANDFIELD", + "key [count [WITHVALUES]]", + "Get one or multiple random fields from a hash", + 5, + "6.2.0" }, { "HSCAN", "key cursor [MATCH pattern] [COUNT count]", "Incrementally iterate hash fields and associated values", @@ -989,7 +1004,7 @@ struct commandHelp { 10, "2.6.0" }, { "SCRIPT FLUSH", - "-", + "[ASYNC|SYNC]", "Remove all the scripts from the script cache.", 10, "2.6.0" }, @@ -1019,7 +1034,7 @@ struct commandHelp { 8, "1.0.0" }, { "SET", - "key value [EX seconds|PX milliseconds|KEEPTTL] [NX|XX] [GET]", + "key value [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|KEEPTTL] [NX|XX] [GET]", "Set the string value of a key", 1, "1.0.0" }, @@ -1323,6 +1338,11 @@ struct commandHelp { "Remove and return members with the lowest scores in a sorted set", 4, "5.0.0" }, + { "ZRANDMEMBER", + "key [count [WITHSCORES]]", + "Get one or multiple random elements from a sorted set", + 4, + "6.2.0" }, { "ZRANGE", "key min max [BYSCORE|BYLEX] [REV] [LIMIT offset count] [WITHSCORES]", "Return a 
range of members in a sorted set", diff --git a/src/lazyfree.c b/src/lazyfree.c index 8b9f0e2dc..f18b2027f 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -49,6 +49,14 @@ void lazyFreeTrackingTable(void *args[]) { atomicIncr(lazyfreed_objects,len); } +void lazyFreeLuaScripts(void *args[]) { + dict *lua_scripts = args[0]; + long long len = dictSize(lua_scripts); + dictRelease(lua_scripts); + atomicDecr(lazyfree_objects,len); + atomicIncr(lazyfreed_objects,len); +} + /* Return the number of currently pending objects to free. */ size_t lazyfreeGetPendingObjectsCount(void) { size_t aux; @@ -212,3 +220,13 @@ void freeTrackingRadixTreeAsync(rax *tracking) { atomicIncr(lazyfree_objects,tracking->numele); bioCreateLazyFreeJob(lazyFreeTrackingTable,1,tracking); } + +/* Free lua_scripts dict, if the dict is huge enough, free it in async way. */ +void freeLuaScriptsAsync(dict *lua_scripts) { + if (dictSize(lua_scripts) > LAZYFREE_THRESHOLD) { + atomicIncr(lazyfree_objects,dictSize(lua_scripts)); + bioCreateLazyFreeJob(lazyFreeLuaScripts,1,lua_scripts); + } else { + dictRelease(lua_scripts); + } +} diff --git a/src/module.c b/src/module.c index bf186f8b7..b04595801 100644 --- a/src/module.c +++ b/src/module.c @@ -29,7 +29,9 @@ #include "server.h" #include "cluster.h" +#include "slowlog.h" #include "rdb.h" +#include "monotonic.h" #include <dlfcn.h> #include <sys/stat.h> #include <sys/wait.h> @@ -177,15 +179,25 @@ struct RedisModuleKey { void *iter; /* Iterator. */ int mode; /* Opening mode. */ - /* Zset iterator. */ - uint32_t ztype; /* REDISMODULE_ZSET_RANGE_* */ - zrangespec zrs; /* Score range. */ - zlexrangespec zlrs; /* Lex range. */ - uint32_t zstart; /* Start pos for positional ranges. */ - uint32_t zend; /* End pos for positional ranges. */ - void *zcurrent; /* Zset iterator current node. */ - int zer; /* Zset iterator end reached flag - (true if end was reached). 
*/ + union { + struct { + /* Zset iterator, use only if value->type == OBJ_ZSET */ + uint32_t type; /* REDISMODULE_ZSET_RANGE_* */ + zrangespec rs; /* Score range. */ + zlexrangespec lrs; /* Lex range. */ + uint32_t start; /* Start pos for positional ranges. */ + uint32_t end; /* End pos for positional ranges. */ + void *current; /* Zset iterator current node. */ + int er; /* Zset iterator end reached flag + (true if end was reached). */ + } zset; + struct { + /* Stream, use only if value->type == OBJ_STREAM */ + streamID currentid; /* Current entry while iterating. */ + int64_t numfieldsleft; /* Fields left to fetch for current entry. */ + int signalready; /* Flag that signalKeyAsReady() is needed. */ + } stream; + } u; }; typedef struct RedisModuleKey RedisModuleKey; @@ -252,6 +264,9 @@ typedef struct RedisModuleBlockedClient { int dbid; /* Database number selected by the original client. */ int blocked_on_keys; /* If blocked via RM_BlockClientOnKeys(). */ int unblocked; /* Already on the moduleUnblocked list. */ + monotime background_timer; /* Timer tracking the start of background work */ + uint64_t background_duration; /* Current command background time duration. 
+ Used for measuring latency of blocking cmds */ } RedisModuleBlockedClient; static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER; @@ -376,6 +391,7 @@ robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx); void RM_ZsetRangeStop(RedisModuleKey *kp); static void zsetKeyReset(RedisModuleKey *key); +static void moduleInitKeyTypeSpecific(RedisModuleKey *key); void RM_FreeDict(RedisModuleCtx *ctx, RedisModuleDict *d); void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data); @@ -478,17 +494,17 @@ void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes) { * Helpers for modules API implementation * -------------------------------------------------------------------------- */ -/* Create an empty key of the specified type. 'kp' must point to a key object - * opened for writing where the .value member is set to NULL because the +/* Create an empty key of the specified type. `key` must point to a key object + * opened for writing where the `.value` member is set to NULL because the * key was found to be non existing. * * On success REDISMODULE_OK is returned and the key is populated with * the value of the specified type. The function fails and returns * REDISMODULE_ERR if: * - * 1) The key is not open for writing. - * 2) The key is not empty. - * 3) The specified type is unknown. + * 1. The key is not open for writing. + * 2. The key is not empty. + * 3. The specified type is unknown. 
*/ int moduleCreateEmptyKey(RedisModuleKey *key, int type) { robj *obj; @@ -509,10 +525,14 @@ int moduleCreateEmptyKey(RedisModuleKey *key, int type) { case REDISMODULE_KEYTYPE_HASH: obj = createHashObject(); break; + case REDISMODULE_KEYTYPE_STREAM: + obj = createStreamObject(); + break; default: return REDISMODULE_ERR; } dbAdd(key->db,key->key,obj); key->value = obj; + moduleInitKeyTypeSpecific(key); return REDISMODULE_OK; } @@ -900,6 +920,30 @@ long long RM_Milliseconds(void) { return mstime(); } +/* Mark a point in time that will be used as the start time to calculate + * the elapsed execution time when RM_BlockedClientMeasureTimeEnd() is called. + * Within the same command, you can call multiple times + * RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd() + * to accummulate indepedent time intervals to the background duration. + * This method always return REDISMODULE_OK. */ +int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) { + elapsedStart(&(bc->background_timer)); + return REDISMODULE_OK; +} + +/* Mark a point in time that will be used as the end time + * to calculate the elapsed execution time. + * On success REDISMODULE_OK is returned. + * This method only returns REDISMODULE_ERR if no start time was + * previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ). */ +int RM_BlockedClientMeasureTimeEnd(RedisModuleBlockedClient *bc) { + // If the counter is 0 then we haven't called RM_BlockedClientMeasureTimeStart + if (!bc->background_timer) + return REDISMODULE_ERR; + bc->background_duration += elapsedUs(bc->background_timer); + return REDISMODULE_OK; +} + /* Set flags defining capabilities or behavior bit flags. * * REDISMODULE_OPTIONS_HANDLE_IO_ERRORS: @@ -933,9 +977,9 @@ int RM_SignalModifiedKey(RedisModuleCtx *ctx, RedisModuleString *keyname) { * keys, call replies and Redis string objects once the command returns. 
In most * cases this eliminates the need of calling the following functions: * - * 1) RedisModule_CloseKey() - * 2) RedisModule_FreeCallReply() - * 3) RedisModule_FreeString() + * 1. RedisModule_CloseKey() + * 2. RedisModule_FreeCallReply() + * 3. RedisModule_FreeString() * * These functions can still be used with automatic memory management enabled, * to optimize loops that make numerous allocations for example. */ @@ -1113,6 +1157,18 @@ RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisMod return o; } +/* Creates a string from a stream ID. The returned string must be released with + * RedisModule_FreeString(), unless automatic memory is enabled. + * + * The passed context `ctx` may be NULL if necessary. See the + * RedisModule_CreateString() documentation for more info. */ +RedisModuleString *RM_CreateStringFromStreamID(RedisModuleCtx *ctx, const RedisModuleStreamID *id) { + streamID streamid = {id->ms, id->seq}; + RedisModuleString *o = createObjectFromStreamID(&streamid); + if (ctx != NULL) autoMemoryAdd(ctx, REDISMODULE_AM_STRING, o); + return o; +} + /* Free a module string object obtained with one of the Redis modules API calls * that return new string objects. * @@ -1139,9 +1195,9 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) { * Normally you want to call this function when, at the same time * the following conditions are true: * - * 1) You have automatic memory management enabled. - * 2) You want to create string objects. - * 3) Those string objects you create need to live *after* the callback + * 1. You have automatic memory management enabled. + * 2. You want to create string objects. + * 3. Those string objects you create need to live *after* the callback * function(for example a command implementation) creating them returns. 
* * Usually you want this in order to store the created string object @@ -1188,7 +1244,7 @@ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) { * returned RedisModuleString. * * It is possible to call this function with a NULL context. - */ +*/ RedisModuleString* RM_HoldString(RedisModuleCtx *ctx, RedisModuleString *str) { if (str->refcount == OBJ_STATIC_REFCOUNT) { return RM_CreateStringFromString(ctx, str); @@ -1270,6 +1326,30 @@ int RM_StringToLongDouble(const RedisModuleString *str, long double *ld) { return retval ? REDISMODULE_OK : REDISMODULE_ERR; } +/* Convert the string into a stream ID, storing it at `*id`. + * Returns REDISMODULE_OK on success and returns REDISMODULE_ERR if the string + * is not a valid string representation of a stream ID. The special IDs "+" and + * "-" are allowed. + * + * RedisModuleStreamID is a struct with two 64-bit fields, which is used in + * stream functions and defined as + * + * typedef struct RedisModuleStreamID { + * uint64_t ms; + * uint64_t seq; + * } RedisModuleStreamID; + */ +int RM_StringToStreamID(const RedisModuleString *str, RedisModuleStreamID *id) { + streamID streamid; + if (streamParseID(str, &streamid) == C_OK) { + id->ms = streamid.ms; + id->seq = streamid.seq; + return REDISMODULE_OK; + } else { + return REDISMODULE_ERR; + } +} + /* Compare two string objects, returning -1, 0 or 1 respectively if * a < b, a == b, a > b. Strings are compared byte by byte as two * binary blobs without any encoding care / collation attempt. */ @@ -1322,7 +1402,7 @@ int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const cha * -------------------------------------------------------------------------- */ /* Send an error about the number of arguments given to the command, - * citing the command name in the error message. + * citing the command name in the error message. Returns REDISMODULE_OK. 
* * Example: * @@ -1394,7 +1474,7 @@ int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) { return REDISMODULE_OK; } -/* Reply with a simple string (+... \r\n in RESP protocol). This replies +/* Reply with a simple string (`+... \r\n` in RESP protocol). This replies * are suitable only when sending a small non-binary string with small * overhead, like "OK" or similar replies. * @@ -1742,7 +1822,7 @@ int RM_ReplicateVerbatim(RedisModuleCtx *ctx) { * 2. The ID increases monotonically. Clients connecting to the server later * are guaranteed to get IDs greater than any past ID previously seen. * - * Valid IDs are from 1 to 2^64-1. If 0 is returned it means there is no way + * Valid IDs are from 1 to 2^64 - 1. If 0 is returned it means there is no way * to fetch the ID in the context the function was currently called. * * After obtaining the ID, it is possible to check if the command execution @@ -2072,7 +2152,15 @@ static void moduleInitKey(RedisModuleKey *kp, RedisModuleCtx *ctx, robj *keyname kp->value = value; kp->iter = NULL; kp->mode = mode; - zsetKeyReset(kp); + if (kp->value) moduleInitKeyTypeSpecific(kp); +} + +/* Initialize the type-specific part of the key. Only when key has a value. */ +static void moduleInitKeyTypeSpecific(RedisModuleKey *key) { + switch (key->value->type) { + case OBJ_ZSET: zsetKeyReset(key); break; + case OBJ_STREAM: key->u.stream.signalready = 0; break; + } } /* Return an handle representing a Redis key, so that it is possible @@ -2115,8 +2203,13 @@ static void moduleCloseKey(RedisModuleKey *key) { int signal = SHOULD_SIGNAL_MODIFIED_KEYS(key->ctx); if ((key->mode & REDISMODULE_WRITE) && signal) signalModifiedKey(key->ctx->client,key->db,key->key); - /* TODO: if (key->iter) RM_KeyIteratorStop(kp); */ + if (key->iter) zfree(key->iter); RM_ZsetRangeStop(key); + if (key && key->value && key->value->type == OBJ_STREAM && + key->u.stream.signalready) { + /* One of more RM_StreamAdd() have been done. 
*/ + signalKeyAsReady(key->db, key->key, OBJ_STREAM); + } decrRefCount(key->key); } @@ -2376,9 +2469,10 @@ int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele) { * that the user should be free with RM_FreeString() or by enabling * automatic memory. 'where' specifies if the element should be popped from * head or tail. The command returns NULL if: - * 1) The list is empty. - * 2) The key was not open for writing. - * 3) The key is not a list. */ + * + * 1. The list is empty. + * 2. The key was not open for writing. + * 3. The key is not a list. */ RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) { if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL || @@ -2398,7 +2492,7 @@ RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) { /* Conversion from/to public flags of the Modules API and our private flags, * so that we have everything decoupled. */ -int RM_ZsetAddFlagsToCoreFlags(int flags) { +int moduleZsetAddFlagsToCoreFlags(int flags) { int retflags = 0; if (flags & REDISMODULE_ZADD_XX) retflags |= ZADD_XX; if (flags & REDISMODULE_ZADD_NX) retflags |= ZADD_NX; @@ -2408,7 +2502,7 @@ int RM_ZsetAddFlagsToCoreFlags(int flags) { } /* See previous function comment. 
*/ -int RM_ZsetAddFlagsFromCoreFlags(int flags) { +int moduleZsetAddFlagsFromCoreFlags(int flags) { int retflags = 0; if (flags & ZADD_ADDED) retflags |= REDISMODULE_ZADD_ADDED; if (flags & ZADD_UPDATED) retflags |= REDISMODULE_ZADD_UPDATED; @@ -2453,12 +2547,12 @@ int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *f if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR; if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR; if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET); - if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr); + if (flagsptr) flags = moduleZsetAddFlagsToCoreFlags(*flagsptr); if (zsetAdd(key->value,score,ele->ptr,&flags,NULL) == 0) { if (flagsptr) *flagsptr = 0; return REDISMODULE_ERR; } - if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags); + if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(flags); return REDISMODULE_OK; } @@ -2480,7 +2574,7 @@ int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR; if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR; if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET); - if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr); + if (flagsptr) flags = moduleZsetAddFlagsToCoreFlags(*flagsptr); flags |= ZADD_INCR; if (zsetAdd(key->value,score,ele->ptr,&flags,newscore) == 0) { if (flagsptr) *flagsptr = 0; @@ -2491,7 +2585,7 @@ int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr = 0; return REDISMODULE_ERR; } - if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags); + if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(flags); return REDISMODULE_OK; } @@ -2544,16 +2638,17 @@ int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) { * -------------------------------------------------------------------------- */ 
void zsetKeyReset(RedisModuleKey *key) { - key->ztype = REDISMODULE_ZSET_RANGE_NONE; - key->zcurrent = NULL; - key->zer = 1; + key->u.zset.type = REDISMODULE_ZSET_RANGE_NONE; + key->u.zset.current = NULL; + key->u.zset.er = 1; } /* Stop a sorted set iteration. */ void RM_ZsetRangeStop(RedisModuleKey *key) { + if (!key->value || key->value->type != OBJ_ZSET) return; /* Free resources if needed. */ - if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) - zslFreeLexRange(&key->zlrs); + if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) + zslFreeLexRange(&key->u.zset.lrs); /* Setup sensible values so that misused iteration API calls when an * iterator is not active will result into something more sensible * than crashing. */ @@ -2562,7 +2657,7 @@ void RM_ZsetRangeStop(RedisModuleKey *key) { /* Return the "End of range" flag value to signal the end of the iteration. */ int RM_ZsetRangeEndReached(RedisModuleKey *key) { - return key->zer; + return key->u.zset.er; } /* Helper function for RM_ZsetFirstInScoreRange() and RM_ZsetLastInScoreRange(). @@ -2575,29 +2670,29 @@ int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, i if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR; RM_ZsetRangeStop(key); - key->ztype = REDISMODULE_ZSET_RANGE_SCORE; - key->zer = 0; + key->u.zset.type = REDISMODULE_ZSET_RANGE_SCORE; + key->u.zset.er = 0; /* Setup the range structure used by the sorted set core implementation * in order to seek at the specified element. */ - zrangespec *zrs = &key->zrs; + zrangespec *zrs = &key->u.zset.rs; zrs->min = min; zrs->max = max; zrs->minex = minex; zrs->maxex = maxex; if (key->value->encoding == OBJ_ENCODING_ZIPLIST) { - key->zcurrent = first ? zzlFirstInRange(key->value->ptr,zrs) : - zzlLastInRange(key->value->ptr,zrs); + key->u.zset.current = first ? 
zzlFirstInRange(key->value->ptr,zrs) : + zzlLastInRange(key->value->ptr,zrs); } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = key->value->ptr; zskiplist *zsl = zs->zsl; - key->zcurrent = first ? zslFirstInRange(zsl,zrs) : - zslLastInRange(zsl,zrs); + key->u.zset.current = first ? zslFirstInRange(zsl,zrs) : + zslLastInRange(zsl,zrs); } else { serverPanic("Unsupported zset encoding"); } - if (key->zcurrent == NULL) key->zer = 1; + if (key->u.zset.current == NULL) key->u.zset.er = 1; return REDISMODULE_OK; } @@ -2610,8 +2705,8 @@ int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, i * The range is specified according to the two double values 'min' and 'max'. * Both can be infinite using the following two macros: * - * REDISMODULE_POSITIVE_INFINITE for positive infinite value - * REDISMODULE_NEGATIVE_INFINITE for negative infinite value + * * REDISMODULE_POSITIVE_INFINITE for positive infinite value + * * REDISMODULE_NEGATIVE_INFINITE for negative infinite value * * 'minex' and 'maxex' parameters, if true, respectively setup a range * where the min and max value are exclusive (not included) instead of @@ -2639,29 +2734,29 @@ int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleStr if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR; RM_ZsetRangeStop(key); - key->zer = 0; + key->u.zset.er = 0; /* Setup the range structure used by the sorted set core implementation * in order to seek at the specified element. */ - zlexrangespec *zlrs = &key->zlrs; + zlexrangespec *zlrs = &key->u.zset.lrs; if (zslParseLexRange(min, max, zlrs) == C_ERR) return REDISMODULE_ERR; /* Set the range type to lex only after successfully parsing the range, * otherwise we don't want the zlexrangespec to be freed. */ - key->ztype = REDISMODULE_ZSET_RANGE_LEX; + key->u.zset.type = REDISMODULE_ZSET_RANGE_LEX; if (key->value->encoding == OBJ_ENCODING_ZIPLIST) { - key->zcurrent = first ? 
zzlFirstInLexRange(key->value->ptr,zlrs) : - zzlLastInLexRange(key->value->ptr,zlrs); + key->u.zset.current = first ? zzlFirstInLexRange(key->value->ptr,zlrs) : + zzlLastInLexRange(key->value->ptr,zlrs); } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = key->value->ptr; zskiplist *zsl = zs->zsl; - key->zcurrent = first ? zslFirstInLexRange(zsl,zlrs) : - zslLastInLexRange(zsl,zlrs); + key->u.zset.current = first ? zslFirstInLexRange(zsl,zlrs) : + zslLastInLexRange(zsl,zlrs); } else { serverPanic("Unsupported zset encoding"); } - if (key->zcurrent == NULL) key->zer = 1; + if (key->u.zset.current == NULL) key->u.zset.er = 1; return REDISMODULE_OK; } @@ -2694,10 +2789,11 @@ int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModu RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score) { RedisModuleString *str; - if (key->zcurrent == NULL) return NULL; + if (!key->value || key->value->type != OBJ_ZSET) return NULL; + if (key->u.zset.current == NULL) return NULL; if (key->value->encoding == OBJ_ENCODING_ZIPLIST) { unsigned char *eptr, *sptr; - eptr = key->zcurrent; + eptr = key->u.zset.current; sds ele = ziplistGetObject(eptr); if (score) { sptr = ziplistNext(key->value->ptr,eptr); @@ -2705,7 +2801,7 @@ RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score } str = createObject(OBJ_STRING,ele); } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { - zskiplistNode *ln = key->zcurrent; + zskiplistNode *ln = key->u.zset.current; if (score) *score = ln->score; str = createStringObject(ln->ele,sdslen(ln->ele)); } else { @@ -2719,58 +2815,59 @@ RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score * a next element, 0 if we are already at the latest element or the range * does not include any item at all. */ int RM_ZsetRangeNext(RedisModuleKey *key) { - if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. 
*/ + if (!key->value || key->value->type != OBJ_ZSET) return 0; + if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) { unsigned char *zl = key->value->ptr; - unsigned char *eptr = key->zcurrent; + unsigned char *eptr = key->u.zset.current; unsigned char *next; next = ziplistNext(zl,eptr); /* Skip element. */ if (next) next = ziplistNext(zl,next); /* Skip score. */ if (next == NULL) { - key->zer = 1; + key->u.zset.er = 1; return 0; } else { /* Are we still within the range? */ - if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) { + if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) { /* Fetch the next element score for the * range check. */ unsigned char *saved_next = next; next = ziplistNext(zl,next); /* Skip next element. */ double score = zzlGetScore(next); /* Obtain the next score. */ - if (!zslValueLteMax(score,&key->zrs)) { - key->zer = 1; + if (!zslValueLteMax(score,&key->u.zset.rs)) { + key->u.zset.er = 1; return 0; } next = saved_next; - } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { - if (!zzlLexValueLteMax(next,&key->zlrs)) { - key->zer = 1; + } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) { + if (!zzlLexValueLteMax(next,&key->u.zset.lrs)) { + key->u.zset.er = 1; return 0; } } - key->zcurrent = next; + key->u.zset.current = next; return 1; } } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { - zskiplistNode *ln = key->zcurrent, *next = ln->level[0].forward; + zskiplistNode *ln = key->u.zset.current, *next = ln->level[0].forward; if (next == NULL) { - key->zer = 1; + key->u.zset.er = 1; return 0; } else { /* Are we still within the range? 
*/ - if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE && - !zslValueLteMax(next->score,&key->zrs)) + if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE && + !zslValueLteMax(next->score,&key->u.zset.rs)) { - key->zer = 1; + key->u.zset.er = 1; return 0; - } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { - if (!zslLexValueLteMax(next->ele,&key->zlrs)) { - key->zer = 1; + } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) { + if (!zslLexValueLteMax(next->ele,&key->u.zset.lrs)) { + key->u.zset.er = 1; return 0; } } - key->zcurrent = next; + key->u.zset.current = next; return 1; } } else { @@ -2782,58 +2879,59 @@ int RM_ZsetRangeNext(RedisModuleKey *key) { * a previous element, 0 if we are already at the first element or the range * does not include any item at all. */ int RM_ZsetRangePrev(RedisModuleKey *key) { - if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */ + if (!key->value || key->value->type != OBJ_ZSET) return 0; + if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) { unsigned char *zl = key->value->ptr; - unsigned char *eptr = key->zcurrent; + unsigned char *eptr = key->u.zset.current; unsigned char *prev; prev = ziplistPrev(zl,eptr); /* Go back to previous score. */ if (prev) prev = ziplistPrev(zl,prev); /* Back to previous ele. */ if (prev == NULL) { - key->zer = 1; + key->u.zset.er = 1; return 0; } else { /* Are we still within the range? */ - if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) { + if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) { /* Fetch the previous element score for the * range check. */ unsigned char *saved_prev = prev; prev = ziplistNext(zl,prev); /* Skip element to get the score.*/ double score = zzlGetScore(prev); /* Obtain the prev score. 
*/ - if (!zslValueGteMin(score,&key->zrs)) { - key->zer = 1; + if (!zslValueGteMin(score,&key->u.zset.rs)) { + key->u.zset.er = 1; return 0; } prev = saved_prev; - } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { - if (!zzlLexValueGteMin(prev,&key->zlrs)) { - key->zer = 1; + } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) { + if (!zzlLexValueGteMin(prev,&key->u.zset.lrs)) { + key->u.zset.er = 1; return 0; } } - key->zcurrent = prev; + key->u.zset.current = prev; return 1; } } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { - zskiplistNode *ln = key->zcurrent, *prev = ln->backward; + zskiplistNode *ln = key->u.zset.current, *prev = ln->backward; if (prev == NULL) { - key->zer = 1; + key->u.zset.er = 1; return 0; } else { /* Are we still within the range? */ - if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE && - !zslValueGteMin(prev->score,&key->zrs)) + if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE && + !zslValueGteMin(prev->score,&key->u.zset.rs)) { - key->zer = 1; + key->u.zset.er = 1; return 0; - } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { - if (!zslLexValueGteMin(prev->ele,&key->zlrs)) { - key->zer = 1; + } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) { + if (!zslLexValueGteMin(prev->ele,&key->u.zset.lrs)) { + key->u.zset.er = 1; return 0; } } - key->zcurrent = prev; + key->u.zset.current = prev; return 1; } } else { @@ -2970,7 +3068,7 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) { * * RedisModuleString *first, *second; * RedisModule_HashGet(mykey,REDISMODULE_HASH_NONE,argv[1],&first, - * argv[2],&second,NULL); + * argv[2],&second,NULL); * * As with RedisModule_HashSet() the behavior of the command can be specified * passing flags different than REDISMODULE_HASH_NONE: @@ -3049,6 +3147,455 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) { } /* -------------------------------------------------------------------------- + * Key API for the stream type. 
+ * -------------------------------------------------------------------------- */ + +/* Adds an entry to a stream. Like XADD without trimming. + * + * - `key`: The key where the stream is (or will be) stored + * - `flags`: A bit field of + * - `REDISMODULE_STREAM_ADD_AUTOID`: Assign a stream ID automatically, like + * `*` in the XADD command. + * - `id`: If the `AUTOID` flag is set, this is where the assigned ID is + * returned. Can be NULL if `AUTOID` is set, if you don't care to receive the + * ID. If `AUTOID` is not set, this is the requested ID. + * - `argv`: A pointer to an array of size `numfields * 2` containing the + * fields and values. + * - `numfields`: The number of field-value pairs in `argv`. + * + * Returns REDISMODULE_OK if an entry has been added. On failure, + * REDISMODULE_ERR is returned and `errno` is set as follows: + * + * - EINVAL if called with invalid arguments + * - ENOTSUP if the key refers to a value of a type other than stream + * - EBADF if the key was not opened for writing + * - EDOM if the given ID was 0-0 or not greater than all other IDs in the + * stream (only if the AUTOID flag is unset) + * - EFBIG if the stream has reached the last possible ID + */ +int RM_StreamAdd(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, long numfields) { + /* Validate args */ + if (!key || (numfields != 0 && !argv) || /* invalid key or argv */ + (flags & ~(REDISMODULE_STREAM_ADD_AUTOID)) || /* invalid flags */ + (!(flags & REDISMODULE_STREAM_ADD_AUTOID) && !id)) { /* id required */ + errno = EINVAL; + return REDISMODULE_ERR; + } else if (key->value && key->value->type != OBJ_STREAM) { + errno = ENOTSUP; /* wrong type */ + return REDISMODULE_ERR; + } else if (!(key->mode & REDISMODULE_WRITE)) { + errno = EBADF; /* key not open for writing */ + return REDISMODULE_ERR; + } else if (!(flags & REDISMODULE_STREAM_ADD_AUTOID) && + id->ms == 0 && id->seq == 0) { + errno = EDOM; /* ID out of range */ + return 
REDISMODULE_ERR; + } + + /* Create key if necessary */ + int created = 0; + if (key->value == NULL) { + moduleCreateEmptyKey(key, REDISMODULE_KEYTYPE_STREAM); + created = 1; + } + + stream *s = key->value->ptr; + if (s->last_id.ms == UINT64_MAX && s->last_id.seq == UINT64_MAX) { + /* The stream has reached the last possible ID */ + errno = EFBIG; + return REDISMODULE_ERR; + } + + streamID added_id; + streamID use_id; + streamID *use_id_ptr = NULL; + if (!(flags & REDISMODULE_STREAM_ADD_AUTOID)) { + use_id.ms = id->ms; + use_id.seq = id->seq; + use_id_ptr = &use_id; + } + if (streamAppendItem(s, argv, numfields, &added_id, use_id_ptr) == C_ERR) { + /* ID not greater than all existing IDs in the stream */ + errno = EDOM; + return REDISMODULE_ERR; + } + /* Postponed signalKeyAsReady(). Done implicitly by moduleCreateEmptyKey() + * so not needed if the stream has just been created. */ + if (!created) key->u.stream.signalready = 1; + + if (id != NULL) { + id->ms = added_id.ms; + id->seq = added_id.seq; + } + + return REDISMODULE_OK; +} + +/* Deletes an entry from a stream. + * + * - `key`: A key opened for writing, with no stream iterator started. + * - `id`: The stream ID of the entry to delete. + * + * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned + * and `errno` is set as follows: + * + * - EINVAL if called with invalid arguments + * - ENOTSUP if the key refers to a value of a type other than stream or if the + * key is empty + * - EBADF if the key was not opened for writing or if a stream iterator is + * associated with the key + * - ENOENT if no entry with the given stream ID exists + * + * See also RM_StreamIteratorDelete() for deleting the current entry while + * iterating using a stream iterator. 
+ */ +int RM_StreamDelete(RedisModuleKey *key, RedisModuleStreamID *id) { + if (!key || !id) { + errno = EINVAL; + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; /* wrong type */ + return REDISMODULE_ERR; + } else if (!(key->mode & REDISMODULE_WRITE) || + key->iter != NULL) { + errno = EBADF; /* key not opened for writing or iterator started */ + return REDISMODULE_ERR; + } + stream *s = key->value->ptr; + streamID streamid = {id->ms, id->seq}; + if (streamDeleteItem(s, &streamid)) { + return REDISMODULE_OK; + } else { + errno = ENOENT; /* no entry with this id */ + return REDISMODULE_ERR; + } +} + +/* Sets up a stream iterator. + * + * - `key`: The stream key opened for reading using RedisModule_OpenKey(). + * - `flags`: + * - `REDISMODULE_STREAM_ITERATOR_EXCLUSIVE`: Don't include `start` and `end` + * in the iterated range. + * - `REDISMODULE_STREAM_ITERATOR_REVERSE`: Iterate in reverse order, starting + * from the `end` of the range. + * - `start`: The lower bound of the range. Use NULL for the beginning of the + * stream. + * - `end`: The upper bound of the range. Use NULL for the end of the stream. + * + * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned + * and `errno` is set as follows: + * + * - EINVAL if called with invalid arguments + * - ENOTSUP if the key refers to a value of a type other than stream or if the + * key is empty + * - EBADF if the key was not opened for writing or if a stream iterator is + * already associated with the key + * - EDOM if `start` or `end` is outside the valid range + * + * Returns REDISMODULE_OK on success and REDISMODULE_ERR if the key doesn't + * refer to a stream or if invalid arguments were given. + * + * The stream IDs are retrieved using RedisModule_StreamIteratorNextID() and + * for each stream ID, the fields and values are retrieved using + * RedisModule_StreamIteratorNextField(). 
The iterator is freed by calling + * RedisModule_StreamIteratorStop(). + * + * Example (error handling omitted): + * + * RedisModule_StreamIteratorStart(key, 0, startid_ptr, endid_ptr); + * RedisModuleStreamID id; + * long numfields; + * while (RedisModule_StreamIteratorNextID(key, &id, &numfields) == + * REDISMODULE_OK) { + * RedisModuleString *field, *value; + * while (RedisModule_StreamIteratorNextField(key, &field, &value) == + * REDISMODULE_OK) { + * // + * // ... Do stuff ... + * // + * RedisModule_Free(field); + * RedisModule_Free(value); + * } + * } + * RedisModule_StreamIteratorStop(key); + */ +int RM_StreamIteratorStart(RedisModuleKey *key, int flags, RedisModuleStreamID *start, RedisModuleStreamID *end) { + /* check args */ + if (!key || + (flags & ~(REDISMODULE_STREAM_ITERATOR_EXCLUSIVE | + REDISMODULE_STREAM_ITERATOR_REVERSE))) { + errno = EINVAL; /* key missing or invalid flags */ + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return REDISMODULE_ERR; /* not a stream */ + } else if (key->iter) { + errno = EBADF; /* iterator already started */ + return REDISMODULE_ERR; + } + + /* define range for streamIteratorStart() */ + streamID lower, upper; + if (start) lower = (streamID){start->ms, start->seq}; + if (end) upper = (streamID){end->ms, end->seq}; + if (flags & REDISMODULE_STREAM_ITERATOR_EXCLUSIVE) { + if ((start && streamIncrID(&lower) != C_OK) || + (end && streamDecrID(&upper) != C_OK)) { + errno = EDOM; /* end is 0-0 or start is MAX-MAX? */ + return REDISMODULE_ERR; + } + } + + /* create iterator */ + stream *s = key->value->ptr; + int rev = flags & REDISMODULE_STREAM_ITERATOR_REVERSE; + streamIterator *si = zmalloc(sizeof(*si)); + streamIteratorStart(si, s, start ? &lower : NULL, end ? 
&upper : NULL, rev); + key->iter = si; + key->u.stream.currentid.ms = 0; /* for RM_StreamIteratorDelete() */ + key->u.stream.currentid.seq = 0; + key->u.stream.numfieldsleft = 0; /* for RM_StreamIteratorNextField() */ + return REDISMODULE_OK; +} + +/* Stops a stream iterator created using RedisModule_StreamIteratorStart() and + * reclaims its memory. + * + * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned + * and `errno` is set as follows: + * + * - EINVAL if called with a NULL key + * - ENOTSUP if the key refers to a value of a type other than stream or if the + * key is empty + * - EBADF if the key was not opened for writing or if no stream iterator is + * associated with the key + */ +int RM_StreamIteratorStop(RedisModuleKey *key) { + if (!key) { + errno = EINVAL; + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return REDISMODULE_ERR; + } else if (!key->iter) { + errno = EBADF; + return REDISMODULE_ERR; + } + zfree(key->iter); + key->iter = NULL; + return REDISMODULE_OK; +} + +/* Finds the next stream entry and returns its stream ID and the number of + * fields. + * + * - `key`: Key for which a stream iterator has been started using + * RedisModule_StreamIteratorStart(). + * - `id`: The stream ID returned. NULL if you don't care. + * - `numfields`: The number of fields in the found stream entry. NULL if you + * don't care. + * + * Returns REDISMODULE_OK and sets `*id` and `*numfields` if an entry was found. 
+ * On failure, REDISMODULE_ERR is returned and `errno` is set as follows: + * + * - EINVAL if called with a NULL key + * - ENOTSUP if the key refers to a value of a type other than stream or if the + * key is empty + * - EBADF if no stream iterator is associated with the key + * - ENOENT if there are no more entries in the range of the iterator + * + * In practice, if RM_StreamIteratorNextID() is called after a successful call + * to RM_StreamIteratorStart() and with the same key, it is safe to assume that + * an REDISMODULE_ERR return value means that there are no more entries. + * + * Use RedisModule_StreamIteratorNextField() to retrieve the fields and values. + * See the example at RedisModule_StreamIteratorStart(). + */ +int RM_StreamIteratorNextID(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) { + if (!key) { + errno = EINVAL; + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return REDISMODULE_ERR; + } else if (!key->iter) { + errno = EBADF; + return REDISMODULE_ERR; + } + streamIterator *si = key->iter; + int64_t *num_ptr = &key->u.stream.numfieldsleft; + streamID *streamid_ptr = &key->u.stream.currentid; + if (streamIteratorGetID(si, streamid_ptr, num_ptr)) { + if (id) { + id->ms = streamid_ptr->ms; + id->seq = streamid_ptr->seq; + } + if (numfields) *numfields = *num_ptr; + return REDISMODULE_OK; + } else { + /* No entry found. */ + key->u.stream.currentid.ms = 0; /* for RM_StreamIteratorDelete() */ + key->u.stream.currentid.seq = 0; + key->u.stream.numfieldsleft = 0; /* for RM_StreamIteratorNextField() */ + errno = ENOENT; + return REDISMODULE_ERR; + } +} + +/* Retrieves the next field of the current stream ID and its corresponding value + * in a stream iteration. This function should be called repeatedly after calling + * RedisModule_StreamIteratorNextID() to fetch each field-value pair. + * + * - `key`: Key where a stream iterator has been started. 
+ * - `field_ptr`: This is where the field is returned. + * - `value_ptr`: This is where the value is returned. + * + * Returns REDISMODULE_OK and points `*field_ptr` and `*value_ptr` to freshly + * allocated RedisModuleString objects. The string objects are freed + * automatically when the callback finishes if automatic memory is enabled. On + * failure, REDISMODULE_ERR is returned and `errno` is set as follows: + * + * - EINVAL if called with a NULL key + * - ENOTSUP if the key refers to a value of a type other than stream or if the + * key is empty + * - EBADF if no stream iterator is associated with the key + * - ENOENT if there are no more fields in the current stream entry + * + * In practice, if RM_StreamIteratorNextField() is called after a successful + * call to RM_StreamIteratorNextID() and with the same key, it is safe to assume + * that an REDISMODULE_ERR return value means that there are no more fields. + * + * See the example at RedisModule_StreamIteratorStart(). + */ +int RM_StreamIteratorNextField(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) { + if (!key) { + errno = EINVAL; + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return REDISMODULE_ERR; + } else if (!key->iter) { + errno = EBADF; + return REDISMODULE_ERR; + } else if (key->u.stream.numfieldsleft <= 0) { + errno = ENOENT; + return REDISMODULE_ERR; + } + streamIterator *si = key->iter; + unsigned char *field, *value; + int64_t field_len, value_len; + streamIteratorGetField(si, &field, &value, &field_len, &value_len); + if (field_ptr) { + *field_ptr = createRawStringObject((char *)field, field_len); + autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *field_ptr); + } + if (value_ptr) { + *value_ptr = createRawStringObject((char *)value, value_len); + autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *value_ptr); + } + key->u.stream.numfieldsleft--; + return REDISMODULE_OK; +} + +/* Deletes the current 
stream entry while iterating. + * + * This function can be called after RM_StreamIteratorNextID() or after any + * calls to RM_StreamIteratorNextField(). + * + * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned + * and `errno` is set as follows: + * + * - EINVAL if key is NULL + * - ENOTSUP if the key is empty or is of another type than stream + * - EBADF if the key is not opened for writing, if no iterator has been started + * - ENOENT if the iterator has no current stream entry + */ +int RM_StreamIteratorDelete(RedisModuleKey *key) { + if (!key) { + errno = EINVAL; + return REDISMODULE_ERR; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return REDISMODULE_ERR; + } else if (!(key->mode & REDISMODULE_WRITE) || !key->iter) { + errno = EBADF; + return REDISMODULE_ERR; + } else if (key->u.stream.currentid.ms == 0 && + key->u.stream.currentid.seq == 0) { + errno = ENOENT; + return REDISMODULE_ERR; + } + streamIterator *si = key->iter; + streamIteratorRemoveEntry(si, &key->u.stream.currentid); + key->u.stream.currentid.ms = 0; /* Make sure repeated Delete() fails */ + key->u.stream.currentid.seq = 0; + key->u.stream.numfieldsleft = 0; /* Make sure NextField() fails */ + return REDISMODULE_OK; +} + +/* Trim a stream by length, similar to XTRIM with MAXLEN. + * + * - `key`: Key opened for writing. + * - `flags`: A bitfield of + * - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance, + * like XTRIM with `~`. + * - `length`: The number of stream entries to keep after trimming. + * + * Returns the number of entries deleted. 
On failure, a negative value is + * returned and `errno` is set as follows: + * + * - EINVAL if called with invalid arguments + * - ENOTSUP if the key is empty or of a type other than stream + * - EBADF if the key is not opened for writing + */ +long long RM_StreamTrimByLength(RedisModuleKey *key, int flags, long long length) { + if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || length < 0) { + errno = EINVAL; + return -1; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return -1; + } else if (!(key->mode & REDISMODULE_WRITE)) { + errno = EBADF; + return -1; + } + int approx = flags & REDISMODULE_STREAM_TRIM_APPROX ? 1 : 0; + return streamTrimByLength((stream *)key->value->ptr, length, approx); +} + +/* Trim a stream by ID, similar to XTRIM with MINID. + * + * - `key`: Key opened for writing. + * - `flags`: A bitfield of + * - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance, + * like XTRIM with `~`. + * - `id`: The smallest stream ID to keep after trimming. + * + * Returns the number of entries deleted. On failure, a negative value is + * returned and `errno` is set as follows: + * + * - EINVAL if called with invalid arguments + * - ENOTSUP if the key is empty or of a type other than stream + * - EBADF if the key is not opened for writing + */ +long long RM_StreamTrimByID(RedisModuleKey *key, int flags, RedisModuleStreamID *id) { + if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || !id) { + errno = EINVAL; + return -1; + } else if (!key->value || key->value->type != OBJ_STREAM) { + errno = ENOTSUP; + return -1; + } else if (!(key->mode & REDISMODULE_WRITE)) { + errno = EBADF; + return -1; + } + int approx = flags & REDISMODULE_STREAM_TRIM_APPROX ? 
1 : 0; + streamID minid = (streamID){id->ms, id->seq}; + return streamTrimByID((stream *)key->value->ptr, minid, approx); +} + +/* -------------------------------------------------------------------------- * Redis <-> Modules generic Call() API * -------------------------------------------------------------------------- */ @@ -3162,9 +3709,8 @@ void moduleParseCallReply_Array(RedisModuleCallReply *reply) { reply->type = REDISMODULE_REPLY_ARRAY; } -/* Free a Call reply and all the nested replies it contains if it's an - * array. */ -void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){ +/* Recursive free reply function. */ +void moduleFreeCallReplyRec(RedisModuleCallReply *reply, int freenested){ /* Don't free nested replies by default: the user must always free the * toplevel reply. However be gentle and don't crash if the module * misuses the API. */ @@ -3174,7 +3720,7 @@ void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){ if (reply->type == REDISMODULE_REPLY_ARRAY) { size_t j; for (j = 0; j < reply->len; j++) - RM_FreeCallReply_Rec(reply->val.array+j,1); + moduleFreeCallReplyRec(reply->val.array+j,1); zfree(reply->val.array); } } @@ -3189,13 +3735,14 @@ void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){ } } -/* Wrapper for the recursive free reply function. This is needed in order - * to have the first level function to return on nested replies, but only - * if called by the module API. */ +/* Free a Call reply and all the nested replies it contains if it's an + * array. */ void RM_FreeCallReply(RedisModuleCallReply *reply) { - + /* This is a wrapper for the recursive free reply function. This is needed + * in order to have the first level function to return on nested replies, + * but only if called by the module API. 
*/ RedisModuleCtx *ctx = reply->ctx; - RM_FreeCallReply_Rec(reply,0); + moduleFreeCallReplyRec(reply,0); autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply); } @@ -3347,30 +3894,31 @@ fmterr: * * * **cmdname**: The Redis command to call. * * **fmt**: A format specifier string for the command's arguments. Each - * of the arguments should be specified by a valid type specification: - * b The argument is a buffer and is immediately followed by another - * argument that is the buffer's length. - * c The argument is a pointer to a plain C string (null-terminated). - * l The argument is long long integer. - * s The argument is a RedisModuleString. - * v The argument(s) is a vector of RedisModuleString. - * - * The format specifier can also include modifiers: - * ! Sends the Redis command and its arguments to replicas and AOF. - * A Suppress AOF propagation, send only to replicas (requires `!`). - * R Suppress replicas propagation, send only to AOF (requires `!`). + * of the arguments should be specified by a valid type specification. The + * format specifier can also contain the modifiers `!`, `A` and `R` which + * don't have a corresponding argument. + * + * * `b` -- The argument is a buffer and is immediately followed by another + * argument that is the buffer's length. + * * `c` -- The argument is a pointer to a plain C string (null-terminated). + * * `l` -- The argument is long long integer. + * * `s` -- The argument is a RedisModuleString. + * * `v` -- The argument(s) is a vector of RedisModuleString. + * * `!` -- Sends the Redis command and its arguments to replicas and AOF. + * * `A` -- Suppress AOF propagation, send only to replicas (requires `!`). + * * `R` -- Suppress replicas propagation, send only to AOF (requires `!`). * * **...**: The actual arguments to the Redis command. * * On success a RedisModuleCallReply object is returned, otherwise * NULL is returned and errno is set to the following values: * - * EBADF: wrong format specifier. 
- * EINVAL: wrong command arity. - * ENOENT: command does not exist. - * EPERM: operation in Cluster instance with key in non local slot. - * EROFS: operation in Cluster instance when a write command is sent - * in a readonly state. - * ENETDOWN: operation in Cluster instance when cluster is down. + * * EBADF: wrong format specifier. + * * EINVAL: wrong command arity. + * * ENOENT: command does not exist. + * * EPERM: operation in Cluster instance with key in non local slot. + * * EROFS: operation in Cluster instance when a write command is sent + * in a readonly state. + * * ENETDOWN: operation in Cluster instance when cluster is down. * * Example code fragment: * @@ -3682,27 +4230,28 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, robj *value) { * still load old data produced by an older version if the rdb_load * callback is able to check the encver value and act accordingly. * The encver must be a positive value between 0 and 1023. + * * * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure * that should be populated with the methods callbacks and structure * version, like in the following example: * - * RedisModuleTypeMethods tm = { - * .version = REDISMODULE_TYPE_METHOD_VERSION, - * .rdb_load = myType_RDBLoadCallBack, - * .rdb_save = myType_RDBSaveCallBack, - * .aof_rewrite = myType_AOFRewriteCallBack, - * .free = myType_FreeCallBack, - * - * // Optional fields - * .digest = myType_DigestCallBack, - * .mem_usage = myType_MemUsageCallBack, - * .aux_load = myType_AuxRDBLoadCallBack, - * .aux_save = myType_AuxRDBSaveCallBack, - * .free_effort = myType_FreeEffortCallBack, - * .unlink = myType_UnlinkCallBack, - * .copy = myType_CopyCallback, - * .defrag = myType_DefragCallback - * } + * RedisModuleTypeMethods tm = { + * .version = REDISMODULE_TYPE_METHOD_VERSION, + * .rdb_load = myType_RDBLoadCallBack, + * .rdb_save = myType_RDBSaveCallBack, + * .aof_rewrite = myType_AOFRewriteCallBack, + * .free = myType_FreeCallBack, + * + 
* // Optional fields + * .digest = myType_DigestCallBack, + * .mem_usage = myType_MemUsageCallBack, + * .aux_load = myType_AuxRDBLoadCallBack, + * .aux_save = myType_AuxRDBSaveCallBack, + * .free_effort = myType_FreeEffortCallBack, + * .unlink = myType_UnlinkCallBack, + * .copy = myType_CopyCallback, + * .defrag = myType_DefragCallback + * } * * * **rdb_load**: A callback function pointer that loads data from RDB files. * * **rdb_save**: A callback function pointer that saves data to RDB files. @@ -3740,7 +4289,7 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, robj *value) { * a time limit and provides cursor support is used only for keys that are determined * to have significant internal complexity. To determine this, the defrag mechanism * uses the free_effort callback and the 'active-defrag-max-scan-fields' config directive. - * NOTE: The value is passed as a void** and the function is expected to update the + * NOTE: The value is passed as a `void**` and the function is expected to update the * pointer if the top-level value pointer is defragmented and consequentially changes. * * Note: the module name "AAAAAAAAA" is reserved and produces an error, it @@ -3900,7 +4449,7 @@ int moduleAllDatatypesHandleErrors() { } /* Returns true if any previous IO API failed. - * for Load* APIs the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag must be set with + * for `Load*` APIs the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag must be set with * RedisModule_SetModuleOptions first. */ int RM_IsIOError(RedisModuleIO *io) { return io->error; @@ -3926,7 +4475,7 @@ saveerr: } /* Load an unsigned 64 bit value from the RDB file. This function should only - * be called in the context of the rdb_load method of modules implementing + * be called in the context of the `rdb_load` method of modules implementing * new data types. 
*/ uint64_t RM_LoadUnsigned(RedisModuleIO *io) { if (io->error) return 0; @@ -4242,7 +4791,6 @@ void RM_DigestEndSequence(RedisModuleDigest *md) { * If this is NOT done, Redis will handle corrupted (or just truncated) serialized * data by producing an error message and terminating the process. */ - void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType *mt) { rio payload; RedisModuleIO io; @@ -4270,7 +4818,6 @@ void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType * * implement in order to allow a module to arbitrarily serialize/de-serialize * keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented. */ - RedisModuleString *RM_SaveDataTypeToString(RedisModuleCtx *ctx, void *data, const moduleType *mt) { rio payload; RedisModuleIO io; @@ -4368,7 +4915,7 @@ const RedisModuleString *RM_GetKeyNameFromIO(RedisModuleIO *io) { return io->key; } -/* Returns a RedisModuleString with the name of the key from RedisModuleKey */ +/* Returns a RedisModuleString with the name of the key from RedisModuleKey. */ const RedisModuleString *RM_GetKeyNameFromModuleKey(RedisModuleKey *key) { return key ? key->key : NULL; } @@ -4383,7 +4930,7 @@ const RedisModuleString *RM_GetKeyNameFromModuleKey(RedisModuleKey *key) { * RM_LogIOError() * */ -void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) { +void moduleLogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) { char msg[LOG_MAX_LEN]; size_t name_len; int level; @@ -4422,7 +4969,7 @@ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_li void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - RM_LogRaw(ctx? ctx->module: NULL,levelstr,fmt,ap); + moduleLogRaw(ctx? ctx->module: NULL,levelstr,fmt,ap); va_end(ap); } @@ -4434,12 +4981,15 @@ void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) 
{ void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - RM_LogRaw(io->type->module,levelstr,fmt,ap); + moduleLogRaw(io->type->module,levelstr,fmt,ap); va_end(ap); } /* Redis-like assert function. * + * The macro `RedisModule_Assert(expression)` is recommended, rather than + * calling this function directly. + * * A failed assertion will shut down the server and produce logging information * that looks identical to information generated by Redis itself. */ @@ -4570,6 +5120,7 @@ RedisModuleBlockedClient *moduleBlockClient(RedisModuleCtx *ctx, RedisModuleCmdF bc->dbid = c->db->id; bc->blocked_on_keys = keys != NULL; bc->unblocked = 0; + bc->background_duration = 0; c->bpop.timeout = timeout; if (islua || ismulti) { @@ -4643,6 +5194,11 @@ int moduleTryServeClientBlockedOnKey(client *c, robj *key) { * * In these cases, a call to RedisModule_BlockClient() will **not** block the * client, but instead produce a specific error reply. + * + * Measuring background time: By default the time spent in the blocked command + * is not account for the total command duration. To include such time you should + * use RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd() one, + * or multiple times within the blocking command background work. */ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms) { return moduleBlockClient(ctx,reply_callback,timeout_callback,free_privdata,timeout_ms, NULL,0,NULL); @@ -4673,7 +5229,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc * key, or a client in queue before this one can be served, modifying the key * as well and making it empty again. 
So when a client is blocked with * RedisModule_BlockClientOnKeys() the reply callback is not called after - * RM_UnblockCLient() is called, but every time a key is signaled as ready: + * RM_UnblockClient() is called, but every time a key is signaled as ready: * if the reply callback can serve the client, it returns REDISMODULE_OK * and the client is unblocked, otherwise it will return REDISMODULE_ERR * and we'll try again later. @@ -4837,6 +5393,7 @@ void moduleHandleBlockedClients(void) { * was blocked on keys (RM_BlockClientOnKeys()), because we already * called such callback in moduleTryServeClientBlockedOnKey() when * the key was signaled as ready. */ + uint64_t reply_us = 0; if (c && !bc->blocked_on_keys && bc->reply_callback) { RedisModuleCtx ctx = REDISMODULE_CTX_INIT; ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY; @@ -4845,9 +5402,19 @@ void moduleHandleBlockedClients(void) { ctx.module = bc->module; ctx.client = bc->client; ctx.blocked_client = bc; + monotime replyTimer; + elapsedStart(&replyTimer); bc->reply_callback(&ctx,(void**)c->argv,c->argc); + reply_us = elapsedUs(replyTimer); moduleFreeContext(&ctx); } + /* Update stats now that we've finished the blocking operation. + * This needs to be out of the reply callback above given that a + * module might not define any callback and still do blocking ops. + */ + if (c && !bc->blocked_on_keys) { + updateStatsOnUnblock(c, bc->background_duration, reply_us); + } /* Free privdata if any. */ if (bc->privdata && bc->free_privdata) { @@ -4911,6 +5478,9 @@ void moduleBlockedClientTimedOut(client *c) { ctx.blocked_privdata = bc->privdata; bc->timeout_callback(&ctx,(void**)c->argv,c->argc); moduleFreeContext(&ctx); + if (!bc->blocked_on_keys) { + updateStatsOnUnblock(c, bc->background_duration, 0); + } /* For timeout events, we do not want to call the disconnect callback, * because the blocked client will be automatically disconnected in * this case, and the user can still hook using the timeout callback. 
*/ @@ -5103,9 +5673,9 @@ void moduleReleaseGIL(void) { * * The subscriber signature is: * - * int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, - * const char *event, - * RedisModuleString *key); + * int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, + * const char *event, + * RedisModuleString *key); * * `type` is the event type bit, that must match the mask given at registration * time. The event string is the actual command being executed, and key is the @@ -5369,28 +5939,27 @@ size_t RM_GetClusterSize(void) { return dictSize(server.cluster->nodes); } +clusterNode *clusterLookupNode(const char *name); /* We need access to internals */ + /* Populate the specified info for the node having as ID the specified 'id', * then returns REDISMODULE_OK. Otherwise if the node ID does not exist from * the POV of this local node, REDISMODULE_ERR is returned. * - * The arguments ip, master_id, port and flags can be NULL in case we don't - * need to populate back certain info. If an ip and master_id (only populated + * The arguments `ip`, `master_id`, `port` and `flags` can be NULL in case we don't + * need to populate back certain info. If an `ip` and `master_id` (only populated * if the instance is a slave) are specified, they point to buffers holding - * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as ip - * and master_id are not null terminated. + * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as `ip` + * and `master_id` are not null terminated. 
* * The list of flags reported is the following: * - * * REDISMODULE_NODE_MYSELF This node - * * REDISMODULE_NODE_MASTER The node is a master - * * REDISMODULE_NODE_SLAVE The node is a replica - * * REDISMODULE_NODE_PFAIL We see the node as failing - * * REDISMODULE_NODE_FAIL The cluster agrees the node is failing - * * REDISMODULE_NODE_NOFAILOVER The slave is configured to never failover + * * REDISMODULE_NODE_MYSELF: This node + * * REDISMODULE_NODE_MASTER: The node is a master + * * REDISMODULE_NODE_SLAVE: The node is a replica + * * REDISMODULE_NODE_PFAIL: We see the node as failing + * * REDISMODULE_NODE_FAIL: The cluster agrees the node is failing + * * REDISMODULE_NODE_NOFAILOVER: The slave is configured to never failover */ - -clusterNode *clusterLookupNode(const char *name); /* We need access to internals */ - int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) { UNUSED(ctx); @@ -5434,18 +6003,18 @@ int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *m * a different distributed system, but still want to use the Redis Cluster * message bus. Flags that can be set: * - * CLUSTER_MODULE_FLAG_NO_FAILOVER - * CLUSTER_MODULE_FLAG_NO_REDIRECTION + * * CLUSTER_MODULE_FLAG_NO_FAILOVER + * * CLUSTER_MODULE_FLAG_NO_REDIRECTION * * With the following effects: * - * NO_FAILOVER: prevent Redis Cluster slaves to failover a failing master. - * Also disables the replica migration feature. + * * NO_FAILOVER: prevent Redis Cluster slaves to failover a failing master. + * Also disables the replica migration feature. * - * NO_REDIRECTION: Every node will accept any key, without trying to perform - * partitioning according to the user Redis Cluster algorithm. - * Slots informations will still be propagated across the - * cluster, but without effects. 
*/ + * * NO_REDIRECTION: Every node will accept any key, without trying to perform + * partitioning according to the user Redis Cluster algorithm. + * Slots informations will still be propagated across the + * cluster, but without effects. */ void RM_SetClusterFlags(RedisModuleCtx *ctx, uint64_t flags) { UNUSED(ctx); if (flags & REDISMODULE_CLUSTER_FLAG_NO_FAILOVER) @@ -5964,15 +6533,15 @@ int RM_DictDel(RedisModuleDict *d, RedisModuleString *key, void *oldval) { * comparison operator to use in order to seek the first element. The * operators available are: * - * "^" -- Seek the first (lexicographically smaller) key. - * "$" -- Seek the last (lexicographically biffer) key. - * ">" -- Seek the first element greater than the specified key. - * ">=" -- Seek the first element greater or equal than the specified key. - * "<" -- Seek the first element smaller than the specified key. - * "<=" -- Seek the first element smaller or equal than the specified key. - * "==" -- Seek the first element matching exactly the specified key. + * * `^` -- Seek the first (lexicographically smaller) key. + * * `$` -- Seek the last (lexicographically biffer) key. + * * `>` -- Seek the first element greater than the specified key. + * * `>=` -- Seek the first element greater or equal than the specified key. + * * `<` -- Seek the first element smaller than the specified key. + * * `<=` -- Seek the first element smaller or equal than the specified key. + * * `==` -- Seek the first element matching exactly the specified key. * - * Note that for "^" and "$" the passed key is not used, and the user may + * Note that for `^` and `$` the passed key is not used, and the user may * just pass NULL with a length of 0. 
* * If the element to start the iteration cannot be seeked based on the @@ -6017,11 +6586,11 @@ int RM_DictIteratorReseek(RedisModuleDictIter *di, const char *op, RedisModuleSt return RM_DictIteratorReseekC(di,op,key->ptr,sdslen(key->ptr)); } -/* Return the current item of the dictionary iterator 'di' and steps to the +/* Return the current item of the dictionary iterator `di` and steps to the * next element. If the iterator already yield the last element and there * are no other elements to return, NULL is returned, otherwise a pointer - * to a string representing the key is provided, and the '*keylen' length - * is set by reference (if keylen is not NULL). The '*dataptr', if not NULL + * to a string representing the key is provided, and the `*keylen` length + * is set by reference (if keylen is not NULL). The `*dataptr`, if not NULL * is set to the value of the pointer stored at the returned key as auxiliary * data (as set by the RedisModule_DictSet API). * @@ -6035,7 +6604,7 @@ int RM_DictIteratorReseek(RedisModuleDictIter *di, const char *op, RedisModuleSt * } * * The returned pointer is of type void because sometimes it makes sense - * to cast it to a char* sometimes to an unsigned char* depending on the + * to cast it to a `char*` sometimes to an unsigned `char*` depending on the * fact it contains or not binary data, so this API ends being more * comfortable to use. * @@ -6119,8 +6688,8 @@ int RM_DictCompare(RedisModuleDictIter *di, const char *op, RedisModuleString *k int RM_InfoEndDictField(RedisModuleInfoCtx *ctx); /* Used to start a new section, before adding any fields. the section name will - * be prefixed by "<modulename>_" and must only include A-Z,a-z,0-9. - * NULL or empty string indicates the default section (only <modulename>) is used. + * be prefixed by `<modulename>_` and must only include A-Z,a-z,0-9. + * NULL or empty string indicates the default section (only `<modulename>`) is used. 
* When return value is REDISMODULE_ERR, the section should and will be skipped. */ int RM_InfoAddSection(RedisModuleInfoCtx *ctx, char *name) { sds full_name = sdsdup(ctx->module->name); @@ -6180,8 +6749,8 @@ int RM_InfoEndDictField(RedisModuleInfoCtx *ctx) { } /* Used by RedisModuleInfoFunc to add info fields. - * Each field will be automatically prefixed by "<modulename>_". - * Field names or values must not include \r\n of ":" */ + * Each field will be automatically prefixed by `<modulename>_`. + * Field names or values must not include `\r\n` or `:`. */ int RM_InfoAddFieldString(RedisModuleInfoCtx *ctx, char *field, RedisModuleString *value) { if (!ctx->in_section) return REDISMODULE_ERR; @@ -6200,6 +6769,7 @@ int RM_InfoAddFieldString(RedisModuleInfoCtx *ctx, char *field, RedisModuleStrin return REDISMODULE_OK; } +/* See RedisModule_InfoAddFieldString(). */ int RM_InfoAddFieldCString(RedisModuleInfoCtx *ctx, char *field, char *value) { if (!ctx->in_section) return REDISMODULE_ERR; @@ -6218,6 +6788,7 @@ int RM_InfoAddFieldCString(RedisModuleInfoCtx *ctx, char *field, char *value) { return REDISMODULE_OK; } +/* See RedisModule_InfoAddFieldString(). */ int RM_InfoAddFieldDouble(RedisModuleInfoCtx *ctx, char *field, double value) { if (!ctx->in_section) return REDISMODULE_ERR; @@ -6236,6 +6807,7 @@ int RM_InfoAddFieldDouble(RedisModuleInfoCtx *ctx, char *field, double value) { return REDISMODULE_OK; } +/* See RedisModule_InfoAddFieldString(). */ int RM_InfoAddFieldLongLong(RedisModuleInfoCtx *ctx, char *field, long long value) { if (!ctx->in_section) return REDISMODULE_ERR; @@ -6254,6 +6826,7 @@ int RM_InfoAddFieldLongLong(RedisModuleInfoCtx *ctx, char *field, long long valu return REDISMODULE_OK; } +/* See RedisModule_InfoAddFieldString(). 
*/ int RM_InfoAddFieldULongLong(RedisModuleInfoCtx *ctx, char *field, unsigned long long value) { if (!ctx->in_section) return REDISMODULE_ERR; @@ -6272,6 +6845,8 @@ int RM_InfoAddFieldULongLong(RedisModuleInfoCtx *ctx, char *field, unsigned long return REDISMODULE_OK; } +/* Registers callback for the INFO command. The callback should add INFO fields + * by calling the `RedisModule_InfoAddField*()` functions. */ int RM_RegisterInfoFunc(RedisModuleCtx *ctx, RedisModuleInfoFunc cb) { ctx->module->info_cb = cb; return REDISMODULE_OK; @@ -6711,7 +7286,6 @@ const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fct * after the filter context is destroyed, so it must not be auto-memory * allocated, freed or used elsewhere. */ - int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { int i; @@ -6733,7 +7307,6 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisM * filter context is destroyed, so it must not be auto-memory allocated, freed * or used elsewhere. */ - int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR; @@ -6774,10 +7347,10 @@ size_t RM_MallocSize(void* ptr){ /* Return the a number between 0 to 1 indicating the amount of memory * currently used, relative to the Redis "maxmemory" configuration. * - * 0 - No memory limit configured. - * Between 0 and 1 - The percentage of the memory used normalized in 0-1 range. - * Exactly 1 - Memory limit reached. - * Greater 1 - More memory used than the configured limit. + * * 0 - No memory limit configured. + * * Between 0 and 1 - The percentage of the memory used normalized in 0-1 range. + * * Exactly 1 - Memory limit reached. + * * Greater 1 - More memory used than the configured limit. 
*/ float RM_GetUsedMemoryRatio(){ float level; @@ -6840,21 +7413,22 @@ void RM_ScanCursorDestroy(RedisModuleScanCursor *cursor) { * the selected db. * * Callback for scan implementation. - * void scan_callback(RedisModuleCtx *ctx, RedisModuleString *keyname, - * RedisModuleKey *key, void *privdata); - * ctx - the redis module context provided to for the scan. - * keyname - owned by the caller and need to be retained if used after this - * function. * - * key - holds info on the key and value, it is provided as best effort, in - * some cases it might be NULL, in which case the user should (can) use - * RedisModule_OpenKey (and CloseKey too). - * when it is provided, it is owned by the caller and will be free when the - * callback returns. + * void scan_callback(RedisModuleCtx *ctx, RedisModuleString *keyname, + * RedisModuleKey *key, void *privdata); * - * privdata - the user data provided to RedisModule_Scan. + * - `ctx`: the redis module context provided to for the scan. + * - `keyname`: owned by the caller and need to be retained if used after this + * function. + * - `key`: holds info on the key and value, it is provided as best effort, in + * some cases it might be NULL, in which case the user should (can) use + * RedisModule_OpenKey() (and CloseKey too). + * when it is provided, it is owned by the caller and will be free when the + * callback returns. + * - `privdata`: the user data provided to RedisModule_Scan(). * * The way it should be used: + * * RedisModuleCursor *c = RedisModule_ScanCursorCreate(); * while(RedisModule_Scan(ctx, c, callback, privateData)); * RedisModule_ScanCursorDestroy(c); @@ -6938,7 +7512,9 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) { /* Scan api that allows a module to scan the elements in a hash, set or sorted set key * * Callback for scan implementation. 
- * void scan_callback(RedisModuleKey *key, RedisModuleString* field, RedisModuleString* value, void *privdata); + * + * void scan_callback(RedisModuleKey *key, RedisModuleString* field, RedisModuleString* value, void *privdata); + * * - key - the redis key context provided to for the scan. * - field - field name, owned by the caller and need to be retained if used * after this function. @@ -6947,6 +7523,7 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) { * - privdata - the user data provided to RedisModule_ScanKey. * * The way it should be used: + * * RedisModuleCursor *c = RedisModule_ScanCursorCreate(); * RedisModuleKey *key = RedisModule_OpenKey(...) * while(RedisModule_ScanKey(key, c, callback, privateData)); @@ -6955,6 +7532,7 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) { * * It is also possible to use this API from another thread while the lock is acquired during * the actuall call to RM_ScanKey, and re-opening the key each time: + * * RedisModuleCursor *c = RedisModule_ScanCursorCreate(); * RedisModule_ThreadSafeContextLock(ctx); * RedisModuleKey *key = RedisModule_OpenKey(...) @@ -7159,10 +7737,10 @@ void ModuleForkDoneHandler(int exitcode, int bysignal) { * * The callback must be of this type: * - * int (*RedisModuleEventCallback)(RedisModuleCtx *ctx, - * RedisModuleEvent eid, - * uint64_t subevent, - * void *data); + * int (*RedisModuleEventCallback)(RedisModuleCtx *ctx, + * RedisModuleEvent eid, + * uint64_t subevent, + * void *data); * * The 'ctx' is a normal Redis module context that the callback can use in * order to call other modules APIs. 
The 'eid' is the event itself, this @@ -7176,201 +7754,207 @@ void ModuleForkDoneHandler(int exitcode, int bysignal) { * * Here is a list of events you can use as 'eid' and related sub events: * - * RedisModuleEvent_ReplicationRoleChanged + * * RedisModuleEvent_ReplicationRoleChanged: + * + * This event is called when the instance switches from master + * to replica or the other way around, however the event is + * also called when the replica remains a replica but starts to + * replicate with a different master. * - * This event is called when the instance switches from master - * to replica or the other way around, however the event is - * also called when the replica remains a replica but starts to - * replicate with a different master. + * The following sub events are available: * - * The following sub events are available: + * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_MASTER` + * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_REPLICA` * - * REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_MASTER - * REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_REPLICA + * The 'data' field can be casted by the callback to a + * `RedisModuleReplicationInfo` structure with the following fields: * - * The 'data' field can be casted by the callback to a - * RedisModuleReplicationInfo structure with the following fields: + * int master; // true if master, false if replica + * char *masterhost; // master instance hostname for NOW_REPLICA + * int masterport; // master instance port for NOW_REPLICA + * char *replid1; // Main replication ID + * char *replid2; // Secondary replication ID + * uint64_t repl1_offset; // Main replication offset + * uint64_t repl2_offset; // Offset of replid2 validity * - * int master; // true if master, false if replica - * char *masterhost; // master instance hostname for NOW_REPLICA - * int masterport; // master instance port for NOW_REPLICA - * char *replid1; // Main replication ID - * char *replid2; // Secondary replication ID - * uint64_t repl1_offset; // Main 
replication offset - * uint64_t repl2_offset; // Offset of replid2 validity + * * RedisModuleEvent_Persistence * - * RedisModuleEvent_Persistence + * This event is called when RDB saving or AOF rewriting starts + * and ends. The following sub events are available: * - * This event is called when RDB saving or AOF rewriting starts - * and ends. The following sub events are available: + * * `REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START` + * * `REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START` + * * `REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START` + * * `REDISMODULE_SUBEVENT_PERSISTENCE_ENDED` + * * `REDISMODULE_SUBEVENT_PERSISTENCE_FAILED` * - * REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START - * REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START - * REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START - * REDISMODULE_SUBEVENT_PERSISTENCE_ENDED - * REDISMODULE_SUBEVENT_PERSISTENCE_FAILED + * The above events are triggered not just when the user calls the + * relevant commands like BGSAVE, but also when a saving operation + * or AOF rewriting occurs because of internal server triggers. + * The SYNC_RDB_START sub events are happening in the forground due to + * SAVE command, FLUSHALL, or server shutdown, and the other RDB and + * AOF sub events are executed in a background fork child, so any + * action the module takes can only affect the generated AOF or RDB, + * but will not be reflected in the parent process and affect connected + * clients and commands. Also note that the AOF_START sub event may end + * up saving RDB content in case of an AOF with rdb-preamble. * - * The above events are triggered not just when the user calls the - * relevant commands like BGSAVE, but also when a saving operation - * or AOF rewriting occurs because of internal server triggers. 
- * The SYNC_RDB_START sub events are happening in the forground due to - * SAVE command, FLUSHALL, or server shutdown, and the other RDB and - * AOF sub events are executed in a background fork child, so any - * action the module takes can only affect the generated AOF or RDB, - * but will not be reflected in the parent process and affect connected - * clients and commands. Also note that the AOF_START sub event may end - * up saving RDB content in case of an AOF with rdb-preamble. + * * RedisModuleEvent_FlushDB * - * RedisModuleEvent_FlushDB + * The FLUSHALL, FLUSHDB or an internal flush (for instance + * because of replication, after the replica synchronization) + * happened. The following sub events are available: * - * The FLUSHALL, FLUSHDB or an internal flush (for instance - * because of replication, after the replica synchronization) - * happened. The following sub events are available: + * * `REDISMODULE_SUBEVENT_FLUSHDB_START` + * * `REDISMODULE_SUBEVENT_FLUSHDB_END` * - * REDISMODULE_SUBEVENT_FLUSHDB_START - * REDISMODULE_SUBEVENT_FLUSHDB_END + * The data pointer can be casted to a RedisModuleFlushInfo + * structure with the following fields: * - * The data pointer can be casted to a RedisModuleFlushInfo - * structure with the following fields: + * int32_t async; // True if the flush is done in a thread. + * // See for instance FLUSHALL ASYNC. + * // In this case the END callback is invoked + * // immediately after the database is put + * // in the free list of the thread. + * int32_t dbnum; // Flushed database number, -1 for all the DBs + * // in the case of the FLUSHALL operation. * - * int32_t async; // True if the flush is done in a thread. - * See for instance FLUSHALL ASYNC. - * In this case the END callback is invoked - * immediately after the database is put - * in the free list of the thread. - * int32_t dbnum; // Flushed database number, -1 for all the DBs - * in the case of the FLUSHALL operation. 
+ * The start event is called *before* the operation is initated, thus + * allowing the callback to call DBSIZE or other operation on the + * yet-to-free keyspace. * - * The start event is called *before* the operation is initated, thus - * allowing the callback to call DBSIZE or other operation on the - * yet-to-free keyspace. + * * RedisModuleEvent_Loading * - * RedisModuleEvent_Loading + * Called on loading operations: at startup when the server is + * started, but also after a first synchronization when the + * replica is loading the RDB file from the master. + * The following sub events are available: * - * Called on loading operations: at startup when the server is - * started, but also after a first synchronization when the - * replica is loading the RDB file from the master. - * The following sub events are available: + * * `REDISMODULE_SUBEVENT_LOADING_RDB_START` + * * `REDISMODULE_SUBEVENT_LOADING_AOF_START` + * * `REDISMODULE_SUBEVENT_LOADING_REPL_START` + * * `REDISMODULE_SUBEVENT_LOADING_ENDED` + * * `REDISMODULE_SUBEVENT_LOADING_FAILED` * - * REDISMODULE_SUBEVENT_LOADING_RDB_START - * REDISMODULE_SUBEVENT_LOADING_AOF_START - * REDISMODULE_SUBEVENT_LOADING_REPL_START - * REDISMODULE_SUBEVENT_LOADING_ENDED - * REDISMODULE_SUBEVENT_LOADING_FAILED + * Note that AOF loading may start with an RDB data in case of + * rdb-preamble, in which case you'll only receive an AOF_START event. * - * Note that AOF loading may start with an RDB data in case of - * rdb-preamble, in which case you'll only receive an AOF_START event. + * * RedisModuleEvent_ClientChange * + * Called when a client connects or disconnects. + * The data pointer can be casted to a RedisModuleClientInfo + * structure, documented in RedisModule_GetClientInfoById(). 
+ * The following sub events are available: * - * RedisModuleEvent_ClientChange + * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED` + * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED` * - * Called when a client connects or disconnects. - * The data pointer can be casted to a RedisModuleClientInfo - * structure, documented in RedisModule_GetClientInfoById(). - * The following sub events are available: + * * RedisModuleEvent_Shutdown * - * REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED - * REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED + * The server is shutting down. No subevents are available. * - * RedisModuleEvent_Shutdown + * * RedisModuleEvent_ReplicaChange * - * The server is shutting down. No subevents are available. + * This event is called when the instance (that can be both a + * master or a replica) get a new online replica, or lose a + * replica since it gets disconnected. + * The following sub events are available: * - * RedisModuleEvent_ReplicaChange + * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE` + * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE` * - * This event is called when the instance (that can be both a - * master or a replica) get a new online replica, or lose a - * replica since it gets disconnected. - * The following sub events are available: + * No additional information is available so far: future versions + * of Redis will have an API in order to enumerate the replicas + * connected and their state. * - * REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE - * REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE + * * RedisModuleEvent_CronLoop * - * No additional information is available so far: future versions - * of Redis will have an API in order to enumerate the replicas - * connected and their state. + * This event is called every time Redis calls the serverCron() + * function in order to do certain bookkeeping. Modules that are + * required to do operations from time to time may use this callback. 
+ * Normally Redis calls this function 10 times per second, but + * this changes depending on the "hz" configuration. + * No sub events are available. * - * RedisModuleEvent_CronLoop + * The data pointer can be casted to a RedisModuleCronLoop + * structure with the following fields: * - * This event is called every time Redis calls the serverCron() - * function in order to do certain bookkeeping. Modules that are - * required to do operations from time to time may use this callback. - * Normally Redis calls this function 10 times per second, but - * this changes depending on the "hz" configuration. - * No sub events are available. + * int32_t hz; // Approximate number of events per second. * - * The data pointer can be casted to a RedisModuleCronLoop - * structure with the following fields: + * * RedisModuleEvent_MasterLinkChange * - * int32_t hz; // Approximate number of events per second. + * This is called for replicas in order to notify when the + * replication link becomes functional (up) with our master, + * or when it goes down. Note that the link is not considered + * up when we just connected to the master, but only if the + * replication is happening correctly. + * The following sub events are available: * - * RedisModuleEvent_MasterLinkChange + * * `REDISMODULE_SUBEVENT_MASTER_LINK_UP` + * * `REDISMODULE_SUBEVENT_MASTER_LINK_DOWN` * - * This is called for replicas in order to notify when the - * replication link becomes functional (up) with our master, - * or when it goes down. Note that the link is not considered - * up when we just connected to the master, but only if the - * replication is happening correctly. - * The following sub events are available: + * * RedisModuleEvent_ModuleChange * - * REDISMODULE_SUBEVENT_MASTER_LINK_UP - * REDISMODULE_SUBEVENT_MASTER_LINK_DOWN + * This event is called when a new module is loaded or one is unloaded. 
+ * The following sub events are available: * - * RedisModuleEvent_ModuleChange + * * `REDISMODULE_SUBEVENT_MODULE_LOADED` + * * `REDISMODULE_SUBEVENT_MODULE_UNLOADED` * - * This event is called when a new module is loaded or one is unloaded. - * The following sub events are available: + * The data pointer can be casted to a RedisModuleModuleChange + * structure with the following fields: * - * REDISMODULE_SUBEVENT_MODULE_LOADED - * REDISMODULE_SUBEVENT_MODULE_UNLOADED + * const char* module_name; // Name of module loaded or unloaded. + * int32_t module_version; // Module version. * - * The data pointer can be casted to a RedisModuleModuleChange - * structure with the following fields: + * * RedisModuleEvent_LoadingProgress * - * const char* module_name; // Name of module loaded or unloaded. - * int32_t module_version; // Module version. + * This event is called repeatedly called while an RDB or AOF file + * is being loaded. + * The following sub events are availble: * - * RedisModuleEvent_LoadingProgress + * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB` + * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF` * - * This event is called repeatedly called while an RDB or AOF file - * is being loaded. - * The following sub events are availble: + * The data pointer can be casted to a RedisModuleLoadingProgress + * structure with the following fields: * - * REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB - * REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF + * int32_t hz; // Approximate number of events per second. + * int32_t progress; // Approximate progress between 0 and 1024, + * // or -1 if unknown. * - * The data pointer can be casted to a RedisModuleLoadingProgress - * structure with the following fields: + * * RedisModuleEvent_SwapDB * - * int32_t hz; // Approximate number of events per second. - * int32_t progress; // Approximate progress between 0 and 1024, - * or -1 if unknown. + * This event is called when a SWAPDB command has been successfully + * Executed. 
+ * For this event call currently there is no subevents available. * - * RedisModuleEvent_SwapDB + * The data pointer can be casted to a RedisModuleSwapDbInfo + * structure with the following fields: * - * This event is called when a SWAPDB command has been successfully - * Executed. - * For this event call currently there is no subevents available. + * int32_t dbnum_first; // Swap Db first dbnum + * int32_t dbnum_second; // Swap Db second dbnum * - * The data pointer can be casted to a RedisModuleSwapDbInfo - * structure with the following fields: + * * RedisModuleEvent_ReplBackup * - * int32_t dbnum_first; // Swap Db first dbnum - * int32_t dbnum_second; // Swap Db second dbnum + * Called when diskless-repl-load config is set to swapdb, + * And redis needs to backup the the current database for the + * possibility to be restored later. A module with global data and + * maybe with aux_load and aux_save callbacks may need to use this + * notification to backup / restore / discard its globals. + * The following sub events are available: * - * RedisModuleEvent_ReplBackup + * * `REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE` + * * `REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE` + * * `REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD` * - * Called when diskless-repl-load config is set to swapdb, - * And redis needs to backup the the current database for the - * possibility to be restored later. A module with global data and - * maybe with aux_load and aux_save callbacks may need to use this - * notification to backup / restore / discard its globals. - * The following sub events are available: + * * RedisModuleEvent_ForkChild * - * REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE - * REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE - * REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD + * Called when a fork child (AOFRW, RDBSAVE, module fork...) 
is born/dies + * The following sub events are available: * + * * `REDISMODULE_SUBEVENT_FORK_CHILD_BORN` + * * `REDISMODULE_SUBEVENT_FORK_CHILD_DIED` * * The function returns REDISMODULE_OK if the module was successfully subscribed * for the specified event. If the API is called from a wrong context or unsupported event @@ -7444,6 +8028,8 @@ int RM_IsSubEventSupported(RedisModuleEvent event, int64_t subevent) { return subevent < _REDISMODULE_SUBEVENT_SWAPDB_NEXT; case REDISMODULE_EVENT_REPL_BACKUP: return subevent < _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT; + case REDISMODULE_EVENT_FORK_CHILD: + return subevent < _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT; default: break; } @@ -7659,6 +8245,11 @@ void moduleInitModulesSystem(void) { anetNonBlock(NULL,server.module_blocked_pipe[0]); anetNonBlock(NULL,server.module_blocked_pipe[1]); + /* Enable close-on-exec flag on pipes in case of the fork-exec system calls in + * sentinels or redis servers. */ + anetCloexec(server.module_blocked_pipe[0]); + anetCloexec(server.module_blocked_pipe[1]); + /* Create the timers radix tree. */ Timers = raxNew(); @@ -8064,7 +8655,8 @@ int RM_GetLFU(RedisModuleKey *key, long long *lfu_freq) { * the module can check if a certain set of flags are supported * by the redis server version in use. * Example: - * int supportedFlags = RM_GetContextFlagsAll() + * + * int supportedFlags = RM_GetContextFlagsAll(); * if (supportedFlags & REDISMODULE_CTX_FLAGS_MULTI) { * // REDISMODULE_CTX_FLAGS_MULTI is supported * } else{ @@ -8080,7 +8672,8 @@ int RM_GetContextFlagsAll() { * the module can check if a certain set of flags are supported * by the redis server version in use. 
* Example: - * int supportedFlags = RM_GetKeyspaceNotificationFlagsAll() + * + * int supportedFlags = RM_GetKeyspaceNotificationFlagsAll(); * if (supportedFlags & REDISMODULE_NOTIFY_LOADED) { * // REDISMODULE_NOTIFY_LOADED is supported * } else{ @@ -8150,8 +8743,8 @@ int RM_ModuleTypeReplaceValue(RedisModuleKey *key, moduleType *mt, void *new_val * an error condition. Error conditions are indicated by setting errno * as folllows: * - * ENOENT: Specified command does not exist. - * EINVAL: Invalid command arity specified. + * * ENOENT: Specified command does not exist. + * * EINVAL: Invalid command arity specified. * * NOTE: The returned array is not a Redis Module object so it does not * get automatically freed even when auto-memory is used. The caller @@ -8247,11 +8840,11 @@ int RM_DefragShouldStop(RedisModuleDefragCtx *ctx) { * data type. * * This behavior is reserved to cases where late defrag is performed. Late - * defrag is selected for keys that implement the free_effort callback and - * return a free_effort value that is larger than the defrag + * defrag is selected for keys that implement the `free_effort` callback and + * return a `free_effort` value that is larger than the defrag * 'active-defrag-max-scan-fields' configuration directive. * - * Smaller keys, keys that do not implement free_effort or the global + * Smaller keys, keys that do not implement `free_effort` or the global * defrag callback are not called in late-defrag mode. In those cases, a * call to this function will return REDISMODULE_ERR. * @@ -8273,7 +8866,7 @@ int RM_DefragCursorSet(RedisModuleDefragCtx *ctx, unsigned long cursor) { /* Fetch a cursor value that has been previously stored using RM_DefragCursorSet(). * * If not called for a late defrag operation, REDISMODULE_ERR will be returned and - * the cursor should be ignored. See DM_DefragCursorSet() for more details on + * the cursor should be ignored. See RM_DefragCursorSet() for more details on * defrag cursors. 
*/ int RM_DefragCursorGet(RedisModuleDefragCtx *ctx, unsigned long *cursor) { @@ -8445,6 +9038,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(StringToLongLong); REGISTER_API(StringToDouble); REGISTER_API(StringToLongDouble); + REGISTER_API(StringToStreamID); REGISTER_API(Call); REGISTER_API(CallReplyProto); REGISTER_API(FreeCallReply); @@ -8459,6 +9053,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(CreateStringFromDouble); REGISTER_API(CreateStringFromLongDouble); REGISTER_API(CreateStringFromString); + REGISTER_API(CreateStringFromStreamID); REGISTER_API(CreateStringPrintf); REGISTER_API(FreeString); REGISTER_API(StringPtrLen); @@ -8490,6 +9085,15 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(ZsetRangeEndReached); REGISTER_API(HashSet); REGISTER_API(HashGet); + REGISTER_API(StreamAdd); + REGISTER_API(StreamDelete); + REGISTER_API(StreamIteratorStart); + REGISTER_API(StreamIteratorStop); + REGISTER_API(StreamIteratorNextID); + REGISTER_API(StreamIteratorNextField); + REGISTER_API(StreamIteratorDelete); + REGISTER_API(StreamTrimByLength); + REGISTER_API(StreamTrimByID); REGISTER_API(IsKeysPositionRequest); REGISTER_API(KeyAtPos); REGISTER_API(GetClientId); @@ -8539,6 +9143,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(GetBlockedClientPrivateData); REGISTER_API(AbortBlock); REGISTER_API(Milliseconds); + REGISTER_API(BlockedClientMeasureTimeStart); + REGISTER_API(BlockedClientMeasureTimeEnd); REGISTER_API(GetThreadSafeContext); REGISTER_API(GetDetachedThreadSafeContext); REGISTER_API(FreeThreadSafeContext); diff --git a/src/modules/gendoc.rb b/src/modules/gendoc.rb index ee6572884..2fd2ec5d7 100644 --- a/src/modules/gendoc.rb +++ b/src/modules/gendoc.rb @@ -4,16 +4,26 @@ # Convert the C comment to markdown def markdown(s) s = s.gsub(/\*\/$/,"") - s = s.gsub(/^ \* {0,1}/,"") - s = s.gsub(/^\/\* /,"") + s = s.gsub(/^ ?\* ?/,"") + s = s.gsub(/^\/\*\*? ?/,"") s.chop! 
while s[-1] == "\n" || s[-1] == " " lines = s.split("\n") newlines = [] + # Fix some markdown, except in code blocks indented by 4 spaces. lines.each{|l| - if l[0] != ' ' - l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"} - l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"} - l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"} + if not l.start_with?(' ') + # Rewrite RM_Xyz() to `RedisModule_Xyz()`. The () suffix is + # optional. Even RM_Xyz*() with * as wildcard is handled. + l = l.gsub(/(?<!`)RM_([A-z]+(?:\*?\(\))?)/, '`RedisModule_\1`') + # Add backquotes around RedisModule functions and type where missing. + l = l.gsub(/(?<!`)RedisModule[A-z]+(?:\*?\(\))?/){|x| "`#{x}`"} + # Add backquotes around c functions like malloc() where missing. + l = l.gsub(/(?<![`A-z])[a-z_]+\(\)/, '`\0`') + # Add backquotes around macro and var names containing underscores. + l = l.gsub(/(?<![`A-z\*])[A-Za-z]+_[A-Za-z0-9_]+/){|x| "`#{x}`"} + # Link URLs preceded by space (i.e. when not already linked) + l = l.gsub(/ (https?:\/\/[A-Za-z0-9_\/\.\-]+[A-Za-z0-9\/])/, + ' [\1](\1)') end newlines << l } @@ -41,6 +51,7 @@ def docufy(src,i) end puts "# Modules API reference\n\n" +puts "<!-- This file is generated from module.c using gendoc.rb -->\n\n" src = File.open("../module.c").to_a src.each_with_index{|line,i| if line =~ /RM_/ && line[0] != ' ' && line[0] != '#' && line[0] != '/' diff --git a/src/networking.c b/src/networking.c index e624dd8f9..da611675c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1104,6 +1104,7 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) { "Accepting client connection: %s", server.neterr); return; } + anetCloexec(cfd); serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport); acceptCommonHandler(connCreateAcceptedSocket(cfd),0,cip); } @@ -1124,6 +1125,7 @@ void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask) { "Accepting client connection: %s", server.neterr); return; } + anetCloexec(cfd); serverLog(LL_VERBOSE,"Accepted %s:%d", 
cip, cport); acceptCommonHandler(connCreateAcceptedTLS(cfd, server.tls_auth_clients),0,cip); } @@ -1143,6 +1145,7 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) { "Accepting client connection: %s", server.neterr); return; } + anetCloexec(cfd); serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket); acceptCommonHandler(connCreateAcceptedSocket(cfd),CLIENT_UNIX_SOCKET,NULL); } @@ -1707,7 +1710,7 @@ int processInlineBuffer(client *c) { } /* Handle the \r\n case. */ - if (newline && newline != c->querybuf+c->qb_pos && *(newline-1) == '\r') + if (newline != c->querybuf+c->qb_pos && *(newline-1) == '\r') newline--, linefeed_chars++; /* Split the input buffer up to the \r\n */ @@ -2436,8 +2439,10 @@ void clientCommand(client *c) { " Kill connection made from <ip:port>.", "KILL <option> <value> [<option> <value> [...]]", " Kill connections. Options are:", -" * ADDR <ip:port>", -" Kill connection made from <ip:port>", +" * ADDR (<ip:port>|<unixsocket>:0)", +" Kill connections made from the specified address", +" * LADDR (<ip:port>|<unixsocket>:0)", +" Kill connections made to specified local address", " * TYPE (normal|master|replica|pubsub)", " Kill connections by type.", " * USER <username>", @@ -2675,7 +2680,7 @@ NULL c->argc == 4)) { /* CLIENT PAUSE TIMEOUT [WRITE|ALL] */ - long long duration; + mstime_t end; int type = CLIENT_PAUSE_ALL; if (c->argc == 4) { if (!strcasecmp(c->argv[3]->ptr,"write")) { @@ -2689,9 +2694,9 @@ NULL } } - if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration, + if (getTimeoutFromObjectOrReply(c,c->argv[2],&end, UNIT_MILLISECONDS) != C_OK) return; - pauseClients(duration, type); + pauseClients(end, type); addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"tracking") && c->argc >= 3) { /* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first] @@ -3355,8 +3360,6 @@ void processEventsWhileBlocked(void) { * Threaded I/O * 
========================================================================== */ -int tio_debug = 0; - #define IO_THREADS_MAX_NUM 128 #define IO_THREADS_OP_READ 0 #define IO_THREADS_OP_WRITE 1 @@ -3407,8 +3410,6 @@ void *IOThreadMain(void *myid) { serverAssert(getIOPendingCount(id) != 0); - if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - /* Process: note that the main thread will never touch our list * before we drop the pending count to 0. */ listIter li; @@ -3426,8 +3427,6 @@ void *IOThreadMain(void *myid) { } listEmpty(io_threads_list[id]); setIOPendingCount(id, 0); - - if (tio_debug) printf("[%ld] Done\n", id); } } @@ -3482,8 +3481,6 @@ void killIOThreads(void) { } void startThreadedIO(void) { - if (tio_debug) { printf("S"); fflush(stdout); } - if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(server.io_threads_active == 0); for (int j = 1; j < server.io_threads_num; j++) pthread_mutex_unlock(&io_threads_mutex[j]); @@ -3494,10 +3491,6 @@ void stopThreadedIO(void) { /* We may have still clients with pending reads when this function * is called: handle them before stopping the threads. */ handleClientsWithPendingReadsUsingThreads(); - if (tio_debug) { printf("E"); fflush(stdout); } - if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", - (int) listLength(server.clients_pending_read), - (int) listLength(server.clients_pending_write)); serverAssert(server.io_threads_active == 1); for (int j = 1; j < server.io_threads_num; j++) pthread_mutex_lock(&io_threads_mutex[j]); @@ -3540,8 +3533,6 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Start threads if needed. */ if (!server.io_threads_active) startThreadedIO(); - if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed); - /* Distribute the clients across N different lists. 
*/ listIter li; listNode *ln; @@ -3586,7 +3577,6 @@ int handleClientsWithPendingWritesUsingThreads(void) { pending += getIOPendingCount(j); if (pending == 0) break; } - if (tio_debug) printf("I/O WRITE All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ @@ -3639,8 +3629,6 @@ int handleClientsWithPendingReadsUsingThreads(void) { int processed = listLength(server.clients_pending_read); if (processed == 0) return 0; - if (tio_debug) printf("%d TOTAL READ pending clients\n", processed); - /* Distribute the clients across N different lists. */ listIter li; listNode *ln; @@ -3676,7 +3664,6 @@ int handleClientsWithPendingReadsUsingThreads(void) { pending += getIOPendingCount(j); if (pending == 0) break; } - if (tio_debug) printf("I/O READ All threads finshed\n"); /* Run the list of clients again to process the new buffers. */ while(listLength(server.clients_pending_read)) { diff --git a/src/redis-cli.c b/src/redis-cli.c index 31d2360c9..ed3075317 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -5301,7 +5301,7 @@ static clusterManagerNode *clusterNodeForResharding(char *id, clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; - } else if (node != NULL && target != NULL) { + } else if (target != NULL) { if (!strcmp(node->name, target->name)) { clusterManagerLogErr( "*** It is not possible to use " "the target node as " @@ -6940,6 +6940,10 @@ void sendCapa() { sendReplconf("capa", "eof"); } +void sendRdbOnly(void) { + sendReplconf("rdb-only", "1"); +} + /* Read raw bytes through a redisContext. The read operation is not greedy * and may not fill the buffer entirely. 
*/ @@ -7137,7 +7141,6 @@ static void getRDB(clusterManagerNode *node) { node->context = NULL; fsync(fd); close(fd); - fprintf(stderr,"Transfer finished with success.\n"); if (node) { sdsfree(filename); return; @@ -8258,6 +8261,7 @@ int main(int argc, char **argv) { if (config.getrdb_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); sendCapa(); + sendRdbOnly(); getRDB(NULL); } diff --git a/src/redismodule.h b/src/redismodule.h index 36c566bb3..9d8c6c5ea 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -69,6 +69,20 @@ #define REDISMODULE_HASH_CFIELDS (1<<2) #define REDISMODULE_HASH_EXISTS (1<<3) +/* StreamID type. */ +typedef struct RedisModuleStreamID { + uint64_t ms; + uint64_t seq; +} RedisModuleStreamID; + +/* StreamAdd() flags. */ +#define REDISMODULE_STREAM_ADD_AUTOID (1<<0) +/* StreamIteratorStart() flags. */ +#define REDISMODULE_STREAM_ITERATOR_EXCLUSIVE (1<<0) +#define REDISMODULE_STREAM_ITERATOR_REVERSE (1<<1) +/* StreamIteratorTrim*() flags. */ +#define REDISMODULE_STREAM_TRIM_APPROX (1<<0) + /* Context Flags: Info about the current context returned by * RM_GetContextFlags(). */ @@ -216,9 +230,8 @@ typedef uint64_t RedisModuleTimerID; #define REDISMODULE_EVENT_LOADING_PROGRESS 10 #define REDISMODULE_EVENT_SWAPDB 11 #define REDISMODULE_EVENT_REPL_BACKUP 12 - -/* Next event flag, should be updated if a new event added. */ -#define _REDISMODULE_EVENT_NEXT 13 +#define REDISMODULE_EVENT_FORK_CHILD 13 +#define _REDISMODULE_EVENT_NEXT 14 /* Next event flag, should be updated if a new event added. */ typedef struct RedisModuleEvent { uint64_t id; /* REDISMODULE_EVENT_... defines. */ @@ -281,6 +294,10 @@ static const RedisModuleEvent RedisModuleEvent_ReplBackup = { REDISMODULE_EVENT_REPL_BACKUP, 1 + }, + RedisModuleEvent_ForkChild = { + REDISMODULE_EVENT_FORK_CHILD, + 1 }; /* Those are values that are used for the 'subevent' callback argument. 
*/ @@ -331,6 +348,10 @@ static const RedisModuleEvent #define REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD 2 #define _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT 3 +#define REDISMODULE_SUBEVENT_FORK_CHILD_BORN 0 +#define REDISMODULE_SUBEVENT_FORK_CHILD_DIED 1 +#define _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT 2 + #define _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT 0 #define _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT 0 #define _REDISMODULE_SUBEVENT_SWAPDB_NEXT 0 @@ -578,6 +599,7 @@ REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongLong)(Redi REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR; REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongDouble)(RedisModuleCtx *ctx, long double ld, int humanfriendly) REDISMODULE_ATTR; REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromStreamID)(RedisModuleCtx *ctx, const RedisModuleStreamID *id) REDISMODULE_ATTR; REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...) 
REDISMODULE_ATTR_PRINTF(2,3) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; REDISMODULE_API const char * (*RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len) REDISMODULE_ATTR; @@ -599,6 +621,7 @@ REDISMODULE_API int (*RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, Redis REDISMODULE_API int (*RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_StringToDouble)(const RedisModuleString *str, double *d) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_StringToLongDouble)(const RedisModuleString *str, long double *d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringToStreamID)(const RedisModuleString *str, RedisModuleStreamID *id) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_AutoMemory)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx) REDISMODULE_ATTR; @@ -629,6 +652,15 @@ REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODUL REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) 
REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorStop)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorNextID)(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorNextField)(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorDelete)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_StreamTrimByLength)(RedisModuleKey *key, int flags, long long length) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_StreamTrimByID)(RedisModuleKey *key, int flags, RedisModuleStreamID *id) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR; REDISMODULE_API unsigned long long (*RedisModule_GetClientId)(RedisModuleCtx *ctx) REDISMODULE_ATTR; @@ -744,6 +776,8 @@ REDISMODULE_API int (*RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx) REDISMODULE_API void * (*RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_AbortBlock)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API int 
(*RedisModule_BlockedClientMeasureTimeStart)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeEnd)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; REDISMODULE_API RedisModuleCtx * (*RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; REDISMODULE_API RedisModuleCtx * (*RedisModule_GetDetachedThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR; @@ -842,6 +876,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(StringToLongLong); REDISMODULE_GET_API(StringToDouble); REDISMODULE_GET_API(StringToLongDouble); + REDISMODULE_GET_API(StringToStreamID); REDISMODULE_GET_API(Call); REDISMODULE_GET_API(CallReplyProto); REDISMODULE_GET_API(FreeCallReply); @@ -856,6 +891,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(CreateStringFromDouble); REDISMODULE_GET_API(CreateStringFromLongDouble); REDISMODULE_GET_API(CreateStringFromString); + REDISMODULE_GET_API(CreateStringFromStreamID); REDISMODULE_GET_API(CreateStringPrintf); REDISMODULE_GET_API(FreeString); REDISMODULE_GET_API(StringPtrLen); @@ -887,6 +923,15 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ZsetRangeEndReached); REDISMODULE_GET_API(HashSet); REDISMODULE_GET_API(HashGet); + REDISMODULE_GET_API(StreamAdd); + REDISMODULE_GET_API(StreamDelete); + REDISMODULE_GET_API(StreamIteratorStart); + REDISMODULE_GET_API(StreamIteratorStop); + REDISMODULE_GET_API(StreamIteratorNextID); + REDISMODULE_GET_API(StreamIteratorNextField); + REDISMODULE_GET_API(StreamIteratorDelete); + REDISMODULE_GET_API(StreamTrimByLength); + REDISMODULE_GET_API(StreamTrimByID); REDISMODULE_GET_API(IsKeysPositionRequest); REDISMODULE_GET_API(KeyAtPos); REDISMODULE_GET_API(GetClientId); @@ -1006,6 +1051,8 
@@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(GetBlockedClientPrivateData); REDISMODULE_GET_API(GetBlockedClientHandle); REDISMODULE_GET_API(AbortBlock); + REDISMODULE_GET_API(BlockedClientMeasureTimeStart); + REDISMODULE_GET_API(BlockedClientMeasureTimeEnd); REDISMODULE_GET_API(SetDisconnectCallback); REDISMODULE_GET_API(SubscribeToKeyspaceEvents); REDISMODULE_GET_API(NotifyKeyspaceEvent); diff --git a/src/replication.c b/src/replication.c index 9fb19eaca..f23fcb6de 100644 --- a/src/replication.c +++ b/src/replication.c @@ -200,6 +200,16 @@ void feedReplicationBacklogWithObject(robj *o) { feedReplicationBacklog(p,len); } +int canFeedReplicaReplBuffer(client *replica) { + /* Don't feed replicas that only want the RDB. */ + if (replica->flags & CLIENT_REPL_RDBONLY) return 0; + + /* Don't feed replicas that are still waiting for BGSAVE to start. */ + if (replica->replstate == SLAVE_STATE_WAIT_BGSAVE_START) return 0; + + return 1; +} + /* Propagate write commands to slaves, and populate the replication backlog * as well. This function is used if the instance is a master: we use * the commands received by our clients in order to create the replication @@ -249,7 +259,8 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { listRewind(slaves,&li); while((ln = listNext(&li))) { client *slave = ln->value; - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; + + if (!canFeedReplicaReplBuffer(slave)) continue; addReply(slave,selectcmd); } @@ -290,8 +301,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { while((ln = listNext(&li))) { client *slave = ln->value; - /* Don't feed slaves that are still waiting for BGSAVE to start. 
*/ - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; + if (!canFeedReplicaReplBuffer(slave)) continue; /* Feed slaves that are waiting for the initial SYNC (so these commands * are queued in the output buffer until the initial SYNC completes), @@ -363,8 +373,7 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle while((ln = listNext(&li))) { client *slave = ln->value; - /* Don't feed slaves that are still waiting for BGSAVE to start. */ - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; + if (!canFeedReplicaReplBuffer(slave)) continue; addReplyProto(slave,buf,buflen); } } @@ -712,6 +721,36 @@ void syncCommand(client *c) { /* ignore SYNC if already slave or in monitor mode */ if (c->flags & CLIENT_SLAVE) return; + /* Check if this is a failover request to a replica with the same replid and + * become a master if so. */ + if (c->argc > 3 && !strcasecmp(c->argv[0]->ptr,"psync") && + !strcasecmp(c->argv[3]->ptr,"failover")) + { + serverLog(LL_WARNING, "Failover request received for replid %s.", + (unsigned char *)c->argv[1]->ptr); + if (!server.masterhost) { + addReplyError(c, "PSYNC FAILOVER can't be sent to a master."); + return; + } + + if (!strcasecmp(c->argv[1]->ptr,server.replid)) { + replicationUnsetMaster(); + sds client = catClientInfoString(sdsempty(),c); + serverLog(LL_NOTICE, + "MASTER MODE enabled (failover request from '%s')",client); + sdsfree(client); + } else { + addReplyError(c, "PSYNC FAILOVER replid must match my replid."); + return; + } + } + + /* Don't let replicas sync with us while we're failing over */ + if (server.failover_state != NO_FAILOVER) { + addReplyError(c,"-NOMASTERLINK Can't SYNC while failing over"); + return; + } + /* Refuse SYNC requests if we are a slave but the link with our master * is not ok... 
*/ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) { @@ -799,14 +838,20 @@ void syncCommand(client *c) { listRewind(server.slaves,&li); while((ln = listNext(&li))) { slave = ln->value; - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break; + /* If the client needs a buffer of commands, we can't use + * a replica without replication buffer. */ + if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && + (!(slave->flags & CLIENT_REPL_RDBONLY) || + (c->flags & CLIENT_REPL_RDBONLY))) + break; } /* To attach this slave, we check that it has at least all the * capabilities of the slave that triggered the current BGSAVE. */ if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) { /* Perfect, the server is already registering differences for - * another slave. Set the right state, and copy the buffer. */ - copyClientOutputBuffer(c,slave); + * another slave. Set the right state, and copy the buffer. + * We don't copy buffer if clients don't want. */ + if (!(c->flags & CLIENT_REPL_RDBONLY)) copyClientOutputBuffer(c,slave); replicationSetupSlaveForFullResync(c,slave->psync_initial_offset); serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC"); } else { @@ -925,6 +970,15 @@ void replconfCommand(client *c) { * to the slave. */ if (server.masterhost && server.master) replicationSendAck(); return; + } else if (!strcasecmp(c->argv[j]->ptr,"rdb-only")) { + /* REPLCONF RDB-ONLY is used to identify the client only wants + * RDB snapshot without replication buffer. */ + long rdb_only = 0; + if (getRangeLongFromObjectOrReply(c,c->argv[j+1], + 0,1,&rdb_only,NULL) != C_OK) + return; + if (rdb_only == 1) c->flags |= CLIENT_REPL_RDBONLY; + else c->flags &= ~CLIENT_REPL_RDBONLY; } else { addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s", (char*)c->argv[j]->ptr); @@ -939,19 +993,28 @@ void replconfCommand(client *c) { * we are finally ready to send the incremental stream of commands. 
* * It does a few things: - * - * 1) Put the slave in ONLINE state. Note that the function may also be called + * 1) Close the replica's connection async if it doesn't need replication + * commands buffer stream, since it actually isn't a valid replica. + * 2) Put the slave in ONLINE state. Note that the function may also be called * for a replicas that are already in ONLINE state, but having the flag * repl_put_online_on_ack set to true: we still have to install the write * handler in that case. This function will take care of that. - * 2) Make sure the writable event is re-installed, since calling the SYNC + * 3) Make sure the writable event is re-installed, since calling the SYNC * command disables it, so that we can accumulate output buffer without * sending it to the replica. - * 3) Update the count of "good replicas". */ + * 4) Update the count of "good replicas". */ void putSlaveOnline(client *slave) { slave->replstate = SLAVE_STATE_ONLINE; slave->repl_put_online_on_ack = 0; slave->repl_ack_time = server.unixtime; /* Prevent false timeout. 
*/ + + if (slave->flags & CLIENT_REPL_RDBONLY) { + serverLog(LL_NOTICE, + "Close the connection with replica %s as RDB transfer is complete", + replicationGetSlaveName(slave)); + freeClientAsync(slave); + return; + } if (connSetWriteHandler(slave->conn, sendReplyToClient) == C_ERR) { serverLog(LL_WARNING,"Unable to register writable event for replica bulk transfer: %s", strerror(errno)); freeClient(slave); @@ -1998,8 +2061,15 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { memcpy(psync_offset,"-1",3); } - /* Issue the PSYNC command */ - reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL); + /* Issue the PSYNC command, if this is a master with a failover in + * progress then send the failover argument to the replica to cause it + * to become a master */ + if (server.failover_state == FAILOVER_IN_PROGRESS) { + reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,"FAILOVER",NULL); + } else { + reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL); + } + if (reply != NULL) { serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply); sdsfree(reply); @@ -2323,6 +2393,7 @@ void syncWithMaster(connection *conn) { if (server.repl_state == REPL_STATE_SEND_PSYNC) { if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) { err = sdsnew("Write error sending the PSYNC command."); + abortFailover("Write error to failover target"); goto write_error; } server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY; @@ -2340,6 +2411,18 @@ void syncWithMaster(connection *conn) { psync_result = slaveTryPartialResynchronization(conn,1); if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */ + /* Check the status of the planned failover. We expect PSYNC_CONTINUE, + * but there is nothing technically wrong with a full resync which + * could happen in edge cases. 
*/ + if (server.failover_state == FAILOVER_IN_PROGRESS) { + if (psync_result == PSYNC_CONTINUE || psync_result == PSYNC_FULLRESYNC) { + clearFailoverState(); + } else { + abortFailover("Failover target rejected psync request"); + return; + } + } + /* If the master is in an transient error, we should try to PSYNC * from scratch later, so go to the error path. This happens when * the server is loading the dataset or is not connected with its @@ -2645,6 +2728,11 @@ void replicaofCommand(client *c) { return; } + if (server.failover_state != NO_FAILOVER) { + addReplyError(c,"REPLICAOF not allowed while failing over."); + return; + } + /* The special host/port combination "NO" "ONE" turns the instance * into a master. Otherwise the new master address is set. */ if (!strcasecmp(c->argv[1]->ptr,"no") && @@ -3178,6 +3266,10 @@ long long replicationGetSlaveOffset(void) { void replicationCron(void) { static long long replication_cron_loops = 0; + /* Check failover status first, to see if we need to start + * handling the failover. */ + updateFailoverStatus(); + /* Non blocking connection timeout? */ if (server.masterhost && (server.repl_state == REPL_STATE_CONNECTING || @@ -3235,8 +3327,9 @@ void replicationCron(void) { * alter the replication offsets of master and slave, and will no longer * match the one stored into 'mf_master_offset' state. 
*/ int manual_failover_in_progress = - server.cluster_enabled && - server.cluster->mf_end && + ((server.cluster_enabled && + server.cluster->mf_end) || + server.failover_end_time) && checkClientPauseTimeoutAndReturnIfPaused(); if (!manual_failover_in_progress) { @@ -3390,3 +3483,271 @@ void replicationStartPendingFork(void) { } } } + +/* Find replica at IP:PORT from replica list */ +static client *findReplica(char *host, int port) { + listIter li; + listNode *ln; + client *replica; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + replica = ln->value; + char ip[NET_IP_STR_LEN], *replicaip = replica->slave_ip; + + if (replicaip[0] == '\0') { + if (connPeerToString(replica->conn, ip, sizeof(ip), NULL) == -1) + continue; + replicaip = ip; + } + + if (!strcasecmp(host, replicaip) && + (port == replica->slave_listening_port)) + return replica; + } + + return NULL; +} + +const char *getFailoverStateString() { + switch(server.failover_state) { + case NO_FAILOVER: return "no-failover"; + case FAILOVER_IN_PROGRESS: return "failover-in-progress"; + case FAILOVER_WAIT_FOR_SYNC: return "waiting-for-sync"; + default: return "unknown"; + } +} + +/* Resets the internal failover configuration, this needs + * to be called after a failover either succeeds or fails + * as it includes the client unpause. */ +void clearFailoverState() { + server.failover_end_time = 0; + server.force_failover = 0; + zfree(server.target_replica_host); + server.target_replica_host = NULL; + server.target_replica_port = 0; + server.failover_state = NO_FAILOVER; + unpauseClients(); +} + +/* Abort an ongoing failover if one is going on. 
*/ +void abortFailover(const char *err) { + if (server.failover_state == NO_FAILOVER) return; + + if (server.target_replica_host) { + serverLog(LL_NOTICE,"FAILOVER to %s:%d aborted: %s", + server.target_replica_host,server.target_replica_port,err); + } else { + serverLog(LL_NOTICE,"FAILOVER to any replica aborted: %s",err); + } + if (server.failover_state == FAILOVER_IN_PROGRESS) { + replicationUnsetMaster(); + } + clearFailoverState(); +} + +/* + * FAILOVER [TO <HOST> <IP> [FORCE]] [ABORT] [TIMEOUT <timeout>] + * + * This command will coordinate a failover between the master and one + * of its replicas. The happy path contains the following steps: + * 1) The master will initiate a client pause write, to stop replication + * traffic. + * 2) The master will periodically check if any of its replicas has + * consumed the entire replication stream through acks. + * 3) Once any replica has caught up, the master will itself become a replica. + * 4) The master will send a PSYNC FAILOVER request to the target replica, which + * if accepted will cause the replica to become the new master and start a sync. + * + * FAILOVER ABORT is the only way to abort a failover command, as replicaof + * will be disabled. This may be needed if the failover is unable to progress. + * + * The optional arguments [TO <HOST> <IP>] allows designating a specific replica + * to be failed over to. + * + * FORCE flag indicates that even if the target replica is not caught up, + * failover to it anyway. This must be specified with a timeout and a target + * HOST and IP. + * + * TIMEOUT <timeout> indicates how long should the primary wait for + * a replica to sync up before aborting. If not specified, the failover + * will attempt forever and must be manually aborted. + */ +void failoverCommand(client *c) { + if (server.cluster_enabled) { + addReplyError(c,"FAILOVER not allowed in cluster mode. 
" + "Use CLUSTER FAILOVER command instead."); + return; + } + + /* Handle special case for abort */ + if ((c->argc == 2) && !strcasecmp(c->argv[1]->ptr,"abort")) { + if (server.failover_state == NO_FAILOVER) { + addReplyError(c, "No failover in progress."); + return; + } + + abortFailover("Failover manually aborted"); + addReply(c,shared.ok); + return; + } + + long timeout_in_ms = 0; + int force_flag = 0; + long port = 0; + char *host = NULL; + + /* Parse the command for syntax and arguments. */ + for (int j = 1; j < c->argc; j++) { + if (!strcasecmp(c->argv[j]->ptr,"timeout") && (j + 1 < c->argc) && + timeout_in_ms == 0) + { + if (getLongFromObjectOrReply(c,c->argv[j + 1], + &timeout_in_ms,NULL) != C_OK) return; + if (timeout_in_ms <= 0) { + addReplyError(c,"FAILOVER timeout must be greater than 0"); + return; + } + j++; + } else if (!strcasecmp(c->argv[j]->ptr,"to") && (j + 2 < c->argc) && + !host) + { + if (getLongFromObjectOrReply(c,c->argv[j + 2],&port,NULL) != C_OK) + return; + host = c->argv[j + 1]->ptr; + j += 2; + } else if (!strcasecmp(c->argv[j]->ptr,"force") && !force_flag) { + force_flag = 1; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + } + + if (server.failover_state != NO_FAILOVER) { + addReplyError(c,"FAILOVER already in progress."); + return; + } + + if (server.masterhost) { + addReplyError(c,"FAILOVER is not valid when server is a replica."); + return; + } + + if (listLength(server.slaves) == 0) { + addReplyError(c,"FAILOVER requires connected replicas."); + return; + } + + if (force_flag && (!timeout_in_ms || !host)) { + addReplyError(c,"FAILOVER with force option requires both a timeout " + "and target HOST and IP."); + return; + } + + /* If a replica address was provided, validate that it is connected. 
*/ + if (host) { + client *replica = findReplica(host, port); + + if (replica == NULL) { + addReplyError(c,"FAILOVER target HOST and IP is not " + "a replica."); + return; + } + + /* Check if requested replica is online */ + if (replica->replstate != SLAVE_STATE_ONLINE) { + addReplyError(c,"FAILOVER target replica is not online."); + return; + } + + server.target_replica_host = zstrdup(host); + server.target_replica_port = port; + serverLog(LL_NOTICE,"FAILOVER requested to %s:%ld.",host,port); + } else { + serverLog(LL_NOTICE,"FAILOVER requested to any replica."); + } + + mstime_t now = mstime(); + if (timeout_in_ms) { + server.failover_end_time = now + timeout_in_ms; + } + + server.force_failover = force_flag; + server.failover_state = FAILOVER_WAIT_FOR_SYNC; + /* Cluster failover will unpause eventually */ + pauseClients(LLONG_MAX,CLIENT_PAUSE_WRITE); + addReply(c,shared.ok); +} + +/* Failover cron function, checks coordinated failover state. + * + * Implementation note: The current implementation calls replicationSetMaster() + * to start the failover request, this has some unintended side effects if the + * failover doesn't work like blocked clients will be unblocked and replicas will + * be disconnected. This could be optimized further. + */ +void updateFailoverStatus(void) { + if (server.failover_state != FAILOVER_WAIT_FOR_SYNC) return; + mstime_t now = server.mstime; + + /* Check if failover operation has timed out */ + if (server.failover_end_time && server.failover_end_time <= now) { + if (server.force_failover) { + serverLog(LL_NOTICE, + "FAILOVER to %s:%d time out exceeded, failing over.", + server.target_replica_host, server.target_replica_port); + server.failover_state = FAILOVER_IN_PROGRESS; + /* If timeout has expired force a failover if requested. */ + replicationSetMaster(server.target_replica_host, + server.target_replica_port); + return; + } else { + /* Force was not requested, so timeout. 
*/ + abortFailover("Replica never caught up before timeout"); + return; + } + } + + /* Check to see if the replica has caught up so failover can start */ + client *replica = NULL; + if (server.target_replica_host) { + replica = findReplica(server.target_replica_host, + server.target_replica_port); + } else { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + /* Find any replica that has matched our repl_offset */ + while((ln = listNext(&li))) { + replica = ln->value; + if (replica->repl_ack_off == server.master_repl_offset) { + char ip[NET_IP_STR_LEN], *replicaip = replica->slave_ip; + + if (replicaip[0] == '\0') { + if (connPeerToString(replica->conn,ip,sizeof(ip),NULL) == -1) + continue; + replicaip = ip; + } + + /* We are now failing over to this specific node */ + server.target_replica_host = zstrdup(replicaip); + server.target_replica_port = replica->slave_listening_port; + break; + } + } + } + + /* We've found a replica that is caught up */ + if (replica && (replica->repl_ack_off == server.master_repl_offset)) { + server.failover_state = FAILOVER_IN_PROGRESS; + serverLog(LL_NOTICE, + "Failover target %s:%d is synced, failing over.", + server.target_replica_host, server.target_replica_port); + /* Designated replica is caught up, failover to it. */ + replicationSetMaster(server.target_replica_host, + server.target_replica_port); + } +} diff --git a/src/scripting.c b/src/scripting.c index 75604e4d8..41469ee2e 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1282,14 +1282,17 @@ void scriptingInit(int setup) { /* Release resources related to Lua scripting. * This function is used in order to reset the scripting environment. 
*/ -void scriptingRelease(void) { - dictRelease(server.lua_scripts); +void scriptingRelease(int async) { + if (async) + freeLuaScriptsAsync(server.lua_scripts); + else + dictRelease(server.lua_scripts); server.lua_scripts_mem = 0; lua_close(server.lua); } -void scriptingReset(void) { - scriptingRelease(); +void scriptingReset(int async) { + scriptingRelease(async); scriptingInit(0); } @@ -1711,8 +1714,12 @@ void scriptCommand(client *c) { " Set the debug mode for subsequent scripts executed.", "EXISTS <sha1> [<sha1> ...]", " Return information about the existence of the scripts in the script cache.", -"FLUSH", +"FLUSH [ASYNC|SYNC]", " Flush the Lua scripts cache. Very dangerous on replicas.", +" When called without the optional mode argument, the behavior is determined by the", +" lazyfree-lazy-user-flush configuration directive. Valid modes are:", +" * ASYNC: Asynchronously flush the scripts cache.", +" * SYNC: Synchronously flush the scripts cache.", "KILL", " Kill the currently executing Lua script.", "LOAD <script>", @@ -1720,8 +1727,19 @@ void scriptCommand(client *c) { NULL }; addReplyHelp(c, help); - } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) { - scriptingReset(); + } else if (c->argc >= 2 && !strcasecmp(c->argv[1]->ptr,"flush")) { + int async = 0; + if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"sync")) { + async = 0; + } else if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"async")) { + async = 1; + } else if (c->argc == 2) { + async = server.lazyfree_lazy_user_flush ? 1 : 0; + } else { + addReplyError(c,"SCRIPT FLUSH only support SYNC|ASYNC option"); + return; + } + scriptingReset(async); addReply(c,shared.ok); replicationScriptCacheFlush(); server.dirty++; /* Propagating this command is a good idea. 
*/ @@ -1157,12 +1157,80 @@ void *sds_malloc(size_t size) { return s_malloc(size); } void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); } void sds_free(void *ptr) { s_free(ptr); } +/* Perform expansion of a template string and return the result as a newly + * allocated sds. + * + * Template variables are specified using curly brackets, e.g. {variable}. + * An opening bracket can be quoted by repeating it twice. + */ +sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg) +{ + sds res = sdsempty(); + const char *p = template; + + while (*p) { + /* Find next variable, copy everything until there */ + const char *sv = strchr(p, '{'); + if (!sv) { + /* Not found: copy till rest of template and stop */ + res = sdscat(res, p); + break; + } else if (sv > p) { + /* Found: copy anything up to the begining of the variable */ + res = sdscatlen(res, p, sv - p); + } + + /* Skip into variable name, handle premature end or quoting */ + sv++; + if (!*sv) goto error; /* Premature end of template */ + if (*sv == '{') { + /* Quoted '{' */ + p = sv + 1; + res = sdscat(res, "{"); + continue; + } + + /* Find end of variable name, handle premature end of template */ + const char *ev = strchr(sv, '}'); + if (!ev) goto error; + + /* Pass variable name to callback and obtain value. If callback failed, + * abort. 
*/ + sds varname = sdsnewlen(sv, ev - sv); + sds value = cb_func(varname, cb_arg); + sdsfree(varname); + if (!value) goto error; + + /* Append value to result and continue */ + res = sdscat(res, value); + sdsfree(value); + p = ev + 1; + } + + return res; + +error: + sdsfree(res); + return NULL; +} + #ifdef REDIS_TEST #include <stdio.h> #include <limits.h> #include "testhelp.h" #define UNUSED(x) (void)(x) + +static sds sdsTestTemplateCallback(sds varname, void *arg) { + UNUSED(arg); + static const char *_var1 = "variable1"; + static const char *_var2 = "variable2"; + + if (!strcmp(varname, _var1)) return sdsnew("value1"); + else if (!strcmp(varname, _var2)) return sdsnew("value2"); + else return NULL; +} + int sdsTest(int argc, char **argv) { UNUSED(argc); UNUSED(argv); @@ -1342,6 +1410,30 @@ int sdsTest(int argc, char **argv) { sdsfree(x); } + + /* Simple template */ + x = sdstemplate("v1={variable1} v2={variable2}", sdsTestTemplateCallback, NULL); + test_cond("sdstemplate() normal flow", + memcmp(x,"v1=value1 v2=value2",19) == 0); + sdsfree(x); + + /* Template with callback error */ + x = sdstemplate("v1={variable1} v3={doesnotexist}", sdsTestTemplateCallback, NULL); + test_cond("sdstemplate() with callback error", x == NULL); + + /* Template with empty var name */ + x = sdstemplate("v1={", sdsTestTemplateCallback, NULL); + test_cond("sdstemplate() with empty var name", x == NULL); + + /* Template with truncated var name */ + x = sdstemplate("v1={start", sdsTestTemplateCallback, NULL); + test_cond("sdstemplate() with truncated var name", x == NULL); + + /* Template with quoting */ + x = sdstemplate("v1={{{variable1}} {{} v2={variable2}", sdsTestTemplateCallback, NULL); + test_cond("sdstemplate() with quoting", + memcmp(x,"v1={value1} {} v2=value2",24) == 0); + sdsfree(x); } test_report(); return 0; @@ -253,6 +253,14 @@ sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen); sds sdsjoin(char **argv, int argc, char *sep); sds sdsjoinsds(sds *argv, 
int argc, const char *sep, size_t seplen); +/* Callback for sdstemplate. The function gets called by sdstemplate + * every time a variable needs to be expanded. The variable name is + * provided as variable, and the callback is expected to return a + * substitution value. Returning a NULL indicates an error. + */ +typedef sds (*sdstemplate_callback_t)(const sds variable, void *arg); +sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg); + /* Low level functions exposed to the user API */ sds sdsMakeRoomFor(sds s, size_t addlen); void sdsIncrLen(sds s, ssize_t incr); diff --git a/src/sentinel.c b/src/sentinel.c index 02260feb7..a87766ebe 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -55,7 +55,8 @@ extern SSL_CTX *redis_tls_client_ctx; /* Address object, used to describe an ip:port pair. */ typedef struct sentinelAddr { - char *ip; + char *hostname; /* Hostname OR address, as specified */ + char *ip; /* Always a resolved address */ int port; } sentinelAddr; @@ -94,6 +95,8 @@ typedef struct sentinelAddr { #define SENTINEL_ELECTION_TIMEOUT 10000 #define SENTINEL_MAX_DESYNC 1000 #define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1 +#define SENTINEL_DEFAULT_RESOLVE_HOSTNAMES 0 +#define SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES 0 /* Failover machine different states. */ #define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */ @@ -260,6 +263,8 @@ struct sentinelState { paths at runtime? */ char *sentinel_auth_pass; /* Password to use for AUTH against other sentinel */ char *sentinel_auth_user; /* Username for ACLs AUTH against other sentinel. */ + int resolve_hostnames; /* Support use of hostnames, assuming DNS is well configured. */ + int announce_hostnames; /* Announce hostnames instead of IPs when we have them. */ } sentinel; /* A script execution job. 
*/ @@ -387,7 +392,7 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master); void sentinelScheduleScriptExecution(char *path, ...); void sentinelStartFailover(sentinelRedisInstance *master); void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata); -int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port); +int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr); char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch); void sentinelFlushConfig(void); void sentinelGenerateInitialMonitorEvents(void); @@ -455,6 +460,8 @@ void sentinelInfoCommand(client *c); void sentinelSetCommand(client *c); void sentinelPublishCommand(client *c); void sentinelRoleCommand(client *c); +void sentinelConfigGetCommand(client *c); +void sentinelConfigSetCommand(client *c); struct redisCommand sentinelcmds[] = { {"ping",pingCommand,1,"fast @connection",0,NULL,0,0,0,0,0}, @@ -474,6 +481,20 @@ struct redisCommand sentinelcmds[] = { {"command",commandCommand,-1, "random @connection", 0,NULL,0,0,0,0,0,0} }; +/* this array is used for sentinel config lookup, which need to be loaded + * before monitoring masters config to avoid dependency issues */ +const char *preMonitorCfgName[] = { + "announce-ip", + "announce-port", + "deny-scripts-reconfig", + "sentinel-user", + "sentinel-pass", + "current-epoch", + "myid", + "resolve-hostnames", + "announce-hostnames" +}; + /* This function overwrites a few normal Redis config default with Sentinel * specific defaults. */ void initSentinelConfig(void) { @@ -481,6 +502,8 @@ void initSentinelConfig(void) { server.protected_mode = 0; /* Sentinel must be exposed. */ } +void freeSentinelLoadQueueEntry(void *item); + /* Perform the Sentinel mode initialization. 
*/ void initSentinel(void) { unsigned int j; @@ -519,7 +542,10 @@ void initSentinel(void) { sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG; sentinel.sentinel_auth_pass = NULL; sentinel.sentinel_auth_user = NULL; + sentinel.resolve_hostnames = SENTINEL_DEFAULT_RESOLVE_HOSTNAMES; + sentinel.announce_hostnames = SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES; memset(sentinel.myid,0,sizeof(sentinel.myid)); + server.sentinel_config = NULL; } /* This function gets called when the server is in Sentinel mode, started, @@ -573,11 +599,13 @@ sentinelAddr *createSentinelAddr(char *hostname, int port) { errno = EINVAL; return NULL; } - if (anetResolve(NULL,hostname,ip,sizeof(ip)) == ANET_ERR) { + if (anetResolve(NULL,hostname,ip,sizeof(ip), + sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) { errno = ENOENT; return NULL; } sa = zmalloc(sizeof(*sa)); + sa->hostname = sdsnew(hostname); sa->ip = sdsnew(ip); sa->port = port; return sa; @@ -588,6 +616,7 @@ sentinelAddr *dupSentinelAddr(sentinelAddr *src) { sentinelAddr *sa; sa = zmalloc(sizeof(*sa)); + sa->hostname = sdsnew(src->hostname); sa->ip = sdsnew(src->ip); sa->port = src->port; return sa; @@ -595,6 +624,7 @@ sentinelAddr *dupSentinelAddr(sentinelAddr *src) { /* Free a Sentinel address. Can't fail. */ void releaseSentinelAddr(sentinelAddr *sa) { + sdsfree(sa->hostname); sdsfree(sa->ip); zfree(sa); } @@ -604,6 +634,21 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) { return a->port == b->port && !strcasecmp(a->ip,b->ip); } +/* Return non-zero if a hostname matches an address. */ +int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) { + char ip[NET_IP_STR_LEN]; + + /* We always resolve the hostname and compare it to the address */ + if (anetResolve(NULL, hostname, ip, sizeof(ip), + sentinel.resolve_hostnames ? 
ANET_NONE : ANET_IP_ONLY) == ANET_ERR) + return 0; + return !strcasecmp(a->ip, ip); +} + +const char *announceSentinelAddr(const sentinelAddr *a) { + return sentinel.announce_hostnames ? a->hostname : a->ip; +} + /* =========================== Events notification ========================== */ /* Send an event to log, pub/sub, user notification script. @@ -644,12 +689,12 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, if (master) { snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d", sentinelRedisInstanceTypeStr(ri), - ri->name, ri->addr->ip, ri->addr->port, - master->name, master->addr->ip, master->addr->port); + ri->name, announceSentinelAddr(ri->addr), ri->addr->port, + master->name, announceSentinelAddr(master->addr), master->addr->port); } else { snprintf(msg, sizeof(msg), "%s %s %s %d", sentinelRedisInstanceTypeStr(ri), - ri->name, ri->addr->ip, ri->addr->port); + ri->name, announceSentinelAddr(ri->addr), ri->addr->port); } fmt += 2; } else { @@ -971,7 +1016,8 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha sentinelScheduleScriptExecution(master->client_reconfig_script, master->name, (role == SENTINEL_LEADER) ? "leader" : "observer", - state, from->ip, fromport, to->ip, toport, NULL); + state, announceSentinelAddr(from), fromport, + announceSentinelAddr(to), toport, NULL); } /* =============================== instanceLink ============================= */ @@ -1097,6 +1143,35 @@ int sentinelTryConnectionSharing(sentinelRedisInstance *ri) { return C_ERR; } +/* Drop all connections to other sentinels. 
Returns the number of connections + * dropped.*/ +int sentinelDropConnections(void) { + dictIterator *di; + dictEntry *de; + int dropped = 0; + + di = dictGetIterator(sentinel.masters); + while ((de = dictNext(di)) != NULL) { + dictIterator *sdi; + dictEntry *sde; + + sentinelRedisInstance *ri = dictGetVal(de); + sdi = dictGetIterator(ri->sentinels); + while ((sde = dictNext(sdi)) != NULL) { + sentinelRedisInstance *si = dictGetVal(sde); + if (!si->link->disconnected) { + instanceLinkCloseConnection(si->link, si->link->pc); + instanceLinkCloseConnection(si->link, si->link->cc); + dropped++; + } + } + dictReleaseIterator(sdi); + } + dictReleaseIterator(di); + + return dropped; +} + /* When we detect a Sentinel to switch address (reporting a different IP/port * pair in Hello messages), let's update all the matching Sentinels in the * context of other masters as well and disconnect the links, so that everybody @@ -1209,7 +1284,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char * /* For slaves use ip:port as name. */ if (flags & SRI_SLAVE) { - anetFormatAddr(slavename, sizeof(slavename), hostname, port); + anetFormatAddr(slavename, sizeof(slavename), addr->ip, port); name = slavename; } @@ -1320,14 +1395,25 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) { /* Lookup a slave in a master Redis instance, by ip and port. */ sentinelRedisInstance *sentinelRedisInstanceLookupSlave( - sentinelRedisInstance *ri, char *ip, int port) + sentinelRedisInstance *ri, char *slave_addr, int port) { sds key; sentinelRedisInstance *slave; char buf[NET_ADDR_STR_LEN]; + sentinelAddr *addr; serverAssert(ri->flags & SRI_MASTER); - anetFormatAddr(buf,sizeof(buf),ip,port); + + /* We need to handle a slave_addr that is potentially a hostname. + * If that is the case, depending on configuration we either resolve + * it and use the IP addres or fail. 
+ */ + addr = createSentinelAddr(slave_addr, port); + if (!addr) return NULL; + + anetFormatAddr(buf,sizeof(buf),addr->ip,addr->port); + releaseSentinelAddr(addr); + key = sdsnew(buf); slave = dictFetchValue(ri->slaves,key); sdsfree(key); @@ -1377,21 +1463,27 @@ int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) * of instances. Return NULL if not found, otherwise return the instance * pointer. * - * runid or ip can be NULL. In such a case the search is performed only + * runid or addr can be NULL. In such a case the search is performed only * by the non-NULL field. */ -sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid) { +sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) { dictIterator *di; dictEntry *de; sentinelRedisInstance *instance = NULL; + sentinelAddr *ri_addr = NULL; - serverAssert(ip || runid); /* User must pass at least one search param. */ + serverAssert(addr || runid); /* User must pass at least one search param. 
*/ + if (addr != NULL) { + /* Resolve addr, we use the IP as a key even if a hostname is used */ + ri_addr = createSentinelAddr(addr, port); + if (!ri_addr) return NULL; + } di = dictGetIterator(instances); while((de = dictNext(di)) != NULL) { sentinelRedisInstance *ri = dictGetVal(de); if (runid && !ri->runid) continue; if ((runid == NULL || strcmp(ri->runid, runid) == 0) && - (ip == NULL || (strcmp(ri->addr->ip, ip) == 0 && + (addr == NULL || (strcmp(ri->addr->ip, ri_addr->ip) == 0 && ri->addr->port == port))) { instance = ri; @@ -1399,6 +1491,9 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c } } dictReleaseIterator(di); + if (ri_addr != NULL) + releaseSentinelAddr(ri_addr); + return instance; } @@ -1513,26 +1608,28 @@ int sentinelResetMastersByPattern(char *pattern, int flags) { * * The function returns C_ERR if the address can't be resolved for some * reason. Otherwise C_OK is returned. */ -int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, int port) { +int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hostname, int port) { sentinelAddr *oldaddr, *newaddr; sentinelAddr **slaves = NULL; int numslaves = 0, j; dictIterator *di; dictEntry *de; - newaddr = createSentinelAddr(ip,port); + newaddr = createSentinelAddr(hostname,port); if (newaddr == NULL) return C_ERR; - /* Make a list of slaves to add back after the reset. - * Don't include the one having the address we are switching to. */ + /* There can be only 0 or 1 slave that has the newaddr. + * and It can add old master 1 more slave. + * so It allocates dictSize(master->slaves) + 1 */ + slaves = zmalloc(sizeof(sentinelAddr*)*(dictSize(master->slaves) + 1)); + + /* Don't include the one having the address we are switching to. 
*/ di = dictGetIterator(master->slaves); while((de = dictNext(di)) != NULL) { sentinelRedisInstance *slave = dictGetVal(de); if (sentinelAddrIsEqual(slave->addr,newaddr)) continue; - slaves = zrealloc(slaves,sizeof(sentinelAddr*)*(numslaves+1)); - slaves[numslaves++] = createSentinelAddr(slave->addr->ip, - slave->addr->port); + slaves[numslaves++] = dupSentinelAddr(slave->addr); } dictReleaseIterator(di); @@ -1540,9 +1637,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, * as a slave as well, so that we'll be able to sense / reconfigure * the old master. */ if (!sentinelAddrIsEqual(newaddr,master->addr)) { - slaves = zrealloc(slaves,sizeof(sentinelAddr*)*(numslaves+1)); - slaves[numslaves++] = createSentinelAddr(master->addr->ip, - master->addr->port); + slaves[numslaves++] = dupSentinelAddr(master->addr); } /* Reset and switch address. */ @@ -1556,7 +1651,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, for (j = 0; j < numslaves; j++) { sentinelRedisInstance *slave; - slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->ip, + slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->hostname, slaves[j]->port, master->quorum, master); releaseSentinelAddr(slaves[j]); if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@"); @@ -1640,7 +1735,164 @@ char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) { } /* ============================ Config handling ============================= */ -char *sentinelHandleConfiguration(char **argv, int argc) { + +/* Generalise handling create instance error. Use SRI_MASTER, SRI_SLAVE or + * SRI_SENTINEL as a role value. 
*/ +const char *sentinelCheckCreateInstanceErrors(int role) { + switch(errno) { + case EBUSY: + switch (role) { + case SRI_MASTER: + return "Duplicate master name."; + case SRI_SLAVE: + return "Duplicate hostname and port for replica."; + case SRI_SENTINEL: + return "Duplicate runid for sentinel."; + default: + serverAssert(0); + break; + } + break; + case ENOENT: + return "Can't resolve instance hostname."; + case EINVAL: + return "Invalid port number."; + default: + return "Unknown Error for creating instances."; + } +} + +/* init function for server.sentinel_config */ +void initializeSentinelConfig() { + server.sentinel_config = zmalloc(sizeof(struct sentinelConfig)); + server.sentinel_config->monitor_cfg = listCreate(); + server.sentinel_config->pre_monitor_cfg = listCreate(); + server.sentinel_config->post_monitor_cfg = listCreate(); + listSetFreeMethod(server.sentinel_config->monitor_cfg,freeSentinelLoadQueueEntry); + listSetFreeMethod(server.sentinel_config->pre_monitor_cfg,freeSentinelLoadQueueEntry); + listSetFreeMethod(server.sentinel_config->post_monitor_cfg,freeSentinelLoadQueueEntry); +} + +/* destroy function for server.sentinel_config */ +void freeSentinelConfig() { + /* release these three config queues since we will not use it anymore */ + listRelease(server.sentinel_config->pre_monitor_cfg); + listRelease(server.sentinel_config->monitor_cfg); + listRelease(server.sentinel_config->post_monitor_cfg); + zfree(server.sentinel_config); + server.sentinel_config = NULL; +} + +/* Search config name in pre monitor config name array, return 1 if found, + * 0 if not found. 
*/ +int searchPreMonitorCfgName(const char *name) { + for (unsigned int i = 0; i < sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]); i++) { + if (!strcasecmp(preMonitorCfgName[i],name)) return 1; + } + return 0; +} + +/* free method for sentinelLoadQueueEntry when release the list */ +void freeSentinelLoadQueueEntry(void *item) { + struct sentinelLoadQueueEntry *entry = item; + sdsfreesplitres(entry->argv,entry->argc); + sdsfree(entry->line); + zfree(entry); +} + +/* This function is used for queuing sentinel configuration, the main + * purpose of this function is to delay parsing the sentinel config option + * in order to avoid the order dependent issue from the config. */ +void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) { + int i; + struct sentinelLoadQueueEntry *entry; + + /* initialize sentinel_config for the first call */ + if (server.sentinel_config == NULL) initializeSentinelConfig(); + + entry = zmalloc(sizeof(struct sentinelLoadQueueEntry)); + entry->argv = zmalloc(sizeof(char*)*argc); + entry->argc = argc; + entry->linenum = linenum; + entry->line = sdsdup(line); + for (i = 0; i < argc; i++) { + entry->argv[i] = sdsdup(argv[i]); + } + /* Separate config lines with pre monitor config, monitor config and + * post monitor config, in order to parsing config dependencies + * correctly. 
*/ + if (!strcasecmp(argv[0],"monitor")) { + listAddNodeTail(server.sentinel_config->monitor_cfg,entry); + } else if (searchPreMonitorCfgName(argv[0])) { + listAddNodeTail(server.sentinel_config->pre_monitor_cfg,entry); + } else{ + listAddNodeTail(server.sentinel_config->post_monitor_cfg,entry); + } +} + +/* This function is used for loading the sentinel configuration from + * pre_monitor_cfg, monitor_cfg and post_monitor_cfg list */ +void loadSentinelConfigFromQueue(void) { + const char *err = NULL; + listIter li; + listNode *ln; + int linenum = 0; + sds line = NULL; + + /* if there is no sentinel_config entry, we can return immediately */ + if (server.sentinel_config == NULL) return; + + /* loading from pre monitor config queue first to avoid dependency issues */ + listRewind(server.sentinel_config->pre_monitor_cfg,&li); + while((ln = listNext(&li))) { + struct sentinelLoadQueueEntry *entry = ln->value; + err = sentinelHandleConfiguration(entry->argv,entry->argc); + if (err) { + linenum = entry->linenum; + line = entry->line; + goto loaderr; + } + } + + /* loading from monitor config queue */ + listRewind(server.sentinel_config->monitor_cfg,&li); + while((ln = listNext(&li))) { + struct sentinelLoadQueueEntry *entry = ln->value; + err = sentinelHandleConfiguration(entry->argv,entry->argc); + if (err) { + linenum = entry->linenum; + line = entry->line; + goto loaderr; + } + } + + /* loading from the post monitor config queue */ + listRewind(server.sentinel_config->post_monitor_cfg,&li); + while((ln = listNext(&li))) { + struct sentinelLoadQueueEntry *entry = ln->value; + err = sentinelHandleConfiguration(entry->argv,entry->argc); + if (err) { + linenum = entry->linenum; + line = entry->line; + goto loaderr; + } + } + + /* free sentinel_config when config loading is finished */ + freeSentinelConfig(); + return; + +loaderr: + fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n", + REDIS_VERSION); + fprintf(stderr, "Reading the configuration file, at line 
%d\n", linenum); + fprintf(stderr, ">>> '%s'\n", line); + fprintf(stderr, "%s\n", err); + exit(1); +} + +const char *sentinelHandleConfiguration(char **argv, int argc) { + sentinelRedisInstance *ri; if (!strcasecmp(argv[0],"monitor") && argc == 5) { @@ -1651,11 +1903,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) { if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2], atoi(argv[3]),quorum,NULL) == NULL) { - switch(errno) { - case EBUSY: return "Duplicated master name."; - case ENOENT: return "Can't resolve master instance hostname."; - case EINVAL: return "Invalid port number"; - } + return sentinelCheckCreateInstanceErrors(SRI_MASTER); } } else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) { /* down-after-milliseconds <name> <milliseconds> */ @@ -1737,7 +1985,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) { if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,argv[2], atoi(argv[3]), ri->quorum, ri)) == NULL) { - return "Wrong hostname or port for replica."; + return sentinelCheckCreateInstanceErrors(SRI_SLAVE); } } else if (!strcasecmp(argv[0],"known-sentinel") && (argc == 4 || argc == 5)) { @@ -1750,7 +1998,7 @@ char *sentinelHandleConfiguration(char **argv, int argc) { if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2], atoi(argv[3]), ri->quorum, ri)) == NULL) { - return "Wrong hostname or port for sentinel."; + return sentinelCheckCreateInstanceErrors(SRI_SENTINEL); } si->runid = sdsnew(argv[4]); sentinelTryConnectionSharing(si); @@ -1787,6 +2035,16 @@ char *sentinelHandleConfiguration(char **argv, int argc) { /* sentinel-pass <password> */ if (strlen(argv[1])) sentinel.sentinel_auth_pass = sdsnew(argv[1]); + } else if (!strcasecmp(argv[0],"resolve-hostnames") && argc == 2) { + /* resolve-hostnames <yes|no> */ + if ((sentinel.resolve_hostnames = yesnotoi(argv[1])) == -1) { + return "Please specify yes or not for the resolve-hostnames option."; + } + } else if 
(!strcasecmp(argv[0],"announce-hostnames") && argc == 2) { + /* announce-hostnames <yes|no> */ + if ((sentinel.announce_hostnames = yesnotoi(argv[1])) == -1) { + return "Please specify yes or not for the announce-hostnames option."; + } } else { return "Unrecognized sentinel configuration statement."; } @@ -1805,14 +2063,29 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel unique ID. */ line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel myid",line,1); /* sentinel deny-scripts-reconfig. */ line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s", sentinel.deny_scripts_reconfig ? "yes" : "no"); - rewriteConfigRewriteLine(state,"sentinel",line, + rewriteConfigRewriteLine(state,"sentinel deny-scripts-reconfig",line, sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG); + /* sentinel resolve-hostnames. + * This must be included early in the file so it is already in effect + * when reading the file. + */ + line = sdscatprintf(sdsempty(), "sentinel resolve-hostnames %s", + sentinel.resolve_hostnames ? "yes" : "no"); + rewriteConfigRewriteLine(state,"sentinel",line, + sentinel.resolve_hostnames != SENTINEL_DEFAULT_RESOLVE_HOSTNAMES); + + /* sentinel announce-hostnames. */ + line = sdscatprintf(sdsempty(), "sentinel announce-hostnames %s", + sentinel.announce_hostnames ? "yes" : "no"); + rewriteConfigRewriteLine(state,"sentinel",line, + sentinel.announce_hostnames != SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES); + /* For every master emit a "sentinel monitor" config entry. 
*/ di = dictGetIterator(sentinel.masters); while((de = dictNext(di)) != NULL) { @@ -1823,16 +2096,18 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { master = dictGetVal(de); master_addr = sentinelGetCurrentMasterAddress(master); line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d", - master->name, master_addr->ip, master_addr->port, + master->name, announceSentinelAddr(master_addr), master_addr->port, master->quorum); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel monitor",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ /* sentinel down-after-milliseconds */ if (master->down_after_period != SENTINEL_DEFAULT_DOWN_AFTER) { line = sdscatprintf(sdsempty(), "sentinel down-after-milliseconds %s %ld", master->name, (long) master->down_after_period); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel failover-timeout */ @@ -1840,7 +2115,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel failover-timeout %s %ld", master->name, (long) master->failover_timeout); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ + } /* sentinel parallel-syncs */ @@ -1848,7 +2125,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel parallel-syncs %s %d", master->name, master->parallel_syncs); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel notification-script */ @@ -1856,7 +2134,8 @@ void rewriteConfigSentinelOption(struct 
rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel notification-script %s %s", master->name, master->notification_script); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel notification-script",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel client-reconfig-script */ @@ -1864,7 +2143,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel client-reconfig-script %s %s", master->name, master->client_reconfig_script); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel auth-pass & auth-user */ @@ -1872,27 +2152,32 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel auth-pass %s %s", master->name, master->auth_pass); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } if (master->auth_user) { line = sdscatprintf(sdsempty(), "sentinel auth-user %s %s", master->name, master->auth_user); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel auth-user",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel config-epoch */ line = sdscatprintf(sdsempty(), "sentinel config-epoch %s %llu", master->name, (unsigned long long) master->config_epoch); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ + /* sentinel leader-epoch */ line = sdscatprintf(sdsempty(), "sentinel leader-epoch %s %llu", master->name, (unsigned long long) master->leader_epoch); - 
rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ /* sentinel known-slave */ di2 = dictGetIterator(master->slaves); @@ -1911,8 +2196,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { slave_addr = master->addr; line = sdscatprintf(sdsempty(), "sentinel known-replica %s %s %d", - master->name, slave_addr->ip, slave_addr->port); - rewriteConfigRewriteLine(state,"sentinel",line,1); + master->name, announceSentinelAddr(slave_addr), slave_addr->port); + rewriteConfigRewriteLine(state,"sentinel known-replica",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } dictReleaseIterator(di2); @@ -1923,8 +2209,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (ri->runid == NULL) continue; line = sdscatprintf(sdsempty(), "sentinel known-sentinel %s %s %d %s", - master->name, ri->addr->ip, ri->addr->port, ri->runid); - rewriteConfigRewriteLine(state,"sentinel",line,1); + master->name, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid); + rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } dictReleaseIterator(di2); @@ -1936,7 +2223,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { line = sdscatprintf(sdsempty(), "sentinel rename-command %s %s %s", master->name, oldname, newname); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel rename-command",line,1); + /* rewriteConfigMarkAsProcessed is handled after the loop */ } dictReleaseIterator(di2); } @@ -1944,36 +2232,62 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel current-epoch is a global state valid for all the masters. 
*/ line = sdscatprintf(sdsempty(), "sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel current-epoch",line,1); /* sentinel announce-ip. */ if (sentinel.announce_ip) { line = sdsnew("sentinel announce-ip "); line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip)); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel announce-ip",line,1); + } else { + rewriteConfigMarkAsProcessed(state,"sentinel announce-ip"); } /* sentinel announce-port. */ if (sentinel.announce_port) { line = sdscatprintf(sdsempty(),"sentinel announce-port %d", sentinel.announce_port); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel announce-port",line,1); + } else { + rewriteConfigMarkAsProcessed(state,"sentinel announce-port"); } /* sentinel sentinel-user. */ if (sentinel.sentinel_auth_user) { line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1); + } else { + rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user"); } /* sentinel sentinel-pass. */ if (sentinel.sentinel_auth_pass) { line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass); - rewriteConfigRewriteLine(state,"sentinel",line,1); + rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1); + } else { + rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass"); } - dictReleaseIterator(di); + + /* NOTE: the purpose here is in case due to the state change, the config rewrite + does not handle the configs, however, previously the config was set in the config file, + rewriteConfigMarkAsProcessed should be put here to mark it as processed in order to + delete the old config entry. 
+ */ + rewriteConfigMarkAsProcessed(state,"sentinel monitor"); + rewriteConfigMarkAsProcessed(state,"sentinel down-after-milliseconds"); + rewriteConfigMarkAsProcessed(state,"sentinel failover-timeout"); + rewriteConfigMarkAsProcessed(state,"sentinel parallel-syncs"); + rewriteConfigMarkAsProcessed(state,"sentinel notification-script"); + rewriteConfigMarkAsProcessed(state,"sentinel client-reconfig-script"); + rewriteConfigMarkAsProcessed(state,"sentinel auth-pass"); + rewriteConfigMarkAsProcessed(state,"sentinel auth-user"); + rewriteConfigMarkAsProcessed(state,"sentinel config-epoch"); + rewriteConfigMarkAsProcessed(state,"sentinel leader-epoch"); + rewriteConfigMarkAsProcessed(state,"sentinel known-replica"); + rewriteConfigMarkAsProcessed(state,"sentinel known-sentinel"); + rewriteConfigMarkAsProcessed(state,"sentinel rename-command"); } /* This function uses the config rewriting Redis engine in order to persist @@ -2029,7 +2343,7 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) { auth_user = ri->master->auth_user; } else if (ri->flags & SRI_SENTINEL) { /* If sentinel_auth_user is NULL, AUTH will use default user - with sentinel_auth_pass to autenticate */ + with sentinel_auth_pass to authenticate */ if (sentinel.sentinel_auth_pass) { auth_pass = sentinel.sentinel_auth_pass; auth_user = sentinel.sentinel_auth_user; @@ -2101,6 +2415,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) { /* Commands connection. 
*/ if (link->cc == NULL) { link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR); + if (!link->cc->err) anetCloexec(link->cc->c.fd); if (!link->cc->err && server.tls_replication && (instanceLinkNegotiateTLS(link->cc) == C_ERR)) { sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to initialize TLS"); @@ -2128,6 +2443,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) { /* Pub / Sub */ if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) { link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR); + if (!link->pc->err) anetCloexec(link->pc->c.fd); if (!link->pc->err && server.tls_replication && (instanceLinkNegotiateTLS(link->pc) == C_ERR)) { sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #Failed to initialize TLS"); @@ -2137,7 +2453,6 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) { instanceLinkCloseConnection(link,link->pc); } else { int retval; - link->pc_conn_time = mstime(); link->pc->data = link; redisAeAttach(server.el,link->pc); @@ -2375,9 +2690,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { sentinelRedisInstanceNoDownFor(ri,wait_time) && mstime() - ri->role_reported_time > wait_time) { - int retval = sentinelSendSlaveOf(ri, - ri->master->addr->ip, - ri->master->addr->port); + int retval = sentinelSendSlaveOf(ri,ri->master->addr); if (retval == C_OK) sentinelEvent(LL_NOTICE,"+convert-to-slave",ri,"%@"); } @@ -2388,7 +2701,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { if ((ri->flags & SRI_SLAVE) && role == SRI_SLAVE && (ri->slave_master_port != ri->master->addr->port || - strcasecmp(ri->slave_master_host,ri->master->addr->ip))) + !sentinelAddrEqualsHostname(ri->master->addr, ri->slave_master_host))) { mstime_t wait_time = ri->master->failover_timeout; @@ -2398,9 +2711,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { 
sentinelRedisInstanceNoDownFor(ri,wait_time) && mstime() - ri->slave_conf_change_time > wait_time) { - int retval = sentinelSendSlaveOf(ri, - ri->master->addr->ip, - ri->master->addr->port); + int retval = sentinelSendSlaveOf(ri,ri->master->addr); if (retval == C_OK) sentinelEvent(LL_NOTICE,"+fix-slave-config",ri,"%@"); } @@ -2414,8 +2725,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { /* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */ if ((ri->flags & SRI_RECONF_SENT) && ri->slave_master_host && - strcmp(ri->slave_master_host, - ri->master->promoted_slave->addr->ip) == 0 && + sentinelAddrEqualsHostname(ri->master->promoted_slave->addr, + ri->slave_master_host) && ri->slave_master_port == ri->master->promoted_slave->addr->port) { ri->flags &= ~SRI_RECONF_SENT; @@ -2592,7 +2903,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) { if (si && master->config_epoch < master_config_epoch) { master->config_epoch = master_config_epoch; if (master_port != master->addr->port || - strcmp(master->addr->ip, token[5])) + !sentinelAddrEqualsHostname(master->addr, token[5])) { sentinelAddr *old_addr; @@ -2600,7 +2911,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) { sentinelEvent(LL_WARNING,"+switch-master", master,"%s %s %d %s %d", master->name, - master->addr->ip, master->addr->port, + announceSentinelAddr(master->addr), master->addr->port, token[5], master_port); old_addr = dupSentinelAddr(master->addr); @@ -2693,7 +3004,7 @@ int sentinelSendHello(sentinelRedisInstance *ri) { announce_ip, announce_port, sentinel.myid, (unsigned long long) sentinel.current_epoch, /* --- */ - master->name,master_addr->ip,master_addr->port, + master->name,announceSentinelAddr(master_addr),master_addr->port, (unsigned long long) master->config_epoch); retval = redisAsyncCommand(ri->link->cc, sentinelPublishReplyCallback, ri, "%s %s %s", @@ -2827,6 +3138,101 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) { /* 
=========================== SENTINEL command ============================= */ +/* SENTINEL CONFIG SET <option> */ +void sentinelConfigSetCommand(client *c) { + robj *o = c->argv[3]; + robj *val = c->argv[4]; + long long numval; + int drop_conns = 0; + + if (!strcasecmp(o->ptr, "resolve-hostnames")) { + if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt; + sentinel.resolve_hostnames = numval; + } else if (!strcasecmp(o->ptr, "announce-hostnames")) { + if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt; + sentinel.announce_hostnames = numval; + } else if (!strcasecmp(o->ptr, "announce-ip")) { + if (sentinel.announce_ip) sdsfree(sentinel.announce_ip); + sentinel.announce_ip = sdsnew(val->ptr); + } else if (!strcasecmp(o->ptr, "announce-port")) { + if (getLongLongFromObject(val, &numval) == C_ERR || + numval < 0 || numval > 65535) + goto badfmt; + sentinel.announce_port = numval; + } else if (!strcasecmp(o->ptr, "sentinel-user")) { + sdsfree(sentinel.sentinel_auth_user); + sentinel.sentinel_auth_user = sdsnew(val->ptr); + drop_conns = 1; + } else if (!strcasecmp(o->ptr, "sentinel-pass")) { + sdsfree(sentinel.sentinel_auth_pass); + sentinel.sentinel_auth_pass = sdsnew(val->ptr); + drop_conns = 1; + } else { + addReplyErrorFormat(c, "Invalid argument '%s' to SENTINEL CONFIG SET", + (char *) o->ptr); + return; + } + + sentinelFlushConfig(); + addReply(c, shared.ok); + + /* Drop Sentinel connections to initiate a reconnect if needed. */ + if (drop_conns) + sentinelDropConnections(); + + return; + +badfmt: + addReplyErrorFormat(c, "Invalid value '%s' to SENTINEL CONFIG SET '%s'", + (char *) val->ptr, (char *) o->ptr); +} + +/* SENTINEL CONFIG GET <option> */ +void sentinelConfigGetCommand(client *c) { + robj *o = c->argv[3]; + const char *pattern = o->ptr; + void *replylen = addReplyDeferredLen(c); + int matches = 0; + + if (stringmatch(pattern,"resolve-hostnames",1)) { + addReplyBulkCString(c,"resolve-hostnames"); + addReplyBulkCString(c,sentinel.resolve_hostnames ? 
"yes" : "no"); + matches++; + } + + if (stringmatch(pattern, "announce-hostnames", 1)) { + addReplyBulkCString(c,"announce-hostnames"); + addReplyBulkCString(c,sentinel.announce_hostnames ? "yes" : "no"); + matches++; + } + + if (stringmatch(pattern, "announce-ip", 1)) { + addReplyBulkCString(c,"announce-ip"); + addReplyBulkCString(c,sentinel.announce_ip ? sentinel.announce_ip : ""); + matches++; + } + + if (stringmatch(pattern, "announce-port", 1)) { + addReplyBulkCString(c, "announce-port"); + addReplyBulkLongLong(c, sentinel.announce_port); + matches++; + } + + if (stringmatch(pattern, "sentinel-user", 1)) { + addReplyBulkCString(c, "sentinel-user"); + addReplyBulkCString(c, sentinel.sentinel_auth_user ? sentinel.sentinel_auth_user : ""); + matches++; + } + + if (stringmatch(pattern, "sentinel-pass", 1)) { + addReplyBulkCString(c, "sentinel-pass"); + addReplyBulkCString(c, sentinel.sentinel_auth_pass ? sentinel.sentinel_auth_pass : ""); + matches++; + } + + setDeferredMapLen(c, replylen, matches); +} + const char *sentinelFailoverStateStr(int state) { switch(state) { case SENTINEL_FAILOVER_STATE_NONE: return "none"; @@ -2853,7 +3259,7 @@ void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) { fields++; addReplyBulkCString(c,"ip"); - addReplyBulkCString(c,ri->addr->ip); + addReplyBulkCString(c,announceSentinelAddr(ri->addr)); fields++; addReplyBulkCString(c,"port"); @@ -3094,6 +3500,10 @@ void sentinelCommand(client *c) { " Check if the current Sentinel configuration is able to reach the quorum", " needed to failover a master and the majority needed to authorize the", " failover.", +"CONFIG SET <param> <value>", +" Set a global Sentinel configuration parameter.", +"CONFIG GET <param>", +" Get global Sentinel configuration parameter.", "GET-MASTER-ADDR-BY-NAME <master-name>", " Return the ip and port number of the master with that name.", "FAILOVER <master-name>", @@ -3235,7 +3645,7 @@ NULL sentinelAddr *addr = 
sentinelGetCurrentMasterAddress(ri); addReplyArrayLen(c,2); - addReplyBulkCString(c,addr->ip); + addReplyBulkCString(c,announceSentinelAddr(addr)); addReplyBulkLongLong(c,addr->port); } } else if (!strcasecmp(c->argv[1]->ptr,"failover")) { @@ -3280,11 +3690,12 @@ NULL return; } - /* Make sure the IP field is actually a valid IP before passing it - * to createSentinelRedisInstance(), otherwise we may trigger a - * DNS lookup at runtime. */ - if (anetResolveIP(NULL,c->argv[3]->ptr,ip,sizeof(ip)) == ANET_ERR) { - addReplyError(c,"Invalid IP address specified"); + /* If resolve-hostnames is used, actual DNS resolution may take place. + * Otherwise just validate address. + */ + if (anetResolve(NULL,c->argv[3]->ptr,ip,sizeof(ip), + sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) { + addReplyError(c, "Invalid IP address or hostname specified"); return; } @@ -3354,6 +3765,14 @@ NULL } else if (!strcasecmp(c->argv[1]->ptr,"set")) { if (c->argc < 3) goto numargserr; sentinelSetCommand(c); + } else if (!strcasecmp(c->argv[1]->ptr,"config")) { + if (c->argc < 3) goto numargserr; + if (!strcasecmp(c->argv[2]->ptr,"set") && c->argc == 5) + sentinelConfigSetCommand(c); + else if (!strcasecmp(c->argv[2]->ptr,"get") && c->argc == 4) + sentinelConfigGetCommand(c); + else + addReplyError(c, "Only SENTINEL CONFIG GET <option> / SET <option> <value> are supported."); } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) { /* SENTINEL INFO-CACHE <name> */ if (c->argc < 2) goto numargserr; @@ -3517,7 +3936,7 @@ void sentinelInfoCommand(client *c) { "master%d:name=%s,status=%s,address=%s:%d," "slaves=%lu,sentinels=%lu\r\n", master_id++, ri->name, status, - ri->addr->ip, ri->addr->port, + announceSentinelAddr(ri->addr), ri->addr->port, dictSize(ri->slaves), dictSize(ri->sentinels)+1); } @@ -3913,7 +4332,7 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f sentinelReceiveIsMasterDownReply, ri, "%s is-master-down-by-addr %s %s %llu %s", 
sentinelInstanceMapCommand(ri,"SENTINEL"), - master->addr->ip, port, + announceSentinelAddr(master->addr), port, sentinel.current_epoch, (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ? sentinel.myid : "*"); @@ -4067,17 +4486,19 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) { * The command returns C_OK if the SLAVEOF command was accepted for * (later) delivery otherwise C_ERR. The command replies are just * discarded. */ -int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) { +int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr) { char portstr[32]; + const char *host; int retval; - ll2string(portstr,sizeof(portstr),port); - /* If host is NULL we send SLAVEOF NO ONE that will turn the instance - * into a master. */ - if (host == NULL) { + * into a master. */ + if (!addr) { host = "NO"; memcpy(portstr,"ONE",4); + } else { + host = announceSentinelAddr(addr); + ll2string(portstr,sizeof(portstr),addr->port); } /* In order to send SLAVEOF in a safe way, we send a transaction performing @@ -4362,7 +4783,7 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) { * We actually register a generic callback for this command as we don't * really care about the reply. We check if it worked indirectly observing * if INFO returns a different role (master instead of slave). 
*/ - retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0); + retval = sentinelSendSlaveOf(ri->promoted_slave,NULL); if (retval != C_OK) return; sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion", ri->promoted_slave,"%@"); @@ -4432,9 +4853,7 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) { if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE|SRI_RECONF_SENT)) continue; if (slave->link->disconnected) continue; - retval = sentinelSendSlaveOf(slave, - master->promoted_slave->addr->ip, - master->promoted_slave->addr->port); + retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr); if (retval == C_OK) { sentinelEvent(LL_NOTICE,"+slave-reconf-sent-be",slave,"%@"); slave->flags |= SRI_RECONF_SENT; @@ -4489,9 +4908,7 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) { if (slave->link->disconnected) continue; /* Send SLAVEOF <new master>. */ - retval = sentinelSendSlaveOf(slave, - master->promoted_slave->addr->ip, - master->promoted_slave->addr->port); + retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr); if (retval == C_OK) { slave->flags |= SRI_RECONF_SENT; slave->slave_reconf_sent_time = mstime(); @@ -4513,10 +4930,10 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) { master->promoted_slave : master; sentinelEvent(LL_WARNING,"+switch-master",master,"%s %s %d %s %d", - master->name, master->addr->ip, master->addr->port, - ref->addr->ip, ref->addr->port); + master->name, announceSentinelAddr(master->addr), master->addr->port, + announceSentinelAddr(ref->addr), ref->addr->port); - sentinelResetMasterAndChangeAddress(master,ref->addr->ip,ref->addr->port); + sentinelResetMasterAndChangeAddress(master,ref->addr->hostname,ref->addr->port); } void sentinelFailoverStateMachine(sentinelRedisInstance *ri) { @@ -4673,4 +5090,3 @@ void sentinelTimer(void) { * election because of split brain voting). 
*/ server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ; } - diff --git a/src/server.c b/src/server.c index 0551eb3e4..faaca7215 100644 --- a/src/server.c +++ b/src/server.c @@ -201,6 +201,14 @@ struct redisCommand redisCommandTable[] = { "read-only fast @string", 0,NULL,1,1,1,0,0,0}, + {"getex",getexCommand,-2, + "write fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"getdel",getdelCommand,2, + "write fast @string", + 0,NULL,1,1,1,0,0,0}, + /* Note that we can't flag set as fast, since it may perform an * implicit DEL of a large key. */ {"set",setCommand,-3, @@ -449,15 +457,15 @@ struct redisCommand redisCommandTable[] = { {"zunionstore",zunionstoreCommand,-4, "write use-memory @sortedset", - 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0}, + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, {"zinterstore",zinterstoreCommand,-4, "write use-memory @sortedset", - 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0}, + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, {"zdiffstore",zdiffstoreCommand,-4, "write use-memory @sortedset", - 0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0}, + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, {"zunion",zunionCommand,-3, "read-only @sortedset", @@ -547,6 +555,10 @@ struct redisCommand redisCommandTable[] = { "write no-script fast @sortedset @blocking", 0,NULL,1,-2,1,0,0,0}, + {"zrandmember",zrandmemberCommand,-2, + "read-only random @sortedset", + 0,NULL,1,1,1,0,0,0}, + {"hset",hsetCommand,-4, "write use-memory fast @hash", 0,NULL,1,1,1,0,0,0}, @@ -603,6 +615,10 @@ struct redisCommand redisCommandTable[] = { "read-only fast @hash", 0,NULL,1,1,1,0,0,0}, + {"hrandfield",hrandfieldCommand,-2, + "read-only random @hash", + 0,NULL,1,1,1,0,0,0}, + {"hscan",hscanCommand,-3, "read-only random @hash", 0,NULL,1,1,1,0,0,0}, @@ -744,7 +760,7 @@ struct redisCommand redisCommandTable[] = { "admin no-script", 0,NULL,0,0,0,0,0,0}, - {"psync",syncCommand,3, + {"psync",syncCommand,-3, "admin no-script", 0,NULL,0,0,0,0,0,0}, @@ -941,7 +957,7 @@ struct redisCommand 
redisCommandTable[] = { {"georadius_ro",georadiusroCommand,-6, "read-only @geo", - 0,georadiusGetKeys,1,1,1,0,0,0}, + 0,NULL,1,1,1,0,0,0}, {"georadiusbymember",georadiusbymemberCommand,-5, "write use-memory @geo", @@ -949,7 +965,7 @@ struct redisCommand redisCommandTable[] = { {"georadiusbymember_ro",georadiusbymemberroCommand,-5, "read-only @geo", - 0,georadiusGetKeys,1,1,1,0,0,0}, + 0,NULL,1,1,1,0,0,0}, {"geohash",geohashCommand,-2, "read-only @geo", @@ -1016,11 +1032,11 @@ struct redisCommand redisCommandTable[] = { {"xread",xreadCommand,-4, "read-only @stream @blocking", - 0,xreadGetKeys,1,1,1,0,0,0}, + 0,xreadGetKeys,0,0,0,0,0,0}, {"xreadgroup",xreadCommand,-7, "write @stream @blocking", - 0,xreadGetKeys,1,1,1,0,0,0}, + 0,xreadGetKeys,0,0,0,0,0,0}, {"xgroup",xgroupCommand,-2, "write use-memory @stream", @@ -1084,6 +1100,10 @@ struct redisCommand redisCommandTable[] = { {"reset",resetCommand,1, "no-script ok-stale ok-loading fast @connection", + 0,NULL,0,0,0,0,0,0}, + + {"failover",failoverCommand,-1, + "admin no-script ok-stale", 0,NULL,0,0,0,0,0,0} }; @@ -1444,6 +1464,17 @@ dictType hashDictType = { NULL /* allow to expand */ }; +/* Dict type without destructor */ +dictType sdsReplyDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + /* Keylist hash table type has unencoded redis objects as keys and * lists as values. It's used for blocking operations (BLPOP) and to * map swapped keys to a list of clients waiting for this keys to be loaded. 
*/ @@ -1592,6 +1623,9 @@ void resetChildState() { server.stat_current_cow_bytes = 0; updateDictResizePolicy(); closeChildInfoPipe(); + moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD, + REDISMODULE_SUBEVENT_FORK_CHILD_DIED, + NULL); } /* Return if child type is mutual exclusive with other fork children */ @@ -2159,14 +2193,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* AOF postponed flush: Try at every cron cycle if the slow fsync * completed. */ - if (server.aof_flush_postponed_start) flushAppendOnlyFile(0); + if (server.aof_state == AOF_ON && server.aof_flush_postponed_start) + flushAppendOnlyFile(0); /* AOF write errors: in this case we have a buffer to flush as well and * clear the AOF error in case of success to make the DB writable again, * however to try every second is enough in case of 'hz' is set to * a higher frequency. */ run_with_period(1000) { - if (server.aof_last_write_status == C_ERR) + if (server.aof_state == AOF_ON && server.aof_last_write_status == C_ERR) flushAppendOnlyFile(0); } @@ -2174,8 +2209,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { checkClientPauseTimeoutAndReturnIfPaused(); /* Replication cron function -- used to reconnect to master, - * detect transfer failures, start background RDB transfers and so forth. */ - run_with_period(1000) replicationCron(); + * detect transfer failures, start background RDB transfers and so forth. + * + * If Redis is trying to failover then run the replication cron faster so + * progress on the handshake happens more quickly. */ + if (server.failover_state != NO_FAILOVER) { + run_with_period(100) replicationCron(); + } else { + run_with_period(1000) replicationCron(); + } /* Run the Redis Cluster cron. */ run_with_period(100) { @@ -2386,12 +2428,18 @@ void beforeSleep(struct aeEventLoop *eventLoop) { server.get_ack_from_slaves = 0; } + /* We may have recieved updates from clients about their current offset. 
NOTE: + * this can't be done where the ACK is recieved since failover will disconnect + * our clients. */ + updateFailoverStatus(); + /* Send the invalidation messages to clients participating to the * client side caching protocol in broadcasting (BCAST) mode. */ trackingBroadcastInvalidationMessages(); /* Write the AOF buffer on disk */ - flushAppendOnlyFile(0); + if (server.aof_state == AOF_ON) + flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ handleClientsWithPendingWritesUsingThreads(); @@ -2532,6 +2580,12 @@ void createSharedObjects(void) { /* Used in the LMOVE/BLMOVE commands */ shared.left = createStringObject("left",4); shared.right = createStringObject("right",5); + shared.pexpireat = createStringObject("PEXPIREAT",9); + shared.pexpire = createStringObject("PEXPIRE",7); + shared.persist = createStringObject("PERSIST",7); + shared.set = createStringObject("SET",3); + shared.pxat = createStringObject("PXAT", 4); + shared.px = createStringObject("PX",2); for (j = 0; j < OBJ_SHARED_INTEGERS; j++) { shared.integers[j] = makeObjectShared(createObject(OBJ_STRING,(void*)(long)j)); @@ -2634,6 +2688,13 @@ void initServerConfig(void) { server.repl_backlog_off = 0; server.repl_no_slaves_since = time(NULL); + /* Failover related */ + server.failover_end_time = 0; + server.force_failover = 0; + server.target_replica_host = NULL; + server.target_replica_port = 0; + server.failover_state = NO_FAILOVER; + /* Client output buffer limits */ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) server.client_obuf_limits[j] = clientBufferLimitsDefaults[j]; @@ -2957,6 +3018,7 @@ int listenToPort(int port, int *fds, int *count) { return C_ERR; } anetNonBlock(NULL,fds[*count]); + anetCloexec(fds[*count]); (*count)++; } return C_OK; @@ -3095,6 +3157,7 @@ void initServer(void) { exit(1); } anetNonBlock(NULL,server.sofd); + anetCloexec(server.sofd); } /* Abort if there are no listening sockets at all. 
*/ @@ -3557,7 +3620,7 @@ void preventCommandReplication(client *c) { */ void call(client *c, int flags) { long long dirty; - ustime_t start, duration; + monotime call_timer; int client_old_flags = c->flags; struct redisCommand *real_cmd = c->cmd; static long long prev_err_count; @@ -3583,9 +3646,10 @@ void call(client *c, int flags) { dirty = server.dirty; prev_err_count = server.stat_total_error_replies; updateCachedTime(0); - start = server.ustime; + elapsedStart(&call_timer); c->cmd->proc(c); - duration = ustime()-start; + const long duration = elapsedUs(call_timer); + c->duration = duration; dirty = server.dirty-dirty; if (dirty < 0) dirty = 0; @@ -3629,7 +3693,10 @@ void call(client *c, int flags) { * arguments. */ robj **argv = c->original_argv ? c->original_argv : c->argv; int argc = c->original_argv ? c->original_argc : c->argc; - slowlogPushEntryIfNeeded(c,argv,argc,duration); + /* If the client is blocked we will handle slowlog when it is unblocked . */ + if (!(c->flags & CLIENT_BLOCKED)) { + slowlogPushEntryIfNeeded(c,argv,argc,duration); + } } freeClientOriginalArgv(c); @@ -4682,7 +4749,7 @@ sds genRedisInfoString(const char *section) { "aof_last_cow_size:%zu\r\n" "module_fork_in_progress:%d\r\n" "module_fork_last_cow_size:%zu\r\n", - server.loading, + (int)server.loading, server.stat_current_cow_bytes, server.dirty, server.child_type == CHILD_TYPE_RDB, @@ -4972,6 +5039,7 @@ sds genRedisInfoString(const char *section) { } } info = sdscatprintf(info, + "master_failover_state:%s\r\n" "master_replid:%s\r\n" "master_replid2:%s\r\n" "master_repl_offset:%lld\r\n" @@ -4980,6 +5048,7 @@ sds genRedisInfoString(const char *section) { "repl_backlog_size:%lld\r\n" "repl_backlog_first_byte_offset:%lld\r\n" "repl_backlog_histlen:%lld\r\n", + getFailoverStateString(), server.replid, server.replid2, server.master_repl_offset, @@ -5184,7 +5253,7 @@ static int smapsGetSharedDirty(unsigned long addr) { FILE *f; f = fopen("/proc/self/smaps", "r"); - serverAssert(f); + if 
(!f) return -1; while (1) { if (!fgets(buf, sizeof(buf), f)) @@ -5195,8 +5264,8 @@ static int smapsGetSharedDirty(unsigned long addr) { in_mapping = from <= addr && addr < to; if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) { - ret = sscanf(buf, "%*s %d", &val); - serverAssert(ret == 1); + sscanf(buf, "%*s %d", &val); + /* If parsing fails, we remain with val == -1 */ break; } } @@ -5210,23 +5279,33 @@ static int smapsGetSharedDirty(unsigned long addr) { * kernel is affected. * The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b * titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()" - * Return 1 if the kernel seems to be affected, and 0 otherwise. */ + * Return -1 on unexpected test failure, 1 if the kernel seems to be affected, + * and 0 otherwise. */ int linuxMadvFreeForkBugCheck(void) { - int ret, pipefd[2]; + int ret, pipefd[2] = { -1, -1 }; pid_t pid; - char *p, *q, bug_found = 0; - const long map_size = 3 * 4096; + char *p = NULL, *q; + int bug_found = 0; + long page_size = sysconf(_SC_PAGESIZE); + long map_size = 3 * page_size; /* Create a memory map that's in our full control (not one used by the allocator). */ p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - serverAssert(p != MAP_FAILED); + if (p == MAP_FAILED) { + serverLog(LL_WARNING, "Failed to mmap(): %s", strerror(errno)); + return -1; + } - q = p + 4096; + q = p + page_size; /* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent * Linux from merging this memory map with adjacent VMAs. 
*/ - ret = mprotect(q, 4096, PROT_READ | PROT_WRITE); - serverAssert(!ret); + ret = mprotect(q, page_size, PROT_READ | PROT_WRITE); + if (ret < 0) { + serverLog(LL_WARNING, "Failed to mprotect(): %s", strerror(errno)); + bug_found = -1; + goto exit; + } /* Write to the page once to make it resident */ *(volatile char*)q = 0; @@ -5235,8 +5314,16 @@ int linuxMadvFreeForkBugCheck(void) { #ifndef MADV_FREE #define MADV_FREE 8 #endif - ret = madvise(q, 4096, MADV_FREE); - serverAssert(!ret); + ret = madvise(q, page_size, MADV_FREE); + if (ret < 0) { + /* MADV_FREE is not available on older kernels that are presumably + * not affected. */ + if (errno == EINVAL) goto exit; + + serverLog(LL_WARNING, "Failed to madvise(): %s", strerror(errno)); + bug_found = -1; + goto exit; + } /* Write to the page after being marked for freeing, this is supposed to take * ownership of that page again. */ @@ -5244,37 +5331,47 @@ int linuxMadvFreeForkBugCheck(void) { /* Create a pipe for the child to return the info to the parent. */ ret = pipe(pipefd); - serverAssert(!ret); + if (ret < 0) { + serverLog(LL_WARNING, "Failed to create pipe: %s", strerror(errno)); + bug_found = -1; + goto exit; + } /* Fork the process. */ pid = fork(); - serverAssert(pid >= 0); - if (!pid) { - /* Child: check if the page is marked as dirty, expecing 4 (kB). + if (pid < 0) { + serverLog(LL_WARNING, "Failed to fork: %s", strerror(errno)); + bug_found = -1; + goto exit; + } else if (!pid) { + /* Child: check if the page is marked as dirty, page_size in kb. * A value of 0 means the kernel is affected by the bug. 
*/ - if (!smapsGetSharedDirty((unsigned long)q)) + ret = smapsGetSharedDirty((unsigned long) q); + if (!ret) bug_found = 1; + else if (ret == -1) /* Failed to read */ + bug_found = -1; - ret = write(pipefd[1], &bug_found, 1); - serverAssert(ret == 1); - + if (write(pipefd[1], &bug_found, sizeof(bug_found)) < 0) + serverLog(LL_WARNING, "Failed to write to parent: %s", strerror(errno)); exit(0); } else { /* Read the result from the child. */ - ret = read(pipefd[0], &bug_found, 1); - serverAssert(ret == 1); + ret = read(pipefd[0], &bug_found, sizeof(bug_found)); + if (ret < 0) { + serverLog(LL_WARNING, "Failed to read from child: %s", strerror(errno)); + bug_found = -1; + } /* Reap the child pid. */ - serverAssert(waitpid(pid, NULL, 0) == pid); + waitpid(pid, NULL, 0); } +exit: /* Cleanup */ - ret = close(pipefd[0]); - serverAssert(!ret); - ret = close(pipefd[1]); - serverAssert(!ret); - ret = munmap(p, map_size); - serverAssert(!ret); + if (pipefd[0] != -1) close(pipefd[0]); + if (pipefd[1] != -1) close(pipefd[1]); + if (p != NULL) munmap(p, map_size); return bug_found; } @@ -5470,7 +5567,7 @@ void setupChildSignalHandlers(void) { * of the parent process, e.g. fd(socket or flock) etc. * should close the resources not used by the child process, so that if the * parent restarts it can bind/lock despite the child possibly still running. 
*/ -void closeClildUnusedResourceAfterFork() { +void closeChildUnusedResourceAfterFork() { closeListeningSockets(0); if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) close(server.cluster_config_file_lock_fd); /* don't care if this fails */ @@ -5497,7 +5594,7 @@ int redisFork(int purpose) { server.in_fork_child = purpose; setOOMScoreAdj(CONFIG_OOM_BGCHILD); setupChildSignalHandlers(); - closeClildUnusedResourceAfterFork(); + closeChildUnusedResourceAfterFork(); } else { /* Parent */ server.stat_total_forks++; @@ -5523,6 +5620,9 @@ int redisFork(int purpose) { } updateDictResizePolicy(); + moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD, + REDISMODULE_SUBEVENT_FORK_CHILD_BORN, + NULL); } return childpid; } @@ -5533,7 +5633,7 @@ void sendChildCOWInfo(int ptype, int on_exit, char *pname) { if (private_dirty) { serverLog(on_exit ? LL_NOTICE : LL_VERBOSE, "%s: %zu MB of memory used by copy-on-write", - pname, private_dirty); + pname, private_dirty/(1024*1024)); } sendChildInfo(ptype, on_exit, private_dirty); @@ -5598,20 +5698,68 @@ void redisOutOfMemoryHandler(size_t allocation_size) { allocation_size); } -void redisSetProcTitle(char *title) { +/* Callback for sdstemplate on proc-title-template. See redis.conf for + * supported variables. + */ +static sds redisProcTitleGetVariable(const sds varname, void *arg) +{ + if (!strcmp(varname, "title")) { + return sdsnew(arg); + } else if (!strcmp(varname, "listen-addr")) { + if (server.port || server.tls_port) + return sdscatprintf(sdsempty(), "%s:%u", + server.bindaddr_count ? server.bindaddr[0] : "*", + server.port ? server.port : server.tls_port); + else + return sdscatprintf(sdsempty(), "unixsocket:%s", server.unixsocket); + } else if (!strcmp(varname, "server-mode")) { + if (server.cluster_enabled) return sdsnew("[cluster]"); + else if (server.sentinel_mode) return sdsnew("[sentinel]"); + else return sdsempty(); + } else if (!strcmp(varname, "config-file")) { + return sdsnew(server.configfile ? 
server.configfile : "-"); + } else if (!strcmp(varname, "port")) { + return sdscatprintf(sdsempty(), "%u", server.port); + } else if (!strcmp(varname, "tls-port")) { + return sdscatprintf(sdsempty(), "%u", server.tls_port); + } else if (!strcmp(varname, "unixsocket")) { + return sdsnew(server.unixsocket); + } else + return NULL; /* Unknown variable name */ +} + +/* Expand the specified proc-title-template string and return a newly + * allocated sds, or NULL. */ +static sds expandProcTitleTemplate(const char *template, const char *title) { + sds res = sdstemplate(template, redisProcTitleGetVariable, (void *) title); + if (!res) + return NULL; + return sdstrim(res, " "); +} +/* Validate the specified template, returns 1 if valid or 0 otherwise. */ +int validateProcTitleTemplate(const char *template) { + int ok = 1; + sds res = expandProcTitleTemplate(template, ""); + if (!res) + return 0; + if (sdslen(res) == 0) ok = 0; + sdsfree(res); + return ok; +} + +int redisSetProcTitle(char *title) { #ifdef USE_SETPROCTITLE - char *server_mode = ""; - if (server.cluster_enabled) server_mode = " [cluster]"; - else if (server.sentinel_mode) server_mode = " [sentinel]"; - - setproctitle("%s %s:%d%s", - title, - server.bindaddr_count ? server.bindaddr[0] : "*", - server.port ? server.port : server.tls_port, - server_mode); + if (!title) title = server.exec_argv[0]; + sds proc_title = expandProcTitleTemplate(server.proc_title_template, title); + if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */ + + setproctitle("%s", proc_title); + sdsfree(proc_title); #else UNUSED(title); #endif + + return C_OK; } void redisSetCpuAffinity(const char *cpulist) { @@ -5751,6 +5899,12 @@ int main(int argc, char **argv) { init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid()); crc64_init(); + /* Store umask value. Because umask(2) only offers a set-and-get API we have + * to reset it and restore it back. 
We do this early to avoid a potential + * race condition with threads that could be creating files or directories. + */ + umask(server.umask = umask(0777)); + uint8_t hashseed[16]; getRandomBytes(hashseed,sizeof(hashseed)); dictSetHashFunctionSeed(hashseed); @@ -5843,6 +5997,7 @@ int main(int argc, char **argv) { exit(1); } loadServerConfig(server.configfile, config_from_stdin, options); + if (server.sentinel_mode) loadSentinelConfigFromQueue(); sdsfree(options); } @@ -5868,7 +6023,7 @@ int main(int argc, char **argv) { readOOMScoreAdj(); initServer(); if (background || server.pidfile) createPidFile(); - redisSetProcTitle(argv[0]); + if (server.set_proc_title) redisSetProcTitle(NULL); redisAsciiArt(); checkTcpBacklogSettings(); @@ -5878,10 +6033,17 @@ int main(int argc, char **argv) { #ifdef __linux__ linuxMemoryWarnings(); #if defined (__arm64__) - if (linuxMadvFreeForkBugCheck()) { - serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. Please upgrade to the latest stable kernel."); + int ret; + if ((ret = linuxMadvFreeForkBugCheck())) { + if (ret == 1) + serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. " + "Please upgrade to the latest stable kernel."); + else + serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. " + "Your system could be affected, please report this error."); if (!checkIgnoreWarning("ARM64-COW-BUG")) { - serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG"); + serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. 
" + "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG"); exit(1); } } diff --git a/src/server.h b/src/server.h index eb967a042..b293afcee 100644 --- a/src/server.h +++ b/src/server.h @@ -115,6 +115,7 @@ typedef long long ustime_t; /* microsecond time type. */ #define NET_ADDR_STR_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */ #define CONFIG_BINDADDR_MAX 16 #define CONFIG_MIN_RESERVED_FDS 32 +#define CONFIG_DEFAULT_PROC_TITLE_TEMPLATE "{title} {listen-addr} {server-mode}" #define ACTIVE_EXPIRE_CYCLE_SLOW 0 #define ACTIVE_EXPIRE_CYCLE_FAST 1 @@ -270,6 +271,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_DENY_BLOCKING (1ULL<<41) /* Indicate that the client should not be blocked. currently, turned on inside MULTI, Lua, RM_Call, and AOF client */ +#define CLIENT_REPL_RDBONLY (1ULL<<42) /* This client is a replica that only wants + RDB without replication buffer. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -317,6 +320,14 @@ typedef enum { REPL_STATE_CONNECTED, /* Connected to master */ } repl_state; +/* The state of an in progress coordinated failover */ +typedef enum { + NO_FAILOVER = 0, /* No failover in progress */ + FAILOVER_WAIT_FOR_SYNC, /* Waiting for target replica to catch up */ + FAILOVER_IN_PROGRESS /* Waiting for target replica to accept + * PSYNC FAILOVER request. */ +} failover_state; + /* State of slaves from the POV of the master. Used in client->replstate. * In SEND_BULK and ONLINE state the slave receives new updates * in its output queue. In the WAIT_BGSAVE states instead the server is waiting @@ -870,6 +881,7 @@ typedef struct client { size_t sentlen; /* Amount of bytes already sent in the current buffer or object being sent. */ time_t ctime; /* Client creation time. */ + long duration; /* Current command duration. 
Used for measuring latency of blocking/non-blocking cmds */ time_t lastinteraction; /* Time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; uint64_t flags; /* Client flags: CLIENT_* macros. */ @@ -942,6 +954,19 @@ struct moduleLoadQueueEntry { robj **argv; }; +struct sentinelLoadQueueEntry { + int argc; + sds *argv; + int linenum; + sds line; +}; + +struct sentinelConfig { + list *pre_monitor_cfg; + list *monitor_cfg; + list *post_monitor_cfg; +}; + struct sharedObjectsStruct { robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space, *colon, *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4], @@ -951,7 +976,8 @@ struct sharedObjectsStruct { *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink, *rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, - *emptyscan, *multi, *exec, *left, *right, + *emptyscan, *multi, *exec, *left, *right, *persist, *set, *pexpireat, + *pexpire, *pxat, *px, *select[PROTO_SHARED_SELECT_CMDS], *integers[OBJ_SHARED_INTEGERS], *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */ @@ -1124,6 +1150,7 @@ struct redisServer { int config_hz; /* Configured HZ value. May be different than the actual 'hz' field value if dynamic-hz is enabled. */ + mode_t umask; /* The umask value of the process on startup */ int hz; /* serverCron() calls frequency in hertz */ int in_fork_child; /* indication that this is a fork child */ redisDb *db; @@ -1280,6 +1307,8 @@ struct redisServer { int supervised; /* 1 if supervised, 0 otherwise. 
*/ int supervised_mode; /* See SUPERVISED_* */ int daemonize; /* True if running as a daemon */ + int set_proc_title; /* True if change proc title */ + char *proc_title_template; /* Process title template format */ clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT]; /* AOF persistence */ int aof_enabled; /* AOF configuration */ @@ -1530,6 +1559,7 @@ struct redisServer { int lazyfree_lazy_expire; int lazyfree_lazy_server_del; int lazyfree_lazy_user_del; + int lazyfree_lazy_user_flush; /* Latency monitor */ long long latency_monitor_threshold; dict *latency_events; @@ -1554,6 +1584,16 @@ struct redisServer { char *bio_cpulist; /* cpu affinity list of bio thread. */ char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */ char *bgsave_cpulist; /* cpu affinity list of bgsave process. */ + /* Sentinel config */ + struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */ + /* Coordinate failover info */ + mstime_t failover_end_time; /* Deadline for failover command. */ + int force_failover; /* If true then failover will be foreced at the + * deadline, otherwise failover is aborted. */ + char *target_replica_host; /* Failover target host. If null during a + * failover then any replica can be used. 
*/ + int target_replica_port; /* Failover target port */ + int failover_state; /* Failover state */ }; typedef struct pubsubPattern { @@ -1679,6 +1719,7 @@ extern dictType hashDictType; extern dictType replScriptCacheDictType; extern dictType dbExpiresDictType; extern dictType modulesDictType; +extern dictType sdsReplyDictType; /*----------------------------------------------------------------------------- * Functions prototypes @@ -1728,7 +1769,8 @@ void getRandomBytes(unsigned char *p, size_t len); uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l); void exitFromChild(int retcode); size_t redisPopcount(void *s, long count); -void redisSetProcTitle(char *title); +int redisSetProcTitle(char *title); +int validateProcTitleTemplate(const char *template); int redisCommunicateSystemd(const char *sd_notify_msg); void redisSetCpuAffinity(const char *cpulist); @@ -1973,6 +2015,10 @@ void feedReplicationBacklog(void *ptr, size_t len); void showLatestBacklog(void); void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask); void rdbPipeWriteHandlerConnRemoved(struct connection *conn); +void clearFailoverState(void); +void updateFailoverStatus(void); +void abortFailover(const char *err); +const char *getFailoverStateString(); /* Generic persistence functions */ void startLoadingFile(FILE* fp, char* filename, int rdbflags); @@ -2042,7 +2088,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen); sds ACLDefaultUserFirstPassword(void); uint64_t ACLGetCommandCategoryFlagByName(const char *name); int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err); -char *ACLSetUserStringError(void); +const char *ACLSetUserStringError(void); int ACLLoadConfiguredUsers(void); sds ACLDescribeUser(user *u); void ACLLoadUsersAtStartup(void); @@ -2236,6 +2282,7 @@ void appendServerSaveParams(time_t seconds, int changes); void resetServerSaveParams(void); struct rewriteConfigState; /* Forward declaration to export API. 
*/ void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force); +void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option); int rewriteConfig(char *path, int force_all); void initConfigValues(); @@ -2330,7 +2377,9 @@ int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uin void initSentinelConfig(void); void initSentinel(void); void sentinelTimer(void); -char *sentinelHandleConfiguration(char **argv, int argc); +const char *sentinelHandleConfiguration(char **argv, int argc); +void queueSentinelConfig(sds *argv, int argc, int linenum, sds line); +void loadSentinelConfigFromQueue(void); void sentinelIsRunning(void); /* redis-check-rdb & aof */ @@ -2344,6 +2393,7 @@ int ldbRemoveChild(pid_t pid); void ldbKillForkedSessions(void); int ldbPendingChildren(void); sds luaCreateFunction(client *c, lua_State *lua, robj *body); +void freeLuaScriptsAsync(dict *lua_scripts); /* Blocked clients */ void processUnblockedClients(void); @@ -2356,6 +2406,7 @@ void disconnectAllBlockedClients(void); void handleClientsBlockedOnKeys(void); void signalKeyAsReady(redisDb *db, robj *key, int type); void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, struct listPos *listpos, streamID *ids); +void updateStatsOnUnblock(client *c, long blocked_us, long reply_us); /* timeout.c -- Blocked clients timeout and connections timeout. 
*/ void addClientToTimeoutTable(client *c); @@ -2403,6 +2454,8 @@ void setnxCommand(client *c); void setexCommand(client *c); void psetexCommand(client *c); void getCommand(client *c); +void getexCommand(client *c); +void getdelCommand(client *c); void delCommand(client *c); void unlinkCommand(client *c); void existsCommand(client *c); @@ -2505,6 +2558,7 @@ void zpopminCommand(client *c); void zpopmaxCommand(client *c); void bzpopminCommand(client *c); void bzpopmaxCommand(client *c); +void zrandmemberCommand(client *c); void multiCommand(client *c); void execCommand(client *c); void discardCommand(client *c); @@ -2538,6 +2592,7 @@ void hvalsCommand(client *c); void hgetallCommand(client *c); void hexistsCommand(client *c); void hscanCommand(client *c); +void hrandfieldCommand(client *c); void configCommand(client *c); void hincrbyCommand(client *c); void hincrbyfloatCommand(client *c); @@ -2607,6 +2662,7 @@ void lolwutCommand(client *c); void aclCommand(client *c); void stralgoCommand(client *c); void resetCommand(client *c); +void failoverCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/src/stream.h b/src/stream.h index c7acee719..1f2132365 100644 --- a/src/stream.h +++ b/src/stream.h @@ -108,6 +108,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev); int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields); void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen); +void streamIteratorRemoveEntry(streamIterator *si, streamID *current); void streamIteratorStop(streamIterator *si); streamCG *streamLookupCG(stream *s, sds groupname); streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int flags, int *created); @@ -121,5 +122,11 @@ int 
streamDecrID(streamID *id); void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername); robj *streamDup(robj *o); int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep); +int streamParseID(const robj *o, streamID *id); +robj *createObjectFromStreamID(streamID *id); +int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id); +int streamDeleteItem(stream *s, streamID *id); +int64_t streamTrimByLength(stream *s, long long maxlen, int approx); +int64_t streamTrimByID(stream *s, streamID minid, int approx); #endif diff --git a/src/t_hash.c b/src/t_hash.c index 51c7d6758..9f7540a72 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -598,6 +598,42 @@ int hashZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) { return ret; } +/* Create a new sds string from the ziplist entry. */ +sds hashSdsFromZiplistEntry(ziplistEntry *e) { + return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval); +} + +/* Reply with bulk string from the ziplist entry. */ +void hashReplyFromZiplistEntry(client *c, ziplistEntry *e) { + if (e->sval) + addReplyBulkCBuffer(c, e->sval, e->slen); + else + addReplyBulkLongLong(c, e->lval); +} + +/* Return random element from a non empty hash. + * 'key' and 'val' will be set to hold the element. + * The memory in them is not to be freed or modified by the caller. + * 'val' can be NULL in which case it's not extracted. 
*/ +void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, ziplistEntry *key, ziplistEntry *val) { + if (hashobj->encoding == OBJ_ENCODING_HT) { + dictEntry *de = dictGetFairRandomKey(hashobj->ptr); + sds s = dictGetKey(de); + key->sval = (unsigned char*)s; + key->slen = sdslen(s); + if (val) { + sds s = dictGetVal(de); + val->sval = (unsigned char*)s; + val->slen = sdslen(s); + } + } else if (hashobj->encoding == OBJ_ENCODING_ZIPLIST) { + ziplistRandomPair(hashobj->ptr, hashsize, key, val); + } else { + serverPanic("Unknown hash encoding"); + } +} + + /*----------------------------------------------------------------------------- * Hash type commands *----------------------------------------------------------------------------*/ @@ -922,3 +958,220 @@ void hscanCommand(client *c) { checkType(c,o,OBJ_HASH)) return; scanGenericCommand(c,o,cursor); } + +/* How many times bigger should be the hash compared to the requested size + * for us to not use the "remove elements" strategy? Read later in the + * implementation for more info. */ +#define HRANDFIELD_SUB_STRATEGY_MUL 3 + +void hrandfieldWithCountCommand(client *c, long l, int withvalues) { + unsigned long count, size; + int uniq = 1; + robj *hash; + + if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) + == NULL || checkType(c,hash,OBJ_HASH)) return; + size = hashTypeLength(hash); + + if(l >= 0) { + count = (unsigned long) l; + } else { + count = -l; + uniq = 0; + } + + /* If count is zero, serve it ASAP to avoid special cases later. */ + if (count == 0) { + addReply(c,shared.emptyarray); + return; + } + + /* CASE 1: The count was negative, so the extraction method is just: + * "return N random elements" sampling the whole set every time. + * This case is trivial and can be served without auxiliary data + * structures. This case is the only one that also needs to return the + * elements in random order. 
*/ + if (!uniq || count == 1) { + if (withvalues && c->resp == 2) + addReplyArrayLen(c, count*2); + else + addReplyArrayLen(c, count); + if (hash->encoding == OBJ_ENCODING_HT) { + sds key, value; + while (count--) { + dictEntry *de = dictGetRandomKey(hash->ptr); + key = dictGetKey(de); + value = dictGetVal(de); + if (withvalues && c->resp > 2) + addReplyArrayLen(c,2); + addReplyBulkCBuffer(c, key, sdslen(key)); + if (withvalues) + addReplyBulkCBuffer(c, value, sdslen(value)); + } + } else if (hash->encoding == OBJ_ENCODING_ZIPLIST) { + ziplistEntry *keys, *vals = NULL; + keys = zmalloc(sizeof(ziplistEntry)*count); + if (withvalues) + vals = zmalloc(sizeof(ziplistEntry)*count); + ziplistRandomPairs(hash->ptr, count, keys, vals); + for (unsigned long i = 0; i < count; i++) { + if (withvalues && c->resp > 2) + addReplyArrayLen(c,2); + if (keys[i].sval) + addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen); + else + addReplyBulkLongLong(c, keys[i].lval); + if (withvalues) { + if (vals[i].sval) + addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen); + else + addReplyBulkLongLong(c, vals[i].lval); + } + } + zfree(keys); + zfree(vals); + } + return; + } + + /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */ + long reply_size = count < size ? count : size; + if (withvalues && c->resp == 2) + addReplyArrayLen(c, reply_size*2); + else + addReplyArrayLen(c, reply_size); + + /* CASE 2: + * The number of requested elements is greater than the number of + * elements inside the hash: simply return the whole hash. 
*/ + if(count >= size) { + hashTypeIterator *hi = hashTypeInitIterator(hash); + while (hashTypeNext(hi) != C_ERR) { + if (withvalues && c->resp > 2) + addReplyArrayLen(c,2); + addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY); + if (withvalues) + addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE); + } + hashTypeReleaseIterator(hi); + return; + } + + /* CASE 3: + * The number of elements inside the hash is not greater than + * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements. + * In this case we create a hash from scratch with all the elements, and + * subtract random elements to reach the requested number of elements. + * + * This is done because if the number of requested elements is just + * a bit less than the number of elements in the hash, the natural approach + * used into CASE 4 is highly inefficient. */ + if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) { + dict *d = dictCreate(&sdsReplyDictType, NULL); + hashTypeIterator *hi = hashTypeInitIterator(hash); + + /* Add all the elements into the temporary dictionary. */ + while ((hashTypeNext(hi)) != C_ERR) { + int ret = DICT_ERR; + sds key, value = NULL; + + key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY); + if (withvalues) + value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE); + ret = dictAdd(d, key, value); + + serverAssert(ret == DICT_OK); + } + serverAssert(dictSize(d) == size); + hashTypeReleaseIterator(hi); + + /* Remove random elements to reach the right count. 
*/ + while (size > count) { + dictEntry *de; + de = dictGetRandomKey(d); + dictUnlink(d,dictGetKey(de)); + sdsfree(dictGetKey(de)); + sdsfree(dictGetVal(de)); + dictFreeUnlinkedEntry(d,de); + size--; + } + + /* Reply with what's in the dict and release memory */ + dictIterator *di; + dictEntry *de; + di = dictGetIterator(d); + while ((de = dictNext(di)) != NULL) { + sds key = dictGetKey(de); + sds value = dictGetVal(de); + if (withvalues && c->resp > 2) + addReplyArrayLen(c,2); + addReplyBulkSds(c, key); + if (withvalues) + addReplyBulkSds(c, value); + } + + dictReleaseIterator(di); + dictRelease(d); + } + + /* CASE 4: We have a big hash compared to the requested number of elements. + * In this case we can simply get random elements from the hash and add + * to the temporary hash, trying to eventually get enough unique elements + * to reach the specified count. */ + else { + unsigned long added = 0; + ziplistEntry key, value; + dict *d = dictCreate(&hashDictType, NULL); + while(added < count) { + hashTypeRandomElement(hash, size, &key, withvalues? &value : NULL); + + /* Try to add the object to the dictionary. If it already exists + * free it, otherwise increment the number of objects we have + * in the result dictionary. */ + sds skey = hashSdsFromZiplistEntry(&key); + if (dictAdd(d,skey,NULL) != DICT_OK) { + sdsfree(skey); + continue; + } + added++; + + /* We can reply right away, so that we don't need to store the value in the dict. 
*/ + if (withvalues && c->resp > 2) + addReplyArrayLen(c,2); + hashReplyFromZiplistEntry(c, &key); + if (withvalues) + hashReplyFromZiplistEntry(c, &value); + } + + /* Release memory */ + dictRelease(d); + } +} + +/* HRANDFIELD [<count> WITHVALUES] */ +void hrandfieldCommand(client *c) { + long l; + int withvalues = 0; + robj *hash; + ziplistEntry ele; + + if (c->argc >= 3) { + if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return; + if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withvalues"))) { + addReplyErrorObject(c,shared.syntaxerr); + return; + } else if (c->argc == 4) + withvalues = 1; + hrandfieldWithCountCommand(c, l, withvalues); + return; + } + + /* Handle variant without <count> argument. Reply with simple bulk string */ + if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL || + checkType(c,hash,OBJ_HASH)) { + return; + } + + hashTypeRandomElement(hash,hashTypeLength(hash),&ele,NULL); + hashReplyFromZiplistEntry(c, &ele); +} diff --git a/src/t_set.c b/src/t_set.c index 64bbbd3a0..de0a9f954 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -690,8 +690,9 @@ void srandmemberWithCountCommand(client *c) { /* CASE 1: The count was negative, so the extraction method is just: * "return N random elements" sampling the whole set every time. * This case is trivial and can be served without auxiliary data - * structures. */ - if (!uniq) { + * structures. This case is the only one that also needs to return the + * elements in random order. */ + if (!uniq || count == 1) { addReplySetLen(c,count); while(count--) { encoding = setTypeRandomElement(set,&ele,&llele); @@ -713,7 +714,7 @@ void srandmemberWithCountCommand(client *c) { } /* For CASE 3 and CASE 4 we need an auxiliary dictionary. 
*/ - d = dictCreate(&objectKeyPointerValueDictType,NULL); + d = dictCreate(&sdsReplyDictType,NULL); /* CASE 3: * The number of elements inside the set is not greater than @@ -729,13 +730,13 @@ void srandmemberWithCountCommand(client *c) { /* Add all the elements into the temporary dictionary. */ si = setTypeInitIterator(set); - while((encoding = setTypeNext(si,&ele,&llele)) != -1) { + while ((encoding = setTypeNext(si,&ele,&llele)) != -1) { int retval = DICT_ERR; if (encoding == OBJ_ENCODING_INTSET) { - retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL); + retval = dictAdd(d,sdsfromlonglong(llele),NULL); } else { - retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL); + retval = dictAdd(d,sdsdup(ele),NULL); } serverAssert(retval == DICT_OK); } @@ -743,11 +744,12 @@ void srandmemberWithCountCommand(client *c) { serverAssert(dictSize(d) == size); /* Remove random elements to reach the right count. */ - while(size > count) { + while (size > count) { dictEntry *de; - de = dictGetRandomKey(d); - dictDelete(d,dictGetKey(de)); + dictUnlink(d,dictGetKey(de)); + sdsfree(dictGetKey(de)); + dictFreeUnlinkedEntry(d,de); size--; } } @@ -758,22 +760,22 @@ void srandmemberWithCountCommand(client *c) { * to reach the specified count. */ else { unsigned long added = 0; - robj *objele; + sds sdsele; - while(added < count) { + while (added < count) { encoding = setTypeRandomElement(set,&ele,&llele); if (encoding == OBJ_ENCODING_INTSET) { - objele = createStringObjectFromLongLong(llele); + sdsele = sdsfromlonglong(llele); } else { - objele = createStringObject(ele,sdslen(ele)); + sdsele = sdsdup(ele); } /* Try to add the object to the dictionary. If it already exists * free it, otherwise increment the number of objects we have * in the result dictionary. 
*/ - if (dictAdd(d,objele,NULL) == DICT_OK) + if (dictAdd(d,sdsele,NULL) == DICT_OK) added++; else - decrRefCount(objele); + sdsfree(sdsele); } } @@ -785,12 +787,13 @@ void srandmemberWithCountCommand(client *c) { addReplySetLen(c,count); di = dictGetIterator(d); while((de = dictNext(di)) != NULL) - addReplyBulk(c,dictGetKey(de)); + addReplyBulkSds(c,dictGetKey(de)); dictReleaseIterator(di); dictRelease(d); } } +/* SRANDMEMBER [<count>] */ void srandmemberCommand(client *c) { robj *set; sds ele; @@ -805,6 +808,7 @@ void srandmemberCommand(client *c) { return; } + /* Handle variant without <count> argument. Reply with simple bulk string */ if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL || checkType(c,set,OBJ_SET)) return; diff --git a/src/t_stream.c b/src/t_stream.c index f991765eb..197b7d4f7 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -818,6 +818,28 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) { return deleted; } +/* Trims a stream by length. Returns the number of deleted items. */ +int64_t streamTrimByLength(stream *s, long long maxlen, int approx) { + streamAddTrimArgs args = { + .trim_strategy = TRIM_STRATEGY_MAXLEN, + .approx_trim = approx, + .limit = approx ? 100 * server.stream_node_max_entries : 0, + .maxlen = maxlen + }; + return streamTrim(s, &args); +} + +/* Trims a stream by minimum ID. Returns the number of deleted items. */ +int64_t streamTrimByID(stream *s, streamID minid, int approx) { + streamAddTrimArgs args = { + .trim_strategy = TRIM_STRATEGY_MINID, + .approx_trim = approx, + .limit = approx ? 100 * server.stream_node_max_entries : 0, + .minid = minid + }; + return streamTrim(s, &args); +} + /* Parse the arguements of XADD/XTRIM. * * See streamAddTrimArgs for more details about the arguments handled. @@ -1625,7 +1647,7 @@ robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) { * treated as an invalid ID. * * If 'c' is set to NULL, no reply is sent to the client. 
*/ -int streamGenericParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int strict) { +int streamGenericParseIDOrReply(client *c, const robj *o, streamID *id, uint64_t missing_seq, int strict) { char buf[128]; if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid; memcpy(buf,o->ptr,sdslen(o->ptr)+1); @@ -1661,6 +1683,11 @@ invalid: return C_ERR; } +/* Wrapper for streamGenericParseIDOrReply() used by module API. */ +int streamParseID(const robj *o, streamID *id) { + return streamGenericParseIDOrReply(NULL, o, id, 0, 0); +} + /* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to * 0, to be used when - and + are acceptable IDs. */ int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) { diff --git a/src/t_string.c b/src/t_string.c index 2792f5557..de67484fc 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -61,13 +61,16 @@ static int checkStringLength(client *c, long long size) { * If ok_reply is NULL "+OK" is used. * If abort_reply is NULL, "$-1" is used. */ -#define OBJ_SET_NO_FLAGS 0 +#define OBJ_NO_FLAGS 0 #define OBJ_SET_NX (1<<0) /* Set if key not exists. */ #define OBJ_SET_XX (1<<1) /* Set if key exists. 
*/ -#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given */ -#define OBJ_SET_PX (1<<3) /* Set if time in ms in given */ -#define OBJ_SET_KEEPTTL (1<<4) /* Set and keep the ttl */ +#define OBJ_EX (1<<2) /* Set if time in seconds is given */ +#define OBJ_PX (1<<3) /* Set if time in ms in given */ +#define OBJ_KEEPTTL (1<<4) /* Set and keep the ttl */ #define OBJ_SET_GET (1<<5) /* Set if want to get key before set */ +#define OBJ_EXAT (1<<6) /* Set if timestamp in second is given */ +#define OBJ_PXAT (1<<7) /* Set if timestamp in ms is given */ +#define OBJ_PERSIST (1<<8) /* Set if we need to remove the ttl */ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) { long long milliseconds = 0; /* initialized to avoid any harmness warning */ @@ -93,91 +96,172 @@ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, if (getGenericCommand(c) == C_ERR) return; } - genericSetKey(c,c->db,key,val,flags & OBJ_SET_KEEPTTL,1); + genericSetKey(c,c->db,key, val,flags & OBJ_KEEPTTL,1); server.dirty++; - if (expire) setExpire(c,c->db,key,mstime()+milliseconds); notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id); - if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC, - "expire",key,c->db->id); + if (expire) { + robj *exp = shared.pxat; + + if ((flags & OBJ_PX) || (flags & OBJ_EX)) { + setExpire(c,c->db,key,milliseconds + mstime()); + exp = shared.px; + } else { + setExpire(c,c->db,key,milliseconds); + } + notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); + + /* Propagate as SET Key Value PXAT millisecond-timestamp if there is EXAT/PXAT or + * propagate as SET Key Value PX millisecond if there is EX/PX flag. + * + * Additionally when we propagate the SET with PX (relative millisecond) we translate + * it again to SET with PXAT for the AOF. + * + * Additional care is required while modifying the argument order. AOF relies on the + * exp argument being at index 3. 
(see feedAppendOnlyFile) + * */ + robj *millisecondObj = createStringObjectFromLongLong(milliseconds); + rewriteClientCommandVector(c,5,shared.set,key,val,exp,millisecondObj); + decrRefCount(millisecondObj); + } if (!(flags & OBJ_SET_GET)) { addReply(c, ok_reply ? ok_reply : shared.ok); } + + /* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the command argv) */ + if ((flags & OBJ_SET_GET) && !expire) { + int argc = 0; + int j; + robj **argv = zmalloc((c->argc-1)*sizeof(robj*)); + for (j=0; j < c->argc; j++) { + char *a = c->argv[j]->ptr; + /* Skip GET which may be repeated multiple times. */ + if (j >= 3 && + (a[0] == 'g' || a[0] == 'G') && + (a[1] == 'e' || a[1] == 'E') && + (a[2] == 't' || a[2] == 'T') && a[3] == '\0') + continue; + argv[argc++] = c->argv[j]; + incrRefCount(c->argv[j]); + } + replaceClientCommandVector(c, argc, argv); + } } -/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>] */ -void setCommand(client *c) { - int j; - robj *expire = NULL; - int unit = UNIT_SECONDS; - int flags = OBJ_SET_NO_FLAGS; +#define COMMAND_GET 0 +#define COMMAND_SET 1 +/* + * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended + * string arguments used in SET and GET command. + * + * Get specific commands - PERSIST/DEL + * Set specific commands - XX/NX/GET + * Common commands - EX/EXAT/PX/PXAT/KEEPTTL + * + * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed + * to be determined and command_type which can be COMMAND_GET or COMMAND_SET. + * + * If there are any syntax violations C_ERR is returned else C_OK is returned. + * + * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any + * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set. 
+ */ +int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, int command_type) { - for (j = 3; j < c->argc; j++) { - char *a = c->argv[j]->ptr; + int j = command_type == COMMAND_GET ? 2 : 3; + for (; j < c->argc; j++) { + char *opt = c->argv[j]->ptr; robj *next = (j == c->argc-1) ? NULL : c->argv[j+1]; - if ((a[0] == 'n' || a[0] == 'N') && - (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && - !(flags & OBJ_SET_XX) && !(flags & OBJ_SET_GET)) + if ((opt[0] == 'n' || opt[0] == 'N') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_SET_XX) && !(*flags & OBJ_SET_GET) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_NX; + } else if ((opt[0] == 'x' || opt[0] == 'X') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_XX; + } else if ((opt[0] == 'g' || opt[0] == 'G') && + (opt[1] == 'e' || opt[1] == 'E') && + (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && + !(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_GET; + } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_KEEPTTL; + } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && + !(*flags & OBJ_KEEPTTL)) { - flags |= OBJ_SET_NX; - } else if ((a[0] == 'x' || a[0] == 'X') && - (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && - !(flags & OBJ_SET_NX)) + *flags |= OBJ_PERSIST; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) && + !(*flags & OBJ_PXAT) && next) { - flags |= OBJ_SET_XX; - } else if ((a[0] == 'g' 
|| a[0] == 'G') && - (a[1] == 'e' || a[1] == 'E') && - (a[2] == 't' || a[2] == 'T') && a[3] == '\0' && - !(flags & OBJ_SET_NX)) { - flags |= OBJ_SET_GET; - } else if (!strcasecmp(c->argv[j]->ptr,"KEEPTTL") && - !(flags & OBJ_SET_EX) && !(flags & OBJ_SET_PX)) + *flags |= OBJ_EX; + *expire = next; + j++; + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PXAT) && next) { - flags |= OBJ_SET_KEEPTTL; - } else if ((a[0] == 'e' || a[0] == 'E') && - (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && - !(flags & OBJ_SET_KEEPTTL) && - !(flags & OBJ_SET_PX) && next) + *flags |= OBJ_PX; + *unit = UNIT_MILLISECONDS; + *expire = next; + j++; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_PX) && + !(*flags & OBJ_PXAT) && next) { - flags |= OBJ_SET_EX; - unit = UNIT_SECONDS; - expire = next; + *flags |= OBJ_EXAT; + *expire = next; j++; - } else if ((a[0] == 'p' || a[0] == 'P') && - (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && - !(flags & OBJ_SET_KEEPTTL) && - !(flags & OBJ_SET_EX) && next) + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && next) { - flags |= OBJ_SET_PX; - unit = UNIT_MILLISECONDS; - expire = next; + *flags |= OBJ_PXAT; + *unit = UNIT_MILLISECONDS; + *expire = next; j++; } else { addReplyErrorObject(c,shared.syntaxerr); - return; + return C_ERR; } } + return C_OK; +} - c->argv[2] = tryObjectEncoding(c->argv[2]); - 
setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL); +/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>] + * [EXAT <seconds-timestamp>][PXAT <milliseconds-timestamp>] */ +void setCommand(client *c) { + robj *expire = NULL; + int unit = UNIT_SECONDS; + int flags = OBJ_NO_FLAGS; - /* Propagate without the GET argument */ - if (flags & OBJ_SET_GET) { - int argc = 0; - robj **argv = zmalloc((c->argc-1)*sizeof(robj*)); - for (j=0; j < c->argc; j++) { - char *a = c->argv[j]->ptr; - /* Skip GET which may be repeated multiple times. */ - if (j >= 3 && - (a[0] == 'g' || a[0] == 'G') && - (a[1] == 'e' || a[1] == 'E') && - (a[2] == 't' || a[2] == 'T') && a[3] == '\0') - continue; - argv[argc++] = c->argv[j]; - incrRefCount(c->argv[j]); - } - replaceClientCommandVector(c, argc, argv); + if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_SET) != C_OK) { + return; } + + c->argv[2] = tryObjectEncoding(c->argv[2]); + setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL); } void setnxCommand(client *c) { @@ -187,12 +271,12 @@ void setnxCommand(client *c) { void setexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL); + setGenericCommand(c,OBJ_EX,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL); } void psetexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL); + setGenericCommand(c,OBJ_PX,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL); } int getGenericCommand(client *c) { @@ -213,6 +297,112 @@ void getCommand(client *c) { getGenericCommand(c); } +/* + * GETEX <key> [PERSIST][EX seconds][PX milliseconds][EXAT seconds-timestamp][PXAT milliseconds-timestamp] + * + * The getexCommand() function implements extended options and variants of the GET command. 
Unlike GET + * command this command is not read-only. + * + * The default behavior when no options are specified is same as GET and does not alter any TTL. + * + * Only one of the below options can be used at a given time. + * + * 1. PERSIST removes any TTL associated with the key. + * 2. EX Set expiry TTL in seconds. + * 3. PX Set expiry TTL in milliseconds. + * 4. EXAT Same like EX instead of specifying the number of seconds representing the TTL + * (time to live), it takes an absolute Unix timestamp + * 5. PXAT Same like PX instead of specifying the number of milliseconds representing the TTL + * (time to live), it takes an absolute Unix timestamp + * + * Command would either return the bulk string, error or nil. + */ +void getexCommand(client *c) { + robj *expire = NULL; + int unit = UNIT_SECONDS; + int flags = OBJ_NO_FLAGS; + + if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_GET) != C_OK) { + return; + } + + robj *o; + + if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL) + return; + + if (checkType(c,o,OBJ_STRING)) { + return; + } + + long long milliseconds = 0; + + /* Validate the expiration time value first */ + if (expire) { + if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK) + return; + if (milliseconds <= 0) { + addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name); + return; + } + if (unit == UNIT_SECONDS) milliseconds *= 1000; + } + + /* We need to do this before we expire the key or delete it */ + addReplyBulk(c,o); + + /* This command is never propagated as is. It is either propagated as PEXPIRE[AT],DEL,UNLINK or PERSIST. + * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */ + if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) { + /* When PXAT/EXAT absolute timestamp is specified, there can be a chance that timestamp + * has already elapsed so delete the key in that case. 
*/ + int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db, c->argv[1]) : + dbSyncDelete(c->db, c->argv[1]); + serverAssert(deleted); + robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del; + rewriteClientCommandVector(c,2,aux,c->argv[1]); + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + server.dirty++; + } else if (expire) { + robj *exp = shared.pexpireat; + if ((flags & OBJ_PX) || (flags & OBJ_EX)) { + setExpire(c,c->db,c->argv[1],milliseconds + mstime()); + exp = shared.pexpire; + } else { + setExpire(c,c->db,c->argv[1],milliseconds); + } + + robj* millisecondObj = createStringObjectFromLongLong(milliseconds); + rewriteClientCommandVector(c,3,exp,c->argv[1],millisecondObj); + decrRefCount(millisecondObj); + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",c->argv[1],c->db->id); + server.dirty++; + } else if (flags & OBJ_PERSIST) { + if (removeExpire(c->db, c->argv[1])) { + signalModifiedKey(c, c->db, c->argv[1]); + rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id); + server.dirty++; + } + } +} + +void getdelCommand(client *c) { + if (getGenericCommand(c) == C_ERR) return; + int deleted = server.lazyfree_lazy_user_del ? dbAsyncDelete(c->db, c->argv[1]) : + dbSyncDelete(c->db, c->argv[1]); + if (deleted) { + /* Propagate as DEL/UNLINK command */ + robj *aux = server.lazyfree_lazy_user_del ? 
shared.unlink : shared.del; + rewriteClientCommandVector(c,2,aux,c->argv[1]); + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + server.dirty++; + } +} + void getsetCommand(client *c) { if (getGenericCommand(c) == C_ERR) return; c->argv[2] = tryObjectEncoding(c->argv[2]); @@ -221,9 +411,7 @@ void getsetCommand(client *c) { server.dirty++; /* Propagate as SET command */ - robj *setcmd = createStringObject("SET",3); - rewriteClientCommandArgument(c,0,setcmd); - decrRefCount(setcmd); + rewriteClientCommandArgument(c,0,shared.set); } void setrangeCommand(client *c) { @@ -443,7 +631,7 @@ void decrbyCommand(client *c) { void incrbyfloatCommand(client *c) { long double incr, value; - robj *o, *new, *aux1, *aux2; + robj *o, *new, *aux; o = lookupKeyWrite(c->db,c->argv[1]); if (checkType(c,o,OBJ_STRING)) return; @@ -469,13 +657,11 @@ void incrbyfloatCommand(client *c) { /* Always replicate INCRBYFLOAT as a SET command with the final value * in order to make sure that differences in float precision or formatting * will not create differences in replicas or after an AOF restart. 
*/ - aux1 = createStringObject("SET",3); - rewriteClientCommandArgument(c,0,aux1); - decrRefCount(aux1); + rewriteClientCommandArgument(c,0,shared.set); rewriteClientCommandArgument(c,2,new); - aux2 = createStringObject("KEEPTTL",7); - rewriteClientCommandArgument(c,3,aux2); - decrRefCount(aux2); + aux = createStringObject("KEEPTTL",7); + rewriteClientCommandArgument(c,3,aux); + decrRefCount(aux); } void appendCommand(client *c) { diff --git a/src/t_zset.c b/src/t_zset.c index 3d63c41c6..b55fc169e 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -721,20 +721,26 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) { * Ziplist-backed sorted set API *----------------------------------------------------------------------------*/ +double zzlStrtod(unsigned char *vstr, unsigned int vlen) { + char buf[128]; + if (vlen > sizeof(buf)) + vlen = sizeof(buf); + memcpy(buf,vstr,vlen); + buf[vlen] = '\0'; + return strtod(buf,NULL); + } + double zzlGetScore(unsigned char *sptr) { unsigned char *vstr; unsigned int vlen; long long vlong; - char buf[128]; double score; serverAssert(sptr != NULL); serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong)); if (vstr) { - memcpy(buf,vstr,vlen); - buf[vlen] = '\0'; - score = strtod(buf,NULL); + score = zzlStrtod(vstr,vlen); } else { score = vlong; } @@ -1653,6 +1659,48 @@ int zsetZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) { return ret; } +/* Create a new sds string from the ziplist entry. */ +sds zsetSdsFromZiplistEntry(ziplistEntry *e) { + return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval); +} + +/* Reply with bulk string from the ziplist entry. */ +void zsetReplyFromZiplistEntry(client *c, ziplistEntry *e) { + if (e->sval) + addReplyBulkCBuffer(c, e->sval, e->slen); + else + addReplyBulkLongLong(c, e->lval); +} + + +/* Return random element from a non empty zset. + * 'key' and 'val' will be set to hold the element. 
+ * The memory in `key` is not to be freed or modified by the caller. + * 'score' can be NULL in which case it's not extracted. */ +void zsetTypeRandomElement(robj *zsetobj, unsigned long zsetsize, ziplistEntry *key, double *score) { + if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) { + zset *zs = zsetobj->ptr; + dictEntry *de = dictGetFairRandomKey(zs->dict); + sds s = dictGetKey(de); + key->sval = (unsigned char*)s; + key->slen = sdslen(s); + if (score) + *score = *(double*)dictGetVal(de); + } else if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) { + ziplistEntry val; + ziplistRandomPair(zsetobj->ptr, zsetsize, key, &val); + if (score) { + if (val.sval) { + *score = zzlStrtod(val.sval,val.slen); + } else { + *score = (double)val.lval; + } + } + } else { + serverPanic("Unknown zset encoding"); + } +} + /*----------------------------------------------------------------------------- * Sorted set commands *----------------------------------------------------------------------------*/ @@ -2543,7 +2591,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in /* read keys to be used for input */ src = zcalloc(sizeof(zsetopsrc) * setnum); for (i = 0, j = numkeysIndex+1; i < setnum; i++, j++) { - robj *obj = lookupKeyWrite(c->db,c->argv[j]); + robj *obj = dstkey ? + lookupKeyWrite(c->db,c->argv[j]) : + lookupKeyRead(c->db,c->argv[j]); if (obj != NULL) { if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) { zfree(src); @@ -2749,6 +2799,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in unsigned long length = dstzset->zsl->length; zskiplist *zsl = dstzset->zsl; zskiplistNode *zn = zsl->header->level[0].forward; + /* In case of WITHSCORES, respond with a single array in RESP2, and + * nested arrays in RESP3. We can't use a map response type since the + * client library needs to know to respect the order. 
*/ if (withscores && c->resp == 2) addReplyArrayLen(c, length*2); else @@ -2866,6 +2919,9 @@ static void zrangeResultEmitLongLongToClient(zrange_result_handler *handler, static void zrangeResultFinalizeClient(zrange_result_handler *handler, size_t result_count) { + /* In case of WITHSCORES, respond with a single array in RESP2, and + * nested arrays in RESP3. We can't use a map response type since the + * client library needs to know to respect the order. */ if (handler->withscores && (handler->client->resp == 2)) { result_count *= 2; } @@ -3071,8 +3127,8 @@ void zrevrangeCommand(client *c) { /* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */ void genericZrangebyscoreCommand(zrange_result_handler *handler, - zrangespec *range, robj *zobj, int withscores, long offset, - long limit, int reverse) { + zrangespec *range, robj *zobj, long offset, long limit, + int reverse) { client *c = handler->client; unsigned long rangelen = 0; @@ -3172,8 +3228,7 @@ void genericZrangebyscoreCommand(zrange_result_handler *handler, } rangelen++; - handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), - ((withscores) ? ln->score : ln->score)); + handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score); /* Move to next node */ if (reverse) { @@ -3605,11 +3660,16 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st } /* Step 3: Lookup the key and get the range. */ - if (((zobj = lookupKeyReadOrReply(c, key, shared.emptyarray)) == NULL) - || checkType(c, zobj, OBJ_ZSET)) { + zobj = handler->dstkey ? + lookupKeyWrite(c->db,key) : + lookupKeyRead(c->db,key); + if (zobj == NULL) { + addReply(c,shared.emptyarray); goto cleanup; } + if (checkType(c,zobj,OBJ_ZSET)) goto cleanup; + /* Step 4: Pass this to the command-specific handler. 
*/ switch (rangetype) { case ZRANGE_AUTO: @@ -3619,8 +3679,8 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st break; case ZRANGE_SCORE: - genericZrangebyscoreCommand(handler, &range, zobj, opt_withscores || store, - opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE); + genericZrangebyscoreCommand(handler, &range, zobj, opt_offset, + opt_limit, direction == ZRANGE_DIRECTION_REVERSE); break; case ZRANGE_LEX: @@ -3895,3 +3955,216 @@ void bzpopminCommand(client *c) { void bzpopmaxCommand(client *c) { blockingGenericZpopCommand(c,ZSET_MAX); } + +/* How many times bigger should be the zset compared to the requested size + * for us to not use the "remove elements" strategy? Read later in the + * implementation for more info. */ +#define ZRANDMEMBER_SUB_STRATEGY_MUL 3 + +void zrandmemberWithCountCommand(client *c, long l, int withscores) { + unsigned long count, size; + int uniq = 1; + robj *zsetobj; + + if ((zsetobj = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) + == NULL || checkType(c, zsetobj, OBJ_ZSET)) return; + size = zsetLength(zsetobj); + + if(l >= 0) { + count = (unsigned long) l; + } else { + count = -l; + uniq = 0; + } + + /* If count is zero, serve it ASAP to avoid special cases later. */ + if (count == 0) { + addReply(c,shared.emptyarray); + return; + } + + /* CASE 1: The count was negative, so the extraction method is just: + * "return N random elements" sampling the whole set every time. + * This case is trivial and can be served without auxiliary data + * structures. This case is the only one that also needs to return the + * elements in random order. 
*/ + if (!uniq || count == 1) { + if (withscores && c->resp == 2) + addReplyArrayLen(c, count*2); + else + addReplyArrayLen(c, count); + if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) { + zset *zs = zsetobj->ptr; + while (count--) { + dictEntry *de = dictGetFairRandomKey(zs->dict); + sds key = dictGetKey(de); + if (withscores && c->resp > 2) + addReplyArrayLen(c,2); + addReplyBulkCBuffer(c, key, sdslen(key)); + if (withscores) + addReplyDouble(c, dictGetDoubleVal(de)); + } + } else if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) { + ziplistEntry *keys, *vals = NULL; + keys = zmalloc(sizeof(ziplistEntry)*count); + if (withscores) + vals = zmalloc(sizeof(ziplistEntry)*count); + ziplistRandomPairs(zsetobj->ptr, count, keys, vals); + for (unsigned long i = 0; i < count; i++) { + if (withscores && c->resp > 2) + addReplyArrayLen(c,2); + if (keys[i].sval) + addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen); + else + addReplyBulkLongLong(c, keys[i].lval); + if (withscores) { + if (vals[i].sval) { + addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen)); + } else + addReplyDouble(c, vals[i].lval); + } + } + zfree(keys); + zfree(vals); + } + return; + } + + zsetopsrc src; + zsetopval zval; + src.subject = zsetobj; + src.type = zsetobj->type; + src.encoding = zsetobj->encoding; + zuiInitIterator(&src); + memset(&zval, 0, sizeof(zval)); + + /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */ + long reply_size = count < size ? count : size; + if (withscores && c->resp == 2) + addReplyArrayLen(c, reply_size*2); + else + addReplyArrayLen(c, reply_size); + + /* CASE 2: + * The number of requested elements is greater than the number of + * elements inside the zset: simply return the whole zset. 
*/ + if (count >= size) { + while (zuiNext(&src, &zval)) { + if (withscores && c->resp > 2) + addReplyArrayLen(c,2); + addReplyBulkSds(c, zuiNewSdsFromValue(&zval)); + if (withscores) + addReplyDouble(c, zval.score); + } + return; + } + + /* CASE 3: + * The number of elements inside the zset is not greater than + * ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements. + * In this case we create a dict from scratch with all the elements, and + * subtract random elements to reach the requested number of elements. + * + * This is done because if the number of requested elements is just + * a bit less than the number of elements in the set, the natural approach + * used into CASE 4 is highly inefficient. */ + if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) { + dict *d = dictCreate(&sdsReplyDictType, NULL); + /* Add all the elements into the temporary dictionary. */ + while (zuiNext(&src, &zval)) { + sds key = zuiNewSdsFromValue(&zval); + dictEntry *de = dictAddRaw(d, key, NULL); + serverAssert(de); + if (withscores) + dictSetDoubleVal(de, zval.score); + } + serverAssert(dictSize(d) == size); + + /* Remove random elements to reach the right count. */ + while (size > count) { + dictEntry *de; + de = dictGetRandomKey(d); + dictUnlink(d,dictGetKey(de)); + sdsfree(dictGetKey(de)); + dictFreeUnlinkedEntry(d,de); + size--; + } + + /* Reply with what's in the dict and release memory */ + dictIterator *di; + dictEntry *de; + di = dictGetIterator(d); + while ((de = dictNext(di)) != NULL) { + if (withscores && c->resp > 2) + addReplyArrayLen(c,2); + addReplyBulkSds(c, dictGetKey(de)); + if (withscores) + addReplyDouble(c, dictGetDoubleVal(de)); + } + + dictReleaseIterator(di); + dictRelease(d); + } + + /* CASE 4: We have a big zset compared to the requested number of elements. + * In this case we can simply get random elements from the zset and add + * to the temporary set, trying to eventually get enough unique elements + * to reach the specified count. 
*/ + else { + unsigned long added = 0; + dict *d = dictCreate(&hashDictType, NULL); + + while (added < count) { + ziplistEntry key; + double score; + zsetTypeRandomElement(zsetobj, size, &key, withscores ? &score: NULL); + + /* Try to add the object to the dictionary. If it already exists + * free it, otherwise increment the number of objects we have + * in the result dictionary. */ + sds skey = zsetSdsFromZiplistEntry(&key); + if (dictAdd(d,skey,NULL) != DICT_OK) { + sdsfree(skey); + continue; + } + added++; + + if (withscores && c->resp > 2) + addReplyArrayLen(c,2); + zsetReplyFromZiplistEntry(c, &key); + if (withscores) + addReplyDouble(c, score); + } + + /* Release memory */ + dictRelease(d); + } +} + +/* ZRANDMEMBER [<count> WITHSCORES] */ +void zrandmemberCommand(client *c) { + long l; + int withscores = 0; + robj *zset; + ziplistEntry ele; + + if (c->argc >= 3) { + if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return; + if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withscores"))) { + addReplyErrorObject(c,shared.syntaxerr); + return; + } else if (c->argc == 4) + withscores = 1; + zrandmemberWithCountCommand(c, l, withscores); + return; + } + + /* Handle variant without <count> argument. Reply with simple bulk string */ + if ((zset = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL || + checkType(c,zset,OBJ_ZSET)) { + return; + } + + zsetTypeRandomElement(zset, zsetLength(zset), &ele,NULL); + zsetReplyFromZiplistEntry(c,&ele); +} diff --git a/src/util.c b/src/util.c index eca212e57..3243fa51e 100644 --- a/src/util.c +++ b/src/util.c @@ -749,7 +749,7 @@ sds getAbsolutePath(char *filename) { * Gets the proper timezone in a more portable fashion * i.e timezone variables are linux specific. 
*/ -unsigned long getTimeZone(void) { +long getTimeZone(void) { #if defined(__linux__) || defined(__sun) return timezone; #else @@ -758,7 +758,7 @@ unsigned long getTimeZone(void) { gettimeofday(&tv, &tz); - return tz.tz_minuteswest * 60UL; + return tz.tz_minuteswest * 60L; #endif } diff --git a/src/util.h b/src/util.h index e9ad0ee4d..feaa82924 100644 --- a/src/util.h +++ b/src/util.h @@ -60,7 +60,7 @@ int string2d(const char *s, size_t slen, double *dp); int d2string(char *buf, size_t len, double value); int ld2string(char *buf, size_t len, long double value, ld2string_mode mode); sds getAbsolutePath(char *filename); -unsigned long getTimeZone(void); +long getTimeZone(void); int pathIsBaseName(char *path); #ifdef REDIS_TEST diff --git a/src/version.h b/src/version.h index d408dd3e8..1c8c1f2a6 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,2 @@ -#define REDIS_VERSION "6.1.241" -#define REDIS_VERSION_NUM 0x000601f1 +#define REDIS_VERSION "6.1.242" +#define REDIS_VERSION_NUM 0x000601f2 diff --git a/src/ziplist.c b/src/ziplist.c index a4f38c5e8..0cd20630a 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -1498,6 +1498,89 @@ int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep, return 1; } +/* Randomly select a pair of key and value. + * total_count is a pre-computed length/2 of the ziplist (to avoid calls to ziplistLen) + * 'key' and 'val' are used to store the result key value pair. + * 'val' can be NULL if the value is not needed. 
*/ +void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) { + int ret; + unsigned char *p; + + /* Avoid div by zero on corrupt ziplist */ + assert(total_count); + + /* Generate even numbers, because ziplist saved K-V pair */ + int r = (rand() % total_count) * 2; + p = ziplistIndex(zl, r); + ret = ziplistGet(p, &key->sval, &key->slen, &key->lval); + assert(ret != 0); + + if (!val) + return; + p = ziplistNext(zl, p); + ret = ziplistGet(p, &val->sval, &val->slen, &val->lval); + assert(ret != 0); +} + +/* int compare for qsort */ +int intCompare(const void *a, const void *b) { + return (*(int *) a - *(int *) b); +} + +/* Helper method to store a string into from val or lval into dest */ +static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) { + dest->sval = val; + dest->slen = len; + dest->lval = lval; +} + +/* Randomly select unique count of key value pairs and store into 'keys' and + * 'vals' args. The order of the picked entries is random. + * The 'vals' arg can be NULL in which case we skip these. */ +void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) { + unsigned char *p, *key, *value; + unsigned int klen, vlen; + long long klval, vlval; + typedef struct { + int index; + int order; + } rand_pick; + rand_pick *picks = zmalloc(sizeof(rand_pick)*count); + unsigned long total_size = ziplistLen(zl)/2; + + /* Avoid div by zero on corrupt ziplist */ + assert(total_size); + + /* create a pool of random indexes (some may be duplicate). */ + for (int i = 0; i < count; i++) { + picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */ + /* keep track of the order we picked them */ + picks[i].order = i; + } + + /* sort by indexes. */ + qsort(picks, count, sizeof(rand_pick), intCompare); + + /* fetch the elements form the ziplist into a output array respecting the original order. 
*/ + int zipindex = 0, pickindex = 0; + p = ziplistIndex(zl, 0); + while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) { + p = ziplistNext(zl, p); + ziplistGet(p, &value, &vlen, &vlval); + while (pickindex < count && zipindex == picks[pickindex].index) { + int storeorder = picks[pickindex].order; + ziplistSaveValue(key, klen, klval, &keys[storeorder]); + if (vals) + ziplistSaveValue(value, vlen, vlval, &vals[storeorder]); + pickindex++; + } + zipindex += 2; + p = ziplistNext(zl, p); + } + + zfree(picks); +} + #ifdef REDIS_TEST #include <sys/time.h> #include "adlist.h" diff --git a/src/ziplist.h b/src/ziplist.h index 5153951dc..5fb8fd46a 100644 --- a/src/ziplist.h +++ b/src/ziplist.h @@ -34,6 +34,15 @@ #define ZIPLIST_HEAD 0 #define ZIPLIST_TAIL 1 +/* Each entry in the ziplist is either a string or an integer. */ +typedef struct { + /* When string is used, it is provided with the length (slen). */ + unsigned char *sval; + unsigned int slen; + /* When integer is used, 'sval' is NULL, and lval holds the value. 
*/ + long long lval; +} ziplistEntry; + unsigned char *ziplistNew(void); unsigned char *ziplistMerge(unsigned char **first, unsigned char **second); unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where); @@ -52,6 +61,8 @@ void ziplistRepr(unsigned char *zl); typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata); int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep, ziplistValidateEntryCB entry_cb, void *cb_userdata); +void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val); +void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals); #ifdef REDIS_TEST int ziplistTest(int argc, char *argv[]); diff --git a/tests/cluster/tests/18-cluster-nodes-slots.tcl b/tests/cluster/tests/18-cluster-nodes-slots.tcl new file mode 100644 index 000000000..ca0b3ce0d --- /dev/null +++ b/tests/cluster/tests/18-cluster-nodes-slots.tcl @@ -0,0 +1,62 @@ +# Optimize CLUSTER NODES command by generating all nodes slot topology firstly + +source "../tests/includes/init-tests.tcl" + +proc cluster_allocate_with_continuous_slots {n} { + set slot 16383 + set avg [expr ($slot+1) / $n] + while {$slot >= 0} { + set node [expr $slot/$avg >= $n ? 
$n-1 : $slot/$avg] + lappend slots_$node $slot + incr slot -1 + } + for {set j 0} {$j < $n} {incr j} { + R $j cluster addslots {*}[set slots_${j}] + } +} + +proc cluster_create_with_continuous_slots {masters slaves} { + cluster_allocate_with_continuous_slots $masters + if {$slaves} { + cluster_allocate_slaves $masters $slaves + } + assert_cluster_state ok +} + +test "Create a 2 nodes cluster" { + cluster_create_with_continuous_slots 2 2 +} + +test "Cluster should start ok" { + assert_cluster_state ok +} + +set master1 [Rn 0] +set master2 [Rn 1] + +test "Continuous slots distribution" { + assert_match "* 0-8191*" [$master1 CLUSTER NODES] + assert_match "* 8192-16383*" [$master2 CLUSTER NODES] + + $master1 CLUSTER DELSLOTS 4096 + assert_match "* 0-4095 4097-8191*" [$master1 CLUSTER NODES] + + $master2 CLUSTER DELSLOTS 12288 + assert_match "* 8192-12287 12289-16383*" [$master2 CLUSTER NODES] +} + +test "Discontinuous slots distribution" { + # Remove middle slots + $master1 CLUSTER DELSLOTS 4092 4094 + assert_match "* 0-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES] + $master2 CLUSTER DELSLOTS 12284 12286 + assert_match "* 8192-12283 12285 12287 12289-16383*" [$master2 CLUSTER NODES] + + # Remove head slots + $master1 CLUSTER DELSLOTS 0 2 + assert_match "* 1 3-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES] + + # Remove tail slots + $master2 CLUSTER DELSLOTS 16380 16382 16383 + assert_match "* 8192-12283 12285 12287 12289-16379 16381*" [$master2 CLUSTER NODES] +} diff --git a/tests/instances.tcl b/tests/instances.tcl index a9cc01008..8cb616ae8 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -24,9 +24,11 @@ set ::simulate_error 0 set ::failed 0 set ::sentinel_instances {} set ::redis_instances {} +set ::global_config {} set ::sentinel_base_port 20000 set ::redis_base_port 30000 set ::redis_port_count 1024 +set ::host "127.0.0.1" set ::pids {} ; # We kill everything at exit set ::dirs {} ; # We remove all the temp dirs at exit set ::run_matching {} 
; # If non empty, only tests matching pattern are run. @@ -58,10 +60,9 @@ proc exec_instance {type dirname cfgfile} { } # Spawn a redis or sentinel instance, depending on 'type'. -proc spawn_instance {type base_port count {conf {}}} { +proc spawn_instance {type base_port count {conf {}} {base_conf_file ""}} { for {set j 0} {$j < $count} {incr j} { set port [find_available_port $base_port $::redis_port_count] - # Create a directory for this instance. set dirname "${type}_${j}" lappend ::dirs $dirname @@ -70,7 +71,13 @@ proc spawn_instance {type base_port count {conf {}}} { # Write the instance config file. set cfgfile [file join $dirname $type.conf] - set cfg [open $cfgfile w] + if {$base_conf_file ne ""} { + file copy -- $base_conf_file $cfgfile + set cfg [open $cfgfile a+] + } else { + set cfg [open $cfgfile w] + } + if {$::tls} { puts $cfg "tls-port $port" puts $cfg "tls-replication yes" @@ -92,6 +99,9 @@ proc spawn_instance {type base_port count {conf {}}} { foreach directive $conf { puts $cfg $directive } + dict for {name val} $::global_config { + puts $cfg "$name $val" + } close $cfg # Finally exec it and remember the pid for later cleanup. 
@@ -119,18 +129,18 @@ proc spawn_instance {type base_port count {conf {}}} { } # Check availability finally - if {[server_is_up 127.0.0.1 $port 100] == 0} { + if {[server_is_up $::host $port 100] == 0} { set logfile [file join $dirname log.txt] puts [exec tail $logfile] abort_sentinel_test "Problems starting $type #$j: ping timeout, maybe server start failed, check $logfile" } # Push the instance into the right list - set link [redis 127.0.0.1 $port 0 $::tls] + set link [redis $::host $port 0 $::tls] $link reconnect 1 lappend ::${type}_instances [list \ pid $pid \ - host 127.0.0.1 \ + host $::host \ port $port \ link $link \ ] @@ -232,6 +242,9 @@ proc parse_options {} { set ::simulate_error 1 } elseif {$opt eq {--valgrind}} { set ::valgrind 1 + } elseif {$opt eq {--host}} { + incr j + set ::host ${val} } elseif {$opt eq {--tls}} { package require tls 1.6 ::tls::init \ @@ -239,6 +252,10 @@ proc parse_options {} { -certfile "$::tlsdir/client.crt" \ -keyfile "$::tlsdir/client.key" set ::tls 1 + } elseif {$opt eq {--config}} { + set val2 [lindex $::argv [expr $j+2]] + dict set ::global_config $val $val2 + incr j 2 } elseif {$opt eq "--help"} { puts "--single <pattern> Only runs tests specified by pattern." puts "--dont-clean Keep log files on exit." @@ -246,6 +263,8 @@ proc parse_options {} { puts "--fail Simulate a test failure." puts "--valgrind Run with valgrind." puts "--tls Run tests in TLS mode." + puts "--host <host> Use hostname instead of 127.0.0.1." + puts "--config <k> <v> Extra config argument(s)." puts "--help Shows this help." exit 0 } else { @@ -391,6 +410,11 @@ proc check_leaks instance_types { # Execute all the units inside the 'tests' directory. 
proc run_tests {} { + set sentinel_fd_leaks_file "sentinel_fd_leaks" + if { [file exists $sentinel_fd_leaks_file] } { + file delete $sentinel_fd_leaks_file + } + set tests [lsort [glob ../tests/*]] foreach test $tests { if {$::run_matching ne {} && [string match $::run_matching $test] == 0} { @@ -405,7 +429,15 @@ proc run_tests {} { # Print a message and exists with 0 / 1 according to zero or more failures. proc end_tests {} { - if {$::failed == 0} { + set sentinel_fd_leaks_file "sentinel_fd_leaks" + if { [file exists $sentinel_fd_leaks_file] } { + # temporarily disabling this error from failing the tests until leaks are fixed. + #puts [colorstr red "WARNING: sentinel test(s) failed, there are leaked fds in sentinel:"] + #puts [exec cat $sentinel_fd_leaks_file] + #exit 1 + } + + if {$::failed == 0 } { puts "GOOD! No errors." exit 0 } else { diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index d81521374..e64e2022a 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -272,4 +272,15 @@ tags {"aof"} { } } } + + start_server {overrides {appendonly {yes} appendfilename {appendonly.aof}}} { + test {GETEX should not append to AOF} { + set aof [file join [lindex [r config get dir] 1] appendonly.aof] + r set foo bar + set before [file size $aof] + r getex foo + set after [file size $aof] + assert_equal $before $after + } + } } diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl index cc597bb4d..f5079e5ed 100644 --- a/tests/integration/corrupt-dump.tcl +++ b/tests/integration/corrupt-dump.tcl @@ -507,5 +507,16 @@ test {corrupt payload: fuzzer findings - valgrind invalid read} { } } +test {corrupt payload: fuzzer findings - HRANDFIELD on bad ziplist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + r RESTORE _int 0 
"\x04\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D" + catch {r HRANDFIELD _int} + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + } ;# tags diff --git a/tests/integration/failover.tcl b/tests/integration/failover.tcl new file mode 100644 index 000000000..c6818700d --- /dev/null +++ b/tests/integration/failover.tcl @@ -0,0 +1,290 @@ +start_server {tags {"failover"}} { +start_server {} { +start_server {} { + set node_0 [srv 0 client] + set node_0_host [srv 0 host] + set node_0_port [srv 0 port] + set node_0_pid [srv 0 pid] + + set node_1 [srv -1 client] + set node_1_host [srv -1 host] + set node_1_port [srv -1 port] + set node_1_pid [srv -1 pid] + + set node_2 [srv -2 client] + set node_2_host [srv -2 host] + set node_2_port [srv -2 port] + set node_2_pid [srv -2 pid] + + proc assert_digests_match {n1 n2 n3} { + assert_equal [$n1 debug digest] [$n2 debug digest] + assert_equal [$n2 debug digest] [$n3 debug digest] + } + + test {failover command fails without connected replica} { + catch { $node_0 failover to $node_1_host $node_1_port } err + if {! 
[string match "ERR*" $err]} { + fail "failover command succeeded when replica not connected" + } + } + + test {setup replication for following tests} { + $node_1 replicaof $node_0_host $node_0_port + $node_2 replicaof $node_0_host $node_0_port + wait_for_sync $node_1 + wait_for_sync $node_2 + } + + test {failover command fails with invalid host} { + catch { $node_0 failover to invalidhost $node_1_port } err + assert_match "ERR*" $err + } + + test {failover command fails with invalid port} { + catch { $node_0 failover to $node_1_host invalidport } err + assert_match "ERR*" $err + } + + test {failover command fails with just force and timeout} { + catch { $node_0 FAILOVER FORCE TIMEOUT 100} err + assert_match "ERR*" $err + } + + test {failover command fails when sent to a replica} { + catch { $node_1 failover to $node_1_host $node_1_port } err + assert_match "ERR*" $err + } + + test {failover command fails with force without timeout} { + catch { $node_0 failover to $node_1_host $node_1_port FORCE } err + assert_match "ERR*" $err + } + + test {failover command to specific replica works} { + set initial_psyncs [s -1 sync_partial_ok] + set initial_syncs [s -1 sync_full] + + # Generate a delta between primary and replica + set load_handler [start_write_load $node_0_host $node_0_port 5] + exec kill -SIGSTOP [srv -1 pid] + wait_for_condition 50 100 { + [s 0 total_commands_processed] > 100 + } else { + fail "Node 0 did not accept writes" + } + exec kill -SIGCONT [srv -1 pid] + + # Execute the failover + $node_0 failover to $node_1_host $node_1_port + + # Wait for failover to end + wait_for_condition 50 100 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node 0 to node 1 did not finish" + } + stop_write_load $load_handler + $node_2 replicaof $node_1_host $node_1_port + wait_for_sync $node_0 + wait_for_sync $node_2 + + assert_match *slave* [$node_0 role] + assert_match *master* [$node_1 role] + assert_match *slave* [$node_2 role] + + # We 
should accept psyncs from both nodes + assert_equal [expr [s -1 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover command to any replica works} { + set initial_psyncs [s -2 sync_partial_ok] + set initial_syncs [s -2 sync_full] + + wait_for_ofs_sync $node_1 $node_2 + # We stop node 0 to and make sure node 2 is selected + exec kill -SIGSTOP $node_0_pid + $node_1 set CASE 1 + $node_1 FAILOVER + + # Wait for failover to end + wait_for_condition 50 100 { + [s -1 master_failover_state] == "no-failover" + } else { + fail "Failover from node 1 to node 2 did not finish" + } + exec kill -SIGCONT $node_0_pid + $node_0 replicaof $node_2_host $node_2_port + + wait_for_sync $node_0 + wait_for_sync $node_1 + + assert_match *slave* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *master* [$node_2 role] + + # We should accept Psyncs from both nodes + assert_equal [expr [s -2 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover to a replica with force works} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + exec kill -SIGSTOP $node_0_pid + # node 0 will never acknowledge this write + $node_2 set case 2 + $node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE + + # Wait for node 0 to give up on sync attempt and start failover + wait_for_condition 50 100 { + [s -2 master_failover_state] == "failover-in-progress" + } else { + fail "Failover from node 2 to node 0 did not timeout" + } + + # Quick check that everyone is a replica, we never want a + # state where there are two masters. 
+ assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + exec kill -SIGCONT $node_0_pid + + # Wait for failover to end + wait_for_condition 50 100 { + [s -2 master_failover_state] == "no-failover" + } else { + fail "Failover from node 2 to node 0 did not finish" + } + $node_1 replicaof $node_0_host $node_0_port + + wait_for_sync $node_1 + wait_for_sync $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + assert_equal [count_log_message -2 "time out exceeded, failing over."] 1 + + # We should accept both psyncs, although this is the condition we might not + # since we didn't catch up. + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover with timeout aborts if replica never catches up} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # Stop replica so it never catches up + exec kill -SIGSTOP [srv -1 pid] + $node_0 SET CASE 1 + + $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500 + # Wait for failover to end + wait_for_condition 50 20 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node_0 to replica did not finish" + } + + exec kill -SIGCONT [srv -1 pid] + + # We need to make sure the nodes actually sync back up + wait_for_ofs_sync $node_0 $node_1 + wait_for_ofs_sync $node_0 $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # Since we never caught up, there should be no syncs + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failovers can be aborted} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # Stop replica so 
it never catches up + exec kill -SIGSTOP [srv -1 pid] + $node_0 SET CASE 2 + + $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000 + assert_match [s 0 master_failover_state] "waiting-for-sync" + + # Sanity check that read commands are still accepted + $node_0 GET CASE + + $node_0 failover abort + assert_match [s 0 master_failover_state] "no-failover" + + exec kill -SIGCONT [srv -1 pid] + + # Just make sure everything is still synced + wait_for_ofs_sync $node_0 $node_1 + wait_for_ofs_sync $node_0 $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # Since we never caught up, there should be no syncs + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover aborts if target rejects sync request} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # We block psync, so the failover will fail + $node_1 acl setuser default -psync + + # We pause the target long enough to send a write command + # during the pause. This write will not be interrupted. + exec kill -SIGSTOP [srv -1 pid] + set rd [redis_deferring_client] + $rd SET FOO BAR + $node_0 failover to $node_1_host $node_1_port + exec kill -SIGCONT [srv -1 pid] + + # Wait for failover to end + wait_for_condition 50 100 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node_0 to replica did not finish" + } + + assert_equal [$rd read] "OK" + $rd close + + # restore access to psync + $node_1 acl setuser default +psync + + # We need to make sure the nodes actually sync back up + wait_for_sync $node_1 + wait_for_sync $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # We will cycle all of our replicas here and force a psync. 
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + + assert_equal [count_log_message 0 "Failover target rejected psync request"] 1 + assert_digests_match $node_0 $node_1 $node_2 + } +} +} +} diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index 99495b2b7..a89221197 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -1,3 +1,5 @@ +tags {"rdb"} { + set server_path [tmpdir "server.rdb-encoding-test"] # Copy RDB with different encodings in server path @@ -289,3 +291,5 @@ start_server {overrides {save ""}} { } } } ;# system_name + +} ;# tags diff --git a/tests/integration/redis-benchmark.tcl b/tests/integration/redis-benchmark.tcl index 5a4f09952..3684d7c3b 100644 --- a/tests/integration/redis-benchmark.tcl +++ b/tests/integration/redis-benchmark.tcl @@ -5,7 +5,7 @@ proc cmdstat {cmd} { return [cmdrstat $cmd r] } -start_server {tags {"benchmark"}} { +start_server {tags {"benchmark network"}} { start_server {} { set master_host [srv 0 host] set master_port [srv 0 port] diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl index 8071c4f97..c867001b8 100644 --- a/tests/integration/replication-4.tcl +++ b/tests/integration/replication-4.tcl @@ -1,4 +1,4 @@ -start_server {tags {"repl"}} { +start_server {tags {"repl network"}} { start_server {} { set master [srv -1 client] diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 6c437ba71..8d09c68c1 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -5,7 +5,7 @@ proc log_file_matches {log pattern} { string match $pattern $content } -start_server {tags {"repl"}} { +start_server {tags {"repl network"}} { set slave [srv 0 client] set slave_host [srv 0 host] set slave_port [srv 0 port] diff --git a/tests/modules/Makefile b/tests/modules/Makefile index 7363c98bc..93b4b022f 100644 --- a/tests/modules/Makefile +++ 
b/tests/modules/Makefile @@ -19,6 +19,7 @@ TEST_MODULES = \ misc.so \ hooks.so \ blockonkeys.so \ + blockonbackground.so \ scan.so \ datatype.so \ auth.so \ @@ -27,7 +28,8 @@ TEST_MODULES = \ getkeys.so \ test_lazyfree.so \ timer.so \ - defragtest.so + defragtest.so \ + stream.so .PHONY: all diff --git a/tests/modules/blockonbackground.c b/tests/modules/blockonbackground.c new file mode 100644 index 000000000..cf7e9c7c1 --- /dev/null +++ b/tests/modules/blockonbackground.c @@ -0,0 +1,220 @@ +#define REDISMODULE_EXPERIMENTAL_API +#include "redismodule.h" +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <time.h> +#include "assert.h" + +#define UNUSED(x) (void)(x) + +/* Reply callback for blocking command BLOCK.DEBUG */ +int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + UNUSED(argv); + UNUSED(argc); + int *myint = RedisModule_GetBlockedClientPrivateData(ctx); + return RedisModule_ReplyWithLongLong(ctx,*myint); +} + +/* Timeout callback for blocking command BLOCK.DEBUG */ +int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + UNUSED(argv); + UNUSED(argc); + RedisModuleBlockedClient *bc = RedisModule_GetBlockedClientHandle(ctx); + assert(RedisModule_BlockedClientMeasureTimeEnd(bc)==REDISMODULE_OK); + return RedisModule_ReplyWithSimpleString(ctx,"Request timedout"); +} + +/* Private data freeing callback for BLOCK.DEBUG command. */ +void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) { + UNUSED(ctx); + RedisModule_Free(privdata); +} + +/* The thread entry point that actually executes the blocking part + * of the command BLOCK.DEBUG. 
*/ +void *BlockDebug_ThreadMain(void *arg) { + void **targ = arg; + RedisModuleBlockedClient *bc = targ[0]; + long long delay = (unsigned long)targ[1]; + long long enable_time_track = (unsigned long)targ[2]; + if (enable_time_track) + assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK); + RedisModule_Free(targ); + + struct timespec ts; + ts.tv_sec = delay / 1000; + ts.tv_nsec = (delay % 1000) * 1000000; + nanosleep(&ts, NULL); + int *r = RedisModule_Alloc(sizeof(int)); + *r = rand(); + if (enable_time_track) + assert(RedisModule_BlockedClientMeasureTimeEnd(bc)==REDISMODULE_OK); + RedisModule_UnblockClient(bc,r); + return NULL; +} + +/* The thread entry point that actually executes the blocking part + * of the command BLOCK.DEBUG. */ +void *DoubleBlock_ThreadMain(void *arg) { + void **targ = arg; + RedisModuleBlockedClient *bc = targ[0]; + long long delay = (unsigned long)targ[1]; + assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK); + RedisModule_Free(targ); + struct timespec ts; + ts.tv_sec = delay / 1000; + ts.tv_nsec = (delay % 1000) * 1000000; + nanosleep(&ts, NULL); + int *r = RedisModule_Alloc(sizeof(int)); + *r = rand(); + RedisModule_BlockedClientMeasureTimeEnd(bc); + /* call again RedisModule_BlockedClientMeasureTimeStart() and + * RedisModule_BlockedClientMeasureTimeEnd and ensure that the + * total execution time is 2x the delay. */ + assert(RedisModule_BlockedClientMeasureTimeStart(bc)==REDISMODULE_OK); + nanosleep(&ts, NULL); + RedisModule_BlockedClientMeasureTimeEnd(bc); + + RedisModule_UnblockClient(bc,r); + return NULL; +} + +void HelloBlock_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) { + RedisModule_Log(ctx,"warning","Blocked client %p disconnected!", + (void*)bc); +} + +/* BLOCK.DEBUG <delay_ms> <timeout_ms> -- Block for <count> milliseconds, then reply with + * a random number. Timeout is the command timeout, so that you can test + * what happens when the delay is greater than the timeout. 
*/ +int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 3) return RedisModule_WrongArity(ctx); + long long delay; + long long timeout; + + if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + pthread_t tid; + RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout); + + /* Here we set a disconnection handler, however since this module will + * block in sleep() in a thread, there is not much we can do in the + * callback, so this is just to show you the API. */ + RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected); + + /* Now that we setup a blocking client, we need to pass the control + * to the thread. However we need to pass arguments to the thread: + * the delay and a reference to the blocked client handle. */ + void **targ = RedisModule_Alloc(sizeof(void*)*3); + targ[0] = bc; + targ[1] = (void*)(unsigned long) delay; + // pass 1 as flag to enable time tracking + targ[2] = (void*)(unsigned long) 1; + + if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) { + RedisModule_AbortBlock(bc); + return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); + } + return REDISMODULE_OK; +} + +/* BLOCK.DEBUG_NOTRACKING <delay_ms> <timeout_ms> -- Block for <count> milliseconds, then reply with + * a random number. Timeout is the command timeout, so that you can test + * what happens when the delay is greater than the timeout. 
+ * this command does not track background time so the background time should no appear in stats*/ +int HelloBlockNoTracking_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 3) return RedisModule_WrongArity(ctx); + long long delay; + long long timeout; + + if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + pthread_t tid; + RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout); + + /* Here we set a disconnection handler, however since this module will + * block in sleep() in a thread, there is not much we can do in the + * callback, so this is just to show you the API. */ + RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected); + + /* Now that we setup a blocking client, we need to pass the control + * to the thread. However we need to pass arguments to the thread: + * the delay and a reference to the blocked client handle. */ + void **targ = RedisModule_Alloc(sizeof(void*)*3); + targ[0] = bc; + targ[1] = (void*)(unsigned long) delay; + // pass 0 as flag to enable time tracking + targ[2] = (void*)(unsigned long) 0; + + if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) { + RedisModule_AbortBlock(bc); + return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); + } + return REDISMODULE_OK; +} + +/* BLOCK.DOUBLE_DEBUG <delay_ms> -- Block for 2 x <count> milliseconds, + * then reply with a random number. + * This command is used to test multiple calls to RedisModule_BlockedClientMeasureTimeStart() + * and RedisModule_BlockedClientMeasureTimeEnd() within the same execution. 
*/ +int HelloDoubleBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 2) return RedisModule_WrongArity(ctx); + long long delay; + long long timeout; + + if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + pthread_t tid; + RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout); + + /* Now that we setup a blocking client, we need to pass the control + * to the thread. However we need to pass arguments to the thread: + * the delay and a reference to the blocked client handle. */ + void **targ = RedisModule_Alloc(sizeof(void*)*2); + targ[0] = bc; + targ[1] = (void*)(unsigned long) delay; + + if (pthread_create(&tid,NULL,DoubleBlock_ThreadMain,targ) != 0) { + RedisModule_AbortBlock(bc); + return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); + } + return REDISMODULE_OK; +} + + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + UNUSED(argv); + UNUSED(argc); + + if (RedisModule_Init(ctx,"block",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"block.debug", + HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"block.double_debug", + HelloDoubleBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"block.debug_no_track", + HelloBlockNoTracking_RedisCommand,"",0,0,0) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/tests/modules/blockonkeys.c b/tests/modules/blockonkeys.c index 94f31d455..b7ab977e9 100644 --- a/tests/modules/blockonkeys.c +++ b/tests/modules/blockonkeys.c @@ -2,6 +2,7 @@ #include "redismodule.h" #include <string.h> +#include <strings.h> #include <assert.h> #include <unistd.h> @@ -65,6 +66,8 @@ int 
get_fsl(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode, int creat RedisModule_CloseKey(key); if (reply_on_failure) RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + RedisModuleCallReply *reply = RedisModule_Call(ctx, "INCR", "c", "fsl_wrong_type"); + RedisModule_FreeCallReply(reply); return 0; } @@ -298,6 +301,154 @@ int fsl_getall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { return REDISMODULE_OK; } +/* Callback for blockonkeys_popall */ +int blockonkeys_popall_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argc); + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST) { + RedisModuleString *elem; + long len = 0; + RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN); + while ((elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD)) != NULL) { + len++; + RedisModule_ReplyWithString(ctx, elem); + RedisModule_FreeString(ctx, elem); + } + RedisModule_ReplySetArrayLength(ctx, len); + } else { + RedisModule_ReplyWithError(ctx, "ERR Not a list"); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +int blockonkeys_popall_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + return RedisModule_ReplyWithError(ctx, "ERR Timeout"); +} + +/* BLOCKONKEYS.POPALL key + * + * Blocks on an empty key for up to 3 seconds. When unblocked by a list + * operation like LPUSH, all the elements are popped and returned. Fails with an + * error on timeout. 
*/ +int blockonkeys_popall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 2) + return RedisModule_WrongArity(ctx); + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ); + if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY) { + RedisModule_BlockClientOnKeys(ctx, blockonkeys_popall_reply_callback, + blockonkeys_popall_timeout_callback, + NULL, 3000, &argv[1], 1, NULL); + } else { + RedisModule_ReplyWithError(ctx, "ERR Key not empty"); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +/* BLOCKONKEYS.LPUSH key val [val ..] + * BLOCKONKEYS.LPUSH_UNBLOCK key val [val ..] + * + * A module equivalent of LPUSH. If the name LPUSH_UNBLOCK is used, + * RM_SignalKeyAsReady() is also called. */ +int blockonkeys_lpush(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 3) + return RedisModule_WrongArity(ctx); + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_EMPTY && + RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST) { + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + } else { + for (int i = 2; i < argc; i++) { + if (RedisModule_ListPush(key, REDISMODULE_LIST_HEAD, + argv[i]) != REDISMODULE_OK) { + RedisModule_CloseKey(key); + return RedisModule_ReplyWithError(ctx, "ERR Push failed"); + } + } + } + RedisModule_CloseKey(key); + + /* signal key as ready if the command is lpush_unblock */ + size_t len; + const char *str = RedisModule_StringPtrLen(argv[0], &len); + if (!strncasecmp(str, "blockonkeys.lpush_unblock", len)) { + RedisModule_SignalKeyAsReady(ctx, argv[1]); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* Callback for the BLOCKONKEYS.BLPOPN command */ +int blockonkeys_blpopn_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argc); + long long n; + RedisModule_StringToLongLong(argv[2], &n); + RedisModuleKey *key 
= RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + int result; + if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST && + RedisModule_ValueLength(key) >= (size_t)n) { + RedisModule_ReplyWithArray(ctx, n); + for (long i = 0; i < n; i++) { + RedisModuleString *elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD); + RedisModule_ReplyWithString(ctx, elem); + RedisModule_FreeString(ctx, elem); + } + result = REDISMODULE_OK; + } else if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_LIST || + RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY) { + /* continue blocking */ + result = REDISMODULE_ERR; + } else { + result = RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + } + RedisModule_CloseKey(key); + return result; +} + +int blockonkeys_blpopn_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + return RedisModule_ReplyWithError(ctx, "ERR Timeout"); +} + +/* BLOCKONKEYS.BLPOPN key N + * + * Blocks until key has N elements and then pops them or fails after 3 seconds. 
+ */ +int blockonkeys_blpopn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 3) return RedisModule_WrongArity(ctx); + + long long n; + if (RedisModule_StringToLongLong(argv[2], &n) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR Invalid N"); + } + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + int keytype = RedisModule_KeyType(key); + if (keytype != REDISMODULE_KEYTYPE_EMPTY && + keytype != REDISMODULE_KEYTYPE_LIST) { + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + } else if (keytype == REDISMODULE_KEYTYPE_LIST && + RedisModule_ValueLength(key) >= (size_t)n) { + RedisModule_ReplyWithArray(ctx, n); + for (long i = 0; i < n; i++) { + RedisModuleString *elem = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD); + RedisModule_ReplyWithString(ctx, elem); + RedisModule_FreeString(ctx, elem); + } + } else { + RedisModule_BlockClientOnKeys(ctx, blockonkeys_blpopn_reply_callback, + blockonkeys_blpopn_timeout_callback, + NULL, 3000, &argv[1], 1, NULL); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { REDISMODULE_NOT_USED(argv); REDISMODULE_NOT_USED(argc); @@ -334,5 +485,21 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_CreateCommand(ctx,"fsl.getall",fsl_getall,"",0,0,0) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx, "blockonkeys.popall", blockonkeys_popall, + "", 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush", blockonkeys_lpush, + "", 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush_unblock", blockonkeys_lpush, + "", 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx, "blockonkeys.blpopn", blockonkeys_blpopn, + "", 1, 1, 1) == 
REDISMODULE_ERR) + return REDISMODULE_ERR; + return REDISMODULE_OK; } diff --git a/tests/modules/stream.c b/tests/modules/stream.c new file mode 100644 index 000000000..abfbb1faf --- /dev/null +++ b/tests/modules/stream.c @@ -0,0 +1,258 @@ +#include "redismodule.h" + +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <unistd.h> +#include <errno.h> + +/* Command which adds a stream entry with automatic ID, like XADD *. + * + * Syntax: STREAM.ADD key field1 value1 [ field2 value2 ... ] + * + * The response is the ID of the added stream entry or an error message. + */ +int stream_add(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 2 || argc % 2 != 0) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + RedisModuleStreamID id; + if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, &id, + &argv[2], (argc-2)/2) == REDISMODULE_OK) { + RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id); + RedisModule_ReplyWithString(ctx, id_str); + RedisModule_FreeString(ctx, id_str); + } else { + RedisModule_ReplyWithError(ctx, "ERR StreamAdd failed"); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +/* Command which adds a stream entry N times. + * + * Syntax: STREAM.ADD key N field1 value1 [ field2 value2 ... ] + * + * Returns the number of successfully added entries. 
+ */ +int stream_addn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 3 || argc % 2 == 0) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + long long n, i; + if (RedisModule_StringToLongLong(argv[2], &n) == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, "N must be a number"); + return REDISMODULE_OK; + } + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + for (i = 0; i < n; i++) { + if (RedisModule_StreamAdd(key, REDISMODULE_STREAM_ADD_AUTOID, NULL, + &argv[3], (argc-3)/2) == REDISMODULE_ERR) + break; + } + RedisModule_ReplyWithLongLong(ctx, i); + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +/* STREAM.DELETE key stream-id */ +int stream_delete(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 3) return RedisModule_WrongArity(ctx); + RedisModuleStreamID id; + if (RedisModule_StringToStreamID(argv[2], &id) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "Invalid stream ID"); + } + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + if (RedisModule_StreamDelete(key, &id) == REDISMODULE_OK) { + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } else { + RedisModule_ReplyWithError(ctx, "ERR StreamDelete failed"); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +/* STREAM.RANGE key start-id end-id + * + * Returns an array of stream items. Each item is an array on the form + * [stream-id, [field1, value1, field2, value2, ...]]. + * + * A funny side-effect used for testing RM_StreamIteratorDelete() is that if any + * entry has a field named "selfdestruct", the stream entry is deleted. It is + * however included in the results of this command. 
+ */ +int stream_range(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 4) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + RedisModuleStreamID startid, endid; + if (RedisModule_StringToStreamID(argv[2], &startid) != REDISMODULE_OK || + RedisModule_StringToStreamID(argv[3], &endid) != REDISMODULE_OK) { + RedisModule_ReplyWithError(ctx, "Invalid stream ID"); + return REDISMODULE_OK; + } + + /* If startid > endid, we swap and set the reverse flag. */ + int flags = 0; + if (startid.ms > endid.ms || + (startid.ms == endid.ms && startid.seq > endid.seq)) { + RedisModuleStreamID tmp = startid; + startid = endid; + endid = tmp; + flags |= REDISMODULE_STREAM_ITERATOR_REVERSE; + } + + /* Open key and start iterator. */ + int openflags = REDISMODULE_READ | REDISMODULE_WRITE; + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], openflags); + if (RedisModule_StreamIteratorStart(key, flags, + &startid, &endid) != REDISMODULE_OK) { + /* Key is not a stream, etc. */ + RedisModule_ReplyWithError(ctx, "ERR StreamIteratorStart failed"); + RedisModule_CloseKey(key); + return REDISMODULE_OK; + } + + /* Check error handling: Delete current entry when no current entry. */ + assert(RedisModule_StreamIteratorDelete(key) == + REDISMODULE_ERR); + assert(errno == ENOENT); + + /* Check error handling: Fetch fields when no current entry. */ + assert(RedisModule_StreamIteratorNextField(key, NULL, NULL) == + REDISMODULE_ERR); + assert(errno == ENOENT); + + /* Return array. 
*/ + RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN); + RedisModule_AutoMemory(ctx); + RedisModuleStreamID id; + long numfields; + long len = 0; + while (RedisModule_StreamIteratorNextID(key, &id, + &numfields) == REDISMODULE_OK) { + RedisModule_ReplyWithArray(ctx, 2); + RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id); + RedisModule_ReplyWithString(ctx, id_str); + RedisModule_ReplyWithArray(ctx, numfields * 2); + int delete = 0; + RedisModuleString *field, *value; + for (long i = 0; i < numfields; i++) { + assert(RedisModule_StreamIteratorNextField(key, &field, &value) == + REDISMODULE_OK); + RedisModule_ReplyWithString(ctx, field); + RedisModule_ReplyWithString(ctx, value); + /* check if this is a "selfdestruct" field */ + size_t field_len; + const char *field_str = RedisModule_StringPtrLen(field, &field_len); + if (!strncmp(field_str, "selfdestruct", field_len)) delete = 1; + } + if (delete) { + assert(RedisModule_StreamIteratorDelete(key) == REDISMODULE_OK); + } + /* check error handling: no more fields to fetch */ + assert(RedisModule_StreamIteratorNextField(key, &field, &value) == + REDISMODULE_ERR); + assert(errno == ENOENT); + len++; + } + RedisModule_ReplySetArrayLength(ctx, len); + RedisModule_StreamIteratorStop(key); + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +/* + * STREAM.TRIM key (MAXLEN (=|~) length | MINID (=|~) id) + */ +int stream_trim(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 5) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + /* Parse args */ + int trim_by_id = 0; /* 0 = maxlen, 1 = minid */ + long long maxlen; + RedisModuleStreamID minid; + size_t arg_len; + const char *arg = RedisModule_StringPtrLen(argv[2], &arg_len); + if (!strcasecmp(arg, "minid")) { + trim_by_id = 1; + if (RedisModule_StringToStreamID(argv[4], &minid) != REDISMODULE_OK) { + RedisModule_ReplyWithError(ctx, "ERR Invalid stream ID"); + return REDISMODULE_OK; + } + } else 
if (!strcasecmp(arg, "maxlen")) { + if (RedisModule_StringToLongLong(argv[4], &maxlen) == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, "ERR Maxlen must be a number"); + return REDISMODULE_OK; + } + } else { + RedisModule_ReplyWithError(ctx, "ERR Invalid arguments"); + return REDISMODULE_OK; + } + + /* Approx or exact */ + int flags; + arg = RedisModule_StringPtrLen(argv[3], &arg_len); + if (arg_len == 1 && arg[0] == '~') { + flags = REDISMODULE_STREAM_TRIM_APPROX; + } else if (arg_len == 1 && arg[0] == '=') { + flags = 0; + } else { + RedisModule_ReplyWithError(ctx, "ERR Invalid approx-or-exact mark"); + return REDISMODULE_OK; + } + + /* Trim */ + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE); + long long trimmed; + if (trim_by_id) { + trimmed = RedisModule_StreamTrimByID(key, flags, &minid); + } else { + trimmed = RedisModule_StreamTrimByLength(key, flags, maxlen); + } + + /* Return result */ + if (trimmed < 0) { + RedisModule_ReplyWithError(ctx, "ERR Trimming failed"); + } else { + RedisModule_ReplyWithLongLong(ctx, trimmed); + } + RedisModule_CloseKey(key); + return REDISMODULE_OK; +} + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_Init(ctx, "stream", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx, "stream.add", stream_add, "", + 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx, "stream.addn", stream_addn, "", + 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx, "stream.delete", stream_delete, "", + 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx, "stream.range", stream_range, "", + 1, 1, 1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx, "stream.trim", stream_trim, "", + 1, 1, 1) == 
REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl index 996af906a..c275aa762 100644 --- a/tests/sentinel/run.tcl +++ b/tests/sentinel/run.tcl @@ -10,7 +10,7 @@ set ::tlsdir "../../tls" proc main {} { parse_options - spawn_instance sentinel $::sentinel_base_port $::instances_count + spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"] "../tests/includes/sentinel.conf" spawn_instance redis $::redis_base_port $::instances_count run_tests cleanup diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl index 7fb1a8bef..75baf9817 100644 --- a/tests/sentinel/tests/00-base.tcl +++ b/tests/sentinel/tests/00-base.tcl @@ -1,5 +1,5 @@ # Check the basic monitoring and failover capabilities. - +source "../tests/includes/start-init-tests.tcl" source "../tests/includes/init-tests.tcl" if {$::simulate_error} { diff --git a/tests/sentinel/tests/08-hostname-conf.tcl b/tests/sentinel/tests/08-hostname-conf.tcl new file mode 100644 index 000000000..be6e42cb0 --- /dev/null +++ b/tests/sentinel/tests/08-hostname-conf.tcl @@ -0,0 +1,67 @@ +proc set_redis_announce_ip {addr} { + foreach_redis_id id { + R $id config set replica-announce-ip $addr + } +} + +proc set_sentinel_config {keyword value} { + foreach_sentinel_id id { + S $id sentinel config set $keyword $value + } +} + +proc set_all_instances_hostname {hostname} { + foreach_sentinel_id id { + set_instance_attrib sentinel $id host $hostname + } + foreach_redis_id id { + set_instance_attrib redis $id host $hostname + } +} + +test "(pre-init) Configure instances and sentinel for hostname use" { + set ::host "localhost" + restart_killed_instances + set_all_instances_hostname $::host + set_redis_announce_ip $::host + set_sentinel_config resolve-hostnames yes + set_sentinel_config announce-hostnames yes +} + +source "../tests/includes/init-tests.tcl" + +proc 
verify_hostname_announced {hostname} { + foreach_sentinel_id id { + # Master is reported with its hostname + if {![string equal [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 0] $hostname]} { + return 0 + } + + # Replicas are reported with their hostnames + foreach replica [S $id SENTINEL REPLICAS mymaster] { + if {![string equal [dict get $replica ip] $hostname]} { + return 0 + } + } + } + return 1 +} + +test "Sentinel announces hostnames" { + # Check initial state + verify_hostname_announced $::host + + # Disable announce-hostnames and confirm IPs are used + set_sentinel_config announce-hostnames no + assert {[verify_hostname_announced "127.0.0.1"] || [verify_hostname_announced "::1"]} +} + +# We need to revert any special configuration because all tests currently +# share the same instances. +test "(post-cleanup) Configure instances and sentinel for IPs" { + set ::host "127.0.0.1" + set_all_instances_hostname $::host + set_redis_announce_ip $::host + set_sentinel_config resolve-hostnames no + set_sentinel_config announce-hostnames no +}
\ No newline at end of file diff --git a/tests/sentinel/tests/09-acl-support.tcl b/tests/sentinel/tests/09-acl-support.tcl new file mode 100644 index 000000000..1366fc4d5 --- /dev/null +++ b/tests/sentinel/tests/09-acl-support.tcl @@ -0,0 +1,50 @@ + +source "../tests/includes/init-tests.tcl" + +set ::user "testuser" +set ::password "secret" + +proc setup_acl {} { + foreach_sentinel_id id { + assert_equal {OK} [S $id ACL SETUSER $::user >$::password +@all on] + assert_equal {OK} [S $id ACL SETUSER default off] + + S $id CLIENT KILL USER default SKIPME no + assert_equal {OK} [S $id AUTH $::user $::password] + } +} + +proc teardown_acl {} { + foreach_sentinel_id id { + assert_equal {OK} [S $id ACL SETUSER default on] + assert_equal {1} [S $id ACL DELUSER $::user] + + S $id SENTINEL CONFIG SET sentinel-user "" + S $id SENTINEL CONFIG SET sentinel-pass "" + } +} + +test "(post-init) Set up ACL configuration" { + setup_acl + assert_equal $::user [S 1 ACL WHOAMI] +} + +test "SENTINEL CONFIG SET handles on-the-fly credentials reconfiguration" { + # Make sure we're starting with a broken state... + after 5000 + catch {S 1 SENTINEL CKQUORUM mymaster} err + assert_match {*NOQUORUM*} $err + + foreach_sentinel_id id { + assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-user $::user] + assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-pass $::password] + } + + after 5000 + assert_match {*OK*} [S 1 SENTINEL CKQUORUM mymaster] +} + +test "(post-cleanup) Tear down ACL configuration" { + teardown_acl +} + diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl index 234f9c589..b4626caed 100644 --- a/tests/sentinel/tests/includes/init-tests.tcl +++ b/tests/sentinel/tests/includes/init-tests.tcl @@ -1,6 +1,6 @@ # Initialization tests -- most units will start including this. 
-test "(init) Restart killed instances" { +proc restart_killed_instances {} { foreach type {redis sentinel} { foreach_${type}_id id { if {[get_instance_attrib $type $id pid] == -1} { @@ -12,6 +12,10 @@ test "(init) Restart killed instances" { } } +test "(init) Restart killed instances" { + restart_killed_instances +} + test "(init) Remove old master entry from sentinels" { foreach_sentinel_id id { catch {S $id SENTINEL REMOVE mymaster} @@ -37,6 +41,8 @@ test "(init) Sentinels can start monitoring a master" { S $id SENTINEL SET mymaster down-after-milliseconds 2000 S $id SENTINEL SET mymaster failover-timeout 20000 S $id SENTINEL SET mymaster parallel-syncs 10 + S $id SENTINEL SET mymaster notification-script ../../tests/includes/notify.sh + S $id SENTINEL SET mymaster client-reconfig-script ../../tests/includes/notify.sh } } diff --git a/tests/sentinel/tests/includes/notify.sh b/tests/sentinel/tests/includes/notify.sh new file mode 100755 index 000000000..5de0eaf76 --- /dev/null +++ b/tests/sentinel/tests/includes/notify.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +OS=`uname -s` +if [ ${OS} != "Linux" ] +then + exit 0 +fi + +# fd 3 is meant to catch the actual access to /proc/pid/fd, +# in case there's an fd leak by the sentinel, +# it can take 3, but then the access to /proc will take another fd, and we'll catch that. +leaked_fd_count=`ls /proc/self/fd | grep -vE '^[0|1|2|3]$' | wc -l` +if [ $leaked_fd_count -gt 0 ] +then + sentinel_fd_leaks_file="../sentinel_fd_leaks" + if [ ! 
-f $sentinel_fd_leaks_file ] + then + ls -l /proc/self/fd | cat >> $sentinel_fd_leaks_file + lsof -p $$ | cat >> $sentinel_fd_leaks_file + fi +fi diff --git a/tests/sentinel/tests/includes/sentinel.conf b/tests/sentinel/tests/includes/sentinel.conf new file mode 100644 index 000000000..94f2804a4 --- /dev/null +++ b/tests/sentinel/tests/includes/sentinel.conf @@ -0,0 +1,11 @@ +# assume master is down after being unresponsive for 20s +sentinel down-after-milliseconds setmaster 20000 +# reconfigure one slave at a time +sentinel parallel-syncs setmaster 2 +# wait for 4m before assuming failover went wrong +sentinel failover-timeout setmaster 240000 +# monitoring set +sentinel monitor setmaster 10.0.0.1 30000 2 + + + diff --git a/tests/sentinel/tests/includes/start-init-tests.tcl b/tests/sentinel/tests/includes/start-init-tests.tcl new file mode 100644 index 000000000..b0523506a --- /dev/null +++ b/tests/sentinel/tests/includes/start-init-tests.tcl @@ -0,0 +1,18 @@ +test "(start-init) Flush config and compare rewrite config file lines" { + foreach_sentinel_id id { + assert_match "OK" [S $id SENTINEL FLUSHCONFIG] + set file1 ../tests/includes/sentinel.conf + set file2 [file join "sentinel_${id}" "sentinel.conf"] + set fh1 [open $file1 r] + set fh2 [open $file2 r] + while {[gets $fh1 line1]} { + if {[gets $fh2 line2]} { + assert [string equal $line1 $line2] + } else { + fail "sentinel config file rewrite sequence changed" + } + } + close $fh1 + close $fh2 + } +}
\ No newline at end of file diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl index 8eca2ac32..54b49920d 100644 --- a/tests/support/redis.tcl +++ b/tests/support/redis.tcl @@ -244,6 +244,7 @@ proc ::redis::redis_read_reply {id fd} { _ {redis_read_null $fd} : - + {redis_read_line $fd} + , {expr {double([redis_read_line $fd])}} - {return -code error [redis_read_line $fd]} $ {redis_bulk_read $fd} > - diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 77ba31d84..0d36d46be 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -152,20 +152,48 @@ proc server_is_up {host port retrynum} { return 0 } +# Check if current ::tags match requested tags. If ::allowtags are used, +# there must be some intersection. If ::denytags are used, no intersection +# is allowed. Returns 1 if tags are acceptable or 0 otherwise, in which +# case err_return names a return variable for the message to be logged. +proc tags_acceptable {err_return} { + upvar $err_return err + + # If tags are whitelisted, make sure there's match + if {[llength $::allowtags] > 0} { + set matched 0 + foreach tag $::allowtags { + if {[lsearch $::tags $tag] >= 0} { + incr matched + } + } + if {$matched < 1} { + set err "Tag: none of the tags allowed" + return 0 + } + } + + foreach tag $::denytags { + if {[lsearch $::tags $tag] >= 0} { + set err "Tag: $tag denied" + return 0 + } + } + + return 1 +} + # doesn't really belong here, but highly coupled to code in start_server proc tags {tags code} { # If we 'tags' contain multiple tags, quoted and seperated by spaces, # we want to get rid of the quotes in order to have a proper list set tags [string map { \" "" } $tags] set ::tags [concat $::tags $tags] - # We skip unwanted tags - foreach tag $::denytags { - if {[lsearch $::tags $tag] >= 0} { - incr ::num_aborted - send_data_packet $::test_server_fd ignore "Tag: $tag" - set ::tags [lrange $::tags 0 end-[llength $tags]] - return - } + if {![tags_acceptable err]} { + incr 
::num_aborted + send_data_packet $::test_server_fd ignore $err + set ::tags [lrange $::tags 0 end-[llength $tags]] + return } uplevel 1 $code set ::tags [lrange $::tags 0 end-[llength $tags]] @@ -267,13 +295,11 @@ proc start_server {options {code undefined}} { } # We skip unwanted tags - foreach tag $::denytags { - if {[lsearch $::tags $tag] >= 0} { - incr ::num_aborted - send_data_packet $::test_server_fd ignore "Tag: $tag" - set ::tags [lrange $::tags 0 end-[llength $tags]] - return - } + if {![tags_acceptable err]} { + incr ::num_aborted + send_data_packet $::test_server_fd ignore $err + set ::tags [lrange $::tags 0 end-[llength $tags]] + return } # If we are running against an external server, we just push the diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 86f2753c2..80f8598ce 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -12,7 +12,11 @@ proc randstring {min max {type binary}} { set maxval 52 } while {$len} { - append output [format "%c" [expr {$minval+int(rand()*($maxval-$minval+1))}]] + set rr [expr {$minval+int(rand()*($maxval-$minval+1))}] + if {$type eq {alpha} && $rr eq 92} { + set rr 90; # avoid putting '\' char in the string, it can mess up TCL processing + } + append output [format "%c" $rr] incr len -1 } return $output @@ -86,12 +90,10 @@ proc waitForBgrewriteaof r { } proc wait_for_sync r { - while 1 { - if {[status $r master_link_status] eq "down"} { - after 10 - } else { - break - } + wait_for_condition 50 100 { + [status $r master_link_status] eq "up" + } else { + fail "replica didn't sync in time" } } @@ -571,8 +573,8 @@ proc generate_fuzzy_traffic_on_key {key duration} { # Commands per type, blocking commands removed # TODO: extract these from help.h or elsewhere, and improve to include other types set string_commands {APPEND BITCOUNT BITFIELD BITOP BITPOS DECR DECRBY GET GETBIT GETRANGE GETSET INCR INCRBY INCRBYFLOAT MGET MSET MSETNX PSETEX SET SETBIT SETEX SETNX SETRANGE STRALGO STRLEN} - set 
hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS} - set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE} + set hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS HRANDFIELD} + set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE ZRANDMEMBER} set list_commands {LINDEX LINSERT LLEN LPOP LPOS LPUSH LPUSHX LRANGE LREM LSET LTRIM RPOP RPOPLPUSH RPUSH RPUSHX} set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE} set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 4bef921ff..2b7854780 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -52,6 +52,7 @@ set ::all_tests { integration/psync2 integration/psync2-reg integration/psync2-pingoff + integration/failover integration/redis-cli integration/redis-benchmark unit/pubsub @@ -717,6 +718,7 @@ if {[llength $filtered_tests] < [llength $::all_tests]} { } proc attach_to_replication_stream {} { + r config set repl-ping-replica-period 3600 if {$::tls} { set s [::tls::socket [srv 0 "host"] [srv 0 "port"]] } else { @@ -774,6 +776,7 @@ proc assert_replication_stream {s patterns} { proc close_replication_stream {s} { close $s + r config set repl-ping-replica-period 10 } # With the parallel test running multiple Redis instances at the same time diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl 
index affce65e5..d43820ae3 100644 --- a/tests/unit/dump.tcl +++ b/tests/unit/dump.tcl @@ -12,7 +12,7 @@ start_server {tags {"dump"}} { r del foo r restore foo 5000 $encoded set ttl [r pttl foo] - assert {$ttl >= 3000 && $ttl <= 5000} + assert_range $ttl 3000 5000 r get foo } {bar} @@ -22,7 +22,7 @@ start_server {tags {"dump"}} { r del foo r restore foo 2569591501 $encoded set ttl [r pttl foo] - assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501} + assert_range $ttl (2569591501-3000) 2569591501 r get foo } {bar} @@ -33,7 +33,7 @@ start_server {tags {"dump"}} { set now [clock milliseconds] r restore foo [expr $now+3000] $encoded absttl set ttl [r pttl foo] - assert {$ttl >= 2900 && $ttl <= 3100} + assert_range $ttl 2000 3100 r get foo } {bar} diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index 8bcdc16b7..9bde4809f 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -209,19 +209,101 @@ start_server {tags {"expire"}} { set e } {*not an integer*} - test {SET - use EX/PX option, TTL should not be reseted after loadaof} { + test {EXPIRE and SET/GETEX EX/PX/EXAT/PXAT option, TTL should not be reset after loadaof} { + # This test makes sure that expire times are propagated as absolute + # times to the AOF file and not as relative time, so that when the AOF + # is reloaded the TTLs are not being shifted forward to the future. + # We want the time to logically pass when the server is restarted! 
+ r config set appendonly yes - r set foo bar EX 100 - after 2000 - r debug loadaof - set ttl [r ttl foo] - assert {$ttl <= 98 && $ttl > 90} + r set foo1 bar EX 100 + r set foo2 bar PX 100000 + r set foo3 bar + r set foo4 bar + r expire foo3 100 + r pexpire foo4 100000 + r setex foo5 100 bar + r psetex foo6 100000 bar + r set foo7 bar EXAT [expr [clock seconds] + 100] + r set foo8 bar PXAT [expr [clock milliseconds] + 100000] + r set foo9 bar + r getex foo9 EX 100 + r set foo10 bar + r getex foo10 PX 100000 + r set foo11 bar + r getex foo11 EXAT [expr [clock seconds] + 100] + r set foo12 bar + r getex foo12 PXAT [expr [clock milliseconds] + 100000] - r set foo bar PX 100000 after 2000 r debug loadaof - set ttl [r ttl foo] - assert {$ttl <= 98 && $ttl > 90} + assert_range [r ttl foo1] 90 98 + assert_range [r ttl foo2] 90 98 + assert_range [r ttl foo3] 90 98 + assert_range [r ttl foo4] 90 98 + assert_range [r ttl foo5] 90 98 + assert_range [r ttl foo6] 90 98 + assert_range [r ttl foo7] 90 98 + assert_range [r ttl foo8] 90 98 + assert_range [r ttl foo9] 90 98 + assert_range [r ttl foo10] 90 98 + assert_range [r ttl foo11] 90 98 + assert_range [r ttl foo12] 90 98 + } + + test {EXPIRE relative and absolute propagation to replicas} { + # Make sure that relative and absolute expire commands are propagated + # "as is" to replicas. + # We want replicas to honor the same high level contract of expires that + # the master has, that is, we want the time to be counted logically + # starting from the moment the write was received. This usually provides + # the most coherent behavior from the point of view of the external + # users, with TTLs that are similar from the POV of the external observer. 
+ # + # This test is here to stop some innocent / eager optimization or cleanup + # from doing the wrong thing without proper discussion, see: + # https://github.com/redis/redis/pull/5171#issuecomment-409553266 + + set repl [attach_to_replication_stream] + r set foo1 bar ex 200 + r set foo1 bar px 100000 + r set foo1 bar exat [expr [clock seconds]+100] + r set foo1 bar pxat [expr [clock milliseconds]+10000] + r setex foo1 100 bar + r psetex foo1 100000 bar + r set foo2 bar + r expire foo2 100 + r pexpire foo2 100000 + r set foo3 bar + r expireat foo3 [expr [clock seconds]+100] + r pexpireat foo3 [expr [clock seconds]*1000+100000] + r expireat foo3 [expr [clock seconds]-100] + r set foo4 bar + r getex foo4 ex 200 + r getex foo4 px 200000 + r getex foo4 exat [expr [clock seconds]+100] + r getex foo4 pxat [expr [clock milliseconds]+10000] + assert_replication_stream $repl { + {select *} + {set foo1 bar PX 200000} + {set foo1 bar PX 100000} + {set foo1 bar PXAT *} + {set foo1 bar PXAT *} + {set foo1 bar PX 100000} + {set foo1 bar PX 100000} + {set foo2 bar} + {expire foo2 100} + {pexpire foo2 100000} + {set foo3 bar} + {expireat foo3 *} + {pexpireat foo3 *} + {del foo3} + {set foo4 bar} + {pexpire foo4 200000} + {pexpire foo4 200000} + {pexpireat foo4 *} + {pexpireat foo4 *} + } } test {SET command will remove expire} { @@ -246,4 +328,32 @@ start_server {tags {"expire"}} { set ttl [r ttl foo] assert {$ttl <= 98 && $ttl > 90} } + + test {GETEX use of PERSIST option should remove TTL} { + r set foo bar EX 100 + r getex foo PERSIST + r ttl foo + } {-1} + + test {GETEX use of PERSIST option should remove TTL after loadaof} { + r set foo bar EX 100 + r getex foo PERSIST + after 2000 + r debug loadaof + r ttl foo + } {-1} + + test {GETEX propagate as to replica as PERSIST, DEL, or nothing} { + set repl [attach_to_replication_stream] + r set foo bar EX 100 + r getex foo PERSIST + r getex foo + r getex foo exat [expr [clock seconds]-100] + assert_replication_stream $repl { + 
{select *} + {set foo bar PX 100000} + {persist foo} + {del foo} + } + } } diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 0a7f7a9c9..ba28341ff 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -112,6 +112,7 @@ start_server {tags {"introspection"}} { bio_cpulist aof_rewrite_cpulist bgsave_cpulist + set-proc-title } if {!$::tls} { diff --git a/tests/unit/limits.tcl b/tests/unit/limits.tcl index 38ba76208..51122e8f5 100644 --- a/tests/unit/limits.tcl +++ b/tests/unit/limits.tcl @@ -1,4 +1,4 @@ -start_server {tags {"limits"} overrides {maxclients 10}} { +start_server {tags {"limits network"} overrides {maxclients 10}} { if {$::tls} { set expected_code "*I/O error*" } else { diff --git a/tests/unit/moduleapi/blockonbackground.tcl b/tests/unit/moduleapi/blockonbackground.tcl new file mode 100644 index 000000000..23111ab73 --- /dev/null +++ b/tests/unit/moduleapi/blockonbackground.tcl @@ -0,0 +1,67 @@ +set testmodule [file normalize tests/modules/blockonbackground.so] + +source tests/support/util.tcl + +start_server {tags {"modules"}} { + r module load $testmodule + + test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time} { + r slowlog reset + r config set slowlog-log-slower-than 200000 + assert_equal [r slowlog len] 0 + r block.debug 0 10000 + assert_equal [r slowlog len] 0 + r config resetstat + r block.debug 200 10000 + assert_equal [r slowlog len] 1 + + set cmdstatline [cmdrstat block.debug r] + + regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call + assert {$usec >= 100000} + assert {$usec_per_call >= 100000} + } + + test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } { + r slowlog reset + r config set slowlog-log-slower-than 200000 + assert_equal [r 
slowlog len] 0 + r block.debug 0 20000 + assert_equal [r slowlog len] 0 + r config resetstat + r block.debug 20000 200 + assert_equal [r slowlog len] 1 + + set cmdstatline [cmdrstat block.debug r] + + regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call + assert {$usec >= 100000} + assert {$usec_per_call >= 100000} + } + + test { blocked clients time tracking - check blocked command with multiple calls RedisModule_BlockedClientMeasureTimeStart() is tracking the total background time } { + r slowlog reset + r config set slowlog-log-slower-than 200000 + assert_equal [r slowlog len] 0 + r block.double_debug 0 + assert_equal [r slowlog len] 0 + r config resetstat + r block.double_debug 100 + assert_equal [r slowlog len] 1 + + set cmdstatline [cmdrstat block.double_debug r] + + regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call + assert {$usec >= 60000} + assert {$usec_per_call >= 60000} + } + + test { blocked clients time tracking - check blocked command without calling RedisModule_BlockedClientMeasureTimeStart() is not reporting background time } { + r slowlog reset + r config set slowlog-log-slower-than 200000 + assert_equal [r slowlog len] 0 + r block.debug_no_track 200 1000 + # ensure slowlog is still empty + assert_equal [r slowlog len] 0 + } +} diff --git a/tests/unit/moduleapi/blockonkeys.tcl b/tests/unit/moduleapi/blockonkeys.tcl index 5e5d93da3..75191b3c7 100644 --- a/tests/unit/moduleapi/blockonkeys.tcl +++ b/tests/unit/moduleapi/blockonkeys.tcl @@ -168,7 +168,7 @@ start_server {tags {"modules"}} { assert_error "*unblocked*" {$rd read} } - test {Module client blocked on keys does not wake up on wrong type} { + test {Module client re-blocked on keys after woke up on wrong type} { r del k set rd [redis_deferring_client] $rd fsl.bpop k 0 @@ -184,5 +184,56 @@ start_server {tags {"modules"}} { r del k r fsl.push k 34 assert_equal {34} [$rd read] 
+ assert_equal {1} [r get fsl_wrong_type] ;# first lpush caused one wrong-type wake-up + } + + test {Module client blocked on keys woken up by LPUSH} { + r del k + set rd [redis_deferring_client] + $rd blockonkeys.popall k + # wait until client is actually blocked + wait_for_condition 50 100 { + [s 0 blocked_clients] eq {1} + } else { + fail "Client is not blocked" + } + r lpush k 42 squirrel banana + assert_equal {banana squirrel 42} [$rd read] + $rd close + } + + test {Module client unblocks BLPOP} { + r del k + set rd [redis_deferring_client] + $rd blpop k 3 + # wait until client is actually blocked + wait_for_condition 50 100 { + [s 0 blocked_clients] eq {1} + } else { + fail "Client is not blocked" + } + r blockonkeys.lpush k 42 + assert_equal {k 42} [$rd read] + $rd close + } + + test {Module unblocks module blocked on non-empty list} { + r del k + r lpush k aa + # Module client blocks to pop 5 elements from list + set rd [redis_deferring_client] + $rd blockonkeys.blpopn k 5 + # Wait until client is actually blocked + wait_for_condition 50 100 { + [s 0 blocked_clients] eq {1} + } else { + fail "Client is not blocked" + } + # Check that RM_SignalKeyAsReady() can wake up BLPOPN + r blockonkeys.lpush_unblock k bb cc ;# Not enough elements for BLPOPN + r lpush k dd ee ff ;# Doesn't unblock module + r blockonkeys.lpush_unblock k gg ;# Unblocks module + assert_equal {gg ff ee dd cc} [$rd read] + $rd close } } diff --git a/tests/unit/moduleapi/stream.tcl b/tests/unit/moduleapi/stream.tcl new file mode 100644 index 000000000..15e97c183 --- /dev/null +++ b/tests/unit/moduleapi/stream.tcl @@ -0,0 +1,155 @@ +set testmodule [file normalize tests/modules/stream.so] + +start_server {tags {"modules"}} { + r module load $testmodule + + test {Module stream add and delete} { + r del mystream + # add to empty key + set streamid1 [r stream.add mystream item 1 value a] + # add to existing stream + set streamid2 [r stream.add mystream item 2 value b] + # check result + assert { 
[string match "*-*" $streamid1] } + set items [r XRANGE mystream - +] + assert_equal $items \ + "{$streamid1 {item 1 value a}} {$streamid2 {item 2 value b}}" + # delete one of them and try deleting non-existing ID + assert_equal OK [r stream.delete mystream $streamid1] + assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456} + assert_error "Invalid stream ID*" {r stream.delete mystream foo} + assert_equal "{$streamid2 {item 2 value b}}" [r XRANGE mystream - +] + # check error condition: wrong type + r del mystream + r set mystream mystring + assert_error "ERR StreamAdd*" {r stream.add mystream item 1 value a} + assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456} + } + + test {Module stream add unblocks blocking xread} { + r del mystream + + # Blocking XREAD on an empty key + set rd1 [redis_deferring_client] + $rd1 XREAD BLOCK 3000 STREAMS mystream $ + # wait until client is actually blocked + wait_for_condition 50 100 { + [s 0 blocked_clients] eq {1} + } else { + fail "Client is not blocked" + } + set id [r stream.add mystream field 1 value a] + assert_equal "{mystream {{$id {field 1 value a}}}}" [$rd1 read] + + # Blocking XREAD on an existing stream + set rd2 [redis_deferring_client] + $rd2 XREAD BLOCK 3000 STREAMS mystream $ + # wait until client is actually blocked + wait_for_condition 50 100 { + [s 0 blocked_clients] eq {1} + } else { + fail "Client is not blocked" + } + set id [r stream.add mystream field 2 value b] + assert_equal "{mystream {{$id {field 2 value b}}}}" [$rd2 read] + } + + test {Module stream add benchmark (1M stream add)} { + set n 1000000 + r del mystream + set result [r stream.addn mystream $n field value] + assert_equal $result $n + } + + test {Module stream iterator} { + r del mystream + set streamid1 [r xadd mystream * item 1 value a] + set streamid2 [r xadd mystream * item 2 value b] + # range result + set result1 [r stream.range mystream "-" "+"] + set expect1 [r xrange mystream "-" "+"] + assert_equal 
$result1 $expect1 + # reverse range + set result_rev [r stream.range mystream "+" "-"] + set expect_rev [r xrevrange mystream "+" "-"] + assert_equal $result_rev $expect_rev + + # only one item: range with startid = endid + set result2 [r stream.range mystream "-" $streamid1] + assert_equal $result2 "{$streamid1 {item 1 value a}}" + assert_equal $result2 [list [list $streamid1 {item 1 value a}]] + # only one item: range with startid = endid + set result3 [r stream.range mystream $streamid2 $streamid2] + assert_equal $result3 "{$streamid2 {item 2 value b}}" + assert_equal $result3 [list [list $streamid2 {item 2 value b}]] + } + + test {Module stream iterator delete} { + r del mystream + set id1 [r xadd mystream * normal item] + set id2 [r xadd mystream * selfdestruct yes] + set id3 [r xadd mystream * another item] + # stream.range deletes the "selfdestruct" item after returning it + assert_equal \ + "{$id1 {normal item}} {$id2 {selfdestruct yes}} {$id3 {another item}}" \ + [r stream.range mystream - +] + # now, the "selfdestruct" item is gone + assert_equal \ + "{$id1 {normal item}} {$id3 {another item}}" \ + [r stream.range mystream - +] + } + + test {Module stream trim by length} { + r del mystream + # exact maxlen + r xadd mystream * item 1 value a + r xadd mystream * item 2 value b + r xadd mystream * item 3 value c + assert_equal 3 [r xlen mystream] + assert_equal 0 [r stream.trim mystream maxlen = 5] + assert_equal 3 [r xlen mystream] + assert_equal 2 [r stream.trim mystream maxlen = 1] + assert_equal 1 [r xlen mystream] + assert_equal 1 [r stream.trim mystream maxlen = 0] + # check that there is no limit for exact maxlen + r stream.addn mystream 20000 item x value y + assert_equal 20000 [r stream.trim mystream maxlen = 0] + # approx maxlen (100 items per node implies default limit 10K items) + r stream.addn mystream 20000 item x value y + assert_equal 20000 [r xlen mystream] + assert_equal 10000 [r stream.trim mystream maxlen ~ 2] + assert_equal 9900 [r 
stream.trim mystream maxlen ~ 2] + assert_equal 0 [r stream.trim mystream maxlen ~ 2] + assert_equal 100 [r xlen mystream] + assert_equal 100 [r stream.trim mystream maxlen ~ 0] + assert_equal 0 [r xlen mystream] + } + + test {Module stream trim by ID} { + r del mystream + # exact minid + r xadd mystream * item 1 value a + r xadd mystream * item 2 value b + set minid [r xadd mystream * item 3 value c] + assert_equal 3 [r xlen mystream] + assert_equal 0 [r stream.trim mystream minid = -] + assert_equal 3 [r xlen mystream] + assert_equal 2 [r stream.trim mystream minid = $minid] + assert_equal 1 [r xlen mystream] + assert_equal 1 [r stream.trim mystream minid = +] + # check that there is no limit for exact minid + r stream.addn mystream 20000 item x value y + assert_equal 20000 [r stream.trim mystream minid = +] + # approx minid (100 items per node implies default limit 10K items) + r stream.addn mystream 19980 item x value y + set minid [r xadd mystream * item x value y] + r stream.addn mystream 19 item x value y + assert_equal 20000 [r xlen mystream] + assert_equal 10000 [r stream.trim mystream minid ~ $minid] + assert_equal 9900 [r stream.trim mystream minid ~ $minid] + assert_equal 0 [r stream.trim mystream minid ~ $minid] + assert_equal 100 [r xlen mystream] + assert_equal 100 [r stream.trim mystream minid ~ +] + assert_equal 0 [r xlen mystream] + } +} diff --git a/tests/unit/oom-score-adj.tcl b/tests/unit/oom-score-adj.tcl index cf671fe6a..efa61b759 100644 --- a/tests/unit/oom-score-adj.tcl +++ b/tests/unit/oom-score-adj.tcl @@ -39,7 +39,7 @@ if {$system_name eq {linux}} { r bgsave set child_pid [get_child_pid 0] - assert {[get_oom_score_adj $child_pid] == [expr $base + 30]} + assert_equal [get_oom_score_adj $child_pid] [expr $base + 30] } # Failed oom-score-adj tests can only run unprivileged diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index d98dc1bd4..a35ac1752 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -321,3 +321,47 @@ 
start_server {tags {"other"}} { assert_match "*table size: 8192*" [r debug HTSTATS 9] } } + +proc read_proc_title {pid} { + set fd [open "/proc/$pid/cmdline" "r"] + set cmdline [read $fd 1024] + close $fd + + return $cmdline +} + +start_server {tags {"other"}} { + test {Process title set as expected} { + # Test only on Linux where it's easy to get cmdline without relying on tools. + # Skip valgrind as it messes up the arguments. + set os [exec uname] + if {$os == "Linux" && !$::valgrind} { + # Set a custom template + r config set "proc-title-template" "TEST {title} {listen-addr} {port} {tls-port} {unixsocket} {config-file}" + set cmdline [read_proc_title [srv 0 pid]] + + assert_equal "TEST" [lindex $cmdline 0] + assert_match "*/redis-server" [lindex $cmdline 1] + + if {$::tls} { + set expect_port 0 + set expect_tls_port [srv 0 port] + } else { + set expect_port [srv 0 port] + set expect_tls_port 0 + } + set port [srv 0 port] + + assert_equal "$::host:$port" [lindex $cmdline 2] + assert_equal $expect_port [lindex $cmdline 3] + assert_equal $expect_tls_port [lindex $cmdline 4] + assert_match "*/tests/tmp/server.*/socket" [lindex $cmdline 5] + assert_match "*/tests/tmp/redis.conf.*" [lindex $cmdline 6] + + # Try setting a bad template + catch {r config set "proc-title-template" "{invalid-var}"} err + assert_match {*template format is invalid*} $err + } + } +} + diff --git a/tests/unit/pause.tcl b/tests/unit/pause.tcl index 9f5cfd607..67b684d36 100644 --- a/tests/unit/pause.tcl +++ b/tests/unit/pause.tcl @@ -1,4 +1,4 @@ -start_server {tags {"pause"}} { +start_server {tags {"pause network"}} { test "Test read commands are not blocked by client pause" { r client PAUSE 100000000 WRITE set rd [redis_deferring_client] diff --git a/tests/unit/protocol.tcl b/tests/unit/protocol.tcl index 4dfdc6f59..442c23de6 100644 --- a/tests/unit/protocol.tcl +++ b/tests/unit/protocol.tcl @@ -1,4 +1,4 @@ -start_server {tags {"protocol"}} { +start_server {tags {"protocol network"}} { test 
"Handle an empty query" { reconnect r write "\r\n" diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 9c7a43bf0..966565ae1 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -1,4 +1,4 @@ -start_server {tags {"pubsub"}} { +start_server {tags {"pubsub network"}} { proc __consume_subscribe_messages {client type channels} { set numsub -1 set counts {} diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index 9f9ff4df2..3981a2234 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -1,4 +1,4 @@ -start_server {tags {"scan"}} { +start_server {tags {"scan network"}} { test "SCAN basic" { r flushdb r debug populate 1000 diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl index 6fd152594..3aa3c0fba 100644 --- a/tests/unit/scripting.tcl +++ b/tests/unit/scripting.tcl @@ -330,6 +330,15 @@ start_server {tags {"scripting"}} { set e } {NOSCRIPT*} + test {SCRIPTING FLUSH ASYNC} { + for {set j 0} {$j < 100} {incr j} { + r script load "return $j" + } + assert { [string match "*number_of_cached_scripts:100*" [r info Memory]] } + r script flush async + assert { [string match "*number_of_cached_scripts:0*" [r info Memory]] } + } + test {SCRIPT EXISTS - can detect already defined scripts?} { r eval "return 1+1" 0 r script exists a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bd9 a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bda diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl index 88cf9dc42..7aaca47ca 100644 --- a/tests/unit/tracking.tcl +++ b/tests/unit/tracking.tcl @@ -1,4 +1,4 @@ -start_server {tags {"tracking"}} { +start_server {tags {"tracking network"}} { # Create a deferred client we'll use to redirect invalidation # messages to. 
set rd_redirection [redis_deferring_client] diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 79e58301a..2f3ea37c2 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -18,6 +18,181 @@ start_server {tags {"hash"}} { assert_encoding ziplist smallhash } + proc create_hash {key entries} { + r del $key + foreach entry $entries { + r hset $key [lindex $entry 0] [lindex $entry 1] + } + } + + proc get_keys {l} { + set res {} + foreach entry $l { + set key [lindex $entry 0] + lappend res $key + } + return $res + } + + foreach {type contents} "ziplist {{a 1} {b 2} {c 3}} hashtable {{a 1} {b 2} {[randstring 70 90 alpha] 3}}" { + set original_max_value [lindex [r config get hash-max-ziplist-value] 1] + r config set hash-max-ziplist-value 10 + create_hash myhash $contents + assert_encoding $type myhash + + test "HRANDFIELD - $type" { + unset -nocomplain myhash + array set myhash {} + for {set i 0} {$i < 100} {incr i} { + set key [r hrandfield myhash] + set myhash($key) 1 + } + assert_equal [lsort [get_keys $contents]] [lsort [array names myhash]] + } + r config set hash-max-ziplist-value $original_max_value + } + + test "HRANDFIELD with RESP3" { + r hello 3 + set res [r hrandfield myhash 3 withvalues] + assert_equal [llength $res] 3 + assert_equal [llength [lindex $res 1]] 2 + + set res [r hrandfield myhash 3] + assert_equal [llength $res] 3 + assert_equal [llength [lindex $res 1]] 1 + } + r hello 2 + + test "HRANDFIELD count of 0 is handled correctly" { + r hrandfield myhash 0 + } {} + + test "HRANDFIELD with <count> against non existing key" { + r hrandfield nonexisting_key 100 + } {} + + foreach {type contents} " + hashtable {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {[randstring 70 90 alpha] 10}} + ziplist {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {10 j}} " { + test "HRANDFIELD with <count> - $type" { + set original_max_value [lindex [r config get hash-max-ziplist-value] 1] + r config set 
hash-max-ziplist-value 10 + create_hash myhash $contents + assert_encoding $type myhash + + # create a dict for easy lookup + unset -nocomplain mydict + foreach {k v} [r hgetall myhash] { + dict append mydict $k $v + } + + # We'll stress different parts of the code, see the implementation + # of HRANDFIELD for more information, but basically there are + # four different code paths. + + # PATH 1: Use negative count. + + # 1) Check that it returns repeated elements with and without values. + set res [r hrandfield myhash -20] + assert_equal [llength $res] 20 + # again with WITHVALUES + set res [r hrandfield myhash -20 withvalues] + assert_equal [llength $res] 40 + + # 2) Check that all the elements actually belong to the original hash. + foreach {key val} $res { + assert {[dict exists $mydict $key]} + } + + # 3) Check that eventually all the elements are returned. + # Use both WITHVALUES and without + unset -nocomplain auxset + set iterations 1000 + while {$iterations != 0} { + incr iterations -1 + if {[expr {$iterations % 2}] == 0} { + set res [r hrandfield myhash -3 withvalues] + foreach {key val} $res { + dict append auxset $key $val + } + } else { + set res [r hrandfield myhash -3] + foreach key $res { + dict append auxset $key $val + } + } + if {[lsort [dict keys $mydict]] eq + [lsort [dict keys $auxset]]} { + break; + } + } + assert {$iterations != 0} + + # PATH 2: positive count (unique behavior) with requested size + # equal or greater than set size. + foreach size {10 20} { + set res [r hrandfield myhash $size] + assert_equal [llength $res] 10 + assert_equal [lsort $res] [lsort [dict keys $mydict]] + + # again with WITHVALUES + set res [r hrandfield myhash $size withvalues] + assert_equal [llength $res] 20 + assert_equal [lsort $res] [lsort $mydict] + } + + # PATH 3: Ask almost as elements as there are in the set. + # In this case the implementation will duplicate the original + # set and will remove random elements up to the requested size. 
+ # + # PATH 4: Ask a number of elements definitely smaller than + # the set size. + # + # We can test both the code paths just changing the size but + # using the same code. + foreach size {8 2} { + set res [r hrandfield myhash $size] + assert_equal [llength $res] $size + # again with WITHVALUES + set res [r hrandfield myhash $size withvalues] + assert_equal [llength $res] [expr {$size * 2}] + + # 1) Check that all the elements actually belong to the + # original set. + foreach ele [dict keys $res] { + assert {[dict exists $mydict $ele]} + } + + # 2) Check that eventually all the elements are returned. + # Use both WITHVALUES and without + unset -nocomplain auxset + set iterations 1000 + while {$iterations != 0} { + incr iterations -1 + if {[expr {$iterations % 2}] == 0} { + set res [r hrandfield myhash $size withvalues] + foreach {key value} $res { + dict append auxset $key $value + } + } else { + set res [r hrandfield myhash $size] + foreach key $res { + dict append auxset $key + } + } + if {[lsort [dict keys $mydict]] eq + [lsort [dict keys $auxset]]} { + break; + } + } + assert {$iterations != 0} + } + } + r config set hash-max-ziplist-value $original_max_value + } + + test {HSET/HLEN - Big hash creation} { array set bighash {} for {set i 0} {$i < 1024} {incr i} { diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 84c31c4e4..091ef7f0f 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -501,7 +501,7 @@ start_server { set iterations 1000 while {$iterations != 0} { incr iterations -1 - set res [r srandmember myset -10] + set res [r srandmember myset $size] foreach ele $res { set auxset($ele) 1 } diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index 16e961623..43968b26b 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -102,6 +102,91 @@ start_server {tags {"string"}} { assert_equal 20 [r get x] } + test "GETEX EX option" { + r del foo + r set foo bar + r getex foo ex 10 + 
assert_range [r ttl foo] 5 10 + } + + test "GETEX PX option" { + r del foo + r set foo bar + r getex foo px 10000 + assert_range [r pttl foo] 5000 10000 + } + + test "GETEX EXAT option" { + r del foo + r set foo bar + r getex foo exat [expr [clock seconds] + 10] + assert_range [r ttl foo] 5 10 + } + + test "GETEX PXAT option" { + r del foo + r set foo bar + r getex foo pxat [expr [clock milliseconds] + 10000] + assert_range [r pttl foo] 5000 10000 + } + + test "GETEX PERSIST option" { + r del foo + r set foo bar ex 10 + assert_range [r ttl foo] 5 10 + r getex foo persist + assert_equal -1 [r ttl foo] + } + + test "GETEX no option" { + r del foo + r set foo bar + r getex foo + assert_equal bar [r getex foo] + } + + test "GETEX syntax errors" { + set ex {} + catch {r getex foo non-existent-option} ex + set ex + } {*syntax*} + + test "GETEX no arguments" { + set ex {} + catch {r getex} ex + set ex + } {*wrong number of arguments*} + + test "GETDEL command" { + r del foo + r set foo bar + assert_equal bar [r getdel foo ] + assert_equal {} [r getdel foo ] + } + + test {GETDEL propagate as DEL command to replica} { + set repl [attach_to_replication_stream] + r set foo bar + r getdel foo + assert_replication_stream $repl { + {select *} + {set foo bar} + {del foo} + } + } + + test {GETEX without argument does not propagate to replica} { + set repl [attach_to_replication_stream] + r set foo bar + r getex foo + r del foo + assert_replication_stream $repl { + {select *} + {set foo bar} + {del foo} + } + } + test {MGET} { r flushdb r set foo BAR @@ -437,6 +522,17 @@ start_server {tags {"string"}} { assert {$ttl <= 10 && $ttl > 5} } + test "Extended SET EXAT option" { + r del foo + r set foo bar exat [expr [clock seconds] + 10] + assert_range [r ttl foo] 5 10 + } + + test "Extended SET PXAT option" { + r del foo + r set foo bar pxat [expr [clock milliseconds] + 10000] + assert_range [r ttl foo] 5 10 + } test {Extended SET using multiple options at once} { r set foo val assert 
{[r set foo bar xx px 10000] eq {OK}} diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 8318ebb63..c657c1e4e 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -7,6 +7,8 @@ start_server {tags {"zset"}} { } proc basics {encoding} { + set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1] + set original_max_value [lindex [r config get zset-max-ziplist-value] 1] if {$encoding == "ziplist"} { r config set zset-max-ziplist-entries 128 r config set zset-max-ziplist-value 64 @@ -713,6 +715,12 @@ start_server {tags {"zset"}} { assert_equal {b 3 c 5} [r zinter 2 zseta zsetb withscores] } + test "ZINTER RESP3 - $encoding" { + r hello 3 + assert_equal {{b 3.0} {c 5.0}} [r zinter 2 zseta zsetb withscores] + } + r hello 2 + test "ZINTERSTORE with weights - $encoding" { assert_equal 2 [r zinterstore zsetc 2 zseta zsetb weights 2 3] assert_equal {b 7 c 12} [r zrange zsetc 0 -1 withscores] @@ -919,6 +927,9 @@ start_server {tags {"zset"}} { assert_equal 0 [r zcard z1] assert_equal 1 [r zcard z2] } + + r config set zset-max-ziplist-entries $original_max_entries + r config set zset-max-ziplist-value $original_max_value } basics ziplist @@ -1016,6 +1027,8 @@ start_server {tags {"zset"}} { } proc stressers {encoding} { + set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1] + set original_max_value [lindex [r config get zset-max-ziplist-value] 1] if {$encoding == "ziplist"} { # Little extra to allow proper fuzzing in the sorting stresser r config set zset-max-ziplist-entries 256 @@ -1440,6 +1453,8 @@ start_server {tags {"zset"}} { r zadd zset 0 foo assert_equal {zset foo 0} [$rd read] } + r config set zset-max-ziplist-entries $original_max_entries + r config set zset-max-ziplist-value $original_max_value } tags {"slow"} { @@ -1481,6 +1496,12 @@ start_server {tags {"zset"}} { r zrange z2 0 -1 withscores } {a 1 b 2 c 3 d 4} + test {ZRANGESTORE RESP3} { + r hello 3 + r zrange z2 0 -1 withscores + } {{a 
1.0} {b 2.0} {c 3.0} {d 4.0}} + r hello 2 + test {ZRANGESTORE range} { set res [r zrangestore z2 z1 1 2] assert_equal $res 2 @@ -1554,4 +1575,171 @@ start_server {tags {"zset"}} { catch {r zrangebyscore z1 0 -1 REV} err assert_match "*syntax*" $err } + + proc get_keys {l} { + set res {} + foreach {score key} $l { + lappend res $key + } + return $res + } + + foreach {type contents} "ziplist {1 a 2 b 3 c} skiplist {1 a 2 b 3 [randstring 70 90 alpha]}" { + set original_max_value [lindex [r config get zset-max-ziplist-value] 1] + r config set zset-max-ziplist-value 10 + create_zset myzset $contents + assert_encoding $type myzset + + test "ZRANDMEMBER - $type" { + unset -nocomplain myzset + array set myzset {} + for {set i 0} {$i < 100} {incr i} { + set key [r zrandmember myzset] + set myzset($key) 1 + } + assert_equal [lsort [get_keys $contents]] [lsort [array names myzset]] + } + r config set zset-max-ziplist-value $original_max_value + } + + test "ZRANDMEMBER with RESP3" { + r hello 3 + set res [r zrandmember myzset 3 withscores] + assert_equal [llength $res] 3 + assert_equal [llength [lindex $res 1]] 2 + + set res [r zrandmember myzset 3] + assert_equal [llength $res] 3 + assert_equal [llength [lindex $res 1]] 1 + } + r hello 2 + + test "ZRANDMEMBER count of 0 is handled correctly" { + r zrandmember myzset 0 + } {} + + test "ZRANDMEMBER with <count> against non existing key" { + r zrandmember nonexisting_key 100 + } {} + + foreach {type contents} " + skiplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 [randstring 70 90 alpha]} + ziplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 j} " { + test "ZRANDMEMBER with <count> - $type" { + set original_max_value [lindex [r config get zset-max-ziplist-value] 1] + r config set zset-max-ziplist-value 10 + create_zset myzset $contents + assert_encoding $type myzset + + # create a dict for easy lookup + unset -nocomplain mydict + foreach {k v} [r zrange myzset 0 -1 withscores] { + dict append mydict $k $v + } + + # We'll stress 
different parts of the code, see the implementation + # of ZRANDMEMBER for more information, but basically there are + # four different code paths. + + # PATH 1: Use negative count. + + # 1) Check that it returns repeated elements with and without values. + set res [r zrandmember myzset -20] + assert_equal [llength $res] 20 + # again with WITHSCORES + set res [r zrandmember myzset -20 withscores] + assert_equal [llength $res] 40 + + # 2) Check that all the elements actually belong to the original zset. + foreach {key val} $res { + assert {[dict exists $mydict $key]} + } + + # 3) Check that eventually all the elements are returned. + # Use both WITHSCORES and without + unset -nocomplain auxset + set iterations 1000 + while {$iterations != 0} { + incr iterations -1 + if {[expr {$iterations % 2}] == 0} { + set res [r zrandmember myzset -3 withscores] + foreach {key val} $res { + dict append auxset $key $val + } + } else { + set res [r zrandmember myzset -3] + foreach key $res { + dict append auxset $key $val + } + } + if {[lsort [dict keys $mydict]] eq + [lsort [dict keys $auxset]]} { + break; + } + } + assert {$iterations != 0} + + # PATH 2: positive count (unique behavior) with requested size + # equal or greater than set size. + foreach size {10 20} { + set res [r zrandmember myzset $size] + assert_equal [llength $res] 10 + assert_equal [lsort $res] [lsort [dict keys $mydict]] + + # again with WITHSCORES + set res [r zrandmember myzset $size withscores] + assert_equal [llength $res] 20 + assert_equal [lsort $res] [lsort $mydict] + } + + # PATH 3: Ask almost as elements as there are in the set. + # In this case the implementation will duplicate the original + # set and will remove random elements up to the requested size. + # + # PATH 4: Ask a number of elements definitely smaller than + # the set size. + # + # We can test both the code paths just changing the size but + # using the same code. 
+ foreach size {8 2} { + set res [r zrandmember myzset $size] + assert_equal [llength $res] $size + # again with WITHSCORES + set res [r zrandmember myzset $size withscores] + assert_equal [llength $res] [expr {$size * 2}] + + # 1) Check that all the elements actually belong to the + # original set. + foreach ele [dict keys $res] { + assert {[dict exists $mydict $ele]} + } + + # 2) Check that eventually all the elements are returned. + # Use both WITHSCORES and without + unset -nocomplain auxset + set iterations 1000 + while {$iterations != 0} { + incr iterations -1 + if {[expr {$iterations % 2}] == 0} { + set res [r zrandmember myzset $size withscores] + foreach {key value} $res { + dict append auxset $key $value + } + } else { + set res [r zrandmember myzset $size] + foreach key $res { + dict append auxset $key + } + } + if {[lsort [dict keys $mydict]] eq + [lsort [dict keys $auxset]]} { + break; + } + } + assert {$iterations != 0} + } + } + r config set zset-max-ziplist-value $original_max_value + } + } diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl index 0a4965c20..78c3d8202 100644 --- a/tests/unit/wait.tcl +++ b/tests/unit/wait.tcl @@ -1,6 +1,6 @@ source tests/support/cli.tcl -start_server {tags {"wait"}} { +start_server {tags {"wait network"}} { start_server {} { set slave [srv 0 client] set slave_host [srv 0 host] |