summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--CONTRIBUTING4
-rw-r--r--README.md (renamed from README)75
-rw-r--r--deps/Makefile2
-rw-r--r--deps/lua/src/lua_cmsgpack.c53
-rw-r--r--redis.conf105
-rw-r--r--src/Makefile28
-rw-r--r--src/anet.c29
-rw-r--r--src/anet.h3
-rw-r--r--src/aof.c44
-rw-r--r--src/cluster.c181
-rw-r--r--src/cluster.h10
-rw-r--r--src/config.c117
-rw-r--r--src/config.h11
-rw-r--r--src/crc64.c8
-rw-r--r--src/crc64.h4
-rw-r--r--src/db.c18
-rw-r--r--src/debug.c66
-rw-r--r--src/dict.c132
-rw-r--r--src/dict.h2
-rw-r--r--src/endianconv.c8
-rw-r--r--src/endianconv.h4
-rw-r--r--src/help.h4
-rw-r--r--src/intset.c51
-rw-r--r--src/intset.h4
-rw-r--r--src/latency.c12
-rw-r--r--src/latency.h4
-rw-r--r--src/lzfP.h56
-rw-r--r--src/lzf_c.c42
-rw-r--r--src/lzf_d.c59
-rw-r--r--src/networking.c37
-rw-r--r--src/object.c16
-rw-r--r--src/quicklist.c2650
-rw-r--r--src/quicklist.h169
-rw-r--r--src/rdb.c394
-rw-r--r--src/rdb.h14
-rw-r--r--src/redis-benchmark.c16
-rw-r--r--src/redis-check-rdb.c (renamed from src/redis-check-dump.c)253
-rw-r--r--src/redis-cli.c308
-rwxr-xr-xsrc/redis-trib.rb88
-rw-r--r--src/redis.c280
-rw-r--r--src/redis.h65
-rw-r--r--src/replication.c31
-rw-r--r--src/scripting.c38
-rw-r--r--src/sds.c15
-rw-r--r--src/sds.h4
-rw-r--r--src/sentinel.c50
-rw-r--r--src/sha1.c13
-rw-r--r--src/sha1.h7
-rw-r--r--src/solarisfixes.h4
-rw-r--r--src/sort.c52
-rw-r--r--src/t_list.c360
-rw-r--r--src/t_set.c196
-rw-r--r--src/t_string.c20
-rw-r--r--src/t_zset.c2
-rw-r--r--src/util.c125
-rw-r--r--src/util.h4
-rw-r--r--src/version.h2
-rw-r--r--src/ziplist.c312
-rw-r--r--src/ziplist.h7
-rw-r--r--src/zipmap.c10
-rw-r--r--src/zipmap.h4
-rw-r--r--src/zmalloc.c57
-rw-r--r--src/zmalloc.h1
-rw-r--r--tests/cluster/run.tcl1
-rw-r--r--tests/cluster/tests/04-resharding.tcl13
-rw-r--r--tests/cluster/tests/includes/init-tests.tcl9
-rw-r--r--tests/instances.tcl55
-rw-r--r--tests/integration/aof.tcl24
-rw-r--r--tests/integration/logging.tcl24
-rw-r--r--tests/integration/rdb.tcl2
-rw-r--r--tests/integration/replication-4.tcl19
-rw-r--r--tests/support/cluster.tcl2
-rw-r--r--tests/support/server.tcl2
-rw-r--r--tests/support/test.tcl7
-rw-r--r--tests/test_helper.tcl1
-rw-r--r--tests/unit/aofrw.tcl4
-rw-r--r--tests/unit/basic.tcl13
-rw-r--r--tests/unit/dump.tcl4
-rw-r--r--tests/unit/memefficiency.tcl7
-rw-r--r--tests/unit/scripting.tcl2
-rw-r--r--tests/unit/sort.tcl13
-rw-r--r--tests/unit/type/list-2.tcl9
-rw-r--r--tests/unit/type/list-3.tcl3
-rw-r--r--tests/unit/type/list.tcl169
-rw-r--r--tests/unit/type/set.tcl69
-rw-r--r--utils/cluster_fail_time.tcl50
-rw-r--r--utils/create-cluster/.gitignore1
-rw-r--r--utils/create-cluster/README27
-rwxr-xr-xutils/create-cluster/create-cluster95
-rw-r--r--utils/hashtable/README13
-rw-r--r--utils/hashtable/rehashing.c132
92 files changed, 6182 insertions, 1329 deletions
diff --git a/.gitignore b/.gitignore
index d3b1c2f24..3d346fbcf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
dump.rdb
redis-benchmark
redis-check-aof
+redis-check-rdb
redis-check-dump
redis-cli
redis-sentinel
diff --git a/CONTRIBUTING b/CONTRIBUTING
index f7b6836f7..b416b9561 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -20,7 +20,7 @@ each source file that you contribute.
# How to provide a patch for a new feature
-1. Drop a message to the Redis Google Group with a proposal of semantics/API.
+1. If it is a major feature or a semantical change, write an RCP (Redis Change Proposal). Check the documentation here: https://github.com/redis/redis-rcp
2. If in step 1 you get an acknowledge from the project leaders, use the
following procedure to submit a patch:
@@ -31,4 +31,6 @@ each source file that you contribute.
d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
e. Done :)
+For minor fixes just open a pull request on Github.
+
Thanks!
diff --git a/README b/README.md
index b7a12b828..0565418f6 100644
--- a/README
+++ b/README.md
@@ -1,18 +1,35 @@
-Where to find complete Redis documentation?
--------------------------------------------
+This README is just a fast *quick start* document. You can find more detailed documentation at http://redis.io.
-This README is just a fast "quick start" document. You can find more detailed
-documentation at http://redis.io
+What is Redis?
+--------------
+
+Redis is often referred to as a *data structures* server. What this means is that Redis provides access to mutable data structures via a set of commands, which are sent using a *server-client* model with TCP sockets and a simple protocol. So different processes can query and modify the same data structures in a shared way.
+
+Data structures implemented into Redis have a few special properties:
+
+* Redis cares to store them on disk, even if they are always served and modified in the server memory. This means that Redis is fast, but it is also non-volatile.
+* The implementation of data structures emphasizes memory efficiency, so data structures inside Redis will likely use less memory compared to the same data structure modeled using a high-level programming language.
+* Redis offers a number of features that are natural to find into a database, like replication, tunable levels of durability, cluster, high availability.
+
+Another good example is to think of Redis as a more complex version of memcached, where the operations are not just SETs and GETs, but operations to work with complex data types like Lists, Sets, ordered data structures, and so forth.
+
+If you want to know more, this is a list of selected starting points:
+
+* Introduction to Redis data types. http://redis.io/topics/data-types-intro
+* Try Redis directly inside your browser. http://try.redis.io
+* The full list of Redis commands. http://redis.io/commands
+* There is much more inside the Redis official documentation. http://redis.io/documentation
Building Redis
--------------
Redis can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD.
-We support big endian and little endian architectures.
+We support big endian and little endian architectures, and both 32 bit
+and 64 bit systems.
It may compile on Solaris derived systems (for instance SmartOS) but our
-support for this platform is "best effort" and Redis is not guaranteed to
-work as well as in Linux, OSX, and *BSD there.
+support for this platform is *best effort* and Redis is not guaranteed to
+work as well as in Linux, OSX, and \*BSD there.
It is as simple as:
@@ -26,20 +43,39 @@ After building Redis is a good idea to test it, using:
% make test
+Fixing build problems with dependencies or cached build options
+---------
+
+Redis has some dependencies which are included into the `deps` directory.
+`make` does not rebuild dependencies automatically, even if something in the
+source code of dependencies is changed.
+
+When you update the source code with `git pull` or when code inside the
+dependencies tree is modified in any other way, make sure to use the following
+command in order to really clean everything and rebuild from scratch:
+
+ make distclean
+
+This will clean: jemalloc, lua, hiredis, linenoise.
+
+Also if you force certain build options like 32bit target, no C compiler
+optimizations (for debugging purposes), and other similar build time options,
+those options are cached indefinitely until you issue a `make distclean`
+command.
+
Fixing problems building 32 bit binaries
---------
If after building Redis with a 32 bit target you need to rebuild it
with a 64 bit target, or the other way around, you need to perform a
-"make distclean" in the root directory of the Redis distribution.
+`make distclean` in the root directory of the Redis distribution.
In case of build errors when trying to build a 32 bit binary of Redis, try
the following steps:
* Install the packages libc6-dev-i386 (also try g++-multilib).
-* Try using the following command line instead of "make 32bit":
-
- make CFLAGS="-m32 -march=native" LDFLAGS="-m32"
+* Try using the following command line instead of `make 32bit`:
+ `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"`
Allocator
---------
@@ -107,11 +143,9 @@ then in another terminal try the following:
(integer) 1
redis> incr mycounter
(integer) 2
- redis>
+ redis>
-You can find the list of all the available commands here:
-
- http://redis.io/commands
+You can find the list of all the available commands at http://redis.io/commands.
Installing Redis
-----------------
@@ -120,7 +154,7 @@ In order to install Redis binaries into /usr/local/bin just use:
% make install
-You can use "make PREFIX=/some/other/directory install" if you wish to use a
+You can use `make PREFIX=/some/other/directory install` if you wish to use a
different destination.
Make install will just install binaries in your system, but will not configure
@@ -137,7 +171,7 @@ to run Redis properly as a background daemon that will start again on
system reboots.
You'll be able to stop and start Redis using the script named
-/etc/init.d/redis_<portnumber>, for instance /etc/init.d/redis_6379.
+`/etc/init.d/redis_<portnumber>`, for instance `/etc/init.d/redis_6379`.
Code contributions
---
@@ -145,10 +179,13 @@ Code contributions
Note: by contributing code to the Redis project in any form, including sending
a pull request via Github, a code fragment or patch via private email or
public discussion groups, you agree to release your code under the terms
-of the BSD license that you can find in the COPYING file included in the Redis
+of the BSD license that you can find in the [COPYING][1] file included in the Redis
source distribution.
-Please see the CONTRIBUTING file in this source distribution for more
+Please see the [CONTRIBUTING][2] file in this source distribution for more
information.
Enjoy!
+
+[1]: https://github.com/antirez/redis/blob/unstable/COPYING
+[2]: https://github.com/antirez/redis/blob/unstable/CONTRIBUTING
diff --git a/deps/Makefile b/deps/Makefile
index 1f623ea7b..71f6d3a2c 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -58,7 +58,7 @@ ifeq ($(uname_S),SunOS)
LUA_CFLAGS= -D__C99FEATURES__=1
endif
-LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL $(CFLAGS)
+LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' $(CFLAGS)
LUA_LDFLAGS+= $(LDFLAGS)
# lua's Makefile defines AR="ar rcu", which is unusual, and makes it more
# challenging to cross-compile lua (and redis). These defines make it easier
diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c
index 4ccf07f6d..e13f053d2 100644
--- a/deps/lua/src/lua_cmsgpack.c
+++ b/deps/lua/src/lua_cmsgpack.c
@@ -31,12 +31,10 @@
#define BITS_32 0
#endif
-#if LUA_VERSION_NUM < 503
- #if BITS_32
- #define lua_pushunsigned(L, n) lua_pushnumber(L, n)
- #else
- #define lua_pushunsigned(L, n) lua_pushinteger(L, n)
- #endif
+#if BITS_32
+ #define lua_pushunsigned(L, n) lua_pushnumber(L, n)
+#else
+ #define lua_pushunsigned(L, n) lua_pushinteger(L, n)
#endif
/* =============================================================================
@@ -256,7 +254,7 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
}
} else {
if (n >= -32) {
- b[0] = ((char)n); /* negative fixnum */
+ b[0] = ((signed char)n); /* negative fixnum */
enclen = 1;
} else if (n >= -128) {
b[0] = 0xd0; /* int 8 */
@@ -544,6 +542,7 @@ static int mp_pack(lua_State *L) {
void mp_decode_to_lua_type(lua_State *L, mp_cur *c);
void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
int index = 1;
lua_newtable(L);
@@ -556,6 +555,7 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
}
void mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
lua_newtable(L);
while(len--) {
mp_decode_to_lua_type(L,c); /* key */
@@ -588,7 +588,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xd0: /* int 8 */
mp_cur_need(c,2);
- lua_pushinteger(L,(char)c->p[1]);
+ lua_pushinteger(L,(signed char)c->p[1]);
mp_cur_consume(c,2);
break;
case 0xcd: /* uint 16 */
@@ -699,13 +699,14 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdb: /* raw 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
- mp_cur_need(c,5+l);
- lua_pushlstring(L,(char*)c->p+5,l);
- mp_cur_consume(c,5+l);
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
+ mp_cur_consume(c,5);
+ mp_cur_need(c,l);
+ lua_pushlstring(L,(char*)c->p,l);
+ mp_cur_consume(c,l);
}
break;
case 0xdc: /* array 16 */
@@ -719,10 +720,10 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdd: /* array 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_array(L,c,l);
}
@@ -738,10 +739,10 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdf: /* map 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_hash(L,c,l);
}
@@ -830,15 +831,15 @@ static int mp_unpack(lua_State *L) {
}
static int mp_unpack_one(lua_State *L) {
- int offset = luaL_optint(L, 2, 0);
+ int offset = luaL_optinteger(L, 2, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
return mp_unpack_full(L, 1, offset);
}
static int mp_unpack_limit(lua_State *L) {
- int limit = luaL_checkint(L, 2);
- int offset = luaL_optint(L, 3, 0);
+ int limit = luaL_checkinteger(L, 2);
+ int offset = luaL_optinteger(L, 3, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
diff --git a/redis.conf b/redis.conf
index 7bb94fbe9..d0684e86a 100644
--- a/redis.conf
+++ b/redis.conf
@@ -30,15 +30,30 @@
# include /path/to/local.conf
# include /path/to/other.conf
-################################ GENERAL #####################################
+################################## NETWORK #####################################
-# By default Redis does not run as a daemon. Use 'yes' if you need it.
-# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
-daemonize no
-
-# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
-# default. You can specify a custom pid file location here.
-pidfile /var/run/redis.pid
+# By default, if no "bind" configuration directive is specified, Redis listens
+# for connections from all the network interfaces available on the server.
+# It is possible to listen to just one or multiple selected interfaces using
+# the "bind" configuration directive, followed by one or more IP addresses.
+#
+# Examples:
+#
+# bind 192.168.1.100 10.0.0.1
+# bind 127.0.0.1 ::1
+#
+# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the
+# internet, binding to all the interfaces is dangerous and will expose the
+# instance to everybody on the internet. So by default we uncomment the
+# following bind directive, that will force Redis to listen only into
+# the IPv4 loopback interface address (this means Redis will be able to
+# accept connections only from clients running into the same computer it
+# is running).
+#
+# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES
+# JUST COMMENT OUT THE FOLLOWING LINE.
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+bind 127.0.0.1
# Accept connections on the specified port, default is 6379.
# If port 0 is specified Redis will not listen on a TCP socket.
@@ -53,16 +68,8 @@ port 6379
# in order to get the desired effect.
tcp-backlog 511
-# By default Redis listens for connections from all the network interfaces
-# available on the server. It is possible to listen to just one or multiple
-# interfaces using the "bind" configuration directive, followed by one or
-# more IP addresses.
-#
-# Examples:
+# Unix socket.
#
-# bind 192.168.1.100 10.0.0.1
-# bind 127.0.0.1
-
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen
# on a unix socket when not specified.
@@ -89,6 +96,27 @@ timeout 0
# A reasonable value for this option is 60 seconds.
tcp-keepalive 0
+################################# GENERAL #####################################
+
+# By default Redis does not run as a daemon. Use 'yes' if you need it.
+# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
+daemonize no
+
+# If you run Redis from upstart or systemd, Redis can interact with your
+# supervision tree. Options:
+# supervised no - no supervision interaction
+# supervised upstart - signal upstart by putting Redis into SIGSTOP mode
+# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
+# supervised auto - detect upstart or systemd method based on
+# UPSTART_JOB or NOTIFY_SOCKET environment variables
+# Note: these supervision methods only signal "process is ready."
+# They do not enable continuous liveness pings back to your supervisor.
+supervised no
+
+# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
+# default. You can specify a custom pid file location here.
+pidfile /var/run/redis.pid
+
# Specify the server verbosity level.
# This can be one of:
# debug (a lot of information, useful for development/testing)
@@ -619,6 +647,12 @@ lua-time-limit 5000
################################ REDIS CLUSTER ###############################
#
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however
+# in order to mark it as "mature" we need to wait for a non trivial percentage
+# of users to deploy it in production.
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#
# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
# started as cluster nodes can. In order to start a Redis instance as a
# cluster node enable the cluster support uncommenting the following:
@@ -764,7 +798,7 @@ slowlog-max-len 128
# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
latency-monitor-threshold 0
-############################# Event notification ##############################
+############################# EVENT NOTIFICATION ##############################
# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/notifications
@@ -818,11 +852,36 @@ notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
-# Similarly to hashes, small lists are also encoded in a special way in order
-# to save a lot of space. The special representation is only used when
-# you are under the following limits:
-list-max-ziplist-entries 512
-list-max-ziplist-value 64
+# Lists are also encoded in a special way to save a lot of space.
+# The number of entries allowed per internal list node can be specified
+# as a fixed maximum size or a maximum number of elements.
+# For a fixed maximum size, use -5 through -1, meaning:
+# -5: max size: 64 Kb <-- not recommended for normal workloads
+# -4: max size: 32 Kb <-- not recommended
+# -3: max size: 16 Kb <-- probably not recommended
+# -2: max size: 8 Kb <-- good
+# -1: max size: 4 Kb <-- good
+# Positive numbers mean store up to _exactly_ that number of elements
+# per list node.
+# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),
+# but if your use case is unique, adjust the settings as necessary.
+list-max-ziplist-size -2
+
+# Lists may also be compressed.
+# Compress depth is the number of quicklist ziplist nodes from *each* side of
+# the list to *exclude* from compression. The head and tail of the list
+# are always uncompressed for fast push/pop operations. Settings are:
+# 0: disable all list compression
+# 1: depth 1 means "don't start compressing until after 1 node into the list,
+# going from either the head or tail"
+# So: [head]->node->node->...->node->[tail]
+# [head], [tail] will always be uncompressed; inner nodes will compress.
+# 2: [head]->[next]->node->node->...->node->[prev]->[tail]
+# 2 here means: don't compress head or head->next or tail->prev or tail,
+# but compress all nodes between them.
+# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail]
+# etc.
+list-compress-depth 0
# Sets have a special encoding in just one case: when a set is composed
# of just strings that happen to be integers in radix 10 in the range
diff --git a/src/Makefile b/src/Makefile
index 57b8dbb85..271ab34d8 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -18,7 +18,7 @@ OPTIMIZATION?=-O2
DEPENDENCY_TARGETS=hiredis linenoise lua
# Default settings
-STD=-std=c99 -pedantic
+STD=-std=c99 -pedantic -DREDIS_STATIC=''
WARN=-Wall -W
OPT=$(OPTIMIZATION)
@@ -46,6 +46,10 @@ ifeq ($(USE_JEMALLOC),yes)
MALLOC=jemalloc
endif
+ifeq ($(USE_JEMALLOC),no)
+ MALLOC=libc
+endif
+
# Override default settings if possible
-include .make-settings
@@ -58,7 +62,7 @@ ifeq ($(uname_S),SunOS)
# SunOS
INSTALL=cp -pf
FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6
- FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread
+ FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt
else
ifeq ($(uname_S),Darwin)
# Darwin (nothing to do)
@@ -113,17 +117,16 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
REDIS_BENCHMARK_NAME=redis-benchmark
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o redis-benchmark.o
-REDIS_CHECK_DUMP_NAME=redis-check-dump
-REDIS_CHECK_DUMP_OBJ=redis-check-dump.o lzf_c.o lzf_d.o crc64.o
+REDIS_CHECK_RDB_NAME=redis-check-rdb
REDIS_CHECK_AOF_NAME=redis-check-aof
REDIS_CHECK_AOF_OBJ=redis-check-aof.o
-all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME)
+all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME)
@echo ""
@echo "Hint: It's a good idea to run 'make test' ;)"
@echo ""
@@ -174,6 +177,10 @@ $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)
+# redis-check-rdb
+$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME)
+
# redis-cli
$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS)
@@ -182,10 +189,6 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a $(FINAL_LIBS)
-# redis-check-dump
-$(REDIS_CHECK_DUMP_NAME): $(REDIS_CHECK_DUMP_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
-
# redis-check-aof
$(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
@@ -197,7 +200,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_CC) -c $<
clean:
- rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
+ rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
.PHONY: clean
@@ -253,5 +256,6 @@ install: all
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_BENCHMARK_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CLI_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_DUMP_NAME) $(INSTALL_BIN)
+ $(REDIS_INSTALL) $(REDIS_CHECK_RDB_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CHECK_AOF_NAME) $(INSTALL_BIN)
+ @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
diff --git a/src/anet.c b/src/anet.c
index 1e5d85495..0ec5c55a2 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -391,7 +391,7 @@ int anetUnixNonBlockConnect(char *err, char *path)
* (unless error or EOF condition is encountered) */
int anetRead(int fd, char *buf, int count)
{
- int nread, totlen = 0;
+ ssize_t nread, totlen = 0;
while(totlen != count) {
nread = read(fd,buf,count-totlen);
if (nread == 0) return totlen;
@@ -406,7 +406,7 @@ int anetRead(int fd, char *buf, int count)
* (unless error is encountered) */
int anetWrite(int fd, char *buf, int count)
{
- int nwritten, totlen = 0;
+ ssize_t nwritten, totlen = 0;
while(totlen != count) {
nwritten = write(fd,buf,count-totlen);
if (nwritten == 0) return totlen;
@@ -589,6 +589,23 @@ error:
return -1;
}
+/* Format an IP,port pair into something easy to parse. If IP is IPv6
+ * (matches for ":"), the ip is surrounded by []. IP and port are just
+ * separated by colons. This is the standard to display addresses within Redis. */
+int anetFormatAddr(char *buf, size_t buf_len, char *ip, int port) {
+ return snprintf(buf,buf_len, strchr(ip,':') ?
+ "[%s]:%d" : "%s:%d", ip, port);
+}
+
+/* Like anetFormatAddr() but extract ip and port from the socket's peer. */
+int anetFormatPeer(int fd, char *buf, size_t buf_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetPeerToString(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(buf, buf_len, ip, port);
+}
+
int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
struct sockaddr_storage sa;
socklen_t salen = sizeof(sa);
@@ -610,3 +627,11 @@ int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
}
return 0;
}
+
+int anetFormatSock(int fd, char *fmt, size_t fmt_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetSockName(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(fmt, fmt_len, ip, port);
+}
diff --git a/src/anet.h b/src/anet.h
index b94a0cd17..ea9c77f2e 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -70,5 +70,8 @@ int anetSendTimeout(char *err, int fd, long long ms);
int anetPeerToString(int fd, char *ip, size_t ip_len, int *port);
int anetKeepAlive(char *err, int fd, int interval);
int anetSockName(int fd, char *ip, size_t ip_len, int *port);
+int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port);
+int anetFormatPeer(int fd, char *fmt, size_t fmt_len);
+int anetFormatSock(int fd, char *fmt, size_t fmt_len);
#endif
diff --git a/src/aof.c b/src/aof.c
index 0af519bfa..dc7d11873 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -770,52 +770,29 @@ int rioWriteBulkObject(rio *r, robj *obj) {
int rewriteListObject(rio *r, robj *key, robj *o) {
long long count = 0, items = listTypeLength(o);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = o->ptr;
- unsigned char *p = ziplistIndex(zl,0);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *list = o->ptr;
+ quicklistIter *li = quicklistGetIterator(list, AL_START_HEAD);
+ quicklistEntry entry;
- while(ziplistGet(p,&vstr,&vlen,&vlong)) {
+ while (quicklistNext(li,&entry)) {
if (count == 0) {
int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
-
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (vstr) {
- if (rioWriteBulkString(r,(char*)vstr,vlen) == 0) return 0;
- } else {
- if (rioWriteBulkLongLong(r,vlong) == 0) return 0;
- }
- p = ziplistNext(zl,p);
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listNode *ln;
- listIter li;
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
-
- if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
- if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
- if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (entry.value) {
+ if (rioWriteBulkString(r,(char*)entry.value,entry.sz) == 0) return 0;
+ } else {
+ if (rioWriteBulkLongLong(r,entry.longval) == 0) return 0;
}
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
+ quicklistReleaseIterator(li);
} else {
redisPanic("Unknown list encoding");
}
@@ -1105,6 +1082,7 @@ int rewriteAppendOnlyFile(char *filename) {
}
}
dictReleaseIterator(di);
+ di = NULL;
}
/* Do an initial slow fsync here while the parent is still sending
diff --git a/src/cluster.c b/src/cluster.c
index bb688425b..9a0a228dc 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -40,6 +40,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
+#include <math.h>
/* A global reference to myself is handy to make code more clear.
* Myself always points to server.cluster->myself, that is, the clusterNode
@@ -479,6 +480,7 @@ void clusterInit(void) {
* the IP address via MEET messages. */
myself->port = server.port;
+ server.cluster->mf_end = 0;
resetManualFailover();
}
@@ -593,7 +595,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
redisLog(REDIS_VERBOSE,
- "Accepting cluster node: %s", server.neterr);
+ "Error accepting cluster node: %s", server.neterr);
return;
}
anetNonBlock(NULL,cfd);
@@ -782,8 +784,11 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
for (j = 0; j < master->numslaves; j++) {
if (master->slaves[j] == slave) {
- memmove(master->slaves+j,master->slaves+(j+1),
- (master->numslaves-1)-j);
+ if ((j+1) < master->numslaves) {
+ int remaining_slaves = (master->numslaves - j) - 1;
+ memmove(master->slaves+j,master->slaves+(j+1),
+ (sizeof(*master->slaves) * remaining_slaves));
+ }
master->numslaves--;
return REDIS_OK;
}
@@ -818,15 +823,30 @@ int clusterCountNonFailingSlaves(clusterNode *n) {
return okslaves;
}
+/* Low level cleanup of the node structure. Only called by clusterDelNode(). */
void freeClusterNode(clusterNode *n) {
sds nodename;
+ int j;
+
+ /* If the node is a master with associated slaves, we have to set
+ * all the slaves->slaveof fields to NULL (unknown). */
+ if (nodeIsMaster(n)) {
+ for (j = 0; j < n->numslaves; j++)
+ n->slaves[j]->slaveof = NULL;
+ }
+
+ /* Remove this node from the list of slaves of its master. */
+ if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
+ /* Unlink from the set of nodes. */
nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN);
redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
sdsfree(nodename);
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof, n);
+
+ /* Release link and associated data structures. */
if (n->link) freeClusterLink(n->link);
listRelease(n->fail_reports);
+ zfree(n->slaves);
zfree(n);
}
@@ -839,11 +859,16 @@ int clusterAddNode(clusterNode *node) {
return (retval == DICT_OK) ? REDIS_OK : REDIS_ERR;
}
-/* Remove a node from the cluster:
- * 1) Mark all the nodes handled by it as unassigned.
- * 2) Remove all the failure reports sent by this node.
- * 3) Free the node, that will in turn remove it from the hash table
- * and from the list of slaves of its master, if it is a slave node.
+/* Remove a node from the cluster. The function performs the high level
+ * cleanup, calling freeClusterNode() for the low level cleanup.
+ * Here we do the following:
+ *
+ * 1) Mark all the slots handled by it as unassigned.
+ * 2) Remove all the failure reports sent by this node and referenced by
+ * other nodes.
+ * 3) Free the node with freeClusterNode() that will in turn remove it
+ * from the hash table and from the list of slaves of its master, if
+ * it is a slave node.
*/
void clusterDelNode(clusterNode *delnode) {
int j;
@@ -870,11 +895,7 @@ void clusterDelNode(clusterNode *delnode) {
}
dictReleaseIterator(di);
- /* 3) Remove this node from its master's slaves if needed. */
- if (nodeIsSlave(delnode) && delnode->slaveof)
- clusterNodeRemoveSlave(delnode->slaveof,delnode);
-
- /* 4) Free the node, unlinking it from the cluster. */
+ /* 3) Free the node, unlinking it from the cluster. */
freeClusterNode(delnode);
}
@@ -1118,6 +1139,7 @@ int clusterStartHandshake(char *ip, int port) {
/* Set norm_ip as the normalized string representation of the node
* IP address. */
+ memset(norm_ip,0,REDIS_IP_STR_LEN);
if (sa.ss_family == AF_INET)
inet_ntop(AF_INET,
(void*)&(((struct sockaddr_in *)&sa)->sin_addr),
@@ -1232,7 +1254,7 @@ void nodeIp2String(char *buf, clusterLink *link) {
* The function returns 0 if the node address is still the same,
* otherwise 1 is returned. */
int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
- char ip[REDIS_IP_STR_LEN];
+ char ip[REDIS_IP_STR_LEN] = {0};
/* We don't proceed if the link is the same as the sender link, as this
* function is designed to see if the node link is consistent with the
@@ -1463,7 +1485,8 @@ int clusterProcessPacket(clusterLink *link) {
/* Perform sanity checks */
if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
- if (ntohs(hdr->ver) != 0) return 1; /* Can't handle versions other than 0.*/
+ if (ntohs(hdr->ver) != CLUSTER_PROTO_VER)
+ return 1; /* Can't handle versions other than the current one.*/
if (totlen > sdslen(link->rcvbuf)) return 1;
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
@@ -1544,8 +1567,12 @@ int clusterProcessPacket(clusterLink *link) {
* later if we changed address, and those nodes will use our
* official address to connect to us. So by obtaining this address
* from the socket is a simple way to discover / update our own
- * address in the cluster without it being hardcoded in the config. */
- if (type == CLUSTERMSG_TYPE_MEET) {
+ * address in the cluster without it being hardcoded in the config.
+ *
+ * However if we don't have an address at all, we update the address
+ * even with a normal PING packet. If it's wrong it will be fixed
+ * by MEET later. */
+ if (type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') {
char ip[REDIS_IP_STR_LEN];
if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 &&
@@ -1604,7 +1631,7 @@ int clusterProcessPacket(clusterLink *link) {
}
/* Free this node as we already have it. This will
* cause the link to be freed as well. */
- freeClusterNode(link->node);
+ clusterDelNode(link->node);
return 0;
}
@@ -2011,7 +2038,8 @@ void clusterBroadcastMessage(void *buf, size_t len) {
dictReleaseIterator(di);
}
-/* Build the message header */
+/* Build the message header. hdr must point to a buffer at least
+ * sizeof(clusterMsg) in bytes. */
void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
int totlen = 0;
uint64_t offset;
@@ -2025,6 +2053,7 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
myself->slaveof : myself;
memset(hdr,0,sizeof(*hdr));
+ hdr->ver = htons(CLUSTER_PROTO_VER);
hdr->sig[0] = 'R';
hdr->sig[1] = 'C';
hdr->sig[2] = 'm';
@@ -2071,40 +2100,90 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
/* Send a PING or PONG packet to the specified node, making sure to add enough
* gossip informations. */
void clusterSendPing(clusterLink *link, int type) {
- unsigned char buf[sizeof(clusterMsg)];
- clusterMsg *hdr = (clusterMsg*) buf;
- int gossipcount = 0, totlen;
- /* freshnodes is the number of nodes we can still use to populate the
- * gossip section of the ping packet. Basically we start with the nodes
- * we have in memory minus two (ourself and the node we are sending the
- * message to). Every time we add a node we decrement the counter, so when
- * it will drop to <= zero we know there is no more gossip info we can
- * send. */
+ unsigned char *buf;
+ clusterMsg *hdr;
+ int gossipcount = 0; /* Number of gossip sections added so far. */
+ int wanted; /* Number of gossip sections we want to append if possible. */
+ int totlen; /* Total packet length. */
+ /* freshnodes is the max number of nodes we can hope to append at all:
+ * nodes available minus two (ourself and the node we are sending the
+ * message to). However practically there may be fewer valid nodes since
+ * nodes in handshake state, disconnected, are not considered. */
int freshnodes = dictSize(server.cluster->nodes)-2;
+ /* How many gossip sections we want to add? 1/10 of the number of nodes
+ * and anyway at least 3. Why 1/10?
+ *
+ * If we have N masters, with N/10 entries, and we consider that in
+ * node_timeout we exchange with each other node at least 4 packets
+ * (we ping in the worst case in node_timeout/2 time, and we also
+ * receive two pings from the host), we have a total of 8 packets
+ * in the node_timeout*2 failure reports validity time. So we have
+ * that, for a single PFAIL node, we can expect to receive the following
+ * number of failure reports (in the specified window of time):
+ *
+ * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS:
+ *
+ * PROB = probability of being featured in a single gossip entry,
+ * which is 1 / NUM_OF_NODES.
+ * ENTRIES = 10.
+ * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS.
+ *
+ * If we assume we have just masters (so num of nodes and num of masters
+ * is the same), with 1/10 we always get over the majority, and specifically
+ * 80% of the number of nodes, to account for many masters failing at the
+ * same time.
+ *
+ * Since we have non-voting slaves that lower the probability of an entry
+ * to feature our node, we set the number of entries per packet as
+ * 10% of the total nodes we have. */
+ wanted = floor(dictSize(server.cluster->nodes)/10);
+ if (wanted < 3) wanted = 3;
+ if (wanted > freshnodes) wanted = freshnodes;
+
+ /* Compute the maximum totlen to allocate our buffer. We'll fix the totlen
+ * later according to the number of gossip sections we really were able
+ * to put inside the packet. */
+ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ totlen += (sizeof(clusterMsgDataGossip)*wanted);
+ /* Note: clusterBuildMessageHdr() expects the buffer to be always at least
+ * sizeof(clusterMsg) or more. */
+ if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg);
+ buf = zcalloc(totlen);
+ hdr = (clusterMsg*) buf;
+
+ /* Populate the header. */
if (link->node && type == CLUSTERMSG_TYPE_PING)
link->node->ping_sent = mstime();
clusterBuildMessageHdr(hdr,type);
/* Populate the gossip fields */
- while(freshnodes > 0 && gossipcount < 3) {
+ int maxiterations = wanted*3;
+ while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
dictEntry *de = dictGetRandomKey(server.cluster->nodes);
clusterNode *this = dictGetVal(de);
clusterMsgDataGossip *gossip;
int j;
+ /* Don't include this node: the whole packet header is about us
+ * already, so we just gossip about other nodes. */
+ if (this == myself) continue;
+
+ /* Give a bias to FAIL/PFAIL nodes. */
+ if (maxiterations > wanted*2 &&
+ !(this->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL)))
+ continue;
+
/* In the gossip section don't include:
- * 1) Myself.
- * 2) Nodes in HANDSHAKE state.
+ * 1) Nodes in HANDSHAKE state.
* 3) Nodes with the NOADDR flag set.
* 4) Disconnected nodes if they don't have configured slots.
*/
- if (this == myself ||
- this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
+ if (this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
(this->link == NULL && this->numslots == 0))
{
- freshnodes--; /* otherwise we may loop forever. */
- continue;
+ freshnodes--; /* Tecnically not correct, but saves CPU. */
+ continue;
}
/* Check if we already added this node */
@@ -2123,13 +2202,19 @@ void clusterSendPing(clusterLink *link, int type) {
memcpy(gossip->ip,this->ip,sizeof(this->ip));
gossip->port = htons(this->port);
gossip->flags = htons(this->flags);
+ gossip->notused1 = 0;
+ gossip->notused2 = 0;
gossipcount++;
}
+
+ /* Ready to send... fix the totlen field and queue the message in the
+ * output buffer. */
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
totlen += (sizeof(clusterMsgDataGossip)*gossipcount);
hdr->count = htons(gossipcount);
hdr->totlen = htonl(totlen);
clusterSendMessage(link,buf,totlen);
+ zfree(buf);
}
/* Send a PONG packet to every connected node that's not in handshake state
@@ -2518,7 +2603,7 @@ void clusterHandleSlaveFailover(void) {
/* Compute the failover timeout (the max time we have to send votes
* and wait for replies), and the failover retry time (the time to wait
- * before waiting again.
+ * before trying to get voted again).
*
* Timeout is MIN(NODE_TIMEOUT*2,2000) milliseconds.
* Retry is two times the Timeout.
@@ -2776,6 +2861,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
}
}
}
+ dictReleaseIterator(di);
/* Step 4: perform the migration if there is a target, and if I'm the
* candidate. */
@@ -2897,7 +2983,7 @@ void clusterCron(void) {
/* A Node in HANDSHAKE state has a limited lifespan equal to the
* configured node timeout. */
if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
- freeClusterNode(node);
+ clusterDelNode(node);
continue;
}
@@ -3884,10 +3970,7 @@ void clusterCommand(redisClient *c) {
server.cluster->stats_bus_messages_sent,
server.cluster->stats_bus_messages_received
);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, info);
} else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) {
int retval = clusterSaveConfig(1);
@@ -4011,6 +4094,18 @@ void clusterCommand(redisClient *c) {
addReplyBulkCString(c,ni);
sdsfree(ni);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
+ c->argc == 3)
+ {
+ /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr);
+
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else {
+ addReplyLongLong(c,clusterNodeFailureReportsCount(n));
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
(c->argc == 2 || c->argc == 3))
{
@@ -4457,7 +4552,7 @@ try_again:
{
sds buf = cmd.io.buffer.ptr;
size_t pos = 0, towrite;
- int nwritten = 0;
+ ssize_t nwritten = 0;
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
diff --git a/src/cluster.h b/src/cluster.h
index 7d6567d45..ef5caf0d6 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -163,10 +163,11 @@ typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
- char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
- uint16_t port; /* port last time it was seen */
- uint16_t flags;
- uint32_t notused; /* for 64 bit alignment */
+ char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
+ uint16_t port; /* port last time it was seen */
+ uint16_t flags; /* node->flags copy */
+ uint16_t notused1; /* Some room for future improvements. */
+ uint32_t notused2;
} clusterMsgDataGossip;
typedef struct {
@@ -211,6 +212,7 @@ union clusterMsgData {
} update;
};
+#define CLUSTER_PROTO_VER 0 /* Cluster bus protocol version. */
typedef struct {
char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
diff --git a/src/config.c b/src/config.c
index 05cb7c9fe..3963a1218 100644
--- a/src/config.c
+++ b/src/config.c
@@ -60,6 +60,8 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
* Config file parsing
*----------------------------------------------------------------------------*/
+int supervisedToMode(const char *str);
+
int yesnotoi(char *s) {
if (!strcasecmp(s,"yes")) return 1;
else if (!strcasecmp(s,"no")) return 0;
@@ -397,9 +399,13 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) {
server.hash_max_ziplist_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
- server.list_max_ziplist_entries = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) {
- server.list_max_ziplist_value = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
+ } else if (!strcasecmp(argv[0],"list-max-ziplist-size") && argc == 2) {
+ server.list_max_ziplist_size = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"list-compress-depth") && argc == 2) {
+ server.list_compress_depth = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"set-max-intset-entries") && argc == 2) {
server.set_max_intset_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"zset-max-ziplist-entries") && argc == 2) {
@@ -529,6 +535,15 @@ void loadServerConfigFromString(char *config) {
goto loaderr;
}
server.notify_keyspace_events = flags;
+ } else if (!strcasecmp(argv[0],"supervised") && argc == 2) {
+ int mode = supervisedToMode(argv[1]);
+
+ if (mode == -1) {
+ err = "Invalid option for 'supervised'. "
+ "Allowed values: 'upstart', 'systemd', 'auto', or 'no'";
+ goto loaderr;
+ }
+ server.supervised_mode = mode;
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@@ -609,6 +624,7 @@ void loadServerConfig(char *filename, char *options) {
void configSetCommand(redisClient *c) {
robj *o;
long long ll;
+ int err;
redisAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
redisAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
@@ -628,8 +644,8 @@ void configSetCommand(redisClient *c) {
zfree(server.masterauth);
server.masterauth = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
} else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
+ ll = memtoll(o->ptr,&err);
+ if (err || ll < 0) goto badfmt;
server.maxmemory = ll;
if (server.maxmemory) {
if (server.maxmemory < zmalloc_used_memory()) {
@@ -795,12 +811,12 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.hash_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-entries")) {
+ } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-size")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-value")) {
+ server.list_max_ziplist_size = ll;
+ } else if (!strcasecmp(c->argv[2]->ptr,"list-compress-depth")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_value = ll;
+ server.list_compress_depth = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"set-max-intset-entries")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.set_max_intset_entries = ll;
@@ -851,7 +867,6 @@ void configSetCommand(redisClient *c) {
* whole configuration string or accept it all, even if a single
* error in a single client class is present. */
for (j = 0; j < vlen; j++) {
- char *eptr;
long val;
if ((j % 4) == 0) {
@@ -860,8 +875,8 @@ void configSetCommand(redisClient *c) {
goto badfmt;
}
} else {
- val = strtoll(v[j], &eptr, 10);
- if (eptr[0] != '\0' || val < 0) {
+ val = memtoll(v[j], &err);
+ if (err || val < 0) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
@@ -895,7 +910,8 @@ void configSetCommand(redisClient *c) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
server.repl_timeout = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
+ ll = memtoll(o->ptr,&err);
+ if (err || ll < 0) goto badfmt;
resizeReplicationBacklog(ll);
} else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-ttl")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
@@ -1004,6 +1020,47 @@ badfmt: /* Bad format errors */
} \
} while(0);
+char *maxmemoryToString() {
+ char *s;
+ switch(server.maxmemory_policy) {
+ case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
+ case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
+ case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
+ case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
+ case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
+ case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
+ default: s = "unknown"; break;
+ }
+ return s;
+}
+
+int supervisedToMode(const char *str) {
+ int mode;
+ if (!strcasecmp(str,"upstart")) {
+ mode = REDIS_SUPERVISED_UPSTART;
+ } else if (!strcasecmp(str,"systemd")) {
+ mode = REDIS_SUPERVISED_SYSTEMD;
+ } else if (!strcasecmp(str,"auto")) {
+ mode = REDIS_SUPERVISED_AUTODETECT;
+ } else if (!strcasecmp(str,"no")) {
+ mode = REDIS_SUPERVISED_NONE;
+ } else {
+ mode = -1;
+ }
+ return mode;
+}
+
+char *supervisedToString(void) {
+ char *s;
+ switch(server.supervised_mode) {
+ case REDIS_SUPERVISED_UPSTART: s = "upstart"; break;
+ case REDIS_SUPERVISED_SYSTEMD: s = "systemd"; break;
+ case REDIS_SUPERVISED_AUTODETECT: s = "auto"; break;
+ case REDIS_SUPERVISED_NONE: s = "no"; break;
+ default: s = "no"; break;
+ }
+ return s;
+}
void configGetCommand(redisClient *c) {
robj *o = c->argv[2];
void *replylen = addDeferredMultiBulkLength(c);
@@ -1033,10 +1090,10 @@ void configGetCommand(redisClient *c) {
server.hash_max_ziplist_entries);
config_get_numerical_field("hash-max-ziplist-value",
server.hash_max_ziplist_value);
- config_get_numerical_field("list-max-ziplist-entries",
- server.list_max_ziplist_entries);
- config_get_numerical_field("list-max-ziplist-value",
- server.list_max_ziplist_value);
+ config_get_numerical_field("list-max-ziplist-size",
+ server.list_max_ziplist_size);
+ config_get_numerical_field("list-compress-depth",
+ server.list_compress_depth);
config_get_numerical_field("set-max-intset-entries",
server.set_max_intset_entries);
config_get_numerical_field("zset-max-ziplist-entries",
@@ -1112,19 +1169,8 @@ void configGetCommand(redisClient *c) {
matches++;
}
if (stringmatch(pattern,"maxmemory-policy",0)) {
- char *s;
-
- switch(server.maxmemory_policy) {
- case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
- case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
- case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
- case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
- case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
- case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
addReplyBulkCString(c,"maxmemory-policy");
- addReplyBulkCString(c,s);
+ addReplyBulkCString(c,maxmemoryToString());
matches++;
}
if (stringmatch(pattern,"appendfsync",0)) {
@@ -1170,6 +1216,11 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,s);
matches++;
}
+ if (stringmatch(pattern,"supervised",0)) {
+ addReplyBulkCString(c,"supervised");
+ addReplyBulkCString(c,supervisedToString());
+ matches++;
+ }
if (stringmatch(pattern,"client-output-buffer-limit",0)) {
sds buf = sdsempty();
int j;
@@ -1854,8 +1905,8 @@ int rewriteConfig(char *path) {
rewriteConfigNotifykeyspaceeventsOption(state);
rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,REDIS_HASH_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,REDIS_HASH_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"list-max-ziplist-entries",server.list_max_ziplist_entries,REDIS_LIST_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"list-max-ziplist-value",server.list_max_ziplist_value,REDIS_LIST_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,REDIS_LIST_MAX_ZIPLIST_SIZE);
+ rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,REDIS_LIST_COMPRESS_DEPTH);
rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
@@ -1865,6 +1916,12 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,REDIS_DEFAULT_AOF_LOAD_TRUNCATED);
+ rewriteConfigEnumOption(state,"supervised",server.supervised_mode,
+ "upstart", REDIS_SUPERVISED_UPSTART,
+ "systemd", REDIS_SUPERVISED_SYSTEMD,
+ "auto", REDIS_SUPERVISED_AUTODETECT,
+ "no", REDIS_SUPERVISED_NONE,
+ NULL, REDIS_SUPERVISED_NONE);
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
/* Step 3: remove all the orphaned lines in the old file, that is, lines
diff --git a/src/config.h b/src/config.h
index 2b5004baa..9fd53626e 100644
--- a/src/config.h
+++ b/src/config.h
@@ -34,6 +34,11 @@
#include <AvailabilityMacros.h>
#endif
+#ifdef __linux__
+#include <linux/version.h>
+#include <features.h>
+#endif
+
/* Define redis_fstat to fstat or fstat64() */
#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
#define redis_fstat fstat64
@@ -57,7 +62,7 @@
#endif
/* Test for backtrace() */
-#if defined(__APPLE__) || defined(__linux__)
+#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__))
#define HAVE_BACKTRACE 1
#endif
@@ -92,8 +97,6 @@
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
* the plain fsync() call. */
#ifdef __linux__
-#include <linux/version.h>
-#include <features.h>
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if (LINUX_VERSION_CODE >= 0x020611 && __GLIBC_PREREQ(2, 6))
#define HAVE_SYNC_FILE_RANGE 1
@@ -118,7 +121,7 @@
#define USE_SETPROCTITLE
#endif
-#if (defined __linux || defined __APPLE__)
+#if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__)
#define USE_SETPROCTITLE
#define INIT_SETPROCTITLE_REPLACEMENT
void spt_init(int argc, char *argv[]);
diff --git a/src/crc64.c b/src/crc64.c
index ecdba90e0..f1f764922 100644
--- a/src/crc64.c
+++ b/src/crc64.c
@@ -181,9 +181,13 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
}
/* Test main */
-#ifdef TEST_MAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+
+#define UNUSED(x) (void)(x)
+int crc64Test(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
printf("e9c6d914c4b8d9ca == %016llx\n",
(unsigned long long) crc64(0,(unsigned char*)"123456789",9));
return 0;
diff --git a/src/crc64.h b/src/crc64.h
index ab375d3f4..c9fca519d 100644
--- a/src/crc64.h
+++ b/src/crc64.h
@@ -5,4 +5,8 @@
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+#ifdef REDIS_TEST
+int crc64Test(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/db.c b/src/db.c
index b7756bf27..36650318a 100644
--- a/src/db.c
+++ b/src/db.c
@@ -450,8 +450,8 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
list *keys = listCreate();
listNode *node, *nextnode;
long count = 10;
- sds pat;
- int patlen, use_pattern = 0;
+ sds pat = NULL;
+ int patlen = 0, use_pattern = 0;
dict *ht;
/* Object must be NULL (to iterate keys names), or the type of the object
@@ -688,16 +688,20 @@ void shutdownCommand(redisClient *c) {
void renameGenericCommand(redisClient *c, int nx) {
robj *o;
long long expire;
+ int samekey = 0;
- /* To use the same key as src and dst is probably an error */
- if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
- addReply(c,shared.sameobjecterr);
- return;
- }
+ /* When source and dest key is the same, no operation is performed,
+ * if the key exists, however we still return an error on a nonexistent key. */
+ if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) samekey = 1;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
return;
+ if (samekey) {
+ addReply(c,nx ? shared.czero : shared.ok);
+ return;
+ }
+
incrRefCount(o);
expire = getExpire(c->db,c->argv[1]);
if (lookupKeyWrite(c->db,c->argv[2]) != NULL) {
diff --git a/src/debug.c b/src/debug.c
index caf95ec58..b8dcf648e 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -252,6 +252,12 @@ void computeDatasetDigest(unsigned char *final) {
}
}
+void inputCatSds(void *result, const char *str) {
+ /* result is actually a (sds *), so re-cast it here */
+ sds *info = (sds *)result;
+ *info = sdscat(*info, str);
+}
+
void debugCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
*((char*)-1) = 'x';
@@ -295,13 +301,46 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
strenc = strEncoding(val->encoding);
+ char extra[128] = {0};
+ if (val->encoding == REDIS_ENCODING_QUICKLIST) {
+ char *nextra = extra;
+ int remaining = sizeof(extra);
+ quicklist *ql = val->ptr;
+ /* Add number of quicklist nodes */
+ int used = snprintf(nextra, remaining, " ql_nodes:%u", ql->len);
+ nextra += used;
+ remaining -= used;
+ /* Add average quicklist fill factor */
+ double avg = (double)ql->count/ql->len;
+ used = snprintf(nextra, remaining, " ql_avg_node:%.2f", avg);
+ nextra += used;
+ remaining -= used;
+ /* Add quicklist fill level / max ziplist size */
+ used = snprintf(nextra, remaining, " ql_ziplist_max:%d", ql->fill);
+ nextra += used;
+ remaining -= used;
+ /* Add isCompressed? */
+ int compressed = ql->compress != 0;
+ used = snprintf(nextra, remaining, " ql_compressed:%d", compressed);
+ nextra += used;
+ remaining -= used;
+ /* Add total uncompressed size */
+ unsigned long sz = 0;
+ for (quicklistNode *node = ql->head; node; node = node->next) {
+ sz += node->sz;
+ }
+ used = snprintf(nextra, remaining, " ql_uncompressed_size:%lu", sz);
+ nextra += used;
+ remaining -= used;
+ }
+
addReplyStatusFormat(c,
"Value at:%p refcount:%d "
- "encoding:%s serializedlength:%lld "
- "lru:%d lru_seconds_idle:%llu",
+ "encoding:%s serializedlength:%zu "
+ "lru:%d lru_seconds_idle:%llu%s",
(void*)val, val->refcount,
- strenc, (long long) rdbSavedObjectLen(val),
- val->lru, estimateObjectIdleTime(val)/1000);
+ strenc, rdbSavedObjectLen(val),
+ val->lru, estimateObjectIdleTime(val)/1000, extra);
} else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
dictEntry *de;
robj *val;
@@ -379,6 +418,25 @@ void debugCommand(redisClient *c) {
errstr = sdsmapchars(errstr,"\n\r"," ",2); /* no newlines in errors. */
errstr = sdscatlen(errstr,"\r\n",2);
addReplySds(c,errstr);
+ } else if (!strcasecmp(c->argv[1]->ptr,"structsize") && c->argc == 2) {
+ sds sizes = sdsempty();
+ sizes = sdscatprintf(sizes,"bits:%d ", (sizeof(void*) == 8)?64:32);
+ sizes = sdscatprintf(sizes,"robj:%d ", (int)sizeof(robj));
+ sizes = sdscatprintf(sizes,"dictentry:%d ", (int)sizeof(dictEntry));
+ sizes = sdscatprintf(sizes,"sdshdr:%d", (int)sizeof(struct sdshdr));
+ addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"jemalloc") && c->argc == 3) {
+#if defined(USE_JEMALLOC)
+ if (!strcasecmp(c->argv[2]->ptr, "info")) {
+ sds info = sdsempty();
+ je_malloc_stats_print(inputCatSds, &info, NULL);
+ addReplyBulkSds(c, info);
+ } else {
+ addReplyErrorFormat(c, "Valid jemalloc debug fields: info");
+ }
+#else
+ addReplyErrorFormat(c, "jemalloc support not available");
+#endif
} else {
addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'",
(char*)c->argv[1]->ptr);
diff --git a/src/dict.c b/src/dict.c
index 29d400099..dbcfeb492 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -211,6 +211,9 @@ int dictExpand(dict *d, unsigned long size)
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
+ /* Rehashing to the same table size is not useful. */
+ if (realsize == d->ht[0].size) return DICT_ERR;
+
/* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
@@ -232,27 +235,27 @@ int dictExpand(dict *d, unsigned long size)
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
+ *
* Note that a rehashing step consists in moving a bucket (that may have more
- * than one key as we use chaining) from the old to the new hash table. */
+ * than one key as we use chaining) from the old to the new hash table, however
+ * since part of the hash table may be composed of empty spaces, it is not
+ * guaranteed that this function will rehash even a single bucket, since it
+ * will visit at max N*10 empty buckets in total, otherwise the amount of
+ * work it does would be unbounded and the function may block for a long time. */
int dictRehash(dict *d, int n) {
+ int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
- while(n--) {
+ while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
- /* Check if we already rehashed the whole table... */
- if (d->ht[0].used == 0) {
- zfree(d->ht[0].table);
- d->ht[0] = d->ht[1];
- _dictReset(&d->ht[1]);
- d->rehashidx = -1;
- return 0;
- }
-
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
- while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
+ while(d->ht[0].table[d->rehashidx] == NULL) {
+ d->rehashidx++;
+ if (--empty_visits == 0) return 1;
+ }
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
@@ -270,6 +273,17 @@ int dictRehash(dict *d, int n) {
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
+
+ /* Check if we already rehashed the whole table... */
+ if (d->ht[0].used == 0) {
+ zfree(d->ht[0].table);
+ d->ht[0] = d->ht[1];
+ _dictReset(&d->ht[1]);
+ d->rehashidx = -1;
+ return 0;
+ }
+
+ /* More to rehash... */
return 1;
}
@@ -342,7 +356,10 @@ dictEntry *dictAddRaw(dict *d, void *key)
if ((index = _dictKeyIndex(d, key)) == -1)
return NULL;
- /* Allocate the memory and store the new entry */
+ /* Allocate the memory and store the new entry.
+ * Insert the element at the top, with the assumption that in a database
+ * system it is more likely that recently added entries are accessed
+ * more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
@@ -616,7 +633,11 @@ dictEntry *dictGetRandomKey(dict *d)
if (dictIsRehashing(d)) _dictRehashStep(d);
if (dictIsRehashing(d)) {
do {
- h = random() % (d->ht[0].size+d->ht[1].size);
+ /* We are sure there are no elements in indexes from 0
+ * to rehashidx-1 */
+ h = d->rehashidx + (random() % (d->ht[0].size +
+ d->ht[1].size -
+ d->rehashidx));
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
} while(he == NULL);
@@ -643,9 +664,12 @@ dictEntry *dictGetRandomKey(dict *d)
return he;
}
-/* This is a version of dictGetRandomKey() that is modified in order to
- * return multiple entries by jumping at a random place of the hash table
- * and scanning linearly for entries.
+/* This function samples the dictionary to return a few keys from random
+ * locations.
+ *
+ * It does not guarantee to return all the keys specified in 'count', nor
+ * does it guarantee to return non-duplicated elements; however, it will make
+ * some effort to do both things.
*
* Returned pointers to hash table entries are stored into 'des' that
* points to an array of dictEntry pointers. The array must have room for
@@ -654,28 +678,65 @@ dictEntry *dictGetRandomKey(dict *d)
*
* The function returns the number of items stored into 'des', that may
* be less than 'count' if the hash table has less than 'count' elements
- * inside.
+ * inside, or if not enough elements were found in a reasonable amount of
+ * steps.
*
* Note that this function is not suitable when you need a good distribution
* of the returned items, but only when you need to "sample" a given number
* of continuous elements to run some kind of algorithm or to produce
* statistics. However the function is much faster than dictGetRandomKey()
- * at producing N elements, and the elements are guaranteed to be non
- * repeating. */
-unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
- int j; /* internal hash table id, 0 or 1. */
- unsigned int stored = 0;
+ * at producing N elements. */
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
+ unsigned int j; /* internal hash table id, 0 or 1. */
+ unsigned int tables; /* 1 or 2 tables? */
+ unsigned int stored = 0, maxsizemask;
+ unsigned int maxsteps;
if (dictSize(d) < count) count = dictSize(d);
- while(stored < count) {
- for (j = 0; j < 2; j++) {
- /* Pick a random point inside the hash table 0 or 1. */
- unsigned int i = random() & d->ht[j].sizemask;
- int size = d->ht[j].size;
-
- /* Make sure to visit every bucket by iterating 'size' times. */
- while(size--) {
- dictEntry *he = d->ht[j].table[i];
+ maxsteps = count*10;
+
+ /* Try to do a rehashing work proportional to 'count'. */
+ for (j = 0; j < count; j++) {
+ if (dictIsRehashing(d))
+ _dictRehashStep(d);
+ else
+ break;
+ }
+
+ tables = dictIsRehashing(d) ? 2 : 1;
+ maxsizemask = d->ht[0].sizemask;
+ if (tables > 1 && maxsizemask < d->ht[1].sizemask)
+ maxsizemask = d->ht[1].sizemask;
+
+ /* Pick a random point inside the larger table. */
+ unsigned int i = random() & maxsizemask;
+ unsigned int emptylen = 0; /* Continuous empty entries so far. */
+ while(stored < count && maxsteps--) {
+ for (j = 0; j < tables; j++) {
+ /* Invariant of the dict.c rehashing: up to the indexes already
+ * visited in ht[0] during the rehashing, there are no populated
+ * buckets, so we can skip ht[0] for indexes between 0 and rehashidx-1. */
+ if (tables == 2 && j == 0 && i < d->rehashidx) {
+ /* Moreover, if we are currently out of range in the second
+ * table, there will be no elements in both tables up to
+ * the current rehashing index, so we jump if possible.
+ * (this happens when going from big to small table). */
+ if (i >= d->ht[1].size) i = d->rehashidx;
+ continue;
+ }
+ if (i >= d->ht[j].size) continue; /* Out of range for this table. */
+ dictEntry *he = d->ht[j].table[i];
+
+ /* Count contiguous empty buckets, and jump to another random
+ * location once the run exceeds 'count' (with a minimum of 5). */
+ if (he == NULL) {
+ emptylen++;
+ if (emptylen >= 5 && emptylen > count) {
+ i = random() & maxsizemask;
+ emptylen = 0;
+ }
+ } else {
+ emptylen = 0;
while (he) {
/* Collect all the elements of the buckets found non
* empty while iterating. */
@@ -685,14 +746,11 @@ unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
stored++;
if (stored == count) return stored;
}
- i = (i+1) & d->ht[j].sizemask;
}
- /* If there is only one table and we iterated it all, we should
- * already have 'count' elements. Assert this condition. */
- assert(dictIsRehashing(d) != 0);
}
+ i = (i+1) & maxsizemask;
}
- return stored; /* Never reached. */
+ return stored;
}
/* Function to reverse bits. Algorithm from:
diff --git a/src/dict.h b/src/dict.h
index 7421078f8..014d18212 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -164,7 +164,7 @@ dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
-unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count);
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
diff --git a/src/endianconv.c b/src/endianconv.c
index 9adf09c1f..f3b0b4730 100644
--- a/src/endianconv.c
+++ b/src/endianconv.c
@@ -101,12 +101,16 @@ uint64_t intrev64(uint64_t v) {
return v;
}
-#ifdef TESTMAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+#define UNUSED(x) (void)(x)
+int endianconvTest(int argc, char *argv[]) {
char buf[32];
+ UNUSED(argc);
+ UNUSED(argv);
+
sprintf(buf,"ciaoroma");
memrev16(buf);
printf("%s\n", buf);
diff --git a/src/endianconv.h b/src/endianconv.h
index d93cd99ba..08f553136 100644
--- a/src/endianconv.h
+++ b/src/endianconv.h
@@ -71,4 +71,8 @@ uint64_t intrev64(uint64_t v);
#define ntohu64(v) intrev64(v)
#endif
+#ifdef REDIS_TEST
+int endianconvTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/help.h b/src/help.h
index 8395c525b..9f4c979df 100644
--- a/src/help.h
+++ b/src/help.h
@@ -651,8 +651,8 @@ struct commandHelp {
0,
"1.0.0" },
{ "SPOP",
- "key",
- "Remove and return a random member from a set",
+ "key [count]",
+ "Remove and return one or multiple random members from a set",
3,
"1.0.0" },
{ "SRANDMEMBER",
diff --git a/src/intset.c b/src/intset.c
index 5d894e3cd..b0a597fc7 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -281,44 +281,46 @@ size_t intsetBlobLen(intset *is) {
return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
}
-#ifdef INTSET_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
+#include <time.h>
-void intsetRepr(intset *is) {
- int i;
- for (i = 0; i < intrev32ifbe(is->length); i++) {
+#if 0
+static void intsetRepr(intset *is) {
+ for (uint32_t i = 0; i < intrev32ifbe(is->length); i++) {
printf("%lld\n", (uint64_t)_intsetGet(is,i));
}
printf("\n");
}
-void error(char *err) {
+static void error(char *err) {
printf("%s\n", err);
exit(1);
}
+#endif
-void ok(void) {
+static void ok(void) {
printf("OK\n");
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
-void _assert(char *estr, char *file, int line) {
+static void _assert(char *estr, char *file, int line) {
printf("\n\n=== ASSERTION FAILED ===\n");
printf("==> %s:%d '%s' is not true\n",file,line,estr);
}
-intset *createSet(int bits, int size) {
+static intset *createSet(int bits, int size) {
uint64_t mask = (1<<bits)-1;
- uint64_t i, value;
+ uint64_t value;
intset *is = intsetNew();
- for (i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
if (bits > 32) {
value = (rand()*rand()) & mask;
} else {
@@ -329,10 +331,8 @@ intset *createSet(int bits, int size) {
return is;
}
-void checkConsistency(intset *is) {
- int i;
-
- for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
+static void checkConsistency(intset *is) {
+ for (uint32_t i = 0; i < (intrev32ifbe(is->length)-1); i++) {
uint32_t encoding = intrev32ifbe(is->encoding);
if (encoding == INTSET_ENC_INT16) {
@@ -348,11 +348,15 @@ void checkConsistency(intset *is) {
}
}
-int main(int argc, char **argv) {
+#define UNUSED(x) (void)(x)
+int intsetTest(int argc, char **argv) {
uint8_t success;
int i;
intset *is;
- sranddev();
+ srand(time(NULL));
+
+ UNUSED(argc);
+ UNUSED(argv);
printf("Value encodings: "); {
assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
@@ -363,8 +367,10 @@ int main(int argc, char **argv) {
assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(-9223372036854775808ull) ==
+ INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(+9223372036854775807ull) ==
+ INTSET_ENC_INT64);
ok();
}
@@ -378,7 +384,7 @@ int main(int argc, char **argv) {
}
printf("Large number of random adds: "); {
- int inserts = 0;
+ uint32_t inserts = 0;
is = intsetNew();
for (i = 0; i < 1024; i++) {
is = intsetAdd(is,rand()%0x800,&success);
@@ -461,7 +467,8 @@ int main(int argc, char **argv) {
start = usec();
for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
- printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
+ printf("%ld lookups, %ld element set, %lldusec\n",
+ num,size,usec()-start);
}
printf("Stress add+delete: "); {
@@ -479,5 +486,7 @@ int main(int argc, char **argv) {
checkConsistency(is);
ok();
}
+
+ return 0;
}
#endif
diff --git a/src/intset.h b/src/intset.h
index bd01ff22f..30a854f89 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -47,4 +47,8 @@ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(intset *is);
size_t intsetBlobLen(intset *is);
+#ifdef REDIS_TEST
+int intsetTest(int argc, char *argv[]);
+#endif
+
#endif // __INTSET_H
diff --git a/src/latency.c b/src/latency.c
index cb116fb90..fd76b3215 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -228,6 +228,7 @@ sds createLatencyReport(void) {
int advise_write_load_info = 0; /* Print info about AOF and write load. */
int advise_hz = 0; /* Use higher HZ. */
int advise_large_objects = 0; /* Deletion of large objects. */
+ int advise_mass_eviction = 0; /* Avoid mass eviction of keys. */
int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
int advise_disable_thp = 0; /* AnonHugePages detected. */
int advices = 0;
@@ -364,11 +365,16 @@ sds createLatencyReport(void) {
}
/* Eviction cycle. */
- if (!strcasecmp(event,"eviction-cycle")) {
+ if (!strcasecmp(event,"eviction-del")) {
advise_large_objects = 1;
advices++;
}
+ if (!strcasecmp(event,"eviction-cycle")) {
+ advise_mass_eviction = 1;
+ advices++;
+ }
+
report = sdscatlen(report,"\n",1);
}
dictReleaseIterator(di);
@@ -452,6 +458,10 @@ sds createLatencyReport(void) {
report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
}
+ if (advise_mass_eviction) {
+ report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via sets or sorted sets intersections, STORE option of SORT, Redis Cluster large keys migrations (RESTORE command), may create sudden memory pressure forcing the server to block trying to evict keys. \n");
+ }
+
if (advise_disable_thp) {
report = sdscat(report,"- I detected a non zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
}
diff --git a/src/latency.h b/src/latency.h
index 240f54b45..0fe26e0e4 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -86,4 +86,8 @@ int THPIsEnabled(void);
(var) >= server.latency_monitor_threshold) \
latencyAddSample((event),(var));
+/* Remove time from a nested event. */
+#define latencyRemoveNestedEvent(event_var,nested_var) \
+ event_var += nested_var;
+
#endif /* __LATENCY_H */
diff --git a/src/lzfP.h b/src/lzfP.h
index c9eae3f6a..c6d2e096c 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -49,7 +49,7 @@
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
- * For best compression, use 15 or 16 (or more, up to 23).
+ * For best compression, use 15 or 16 (or more, up to 22).
*/
#ifndef HLOG
# define HLOG 16
@@ -94,7 +94,7 @@
/*
* Avoid assigning values to errno variable? for some embedding purposes
* (linux kernel for example), this is necessary. NOTE: this breaks
- * the documentation in lzf.h.
+ * the documentation in lzf.h. Avoiding errno has no speed impact.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
@@ -121,16 +121,52 @@
# define CHECK_INPUT 1
#endif
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architectures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ */
+/*#define LZF_USE_OFFSETS autodetect */
+
/*****************************************************************************/
/* nothing should be changed below */
+#ifdef __cplusplus
+# include <cstring>
+# include <climits>
+using namespace std;
+#else
+# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+# define LZF_USE_OFFSETS defined(_M_X64)
+# else
+# if __cplusplus > 199711L
+# include <cstdint>
+# else
+# include <stdint.h>
+# endif
+# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
+#endif
+
typedef unsigned char u8;
-typedef const u8 *LZF_STATE[1 << (HLOG)];
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+ typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+ typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
-# include <limits.h>
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
@@ -142,17 +178,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
#endif
#if ULTRA_FAST
-# if defined(VERY_FAST)
-# undef VERY_FAST
-# endif
-#endif
-
-#if INIT_HTAB
-# ifdef __cplusplus
-# include <cstring>
-# else
-# include <string.h>
-# endif
+# undef VERY_FAST
#endif
#endif
diff --git a/src/lzf_c.c b/src/lzf_c.c
index 9e031ad0b..e9c69a0b8 100644
--- a/src/lzf_c.c
+++ b/src/lzf_c.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -40,8 +40,8 @@
/*
* don't play with this unless you benchmark!
- * decompression is not dependent on the hash function
- * the hashing function might seem strange, just believe me
+ * the data format is not dependent on the hash function.
+ * the hash function might seem strange, just believe me,
* it works ;)
*/
#ifndef FRST
@@ -89,9 +89,9 @@
/*
* compressed format
*
- * 000LLLLL <L+1> ; literal
- * LLLooooo oooooooo ; backref L
- * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
*
*/
@@ -106,7 +106,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
- const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
@@ -133,10 +132,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
-# if 0
- for (hslot = htab; hslot < htab + HSIZE; hslot++)
- *hslot++ = ip;
-# endif
#endif
lit = 0; op++; /* start run */
@@ -144,24 +139,23 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
while (ip < in_end - 2)
{
+ LZF_HSLOT *hslot;
+
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
- ref = *hslot; *hslot = ip;
+ ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
&& ref < ip /* the next test will actually take care of this, but this is faster */
#endif
&& (off = ip - ref - 1) < MAX_OFF
- && ip + 4 < in_end
&& ref > (u8 *)in_data
-#if STRICT_ALIGN
- && ref[0] == ip[0]
- && ref[1] == ip[1]
&& ref[2] == ip[2]
+#if STRICT_ALIGN
+ && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
#else
&& *(u16 *)ref == *(u16 *)ip
- && ref[2] == ip[2]
#endif
)
{
@@ -170,12 +164,13 @@ lzf_compress (const void *const in_data, unsigned int in_len,
unsigned int maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
op [- lit - 1] = lit - 1; /* stop run */
op -= !lit; /* undo run if length is zero */
- if (expect_false (op + 3 + 1 >= out_end))
- return 0;
-
for (;;)
{
if (expect_true (maxlen > 16))
@@ -222,6 +217,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
}
*op++ = off;
+
lit = 0; op++; /* start run */
ip += len + 1;
@@ -237,12 +233,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# endif
#else
@@ -251,7 +247,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
do
{
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
}
while (len--);
diff --git a/src/lzf_d.c b/src/lzf_d.c
index 6c723f5e0..c32be8e87 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -43,14 +43,14 @@
# define SET_ERRNO(n) errno = (n)
#endif
-/*
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
-*/
+#endif
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
@@ -86,9 +86,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
- do
- *op++ = *ip++;
- while (--ctrl);
+ switch (ctrl)
+ {
+ case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+ case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+ case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+ case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+ case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+ case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+ case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+ case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+ }
#endif
}
else /* back reference */
@@ -134,12 +142,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
len += 2;
lzf_movsb (op, ref, len);
#else
- *op++ = *ref++;
- *op++ = *ref++;
-
- do
- *op++ = *ref++;
- while (--len);
+ switch (len)
+ {
+ default:
+ len += 2;
+
+ if (op >= ref + len)
+ {
+ /* disjunct areas */
+ memcpy (op, ref, len);
+ op += len;
+ }
+ else
+ {
+ /* overlapping, use octet by octet copying */
+ do
+ *op++ = *ref++;
+ while (--len);
+ }
+
+ break;
+
+ case 9: *op++ = *ref++;
+ case 8: *op++ = *ref++;
+ case 7: *op++ = *ref++;
+ case 6: *op++ = *ref++;
+ case 5: *op++ = *ref++;
+ case 4: *op++ = *ref++;
+ case 3: *op++ = *ref++;
+ case 2: *op++ = *ref++;
+ case 1: *op++ = *ref++;
+ case 0: *op++ = *ref++; /* two octets more */
+ *op++ = *ref++;
+ }
#endif
}
}
diff --git a/src/networking.c b/src/networking.c
index 5a780a597..0b69f5408 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -525,6 +525,14 @@ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
addReply(c,shared.crlf);
}
+/* Add sds to reply as a bulk string (takes ownership of sds and frees it). The length is passed as unsigned long long to match the "%U" specifier of sdscatfmt(). */
+void addReplyBulkSds(redisClient *c, sds s) {
+ addReplySds(c,sdscatfmt(sdsempty(),"$%U\r\n",
+ (unsigned long long)sdslen(s)));
+ addReplySds(c,s);
+ addReply(c,shared.crlf);
+}
+
/* Add a C nul term string as bulk reply */
void addReplyBulkCString(redisClient *c, char *s) {
if (s == NULL) {
@@ -789,7 +797,8 @@ void freeClientsInAsyncFreeQueue(void) {
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen;
+ ssize_t nwritten = 0, totwritten = 0;
+ size_t objlen;
size_t objmem;
robj *o;
REDIS_NOTUSED(el);
@@ -1219,17 +1228,6 @@ void getClientsMaxBuffers(unsigned long *longest_output_list,
*biggest_input_buffer = bib;
}
-/* This is a helper function for genClientPeerId().
- * It writes the specified ip/port to "peerid" as a null termiated string
- * in the form ip:port if ip does not contain ":" itself, otherwise
- * [ip]:port format is used (for IPv6 addresses basically). */
-void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port) {
- if (strchr(ip,':'))
- snprintf(peerid,peerid_len,"[%s]:%d",ip,port);
- else
- snprintf(peerid,peerid_len,"%s:%d",ip,port);
-}
-
/* A Redis "Peer ID" is a colon separated ip:port pair.
* For IPv4 it's in the form x.y.z.k:port, example: "127.0.0.1:1234".
* For IPv6 addresses we use [] around the IP part, like in "[::1]:1234".
@@ -1238,24 +1236,17 @@ void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port) {
* A Peer ID always fits inside a buffer of REDIS_PEER_ID_LEN bytes, including
* the null term.
*
- * The function returns REDIS_OK on succcess, and REDIS_ERR on failure.
- *
* On failure the function still populates 'peerid' with the "?:0" string
* in case you want to relax error checking or need to display something
* anyway (see anetPeerToString implementation for more info). */
-int genClientPeerId(redisClient *client, char *peerid, size_t peerid_len) {
- char ip[REDIS_IP_STR_LEN];
- int port;
-
+void genClientPeerId(redisClient *client, char *peerid,
+ size_t peerid_len) {
if (client->flags & REDIS_UNIX_SOCKET) {
/* Unix socket client. */
snprintf(peerid,peerid_len,"%s:0",server.unixsocket);
- return REDIS_OK;
} else {
/* TCP client. */
- int retval = anetPeerToString(client->fd,ip,sizeof(ip),&port);
- formatPeerId(peerid,peerid_len,ip,port);
- return (retval == -1) ? REDIS_ERR : REDIS_OK;
+ anetFormatPeer(client->fd,peerid,peerid_len);
}
}
@@ -1631,7 +1622,7 @@ int checkClientOutputBufferLimits(redisClient *c) {
* called from contexts where the client can't be freed safely, i.e. from the
* lower level functions pushing data inside the client output buffers. */
void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) {
- redisAssert(c->reply_bytes < ULONG_MAX-(1024*64));
+ redisAssert(c->reply_bytes < SIZE_MAX-(1024*64));
if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return;
if (checkClientOutputBufferLimits(c)) {
sds client = catClientInfoString(sdsempty(),c);
diff --git a/src/object.c b/src/object.c
index 11c77c3c3..f75421ee8 100644
--- a/src/object.c
+++ b/src/object.c
@@ -180,11 +180,10 @@ robj *dupStringObject(robj *o) {
}
}
-robj *createListObject(void) {
- list *l = listCreate();
+robj *createQuicklistObject(void) {
+ quicklist *l = quicklistCreate();
robj *o = createObject(REDIS_LIST,l);
- listSetFreeMethod(l,decrRefCountVoid);
- o->encoding = REDIS_ENCODING_LINKEDLIST;
+ o->encoding = REDIS_ENCODING_QUICKLIST;
return o;
}
@@ -242,11 +241,8 @@ void freeStringObject(robj *o) {
void freeListObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_LINKEDLIST:
- listRelease((list*) o->ptr);
- break;
- case REDIS_ENCODING_ZIPLIST:
- zfree(o->ptr);
+ case REDIS_ENCODING_QUICKLIST:
+ quicklistRelease(o->ptr);
break;
default:
redisPanic("Unknown list encoding type");
@@ -678,7 +674,7 @@ char *strEncoding(int encoding) {
case REDIS_ENCODING_RAW: return "raw";
case REDIS_ENCODING_INT: return "int";
case REDIS_ENCODING_HT: return "hashtable";
- case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
+ case REDIS_ENCODING_QUICKLIST: return "quicklist";
case REDIS_ENCODING_ZIPLIST: return "ziplist";
case REDIS_ENCODING_INTSET: return "intset";
case REDIS_ENCODING_SKIPLIST: return "skiplist";
diff --git a/src/quicklist.c b/src/quicklist.c
new file mode 100644
index 000000000..be02e3276
--- /dev/null
+++ b/src/quicklist.c
@@ -0,0 +1,2650 @@
+/* quicklist.c - A doubly linked list of ziplists
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h> /* for memcpy */
+#include "quicklist.h"
+#include "zmalloc.h"
+#include "ziplist.h"
+#include "util.h" /* for ll2string */
+#include "lzf.h"
+
+#if defined(REDIS_TEST) || defined(REDIS_TEST_VERBOSE)
+#include <stdio.h> /* for printf (debug printing), snprintf (genstr) */
+#endif
+
+#ifndef REDIS_STATIC
+#define REDIS_STATIC static
+#endif
+
+/* Optimization levels for size-based filling */
+static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};
+
+/* Maximum size in bytes of any multi-element ziplist.
+ * Larger values will live in their own isolated ziplists. */
+#define SIZE_SAFETY_LIMIT 8192
+
+/* Minimum ziplist size in bytes for attempting compression. */
+#define MIN_COMPRESS_BYTES 48
+
+/* Minimum size reduction in bytes to store compressed quicklistNode data.
+ * This also prevents us from storing compression if the compression
+ * resulted in a larger size than the original data. */
+#define MIN_COMPRESS_IMPROVE 8
+
+/* If not verbose testing, remove all debug printing. */
+#ifndef REDIS_TEST_VERBOSE
+#define D(...)
+#else
+#define D(...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+ printf("\n"); \
+ } while (0);
+#endif
+
+/* Simple way to give quicklistEntry structs default values with one call. */
+#define initEntry(e) \
+ do { \
+ (e)->zi = (e)->value = NULL; \
+ (e)->longval = -123456789; \
+ (e)->quicklist = NULL; \
+ (e)->node = NULL; \
+ (e)->offset = 123456789; \
+ (e)->sz = 0; \
+ } while (0)
+
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+/* Create a new quicklist.
+ * Free with quicklistRelease(). */
+quicklist *quicklistCreate(void) {
+ struct quicklist *quicklist;
+
+ quicklist = zmalloc(sizeof(*quicklist));
+ quicklist->head = quicklist->tail = NULL;
+ quicklist->len = 0;
+ quicklist->count = 0;
+ quicklist->compress = 0;
+ quicklist->fill = -2;
+ return quicklist;
+}
+
+#define COMPRESS_MAX (1 << 16)
+void quicklistSetCompressDepth(quicklist *quicklist, int compress) {
+ if (compress > COMPRESS_MAX) {
+ compress = COMPRESS_MAX;
+ } else if (compress < 0) {
+ compress = 0;
+ }
+ quicklist->compress = compress;
+}
+
+#define FILL_MAX (1 << 15)
+void quicklistSetFill(quicklist *quicklist, int fill) {
+ if (fill > FILL_MAX) {
+ fill = FILL_MAX;
+ } else if (fill < -5) {
+ fill = -5;
+ }
+ quicklist->fill = fill;
+}
+
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth) {
+ quicklistSetFill(quicklist, fill);
+ quicklistSetCompressDepth(quicklist, depth);
+}
+
+/* Create a new quicklist with some default parameters. */
+quicklist *quicklistNew(int fill, int compress) {
+ quicklist *quicklist = quicklistCreate();
+ quicklistSetOptions(quicklist, fill, compress);
+ return quicklist;
+}
+
+REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
+ quicklistNode *node;
+ node = zmalloc(sizeof(*node));
+ node->zl = NULL;
+ node->count = 0;
+ node->sz = 0;
+ node->next = node->prev = NULL;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ node->container = QUICKLIST_NODE_CONTAINER_ZIPLIST;
+ node->recompress = 0;
+ return node;
+}
+
+/* Return cached quicklist count */
+unsigned int quicklistCount(quicklist *ql) { return ql->count; }
+
+/* Free entire quicklist. */
+void quicklistRelease(quicklist *quicklist) {
+ unsigned long len;
+ quicklistNode *current, *next;
+
+ current = quicklist->head;
+ len = quicklist->len;
+ while (len--) {
+ next = current->next;
+
+ zfree(current->zl);
+ quicklist->count -= current->count;
+
+ zfree(current);
+
+ quicklist->len--;
+ current = next;
+ }
+ zfree(quicklist);
+}
+
+/* Compress the ziplist in 'node' and update encoding details.
+ * Returns 1 if ziplist compressed successfully.
+ * Returns 0 if compression failed or if ziplist too small to compress. */
+REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+ node->attempted_compress = 1;
+#endif
+
+ /* Don't bother compressing small values */
+ if (node->sz < MIN_COMPRESS_BYTES)
+ return 0;
+
+ quicklistLZF *lzf = zmalloc(sizeof(*lzf) + node->sz);
+
+ /* Cancel if compression fails or doesn't compress small enough */
+ if (((lzf->sz = lzf_compress(node->zl, node->sz, lzf->compressed,
+ node->sz)) == 0) ||
+ lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {
+ /* lzf_compress aborts/rejects compression if value not compressible. */
+ zfree(lzf);
+ return 0;
+ }
+ lzf = zrealloc(lzf, sizeof(*lzf) + lzf->sz);
+ zfree(node->zl);
+ node->zl = (unsigned char *)lzf;
+ node->encoding = QUICKLIST_NODE_ENCODING_LZF;
+ node->recompress = 0;
+ return 1;
+}
+
+/* Compress only uncompressed nodes. */
+#define quicklistCompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_RAW) { \
+ __quicklistCompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Expand the LZF blob stored in 'node' back into a plain ziplist.
+ *
+ * On success the compressed blob is freed, node->zl points at the
+ * freshly decompressed buffer of node->sz bytes and the encoding is
+ * reset to RAW.
+ *
+ * Returns 1 on successful decode, 0 if lzf_decompress() returned 0
+ * (node->zl and the encoding are then left as they were). */
+REDIS_STATIC int __quicklistDecompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+    node->attempted_compress = 0;
+#endif
+
+    quicklistLZF *packed = (quicklistLZF *)node->zl;
+    void *raw = zmalloc(node->sz);
+
+    if (lzf_decompress(packed->compressed, packed->sz, raw, node->sz) != 0) {
+        zfree(packed);
+        node->zl = raw;
+        node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+        return 1;
+    }
+
+    /* A decompress was requested but the data could not be decoded. */
+    zfree(raw);
+    return 0;
+}
+
+/* Decompress '_node' only when it is non-NULL and currently
+ * LZF-encoded; any other node is left alone. The 1/0 result of
+ * __quicklistDecompressNode() is discarded.
+ * Note: '_node' is expanded more than once, so pass a side-effect-free
+ * expression. */
+#define quicklistDecompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Decompress an LZF-encoded '_node' for temporary use and set its
+ * 'recompress' flag so the node is marked as one that should simply be
+ * compressed again once the caller is done with it.
+ * Nodes that are NULL or not LZF-encoded are left untouched, and the
+ * flag is not set for them. The flag is set even if the decompress
+ * call reports failure, because its result is not checked. */
+#define quicklistDecompressNodeForUse(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ (_node)->recompress = 1; \
+ } \
+ } while (0)
+
+/* Expose the compressed payload of a quicklistNode.
+ *
+ * node->zl is interpreted as a quicklistLZF; '*data' receives a pointer
+ * to its 'compressed' bytes (nothing is copied).
+ * Return value is the length of the compressed LZF data. */
+size_t quicklistGetLzf(const quicklistNode *node, void **data) {
+    quicklistLZF *packed = (quicklistLZF *)node->zl;
+    size_t packed_len = packed->sz;
+
+    *data = packed->compressed;
+    return packed_len;
+}
+/* True when the quicklist's 'compress' field is nonzero. */
+#define quicklistAllowsCompression(_ql) ((_ql)->compress != 0)
+
+/* Force 'quicklist' to meet compression guidelines set by compress depth.
+ * The only way to guarantee interior nodes get compressed is to iterate
+ * to our "interior" compress depth then compress the next node we find.
+ * If compress depth is larger than the entire list, we return immediately.
+ *
+ * 'node' is the node the caller just worked on. It may be NULL, in which
+ * case only the depth boundaries are re-established
+ * (quicklistCompressNode() ignores NULL).
+ *
+ * The function walks inward from head and tail at the same time for
+ * 'compress' steps, decompressing every node it visits. Afterwards
+ * 'node' is compressed unless it was one of the visited nodes. */
+REDIS_STATIC void __quicklistCompress(const quicklist *quicklist,
+ quicklistNode *node) {
+ /* If length is less than our compress depth (from both sides),
+ * we can't compress anything. */
+ if (!quicklistAllowsCompression(quicklist) ||
+ quicklist->len < (unsigned int)(quicklist->compress * 2))
+ return;
+
+#if 0
+ /* Optimized cases for small depth counts */
+ if (quicklist->compress == 1) {
+ quicklistNode *h = quicklist->head, *t = quicklist->tail;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(t);
+ if (h != node && t != node)
+ quicklistCompressNode(node);
+ return;
+ } else if (quicklist->compress == 2) {
+ quicklistNode *h = quicklist->head, *hn = h->next, *hnn = hn->next;
+ quicklistNode *t = quicklist->tail, *tp = t->prev, *tpp = tp->prev;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(hn);
+ quicklistDecompressNode(t);
+ quicklistDecompressNode(tp);
+ if (h != node && hn != node && t != node && tp != node) {
+ quicklistCompressNode(node);
+ }
+ if (hnn != t) {
+ quicklistCompressNode(hnn);
+ }
+ if (tpp != h) {
+ quicklistCompressNode(tpp);
+ }
+ return;
+ }
+#endif
+
+ /* Iterate until we reach compress depth for both sides of the list.
+ * Note: because we do length checks at the *top* of this function,
+ * we can skip explicit null checks below. Everything exists. */
+ quicklistNode *forward = quicklist->head;
+ quicklistNode *reverse = quicklist->tail;
+ int depth = 0;
+ int in_depth = 0; /* set when 'node' is one of the visited nodes */
+ while (depth++ < quicklist->compress) {
+ quicklistDecompressNode(forward);
+ quicklistDecompressNode(reverse);
+
+ if (forward == node || reverse == node)
+ in_depth = 1;
+
+ if (forward == reverse)
+ return; /* the two cursors met: no node lies beyond the depth */
+
+ forward = forward->next;
+ reverse = reverse->prev;
+ }
+
+ if (!in_depth)
+ quicklistCompressNode(node);
+
+ if (depth > 2) {
+ /* At this point, forward and reverse are one node beyond depth.
+ * 'depth' equals compress + 1 once the loop has run to completion,
+ * so this branch is only taken when compress >= 2. */
+ quicklistCompressNode(forward);
+ quicklistCompressNode(reverse);
+ }
+}
+/* If '_node' carries the 'recompress' flag, just compress that node
+ * again; otherwise run the full compress-depth pass over the list via
+ * __quicklistCompress(). '_node' must be non-NULL because it is
+ * dereferenced unconditionally. */
+#define quicklistCompress(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ else \
+ __quicklistCompress((_ql), (_node)); \
+ } while (0)
+
+/* If we previously used quicklistDecompressNodeForUse(), just recompress.
+ * Nodes without the 'recompress' flag are left exactly as they are; no
+ * list-wide compression pass is run. '_ql' is not used by the expansion.
+ * '_node' must be non-NULL because it is dereferenced unconditionally. */
+#define quicklistRecompressOnly(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ } while (0)
+
+/* Link 'new_node' into 'quicklist' next to 'old_node'.
+ *
+ * after != 0: 'new_node' goes immediately after 'old_node'.
+ * after == 0: 'new_node' goes immediately before 'old_node'.
+ *
+ * 'old_node' may be NULL; then only one of new_node's links is set (to
+ * NULL) and, for an empty list, 'new_node' becomes both head and tail.
+ * The list's node counter 'len' is incremented.
+ *
+ * Note: 'new_node' is *always* uncompressed, so if it becomes head or
+ * tail there is nothing to uncompress. */
+REDIS_STATIC void __quicklistInsertNode(quicklist *quicklist,
+                                        quicklistNode *old_node,
+                                        quicklistNode *new_node, int after) {
+    if (!after) {
+        new_node->next = old_node;
+        if (old_node) {
+            quicklistNode *pred = old_node->prev;
+            new_node->prev = pred;
+            if (pred) pred->next = new_node;
+            old_node->prev = new_node;
+        }
+        if (quicklist->head == old_node) quicklist->head = new_node;
+    } else {
+        new_node->prev = old_node;
+        if (old_node) {
+            quicklistNode *succ = old_node->next;
+            new_node->next = succ;
+            if (succ) succ->prev = new_node;
+            old_node->next = new_node;
+        }
+        if (quicklist->tail == old_node) quicklist->tail = new_node;
+    }
+
+    /* First node ever inserted: it is both ends of the list. */
+    if (quicklist->len == 0) quicklist->head = quicklist->tail = new_node;
+
+    /* The reference node may have moved relative to the compress depth. */
+    if (old_node) quicklistCompress(quicklist, old_node);
+
+    quicklist->len++;
+}
+
+/* Wrapper for node inserting around an existing node: links 'new_node'
+ * immediately before 'old_node' by calling __quicklistInsertNode() with
+ * after == 0. */
+REDIS_STATIC void _quicklistInsertNodeBefore(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 0);
+}
+/* Wrapper for node inserting around an existing node: links 'new_node'
+ * immediately after 'old_node' by calling __quicklistInsertNode() with
+ * after == 1. */
+REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 1);
+}
+
+/* Decide whether a ziplist of 'sz' bytes fits the size limit selected
+ * by a negative 'fill'.
+ *
+ * A non-negative 'fill' never qualifies. Otherwise (-fill) - 1 is used
+ * as an index into optimization_level[]; an index past the end of that
+ * table never qualifies either.
+ *
+ * Returns 1 if 'sz' is within the selected limit, 0 otherwise. */
+REDIS_STATIC int
+_quicklistNodeSizeMeetsOptimizationRequirement(const size_t sz,
+                                               const int fill) {
+    if (fill >= 0) return 0;
+
+    const size_t levels =
+        sizeof(optimization_level) / sizeof(*optimization_level);
+    const size_t idx = (-fill) - 1;
+
+    if (idx >= levels) return 0;
+    return (sz <= optimization_level[idx]) ? 1 : 0;
+}
+/* True when 'sz' does not exceed SIZE_SAFETY_LIMIT. */
+#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)
+
+/* Estimate whether an entry of 'sz' bytes may be added to 'node' under
+ * the given 'fill' policy.
+ *
+ * The projected node size is node->sz + sz plus an estimated per-entry
+ * ziplist overhead. The insert is allowed when that size satisfies the
+ * size-based limit (negative 'fill'), or when it is within the safety
+ * limit and the node currently holds fewer than 'fill' entries.
+ *
+ * Returns 1 if the insert is allowed, 0 otherwise (including when
+ * 'node' is NULL). */
+REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node,
+                                           const int fill, const size_t sz) {
+    if (unlikely(!node)) return 0;
+
+    /* size of previous offset */
+    int ziplist_overhead = (sz < 254) ? 1 : 5;
+
+    /* size of forward offset */
+    if (sz < 64) {
+        ziplist_overhead += 1;
+    } else if (likely(sz < 16384)) {
+        ziplist_overhead += 2;
+    } else {
+        ziplist_overhead += 5;
+    }
+
+    /* new_sz overestimates if 'sz' encodes to an integer type */
+    unsigned int new_sz = node->sz + sz + ziplist_overhead;
+
+    if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(new_sz, fill)))
+        return 1;
+    if (!sizeMeetsSafetyLimit(new_sz)) return 0;
+    return ((int)node->count < fill) ? 1 : 0;
+}
+
+/* Decide whether nodes 'a' and 'b' may be merged under 'fill'.
+ *
+ * The merge is allowed when the estimated merged size satisfies the
+ * size-based limit (negative 'fill'), or when it is within the safety
+ * limit and the combined entry count does not exceed 'fill'.
+ *
+ * Returns 1 if the merge is allowed, 0 otherwise (including when either
+ * node is NULL). */
+REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a,
+                                          const quicklistNode *b,
+                                          const int fill) {
+    if (!(a && b)) return 0;
+
+    /* approximate merged ziplist size (- 11 to remove one ziplist
+     * header/trailer) */
+    unsigned int merge_sz = a->sz + b->sz - 11;
+
+    if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(merge_sz, fill)))
+        return 1;
+    if (!sizeMeetsSafetyLimit(merge_sz)) return 0;
+    return ((int)(a->count + b->count) <= fill) ? 1 : 0;
+}
+/* Refresh the cached byte size 'sz' of 'node' from its ziplist using
+ * ziplistBlobLen(). NOTE(review): presumably only meaningful while
+ * node->zl is a raw (uncompressed) ziplist -- confirm at call sites. */
+#define quicklistNodeUpdateSz(node) \
+ do { \
+ (node)->sz = ziplistBlobLen((node)->zl); \
+ } while (0)
+
+/* Prepend 'value' ('sz' bytes) to the quicklist.
+ *
+ * The value goes into the current head node when
+ * _quicklistNodeAllowInsert() permits it; otherwise a new node holding
+ * only this value is linked in front of the head.
+ *
+ * Returns 0 if used existing head.
+ * Returns 1 if new head created. */
+int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
+    quicklistNode *head_before = quicklist->head;
+
+    if (likely(_quicklistNodeAllowInsert(head_before, quicklist->fill, sz))) {
+        head_before->zl =
+            ziplistPush(head_before->zl, value, sz, ZIPLIST_HEAD);
+        quicklistNodeUpdateSz(head_before);
+    } else {
+        quicklistNode *fresh = quicklistCreateNode();
+
+        fresh->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+        quicklistNodeUpdateSz(fresh);
+        _quicklistInsertNodeBefore(quicklist, head_before, fresh);
+    }
+
+    /* Account for the entry on the list and on whichever node is head now. */
+    quicklist->count++;
+    quicklist->head->count++;
+    return (head_before != quicklist->head);
+}
+
+/* Append 'value' ('sz' bytes) to the quicklist.
+ *
+ * The value goes into the current tail node when
+ * _quicklistNodeAllowInsert() permits it; otherwise a new node holding
+ * only this value is linked behind the tail.
+ *
+ * Returns 0 if used existing tail.
+ * Returns 1 if new tail created. */
+int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) {
+    quicklistNode *tail_before = quicklist->tail;
+
+    if (likely(_quicklistNodeAllowInsert(tail_before, quicklist->fill, sz))) {
+        tail_before->zl =
+            ziplistPush(tail_before->zl, value, sz, ZIPLIST_TAIL);
+        quicklistNodeUpdateSz(tail_before);
+    } else {
+        quicklistNode *fresh = quicklistCreateNode();
+
+        fresh->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_TAIL);
+        quicklistNodeUpdateSz(fresh);
+        _quicklistInsertNodeAfter(quicklist, tail_before, fresh);
+    }
+
+    /* Account for the entry on the list and on whichever node is tail now. */
+    quicklist->count++;
+    quicklist->tail->count++;
+    return (tail_before != quicklist->tail);
+}
+
+/* Wrap a pre-formed ziplist in a new node and link it at the tail.
+ * Used for loading RDBs where entire ziplists have been stored
+ * to be retrieved later.
+ *
+ * The node stores the 'zl' pointer itself (no copy is made); its entry
+ * count and byte size are read from the ziplist, and the list's total
+ * count grows by that entry count. */
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl) {
+    quicklistNode *fresh = quicklistCreateNode();
+
+    fresh->zl = zl;
+    fresh->sz = ziplistBlobLen(zl);
+    fresh->count = ziplistLen(zl);
+
+    _quicklistInsertNodeAfter(quicklist, quicklist->tail, fresh);
+    quicklist->count += fresh->count;
+}
+
+/* Push every value of ziplist 'zl', one by one, onto the tail of
+ * 'quicklist'.
+ *
+ * This allows us to restore old RDB ziplists into new quicklists
+ * with smaller ziplist sizes than the saved RDB ziplist.
+ *
+ * Returns 'quicklist' argument. Frees passed-in ziplist 'zl' */
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+                                            unsigned char *zl) {
+    unsigned char *value;
+    unsigned int sz;
+    long long longval;
+    char longstr[32] = {0};
+    unsigned char *p;
+
+    for (p = ziplistIndex(zl, 0); ziplistGet(p, &value, &sz, &longval);
+         p = ziplistNext(zl, p)) {
+        if (!value) {
+            /* No string value came back: render the integer as text so
+             * it can be pushed like any other value. */
+            sz = ll2string(longstr, sizeof(longstr), longval);
+            value = (unsigned char *)longstr;
+        }
+        quicklistPushTail(quicklist, value, sz);
+    }
+
+    zfree(zl);
+    return quicklist;
+}
+
+/* Build a new (potentially multi-node) quicklist, configured with
+ * 'fill' and 'compress', out of the values of one existing ziplist.
+ *
+ * Returns new quicklist. Frees passed-in ziplist 'zl'. */
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+                                      unsigned char *zl) {
+    quicklist *fresh = quicklistNew(fill, compress);
+
+    return quicklistAppendValuesFromZiplist(fresh, zl);
+}
+/* If node 'n' has no entries left (count == 0), unlink and free it with
+ * __quicklistDelNode() and set 'n' to NULL so the caller can tell it is
+ * gone. 'n' must therefore be an assignable lvalue. */
+#define quicklistDeleteIfEmpty(ql, n) \
+ do { \
+ if ((n)->count == 0) { \
+ __quicklistDelNode((ql), (n)); \
+ (n) = NULL; \
+ } \
+ } while (0)
+/* Unlink 'node' from 'quicklist', subtract its entry count from the
+ * list total, free the node together with its buffer and decrement the
+ * node counter 'len'. 'node' must not be used after this call. */
+REDIS_STATIC void __quicklistDelNode(quicklist *quicklist,
+ quicklistNode *node) {
+ if (node->next)
+ node->next->prev = node->prev;
+ if (node->prev)
+ node->prev->next = node->next;
+
+ if (node == quicklist->tail) {
+ quicklist->tail = node->prev;
+ }
+
+ if (node == quicklist->head) {
+ quicklist->head = node->next;
+ }
+
+ /* If we deleted a node within our compress depth, we
+ * now have compressed nodes needing to be decompressed.
+ * Note: this pass runs while quicklist->len still includes the node
+ * being removed; len is only decremented at the end of this function. */
+ __quicklistCompress(quicklist, NULL);
+
+ quicklist->count -= node->count;
+
+ zfree(node->zl);
+ zfree(node);
+ quicklist->len--;
+}
+
+/* Remove the single entry that '*p' points at inside 'node'.
+ *
+ * Note: quicklistDelIndex() *requires* uncompressed nodes because you
+ * already had to get *p from an uncompressed node somewhere.
+ *
+ * When the node runs empty it is removed from the list and freed;
+ * otherwise its cached byte size is refreshed. The list's total entry
+ * count is decremented in both cases.
+ *
+ * Returns 1 if the entire node was deleted, 0 if node still exists.
+ * Also updates in/out param 'p' with the next offset in the ziplist. */
+REDIS_STATIC int quicklistDelIndex(quicklist *quicklist, quicklistNode *node,
+                                   unsigned char **p) {
+    node->zl = ziplistDelete(node->zl, p);
+    node->count--;
+
+    int node_removed = (node->count == 0);
+    if (node_removed) {
+        /* 'node' is freed here; the original node is no longer valid. */
+        __quicklistDelNode(quicklist, node);
+    } else {
+        quicklistNodeUpdateSz(node);
+    }
+
+    quicklist->count--;
+    return node_removed;
+}
+
+/* Delete the element described by 'entry' while iterating with 'iter'.
+ *
+ * 'entry' stores enough metadata to delete the proper position in
+ * the correct ziplist in the correct quicklist node. The iterator is
+ * adjusted so that the next quicklistNext() call carries on correctly. */
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry) {
+    /* Remember the neighbours first: the node itself may be freed below. */
+    quicklistNode *after_node = entry->node->next;
+    quicklistNode *before_node = entry->node->prev;
+
+    int node_removed = quicklistDelIndex((quicklist *)entry->quicklist,
+                                         entry->node, &entry->zi);
+
+    /* The cached ziplist position is stale after any delete. */
+    iter->zi = NULL;
+
+    /* Node survived: nothing else to fix up. The existing iter->offset
+     * already designates the next element:
+     *  - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1
+     *  - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0
+     * and if the last element of the ziplist was deleted, the next call
+     * into quicklistNext() will jump to the next node. */
+    if (!node_removed) return;
+
+    /* Node is gone: re-seat the iterator on the neighbour that lies in
+     * the direction of travel. */
+    if (iter->direction == AL_START_HEAD) {
+        iter->current = after_node;
+        iter->offset = 0;
+    } else if (iter->direction == AL_START_TAIL) {
+        iter->current = before_node;
+        iter->offset = -1;
+    }
+}
+
+/* Replace quicklist entry at offset 'index' by 'data' with length 'sz'.
+ *
+ * The old entry is deleted from its ziplist and the new value inserted
+ * at the same position. Because the replacement may differ in length,
+ * the node's cached byte size is refreshed before the node is handed
+ * back to the compression logic, which reads node->sz.
+ *
+ * Returns 1 if replace happened.
+ * Returns 0 if replace failed and no changes happened (quicklistIndex()
+ * could not resolve 'index'). */
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+                            int sz) {
+    quicklistEntry entry;
+    if (likely(quicklistIndex(quicklist, index, &entry))) {
+        /* quicklistIndex provides an uncompressed node */
+        entry.node->zl = ziplistDelete(entry.node->zl, &entry.zi);
+        entry.node->zl = ziplistInsert(entry.node->zl, entry.zi, data, sz);
+        /* Keep node->sz in step with the modified ziplist. */
+        quicklistNodeUpdateSz(entry.node);
+        quicklistCompress(quicklist, entry.node);
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* Given two nodes, try to merge their ziplists into a single node.
+ *
+ * This helps us not have a quicklist with 3 element ziplists if
+ * our fill factor can handle much higher levels.
+ *
+ * Note: 'a' must be to the LEFT of 'b'.
+ *
+ * After calling this function, both 'a' and 'b' should be considered
+ * unusable. The return value from this function must be used
+ * instead of re-using any of the quicklistNode input arguments.
+ *
+ * Returns the input node picked to merge against or NULL if
+ * merging was not possible. */
+REDIS_STATIC quicklistNode *_quicklistZiplistMerge(quicklist *quicklist,
+                                                   quicklistNode *a,
+                                                   quicklistNode *b) {
+    D("Requested merge (a,b) (%u, %u)", a->count, b->count);
+
+    /* Both nodes are decompressed before their ziplists are handed to
+     * ziplistMerge(). */
+    quicklistDecompressNode(a);
+    quicklistDecompressNode(b);
+
+    if (!ziplistMerge(&a->zl, &b->zl)) {
+        /* The merge returned NULL and nothing changed. */
+        return NULL;
+    }
+
+    /* We merged ziplists! The node whose ziplist pointer is now NULL is
+     * the unused one and gets removed. */
+    quicklistNode *survivor = NULL, *emptied = NULL;
+    if (!a->zl) {
+        emptied = a;
+        survivor = b;
+    } else if (!b->zl) {
+        emptied = b;
+        survivor = a;
+    }
+    survivor->count = ziplistLen(survivor->zl);
+    quicklistNodeUpdateSz(survivor);
+
+    emptied->count = 0;
+    __quicklistDelNode(quicklist, emptied);
+    quicklistCompress(quicklist, survivor);
+    return survivor;
+}
+
+/* Attempt to merge ziplists within two nodes on either side of 'center'.
+ *
+ * We attempt to merge, in this order and only where
+ * _quicklistNodeAllowMerge() permits:
+ *   - (center->prev->prev, center->prev)
+ *   - (center->next, center->next->next)
+ *   - (center->prev, center)
+ *   - (center, center->next)
+ *
+ * Any node involved in a merge may be freed, so pointers captured before
+ * a merge are discarded afterwards and neighbours are re-read from the
+ * list. 'center' itself must be considered unusable by the caller once
+ * this function returns. */
+REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist,
+                                       quicklistNode *center) {
+    int fill = quicklist->fill;
+    quicklistNode *prev, *prev_prev, *next, *next_next, *target;
+    prev = prev_prev = next = next_next = target = NULL;
+
+    if (center->prev) {
+        prev = center->prev;
+        if (center->prev->prev)
+            prev_prev = center->prev->prev;
+    }
+
+    if (center->next) {
+        next = center->next;
+        if (center->next->next)
+            next_next = center->next->next;
+    }
+
+    /* Try to merge prev_prev and prev */
+    if (_quicklistNodeAllowMerge(prev, prev_prev, fill)) {
+        _quicklistZiplistMerge(quicklist, prev_prev, prev);
+        prev_prev = prev = NULL; /* they could have moved, invalidate them. */
+    }
+
+    /* Try to merge next and next_next */
+    if (_quicklistNodeAllowMerge(next, next_next, fill)) {
+        _quicklistZiplistMerge(quicklist, next, next_next);
+        next = next_next = NULL; /* they could have moved, invalidate them. */
+    }
+
+    /* Try to merge center node and previous node */
+    if (_quicklistNodeAllowMerge(center, center->prev, fill)) {
+        target = _quicklistZiplistMerge(quicklist, center->prev, center);
+        center = NULL; /* center could have been deleted, invalidate it. */
+    } else {
+        /* else, we didn't merge here, but target needs to be valid below. */
+        target = center;
+    }
+
+    /* _quicklistZiplistMerge() returns NULL when the ziplists could not
+     * be merged; there is then no node to continue with, and
+     * dereferencing 'target' below would be a NULL access. */
+    if (!target)
+        return;
+
+    /* Use result of center merge (or original) to merge with next node. */
+    if (_quicklistNodeAllowMerge(target, target->next, fill)) {
+        _quicklistZiplistMerge(quicklist, target, target->next);
+    }
+}
+
+/* Split 'node' into two parts, parameterized by 'offset' and 'after'.
+ *
+ * The node's ziplist (node->sz bytes) is copied into a newly created
+ * node; one range is then deleted from the original and another from
+ * the copy. The 'after' argument controls which range each side loses.
+ *
+ * If 'after'==1, returned node will have elements _after_ 'offset'.
+ * The returned node will have elements [OFFSET+1, END].
+ * The input node keeps elements [0, OFFSET].
+ *
+ * If 'after'==0, the input node loses the range (start 0, extent OFFSET)
+ * and the returned node loses the range (start OFFSET, until the end).
+ * NOTE(review): assuming ziplistDeleteRange(zl, index, num) removes
+ * 'num' entries starting at 'index', the returned node then holds
+ * [0, OFFSET-1] and the input node keeps [OFFSET, END], i.e. the element
+ * at OFFSET stays with the input node -- confirm against the ziplist
+ * implementation.
+ *
+ * The input node keeps all elements not taken by the returned node.
+ * The new node is not linked into any list here; count and sz of both
+ * nodes are refreshed from their ziplists.
+ *
+ * Returns newly created node. (No code path in this function returns
+ * NULL.) */
+REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset,
+ int after) {
+ size_t zl_sz = node->sz;
+
+ quicklistNode *new_node = quicklistCreateNode();
+ new_node->zl = zmalloc(zl_sz);
+
+ /* Copy original ziplist so we can split it */
+ memcpy(new_node->zl, node->zl, zl_sz);
+
+ /* -1 here means "continue deleting until the list ends" */
+ int orig_start = after ? offset + 1 : 0;
+ int orig_extent = after ? -1 : offset;
+ int new_start = after ? 0 : offset;
+ int new_extent = after ? offset + 1 : -1;
+
+ D("After %d (%d); ranges: [%d, %d], [%d, %d]", after, offset, orig_start,
+ orig_extent, new_start, new_extent);
+
+ node->zl = ziplistDeleteRange(node->zl, orig_start, orig_extent);
+ node->count = ziplistLen(node->zl);
+ quicklistNodeUpdateSz(node);
+
+ new_node->zl = ziplistDeleteRange(new_node->zl, new_start, new_extent);
+ new_node->count = ziplistLen(new_node->zl);
+ quicklistNodeUpdateSz(new_node);
+
+ D("After split lengths: orig (%d), new (%d)", node->count, new_node->count);
+ return new_node;
+}
+
+/* Insert a new entry before or after existing entry 'entry'.
+ *
+ * If after==1, the new value is inserted after 'entry', otherwise
+ * the new value is inserted before 'entry'.
+ *
+ * Depending on whether the entry's node and its neighbour accept the
+ * value (per _quicklistNodeAllowInsert()), the value goes into the
+ * entry's node, into the adjacent node, into a brand new node, or the
+ * entry's node is split in two around the entry.
+ * quicklist->count is incremented on every path. */
+REDIS_STATIC void _quicklistInsert(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz, int after) {
+ int full = 0, at_tail = 0, at_head = 0, full_next = 0, full_prev = 0;
+ int fill = quicklist->fill;
+ quicklistNode *node = entry->node;
+ quicklistNode *new_node = NULL;
+
+ if (!node) {
+ /* we have no reference node, so let's create only node in the list
+ * NOTE(review): unlike the other paths that build a node, this one
+ * does not refresh new_node->sz with quicklistNodeUpdateSz(). */
+ D("No node given!");
+ new_node = quicklistCreateNode();
+ new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+ __quicklistInsertNode(quicklist, NULL, new_node, after);
+ new_node->count++;
+ quicklist->count++;
+ return;
+ }
+
+ /* Populate accounting flags for easier boolean checks later */
+ if (!_quicklistNodeAllowInsert(node, fill, sz)) {
+ D("Current node is full with count %d with requested fill %lu",
+ node->count, fill); /* NOTE(review): 'fill' is an int but printed with %lu */
+ full = 1;
+ }
+
+ if (after && (entry->offset == node->count)) {
+ D("At Tail of current ziplist");
+ at_tail = 1;
+ if (!_quicklistNodeAllowInsert(node->next, fill, sz)) {
+ D("Next node is full too.");
+ full_next = 1;
+ }
+ }
+
+ if (!after && (entry->offset == 0)) {
+ D("At Head");
+ at_head = 1;
+ if (!_quicklistNodeAllowInsert(node->prev, fill, sz)) {
+ D("Prev node is full too.");
+ full_prev = 1;
+ }
+ }
+
+ /* Now determine where and how to insert the new element */
+ if (!full && after) {
+ D("Not full, inserting after current position.");
+ quicklistDecompressNodeForUse(node);
+ unsigned char *next = ziplistNext(node->zl, entry->zi);
+ if (next == NULL) {
+ node->zl = ziplistPush(node->zl, value, sz, ZIPLIST_TAIL);
+ } else {
+ node->zl = ziplistInsert(node->zl, next, value, sz);
+ }
+ node->count++;
+ quicklistNodeUpdateSz(node);
+ quicklistRecompressOnly(quicklist, node);
+ } else if (!full && !after) {
+ D("Not full, inserting before current position.");
+ quicklistDecompressNodeForUse(node);
+ node->zl = ziplistInsert(node->zl, entry->zi, value, sz);
+ node->count++;
+ quicklistNodeUpdateSz(node);
+ quicklistRecompressOnly(quicklist, node);
+ } else if (full && at_tail && node->next && !full_next && after) {
+ /* If we are: at tail, next has free space, and inserting after:
+ * - insert entry at head of next node. */
+ D("Full and tail, but next isn't full; inserting next node head");
+ new_node = node->next;
+ quicklistDecompressNodeForUse(new_node);
+ new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_HEAD);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ quicklistRecompressOnly(quicklist, new_node);
+ } else if (full && at_head && node->prev && !full_prev && !after) {
+ /* If we are: at head, previous has free space, and inserting before:
+ * - insert entry at tail of previous node. */
+ D("Full and head, but prev isn't full, inserting prev node tail");
+ new_node = node->prev;
+ quicklistDecompressNodeForUse(new_node);
+ new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_TAIL);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ quicklistRecompressOnly(quicklist, new_node);
+ } else if (full && ((at_tail && node->next && full_next && after) ||
+ (at_head && node->prev && full_prev && !after))) {
+ /* If we are: full, and our prev/next is full, then:
+ * - create new node and attach to quicklist */
+ D("\tprovisioning new node...");
+ new_node = quicklistCreateNode();
+ new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ __quicklistInsertNode(quicklist, node, new_node, after);
+ } else if (full) {
+ /* else, node is full we need to split it. */
+ /* covers both after and !after cases: the split-off node receives
+ * the value, is linked next to 'node', and neighbouring nodes are
+ * then merged back together where the fill policy allows it. */
+ D("\tsplitting node...");
+ quicklistDecompressNodeForUse(node);
+ new_node = _quicklistSplitNode(node, entry->offset, after);
+ new_node->zl = ziplistPush(new_node->zl, value, sz,
+ after ? ZIPLIST_HEAD : ZIPLIST_TAIL);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ __quicklistInsertNode(quicklist, node, new_node, after);
+ _quicklistMergeNodes(quicklist, node);
+ }
+
+ quicklist->count++;
+}
+/* Insert 'value' ('sz' bytes) before 'entry': calls _quicklistInsert()
+ * with after == 0. */
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 0);
+}
+/* Insert 'value' ('sz' bytes) after 'entry': calls _quicklistInsert()
+ * with after == 1. */
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 1);
+}
+
+/* Delete a range of elements from the quicklist.
+ *
+ * elements may span across multiple quicklistNodes, so we
+ * have to be careful about tracking where we start and end.
+ *
+ * 'start' is an index as accepted by quicklistIndex() (negative values
+ * count from the tail); 'count' is the number of elements to remove and
+ * is clamped to what exists between 'start' and the end of the list.
+ *
+ * Returns 1 if entries were deleted, 0 if nothing was deleted
+ * (count <= 0, or quicklistIndex() could not resolve 'start'). */
+int quicklistDelRange(quicklist *quicklist, const long start,
+ const long count) {
+ if (count <= 0)
+ return 0;
+
+ unsigned long extent = count; /* range is inclusive of start position */
+
+ if (start >= 0 && extent > (quicklist->count - start)) {
+ /* if requesting delete more elements than exist, limit to list size. */
+ extent = quicklist->count - start;
+ } else if (start < 0 && extent > (unsigned long)(-start)) {
+ /* else, if at negative offset, limit max size to rest of list. */
+ extent = -start; /* c.f. LREM -29 29; just delete until end. */
+ }
+
+ quicklistEntry entry;
+ if (!quicklistIndex(quicklist, start, &entry))
+ return 0;
+
+ D("Quicklist delete request for start %ld, count %ld, extent: %ld", start,
+ count, extent);
+ quicklistNode *node = entry.node;
+
+ /* iterate over next nodes until everything is deleted. */
+ while (extent) {
+ quicklistNode *next = node->next; /* saved: 'node' may be freed below */
+
+ unsigned long del;
+ int delete_entire_node = 0;
+ if (entry.offset == 0 && extent >= node->count) {
+ /* If we are deleting more than the count of this node, we
+ * can just delete the entire node without ziplist math. */
+ delete_entire_node = 1;
+ del = node->count;
+ } else if (entry.offset >= 0 && extent >= node->count) {
+ /* If deleting more nodes after this one, calculate delete based
+ * on size of current node. */
+ del = node->count - entry.offset;
+ } else if (entry.offset < 0) {
+ /* If offset is negative, we are in the first run of this loop
+ * and we are deleting the entire range
+ * from this start offset to end of list. Since the Negative
+ * offset is the number of elements until the tail of the list,
+ * just use it directly as the deletion count. */
+ del = -entry.offset;
+
+ /* If the positive offset is greater than the remaining extent,
+ * we only delete the remaining extent, not the entire offset.
+ */
+ if (del > extent)
+ del = extent;
+ } else {
+ /* else, we are deleting less than the extent of this node, so
+ * use extent directly. */
+ del = extent;
+ }
+
+ D("[%ld]: asking to del: %ld because offset: %d; (ENTIRE NODE: %d), "
+ "node count: %u",
+ extent, del, entry.offset, delete_entire_node, node->count);
+
+ if (delete_entire_node) {
+ __quicklistDelNode(quicklist, node); /* also subtracts node->count from quicklist->count */
+ } else {
+ quicklistDecompressNodeForUse(node);
+ node->zl = ziplistDeleteRange(node->zl, entry.offset, del);
+ quicklistNodeUpdateSz(node);
+ node->count -= del;
+ quicklist->count -= del;
+ quicklistDeleteIfEmpty(quicklist, node); /* sets 'node' to NULL if it was freed */
+ if (node)
+ quicklistRecompressOnly(quicklist, node);
+ }
+
+ extent -= del;
+
+ node = next;
+
+ entry.offset = 0; /* every following node is processed from its first entry */
+ }
+ return 1;
+}
+
+/* Passthrough to ziplistCompare(): forwards 'p1', 'p2' and 'p2_len'
+ * unchanged and returns its result as-is. */
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len) {
+ return ziplistCompare(p1, p2, p2_len);
+}
+
+/* Returns a quicklist iterator 'iter'. After the initialization every
+ * call to quicklistNext() will return the next element of the quicklist.
+ *
+ * 'direction' selects the starting end:
+ *   AL_START_HEAD: start at the head node, offset 0.
+ *   AL_START_TAIL: start at the tail node, offset -1.
+ * Any other value yields an iterator with no current node, so that
+ * quicklistNext() reports the end of iteration immediately instead of
+ * reading uninitialized fields.
+ *
+ * The iterator is allocated with zmalloc(); release it with
+ * quicklistReleaseIterator(). */
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction) {
+    quicklistIter *iter;
+
+    iter = zmalloc(sizeof(*iter));
+
+    /* Defined defaults in case 'direction' matches neither constant. */
+    iter->current = NULL;
+    iter->offset = 0;
+
+    if (direction == AL_START_HEAD) {
+        iter->current = quicklist->head;
+        iter->offset = 0;
+    } else if (direction == AL_START_TAIL) {
+        iter->current = quicklist->tail;
+        iter->offset = -1;
+    }
+
+    iter->direction = direction;
+    iter->quicklist = quicklist;
+
+    iter->zi = NULL;
+
+    return iter;
+}
+
+/* Create an iterator positioned on the element at index 'idx' that
+ * will return elements in 'direction' direction.
+ *
+ * Returns the new iterator, or NULL when quicklistIndex() cannot
+ * resolve 'idx'. */
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+                                         const int direction,
+                                         const long long idx) {
+    quicklistEntry entry;
+
+    if (!quicklistIndex(quicklist, idx, &entry)) return NULL;
+
+    /* Start from a regular iterator, then re-seat it on the located node. */
+    quicklistIter *iter = quicklistGetIterator(quicklist, direction);
+    iter->current = entry.node;
+    iter->offset = entry.offset;
+    iter->zi = NULL;
+    return iter;
+}
+
+/* Release iterator.
+ * If we still have a valid current node, then re-encode current node
+ * via quicklistCompress() before the iterator itself is freed.
+ * 'iter' must not be NULL: it is dereferenced unconditionally. */
+void quicklistReleaseIterator(quicklistIter *iter) {
+ if (iter->current)
+ quicklistCompress(iter->quicklist, iter->current);
+
+ zfree(iter);
+}
+
+/* Get next element in iterator.
+ *
+ * Note: You must NOT insert into the list while iterating over it.
+ * You *may* delete from the list while iterating using the
+ * quicklistDelEntry() function.
+ * If you insert into the quicklist while iterating, you should
+ * re-create the iterator after your addition.
+ *
+ * iter = quicklistGetIterator(quicklist,<direction>);
+ * quicklistEntry entry;
+ * while (quicklistNext(iter, &entry)) {
+ * if (entry.value)
+ * [[ use entry.value with entry.sz ]]
+ * else
+ * [[ use entry.longval ]]
+ * }
+ *
+ * Populates 'entry' with values for this iteration.
+ * Returns 0 when iteration is complete or if iteration not possible.
+ * If return value is 0, the contents of 'entry' are not valid.
+ *
+ * When the current node's ziplist is exhausted, the node is handed to
+ * quicklistCompress(), the iterator moves to the neighbouring node in
+ * its direction, and the function calls itself to fetch from there.
+ */
+int quicklistNext(quicklistIter *iter, quicklistEntry *entry) {
+ initEntry(entry);
+
+ if (!iter) {
+ D("Returning because no iter!");
+ return 0;
+ }
+
+ entry->quicklist = iter->quicklist;
+ entry->node = iter->current;
+
+ if (!iter->current) {
+ D("Returning because current node is NULL")
+ return 0;
+ }
+
+ unsigned char *(*nextFn)(unsigned char *, unsigned char *) = NULL;
+ int offset_update = 0;
+
+ if (!iter->zi) {
+ /* If !zi, use current index. */
+ quicklistDecompressNodeForUse(iter->current);
+ iter->zi = ziplistIndex(iter->current->zl, iter->offset);
+ } else {
+ /* else, use existing iterator offset and get prev/next as necessary.
+ * NOTE(review): nextFn stays NULL if 'direction' is neither
+ * AL_START_HEAD nor AL_START_TAIL, and is then called below. */
+ if (iter->direction == AL_START_HEAD) {
+ nextFn = ziplistNext;
+ offset_update = 1;
+ } else if (iter->direction == AL_START_TAIL) {
+ nextFn = ziplistPrev;
+ offset_update = -1;
+ }
+ iter->zi = nextFn(iter->current->zl, iter->zi);
+ iter->offset += offset_update;
+ }
+
+ entry->zi = iter->zi;
+ entry->offset = iter->offset;
+
+ if (iter->zi) {
+ /* Populate value from existing ziplist position */
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ return 1;
+ } else {
+ /* We ran out of ziplist entries.
+ * Pick next node, update offset, then re-run retrieval. */
+ quicklistCompress(iter->quicklist, iter->current);
+ if (iter->direction == AL_START_HEAD) {
+ /* Forward traversal */
+ D("Jumping to start of next node");
+ iter->current = iter->current->next;
+ iter->offset = 0;
+ } else if (iter->direction == AL_START_TAIL) {
+ /* Reverse traversal */
+ D("Jumping to end of previous node");
+ iter->current = iter->current->prev;
+ iter->offset = -1;
+ }
+ iter->zi = NULL;
+ return quicklistNext(iter, entry);
+ }
+}
+
+/* Duplicate the quicklist.
+ * On success a copy of the original quicklist is returned.
+ *
+ * Every node of 'orig' is copied in its current encoding: an
+ * LZF-encoded node is copied as its quicklistLZF blob (header plus
+ * compressed bytes), a RAW node as its 'sz'-byte ziplist. The choice is
+ * made from the encoding of the *source* node, because a freshly created
+ * node is always RAW and has no buffer yet.
+ *
+ * The original quicklist both on success or error is never modified.
+ *
+ * Returns newly allocated quicklist. */
+quicklist *quicklistDup(quicklist *orig) {
+    quicklist *copy;
+
+    copy = quicklistNew(orig->fill, orig->compress);
+
+    for (quicklistNode *current = orig->head; current;
+         current = current->next) {
+        quicklistNode *node = quicklistCreateNode();
+
+        if (current->encoding == QUICKLIST_NODE_ENCODING_LZF) {
+            /* Compressed source: the buffer is sizeof(header) + lzf->sz
+             * bytes long, not current->sz. */
+            quicklistLZF *lzf = (quicklistLZF *)current->zl;
+            size_t lzf_sz = sizeof(*lzf) + lzf->sz;
+            node->zl = zmalloc(lzf_sz);
+            memcpy(node->zl, current->zl, lzf_sz);
+        } else if (current->encoding == QUICKLIST_NODE_ENCODING_RAW) {
+            node->zl = zmalloc(current->sz);
+            memcpy(node->zl, current->zl, current->sz);
+        }
+
+        node->count = current->count;
+        copy->count += node->count;
+        node->sz = current->sz;
+        node->encoding = current->encoding;
+
+        _quicklistInsertNodeAfter(copy, copy->tail, node);
+    }
+
+    /* copy->count must equal orig->count here */
+    return copy;
+}
+
+/* Populate 'entry' with the element at the specified zero-based index
+ * where 0 is the head, 1 is the element next to head
+ * and so on. Negative integers are used in order to count
+ * from the tail, -1 is the last element, -2 the penultimate
+ * and so on. If the index is out of range 0 is returned.
+ *
+ * The node holding the element is located by summing node counts from
+ * the head (or from the tail for a negative index). That node is
+ * decompressed for use and is NOT recompressed before returning.
+ *
+ * Returns 1 if element found
+ * Returns 0 if element not found */
+int quicklistIndex(const quicklist *quicklist, const long long idx,
+ quicklistEntry *entry) {
+ quicklistNode *n;
+ unsigned long long accum = 0; /* entries in the nodes skipped so far */
+ unsigned long long index;
+ int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */
+
+ initEntry(entry);
+ entry->quicklist = quicklist;
+
+ if (!forward) {
+ index = (-idx) - 1; /* distance from the tail: -1 -> 0, -2 -> 1, ... */
+ n = quicklist->tail;
+ } else {
+ index = idx;
+ n = quicklist->head;
+ }
+
+ if (index >= quicklist->count)
+ return 0;
+
+ while (likely(n)) {
+ if ((accum + n->count) > index) {
+ break;
+ } else {
+ D("Skipping over (%p) %u at accum %lld", (void *)n, n->count,
+ accum);
+ accum += n->count;
+ n = forward ? n->next : n->prev;
+ }
+ }
+
+ if (!n)
+ return 0;
+
+ D("Found node: %p at accum %llu, idx %llu, sub+ %llu, sub- %llu", (void *)n,
+ accum, index, index - accum, (-index) - 1 + accum);
+
+ entry->node = n;
+ if (forward) {
+ /* forward = normal head-to-tail offset. */
+ entry->offset = index - accum;
+ } else {
+ /* reverse = need negative offset for tail-to-head, so undo
+ * the result of the original if (index < 0) above.
+ * 'index' is unsigned, so (-index) wraps around; the intended
+ * negative offset appears once the value is stored in
+ * entry->offset. */
+ entry->offset = (-index) - 1 + accum;
+ }
+
+ quicklistDecompressNodeForUse(entry->node);
+ entry->zi = ziplistIndex(entry->node->zl, entry->offset);
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ /* The caller will use our result, so we don't re-compress here.
+ * The caller can recompress or delete the node as needed. */
+ return 1;
+}
+
+/* Rotate the quicklist: the element currently at the tail becomes the new
+ * head. Lists holding zero or one element are left untouched. */
+void quicklistRotate(quicklist *quicklist) {
+    if (quicklist->count <= 1)
+        return;
+
+    unsigned char *tail_entry = ziplistIndex(quicklist->tail->zl, -1);
+    unsigned char *bytes;
+    unsigned int nbytes;
+    long long number;
+    char numbuf[32] = {0};
+
+    /* Read the tail entry; an integer-encoded entry comes back in 'number'
+     * with 'bytes' left NULL. */
+    ziplistGet(tail_entry, &bytes, &nbytes, &number);
+    if (bytes == NULL) {
+        /* Render the integer as text so it can be pushed again. */
+        nbytes = ll2string(numbuf, sizeof(numbuf), number);
+        bytes = (unsigned char *)numbuf;
+    }
+
+    /* The push has to come first: once the tail entry is deleted the bytes
+     * we are pointing at are gone. */
+    quicklistPushHead(quicklist, bytes, nbytes);
+
+    /* With a single node, head and tail share one ziplist, and the push
+     * above may have reallocated it; look the tail entry up again. */
+    if (quicklist->len == 1)
+        tail_entry = ziplistIndex(quicklist->tail->zl, -1);
+
+    /* Finally drop the old tail entry. */
+    quicklistDelIndex(quicklist, quicklist->tail, &tail_entry);
+}
+
+/* Pop one element from the head or tail of the quicklist.
+ *
+ * 'where' selects the end (QUICKLIST_HEAD or QUICKLIST_TAIL).
+ *
+ * If the popped element is a string, '*data' receives whatever 'saver'
+ * returns for it and '*sz' receives its length. If the element is an
+ * integer, '*data' is set to NULL and the number is stored in '*sval'.
+ * Any of 'data', 'sz' and 'sval' may be NULL, in which case that output
+ * is skipped.
+ *
+ * Returns 0 when no element could be popped.
+ * Returns 1 when an element was popped: if '*data' is set use 'data' and
+ * 'sz', otherwise use 'sval'. */
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+                       unsigned int *sz, long long *sval,
+                       void *(*saver)(unsigned char *data, unsigned int sz)) {
+    if (quicklist->count == 0)
+        return 0;
+
+    /* Put recognisable defaults into every output the caller asked for. */
+    if (data)
+        *data = NULL;
+    if (sz)
+        *sz = 0;
+    if (sval)
+        *sval = -123456789;
+
+    /* Pick the node at the requested end; bail out on an unknown 'where'
+     * or a missing end node. */
+    quicklistNode *target;
+    if (where == QUICKLIST_HEAD && quicklist->head) {
+        target = quicklist->head;
+    } else if (where == QUICKLIST_TAIL && quicklist->tail) {
+        target = quicklist->tail;
+    } else {
+        return 0;
+    }
+
+    /* First entry for a head pop, last entry otherwise. */
+    const int at = (where == QUICKLIST_HEAD) ? 0 : -1;
+    unsigned char *slot = ziplistIndex(target->zl, at);
+
+    unsigned char *str;
+    unsigned int len;
+    long long num;
+    if (!ziplistGet(slot, &str, &len, &num))
+        return 0;
+
+    if (str) {
+        /* String element: hand it to the saver before it is deleted. */
+        if (data)
+            *data = saver(str, len);
+        if (sz)
+            *sz = len;
+    } else {
+        /* Integer element. */
+        if (data)
+            *data = NULL;
+        if (sval)
+            *sval = num;
+    }
+
+    quicklistDelIndex(quicklist, target, &slot);
+    return 1;
+}
+
+/* Saver callback used by quicklistPop(): returns a zmalloc'd copy of the
+ * 'sz' bytes at 'data', or NULL when 'data' is NULL. */
+REDIS_STATIC void *_quicklistSaver(unsigned char *data, unsigned int sz) {
+    if (!data)
+        return NULL;
+
+    unsigned char *dup = zmalloc(sz);
+    memcpy(dup, data, sz);
+    return dup;
+}
+
+/* Default pop function.
+ *
+ * Pops from the end selected by 'where' using _quicklistSaver, so a string
+ * element is returned in '*data' as a zmalloc'd copy with its length in
+ * '*sz'; an integer element is returned in '*slong' with '*data' NULL.
+ * Output pointers that are NULL are ignored.
+ *
+ * Returns 0 if the list is empty (outputs untouched), otherwise the result
+ * of quicklistPopCustom(). */
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+                 unsigned int *sz, long long *slong) {
+    if (quicklist->count == 0)
+        return 0;
+
+    unsigned char *popped_str;
+    unsigned int popped_len;
+    long long popped_num;
+    int found = quicklistPopCustom(quicklist, where, &popped_str, &popped_len,
+                                   &popped_num, _quicklistSaver);
+
+    /* Forward only the outputs the caller is interested in. */
+    if (sz)
+        *sz = popped_len;
+    if (slong)
+        *slong = popped_num;
+    if (data)
+        *data = popped_str;
+    return found;
+}
+
+/* Wrapper to allow argument-based switching between HEAD/TAIL push.
+ * Any other value of 'where' is silently ignored. */
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+                   int where) {
+    if (where == QUICKLIST_HEAD) {
+        quicklistPushHead(quicklist, value, sz);
+        return;
+    }
+
+    if (where == QUICKLIST_TAIL)
+        quicklistPushTail(quicklist, value, sz);
+}
+
+/* The rest of this file is test cases and test helpers. */
+#ifdef REDIS_TEST
+#include <stdint.h>
+#include <sys/time.h>
+
+/* Test-only replacement for assert(): on failure it prints the failing
+ * expression with file and line and increments 'err' instead of aborting.
+ * A variable named 'err' must be in scope wherever this is used. */
+#define assert(_e) \
+ do { \
+ if (!(_e)) { \
+ printf("\n\n=== ASSERTION FAILED ===\n"); \
+ printf("==> %s:%d '%s' is not true\n", __FILE__, __LINE__, #_e); \
+ err++; \
+ } \
+ } while (0)
+
+/* Print a formatted error message. Does not touch 'err'; callers count the
+ * error themselves. Needs at least one argument after the format string. */
+#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
+
+/* Print a success marker. */
+#define OK printf("\tOK\n")
+
+/* Print a generic failure marker and count one error in 'err'. */
+#define ERROR \
+ do { \
+ printf("\tERROR!\n"); \
+ err++; \
+ } while (0)
+
+/* Print file:function:line followed by a formatted error message and count
+ * one error in 'err'. Needs at least one argument after the format string. */
+#define ERR(x, ...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("ERROR! " x "\n", __VA_ARGS__); \
+ err++; \
+ } while (0)
+
+/* Announce a test case. The expansion ends with ';', so the '{ ... }' that
+ * follows a TEST()/TEST_DESC() use is an ordinary compound statement. */
+#define TEST(name) printf("test — %s\n", name);
+#define TEST_DESC(name, ...) printf("test — " name "\n", __VA_ARGS__);
+
+/* Set to non-zero to make ql_info() print list details. */
+#define QL_TEST_VERBOSE 0
+
+/* Mark a parameter or variable as intentionally unused. */
+#define UNUSED(x) (void)(x)
+/* Debug helper: when QL_TEST_VERBOSE is non-zero, print ql->len, ql->count
+ * and the ziplistLen() of the head and tail nodes (when present).
+ * With QL_TEST_VERBOSE set to 0 this is a no-op.
+ *
+ * NOTE(review): _ql_verify() prints ql->len with %u while this uses %lu,
+ * and ziplistLen() is printed with %d here; confirm the format specifiers
+ * against the field/return types before enabling verbose mode. */
+static void ql_info(quicklist *ql) {
+#if QL_TEST_VERBOSE
+ printf("Container length: %lu\n", ql->len);
+ printf("Container size: %lu\n", ql->count);
+ if (ql->head)
+ printf("\t(zsize head: %d)\n", ziplistLen(ql->head->zl));
+ if (ql->tail)
+ printf("\t(zsize tail: %d)\n", ziplistLen(ql->tail->zl));
+ printf("\n");
+#else
+ /* Silence the unused-parameter warning in the non-verbose build. */
+ UNUSED(ql);
+#endif
+}
+
+/* Current UNIX time expressed in microseconds, as read by gettimeofday(). */
+static long long ustime(void) {
+    struct timeval now;
+
+    gettimeofday(&now, NULL);
+    /* Widen the seconds before scaling so the product is computed in
+     * long long. */
+    return ((long long)now.tv_sec) * 1000000 + now.tv_usec;
+}
+
+/* Current UNIX time expressed in milliseconds (ustime() divided by 1000). */
+static long long mstime(void) {
+    long long usec = ustime();
+    return usec / 1000;
+}
+
+/* Walk the whole quicklist with an iterator, head-to-tail when 'forward'
+ * is non-zero and tail-to-head otherwise.
+ * When 'print' is non-zero each element is printed together with its
+ * position and the running count of distinct nodes visited.
+ *
+ * Returns the number of elements actually seen while iterating. */
+static int _itrprintr(quicklist *ql, int print, int forward) {
+    quicklistIter *it =
+        quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL);
+    quicklistEntry cur;
+    quicklistNode *last_node = NULL;
+    int nodes = 0;
+    int elements;
+
+    for (elements = 0; quicklistNext(it, &cur); elements++) {
+        /* A change of node means we crossed into another list node. */
+        if (cur.node != last_node) {
+            nodes++;
+            last_node = cur.node;
+        }
+        if (print) {
+            printf("[%3d (%2d)]: [%.*s] (%lld)\n", elements, nodes, cur.sz,
+                   (char *)cur.value, cur.longval);
+        }
+    }
+
+    quicklistReleaseIterator(it);
+    return elements;
+}
+/* Iterate over 'ql' from head to tail (optionally printing each element)
+ * and return the number of elements seen. */
+static int itrprintr(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 1);
+}
+
+/* Iterate over 'ql' from tail to head (optionally printing each element)
+ * and return the number of elements seen. */
+static int itrprintr_rev(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 0);
+}
+
+/* Run _ql_verify() and add the number of problems it reports to 'err',
+ * which must be in scope at the point of use.
+ * Arguments: (quicklist, expected len, expected count, expected head
+ * count, expected tail count). */
+#define ql_verify(a, b, c, d, e) \
+ do { \
+ err += _ql_verify((a), (b), (c), (d), (e)); \
+ } while (0)
+
+/* Verify list metadata matches physical list contents.
+ *
+ * 'len' is the expected ql->len, 'count' the expected ql->count,
+ * 'head_count' / 'tail_count' the expected number of entries in the head
+ * and tail nodes.
+ *
+ * Each mismatch is reported with yell() and counted. Prints OK when nothing
+ * was wrong. Returns the number of mismatches found. */
+static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
+ uint32_t head_count, uint32_t tail_count) {
+ int errors = 0;
+
+ ql_info(ql);
+ /* Cached ql->len vs. expectation. */
+ if (len != ql->len) {
+ yell("quicklist length wrong: expected %d, got %u", len, ql->len);
+ errors++;
+ }
+
+ /* Cached ql->count vs. expectation. */
+ if (count != ql->count) {
+ yell("quicklist count wrong: expected %d, got %lu", count, ql->count);
+ errors++;
+ }
+
+ /* Cached ql->count vs. what a forward iteration actually finds. */
+ int loopr = itrprintr(ql, 0);
+ if (loopr != (int)ql->count) {
+ yell("quicklist cached count not match actual count: expected %lu, got "
+ "%d",
+ ql->count, loopr);
+ errors++;
+ }
+
+ /* Forward and reverse iteration must see the same number of elements. */
+ int rloopr = itrprintr_rev(ql, 0);
+ if (loopr != rloopr) {
+ yell("quicklist has different forward count than reverse count! "
+ "Forward count is %d, reverse count is %d.",
+ loopr, rloopr);
+ errors++;
+ }
+
+ /* A list with no nodes has nothing further to check. */
+ if (ql->len == 0 && !errors) {
+ OK;
+ return errors;
+ }
+
+ /* NOTE(review): because of the '&&', an error is reported only when the
+ * expectation differs from BOTH the cached node count and ziplistLen();
+ * a disagreement between those two goes unnoticed if either matches.
+ * Confirm this leniency is intended. */
+ if (ql->head && head_count != ql->head->count &&
+ head_count != ziplistLen(ql->head->zl)) {
+ yell("quicklist head count wrong: expected %d, "
+ "got cached %d vs. actual %d",
+ head_count, ql->head->count, ziplistLen(ql->head->zl));
+ errors++;
+ }
+
+ /* Same check (and same leniency) for the tail node. */
+ if (ql->tail && tail_count != ql->tail->count &&
+ tail_count != ziplistLen(ql->tail->zl)) {
+ yell("quicklist tail count wrong: expected %d, "
+ "got cached %u vs. actual %d",
+ tail_count, ql->tail->count, ziplistLen(ql->tail->zl));
+ errors++;
+ }
+
+ /* Compression layout check: the first and last 'compress' nodes are
+ * expected to be RAW; every node in between is expected to be LZF unless
+ * its 'attempted_compress' flag is set. */
+ if (quicklistAllowsCompression(ql)) {
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ /* NOTE(review): the else branch and the loop increment dereference
+ * 'node' without the NULL guard used in the if condition; this relies
+ * on ql->len matching the real number of linked nodes. */
+ for (unsigned int at = 0; at < ql->len; at++, node = node->next) {
+ if (node && (at < low_raw || at >= high_raw)) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ yell("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress);
+ errors++;
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF &&
+ !node->attempted_compress) {
+ yell("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d; attempted: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress, node->attempted_compress);
+ errors++;
+ }
+ }
+ }
+ }
+
+ if (!errors)
+ OK;
+ return errors;
+}
+
+/* Generate new string concatenating integer i against string 'prefix'.
+ *
+ * The result lives in a single static 64-byte buffer: it is overwritten by
+ * the next call, must not be freed, and is truncated by snprintf() if
+ * prefix plus number does not fit. */
+static char *genstr(char *prefix, int i) {
+ static char result[64] = {0};
+ snprintf(result, sizeof(result), "%s%d", prefix, i);
+ return result;
+}
+
+/* main test, but callable from other files */
+int quicklistTest(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
+
+ unsigned int err = 0;
+ int optimize_start =
+ -(int)(sizeof(optimization_level) / sizeof(*optimization_level));
+
+ printf("Starting optimization offset at: %d\n", optimize_start);
+
+ int options[] = {0, 1, 2, 3, 4, 5, 6, 10};
+ size_t option_count = sizeof(options) / sizeof(*options);
+ long long runtime[option_count];
+
+ for (int _i = 0; _i < (int)option_count; _i++) {
+ printf("Testing Option %d\n", options[_i]);
+ long long start = mstime();
+
+ TEST("create list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to tail of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "hello", 6);
+ /* 1 for head and 1 for tail because 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to head of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ /* 1 for head and 1 for tail because 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to tail 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to head 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to tail 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 64);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to head 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("rotate empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistRotate(ql);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST("rotate one val once") {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistRotate(ql);
+ /* Ignore compression verify because ziplist is
+ * too small to compress. */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 3; f++) {
+ TEST_DESC("rotate 500 val 5000 times at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "900", 3);
+ quicklistPushHead(ql, "7000", 4);
+ quicklistPushHead(ql, "-1200", 5);
+ quicklistPushHead(ql, "42", 2);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 64);
+ ql_info(ql);
+ for (int i = 0; i < 5000; i++) {
+ ql_info(ql);
+ quicklistRotate(ql);
+ }
+ if (f == 1)
+ ql_verify(ql, 504, 504, 1, 1);
+ else if (f == 2)
+ ql_verify(ql, 252, 504, 2, 2);
+ else if (f == 32)
+ ql_verify(ql, 16, 504, 32, 24);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("pop empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop 1 string from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ char *populate = genstr("hello", 331);
+ quicklistPushHead(ql, populate, 32);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(populate, (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)", sz,
+ data, populate);
+ zfree(data);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 1 number from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "55513", 5);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data == NULL);
+ assert(lv == 55513);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 500 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_info(ql);
+ for (int i = 0; i < 500; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)",
+ sz, data, genstr("hello", 499 - i));
+ zfree(data);
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 5000 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 5000; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ if (i < 500) {
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value "
+ "(%s)",
+ sz, data, genstr("hello", 499 - i));
+ zfree(data);
+ } else {
+ assert(ret == 0);
+ }
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate forward over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 499, count = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i--;
+ count++;
+ }
+ if (count != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate reverse over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i++;
+ }
+ if (i != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertBefore(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 12; f++) {
+ TEST_DESC("insert once in elements while iterating at fill %d at "
+ "compress %d\n",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistSetFill(ql, 1);
+ quicklistPushTail(ql, "def", 3); /* force to unique node */
+ quicklistSetFill(ql, f);
+ quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */
+ quicklistPushTail(ql, "foo", 3);
+ quicklistPushTail(ql, "zoo", 3);
+
+ itrprintr(ql, 0);
+ /* insert "bar" before "bob" while iterating over list. */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ while (quicklistNext(iter, &entry)) {
+ if (!strncmp((char *)entry.value, "bob", 3)) {
+ /* Insert as fill = 1 so it spills into new node. */
+ quicklistInsertBefore(ql, &entry, "bar", 3);
+ break; /* didn't we fix insert-while-iterating? */
+ }
+ }
+ itrprintr(ql, 0);
+
+ /* verify results */
+ quicklistIndex(ql, 0, &entry);
+ if (strncmp((char *)entry.value, "abc", 3))
+ ERR("Value 0 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 1, &entry);
+ if (strncmp((char *)entry.value, "def", 3))
+ ERR("Value 1 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 2, &entry);
+ if (strncmp((char *)entry.value, "bar", 3))
+ ERR("Value 2 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 3, &entry);
+ if (strncmp((char *)entry.value, "bob", 3))
+ ERR("Value 3 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 4, &entry);
+ if (strncmp((char *)entry.value, "foo", 3))
+ ERR("Value 4 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 5, &entry);
+ if (strncmp((char *)entry.value, "zoo", 3))
+ ERR("Value 5 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC(
+ "insert [before] 250 new in middle of 500 elements at fill"
+ " %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertBefore(ql, &entry, genstr("abc", i), 32);
+ }
+ if (f == 32)
+ ql_verify(ql, 25, 750, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC("insert [after] 250 new in middle of 500 elements at "
+ "fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertAfter(ql, &entry, genstr("abc", i), 32);
+ }
+
+ if (ql->count != 750)
+ ERR("List size not 750, but rather %ld", ql->count);
+
+ if (f == 32)
+ ql_verify(ql, 26, 750, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("duplicate empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, genstr("hello", 3), 32);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 16, 500, 20, 32);
+
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("index 1,200 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, 1, &entry);
+ if (!strcmp((char *)entry.value, "hello2"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, 200, &entry);
+ if (!strcmp((char *)entry.value, "hello201"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -1,-2 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -1, &entry);
+ if (!strcmp((char *)entry.value, "hello500"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, -2, &entry);
+ if (!strcmp((char *)entry.value, "hello499"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -100 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -100, &entry);
+ if (!strcmp((char *)entry.value, "hello401"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index too big +1 from 50 list at fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ if (quicklistIndex(ql, 50, &entry))
+ ERR("Index found at 50 with 50 list: %.*s", entry.sz,
+ entry.value);
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("delete range empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistDelRange(ql, 5, 20);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node in list of one node") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 32);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 128);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete middle 100 of 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, 200, 100);
+ ql_verify(ql, 14, 400, 32, 20);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 1);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 128);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 100 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistDelRange(ql, -100, 100);
+ ql_verify(ql, 13, 400, 32, 16);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete -10 count 5 from 50 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 2, 50, 32, 18);
+ quicklistDelRange(ql, -10, 5);
+ ql_verify(ql, 2, 45, 32, 13);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers only list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "1111", 4);
+ quicklistPushTail(ql, "2222", 4);
+ quicklistPushTail(ql, "3333", 4);
+ quicklistPushTail(ql, "4444", 4);
+ ql_verify(ql, 1, 4, 4, 4);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111, %lld", entry.longval);
+ quicklistIndex(ql, 1, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222, %lld", entry.longval);
+ quicklistIndex(ql, 2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333, %lld", entry.longval);
+ quicklistIndex(ql, 3, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444, %lld", entry.longval);
+ if (quicklistIndex(ql, 4, &entry))
+ ERR("Index past elements: %lld", entry.longval);
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -3, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -4, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111 (reverse), %lld", entry.longval);
+ if (quicklistIndex(ql, -5, &entry))
+ ERR("Index past elements (reverse), %lld", entry.longval);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 5000; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistEntry entry;
+ for (int i = 0; i < 5000; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[i])
+ ERR("[%d] Not longval %lld but rather %lld", i, nums[i],
+ entry.longval);
+ entry.longval = 0xdeadbeef;
+ }
+ quicklistIndex(ql, 5000, &entry);
+ if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20))
+ ERR("String val not match: %s", entry.value);
+ ql_verify(ql, 157, 5001, 32, 9);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read B") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "99", 2);
+ quicklistPushTail(ql, "98", 2);
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistPushTail(ql, "96", 2);
+ quicklistPushTail(ql, "95", 2);
+ quicklistReplaceAtIndex(ql, 1, "foo", 3);
+ quicklistReplaceAtIndex(ql, -1, "bar", 3);
+ quicklistRelease(ql);
+ OK;
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("lrem test at fill %d at compress %d", f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char *words[] = {"abc", "foo", "bar", "foobar", "foobared",
+ "zap", "bar", "test", "foo"};
+ char *result[] = {"abc", "foo", "foobar", "foobared",
+ "zap", "test", "foo"};
+ char *resultB[] = {"abc", "foo", "foobar",
+ "foobared", "zap", "test"};
+ for (int i = 0; i < 9; i++)
+ quicklistPushTail(ql, words[i], strlen(words[i]));
+
+ /* lrem 0 bar */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"bar", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem 0 bar */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ int ok = 1;
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, result[i], entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, result[i]);
+ ok = 0;
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ quicklistPushTail(ql, "foo", 3);
+
+ /* lrem -2 foo */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ int del = 2;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"foo", 3)) {
+ quicklistDelEntry(iter, &entry);
+ del--;
+ }
+ if (!del)
+ break;
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem -2 foo */
+ /* (we're ignoring the '2' part and still deleting all foo
+ * because
+ * we only have two foo) */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ size_t resB = sizeof(resultB) / sizeof(*resultB);
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, resultB[resB - 1 - i],
+ entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, resultB[resB - 1 - i]);
+ ok = 0;
+ }
+ i++;
+ }
+
+ quicklistReleaseIterator(iter);
+ /* final result of all tests */
+ if (ok)
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("iterate reverse + delete at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistPushTail(ql, "def", 3);
+ quicklistPushTail(ql, "hij", 3);
+ quicklistPushTail(ql, "jkl", 3);
+ quicklistPushTail(ql, "oop", 3);
+
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"hij", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ if (i != 5)
+ ERR("Didn't iterate 5 times, iterated %d times.", i);
+
+ /* Check results after deletion of "hij" */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ char *vals[] = {"abc", "def", "jkl", "oop"};
+ while (quicklistNext(iter, &entry)) {
+ if (!quicklistCompare(entry.zi, (unsigned char *)vals[i],
+ 3)) {
+ ERR("Value at %d didn't match %s\n", i, vals[i]);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 800; f++) {
+ TEST_DESC("iterator at index test at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 760; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+
+ quicklistEntry entry;
+ quicklistIter *iter =
+ quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437);
+ int i = 437;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.longval != nums[i])
+ ERR("Expected %lld, but got %lld", nums[i],
+ entry.longval);
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test A at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 32; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 32, 32, 32);
+ /* ltrim 25 53 (keep [25,31] inclusive = 7 remaining) */
+ quicklistDelRange(ql, 0, 25);
+ quicklistDelRange(ql, 0, 0);
+ quicklistEntry entry;
+ for (int i = 0; i < 7; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[25 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ nums[25 + i], entry.longval);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 7, 7, 7);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test B at fill %d at compress %d", f,
+ options[_i]) {
+ /* Force-disable compression because our 33 sequential
+ * integers don't compress and the check always fails. */
+ quicklist *ql = quicklistNew(f, QUICKLIST_NOCOMPRESS);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */
+ quicklistDelRange(ql, 0, 5);
+ quicklistDelRange(ql, -16, 16);
+ if (f == 32)
+ ql_verify(ql, 1, 12, 12, 12);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 5)
+ ERR("A: longval not 5, but %lld", entry.longval);
+ else
+ OK;
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 16)
+ ERR("B! got instead: %lld", entry.longval);
+ else
+ OK;
+ quicklistPushTail(ql, "bobobob", 7);
+ quicklistIndex(ql, -1, &entry);
+ if (strncmp((char *)entry.value, "bobobob", 7))
+ ERR("Tail doesn't match bobobob, it's %.*s instead",
+ entry.sz, entry.value);
+ for (int i = 0; i < 12; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[5 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ nums[5 + i], entry.longval);
+ }
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test C at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */
+ quicklistDelRange(ql, 0, 3);
+ quicklistDelRange(ql, -29,
+ 4000); /* make sure not loop forever */
+ if (f == 32)
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != -5157318210846258173)
+ ERROR;
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test D at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ quicklistDelRange(ql, -12, 3);
+ if (ql->count != 30)
+ ERR("Didn't delete exactly three elements! Count is: %lu",
+ ql->count);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 72; f++) {
+ TEST_DESC("create quicklist from ziplist at fill %d at compress %d",
+ f, options[_i]) {
+ unsigned char *zl = ziplistNew();
+ long long nums[64];
+ char num[64];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ zl =
+ ziplistPush(zl, (unsigned char *)num, sz, ZIPLIST_TAIL);
+ }
+ for (int i = 0; i < 33; i++) {
+ zl = ziplistPush(zl, (unsigned char *)genstr("hello", i),
+ 32, ZIPLIST_TAIL);
+ }
+ quicklist *ql = quicklistCreateFromZiplist(f, options[_i], zl);
+ if (f == 1)
+ ql_verify(ql, 66, 66, 1, 1);
+ else if (f == 32)
+ ql_verify(ql, 3, 66, 32, 2);
+ else if (f == 66)
+ ql_verify(ql, 1, 66, 66, 66);
+ quicklistRelease(ql);
+ }
+ }
+
+ long long stop = mstime();
+ runtime[_i] = stop - start;
+ }
+
+ /* Run a longer test of compression depth outside of primary test loop. */
+ int list_sizes[] = {250, 251, 500, 999, 1000};
+ long long start = mstime();
+ for (int list = 0; list < (int)(sizeof(list_sizes) / sizeof(*list_sizes));
+ list++) {
+ for (int f = optimize_start; f < 128; f++) {
+ for (int depth = 1; depth < 40; depth++) {
+ /* skip over many redundant test cases */
+ TEST_DESC("verify specific compression of interior nodes with "
+ "%d list "
+ "at fill %d at compress %d",
+ list_sizes[list], f, depth) {
+ quicklist *ql = quicklistNew(f, depth);
+ for (int i = 0; i < list_sizes[list]; i++) {
+ quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64);
+ quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64);
+ }
+
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len;
+ at++, node = node->next) {
+ if (at < low_raw || at >= high_raw) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ ERR("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz);
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) {
+ ERR("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; attempted: %d)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz, node->attempted_compress);
+ }
+ }
+ }
+ quicklistRelease(ql);
+ }
+ }
+ }
+ }
+ long long stop = mstime();
+
+ printf("\n");
+ for (size_t i = 0; i < option_count; i++)
+ printf("Test Loop %02d: %0.2f seconds.\n", options[i],
+ (float)runtime[i] / 1000);
+ printf("Compressions: %0.2f seconds.\n", (float)(stop - start) / 1000);
+ printf("\n");
+
+ if (!err)
+ printf("ALL TESTS PASSED!\n");
+ else
+ ERR("Sorry, not all tests passed! In fact, %d tests failed.", err);
+
+ return err;
+}
+#endif
diff --git a/src/quicklist.h b/src/quicklist.h
new file mode 100644
index 000000000..5c9530ccd
--- /dev/null
+++ b/src/quicklist.h
@@ -0,0 +1,169 @@
+/* quicklist.h - A generic doubly linked quicklist implementation
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __QUICKLIST_H__
+#define __QUICKLIST_H__
+
+/* Node, quicklist, and Iterator are the only data structures used currently. */
+
+/* quicklistNode is a 32 byte struct describing a ziplist for a quicklist.
+ * We use bit fields to keep the quicklistNode at 32 bytes.
+ * count: 16 bits, max 65535 (max zl bytes is 65k, so max count actually < 32k).
+ * encoding: 2 bits, RAW=1, LZF=2.
+ * container: 2 bits, NONE=1, ZIPLIST=2.
+ * recompress: 1 bit, bool, true if node is temporarily decompressed for usage.
+ * attempted_compress: 1 bit, boolean, used for verifying during testing.
+ * extra: 10 bits, free for future use; pads out the remainder of 32 bits */
+typedef struct quicklistNode {
+ struct quicklistNode *prev;
+ struct quicklistNode *next;
+ unsigned char *zl;
+ unsigned int sz; /* ziplist size in bytes */
+ unsigned int count : 16; /* count of items in ziplist */
+ unsigned int encoding : 2; /* RAW==1 or LZF==2 */
+ unsigned int container : 2; /* NONE==1 or ZIPLIST==2 */
+ unsigned int recompress : 1; /* was this node previously compressed? */
+ unsigned int attempted_compress : 1; /* node can't compress; too small */
+ unsigned int extra : 10; /* more bits to steal for future usage */
+} quicklistNode;
+
+/* quicklistLZF is a 4+N byte struct holding 'sz' followed by 'compressed'.
+ * 'sz' is byte length of 'compressed' field.
+ * 'compressed' is LZF data with total (compressed) length 'sz'
+ * NOTE: uncompressed length is stored in quicklistNode->sz.
+ * When quicklistNode->zl is compressed, node->zl points to a quicklistLZF */
+typedef struct quicklistLZF {
+ unsigned int sz; /* LZF size in bytes*/
+ char compressed[];
+} quicklistLZF;
+
+/* quicklist is a 32 byte struct (on 64-bit systems) describing a quicklist.
+ * 'count' is the number of total entries.
+ * 'len' is the number of quicklist nodes.
+ * 'compress' is: 0 if compression disabled, otherwise it's the number
+ * of quicklistNodes to leave uncompressed at ends of quicklist.
+ * 'fill' is the user-requested (or default) fill factor. */
+typedef struct quicklist {
+ quicklistNode *head;
+ quicklistNode *tail;
+ unsigned long count; /* total count of all entries in all ziplists */
+ unsigned int len; /* number of quicklistNodes */
+ int fill : 16; /* fill factor for individual nodes */
+ unsigned int compress : 16; /* depth of end nodes not to compress; 0=off */
+} quicklist;
+
+typedef struct quicklistIter {
+ const quicklist *quicklist;
+ quicklistNode *current;
+ unsigned char *zi;
+ long offset; /* offset in current ziplist */
+ int direction;
+} quicklistIter;
+
+typedef struct quicklistEntry {
+ const quicklist *quicklist;
+ quicklistNode *node;
+ unsigned char *zi;
+ unsigned char *value;
+ unsigned int sz;
+ long long longval;
+ int offset;
+} quicklistEntry;
+
+#define QUICKLIST_HEAD 0
+#define QUICKLIST_TAIL -1
+
+/* quicklist node encodings */
+#define QUICKLIST_NODE_ENCODING_RAW 1
+#define QUICKLIST_NODE_ENCODING_LZF 2
+
+/* quicklist compression disable */
+#define QUICKLIST_NOCOMPRESS 0
+
+/* quicklist container formats */
+#define QUICKLIST_NODE_CONTAINER_NONE 1
+#define QUICKLIST_NODE_CONTAINER_ZIPLIST 2
+
+#define quicklistNodeIsCompressed(node) \
+ ((node)->encoding == QUICKLIST_NODE_ENCODING_LZF)
+
+/* Prototypes */
+quicklist *quicklistCreate(void);
+quicklist *quicklistNew(int fill, int compress);
+void quicklistSetCompressDepth(quicklist *quicklist, int depth);
+void quicklistSetFill(quicklist *quicklist, int fill);
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth);
+void quicklistRelease(quicklist *quicklist);
+int quicklistPushHead(quicklist *quicklist, void *value, const size_t sz);
+int quicklistPushTail(quicklist *quicklist, void *value, const size_t sz);
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where);
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl);
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+ unsigned char *zl);
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+ unsigned char *zl);
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry);
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ int sz);
+int quicklistDelRange(quicklist *quicklist, const long start, const long stop);
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction);
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+ int direction, const long long idx);
+int quicklistNext(quicklistIter *iter, quicklistEntry *node);
+void quicklistReleaseIterator(quicklistIter *iter);
+quicklist *quicklistDup(quicklist *orig);
+int quicklistIndex(const quicklist *quicklist, const long long index,
+ quicklistEntry *entry);
+void quicklistRewind(quicklist *quicklist, quicklistIter *li);
+void quicklistRewindTail(quicklist *quicklist, quicklistIter *li);
+void quicklistRotate(quicklist *quicklist);
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *sval,
+ void *(*saver)(unsigned char *data, unsigned int sz));
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *slong);
+unsigned int quicklistCount(quicklist *ql);
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len);
+size_t quicklistGetLzf(const quicklistNode *node, void **data);
+
+#ifdef REDIS_TEST
+int quicklistTest(int argc, char *argv[]);
+#endif
+
+/* Directions for iterators */
+#define AL_START_HEAD 0
+#define AL_START_TAIL 1
+
+#endif /* __QUICKLIST_H__ */
diff --git a/src/rdb.c b/src/rdb.c
index d2fd405db..e3236e12c 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -40,6 +40,20 @@
#include <arpa/inet.h>
#include <sys/stat.h>
+#define RDB_LOAD_NONE 0
+#define RDB_LOAD_ENC (1<<0)
+#define RDB_LOAD_PLAIN (1<<1)
+
+#define rdbExitReportCorruptRDB(reason) rdbCheckThenExit(reason, __LINE__);
+
+void rdbCheckThenExit(char *reason, int where) {
+ redisLog(REDIS_WARNING, "Corrupt RDB detected at rdb.c:%d (%s). "
+ "Running 'redis-check-rdb %s'",
+ where, reason, server.rdb_filename);
+ redis_check_rdb(server.rdb_filename);
+ exit(1);
+}
+
static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
if (rdb && rioWrite(rdb,p,len) == 0)
return -1;
@@ -161,9 +175,11 @@ int rdbEncodeInteger(long long value, unsigned char *enc) {
}
/* Loads an integer-encoded object with the specified encoding type "enctype".
- * If the "encode" argument is set the function may return an integer-encoded
- * string object, otherwise it always returns a raw string object. */
-robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
+ * The returned value changes according to the flags, see
+ * rdbGenericLoadStringObject() for more info. */
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int encode = flags & RDB_LOAD_ENC;
unsigned char enc[4];
long long val;
@@ -182,12 +198,19 @@ robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
val = (int32_t)v;
} else {
val = 0; /* anti-warning */
- redisPanic("Unknown RDB integer encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB integer encoding type");
}
- if (encode)
+ if (plain) {
+ char buf[REDIS_LONGSTR_SIZE], *p;
+ int len = ll2string(buf,sizeof(buf),val);
+ p = zmalloc(len);
+ memcpy(p,buf,len);
+ return p;
+ } else if (encode) {
return createStringObjectFromLongLong(val);
- else
+ } else {
return createObject(REDIS_STRING,sdsfromlonglong(val));
+ }
}
/* String objects in the form "2391" "-100" without any space and with a
@@ -209,44 +232,54 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}
-int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
- size_t comprlen, outlen;
+ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+ size_t original_len) {
unsigned char byte;
- int n, nwritten = 0;
- void *out;
+ ssize_t n, nwritten = 0;
- /* We require at least four bytes compression for this to be worth it */
- if (len <= 4) return 0;
- outlen = len-4;
- if ((out = zmalloc(outlen+1)) == NULL) return 0;
- comprlen = lzf_compress(s, len, out, outlen);
- if (comprlen == 0) {
- zfree(out);
- return 0;
- }
/* Data compressed! Let's save it on disk */
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,compress_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,original_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
+ if ((n = rdbWriteRaw(rdb,data,compress_len)) == -1) goto writeerr;
nwritten += n;
- zfree(out);
return nwritten;
writeerr:
- zfree(out);
return -1;
}
-robj *rdbLoadLzfStringObject(rio *rdb) {
+ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+ size_t comprlen, outlen;
+ void *out;
+
+ /* We require at least four bytes compression for this to be worth it */
+ if (len <= 4) return 0;
+ outlen = len-4;
+ if ((out = zmalloc(outlen+1)) == NULL) return 0;
+ comprlen = lzf_compress(s, len, out, outlen);
+ if (comprlen == 0) {
+ zfree(out);
+ return 0;
+ }
+ ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+ zfree(out);
+ return nwritten;
+}
+
+/* Load an LZF compressed string in RDB format. The returned value
+ * changes according to 'flags'. For more info check the
+ * rdbGenericLoadStringObject() function. */
+void *rdbLoadLzfStringObject(rio *rdb, int flags) {
+ int plain = flags & RDB_LOAD_PLAIN;
unsigned int len, clen;
unsigned char *c = NULL;
sds val = NULL;
@@ -254,22 +287,37 @@ robj *rdbLoadLzfStringObject(rio *rdb) {
if ((clen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
if ((c = zmalloc(clen)) == NULL) goto err;
- if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+
+ /* Allocate our target according to the uncompressed size. */
+ if (plain) {
+ val = zmalloc(len);
+ } else {
+ if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+ }
+
+ /* Load the compressed representation and uncompress it to target. */
if (rioRead(rdb,c,clen) == 0) goto err;
if (lzf_decompress(c,clen,val,len) == 0) goto err;
zfree(c);
- return createObject(REDIS_STRING,val);
+
+ if (plain)
+ return val;
+ else
+ return createObject(REDIS_STRING,val);
err:
zfree(c);
- sdsfree(val);
+ if (plain)
+ zfree(val);
+ else
+ sdsfree(val);
return NULL;
}
/* Save a string object as [len][data] on disk. If the object is a string
* representation of an integer value we try to save it in a special form */
-int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
int enclen;
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
/* Try integer encoding */
if (len <= 11) {
@@ -300,9 +348,9 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
}
/* Save a long long value as either an encoded string or a string. */
-int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
+ssize_t rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
unsigned char buf[32];
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
int enclen = rdbEncodeInteger(value,buf);
if (enclen > 0) {
return rdbWriteRaw(rdb,buf,enclen);
@@ -330,10 +378,21 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
}
}
-robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
+/* Load a string object from an RDB file according to flags:
+ *
+ * RDB_LOAD_NONE (no flags): load an RDB object, unencoded.
+ * RDB_LOAD_ENC: If the returned type is a Redis object, try to
+ * encode it in a special way to be more memory
+ * efficient. When this flag is passed the function
+ * no longer guarantees that obj->ptr is an SDS string.
+ * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
+ * instead of a Redis object.
+ */
+void *rdbGenericLoadStringObject(rio *rdb, int flags) {
+ int encode = flags & RDB_LOAD_ENC;
+ int plain = flags & RDB_LOAD_PLAIN;
int isencoded;
uint32_t len;
- robj *o;
len = rdbLoadLen(rdb,&isencoded);
if (isencoded) {
@@ -341,30 +400,39 @@ robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
case REDIS_RDB_ENC_INT8:
case REDIS_RDB_ENC_INT16:
case REDIS_RDB_ENC_INT32:
- return rdbLoadIntegerObject(rdb,len,encode);
+ return rdbLoadIntegerObject(rdb,len,flags);
case REDIS_RDB_ENC_LZF:
- return rdbLoadLzfStringObject(rdb);
+ return rdbLoadLzfStringObject(rdb,flags);
default:
- redisPanic("Unknown RDB encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type");
}
}
if (len == REDIS_RDB_LENERR) return NULL;
- o = encode ? createStringObject(NULL,len) :
- createRawStringObject(NULL,len);
- if (len && rioRead(rdb,o->ptr,len) == 0) {
- decrRefCount(o);
- return NULL;
+ if (!plain) {
+ robj *o = encode ? createStringObject(NULL,len) :
+ createRawStringObject(NULL,len);
+ if (len && rioRead(rdb,o->ptr,len) == 0) {
+ decrRefCount(o);
+ return NULL;
+ }
+ return o;
+ } else {
+ void *buf = zmalloc(len);
+ if (len && rioRead(rdb,buf,len) == 0) {
+ zfree(buf);
+ return NULL;
+ }
+ return buf;
}
- return o;
}
robj *rdbLoadStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,0);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE);
}
robj *rdbLoadEncodedStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,1);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC);
}
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
@@ -433,10 +501,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
case REDIS_STRING:
return rdbSaveType(rdb,REDIS_RDB_TYPE_STRING);
case REDIS_LIST:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_LINKEDLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST)
+ return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_QUICKLIST);
else
redisPanic("Unknown list encoding");
case REDIS_SET:
@@ -476,8 +542,8 @@ int rdbLoadObjectType(rio *rdb) {
}
/* Save a Redis object. Returns -1 on error, number of bytes written on success. */
-int rdbSaveObject(rio *rdb, robj *o) {
- int n, nwritten = 0;
+ssize_t rdbSaveObject(rio *rdb, robj *o) {
+ ssize_t n = 0, nwritten = 0;
if (o->type == REDIS_STRING) {
/* Save a string value */
@@ -485,25 +551,24 @@ int rdbSaveObject(rio *rdb, robj *o) {
nwritten += n;
} else if (o->type == REDIS_LIST) {
/* Save a list value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- size_t l = ziplistBlobLen((unsigned char*)o->ptr);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
- if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listIter li;
- listNode *ln;
- if ((n = rdbSaveLen(rdb,listLength(list))) == -1) return -1;
- nwritten += n;
-
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
- nwritten += n;
- }
+ do {
+ if (quicklistNodeIsCompressed(node)) {
+ void *data;
+ size_t compress_len = quicklistGetLzf(node, &data);
+ if ((n = rdbSaveLzfBlob(rdb,data,compress_len,node->sz)) == -1) return -1;
+ nwritten += n;
+ } else {
+ if ((n = rdbSaveRawString(rdb,node->zl,node->sz)) == -1) return -1;
+ nwritten += n;
+ }
+ } while ((node = node->next));
} else {
redisPanic("Unknown list encoding");
}
@@ -599,8 +664,8 @@ int rdbSaveObject(rio *rdb, robj *o) {
* the rdbSaveObject() function. Currently we use a trick to get
* this length with very little changes to the code. In the future
* we could switch to a faster solution. */
-off_t rdbSavedObjectLen(robj *o) {
- int len = rdbSaveObject(NULL,o);
+size_t rdbSavedObjectLen(robj *o) {
+ ssize_t len = rdbSaveObject(NULL,o);
redisAssertWithInfo(NULL,o,len != -1);
return len;
}
@@ -627,6 +692,39 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
return 1;
}
+/* Save an AUX field. */
+int rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ if (rdbSaveType(rdb,REDIS_RDB_OPCODE_AUX) == -1) return -1;
+ if (rdbSaveRawString(rdb,key,keylen) == -1) return -1;
+ if (rdbSaveRawString(rdb,val,vallen) == -1) return -1;
+ return 1;
+}
+
+/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
+ * with strlen(). */
+int rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
+}
+
+/* Wrapper for strlen(key) + integer type (up to long long range). */
+int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[REDIS_LONGSTR_SIZE];
+ int vlen = ll2string(buf,sizeof(buf),val);
+ return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
+}
+
+/* Save a few default AUX fields with information about the RDB generated. */
+int rdbSaveInfoAuxFields(rio *rdb) {
+ int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
+
+ /* Add a few fields about the state when the RDB was created. */
+ if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;
+ return 1;
+}
+
/* Produces a dump of the database in RDB format sending it to the specified
* Redis I/O channel. On success REDIS_OK is returned, otherwise REDIS_ERR
* is returned and part of the output, or all the output, can be
@@ -647,6 +745,7 @@ int rdbSaveRio(rio *rdb, int *error) {
rdb->update_cksum = rioGenericUpdateChecksum;
snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
+ if (rdbSaveInfoAuxFields(rdb) == -1) goto werr;
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
@@ -659,6 +758,21 @@ int rdbSaveRio(rio *rdb, int *error) {
if (rdbSaveType(rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
if (rdbSaveLen(rdb,j) == -1) goto werr;
+ /* Write the RESIZE DB opcode. We trim the size to UINT32_MAX, which
+ * is currently the largest type we are able to represent in RDB sizes.
+ * However this does not limit the actual size of the DB to load since
+ * these sizes are just hints to resize the hash tables. */
+ uint32_t db_size, expires_size;
+ db_size = (dictSize(db->dict) <= UINT32_MAX) ?
+ dictSize(db->dict) :
+ UINT32_MAX;
+ expires_size = (dictSize(db->expires) <= UINT32_MAX) ?
+ dictSize(db->expires) :
+ UINT32_MAX;
+ if (rdbSaveType(rdb,REDIS_RDB_OPCODE_RESIZEDB) == -1) goto werr;
+ if (rdbSaveLen(rdb,db_size) == -1) goto werr;
+ if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
+
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
sds keystr = dictGetKey(de);
@@ -720,7 +834,7 @@ int rdbSave(char *filename) {
char tmpfile[256];
FILE *fp;
rio rdb;
- int error;
+ int error = 0;
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
@@ -819,7 +933,7 @@ void rdbRemoveTempFile(pid_t childpid) {
/* Load a Redis object of the specified type from the specified file.
* On success a newly allocated object is returned, otherwise NULL. */
robj *rdbLoadObject(int rdbtype, rio *rdb) {
- robj *o, *ele, *dec;
+ robj *o = NULL, *ele, *dec;
size_t len;
unsigned int i;
@@ -831,33 +945,18 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
/* Read list value */
if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
- /* Use a real list when there are too many entries */
- if (len > server.list_max_ziplist_entries) {
- o = createListObject();
- } else {
- o = createZiplistObject();
- }
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
/* Load every single element of the list */
while(len--) {
if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
-
- /* If we are using a ziplist and the value is too big, convert
- * the object to a real list. */
- if (o->encoding == REDIS_ENCODING_ZIPLIST &&
- sdsEncodedObject(ele) &&
- sdslen(ele->ptr) > server.list_max_ziplist_value)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
-
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- dec = getDecodedObject(ele);
- o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
- decrRefCount(dec);
- decrRefCount(ele);
- } else {
- ele = tryObjectEncoding(ele);
- listAddNodeTail(o->ptr,ele);
- }
+ dec = getDecodedObject(ele);
+ size_t len = sdslen(dec->ptr);
+ quicklistPushTail(o->ptr, dec->ptr, len);
+ decrRefCount(dec);
+ decrRefCount(ele);
}
} else if (rdbtype == REDIS_RDB_TYPE_SET) {
/* Read list/set value */
@@ -989,25 +1088,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
/* Add pair to hash table */
ret = dictAdd((dict*)o->ptr, field, value);
- redisAssert(ret == DICT_OK);
+ if (ret == DICT_ERR) {
+ rdbExitReportCorruptRDB("Duplicate keys detected");
+ }
}
/* All pairs should be read by now */
redisAssert(len == 0);
-
+ } else if (rdbtype == REDIS_RDB_TYPE_LIST_QUICKLIST) {
+ if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+
+ while (len--) {
+ unsigned char *zl = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ if (zl == NULL) return NULL;
+ quicklistAppendZiplist(o->ptr, zl);
+ }
} else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP ||
rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST ||
rdbtype == REDIS_RDB_TYPE_SET_INTSET ||
rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST ||
rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST)
{
- robj *aux = rdbLoadStringObject(rdb);
-
- if (aux == NULL) return NULL;
- o = createObject(REDIS_STRING,NULL); /* string is just placeholder */
- o->ptr = zmalloc(sdslen(aux->ptr));
- memcpy(o->ptr,aux->ptr,sdslen(aux->ptr));
- decrRefCount(aux);
+ unsigned char *encoded = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ if (encoded == NULL) return NULL;
+ o = createObject(REDIS_STRING,encoded); /* Obj type fixed below. */
/* Fix the object encoding, and make sure to convert the encoded
* data type into the base type if accordingly to the current
@@ -1048,8 +1155,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
case REDIS_RDB_TYPE_LIST_ZIPLIST:
o->type = REDIS_LIST;
o->encoding = REDIS_ENCODING_ZIPLIST;
- if (ziplistLen(o->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
+ listTypeConvert(o,REDIS_ENCODING_QUICKLIST);
break;
case REDIS_RDB_TYPE_SET_INTSET:
o->type = REDIS_SET;
@@ -1070,11 +1176,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
hashTypeConvert(o, REDIS_ENCODING_HT);
break;
default:
- redisPanic("Unknown encoding");
+ rdbExitReportCorruptRDB("Unknown encoding");
break;
}
} else {
- redisPanic("Unknown object type");
+ rdbExitReportCorruptRDB("Unknown object type");
}
return o;
}
@@ -1087,8 +1193,9 @@ void startLoading(FILE *fp) {
/* Load the DB */
server.loading = 1;
server.loading_start_time = time(NULL);
+ server.loading_loaded_bytes = 0;
if (fstat(fileno(fp), &sb) == -1) {
- server.loading_total_bytes = 1; /* just to avoid division by zero */
+ server.loading_total_bytes = 0;
} else {
server.loading_total_bytes = sb.st_size;
}
@@ -1162,7 +1269,12 @@ int rdbLoad(char *filename) {
/* Read type. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
if (type == REDIS_RDB_OPCODE_EXPIRETIME) {
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
@@ -1170,27 +1282,67 @@ int rdbLoad(char *filename) {
* into milliseconds. */
expiretime *= 1000;
} else if (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
- /* Milliseconds precision expire times introduced with RDB
- * version 3. */
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+ * with RDB v3. Like EXPIRETIME but with more precision. */
if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- }
-
- if (type == REDIS_RDB_OPCODE_EOF)
+ } else if (type == REDIS_RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
break;
-
- /* Handle SELECT DB opcode as a special case */
- if (type == REDIS_RDB_OPCODE_SELECTDB) {
+ } else if (type == REDIS_RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
goto eoferr;
if (dbid >= (unsigned)server.dbnum) {
- redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
+ redisLog(REDIS_WARNING,
+ "FATAL: Data file was created with a Redis "
+ "server configured to handle more than %d "
+ "databases. Exiting\n", server.dbnum);
exit(1);
}
db = server.db+dbid;
- continue;
+ continue; /* Read type again. */
+ } else if (type == REDIS_RDB_OPCODE_RESIZEDB) {
+ /* RESIZEDB: Hint about the size of the keys in the currently
+ * selected database, in order to avoid useless rehashing. */
+ uint32_t db_size, expires_size;
+ if ((db_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ goto eoferr;
+ dictExpand(db->dict,db_size);
+ dictExpand(db->expires,expires_size);
+ continue; /* Read type again. */
+ } else if (type == REDIS_RDB_OPCODE_AUX) {
+ /* AUX: generic string-string fields. Used to add state to RDB
+ * which is backward compatible. Implementations of RDB loading
+ * are required to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+
+ if (((char*)auxkey->ptr)[0] == '%') {
+ /* All the fields with a name starting with '%' are considered
+ * information fields and are logged at startup with a log
+ * level of NOTICE. */
+ redisLog(REDIS_NOTICE,"RDB '%s': %s",
+ (char*)auxkey->ptr,
+ (char*)auxval->ptr);
+ } else {
+ /* We ignore fields we don't understand, as by AUX field
+ * contract. */
+ redisLog(REDIS_DEBUG,"Unrecognized RDB AUX field: '%s'",
+ (char*)auxkey->ptr);
+ }
+
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
}
+
/* Read key */
if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
/* Read value */
@@ -1223,7 +1375,7 @@ int rdbLoad(char *filename) {
redisLog(REDIS_WARNING,"RDB file was saved with checksum disabled: no check performed.");
} else if (cksum != expected) {
redisLog(REDIS_WARNING,"Wrong RDB checksum. Aborting now.");
- exit(1);
+ rdbExitReportCorruptRDB("RDB CRC error");
}
}
@@ -1233,7 +1385,7 @@ int rdbLoad(char *filename) {
eoferr: /* unexpected end of file is handled here with a fatal exit */
redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
- exit(1);
+ rdbExitReportCorruptRDB("Unexpected EOF reading RDB file");
return REDIS_ERR; /* Just to avoid warning */
}
@@ -1491,7 +1643,9 @@ int rdbSaveToSlavesSockets(void) {
{
retval = REDIS_ERR;
}
+ zfree(msg);
}
+ zfree(clientids);
exitFromChild((retval == REDIS_OK) ? 0 : 1);
} else {
/* Parent */
diff --git a/src/rdb.h b/src/rdb.h
index eb40d4993..a72607b71 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -38,7 +38,7 @@
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define REDIS_RDB_VERSION 6
+#define REDIS_RDB_VERSION 7
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -74,6 +74,7 @@
#define REDIS_RDB_TYPE_SET 2
#define REDIS_RDB_TYPE_ZSET 3
#define REDIS_RDB_TYPE_HASH 4
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Object types for encoded objects. */
#define REDIS_RDB_TYPE_HASH_ZIPMAP 9
@@ -81,11 +82,15 @@
#define REDIS_RDB_TYPE_SET_INTSET 11
#define REDIS_RDB_TYPE_ZSET_ZIPLIST 12
#define REDIS_RDB_TYPE_HASH_ZIPLIST 13
+#define REDIS_RDB_TYPE_LIST_QUICKLIST 14
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 13))
+#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 14))
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
+#define REDIS_RDB_OPCODE_AUX 250
+#define REDIS_RDB_OPCODE_RESIZEDB 251
#define REDIS_RDB_OPCODE_EXPIRETIME_MS 252
#define REDIS_RDB_OPCODE_EXPIRETIME 253
#define REDIS_RDB_OPCODE_SELECTDB 254
@@ -104,9 +109,8 @@ int rdbSaveBackground(char *filename);
int rdbSaveToSlavesSockets(void);
void rdbRemoveTempFile(pid_t childpid);
int rdbSave(char *filename);
-int rdbSaveObject(rio *rdb, robj *o);
-off_t rdbSavedObjectLen(robj *o);
-off_t rdbSavedObjectPages(robj *o);
+ssize_t rdbSaveObject(rio *rdb, robj *o);
+size_t rdbSavedObjectLen(robj *o);
robj *rdbLoadObject(int type, rio *rdb);
void backgroundSaveDoneHandler(int exitcode, int bysignal);
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, long long now);
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
index 199203812..f735aeb63 100644
--- a/src/redis-benchmark.c
+++ b/src/redis-benchmark.c
@@ -86,7 +86,7 @@ typedef struct _client {
char **randptr; /* Pointers to :rand: strings inside the command buf */
size_t randlen; /* Number of pointers in client->randptr */
size_t randfree; /* Number of unused pointers in client->randptr */
- unsigned int written; /* Bytes of 'obuf' already written */
+ size_t written; /* Bytes of 'obuf' already written */
long long start; /* Start time of a request */
long long latency; /* Request latency */
int pending; /* Number of pending requests (replies to consume) */
@@ -266,7 +266,7 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (sdslen(c->obuf) > c->written) {
void *ptr = c->obuf+c->written;
- int nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
+ ssize_t nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
if (nwritten == -1) {
if (errno != EPIPE)
fprintf(stderr, "Writing to socket: %s\n", strerror(errno));
@@ -738,12 +738,24 @@ int main(int argc, const char **argv) {
free(cmd);
}
+ if (test_is_selected("rpush")) {
+ len = redisFormatCommand(&cmd,"RPUSH mylist %s",data);
+ benchmark("RPUSH",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("lpop")) {
len = redisFormatCommand(&cmd,"LPOP mylist");
benchmark("LPOP",cmd,len);
free(cmd);
}
+ if (test_is_selected("rpop")) {
+ len = redisFormatCommand(&cmd,"RPOP mylist");
+ benchmark("RPOP",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("sadd")) {
len = redisFormatCommand(&cmd,
"SADD myset element:__rand_int__");
diff --git a/src/redis-check-dump.c b/src/redis-check-rdb.c
index 546462001..21f72c222 100644
--- a/src/redis-check-dump.c
+++ b/src/redis-check-rdb.c
@@ -29,74 +29,19 @@
*/
+#include "redis.h"
+#include "rdb.h"
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <stdint.h>
-#include <limits.h>
#include "lzf.h"
#include "crc64.h"
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-#define REDIS_HASH_ZIPMAP 9
-#define REDIS_LIST_ZIPLIST 10
-#define REDIS_SET_INTSET 11
-#define REDIS_ZSET_ZIPLIST 12
-#define REDIS_HASH_ZIPLIST 13
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
-#define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME_MS 252
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
#define ERROR(...) { \
- printf(__VA_ARGS__); \
+ redisLog(REDIS_WARNING, __VA_ARGS__); \
exit(1); \
}
@@ -133,28 +78,23 @@ typedef struct {
char success;
} entry;
-/* Global vars that are actually used as constants. The following double
- * values are used for double on-disk serialization, and are initialized
- * at runtime to avoid strange compiler optimizations. */
-static double R_Zero, R_PosInf, R_NegInf, R_Nan;
-
#define MAX_TYPES_NUM 256
#define MAX_TYPE_NAME_LEN 16
/* store string types for output */
static char types[MAX_TYPES_NUM][MAX_TYPE_NAME_LEN];
/* Return true if 't' is a valid object type. */
-int checkType(unsigned char t) {
+static int rdbCheckType(unsigned char t) {
/* In case a new object type is added, update the following
* condition as necessary. */
return
- (t >= REDIS_HASH_ZIPMAP && t <= REDIS_HASH_ZIPLIST) ||
- t <= REDIS_HASH ||
- t >= REDIS_EXPIRETIME_MS;
+ (t >= REDIS_RDB_TYPE_HASH_ZIPMAP && t <= REDIS_RDB_TYPE_HASH_ZIPLIST) ||
+ t <= REDIS_RDB_TYPE_HASH ||
+ t >= REDIS_RDB_OPCODE_EXPIRETIME_MS;
}
/* when number of bytes to read is negative, do a peek */
-int readBytes(void *target, long num) {
+static int readBytes(void *target, long num) {
char peek = (num < 0) ? 1 : 0;
num = (num < 0) ? -num : num;
@@ -173,28 +113,28 @@ int processHeader(void) {
int dump_version;
if (!readBytes(buf, 9)) {
- ERROR("Cannot read header\n");
+ ERROR("Cannot read header");
}
/* expect the first 5 bytes to equal REDIS */
if (memcmp(buf,"REDIS",5) != 0) {
- ERROR("Wrong signature in header\n");
+ ERROR("Wrong signature in header");
}
dump_version = (int)strtol(buf + 5, NULL, 10);
if (dump_version < 1 || dump_version > 6) {
- ERROR("Unknown RDB format version: %d\n", dump_version);
+ ERROR("Unknown RDB format version: %d", dump_version);
}
return dump_version;
}
-int loadType(entry *e) {
+static int loadType(entry *e) {
uint32_t offset = CURR_OFFSET;
/* this byte needs to qualify as type */
unsigned char t;
if (readBytes(&t, 1)) {
- if (checkType(t)) {
+ if (rdbCheckType(t)) {
e->type = t;
return 1;
} else {
@@ -208,18 +148,18 @@ int loadType(entry *e) {
return 0;
}
-int peekType() {
+static int peekType() {
unsigned char t;
- if (readBytes(&t, -1) && (checkType(t)))
+ if (readBytes(&t, -1) && (rdbCheckType(t)))
return t;
return -1;
}
/* discard time, just consume the bytes */
-int processTime(int type) {
+static int processTime(int type) {
uint32_t offset = CURR_OFFSET;
unsigned char t[8];
- int timelen = (type == REDIS_EXPIRETIME_MS) ? 8 : 4;
+ int timelen = (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) ? 8 : 4;
if (readBytes(t,timelen)) {
return 1;
@@ -231,7 +171,7 @@ int processTime(int type) {
return 0;
}
-uint32_t loadLength(int *isencoded) {
+static uint32_t loadLength(int *isencoded) {
unsigned char buf[2];
uint32_t len;
int type;
@@ -257,7 +197,7 @@ uint32_t loadLength(int *isencoded) {
}
}
-char *loadIntegerObject(int enctype) {
+static char *loadIntegerObject(int enctype) {
uint32_t offset = CURR_OFFSET;
unsigned char enc[4];
long long val;
@@ -284,36 +224,36 @@ char *loadIntegerObject(int enctype) {
/* convert val into string */
char *buf;
- buf = malloc(sizeof(char) * 128);
+ buf = zmalloc(sizeof(char) * 128);
sprintf(buf, "%lld", val);
return buf;
}
-char* loadLzfStringObject() {
+static char* loadLzfStringObject() {
unsigned int slen, clen;
char *c, *s;
if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
- c = malloc(clen);
+ c = zmalloc(clen);
if (!readBytes(c, clen)) {
- free(c);
+ zfree(c);
return NULL;
}
- s = malloc(slen+1);
+ s = zmalloc(slen+1);
if (lzf_decompress(c,clen,s,slen) == 0) {
- free(c); free(s);
+ zfree(c); zfree(s);
return NULL;
}
- free(c);
+ zfree(c);
return s;
}
/* returns NULL when not processable, char* when valid */
-char* loadStringObject() {
+static char* loadStringObject() {
uint32_t offset = CURR_OFFSET;
int isencoded;
uint32_t len;
@@ -336,48 +276,48 @@ char* loadStringObject() {
if (len == REDIS_RDB_LENERR) return NULL;
- char *buf = malloc(sizeof(char) * (len+1));
+ char *buf = zmalloc(sizeof(char) * (len+1));
if (buf == NULL) return NULL;
buf[len] = '\0';
if (!readBytes(buf, len)) {
- free(buf);
+ zfree(buf);
return NULL;
}
return buf;
}
-int processStringObject(char** store) {
+static int processStringObject(char** store) {
unsigned long offset = CURR_OFFSET;
char *key = loadStringObject();
if (key == NULL) {
SHIFT_ERROR(offset, "Error reading string object");
- free(key);
+ zfree(key);
return 0;
}
if (store != NULL) {
*store = key;
} else {
- free(key);
+ zfree(key);
}
return 1;
}
-double* loadDoubleValue() {
+static double* loadDoubleValue() {
char buf[256];
unsigned char len;
double* val;
if (!readBytes(&len,1)) return NULL;
- val = malloc(sizeof(double));
+ val = zmalloc(sizeof(double));
switch(len) {
case 255: *val = R_NegInf; return val;
case 254: *val = R_PosInf; return val;
case 253: *val = R_Nan; return val;
default:
if (!readBytes(buf, len)) {
- free(val);
+ zfree(val);
return NULL;
}
buf[len] = '\0';
@@ -386,24 +326,24 @@ double* loadDoubleValue() {
}
}
-int processDoubleValue(double** store) {
+static int processDoubleValue(double** store) {
unsigned long offset = CURR_OFFSET;
double *val = loadDoubleValue();
if (val == NULL) {
SHIFT_ERROR(offset, "Error reading double value");
- free(val);
+ zfree(val);
return 0;
}
if (store != NULL) {
*store = val;
} else {
- free(val);
+ zfree(val);
}
return 1;
}
-int loadPair(entry *e) {
+static int loadPair(entry *e) {
uint32_t offset = CURR_OFFSET;
uint32_t i;
@@ -417,10 +357,10 @@ int loadPair(entry *e) {
}
uint32_t length = 0;
- if (e->type == REDIS_LIST ||
- e->type == REDIS_SET ||
- e->type == REDIS_ZSET ||
- e->type == REDIS_HASH) {
+ if (e->type == REDIS_RDB_TYPE_LIST ||
+ e->type == REDIS_RDB_TYPE_SET ||
+ e->type == REDIS_RDB_TYPE_ZSET ||
+ e->type == REDIS_RDB_TYPE_HASH) {
if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
return 0;
@@ -428,19 +368,19 @@ int loadPair(entry *e) {
}
switch(e->type) {
- case REDIS_STRING:
- case REDIS_HASH_ZIPMAP:
- case REDIS_LIST_ZIPLIST:
- case REDIS_SET_INTSET:
- case REDIS_ZSET_ZIPLIST:
- case REDIS_HASH_ZIPLIST:
+ case REDIS_RDB_TYPE_STRING:
+ case REDIS_RDB_TYPE_HASH_ZIPMAP:
+ case REDIS_RDB_TYPE_LIST_ZIPLIST:
+ case REDIS_RDB_TYPE_SET_INTSET:
+ case REDIS_RDB_TYPE_ZSET_ZIPLIST:
+ case REDIS_RDB_TYPE_HASH_ZIPLIST:
if (!processStringObject(NULL)) {
SHIFT_ERROR(offset, "Error reading entry value");
return 0;
}
break;
- case REDIS_LIST:
- case REDIS_SET:
+ case REDIS_RDB_TYPE_LIST:
+ case REDIS_RDB_TYPE_SET:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -449,7 +389,7 @@ int loadPair(entry *e) {
}
}
break;
- case REDIS_ZSET:
+ case REDIS_RDB_TYPE_ZSET:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -463,7 +403,7 @@ int loadPair(entry *e) {
}
}
break;
- case REDIS_HASH:
+ case REDIS_RDB_TYPE_HASH:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -486,7 +426,7 @@ int loadPair(entry *e) {
return 1;
}
-entry loadEntry() {
+static entry loadEntry() {
entry e = { NULL, -1, 0 };
uint32_t length, offset[4];
@@ -499,7 +439,7 @@ entry loadEntry() {
}
offset[1] = CURR_OFFSET;
- if (e.type == REDIS_SELECTDB) {
+ if (e.type == REDIS_RDB_OPCODE_SELECTDB) {
if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
SHIFT_ERROR(offset[1], "Error reading database number");
return e;
@@ -508,7 +448,7 @@ entry loadEntry() {
SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
return e;
}
- } else if (e.type == REDIS_EOF) {
+ } else if (e.type == REDIS_RDB_OPCODE_EOF) {
if (positions[level].offset < positions[level].size) {
SHIFT_ERROR(offset[0], "Unexpected EOF");
} else {
@@ -517,8 +457,8 @@ entry loadEntry() {
return e;
} else {
/* optionally consume expire */
- if (e.type == REDIS_EXPIRETIME ||
- e.type == REDIS_EXPIRETIME_MS) {
+ if (e.type == REDIS_RDB_OPCODE_EXPIRETIME ||
+ e.type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
if (!processTime(e.type)) return e;
if (!loadType(&e)) return e;
}
@@ -544,31 +484,31 @@ entry loadEntry() {
return e;
}
-void printCentered(int indent, int width, char* body) {
+static void printCentered(int indent, int width, char* body) {
char head[256], tail[256];
memset(head, '\0', 256);
memset(tail, '\0', 256);
memset(head, '=', indent);
memset(tail, '=', width - 2 - indent - strlen(body));
- printf("%s %s %s\n", head, body, tail);
+ redisLog(REDIS_WARNING, "%s %s %s", head, body, tail);
}
-void printValid(uint64_t ops, uint64_t bytes) {
+static void printValid(uint64_t ops, uint64_t bytes) {
char body[80];
sprintf(body, "Processed %llu valid opcodes (in %llu bytes)",
(unsigned long long) ops, (unsigned long long) bytes);
printCentered(4, 80, body);
}
-void printSkipped(uint64_t bytes, uint64_t offset) {
+static void printSkipped(uint64_t bytes, uint64_t offset) {
char body[80];
sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)",
(unsigned long long) bytes, (unsigned long long) offset);
printCentered(4, 80, body);
}
-void printErrorStack(entry *e) {
+static void printErrorStack(entry *e) {
unsigned int i;
char body[64];
@@ -598,20 +538,20 @@ void printErrorStack(entry *e) {
/* display error stack */
for (i = 0; i < errors.level; i++) {
- printf("0x%08lx - %s\n",
+ redisLog(REDIS_WARNING, "0x%08lx - %s",
(unsigned long) errors.offset[i], errors.error[i]);
}
}
void process(void) {
uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
- entry entry;
+ entry entry = { NULL, -1, 0 };
int dump_version = processHeader();
/* Exclude the final checksum for RDB >= 5. Will be checked at the end. */
if (dump_version >= 5) {
if (positions[0].size < 8) {
- printf("RDB version >= 5 but no room for checksum.\n");
+ redisLog(REDIS_WARNING, "RDB version >= 5 but no room for checksum.");
exit(1);
}
positions[0].size -= 8;
@@ -660,7 +600,7 @@ void process(void) {
/* advance position */
positions[0] = positions[1];
}
- free(entry.key);
+ zfree(entry.key);
}
/* because there is another potential error,
@@ -668,7 +608,7 @@ void process(void) {
printValid(num_valid_ops, num_valid_bytes);
/* expect an eof */
- if (entry.type != REDIS_EOF) {
+ if (entry.type != REDIS_RDB_OPCODE_EOF) {
/* last byte should be EOF, add error */
errors.level = 0;
SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
@@ -696,47 +636,40 @@ void process(void) {
if (crc != crc2) {
SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match.");
} else {
- printf("CRC64 checksum is OK\n");
+ redisLog(REDIS_WARNING, "CRC64 checksum is OK");
}
}
/* print summary on errors */
if (num_errors) {
- printf("\n");
- printf("Total unprocessable opcodes: %llu\n",
+ redisLog(REDIS_WARNING, "Total unprocessable opcodes: %llu",
(unsigned long long) num_errors);
}
}
-int main(int argc, char **argv) {
- /* expect the first argument to be the dump file */
- if (argc <= 1) {
- printf("Usage: %s <dump.rdb>\n", argv[0]);
- exit(0);
- }
-
+int redis_check_rdb(char *rdbfilename) {
int fd;
off_t size;
struct stat stat;
void *data;
- fd = open(argv[1], O_RDONLY);
+ fd = open(rdbfilename, O_RDONLY);
if (fd < 1) {
- ERROR("Cannot open file: %s\n", argv[1]);
+ ERROR("Cannot open file: %s", rdbfilename);
}
if (fstat(fd, &stat) == -1) {
- ERROR("Cannot stat: %s\n", argv[1]);
+ ERROR("Cannot stat: %s", rdbfilename);
} else {
size = stat.st_size;
}
if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) {
- ERROR("Cannot check dump files >2GB on a 32-bit platform\n");
+ ERROR("Cannot check dump files >2GB on a 32-bit platform");
}
data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
- ERROR("Cannot mmap: %s\n", argv[1]);
+ ERROR("Cannot mmap: %s", rdbfilename);
}
/* Initialize static vars */
@@ -746,22 +679,16 @@ int main(int argc, char **argv) {
errors.level = 0;
/* Object types */
- sprintf(types[REDIS_STRING], "STRING");
- sprintf(types[REDIS_LIST], "LIST");
- sprintf(types[REDIS_SET], "SET");
- sprintf(types[REDIS_ZSET], "ZSET");
- sprintf(types[REDIS_HASH], "HASH");
+ sprintf(types[REDIS_RDB_TYPE_STRING], "STRING");
+ sprintf(types[REDIS_RDB_TYPE_LIST], "LIST");
+ sprintf(types[REDIS_RDB_TYPE_SET], "SET");
+ sprintf(types[REDIS_RDB_TYPE_ZSET], "ZSET");
+ sprintf(types[REDIS_RDB_TYPE_HASH], "HASH");
/* Object types only used for dumping to disk */
- sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
- sprintf(types[REDIS_SELECTDB], "SELECTDB");
- sprintf(types[REDIS_EOF], "EOF");
-
- /* Double constants initialization */
- R_Zero = 0.0;
- R_PosInf = 1.0/R_Zero;
- R_NegInf = -1.0/R_Zero;
- R_Nan = R_Zero/R_Zero;
+ sprintf(types[REDIS_RDB_OPCODE_EXPIRETIME], "EXPIRETIME");
+ sprintf(types[REDIS_RDB_OPCODE_SELECTDB], "SELECTDB");
+ sprintf(types[REDIS_RDB_OPCODE_EOF], "EOF");
process();
@@ -769,3 +696,15 @@ int main(int argc, char **argv) {
close(fd);
return 0;
}
+
+/* RDB check main: called from redis.c when Redis is executed with the
+ * redis-check-rdb alias. */
+int redis_check_rdb_main(char **argv, int argc) {
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
+ exit(1);
+ }
+ redisLog(REDIS_WARNING, "Checking RDB file %s", argv[1]);
+ exit(redis_check_rdb(argv[1]));
+ return 0;
+}
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 51b4ccc2c..251e42fad 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -44,6 +44,7 @@
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
+#include <math.h>
#include "hiredis.h"
#include "sds.h"
@@ -63,6 +64,17 @@
#define REDIS_CLI_HISTFILE_ENV "REDISCLI_HISTFILE"
#define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history"
+/* --latency-dist palettes. */
+int spectrum_palette_color_size = 19;
+int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196};
+
+int spectrum_palette_mono_size = 13;
+int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253};
+
+/* The actual palette in use. */
+int *spectrum_palette;
+int spectrum_palette_size;
+
static redisContext *context;
static struct config {
char *hostip;
@@ -76,7 +88,10 @@ static struct config {
int monitor_mode;
int pubsub_mode;
int latency_mode;
+ int latency_dist_mode;
int latency_history;
+ int lru_test_mode;
+ long long lru_test_sample_size;
int cluster_mode;
int cluster_reissue_command;
int slave_mode;
@@ -130,9 +145,8 @@ static void cliRefreshPrompt(void) {
len = snprintf(config.prompt,sizeof(config.prompt),"redis %s",
config.hostsocket);
else
- len = snprintf(config.prompt,sizeof(config.prompt),
- strchr(config.hostip,':') ? "[%s]:%d" : "%s:%d",
- config.hostip, config.hostport);
+ len = anetFormatAddr(config.prompt, sizeof(config.prompt),
+ config.hostip, config.hostport);
/* Add [dbnum] if needed */
if (config.dbnum != 0 && config.last_cmd_type != REDIS_REPLY_ERROR)
len += snprintf(config.prompt+len,sizeof(config.prompt)-len,"[%d]",
@@ -519,7 +533,7 @@ static sds cliFormatReplyCSV(redisReply *r) {
out = sdscatrepr(out,r->str,r->len);
break;
case REDIS_REPLY_NIL:
- out = sdscat(out,"NIL\n");
+ out = sdscat(out,"NIL");
break;
case REDIS_REPLY_ARRAY:
for (i = 0; i < r->elements; i++) {
@@ -630,6 +644,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
output_raw = 0;
if (!strcasecmp(command,"info") ||
+ (argc == 3 && !strcasecmp(command,"debug") &&
+ (!strcasecmp(argv[1],"jemalloc") &&
+ !strcasecmp(argv[2],"info"))) ||
(argc == 2 && !strcasecmp(command,"cluster") &&
(!strcasecmp(argv[1],"nodes") ||
!strcasecmp(argv[1],"info"))) ||
@@ -698,16 +715,17 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
return REDIS_OK;
}
-/* Send the INFO command, reconnecting the link if needed. */
-static redisReply *reconnectingInfo(void) {
- redisContext *c = context;
+/* Send a command reconnecting the link if needed. */
+static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ...) {
redisReply *reply = NULL;
int tries = 0;
+ va_list ap;
assert(!c->err);
while(reply == NULL) {
while (c->err & (REDIS_ERR_IO | REDIS_ERR_EOF)) {
- printf("Reconnecting (%d)...\r", ++tries);
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
+ printf("Reconnecting... %d\r", ++tries);
fflush(stdout);
redisFree(c);
@@ -715,12 +733,15 @@ static redisReply *reconnectingInfo(void) {
usleep(1000000);
}
- reply = redisCommand(c,"INFO");
+ va_start(ap,fmt);
+ reply = redisvCommand(c,fmt,ap);
+ va_end(ap);
+
if (c->err && !(c->err & (REDIS_ERR_IO | REDIS_ERR_EOF))) {
fprintf(stderr, "Error: %s\n", c->errstr);
exit(1);
} else if (tries > 0) {
- printf("\n");
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
}
}
@@ -768,9 +789,17 @@ static int parseOptions(int argc, char **argv) {
config.output = OUTPUT_CSV;
} else if (!strcmp(argv[i],"--latency")) {
config.latency_mode = 1;
+ } else if (!strcmp(argv[i],"--latency-dist")) {
+ config.latency_dist_mode = 1;
+ } else if (!strcmp(argv[i],"--mono")) {
+ spectrum_palette = spectrum_palette_mono;
+ spectrum_palette_size = spectrum_palette_mono_size;
} else if (!strcmp(argv[i],"--latency-history")) {
config.latency_mode = 1;
config.latency_history = 1;
+ } else if (!strcmp(argv[i],"--lru-test") && !lastarg) {
+ config.lru_test_mode = 1;
+ config.lru_test_sample_size = strtoll(argv[++i],NULL,10);
} else if (!strcmp(argv[i],"--slave")) {
config.slave_mode = 1;
} else if (!strcmp(argv[i],"--stat")) {
@@ -860,6 +889,9 @@ static void usage(void) {
" --latency Enter a special mode continuously sampling latency.\n"
" --latency-history Like --latency but tracking latency changes over time.\n"
" Default time interval is 15 sec. Change it using -i.\n"
+" --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n"
+" Default time interval is 1 sec. Change it using -i.\n"
+" --lru-test <keys> Simulate a cache workload with an 80-20 distribution.\n"
" --slave Simulate a slave showing commands received from the master.\n"
" --rdb <filename> Transfer an RDB dump from remote server to local file.\n"
" --pipe Transfer raw Redis protocol from stdin to server.\n"
@@ -1075,7 +1107,7 @@ static void latencyMode(void) {
if (!context) exit(1);
while(1) {
start = mstime();
- reply = redisCommand(context,"PING");
+ reply = reconnectingRedisCommand(context,"PING");
if (reply == NULL) {
fprintf(stderr,"\nI/O error\n");
exit(1);
@@ -1106,6 +1138,148 @@ static void latencyMode(void) {
}
/*------------------------------------------------------------------------------
+ * Latency distribution mode -- requires 256 colors xterm
+ *--------------------------------------------------------------------------- */
+
+#define LATENCY_DIST_DEFAULT_INTERVAL 1000 /* milliseconds. */
+
+/* Structure to store samples distribution. */
+struct distsamples {
+ long long max; /* Max latency to fit into this interval (usec). */
+ long long count; /* Number of samples in this interval. */
+ int character; /* Associated character in visualization. */
+};
+
+/* Helper function for latencyDistMode(). Performs the spectrum visualization
+ * of the collected samples targeting an xterm 256 terminal.
+ *
+ * Takes an array of distsamples structures, ordered from smaller to bigger
+ * 'max' value. Last sample max must be 0, to mean that it holds all the
+ * samples greater than the previous one, and is also the stop sentinel.
+ *
+ * 'tot' is the total number of samples in the different buckets, so it
+ * is the SUM(samples[i].count) for i from 0 up to the max sample.
+ *
+ * As a side effect the function sets all the buckets count to 0. */
+void showLatencyDistSamples(struct distsamples *samples, long long tot) {
+ int j;
+
+ /* We convert samples into an index inside the palette
+ * proportional to the percentage a given bucket represents.
+ * This way the intensity of the different parts of the spectrum
+ * doesn't change relative to the number of requests, which avoids
+ * polluting the visualization with non-latency related info. */
+ printf("\033[38;5;0m"); /* Set foreground color to black. */
+ for (j = 0; ; j++) {
+ int coloridx =
+ ceil((float) samples[j].count / tot * (spectrum_palette_size-1));
+ int color = spectrum_palette[coloridx];
+ printf("\033[48;5;%dm%c", (int)color, samples[j].character);
+ samples[j].count = 0;
+ if (samples[j].max == 0) break; /* Last sample. */
+ }
+ printf("\033[0m\n");
+ fflush(stdout);
+}
+
+/* Show the legend: different buckets values and colors meaning, so
+ * that the spectrum is more easily readable. */
+void showLatencyDistLegend(void) {
+ int j;
+
+ printf("---------------------------------------------\n");
+ printf(". - * # .01 .125 .25 .5 milliseconds\n");
+ printf("1,2,3,...,9 from 1 to 9 milliseconds\n");
+ printf("A,B,C,D,E 10,20,30,40,50 milliseconds\n");
+ printf("F,G,H,I,J .1,.2,.3,.4,.5 seconds\n");
+ printf("K,L,M,N,O,P,Q,? 1,2,4,8,16,30,60,>60 seconds\n");
+ printf("From 0 to 100%%: ");
+ for (j = 0; j < spectrum_palette_size; j++) {
+ printf("\033[48;5;%dm ", spectrum_palette[j]);
+ }
+ printf("\033[0m\n");
+ printf("---------------------------------------------\n");
+}
+
+static void latencyDistMode(void) {
+ redisReply *reply;
+ long long start, latency, count = 0;
+ long long history_interval =
+ config.interval ? config.interval/1000 :
+ LATENCY_DIST_DEFAULT_INTERVAL;
+ long long history_start = ustime();
+ int j, outputs = 0;
+
+ struct distsamples samples[] = {
+ /* We use a mostly logarithmic scale, with certain linear intervals
+ * which are more interesting than others, like 1-10 milliseconds
+ * range. */
+ {10,0,'.'}, /* 0.01 ms */
+ {125,0,'-'}, /* 0.125 ms */
+ {250,0,'*'}, /* 0.25 ms */
+ {500,0,'#'}, /* 0.5 ms */
+ {1000,0,'1'}, /* 1 ms */
+ {2000,0,'2'}, /* 2 ms */
+ {3000,0,'3'}, /* 3 ms */
+ {4000,0,'4'}, /* 4 ms */
+ {5000,0,'5'}, /* 5 ms */
+ {6000,0,'6'}, /* 6 ms */
+ {7000,0,'7'}, /* 7 ms */
+ {8000,0,'8'}, /* 8 ms */
+ {9000,0,'9'}, /* 9 ms */
+ {10000,0,'A'}, /* 10 ms */
+ {20000,0,'B'}, /* 20 ms */
+ {30000,0,'C'}, /* 30 ms */
+ {40000,0,'D'}, /* 40 ms */
+ {50000,0,'E'}, /* 50 ms */
+ {100000,0,'F'}, /* 0.1 s */
+ {200000,0,'G'}, /* 0.2 s */
+ {300000,0,'H'}, /* 0.3 s */
+ {400000,0,'I'}, /* 0.4 s */
+ {500000,0,'J'}, /* 0.5 s */
+ {1000000,0,'K'}, /* 1 s */
+ {2000000,0,'L'}, /* 2 s */
+ {4000000,0,'M'}, /* 4 s */
+ {8000000,0,'N'}, /* 8 s */
+ {16000000,0,'O'}, /* 16 s */
+ {30000000,0,'P'}, /* 30 s */
+ {60000000,0,'Q'}, /* 1 minute */
+ {0,0,'?'}, /* > 1 minute */
+ };
+
+ if (!context) exit(1);
+ while(1) {
+ start = ustime();
+ reply = reconnectingRedisCommand(context,"PING");
+ if (reply == NULL) {
+ fprintf(stderr,"\nI/O error\n");
+ exit(1);
+ }
+ latency = ustime()-start;
+ freeReplyObject(reply);
+ count++;
+
+ /* Populate the relevant bucket. */
+ for (j = 0; ; j++) {
+ if (samples[j].max == 0 || latency <= samples[j].max) {
+ samples[j].count++;
+ break;
+ }
+ }
+
+ /* From time to time show the spectrum. */
+ if (count && (ustime()-history_start)/1000 > history_interval) {
+ if ((outputs++ % 20) == 0)
+ showLatencyDistLegend();
+ showLatencyDistSamples(samples,count);
+ history_start = ustime();
+ count = 0;
+ }
+ usleep(LATENCY_SAMPLE_RATE * 1000);
+ }
+}
+
+/*------------------------------------------------------------------------------
* Slave mode
*--------------------------------------------------------------------------- */
@@ -1724,7 +1898,7 @@ static void statMode(void) {
char buf[64];
int j;
- reply = reconnectingInfo();
+ reply = reconnectingRedisCommand(context,"INFO");
if (reply->type == REDIS_REPLY_ERROR) {
printf("ERROR: %s\n", reply->str);
exit(1);
@@ -1778,6 +1952,7 @@ static void statMode(void) {
/* Children */
aux = getLongInfoField(reply->str,"bgsave_in_progress");
aux |= getLongInfoField(reply->str,"aof_rewrite_in_progress") << 1;
+ aux |= getLongInfoField(reply->str,"loading") << 2;
switch(aux) {
case 0: break;
case 1:
@@ -1789,6 +1964,9 @@ static void statMode(void) {
case 3:
printf("SAVE+AOF");
break;
+ case 4:
+ printf("LOAD");
+ break;
}
printf("\n");
@@ -1831,6 +2009,94 @@ static void scanMode(void) {
}
/*------------------------------------------------------------------------------
+ * LRU test mode
+ *--------------------------------------------------------------------------- */
+
+/* Return an integer from min to max (both inclusive) using a power-law
+ * distribution, depending on the value of alpha: the greater the alpha
+ * the more bias towards lower values.
+ *
+ * With alpha = 6.2 the output follows the 80-20 rule where 20% of
+ * the returned numbers will account for 80% of the frequency. */
+long long powerLawRand(long long min, long long max, double alpha) {
+ double pl, r;
+
+ max += 1;
+ r = ((double)rand()) / RAND_MAX;
+ pl = pow(
+ ((pow(max,alpha+1) - pow(min,alpha+1))*r + pow(min,alpha+1)),
+ (1.0/(alpha+1)));
+ return (max-1-(long long)pl)+min;
+}
+
+/* Generates a key name among a set of lru_test_sample_size keys, using
+ * an 80-20 distribution. */
+void LRUTestGenKey(char *buf, size_t buflen) {
+ snprintf(buf, buflen, "lru:%lld\n",
+ powerLawRand(1, config.lru_test_sample_size, 6.2));
+}
+
+#define LRU_CYCLE_PERIOD 1000 /* 1000 milliseconds. */
+#define LRU_CYCLE_PIPELINE_SIZE 250
+static void LRUTestMode(void) {
+ redisReply *reply;
+ char key[128];
+ long long start_cycle;
+ int j;
+
+ srand(time(NULL)^getpid());
+ while(1) {
+ /* Perform cycles of 1 second with 50% writes and 50% reads.
+ * We use pipelining batching writes / reads N times per cycle in order
+ * to fill the target instance easily. */
+ start_cycle = mstime();
+ long long hits = 0, misses = 0;
+ while(mstime() - start_cycle < 1000) {
+ /* Write cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "SET %s val",key);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++)
+ redisGetReply(context, (void**)&reply);
+
+ /* Read cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "GET %s",key);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ if (redisGetReply(context, (void**)&reply) == REDIS_OK) {
+ switch(reply->type) {
+ case REDIS_REPLY_ERROR:
+ printf("%s\n", reply->str);
+ break;
+ case REDIS_REPLY_NIL:
+ misses++;
+ break;
+ default:
+ hits++;
+ break;
+ }
+ }
+ }
+
+ if (context->err) {
+ fprintf(stderr,"I/O error during LRU test\n");
+ exit(1);
+ }
+ }
+ /* Print stats. */
+ printf(
+ "%lld Gets/sec | Hits: %lld (%.2f%%) | Misses: %lld (%.2f%%)\n",
+ hits+misses,
+ hits, (double)hits/(hits+misses)*100,
+ misses, (double)misses/(hits+misses)*100);
+ }
+ exit(0);
+}
+
+/*------------------------------------------------------------------------------
* Intrisic latency mode.
*
* Measure max latency of a running process that does not result from
@@ -1921,7 +2187,10 @@ int main(int argc, char **argv) {
config.monitor_mode = 0;
config.pubsub_mode = 0;
config.latency_mode = 0;
+ config.latency_dist_mode = 0;
config.latency_history = 0;
+ config.lru_test_mode = 0;
+ config.lru_test_sample_size = 0;
config.cluster_mode = 0;
config.slave_mode = 0;
config.getrdb_mode = 0;
@@ -1938,6 +2207,9 @@ int main(int argc, char **argv) {
config.eval = NULL;
config.last_cmd_type = -1;
+ spectrum_palette = spectrum_palette_color;
+ spectrum_palette_size = spectrum_palette_color_size;
+
if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL))
config.output = OUTPUT_RAW;
else
@@ -1955,6 +2227,12 @@ int main(int argc, char **argv) {
latencyMode();
}
+ /* Latency distribution mode */
+ if (config.latency_dist_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ latencyDistMode();
+ }
+
/* Slave mode */
if (config.slave_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
@@ -1992,6 +2270,12 @@ int main(int argc, char **argv) {
scanMode();
}
+ /* LRU test mode */
+ if (config.lru_test_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ LRUTestMode();
+ }
+
/* Intrinsic latency mode */
if (config.intrinsic_latency_mode) intrinsicLatencyMode();
diff --git a/src/redis-trib.rb b/src/redis-trib.rb
index 4002f6309..6002e4caa 100755
--- a/src/redis-trib.rb
+++ b/src/redis-trib.rb
@@ -72,7 +72,7 @@ class ClusterNode
@friends
end
- def slots
+ def slots
@info[:slots]
end
@@ -154,7 +154,7 @@ class ClusterNode
end
} if slots
@dirty = false
- @r.cluster("info").split("\n").each{|e|
+ @r.cluster("info").split("\n").each{|e|
k,v=e.split(":")
k = k.to_sym
v.chop!
@@ -213,7 +213,7 @@ class ClusterNode
#
# Note: this could be easily written without side effects,
# we use 'slots' just to split the computation into steps.
-
+
# First step: we want an increasing array of integers
# for instance: [1,2,3,4,5,8,9,20,21,22,23,24,25,30]
slots = @info[:slots].keys.sort
@@ -273,7 +273,7 @@ class ClusterNode
def info
@info
end
-
+
def is_dirty?
@dirty
end
@@ -540,7 +540,6 @@ class RedisTrib
nodes_count = @nodes.length
masters_count = @nodes.length / (@replicas+1)
masters = []
- slaves = []
# The first step is to split instances by IP. This is useful as
# we'll try to allocate master nodes in different physical machines
@@ -558,16 +557,31 @@ class RedisTrib
# Select master instances
puts "Using #{masters_count} masters:"
- while masters.length < masters_count
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- masters << nodes_list.shift
- puts masters[-1]
- nodes_count -= 1
- break if masters.length == masters_count
- }
+ interleaved = []
+ stop = false
+ while not stop do
+ # Take one node from each IP until we run out of nodes
+ # across every IP.
+ ips.each do |ip,nodes|
+ if nodes.empty?
+ # if this IP has no remaining nodes, check for termination
+ if interleaved.length == nodes_count
+ # stop when 'interleaved' has accumulated all nodes
+ stop = true
+ next
+ end
+ else
+ # else, move one node from this IP to 'interleaved'
+ interleaved.push nodes.shift
+ end
+ end
end
+ masters = interleaved.slice!(0, masters_count)
+ nodes_count -= masters.length
+
+ masters.each{|m| puts m}
+
# Alloc slots on masters
slots_per_node = ClusterHashSlots.to_f / masters_count
first = 0
@@ -594,8 +608,8 @@ class RedisTrib
# all nodes will be used.
assignment_verbose = false
- [:requested,:unused].each{|assign|
- masters.each{|m|
+ [:requested,:unused].each do |assign|
+ masters.each do |m|
assigned_replicas = 0
while assigned_replicas < @replicas
break if nodes_count == 0
@@ -609,21 +623,33 @@ class RedisTrib
"role too (#{nodes_count} remaining)."
end
end
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- # Skip instances with the same IP as the master if we
- # have some more IPs available.
- next if ip == m.info[:host] && nodes_count > nodes_list.length
- slave = nodes_list.shift
- slave.set_as_replica(m.info[:name])
- nodes_count -= 1
- assigned_replicas += 1
- puts "Adding replica #{slave} to #{m}"
- break
- }
+
+ # Return the first node not matching our current master
+ node = interleaved.find{|n| n.info[:host] != m.info[:host]}
+
+ # If we found a node, use it as a best-first match.
+ # Otherwise, we didn't find a node on a different IP, so we
+ # go ahead and use a same-IP replica.
+ if node
+ slave = node
+ interleaved.delete node
+ else
+ slave = interleaved.shift
+ end
+ slave.set_as_replica(m.info[:name])
+ nodes_count -= 1
+ assigned_replicas += 1
+ puts "Adding replica #{slave} to #{m}"
+
+ # If we are in the "assign extra nodes" loop,
+ # we want to assign one extra replica to each
+ # master before repeating masters.
+ # This break lets us assign extra replicas to masters
+ # in a round-robin way.
+ break if assign == :unused
end
- }
- }
+ end
+ end
end
def flush_nodes_config
@@ -763,7 +789,7 @@ class RedisTrib
# Move slots between source and target nodes using MIGRATE.
#
- # Options:
+ # Options:
# :verbose -- Print a dot for every moved key.
# :fix -- We are moving in the context of a fix. Use REPLACE.
# :cold -- Move keys without opening / reconfiguring the nodes.
@@ -1206,7 +1232,7 @@ end
#################################################################################
# Libraries
-#
+#
# We try to don't depend on external libs since this is a critical part
# of Redis Cluster.
#################################################################################
diff --git a/src/redis.c b/src/redis.c
index 030cfbb48..6e546ecc5 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -53,6 +53,7 @@
#include <sys/resource.h>
#include <sys/utsname.h>
#include <locale.h>
+#include <sys/sysctl.h>
/* Our shared "common" objects */
@@ -161,7 +162,7 @@ struct redisCommand redisCommandTable[] = {
{"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
{"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"spop",spopCommand,2,"wRsF",0,NULL,1,1,1,0,0},
+ {"spop",spopCommand,-2,"wRsF",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
@@ -248,7 +249,7 @@ struct redisCommand redisCommandTable[] = {
{"pttl",pttlCommand,2,"rF",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
{"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
- {"role",roleCommand,1,"last",0,NULL,0,0,0,0,0},
+ {"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
{"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
{"config",configCommand,-2,"art",0,NULL,0,0,0,0,0},
{"subscribe",subscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
@@ -260,19 +261,19 @@ struct redisCommand redisCommandTable[] = {
{"watch",watchCommand,-2,"rsF",0,NULL,1,-1,1,0,0},
{"unwatch",unwatchCommand,1,"rsF",0,NULL,0,0,0,0,0},
{"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
- {"restore",restoreCommand,-4,"awm",0,NULL,1,1,1,0,0},
- {"restore-asking",restoreCommand,-4,"awmk",0,NULL,1,1,1,0,0},
- {"migrate",migrateCommand,-6,"aw",0,NULL,0,0,0,0,0},
+ {"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
+ {"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
+ {"migrate",migrateCommand,-6,"w",0,NULL,0,0,0,0,0},
{"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
{"readonly",readonlyCommand,1,"rF",0,NULL,0,0,0,0,0},
{"readwrite",readwriteCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
+ {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0},
{"object",objectCommand,3,"r",0,NULL,2,2,2,0,0},
- {"client",clientCommand,-2,"ars",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
- {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
+ {"script",scriptCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"time",timeCommand,1,"rRF",0,NULL,0,0,0,0,0},
{"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
{"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
@@ -1415,6 +1416,7 @@ void initServerConfig(void) {
server.syslog_facility = LOG_LOCAL0;
server.daemonize = REDIS_DEFAULT_DAEMONIZE;
server.supervised = 0;
+ server.supervised_mode = REDIS_SUPERVISED_NONE;
server.aof_state = REDIS_AOF_OFF;
server.aof_fsync = REDIS_DEFAULT_AOF_FSYNC;
server.aof_no_fsync_on_rewrite = REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
@@ -1432,7 +1434,7 @@ void initServerConfig(void) {
server.aof_flush_postponed_start = 0;
server.aof_rewrite_incremental_fsync = REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
server.aof_load_truncated = REDIS_DEFAULT_AOF_LOAD_TRUNCATED;
- server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
+ server.pidfile = NULL;
server.rdb_filename = zstrdup(REDIS_DEFAULT_RDB_FILENAME);
server.aof_filename = zstrdup(REDIS_DEFAULT_AOF_FILENAME);
server.requirepass = NULL;
@@ -1448,8 +1450,8 @@ void initServerConfig(void) {
server.maxmemory_samples = REDIS_DEFAULT_MAXMEMORY_SAMPLES;
server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
- server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
- server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
+ server.list_max_ziplist_size = REDIS_LIST_MAX_ZIPLIST_SIZE;
+ server.list_compress_depth = REDIS_LIST_COMPRESS_DEPTH;
server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
@@ -1527,6 +1529,7 @@ void initServerConfig(void) {
server.lpushCommand = lookupCommandByCString("lpush");
server.lpopCommand = lookupCommandByCString("lpop");
server.rpopCommand = lookupCommandByCString("rpop");
+ server.sremCommand = lookupCommandByCString("srem");
/* Slow log */
server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
@@ -1565,33 +1568,33 @@ void adjustOpenFilesLimit(void) {
/* Set the max number of files if the current limit is not enough
* for our needs. */
if (oldlimit < maxfiles) {
- rlim_t f;
+ rlim_t bestlimit;
int setrlimit_error = 0;
/* Try to set the file limit to match 'maxfiles' or at least
* to the higher value supported less than maxfiles. */
- f = maxfiles;
- while(f > oldlimit) {
+ bestlimit = maxfiles;
+ while(bestlimit > oldlimit) {
rlim_t decr_step = 16;
- limit.rlim_cur = f;
- limit.rlim_max = f;
+ limit.rlim_cur = bestlimit;
+ limit.rlim_max = bestlimit;
if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
setrlimit_error = errno;
- /* We failed to set file limit to 'f'. Try with a
+ /* We failed to set file limit to 'bestlimit'. Try with a
* smaller limit decrementing by a few FDs per iteration. */
- if (f < decr_step) break;
- f -= decr_step;
+ if (bestlimit < decr_step) break;
+ bestlimit -= decr_step;
}
/* Assume that the limit we get initially is still valid if
* our last try was even lower. */
- if (f < oldlimit) f = oldlimit;
+ if (bestlimit < oldlimit) bestlimit = oldlimit;
- if (f != maxfiles) {
+ if (bestlimit < maxfiles) {
int old_maxclients = server.maxclients;
- server.maxclients = f-REDIS_MIN_RESERVED_FDS;
+ server.maxclients = bestlimit-REDIS_MIN_RESERVED_FDS;
if (server.maxclients < 1) {
redisLog(REDIS_WARNING,"Your current 'ulimit -n' "
"of %llu is not enough for Redis to start. "
@@ -1612,7 +1615,7 @@ void adjustOpenFilesLimit(void) {
"maxclients has been reduced to %d to compensate for "
"low ulimit. "
"If you need higher maxclients increase 'ulimit -n'.",
- (unsigned long long) oldlimit, server.maxclients);
+ (unsigned long long) bestlimit, server.maxclients);
} else {
redisLog(REDIS_NOTICE,"Increased maximum number of open files "
"to %llu (it was originally set to %llu).",
@@ -1759,6 +1762,7 @@ void initServer(void) {
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
server.clients_paused = 0;
+ server.system_memory_size = zmalloc_get_memory_size();
createSharedObjects();
adjustOpenFilesLimit();
@@ -1998,6 +2002,9 @@ struct redisCommand *lookupCommandOrOriginal(sds name) {
* + REDIS_PROPAGATE_NONE (no propagation of command at all)
* + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
* + REDIS_PROPAGATE_REPL (propagate into the replication link)
+ *
+ * This should not be used inside command implementations. Use instead
+ * alsoPropagate(), preventCommandPropagation(), forceCommandPropagation().
*/
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
@@ -2009,11 +2016,31 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
}
/* Used inside commands to schedule the propagation of additional commands
- * after the current command is propagated to AOF / Replication. */
+ * after the current command is propagated to AOF / Replication.
+ *
+ * 'cmd' must be a pointer to the Redis command to replicate, dbid is the
+ * database ID the command should be propagated into.
+ * Arguments of the command to propagate are passed as an array of redis
+ * objects pointers of len 'argc', using the 'argv' vector.
+ *
+ * The function does not take a reference to the passed 'argv' vector,
+ * so it is up to the caller to release the passed argv (but it is usually
+ * stack allocated). The function automatically increments ref count of
+ * passed objects, so the caller does not need to. */
void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int target)
{
- redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
+ robj **argvcopy;
+ int j;
+
+ if (server.loading) return; /* No propagation during loading. */
+
+ argvcopy = zmalloc(sizeof(robj*)*argc);
+ for (j = 0; j < argc; j++) {
+ argvcopy[j] = argv[j];
+ incrRefCount(argv[j]);
+ }
+ redisOpArrayAppend(&server.also_propagate,cmd,dbid,argvcopy,argc,target);
}
/* It is possible to call the function forceCommandPropagation() inside a
@@ -2024,6 +2051,13 @@ void forceCommandPropagation(redisClient *c, int flags) {
if (flags & REDIS_PROPAGATE_AOF) c->flags |= REDIS_FORCE_AOF;
}
+/* Prevent the executed command from being propagated at all. This way we
+ * are free to just propagate what we want using the alsoPropagate()
+ * API. */
+void preventCommandPropagation(redisClient *c) {
+ c->flags |= REDIS_PREVENT_PROP;
+}
+
/* Call() is the core of Redis execution of a command */
void call(redisClient *c, int flags) {
long long dirty, start, duration;
@@ -2033,7 +2067,7 @@ void call(redisClient *c, int flags) {
* not generated from reading an AOF. */
if (listLength(server.monitors) &&
!server.loading &&
- !(c->cmd->flags & REDIS_CMD_SKIP_MONITOR))
+ !(c->cmd->flags & (REDIS_CMD_SKIP_MONITOR|REDIS_CMD_ADMIN)))
{
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
}
@@ -2077,7 +2111,7 @@ void call(redisClient *c, int flags) {
}
/* Propagate the command into the AOF and replication link */
- if (flags & REDIS_CALL_PROPAGATE) {
+ if (flags & REDIS_CALL_PROPAGATE && (c->flags & REDIS_PREVENT_PROP) == 0) {
int flags = REDIS_PROPAGATE_NONE;
if (c->flags & REDIS_FORCE_REPL) flags |= REDIS_PROPAGATE_REPL;
@@ -2088,20 +2122,24 @@ void call(redisClient *c, int flags) {
propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
}
- /* Restore the old FORCE_AOF/REPL flags, since call can be executed
+ /* Restore the old replication flags, since call can be executed
* recursively. */
- c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL);
- c->flags |= client_old_flags & (REDIS_FORCE_AOF|REDIS_FORCE_REPL);
+ c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL|REDIS_PREVENT_PROP);
+ c->flags |= client_old_flags &
+ (REDIS_FORCE_AOF|REDIS_FORCE_REPL|REDIS_PREVENT_PROP);
/* Handle the alsoPropagate() API to handle commands that want to propagate
- * multiple separated commands. */
+ * multiple separated commands. Note that alsoPropagate() is not affected
+ * by REDIS_PREVENT_PROP flag. */
if (server.also_propagate.numops) {
int j;
redisOp *rop;
- for (j = 0; j < server.also_propagate.numops; j++) {
- rop = &server.also_propagate.ops[j];
- propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
+ if (flags & REDIS_CALL_PROPAGATE) {
+ for (j = 0; j < server.also_propagate.numops; j++) {
+ rop = &server.also_propagate.ops[j];
+ propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,rop->target);
+ }
}
redisOpArrayFree(&server.also_propagate);
}
@@ -2372,7 +2410,7 @@ int prepareForShutdown(int flags) {
return REDIS_ERR;
}
}
- if (server.daemonize) {
+ if (server.daemonize || server.pidfile) {
redisLog(REDIS_NOTICE,"Removing the pid file.");
unlink(server.pidfile);
}
@@ -2479,7 +2517,6 @@ void timeCommand(redisClient *c) {
addReplyBulkLongLong(c,tv.tv_usec);
}
-
/* Helper function for addReplyCommand() to output flags. */
int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *reply) {
if (cmd->flags & f) {
@@ -2698,7 +2735,14 @@ sds genRedisInfoString(char *section) {
if (allsections || defsections || !strcasecmp(section,"memory")) {
char hmem[64];
char peak_hmem[64];
+ char total_system_hmem[64];
+ char used_memory_lua_hmem[64];
+ char used_memory_rss_hmem[64];
+ char maxmemory_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
+ size_t total_system_mem = server.system_memory_size;
+ char *evict_policy = maxmemoryToString();
+ long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024;
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
@@ -2709,23 +2753,42 @@ sds genRedisInfoString(char *section) {
bytesToHuman(hmem,zmalloc_used);
bytesToHuman(peak_hmem,server.stat_peak_memory);
+ bytesToHuman(total_system_hmem,total_system_mem);
+ bytesToHuman(used_memory_lua_hmem,memory_lua);
+ bytesToHuman(used_memory_rss_hmem,server.resident_set_size);
+ bytesToHuman(maxmemory_hmem,server.maxmemory);
+
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Memory\r\n"
"used_memory:%zu\r\n"
"used_memory_human:%s\r\n"
"used_memory_rss:%zu\r\n"
+ "used_memory_rss_human:%s\r\n"
"used_memory_peak:%zu\r\n"
"used_memory_peak_human:%s\r\n"
+ "total_system_memory:%lu\r\n"
+ "total_system_memory_human:%s\r\n"
"used_memory_lua:%lld\r\n"
+ "used_memory_lua_human:%s\r\n"
+ "maxmemory:%lld\r\n"
+ "maxmemory_human:%s\r\n"
+ "maxmemory_policy:%s\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
"mem_allocator:%s\r\n",
zmalloc_used,
hmem,
server.resident_set_size,
+ used_memory_rss_hmem,
server.stat_peak_memory,
peak_hmem,
- ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
+ (unsigned long)total_system_mem,
+ total_system_hmem,
+ memory_lua,
+ used_memory_lua_hmem,
+ server.maxmemory,
+ maxmemory_hmem,
+ evict_policy,
zmalloc_get_fragmentation_ratio(server.resident_set_size),
ZMALLOC_LIB
);
@@ -2792,14 +2855,14 @@ sds genRedisInfoString(char *section) {
server.loading_loaded_bytes;
perc = ((double)server.loading_loaded_bytes /
- server.loading_total_bytes) * 100;
+ (server.loading_total_bytes+1)) * 100;
- elapsed = server.unixtime-server.loading_start_time;
+ elapsed = time(NULL)-server.loading_start_time;
if (elapsed == 0) {
eta = 1; /* A fake 1 second figure if we don't have
enough info */
} else {
- eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
+ eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
}
info = sdscatprintf(info,
@@ -3046,11 +3109,7 @@ void infoCommand(redisClient *c) {
addReply(c,shared.syntaxerr);
return;
}
- sds info = genRedisInfoString(section);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, genRedisInfoString(section));
}
void monitorCommand(redisClient *c) {
@@ -3138,13 +3197,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn
samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
}
-#if 1 /* Use bulk get by default. */
- count = dictGetRandomKeys(sampledict,samples,server.maxmemory_samples);
-#else
- count = server.maxmemory_samples;
- for (j = 0; j < count; j++) samples[j] = dictGetRandomKey(sampledict);
-#endif
-
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
for (j = 0; j < count; j++) {
unsigned long long idle;
sds key;
@@ -3199,7 +3252,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn
int freeMemoryIfNeeded(void) {
size_t mem_used, mem_tofree, mem_freed;
int slaves = listLength(server.slaves);
- mstime_t latency;
+ mstime_t latency, eviction_latency;
/* Remove the size of slaves output buffers and AOF buffer from the
* count of used memory. */
@@ -3330,7 +3383,11 @@ int freeMemoryIfNeeded(void) {
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
delta = (long long) zmalloc_used_memory();
+ latencyStartMonitor(eviction_latency);
dbDelete(db,keyobj);
+ latencyEndMonitor(eviction_latency);
+ latencyAddSampleIfNeeded("eviction-del",eviction_latency);
+ latencyRemoveNestedEvent(latency,eviction_latency);
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
server.stat_evictedkeys++;
@@ -3385,6 +3442,10 @@ void linuxMemoryWarnings(void) {
#endif /* __linux__ */
void createPidFile(void) {
+ /* If pidfile requested, but no pidfile defined, use
+ * default pidfile path */
+ if (!server.pidfile) server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
+
/* Try to write the pid file in a best-effort way. */
FILE *fp = fopen(server.pidfile,"w");
if (fp) {
@@ -3579,8 +3640,23 @@ void redisSetProcTitle(char *title) {
/*
* Check whether systemd or upstart have been used to start redis.
*/
-int redisIsSupervised(void) {
+
+int redisSupervisedUpstart(void) {
const char *upstart_job = getenv("UPSTART_JOB");
+
+ if (!upstart_job) {
+ redisLog(REDIS_WARNING,
+ "upstart supervision requested, but UPSTART_JOB not found");
+ return 0;
+ }
+
+ redisLog(REDIS_NOTICE, "supervised by upstart, will stop to signal readyness");
+ raise(SIGSTOP);
+ unsetenv("UPSTART_JOB");
+ return 1;
+}
+
+int redisSupervisedSystemd(void) {
const char *notify_socket = getenv("NOTIFY_SOCKET");
int fd = 1;
struct sockaddr_un su;
@@ -3588,31 +3664,24 @@ int redisIsSupervised(void) {
struct msghdr hdr;
int sendto_flags = 0;
- if (upstart_job == NULL && notify_socket == NULL)
+ if (!notify_socket) {
+ redisLog(REDIS_WARNING,
+ "systemd supervision requested, but NOTIFY_SOCKET not found");
return 0;
-
- if (upstart_job != NULL) {
- redisLog(REDIS_NOTICE, "supervised by upstart, will stop to signal readyness");
- raise(SIGSTOP);
- unsetenv("UPSTART_JOB");
-
- return 1;
}
- /*
- * If we got here, we're supervised by systemd.
- */
- if ((strchr("@/", notify_socket[0])) == NULL ||
- strlen(notify_socket) < 2)
+ if ((strchr("@/", notify_socket[0])) == NULL || strlen(notify_socket) < 2) {
return 0;
+ }
redisLog(REDIS_NOTICE, "supervised by systemd, will signal readyness");
- if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
- redisLog(REDIS_WARNING, "cannot contact systemd socket %s", notify_socket);
+ if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) {
+ redisLog(REDIS_WARNING,
+ "Can't connect to systemd socket %s", notify_socket);
return 0;
}
- bzero(&su, sizeof(su));
+ memset(&su, 0, sizeof(su));
su.sun_family = AF_UNIX;
strncpy (su.sun_path, notify_socket, sizeof(su.sun_path) -1);
su.sun_path[sizeof(su.sun_path) - 1] = '\0';
@@ -3620,11 +3689,11 @@ int redisIsSupervised(void) {
if (notify_socket[0] == '@')
su.sun_path[0] = '\0';
- bzero(&iov, sizeof(iov));
+ memset(&iov, 0, sizeof(iov));
iov.iov_base = "READY=1";
iov.iov_len = strlen("READY=1");
- bzero(&hdr, sizeof(hdr));
+ memset(&hdr, 0, sizeof(hdr));
hdr.msg_name = &su;
hdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) +
strlen(notify_socket);
@@ -3636,7 +3705,7 @@ int redisIsSupervised(void) {
sendto_flags |= MSG_NOSIGNAL;
#endif
if (sendmsg(fd, &hdr, sendto_flags) < 0) {
- redisLog(REDIS_WARNING, "Cannot send notification to systemd");
+ redisLog(REDIS_WARNING, "Can't send notification to systemd");
close(fd);
return 0;
}
@@ -3644,9 +3713,55 @@ int redisIsSupervised(void) {
return 1;
}
+/* Notify the process supervisor selected by 'mode' (REDIS_SUPERVISED_*).
+ * Returns 1 if a supervisor was found and signalled, 0 otherwise. In
+ * autodetect mode UPSTART_JOB is probed before NOTIFY_SOCKET, and the helper's
+ * result is returned so that the caller does not daemonize a supervised run. */
+int redisIsSupervised(int mode) {
+    switch (mode) {
+    case REDIS_SUPERVISED_AUTODETECT:
+        if (getenv("UPSTART_JOB")) return redisSupervisedUpstart();
+        if (getenv("NOTIFY_SOCKET")) return redisSupervisedSystemd();
+        return 0;
+    case REDIS_SUPERVISED_UPSTART:
+        return redisSupervisedUpstart();
+    case REDIS_SUPERVISED_SYSTEMD:
+        return redisSupervisedSystemd();
+    default:
+        return 0; /* REDIS_SUPERVISED_NONE or unknown mode */
+    }
+}
+
+
int main(int argc, char **argv) {
struct timeval tv;
+#ifdef REDIS_TEST
+    /* "<prog> test <name>": run one built-in unit test and return its result. */
+    if (argc == 3 && !strcasecmp(argv[1], "test")) {
+        if (!strcasecmp(argv[2], "ziplist")) {
+            return ziplistTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "quicklist")) {
+            return quicklistTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "intset")) {
+            return intsetTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "zipmap")) {
+            return zipmapTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "sha1test")) {
+            return sha1Test(argc, argv);
+        } else if (!strcasecmp(argv[2], "util")) {
+            return utilTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "sds")) {
+            return sdsTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "endianconv")) {
+            return endianconvTest(argc, argv);
+        } else if (!strcasecmp(argv[2], "crc64")) {
+            return crc64Test(argc, argv);
+        }
+        return -1; /* test not found */
+    }
+#endif
+
/* We need to initialize our libraries, and the server configuration. */
#ifdef INIT_SETPROCTITLE_REPLACEMENT
spt_init(argc, argv);
@@ -3668,6 +3783,12 @@ int main(int argc, char **argv) {
initSentinel();
}
+ /* Check if we need to start in redis-check-rdb mode. We just execute
+ * the program main. However the program is part of the Redis executable
+ * so that we can easily execute an RDB check on loading errors. */
+ if (strstr(argv[0],"redis-check-rdb") != NULL)
+ exit(redis_check_rdb_main(argv,argc));
+
if (argc >= 2) {
int j = 1; /* First option to parse in argv[] */
sds options = sdsempty();
@@ -3699,6 +3820,11 @@ int main(int argc, char **argv) {
while(j != argc) {
if (argv[j][0] == '-' && argv[j][1] == '-') {
/* Option name */
+ if (!strcmp(argv[j], "--check-rdb")) {
+ /* Argument has no options, need to skip for parsing. */
+ j++;
+ continue;
+ }
if (sdslen(options)) options = sdscat(options,"\n");
options = sdscat(options,argv[j]+2);
options = sdscat(options," ");
@@ -3724,10 +3850,12 @@ int main(int argc, char **argv) {
redisLog(REDIS_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
}
- server.supervised = redisIsSupervised();
- if (server.daemonize && server.supervised == 0) daemonize();
+ server.supervised = redisIsSupervised(server.supervised_mode);
+ int background = server.daemonize && !server.supervised;
+ if (background) daemonize();
+
initServer();
- if (server.daemonize && server.supervised == 0) createPidFile();
+ if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
redisAsciiArt();
diff --git a/src/redis.h b/src/redis.h
index 88cbe2f94..e248d2901 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -32,10 +32,7 @@
#include "fmacros.h"
#include "config.h"
-
-#if defined(__sun)
#include "solarisfixes.h"
-#endif
#include <stdio.h>
#include <stdlib.h>
@@ -65,6 +62,13 @@ typedef long long mstime_t; /* millisecond time type. */
#include "util.h" /* Misc functions useful in many places */
#include "latency.h" /* Latency monitor API */
#include "sparkline.h" /* ASII graphs API */
+#include "quicklist.h"
+
+/* Following includes allow test functions to be called from Redis main() */
+#include "zipmap.h"
+#include "sha1.h"
+#include "endianconv.h"
+#include "crc64.h"
/* Error codes */
#define REDIS_OK 0
@@ -127,7 +131,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
#define REDIS_DEFAULT_MIN_SLAVES_TO_WRITE 0
#define REDIS_DEFAULT_MIN_SLAVES_MAX_LAG 10
-#define REDIS_IP_STR_LEN INET6_ADDRSTRLEN
+#define REDIS_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
#define REDIS_PEER_ID_LEN (REDIS_IP_STR_LEN+32) /* Must be enough for ip:port */
#define REDIS_BINDADDR_MAX 16
#define REDIS_MIN_RESERVED_FDS 32
@@ -198,6 +202,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
+#define REDIS_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -252,6 +257,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_PRE_PSYNC (1<<16) /* Instance don't understand PSYNC. */
#define REDIS_READONLY (1<<17) /* Cluster client is in read-only state. */
#define REDIS_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
+#define REDIS_PREVENT_PROP (1<<19) /* Don't propagate to AOF / Slaves. */
/* Client block type (btype field in client structure)
* if REDIS_BLOCKED flag is set. */
@@ -308,6 +314,12 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */
#define REDIS_DEFAULT_VERBOSITY REDIS_NOTICE
+/* Supervision options */
+#define REDIS_SUPERVISED_NONE 0
+#define REDIS_SUPERVISED_AUTODETECT 1
+#define REDIS_SUPERVISED_SYSTEMD 2
+#define REDIS_SUPERVISED_UPSTART 3
+
/* Anti-warning macro... */
#define REDIS_NOTUSED(V) ((void) V)
@@ -323,12 +335,14 @@ typedef long long mstime_t; /* millisecond time type. */
/* Zip structure related defaults */
#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512
#define REDIS_HASH_MAX_ZIPLIST_VALUE 64
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
#define REDIS_SET_MAX_INTSET_ENTRIES 512
#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
+/* List defaults */
+#define REDIS_LIST_MAX_ZIPLIST_SIZE -2
+#define REDIS_LIST_COMPRESS_DEPTH 0
+
/* HyperLogLog defines */
#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
@@ -529,8 +543,8 @@ typedef struct redisClient {
int multibulklen; /* number of multi bulk arguments left to read */
long bulklen; /* length of bulk argument in multi bulk request */
list *reply;
- unsigned long reply_bytes; /* Tot bytes of objects in reply list */
- int sentlen; /* Amount of bytes already sent in the current
+ unsigned long long reply_bytes; /* Tot bytes of objects in reply list */
+ size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time */
time_t lastinteraction; /* time of the last interaction, used for timeout */
@@ -540,8 +554,8 @@ typedef struct redisClient {
int replstate; /* replication state if this is a slave */
int repl_put_online_on_ack; /* Install slave write handler on ACK. */
int repldbfd; /* replication DB file descriptor */
- off_t repldboff; /* replication DB file offset */
- off_t repldbsize; /* replication DB file size */
+ off_t repldboff; /* replication DB file offset */
+ off_t repldbsize; /* replication DB file size */
sds replpreamble; /* replication DB preamble. */
long long reploff; /* replication offset if this is our master */
long long repl_ack_off; /* replication ack offset, if this is a slave */
@@ -695,7 +709,7 @@ struct redisServer {
off_t loading_process_events_interval_bytes;
/* Fast pointers to often looked up command */
struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
- *rpopCommand;
+ *rpopCommand, *sremCommand;
/* Fields used only for stats */
time_t stat_starttime; /* Server start time */
long long stat_numcommands; /* Number of processed commands */
@@ -733,7 +747,8 @@ struct redisServer {
int active_expire_enabled; /* Can be disabled for testing purposes. */
size_t client_max_querybuf_len; /* Limit for client query buffer length */
int dbnum; /* Total number of configured DBs */
- int supervised; /* True if supervised by upstart or systemd */
+ int supervised; /* 1 if supervised, 0 otherwise. */
+ int supervised_mode; /* See REDIS_SUPERVISED_* */
int daemonize; /* True if running as a daemon */
clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_TYPE_COUNT];
/* AOF persistence */
@@ -863,12 +878,14 @@ struct redisServer {
/* Zip structure config, see redis.conf for more information */
size_t hash_max_ziplist_entries;
size_t hash_max_ziplist_value;
- size_t list_max_ziplist_entries;
- size_t list_max_ziplist_value;
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
size_t hll_sparse_max_bytes;
+ /* List parameters */
+ int list_max_ziplist_size;
+ int list_compress_depth;
+ /* time cache */
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
@@ -908,6 +925,8 @@ struct redisServer {
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
+ /* System hardware info */
+ size_t system_memory_size; /* Total memory in system as reported by OS */
};
typedef struct pubsubPattern {
@@ -956,15 +975,13 @@ typedef struct {
robj *subject;
unsigned char encoding;
unsigned char direction; /* Iteration direction */
- unsigned char *zi;
- listNode *ln;
+ quicklistIter *iter;
} listTypeIterator;
/* Structure for an entry while iterating over a list. */
typedef struct {
listTypeIterator *li;
- unsigned char *zi; /* Entry in ziplist */
- listNode *ln; /* Entry in linked list */
+ quicklistEntry entry; /* Entry in quicklist */
} listTypeEntry;
/* Structure to hold set iteration abstraction. */
@@ -1041,6 +1058,7 @@ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
void addReplyBulkLongLong(redisClient *c, long long ll);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
+void addReplyBulkSds(redisClient *c, sds s);
void addReplyError(redisClient *c, char *err);
void addReplyStatus(redisClient *c, char *status);
void addReplyDouble(redisClient *c, double d);
@@ -1090,7 +1108,7 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
robj *listTypeGet(listTypeEntry *entry);
void listTypeInsert(listTypeEntry *entry, robj *value, int where);
int listTypeEqual(listTypeEntry *entry, robj *o);
-void listTypeDelete(listTypeEntry *entry);
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
void listTypeConvert(robj *subject, int enc);
void unblockClientWaitingData(redisClient *c);
void handleClientsBlockedOnLists(void);
@@ -1128,7 +1146,7 @@ robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
-robj *createListObject(void);
+robj *createQuicklistObject(void);
robj *createZiplistObject(void);
robj *createSetObject(void);
robj *createIntsetObject(void);
@@ -1235,6 +1253,7 @@ void call(redisClient *c, int flags);
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
void forceCommandPropagation(redisClient *c, int flags);
+void preventCommandPropagation(redisClient *c);
int prepareForShutdown();
#ifdef __GNUC__
void redisLog(int level, const char *fmt, ...)
@@ -1255,6 +1274,7 @@ void closeListeningSockets(int unlink_unix_socket);
void updateCachedTime(void);
void resetServerStats(void);
unsigned int getLRUClock(void);
+char *maxmemoryToString(void);
/* Set data type */
robj *setTypeCreate(robj *value);
@@ -1266,6 +1286,7 @@ void setTypeReleaseIterator(setTypeIterator *si);
int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
robj *setTypeNextObject(setTypeIterator *si);
int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
+unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
unsigned long setTypeSize(robj *subject);
void setTypeConvert(robj *subject, int enc);
@@ -1361,6 +1382,10 @@ void sentinelTimer(void);
char *sentinelHandleConfiguration(char **argv, int argc);
void sentinelIsRunning(void);
+/* redis-check-rdb */
+int redis_check_rdb(char *rdbfilename);
+int redis_check_rdb_main(char **argv, int argc);
+
/* Scripting */
void scriptingInit(void);
diff --git a/src/replication.c b/src/replication.c
index 41f67625e..697acbef5 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -56,7 +56,7 @@ char *replicationGetSlaveName(redisClient *c) {
buf[0] = '\0';
if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) {
if (c->slave_listening_port)
- snprintf(buf,sizeof(buf),"%s:%d",ip,c->slave_listening_port);
+ anetFormatAddr(buf,sizeof(buf),ip,c->slave_listening_port);
else
snprintf(buf,sizeof(buf),"%s:<unknown-slave-port>",ip);
} else {
@@ -854,6 +854,23 @@ void replicationEmptyDbCallback(void *privdata) {
replicationSendNewlineToMaster();
}
+/* Once we have a link with the master and the synchronization was
+ * performed, materialize the master client stored at server.master on top
+ * of the file descriptor 'fd', and mark the replication link as connected. */
+void replicationCreateMasterClient(int fd) {
+    redisClient *master = createClient(fd);
+    server.master = master;
+    server.repl_state = REDIS_REPL_CONNECTED;
+    master->authenticated = 1;
+    master->reploff = server.repl_master_initial_offset;
+    memcpy(master->replrunid, server.repl_master_runid,
+           sizeof(server.repl_master_runid));
+    /* An offset of -1 means the master is old and not PSYNC capable:
+     * flag the client accordingly, on top of the REDIS_MASTER flag. */
+    master->flags |= REDIS_MASTER;
+    if (master->reploff == -1) master->flags |= REDIS_PRE_PSYNC;
+}
+
/* Asynchronously read the SYNC payload we receive from a master */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
@@ -1017,17 +1034,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Final setup of the connected slave <- master link */
zfree(server.repl_transfer_tmpfile);
close(server.repl_transfer_fd);
- server.master = createClient(server.repl_transfer_s);
- server.master->flags |= REDIS_MASTER;
- server.master->authenticated = 1;
- server.repl_state = REDIS_REPL_CONNECTED;
- server.master->reploff = server.repl_master_initial_offset;
- memcpy(server.master->replrunid, server.repl_master_runid,
- sizeof(server.repl_master_runid));
- /* If master offset is set to -1, this master is old and is not
- * PSYNC capable, so we flag it accordingly. */
- if (server.master->reploff == -1)
- server.master->flags |= REDIS_PRE_PSYNC;
+ replicationCreateMasterClient(server.repl_transfer_s);
redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
/* Restart the AOF subsystem now that we finished the sync. This
* will trigger an AOF rewrite, and when done will start appending
diff --git a/src/scripting.c b/src/scripting.c
index 39bfe5fa7..c5dd4e718 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -30,6 +30,7 @@
#include "redis.h"
#include "sha1.h"
#include "rand.h"
+#include "cluster.h"
#include <lua.h>
#include <lauxlib.h>
@@ -213,11 +214,27 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
static int argv_size = 0;
static robj *cached_objects[LUA_CMD_OBJCACHE_SIZE];
static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+ static int inuse = 0; /* Recursive calls detection. */
+
+ /* By using Lua debug hooks it is possible to trigger a recursive call
+ * to luaRedisGenericCommand(), which normally should never happen.
+ * To make this function reentrant is futile and makes it slower, but
+ * we should at least detect such a misuse, and abort. */
+ if (inuse) {
+ char *recursion_warning =
+ "luaRedisGenericCommand() recursive call detected. "
+ "Are you doing funny stuff with Lua debug hooks?";
+ redisLog(REDIS_WARNING,"%s",recursion_warning);
+ luaPushError(lua,recursion_warning);
+ return 1;
+ }
+ inuse++;
/* Require at least one argument */
if (argc == 0) {
luaPushError(lua,
"Please specify at least one argument for redis.call()");
+ inuse--;
return 1;
}
@@ -272,6 +289,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
luaPushError(lua,
"Lua redis() command arguments must be strings or integers");
+ inuse--;
return 1;
}
@@ -291,6 +309,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
luaPushError(lua,"Unknown Redis command called from Lua script");
goto cleanup;
}
+ c->cmd = cmd;
/* There are commands that are not allowed inside scripts. */
if (cmd->flags & REDIS_CMD_NOSCRIPT) {
@@ -337,8 +356,23 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1;
if (cmd->flags & REDIS_CMD_WRITE) server.lua_write_dirty = 1;
+ /* If this is a Redis Cluster node, we need to make sure Lua is not
+ * trying to access non-local keys. */
+ if (server.cluster_enabled) {
+ /* Duplicate relevant flags in the lua client. */
+ c->flags &= ~(REDIS_READONLY|REDIS_ASKING);
+ c->flags |= server.lua_caller->flags & (REDIS_READONLY|REDIS_ASKING);
+ if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+ server.cluster->myself)
+ {
+ luaPushError(lua,
+ "Lua script attempted to access a non local key in a "
+ "cluster node");
+ goto cleanup;
+ }
+ }
+
/* Run the command */
- c->cmd = cmd;
call(c,REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
/* Convert the result of the Redis command into a suitable Lua type.
@@ -409,8 +443,10 @@ cleanup:
* return the plain error. */
lua_pushstring(lua,"err");
lua_gettable(lua,-2);
+ inuse--;
return lua_error(lua);
}
+ inuse--;
return 1;
}
diff --git a/src/sds.c b/src/sds.c
index 1df1043ed..05ee0ad56 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -962,12 +962,15 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#ifdef SDS_TEST_MAIN
+#if defined(REDIS_TEST) || defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
-int main(void) {
+#define UNUSED(x) (void)(x)
+int sdsTest(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
{
struct sdshdr *sh;
sds x = sdsnew("foo"), y;
@@ -1092,7 +1095,7 @@ int main(void) {
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
- int oldfree;
+ unsigned int oldfree;
sdsfree(x);
x = sdsnew("0");
@@ -1113,3 +1116,9 @@ int main(void) {
return 0;
}
#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+    return sdsTest(0, NULL); /* sdsTest() ignores argc/argv, but its prototype requires them */
+}
+#endif
diff --git a/src/sds.h b/src/sds.h
index 37aaf7a28..93dd4f28e 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -98,4 +98,8 @@ void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/sentinel.c b/src/sentinel.c
index 12f15ff3e..c170818c9 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -577,7 +577,7 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
if (level == REDIS_WARNING && ri != NULL) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
ri : ri->master;
- if (master->notification_script) {
+ if (master && master->notification_script) {
sentinelScheduleScriptExecution(master->notification_script,
type,msg,NULL);
}
@@ -897,7 +897,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
sentinelRedisInstance *ri;
sentinelAddr *addr;
dict *table = NULL;
- char slavename[128], *sdsname;
+ char slavename[REDIS_PEER_ID_LEN], *sdsname;
redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
redisAssert((flags & SRI_MASTER) || master != NULL);
@@ -908,9 +908,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
/* For slaves and sentinel we use ip:port as name. */
if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
- snprintf(slavename,sizeof(slavename),
- strchr(hostname,':') ? "[%s]:%d" : "%s:%d",
- hostname,port);
+ anetFormatAddr(slavename, sizeof(slavename), hostname, port);
name = slavename;
}
@@ -1033,11 +1031,11 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
{
sds key;
sentinelRedisInstance *slave;
+ char buf[REDIS_PEER_ID_LEN];
redisAssert(ri->flags & SRI_MASTER);
- key = sdscatprintf(sdsempty(),
- strchr(ip,':') ? "[%s]:%d" : "%s:%d",
- ip,port);
+ anetFormatAddr(buf,sizeof(buf),ip,port);
+ key = sdsnew(buf);
slave = dictFetchValue(ri->slaves,key);
sdsfree(key);
return slave;
@@ -2743,6 +2741,12 @@ void sentinelCommand(redisClient *c) {
!= REDIS_OK) return;
if (getLongFromObjectOrReply(c,c->argv[4],&port,"Invalid port")
!= REDIS_OK) return;
+
+ if (quorum <= 0) {
+ addReplyError(c, "Quorum must be 1 or greater.");
+ return;
+ }
+
/* Make sure the IP field is actually a valid IP before passing it
* to createSentinelRedisInstance(), otherwise we may trigger a
* DNS lookup at runtime. */
@@ -2786,6 +2790,7 @@ void sentinelCommand(redisClient *c) {
sentinelSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
if (c->argc < 2) goto numargserr;
+ mstime_t now = mstime();
/* Create an ad-hoc dictionary type so that we can iterate
* a dictionary composed of just the master groups the user
@@ -2821,6 +2826,8 @@ void sentinelCommand(redisClient *c) {
sentinelRedisInstance *ri = dictGetVal(de);
addReplyBulkCBuffer(c,ri->name,strlen(ri->name));
addReplyMultiBulkLen(c,dictSize(ri->slaves) + 1); /* +1 for self */
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - ri->info_refresh);
if (ri->info)
addReplyBulkCBuffer(c,ri->info,sdslen(ri->info));
else
@@ -2831,6 +2838,8 @@ void sentinelCommand(redisClient *c) {
sdi = dictGetIterator(ri->slaves);
while ((sde = dictNext(sdi)) != NULL) {
sentinelRedisInstance *sri = dictGetVal(sde);
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - sri->info_refresh);
if (sri->info)
addReplyBulkCBuffer(c,sri->info,sdslen(sri->info));
else
@@ -2853,24 +2862,30 @@ numargserr:
/* SENTINEL INFO [section] */
void sentinelInfoCommand(redisClient *c) {
- char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
- sds info = sdsempty();
- int defsections = !strcasecmp(section,"default");
- int sections = 0;
-
if (c->argc > 2) {
addReply(c,shared.syntaxerr);
return;
}
- if (!strcasecmp(section,"server") || defsections) {
+ int defsections = 0, allsections = 0;
+ char *section = c->argc == 2 ? c->argv[1]->ptr : NULL;
+ if (section) {
+ allsections = !strcasecmp(section,"all");
+ defsections = !strcasecmp(section,"default");
+ } else {
+ defsections = 1;
+ }
+
+ int sections = 0;
+ sds info = sdsempty();
+ if (defsections || allsections || !strcasecmp(section,"server")) {
if (sections++) info = sdscat(info,"\r\n");
sds serversection = genRedisInfoString("server");
info = sdscatlen(info,serversection,sdslen(serversection));
sdsfree(serversection);
}
- if (!strcasecmp(section,"sentinel") || defsections) {
+ if (defsections || allsections || !strcasecmp(section,"sentinel")) {
dictIterator *di;
dictEntry *de;
int master_id = 0;
@@ -2905,10 +2920,7 @@ void sentinelInfoCommand(redisClient *c) {
dictReleaseIterator(di);
}
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, info);
}
/* Implements Sentinel verison of the ROLE command. The output is
diff --git a/src/sha1.c b/src/sha1.c
index 59e6f461d..7f73b40d3 100644
--- a/src/sha1.c
+++ b/src/sha1.c
@@ -24,9 +24,7 @@ A million repetitions of "a"
#include <stdio.h>
#include <string.h>
#include <sys/types.h> /* for u_int*_t */
-#if defined(__sun)
#include "solarisfixes.h"
-#endif
#include "sha1.h"
#include "config.h"
@@ -199,16 +197,19 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
}
/* ================ end of sha1.c ================ */
-#if 0
+#ifdef REDIS_TEST
#define BUFSIZE 4096
-int
-main(int argc, char **argv)
+#define UNUSED(x) (void)(x)
+int sha1Test(int argc, char **argv)
{
SHA1_CTX ctx;
unsigned char hash[20], buf[BUFSIZE];
int i;
+ UNUSED(argc);
+ UNUSED(argv);
+
for(i=0;i<BUFSIZE;i++)
buf[i] = i;
@@ -223,6 +224,4 @@ main(int argc, char **argv)
printf("\n");
return 0;
}
-
#endif
-
diff --git a/src/sha1.h b/src/sha1.h
index 9d6f12965..4c76d19da 100644
--- a/src/sha1.h
+++ b/src/sha1.h
@@ -1,3 +1,5 @@
+#ifndef SHA1_H
+#define SHA1_H
/* ================ sha1.h ================ */
/*
SHA-1 in C
@@ -15,3 +17,8 @@ void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+
+#ifdef REDIS_TEST
+int sha1Test(int argc, char **argv);
+#endif
+#endif
diff --git a/src/solarisfixes.h b/src/solarisfixes.h
index 23025257a..3e53ba67c 100644
--- a/src/solarisfixes.h
+++ b/src/solarisfixes.h
@@ -28,6 +28,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#if defined(__sun)
+
#if defined(__GNUC__)
#include <math.h>
#undef isnan
@@ -48,3 +50,5 @@
#define u_int uint
#define u_int32_t uint32_t
#endif /* __GNUC__ */
+
+#endif /* __sun */
diff --git a/src/sort.c b/src/sort.c
index 2b3276448..c1b571313 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -220,7 +220,7 @@ void sortCommand(redisClient *c) {
if (sortval)
incrRefCount(sortval);
else
- sortval = createListObject();
+ sortval = createQuicklistObject();
/* The SORT command has an SQL-alike syntax, parse it */
while(j < c->argc) {
@@ -322,17 +322,17 @@ void sortCommand(redisClient *c) {
}
if (end >= vectorlen) end = vectorlen-1;
- /* Optimization:
+ /* Whenever possible, we load elements into the output array in a more
+ * direct way. This is possible if:
*
- * 1) if the object to sort is a sorted set.
+ * 1) The object to sort is a sorted set or a list (internally sorted).
* 2) There is nothing to sort as dontsort is true (BY <constant string>).
- * 3) We have a LIMIT option that actually reduces the number of elements
- * to fetch.
*
- * In this case to load all the objects in the vector is a huge waste of
- * resources. We just allocate a vector that is big enough for the selected
- * range length, and make sure to load just this part in the vector. */
- if (sortval->type == REDIS_ZSET &&
+ * In this special case, if we have a LIMIT option that actually reduces
+ * the number of elements to fetch, we also optimize to just load the
+ * range we are interested in and allocating a vector that is big enough
+ * for the selected range length. */
+ if ((sortval->type == REDIS_ZSET || sortval->type == REDIS_LIST) &&
dontsort &&
(start != 0 || end != vectorlen-1))
{
@@ -343,7 +343,32 @@ void sortCommand(redisClient *c) {
vector = zmalloc(sizeof(redisSortObject)*vectorlen);
j = 0;
- if (sortval->type == REDIS_LIST) {
+ if (sortval->type == REDIS_LIST && dontsort) {
+ /* Special handling for a list, if 'dontsort' is true.
+ * This makes sure we return elements in the list original
+ * ordering, accordingly to DESC / ASC options.
+ *
+ * Note that in this case we also handle LIMIT here in a direct
+ * way, just getting the required range, as an optimization. */
+ if (end >= start) {
+ listTypeIterator *li;
+ listTypeEntry entry;
+ li = listTypeInitIterator(sortval,
+ desc ? (long)(listTypeLength(sortval) - start - 1) : start,
+ desc ? REDIS_HEAD : REDIS_TAIL);
+
+ while(j < vectorlen && listTypeNext(li,&entry)) {
+ vector[j].obj = listTypeGet(&entry);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ listTypeReleaseIterator(li);
+ /* Fix start/end: output code is not aware of this optimization. */
+ end -= start;
+ start = 0;
+ }
+ } else if (sortval->type == REDIS_LIST) {
listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
@@ -399,10 +424,7 @@ void sortCommand(redisClient *c) {
j++;
ln = desc ? ln->backward : ln->level[0].forward;
}
- /* The code producing the output does not know that in the case of
- * sorted set, 'dontsort', and LIMIT, we are able to get just the
- * range, already sorted, so we need to adjust "start" and "end"
- * to make sure start is set to 0. */
+ /* Fix start/end: output code is not aware of this optimization. */
end -= start;
start = 0;
} else if (sortval->type == REDIS_ZSET) {
@@ -509,7 +531,7 @@ void sortCommand(redisClient *c) {
}
}
} else {
- robj *sobj = createZiplistObject();
+ robj *sobj = createQuicklistObject();
/* STORE option specified, set the sorting result as a List object */
for (j = start; j <= end; j++) {
diff --git a/src/t_list.c b/src/t_list.c
index fc27331f5..232cb5c52 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -33,75 +33,37 @@
* List API
*----------------------------------------------------------------------------*/
-/* Check the argument length to see if it requires us to convert the ziplist
- * to a real list. Only check raw-encoded objects because integer encoded
- * objects are never too long. */
-void listTypeTryConversion(robj *subject, robj *value) {
- if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
- if (sdsEncodedObject(value) &&
- sdslen(value->ptr) > server.list_max_ziplist_value)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-}
-
/* The function pushes an element to the specified list object 'subject',
* at head or tail position as specified by 'where'.
*
* There is no need for the caller to increment the refcount of 'value' as
* the function takes care of it if needed. */
void listTypePush(robj *subject, robj *value, int where) {
- /* Check if we need to convert the ziplist */
- listTypeTryConversion(subject,value);
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ int pos = (where == REDIS_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
value = getDecodedObject(value);
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
+ size_t len = sdslen(value->ptr);
+ quicklistPush(subject->ptr, value->ptr, len, pos);
decrRefCount(value);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_HEAD) {
- listAddNodeHead(subject->ptr,value);
- } else {
- listAddNodeTail(subject->ptr,value);
- }
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}
+void *listPopSaver(unsigned char *data, unsigned int sz) {
+ return createStringObject((char*)data,sz); /* Saver handed to quicklistPopCustom(): builds a string object from the 'sz' bytes at 'data'. */
+}
+
robj *listTypePop(robj *subject, int where) {
+ long long vlong;
robj *value = NULL;
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- int pos = (where == REDIS_HEAD) ? 0 : -1;
- p = ziplistIndex(subject->ptr,pos);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
+
+ int ql_where = where == REDIS_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
+ NULL, &vlong, listPopSaver)) {
+ if (!value)
value = createStringObjectFromLongLong(vlong);
- }
- /* We only need to delete an element when it exists */
- subject->ptr = ziplistDelete(subject->ptr,&p);
- }
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = subject->ptr;
- listNode *ln;
- if (where == REDIS_HEAD) {
- ln = listFirst(list);
- } else {
- ln = listLast(list);
- }
- if (ln != NULL) {
- value = listNodeValue(ln);
- incrRefCount(value);
- listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
@@ -110,25 +72,28 @@ robj *listTypePop(robj *subject, int where) {
}
unsigned long listTypeLength(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- return ziplistLen(subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- return listLength((list*)subject->ptr);
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ return quicklistCount(subject->ptr);
} else {
redisPanic("Unknown list encoding");
}
}
/* Initialize an iterator at the specified index. */
-listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction) {
+listTypeIterator *listTypeInitIterator(robj *subject, long index,
+ unsigned char direction) {
listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
li->subject = subject;
li->encoding = subject->encoding;
li->direction = direction;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- li->zi = ziplistIndex(subject->ptr,index);
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- li->ln = listIndex(subject->ptr,index);
+ li->iter = NULL;
+ /* REDIS_HEAD means start at TAIL and move *towards* head.
+ * REDIS_TAIL means start at HEAD and move *towards tail. */
+ int iter_direction =
+ direction == REDIS_HEAD ? AL_START_TAIL : AL_START_HEAD;
+ if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ li->iter = quicklistGetIteratorAtIdx(li->subject->ptr,
+ iter_direction, index);
} else {
redisPanic("Unknown list encoding");
}
@@ -137,6 +102,7 @@ listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char
/* Clean up the iterator. */
void listTypeReleaseIterator(listTypeIterator *li) {
+ zfree(li->iter);
zfree(li);
}
@@ -148,24 +114,8 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
redisAssert(li->subject->encoding == li->encoding);
entry->li = li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- entry->zi = li->zi;
- if (entry->zi != NULL) {
- if (li->direction == REDIS_TAIL)
- li->zi = ziplistNext(li->subject->ptr,li->zi);
- else
- li->zi = ziplistPrev(li->subject->ptr,li->zi);
- return 1;
- }
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- entry->ln = li->ln;
- if (entry->ln != NULL) {
- if (li->direction == REDIS_TAIL)
- li->ln = li->ln->next;
- else
- li->ln = li->ln->prev;
- return 1;
- }
+ if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ return quicklistNext(li->iter, &entry->entry);
} else {
redisPanic("Unknown list encoding");
}
@@ -174,24 +124,14 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Return entry or NULL at the current position of the iterator. */
robj *listTypeGet(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
robj *value = NULL;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- redisAssert(entry->zi != NULL);
- if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (entry->entry.value) {
+ value = createStringObject((char *)entry->entry.value,
+ entry->entry.sz);
+ } else {
+ value = createStringObjectFromLongLong(entry->entry.longval);
}
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- redisAssert(entry->ln != NULL);
- value = listNodeValue(entry->ln);
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
@@ -199,30 +139,18 @@ robj *listTypeGet(listTypeEntry *entry) {
}
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
- robj *subject = entry->li->subject;
- if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
value = getDecodedObject(value);
+ sds str = value->ptr;
+ size_t len = sdslen(str);
if (where == REDIS_TAIL) {
- unsigned char *next = ziplistNext(subject->ptr,entry->zi);
-
- /* When we insert after the current element, but the current element
- * is the tail of the list, we need to do a push. */
- if (next == NULL) {
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
- } else {
- subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
- }
- } else {
- subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
+ quicklistInsertAfter((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
+ } else if (where == REDIS_HEAD) {
+ quicklistInsertBefore((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
}
decrRefCount(value);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_TAIL) {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
- } else {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
- }
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
@@ -230,59 +158,33 @@ void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
/* Compare the given object with the entry at the current position. */
int listTypeEqual(listTypeEntry *entry, robj *o) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
redisAssertWithInfo(NULL,o,sdsEncodedObject(o));
- return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- return equalStringObjects(o,listNodeValue(entry->ln));
+ return quicklistCompare(entry->entry.zi,o->ptr,sdslen(o->ptr));
} else {
redisPanic("Unknown list encoding");
}
}
/* Delete the element pointed to. */
-void listTypeDelete(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = entry->zi;
- li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
-
- /* Update position of the iterator depending on the direction */
- if (li->direction == REDIS_TAIL)
- li->zi = p;
- else
- li->zi = ziplistPrev(li->subject->ptr,p);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *next;
- if (li->direction == REDIS_TAIL)
- next = entry->ln->next;
- else
- next = entry->ln->prev;
- listDelNode(li->subject->ptr,entry->ln);
- li->ln = next;
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistDelEntry(iter->iter, &entry->entry);
} else {
redisPanic("Unknown list encoding");
}
}
+/* Create a quicklist from a single ziplist */
void listTypeConvert(robj *subject, int enc) {
- listTypeIterator *li;
- listTypeEntry entry;
- redisAssertWithInfo(NULL,subject,subject->type == REDIS_LIST);
-
- if (enc == REDIS_ENCODING_LINKEDLIST) {
- list *l = listCreate();
- listSetFreeMethod(l,decrRefCountVoid);
-
- /* listTypeGet returns a robj with incremented refcount */
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
- listTypeReleaseIterator(li);
-
- subject->encoding = REDIS_ENCODING_LINKEDLIST;
- zfree(subject->ptr);
- subject->ptr = l;
+ redisAssertWithInfo(NULL,subject,subject->type==REDIS_LIST);
+ redisAssertWithInfo(NULL,subject,subject->encoding==REDIS_ENCODING_ZIPLIST);
+
+ if (enc == REDIS_ENCODING_QUICKLIST) {
+ size_t zlen = server.list_max_ziplist_size;
+ int depth = server.list_compress_depth;
+ subject->ptr = quicklistCreateFromZiplist(zlen, depth, subject->ptr);
+ subject->encoding = REDIS_ENCODING_QUICKLIST;
} else {
redisPanic("Unsupported list conversion");
}
@@ -304,7 +206,9 @@ void pushGenericCommand(redisClient *c, int where) {
for (j = 2; j < c->argc; j++) {
c->argv[j] = tryObjectEncoding(c->argv[j]);
if (!lobj) {
- lobj = createZiplistObject();
+ lobj = createQuicklistObject();
+ quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,c->argv[1],lobj);
}
listTypePush(lobj,c->argv[j],where);
@@ -338,13 +242,6 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
checkType(c,subject,REDIS_LIST)) return;
if (refval != NULL) {
- /* We're not sure if this value can be inserted yet, but we cannot
- * convert the list inside the iterator. We don't want to loop over
- * the list twice (once to see if the value can be inserted and once
- * to do the actual insert), so we assume this value can be inserted
- * and convert the ziplist to a regular list if necessary. */
- listTypeTryConversion(subject,val);
-
/* Seek refval from head to tail */
iter = listTypeInitIterator(subject,0,REDIS_TAIL);
while (listTypeNext(iter,&entry)) {
@@ -357,10 +254,6 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
listTypeReleaseIterator(iter);
if (inserted) {
- /* Check if the length exceeds the ziplist length threshold. */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"linsert",
c->argv[1],c->db->id);
@@ -418,31 +311,19 @@ void lindexCommand(redisClient *c) {
if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
return;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- p = ziplistIndex(o->ptr,index);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistEntry entry;
+ if (quicklistIndex(o->ptr, index, &entry)) {
+ if (entry.value) {
+ value = createStringObject((char*)entry.value,entry.sz);
} else {
- value = createStringObjectFromLongLong(vlong);
+ value = createStringObjectFromLongLong(entry.longval);
}
addReplyBulk(c,value);
decrRefCount(value);
} else {
addReply(c,shared.nullbulk);
}
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln != NULL) {
- value = listNodeValue(ln);
- addReplyBulk(c,value);
- } else {
- addReply(c,shared.nullbulk);
- }
} else {
redisPanic("Unknown list encoding");
}
@@ -452,35 +333,18 @@ void lsetCommand(redisClient *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
long index;
- robj *value = (c->argv[3] = tryObjectEncoding(c->argv[3]));
+ robj *value = c->argv[3];
if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
return;
- listTypeTryConversion(o,value);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p, *zl = o->ptr;
- p = ziplistIndex(zl,index);
- if (p == NULL) {
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ int replaced = quicklistReplaceAtIndex(ql, index,
+ value->ptr, sdslen(value->ptr));
+ if (!replaced) {
addReply(c,shared.outofrangeerr);
} else {
- o->ptr = ziplistDelete(o->ptr,&p);
- value = getDecodedObject(value);
- o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
- decrRefCount(value);
- addReply(c,shared.ok);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
- server.dirty++;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln == NULL) {
- addReply(c,shared.outofrangeerr);
- } else {
- decrRefCount((robj*)listNodeValue(ln));
- listNodeValue(ln) = value;
- incrRefCount(value);
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
@@ -549,43 +413,28 @@ void lrangeCommand(redisClient *c) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = ziplistIndex(o->ptr,start);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ listTypeIterator *iter = listTypeInitIterator(o, start, REDIS_TAIL);
while(rangelen--) {
- ziplistGet(p,&vstr,&vlen,&vlong);
- if (vstr) {
- addReplyBulkCBuffer(c,vstr,vlen);
+ listTypeEntry entry;
+ listTypeNext(iter, &entry);
+ quicklistEntry *qe = &entry.entry;
+ if (qe->value) {
+ addReplyBulkCBuffer(c,qe->value,qe->sz);
} else {
- addReplyBulkLongLong(c,vlong);
+ addReplyBulkLongLong(c,qe->longval);
}
- p = ziplistNext(o->ptr,p);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln;
-
- /* If we are nearest to the end of the list, reach the element
- * starting from tail and going backward, as it is faster. */
- if (start > llen/2) start -= llen;
- ln = listIndex(o->ptr,start);
-
- while(rangelen--) {
- addReplyBulk(c,ln->value);
- ln = ln->next;
}
+ listTypeReleaseIterator(iter);
} else {
- redisPanic("List encoding is not LINKEDLIST nor ZIPLIST!");
+ redisPanic("List encoding is not QUICKLIST!");
}
}
void ltrimCommand(redisClient *c) {
robj *o;
- long start, end, llen, j, ltrim, rtrim;
- list *list;
- listNode *ln;
+ long start, end, llen, ltrim, rtrim;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
@@ -612,19 +461,9 @@ void ltrimCommand(redisClient *c) {
}
/* Remove list elements to perform the trim */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
- o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list = o->ptr;
- for (j = 0; j < ltrim; j++) {
- ln = listFirst(list);
- listDelNode(list,ln);
- }
- for (j = 0; j < rtrim; j++) {
- ln = listLast(list);
- listDelNode(list,ln);
- }
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistDelRange(o->ptr,0,ltrim);
+ quicklistDelRange(o->ptr,-rtrim,rtrim);
} else {
redisPanic("Unknown list encoding");
}
@@ -641,10 +480,9 @@ void ltrimCommand(redisClient *c) {
void lremCommand(redisClient *c) {
robj *subject, *obj;
- obj = c->argv[3] = tryObjectEncoding(c->argv[3]);
+ obj = c->argv[3];
long toremove;
long removed = 0;
- listTypeEntry entry;
if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != REDIS_OK))
return;
@@ -652,10 +490,6 @@ void lremCommand(redisClient *c) {
subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
- /* Make sure obj is raw when we're dealing with a ziplist */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- obj = getDecodedObject(obj);
-
listTypeIterator *li;
if (toremove < 0) {
toremove = -toremove;
@@ -664,9 +498,10 @@ void lremCommand(redisClient *c) {
li = listTypeInitIterator(subject,0,REDIS_TAIL);
}
+ listTypeEntry entry;
while (listTypeNext(li,&entry)) {
if (listTypeEqual(&entry,obj)) {
- listTypeDelete(&entry);
+ listTypeDelete(li, &entry);
server.dirty++;
removed++;
if (toremove && removed == toremove) break;
@@ -674,11 +509,10 @@ void lremCommand(redisClient *c) {
}
listTypeReleaseIterator(li);
- /* Clean up raw encoded object */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- decrRefCount(obj);
+ if (listTypeLength(subject) == 0) {
+ dbDelete(c->db,c->argv[1]);
+ }
- if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
addReplyLongLong(c,removed);
if (removed) signalModifiedKey(c->db,c->argv[1]);
}
@@ -702,7 +536,9 @@ void lremCommand(redisClient *c) {
void rpoplpushHandlePush(redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
/* Create the list if the key does not exist */
if (!dstobj) {
- dstobj = createZiplistObject();
+ dstobj = createQuicklistObject();
+ quicklistSetOptions(dstobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,dstkey,dstobj);
}
signalModifiedKey(c->db,dstkey);
@@ -1010,7 +846,9 @@ void handleClientsBlockedOnLists(void) {
}
}
- if (listTypeLength(o) == 0) dbDelete(rl->db,rl->key);
+ if (listTypeLength(o) == 0) {
+ dbDelete(rl->db,rl->key);
+ }
/* We don't call signalModifiedKey() as it was already called
* when an element was pushed on the list. */
}
diff --git a/src/t_set.c b/src/t_set.c
index c530d6923..c8141c3f6 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -33,7 +33,8 @@
* Set Commands
*----------------------------------------------------------------------------*/
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);
+void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+ robj *dstkey, int op);
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
@@ -44,6 +45,11 @@ robj *setTypeCreate(robj *value) {
return createSetObject();
}
+/* Add the specified value into a set. The function takes care of incrementing
+ * the reference count of the object if needed in order to retain a copy.
+ *
+ * If the value was already a member of the set, nothing is done and 0 is
+ * returned, otherwise the new element is added and 1 is returned. */
int setTypeAdd(robj *subject, robj *value) {
long long llval;
if (subject->encoding == REDIS_ENCODING_HT) {
@@ -68,7 +74,8 @@ int setTypeAdd(robj *subject, robj *value) {
/* The set *was* an intset and this value is not integer
* encodable, so dictAdd should always work. */
- redisAssertWithInfo(NULL,value,dictAdd(subject->ptr,value,NULL) == DICT_OK);
+ redisAssertWithInfo(NULL,value,
+ dictAdd(subject->ptr,value,NULL) == DICT_OK);
incrRefCount(value);
return 1;
}
@@ -235,7 +242,8 @@ void setTypeConvert(robj *setobj, int enc) {
si = setTypeInitIterator(setobj);
while (setTypeNext(si,NULL,&intele) != -1) {
element = createStringObjectFromLongLong(intele);
- redisAssertWithInfo(NULL,element,dictAdd(d,element,NULL) == DICT_OK);
+ redisAssertWithInfo(NULL,element,
+ dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
@@ -377,15 +385,185 @@ void scardCommand(redisClient *c) {
addReplyLongLong(c,setTypeSize(o));
}
+/* Handle the "SPOP key <count>" variant. The normal version of the
+ * command is handled by the spopCommand() function itself. */
+
+/* How many times bigger should be the set compared to the remaining size
+ * for us to use the "create new set" strategy? Read later in the
+ * implementation for more info. */
+#define SPOP_MOVE_STRATEGY_MUL 5
+
+/* Implementation of "SPOP key <count>".
+ *
+ * Replies with a multi bulk of up to <count> elements removed from the set
+ * stored at c->argv[1]. A negative count is answered with an out of range
+ * error, a zero count or a missing key with an empty multi bulk.
+ *
+ * The command is never propagated verbatim: when the whole set is popped it
+ * is rewritten as DEL, otherwise every removed element is propagated as an
+ * SREM via alsoPropagate(). */
+void spopWithCountCommand(redisClient *c) {
+    long l;
+    unsigned long count, size;
+    robj *set;
+
+    /* Get the count argument. The cast must be to the full width of
+     * 'count': casting to plain 'unsigned' would truncate large values. */
+    if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+    if (l >= 0) {
+        count = (unsigned long) l;
+    } else {
+        addReply(c,shared.outofrangeerr);
+        return;
+    }
+
+    /* Make sure a key with the given name exists, and that its type is
+     * indeed a set. Otherwise reply with an empty multi bulk. The key is
+     * looked up for writing, like spopCommand() does, since we are going
+     * to modify it. */
+    if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.emptymultibulk))
+        == NULL || checkType(c,set,REDIS_SET)) return;
+
+    /* If count is zero, serve an empty multibulk ASAP to avoid special
+     * cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptymultibulk);
+        return;
+    }
+
+    size = setTypeSize(set);
+
+    /* Generate an SPOP keyspace notification */
+    notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
+    /* Account only for the elements that can actually be removed: the
+     * requested count may be much larger than the set itself. */
+    server.dirty += (count >= size) ? size : count;
+
+    /* CASE 1:
+     * The number of requested elements is greater than or equal to
+     * the number of elements inside the set: simply return the whole set. */
+    if (count >= size) {
+        /* We just return the entire set */
+        sunionDiffGenericCommand(c,c->argv+1,1,NULL,REDIS_OP_UNION);
+
+        /* Delete the set as it is now empty */
+        dbDelete(c->db,c->argv[1]);
+        notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+
+        /* Propagate this command as a DEL operation */
+        rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
+        signalModifiedKey(c->db,c->argv[1]);
+        server.dirty++;
+        return;
+    }
+
+    /* Case 2 and 3 require to replicate SPOP as a set of SREM commands.
+     * Prepare our replication argument vector. Also send the array length
+     * which is common to both the code paths. */
+    robj *propargv[3];
+    propargv[0] = createStringObject("SREM",4);
+    propargv[1] = c->argv[1];
+    addReplyMultiBulkLen(c,count);
+
+    /* Common iteration vars. */
+    robj *objele;
+    int encoding;
+    int64_t llele;
+    unsigned long remaining = size-count; /* Elements left after SPOP. */
+
+    /* If we are here, the number of requested elements is less than the
+     * number of elements inside the set. Also we are sure that count < size.
+     * Use two different strategies.
+     *
+     * CASE 2: The number of elements to return is small compared to the
+     * set size. We can just extract random elements, send them to the
+     * client and remove them from the set. */
+    if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+        while(count--) {
+            encoding = setTypeRandomElement(set,&objele,&llele);
+            if (encoding == REDIS_ENCODING_INTSET) {
+                objele = createStringObjectFromLongLong(llele);
+            } else {
+                incrRefCount(objele);
+            }
+
+            /* Return the element to the client and remove from the set. */
+            addReplyBulk(c,objele);
+            setTypeRemove(set,objele);
+
+            /* Replicate/AOF this command as an SREM operation */
+            propargv[2] = objele;
+            alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+                REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+            decrRefCount(objele);
+        }
+    } else {
+    /* CASE 3: The number of elements to return is very big, approaching
+     * the size of the set itself. After some time extracting random elements
+     * from such a set becomes computationally expensive, so we use
+     * a different strategy, we extract random elements that we don't
+     * want to return (the elements that will remain part of the set),
+     * creating a new set as we do this (that will be stored as the original
+     * set). Then we return the elements left in the original set and
+     * release it. */
+        robj *newset = NULL;
+
+        /* Create a new set with just the remaining elements. */
+        while(remaining--) {
+            encoding = setTypeRandomElement(set,&objele,&llele);
+            if (encoding == REDIS_ENCODING_INTSET) {
+                objele = createStringObjectFromLongLong(llele);
+            } else {
+                incrRefCount(objele);
+            }
+            if (!newset) newset = setTypeCreate(objele);
+            setTypeAdd(newset,objele);
+            setTypeRemove(set,objele);
+            decrRefCount(objele);
+        }
+
+        /* Assign the new set as the key value. */
+        incrRefCount(set); /* Protect the old set value. */
+        dbOverwrite(c->db,c->argv[1],newset);
+
+        /* Transfer the old set to the client and release it. */
+        setTypeIterator *si;
+        si = setTypeInitIterator(set);
+        while((encoding = setTypeNext(si,&objele,&llele)) != -1) {
+            if (encoding == REDIS_ENCODING_INTSET) {
+                objele = createStringObjectFromLongLong(llele);
+            } else {
+                incrRefCount(objele);
+            }
+            addReplyBulk(c,objele);
+
+            /* Replicate/AOF this command as an SREM operation */
+            propargv[2] = objele;
+            alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+                REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+
+            decrRefCount(objele);
+        }
+        setTypeReleaseIterator(si);
+        decrRefCount(set);
+    }
+
+    /* Don't propagate the command itself even if we incremented the
+     * dirty counter. We don't want to propagate an SPOP command since
+     * we propagated the command as a set of SREMs operations using
+     * the alsoPropagate() API. */
+    decrRefCount(propargv[0]);
+    preventCommandPropagation(c);
+}
+
void spopCommand(redisClient *c) {
robj *set, *ele, *aux;
int64_t llele;
int encoding;
+ if (c->argc == 3) {
+ spopWithCountCommand(c);
+ return;
+ } else if (c->argc > 3) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Make sure a key with the name inputted exists, and that it's type is
+ * indeed a set */
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,set,REDIS_SET)) return;
+ /* Get a random element from the set */
encoding = setTypeRandomElement(set,&ele,&llele);
+
+ /* Remove the element from the set */
if (encoding == REDIS_ENCODING_INTSET) {
ele = createStringObjectFromLongLong(llele);
set->ptr = intsetRemove(set->ptr,llele,NULL);
@@ -393,6 +571,7 @@ void spopCommand(redisClient *c) {
incrRefCount(ele);
setTypeRemove(set,ele);
}
+
notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
/* Replicate/AOF this command as an SREM operation */
@@ -401,11 +580,16 @@ void spopCommand(redisClient *c) {
decrRefCount(ele);
decrRefCount(aux);
+ /* Add the element to the reply */
addReplyBulk(c,ele);
+
+ /* Delete the set if it's empty */
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
+
+ /* Set has been modified */
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
@@ -587,7 +771,8 @@ int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
return (o2 ? setTypeSize(o2) : 0) - (o1 ? setTypeSize(o1) : 0);
}
-void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) {
+void sinterGenericCommand(redisClient *c, robj **setkeys,
+ unsigned long setnum, robj *dstkey) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *eleobj, *dstset = NULL;
@@ -734,7 +919,8 @@ void sinterstoreCommand(redisClient *c) {
#define REDIS_OP_DIFF 1
#define REDIS_OP_INTER 2
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) {
+void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+ robj *dstkey, int op) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *ele, *dstset = NULL;
diff --git a/src/t_string.c b/src/t_string.c
index b61589961..06c2e9ceb 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -61,6 +61,8 @@ static int checkStringLength(redisClient *c, long long size) {
#define REDIS_SET_NO_FLAGS 0
#define REDIS_SET_NX (1<<0) /* Set if key not exists. */
#define REDIS_SET_XX (1<<1) /* Set if key exists. */
+#define REDIS_SET_EX (1<<2) /* Set if time in seconds is given */
+#define REDIS_SET_PX (1<<3) /* Set if time in ms is given */
void setGenericCommand(redisClient *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmness warning */
@@ -102,18 +104,28 @@ void setCommand(redisClient *c) {
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
if ((a[0] == 'n' || a[0] == 'N') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & REDIS_SET_XX))
+ {
flags |= REDIS_SET_NX;
} else if ((a[0] == 'x' || a[0] == 'X') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & REDIS_SET_NX))
+ {
flags |= REDIS_SET_XX;
} else if ((a[0] == 'e' || a[0] == 'E') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & REDIS_SET_PX) && next)
+ {
+ flags |= REDIS_SET_EX;
unit = UNIT_SECONDS;
expire = next;
j++;
} else if ((a[0] == 'p' || a[0] == 'P') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & REDIS_SET_EX) && next)
+ {
+ flags |= REDIS_SET_PX;
unit = UNIT_MILLISECONDS;
expire = next;
j++;
diff --git a/src/t_zset.c b/src/t_zset.c
index d3c7214bd..64418c9b4 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1382,7 +1382,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
robj *key = c->argv[1];
robj *zobj;
int keyremoved = 0;
- unsigned long deleted;
+ unsigned long deleted = 0;
zrangespec range;
zlexrangespec lexrange;
long start, end, llen;
diff --git a/src/util.c b/src/util.c
index 80242ff71..4190775b1 100644
--- a/src/util.c
+++ b/src/util.c
@@ -38,8 +38,10 @@
#include <sys/time.h>
#include <float.h>
#include <stdint.h>
+#include <errno.h>
#include "util.h"
+#include "sha1.h"
/* Glob-style pattern matching. */
int stringmatchlen(const char *pattern, int patternLen,
@@ -169,11 +171,12 @@ int stringmatch(const char *pattern, const char *string, int nocase) {
}
/* Convert a string representing an amount of memory into the number of
- * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
+ * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
* (1024*1024*1024).
*
* On parsing error, if *err is not NULL, it's set to 1, otherwise it's
- * set to 0 */
+ * set to 0. On error the function return value is 0, regardless of the
+ * fact 'err' is NULL or not. */
long long memtoll(const char *p, int *err) {
const char *u;
char buf[128];
@@ -182,6 +185,7 @@ long long memtoll(const char *p, int *err) {
unsigned int digits;
if (err) *err = 0;
+
/* Search the first non digit character. */
u = p;
if (*u == '-') u++;
@@ -202,16 +206,26 @@ long long memtoll(const char *p, int *err) {
mul = 1024L*1024*1024;
} else {
if (err) *err = 1;
- mul = 1;
+ return 0;
}
+
+ /* Copy the digits into a buffer, we'll use strtoll() to convert
+ * the digit (without the unit) into a number. */
digits = u-p;
if (digits >= sizeof(buf)) {
if (err) *err = 1;
- return LLONG_MAX;
+ return 0;
}
memcpy(buf,p,digits);
buf[digits] = '\0';
- val = strtoll(buf,NULL,10);
+
+ char *endptr;
+ errno = 0;
+ val = strtoll(buf,&endptr,10);
+ if ((val == 0 && errno == EINVAL) || *endptr != '\0') {
+ if (err) *err = 1;
+ return 0;
+ }
return val*mul;
}
@@ -428,11 +442,44 @@ int d2string(char *buf, size_t len, double value) {
* having run_id == A, and you reconnect and it has run_id == B, you can be
* sure that it is either a different instance or it was restarted. */
void getRandomHexChars(char *p, unsigned int len) {
- FILE *fp = fopen("/dev/urandom","r");
char *charset = "0123456789abcdef";
unsigned int j;
- if (fp == NULL || fread(p,len,1,fp) == 0) {
+ /* Global state. */
+ static int seed_initialized = 0;
+ static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */
+ static uint64_t counter = 0; /* The counter we hash with the seed. */
+
+ if (!seed_initialized) {
+ /* Initialize a seed and use SHA1 in counter mode, where we hash
+ * the same seed with a progressive counter. For the goals of this
+ * function we just need non-colliding strings, there are no
+ * cryptographic security needs. */
+ FILE *fp = fopen("/dev/urandom","r");
+ if (fp && fread(seed,sizeof(seed),1,fp) == 1)
+ seed_initialized = 1;
+ if (fp) fclose(fp);
+ }
+
+ if (seed_initialized) {
+ while(len) {
+ unsigned char digest[20];
+ SHA1_CTX ctx;
+ unsigned int copylen = len > 20 ? 20 : len;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, seed, sizeof(seed));
+ SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter));
+ SHA1Final(digest, &ctx);
+ counter++;
+
+ memcpy(p,digest,copylen);
+ /* Convert to hex digits. */
+ for (j = 0; j < copylen; j++) p[j] = charset[p[j] & 0x0F];
+ len -= copylen;
+ p += copylen;
+ }
+ } else {
/* If we can't read from /dev/urandom, do some reasonable effort
* in order to create some entropy, since this function is used to
* generate run_id and cluster instance IDs */
@@ -459,14 +506,12 @@ void getRandomHexChars(char *p, unsigned int len) {
x += sizeof(pid);
}
/* Finally xor it with rand() output, that was already seeded with
- * time() at startup. */
- for (j = 0; j < len; j++)
+ * time() at startup, and convert to hex digits. */
+ for (j = 0; j < len; j++) {
p[j] ^= rand();
+ p[j] = charset[p[j] & 0x0F];
+ }
}
- /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */
- for (j = 0; j < len; j++)
- p[j] = charset[p[j] & 0x0F];
- if (fp) fclose(fp);
}
/* Given the filename, return the absolute path as an SDS string, or NULL
@@ -529,10 +574,10 @@ int pathIsBaseName(char *path) {
return strchr(path,'/') == NULL && strchr(path,'\\') == NULL;
}
-#ifdef UTIL_TEST_MAIN
+#ifdef REDIS_TEST
#include <assert.h>
-void test_string2ll(void) {
+static void test_string2ll(void) {
char buf[32];
long long v;
@@ -587,7 +632,7 @@ void test_string2ll(void) {
assert(string2ll(buf,strlen(buf),&v) == 0);
}
-void test_string2l(void) {
+static void test_string2l(void) {
char buf[32];
long v;
@@ -636,9 +681,55 @@ void test_string2l(void) {
#endif
}
-int main(int argc, char **argv) {
+/* Exercise ll2string() on zero, small positive and negative values, the
+ * 32-bit signed minimum and both long long extremes. Every case asserts
+ * the return value (the number of characters written, sign included) and
+ * the resulting string. */
+static void test_ll2string(void) {
+ char buf[32];
+ long long v;
+ int sz;
+
+ v = 0;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 1);
+ assert(!strcmp(buf, "0"));
+
+ v = -1;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "-1"));
+
+ v = 99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "99"));
+
+ v = -99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 3);
+ assert(!strcmp(buf, "-99"));
+
+ /* Smallest value representable in 32 bits: 10 digits plus the sign. */
+ v = -2147483648;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 11);
+ assert(!strcmp(buf, "-2147483648"));
+
+ /* LLONG_MIN has no positive counterpart, so it is the classic edge case
+  * for integer-to-string code that negates its input. */
+ v = LLONG_MIN;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 20);
+ assert(!strcmp(buf, "-9223372036854775808"));
+
+ v = LLONG_MAX;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 19);
+ assert(!strcmp(buf, "9223372036854775807"));
+}
+
+#define UNUSED(x) (void)(x)
+int utilTest(int argc, char **argv) {
+ UNUSED(argc);
+ UNUSED(argv);
+
test_string2ll();
test_string2l();
+ test_ll2string();
return 0;
}
#endif
diff --git a/src/util.h b/src/util.h
index b3667cd6f..666042c9b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -42,4 +42,8 @@ int d2string(char *buf, size_t len, double value);
sds getAbsolutePath(char *filename);
int pathIsBaseName(char *path);
+#ifdef REDIS_TEST
+int utilTest(int argc, char **argv);
+#endif
+
#endif
diff --git a/src/version.h b/src/version.h
index 00cbae681..dbb6197c8 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define REDIS_VERSION "2.9.999"
+#define REDIS_VERSION "3.1.999"
diff --git a/src/ziplist.c b/src/ziplist.c
index 64a22adfc..7428d30e9 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -143,6 +143,7 @@
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
+#define ZIPLIST_END_SIZE (sizeof(uint8_t))
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
@@ -162,6 +163,13 @@ typedef struct zlentry {
unsigned char *p;
} zlentry;
+/* Reset every field of the zlentry pointed to by 'zle'.
+ *
+ * The body is wrapped in do { } while(0) so that the macro followed by ';'
+ * expands to exactly one statement and stays safe inside an unbraced
+ * if/else, like ZIP_ENTRY_ENCODING does. */
+#define ZIPLIST_ENTRY_ZERO(zle) do { \
+    (zle)->prevrawlensize = (zle)->prevrawlen = 0; \
+    (zle)->lensize = (zle)->len = (zle)->headersize = 0; \
+    (zle)->encoding = 0; \
+    (zle)->p = NULL; \
+} while(0)
+
/* Extract the encoding from the byte pointed by 'ptr' and set it into
* 'encoding'. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \
@@ -169,6 +177,8 @@ typedef struct zlentry {
if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)
+void ziplistRepr(unsigned char *zl);
+
/* Return bytes needed to store integer encoded by 'encoding' */
static unsigned int zipIntSize(unsigned char encoding) {
switch(encoding) {
@@ -404,14 +414,12 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
}
/* Return a struct with all information about an entry. */
-static zlentry zipEntry(unsigned char *p) {
- zlentry e;
-
- ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen);
- ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len);
- e.headersize = e.prevrawlensize + e.lensize;
- e.p = p;
- return e;
+static void zipEntry(unsigned char *p, zlentry *e) {
+
+ ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
+ ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
+ e->headersize = e->prevrawlensize + e->lensize;
+ e->p = p;
}
/* Create a new empty ziplist. */
@@ -460,13 +468,13 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
zlentry cur, next;
while (p[0] != ZIP_END) {
- cur = zipEntry(p);
+ zipEntry(p, &cur);
rawlen = cur.headersize + cur.len;
rawlensize = zipPrevEncodeLength(NULL,rawlen);
/* Abort if there is no next entry. */
if (p[rawlen] == ZIP_END) break;
- next = zipEntry(p+rawlen);
+ zipEntry(p+rawlen, &next);
/* Abort when "prevlen" has not changed. */
if (next.prevrawlen == rawlen) break;
@@ -521,7 +529,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
int nextdiff = 0;
zlentry first, tail;
- first = zipEntry(p);
+ zipEntry(p, &first);
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
@@ -545,7 +553,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p);
+ zipEntry(p, &tail);
if (p[tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -635,7 +643,7 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p+reqlen);
+ zipEntry(p+reqlen, &tail);
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -665,6 +673,121 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
return zl;
}
+/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.
+ *
+ * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.
+ * Either 'first' or 'second' can be used for the result. The parameter not
+ * used will be free'd and set to NULL.
+ *
+ * After calling this function, the input parameters are no longer valid since
+ * they are changed and free'd in-place.
+ *
+ * The result ziplist is the contents of 'first' followed by 'second'.
+ *
+ * On failure: returns NULL if the merge is impossible.
+ * On success: returns the merged ziplist (which is expanded version of either
+ * 'first' or 'second', also frees the other unused input ziplist, and sets the
+ * input ziplist argument equal to newly reallocated ziplist return value. */
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {
+    /* If any params are null, we can't merge, so NULL. */
+    if (first == NULL || *first == NULL || second == NULL || *second == NULL)
+        return NULL;
+
+    /* Can't merge same list into itself. */
+    if (*first == *second)
+        return NULL;
+
+    size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));
+    size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));
+
+    size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));
+    size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));
+
+    int append;
+    unsigned char *source, *target;
+    size_t target_bytes, source_bytes;
+    /* Pick the ziplist with the most entries (as recorded in its header)
+     * as the one to resize in-place. We must also track if we are now
+     * appending or prepending to the target ziplist. */
+    if (first_len >= second_len) {
+        /* retain first, append second to first. */
+        target = *first;
+        target_bytes = first_bytes;
+        source = *second;
+        source_bytes = second_bytes;
+        append = 1;
+    } else {
+        /* else, retain second, prepend first to second. */
+        target = *second;
+        target_bytes = second_bytes;
+        source = *first;
+        source_bytes = first_bytes;
+        append = 0;
+    }
+
+    /* Calculate final bytes (subtract one pair of metadata) */
+    size_t zlbytes = first_bytes + second_bytes -
+                     ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;
+    size_t zllength = first_len + second_len;
+
+    /* Combined zl length should be limited within UINT16_MAX */
+    zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;
+
+    /* Save offset positions before we start ripping memory apart. */
+    size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));
+    size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));
+
+    /* Extend target to new zlbytes then append or prepend source. */
+    target = zrealloc(target, zlbytes);
+    if (append) {
+        /* append == appending to target */
+        /* Copy source after target (copying over original [END]):
+         *   [TARGET - END, SOURCE - HEADER] */
+        memcpy(target + target_bytes - ZIPLIST_END_SIZE,
+               source + ZIPLIST_HEADER_SIZE,
+               source_bytes - ZIPLIST_HEADER_SIZE);
+    } else {
+        /* !append == prepending to target */
+        /* Move target *contents* exactly size of (source - [END]),
+         * then copy source into vacated space (source - [END]):
+         *   [SOURCE - END, TARGET - HEADER] */
+        memmove(target + source_bytes - ZIPLIST_END_SIZE,
+                target + ZIPLIST_HEADER_SIZE,
+                target_bytes - ZIPLIST_HEADER_SIZE);
+        memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);
+    }
+
+    /* Update header metadata. */
+    ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);
+    ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);
+    /* New tail offset is normally:
+     *   + N bytes of first ziplist
+     *   - 1 byte for [END] of first ziplist
+     *   + M bytes for the offset of the original tail of the second ziplist
+     *   - J bytes for HEADER because second_offset keeps no header.
+     *
+     * That formula assumes 'second' contributes the last entry. When
+     * 'second' holds no entries at all (it is only HEADER + END) the formula
+     * would yield first_bytes - 1, i.e. the position of the merged [END]
+     * byte instead of the last entry, so the tail of the merged list is
+     * simply the tail of 'first'. */
+    size_t tail_offset;
+    if (second_bytes == ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE) {
+        tail_offset = first_offset;
+    } else {
+        tail_offset = (first_bytes - ZIPLIST_END_SIZE) +
+                      (second_offset - ZIPLIST_HEADER_SIZE);
+    }
+    ZIPLIST_TAIL_OFFSET(target) = intrev32ifbe(tail_offset);
+
+    /* __ziplistCascadeUpdate just fixes the prev length values until it finds a
+     * correct prev length value (then it assumes the rest of the list is okay).
+     * We tell CascadeUpdate to start at the first ziplist's tail element to fix
+     * the merge seam. */
+    target = __ziplistCascadeUpdate(target, target+first_offset);
+
+    /* Now free and NULL out what we didn't realloc */
+    if (append) {
+        zfree(*second);
+        *second = NULL;
+        *first = target;
+    } else {
+        zfree(*first);
+        *first = NULL;
+        *second = target;
+    }
+    return target;
+}
+
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
unsigned char *p;
p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
@@ -748,7 +871,7 @@ unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *sl
if (p == NULL || p[0] == ZIP_END) return 0;
if (sstr) *sstr = NULL;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
if (sstr) {
*slen = entry.len;
@@ -783,7 +906,7 @@ unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
}
/* Delete a range of entries from the ziplist. */
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num) {
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
unsigned char *p = ziplistIndex(zl,index);
return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
}
@@ -796,7 +919,7 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int
long long zval, sval;
if (p[0] == ZIP_END) return 0;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
/* Raw compare */
if (entry.len == slen) {
@@ -913,7 +1036,7 @@ void ziplistRepr(unsigned char *zl) {
intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
p = ZIPLIST_ENTRY_HEAD(zl);
while(*p != ZIP_END) {
- entry = zipEntry(p);
+ zipEntry(p, &entry);
printf(
"{"
"addr 0x%08lx, "
@@ -952,14 +1075,14 @@ void ziplistRepr(unsigned char *zl) {
printf("{end}\n\n");
}
-#ifdef ZIPLIST_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"
#include "sds.h"
#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); }
-unsigned char *createList() {
+static unsigned char *createList() {
unsigned char *zl = ziplistNew();
zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL);
zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL);
@@ -968,7 +1091,7 @@ unsigned char *createList() {
return zl;
}
-unsigned char *createIntList() {
+static unsigned char *createIntList() {
unsigned char *zl = ziplistNew();
char buf[32];
@@ -987,13 +1110,13 @@ unsigned char *createIntList() {
return zl;
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
-void stress(int pos, int num, int maxsize, int dnum) {
+static void stress(int pos, int num, int maxsize, int dnum) {
int i,j,k;
unsigned char *zl;
char posstr[2][5] = { "HEAD", "TAIL" };
@@ -1016,7 +1139,7 @@ void stress(int pos, int num, int maxsize, int dnum) {
}
}
-void pop(unsigned char *zl, int where) {
+static unsigned char *pop(unsigned char *zl, int where) {
unsigned char *p, *vstr;
unsigned int vlen;
long long vlong;
@@ -1028,20 +1151,22 @@ void pop(unsigned char *zl, int where) {
else
printf("Pop tail: ");
- if (vstr)
+ if (vstr) {
if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite");
- else
+ }
+ else {
printf("%lld", vlong);
+ }
printf("\n");
- ziplistDeleteRange(zl,-1,1);
+ return ziplistDelete(zl,&p);
} else {
printf("ERROR: Could not pop\n");
exit(1);
}
}
-int randstring(char *target, unsigned int min, unsigned int max) {
+static int randstring(char *target, unsigned int min, unsigned int max) {
int p = 0;
int len = min+rand()%(max-min+1);
int minval, maxval;
@@ -1067,23 +1192,24 @@ int randstring(char *target, unsigned int min, unsigned int max) {
return len;
}
-void verify(unsigned char *zl, zlentry *e) {
- int i;
+static void verify(unsigned char *zl, zlentry *e) {
int len = ziplistLen(zl);
zlentry _e;
- for (i = 0; i < len; i++) {
+ ZIPLIST_ENTRY_ZERO(&_e);
+
+ for (int i = 0; i < len; i++) {
memset(&e[i], 0, sizeof(zlentry));
- e[i] = zipEntry(ziplistIndex(zl, i));
+ zipEntry(ziplistIndex(zl, i), &e[i]);
memset(&_e, 0, sizeof(zlentry));
- _e = zipEntry(ziplistIndex(zl, -len+i));
+ zipEntry(ziplistIndex(zl, -len+i), &_e);
assert(memcmp(&e[i], &_e, sizeof(zlentry)) == 0);
}
}
-int main(int argc, char **argv) {
+int ziplistTest(int argc, char **argv) {
unsigned char *zl, *p;
unsigned char *entry;
unsigned int elen;
@@ -1096,21 +1222,25 @@ int main(int argc, char **argv) {
zl = createIntList();
ziplistRepr(zl);
+ zfree(zl);
+
zl = createList();
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_HEAD);
+ zl = pop(zl,ZIPLIST_HEAD);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
+ zfree(zl);
+
printf("Get element at index 3:\n");
{
zl = createList();
@@ -1126,6 +1256,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index 4 (out of range):\n");
@@ -1139,6 +1270,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -1 (last element):\n");
@@ -1156,6 +1288,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -4 (first element):\n");
@@ -1173,6 +1306,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -5 (reverse out of range):\n");
@@ -1186,6 +1320,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 0 to end:\n");
@@ -1203,6 +1338,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 1 to end:\n");
@@ -1220,6 +1356,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 2 to end:\n");
@@ -1237,6 +1374,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate starting out of range:\n");
@@ -1249,6 +1387,7 @@ int main(int argc, char **argv) {
printf("ERROR\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front:\n");
@@ -1266,6 +1405,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front, deleting all items:\n");
@@ -1284,6 +1424,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Delete inclusive range 0,0:\n");
@@ -1291,6 +1432,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 0,1:\n");
@@ -1298,6 +1440,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 1,2:\n");
@@ -1305,6 +1448,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with start index out of range:\n");
@@ -1312,6 +1456,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 5, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with num overflow:\n");
@@ -1319,6 +1464,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 5);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete foo while iterating:\n");
@@ -1343,11 +1489,12 @@ int main(int argc, char **argv) {
}
printf("\n");
ziplistRepr(zl);
+ zfree(zl);
}
printf("Regression test for >255 byte strings:\n");
{
- char v1[257],v2[257];
+ char v1[257] = {0}, v2[257] = {0};
memset(v1,'x',256);
memset(v2,'y',256);
zl = ziplistNew();
@@ -1362,13 +1509,15 @@ int main(int argc, char **argv) {
assert(ziplistGet(p,&entry,&elen,&value));
assert(strncmp(v2,(char*)entry,elen) == 0);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Regression test deleting next to last entries:\n");
{
- char v[3][257];
- zlentry e[3];
- int i;
+ char v[3][257] = {{0}};
+ zlentry e[3] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0,
+ .len = 0, .headersize = 0, .encoding = 0, .p = NULL}};
+ size_t i;
for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) {
memset(v[i], 'a' + i, sizeof(v[0]));
@@ -1399,6 +1548,7 @@ int main(int argc, char **argv) {
assert(e[1].prevrawlensize == 5);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Create long list and check indices:\n");
@@ -1420,6 +1570,7 @@ int main(int argc, char **argv) {
assert(999-i == value);
}
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Compare strings with ziplist entries:\n");
@@ -1445,6 +1596,82 @@ int main(int argc, char **argv) {
return 1;
}
printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Merge test:\n");
+ {
+ /* create list gives us: [hello, foo, quux, 1024] */
+ zl = createList();
+ unsigned char *zl2 = createList();
+
+ unsigned char *zl3 = ziplistNew();
+ unsigned char *zl4 = ziplistNew();
+
+ if (ziplistMerge(&zl4, &zl4)) {
+ printf("ERROR: Allowed merging of one ziplist into itself.\n");
+ return 1;
+ }
+
+ /* Merge two empty ziplists, get empty result back. */
+ zl4 = ziplistMerge(&zl3, &zl4);
+ ziplistRepr(zl4);
+ if (ziplistLen(zl4)) {
+ printf("ERROR: Merging two empty ziplists created entries.\n");
+ return 1;
+ }
+ zfree(zl4);
+
+ zl2 = ziplistMerge(&zl, &zl2);
+ /* merge gives us: [hello, foo, quux, 1024, hello, foo, quux, 1024] */
+ ziplistRepr(zl2);
+
+ if (ziplistLen(zl2) != 8) {
+ printf("ERROR: Merged length not 8, but: %u\n", ziplistLen(zl2));
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,0);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,3);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,4);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,7);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+ printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Stress with random payloads of different encoding:\n");
@@ -1464,7 +1691,7 @@ int main(int argc, char **argv) {
for (i = 0; i < 20000; i++) {
zl = ziplistNew();
ref = listCreate();
- listSetFreeMethod(ref,sdsfree);
+ listSetFreeMethod(ref,(void (*)(void*))sdsfree);
len = rand() % 256;
/* Create lists */
@@ -1532,5 +1759,4 @@ int main(int argc, char **argv) {
return 0;
}
-
#endif
diff --git a/src/ziplist.h b/src/ziplist.h
index b29c34167..e92b5e783 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -32,6 +32,7 @@
#define ZIPLIST_TAIL 1
unsigned char *ziplistNew(void);
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
unsigned char *ziplistIndex(unsigned char *zl, int index);
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
@@ -39,8 +40,12 @@ unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num);
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
unsigned int ziplistLen(unsigned char *zl);
size_t ziplistBlobLen(unsigned char *zl);
+
+#ifdef REDIS_TEST
+int ziplistTest(int argc, char *argv[]);
+#endif
diff --git a/src/zipmap.c b/src/zipmap.c
index 384b76bba..22bfa1a46 100644
--- a/src/zipmap.c
+++ b/src/zipmap.c
@@ -370,8 +370,8 @@ size_t zipmapBlobLen(unsigned char *zm) {
return totlen;
}
-#ifdef ZIPMAP_TEST_MAIN
-void zipmapRepr(unsigned char *p) {
+#ifdef REDIS_TEST
+static void zipmapRepr(unsigned char *p) {
unsigned int l;
printf("{status %u}",*p++);
@@ -404,9 +404,13 @@ void zipmapRepr(unsigned char *p) {
printf("\n");
}
-int main(void) {
+#define UNUSED(x) (void)(x)
+int zipmapTest(int argc, char *argv[]) {
unsigned char *zm;
+ UNUSED(argc);
+ UNUSED(argv);
+
zm = zipmapNew();
zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL);
diff --git a/src/zipmap.h b/src/zipmap.h
index 9cf1b2484..ac588f05a 100644
--- a/src/zipmap.h
+++ b/src/zipmap.h
@@ -46,4 +46,8 @@ unsigned int zipmapLen(unsigned char *zm);
size_t zipmapBlobLen(unsigned char *zm);
void zipmapRepr(unsigned char *p);
+#ifdef REDIS_TEST
+int zipmapTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 6df51a80f..640ee19e2 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -364,3 +364,60 @@ size_t zmalloc_get_smap_bytes_by_field(char *field) {
size_t zmalloc_get_private_dirty(void) {
return zmalloc_get_smap_bytes_by_field("Private_Dirty:");
}
+
+/* Returns the size of physical memory (RAM) in bytes.
+ * It looks ugly, but this is the cleanest way to achieve cross-platform results.
+ * Cleaned up from:
+ *
+ * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
+ *
+ * Note that this function:
+ * 1) Was released under the following CC attribution license:
+ * http://creativecommons.org/licenses/by/3.0/deed.en_US.
+ * 2) Was originally implemented by David Robert Nadeau.
+ * 3) Was modified for Redis by Matt Stancliff.
+ * 4) This note exists in order to comply with the original license.
+ */
+/* Return the amount of physical memory in bytes, or 0 when it cannot be
+ * determined (unknown platform or failing system call). The query method is
+ * selected at compile time from whichever sysctl/sysconf names are defined. */
+size_t zmalloc_get_memory_size(void) {
+#if defined(__unix__) || defined(__unix) || defined(unix) || \
+    (defined(__APPLE__) && defined(__MACH__))
+#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
+    int mib[2];
+    mib[0] = CTL_HW;
+#if defined(HW_MEMSIZE)
+    mib[1] = HW_MEMSIZE;            /* OSX. --------------------- */
+#elif defined(HW_PHYSMEM64)
+    mib[1] = HW_PHYSMEM64;          /* NetBSD, OpenBSD. --------- */
+#endif
+    int64_t size = 0;               /* 64-bit */
+    size_t len = sizeof(size);
+    if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)
+        return (size_t)size;
+    return 0L;          /* Failed? */
+
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+    /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
+    /* sysconf() reports failure with -1; do not let that be cast to a huge
+     * size_t and multiplied, return 0 like the other failure paths. */
+    long pages = sysconf(_SC_PHYS_PAGES);
+    long pagesize = sysconf(_SC_PAGESIZE);
+    if (pages < 0 || pagesize < 0) return 0L;
+    return (size_t)pages * (size_t)pagesize;
+
+#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
+    /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
+    int mib[2];
+    mib[0] = CTL_HW;
+#if defined(HW_REALMEM)
+    mib[1] = HW_REALMEM;        /* FreeBSD. ----------------- */
+#elif defined(HW_PHYSMEM)
+    /* The guard must name HW_PHYSMEM exactly, otherwise mib[1] would be
+     * left uninitialized on systems that only provide HW_PHYSMEM. */
+    mib[1] = HW_PHYSMEM;        /* Others. ------------------ */
+#endif
+    unsigned int size = 0;      /* 32-bit */
+    size_t len = sizeof(size);
+    if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
+        return (size_t)size;
+    return 0L;          /* Failed? */
+#endif /* sysctl and sysconf variants */
+
+#else
+    return 0L;          /* Unknown OS. */
+#endif
+}
+
+
diff --git a/src/zmalloc.h b/src/zmalloc.h
index 4de2cffea..a47ea6ccf 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -77,6 +77,7 @@ float zmalloc_get_fragmentation_ratio(size_t rss);
size_t zmalloc_get_rss(void);
size_t zmalloc_get_private_dirty(void);
size_t zmalloc_get_smap_bytes_by_field(char *field);
+size_t zmalloc_get_memory_size(void);
void zlibc_free(void *ptr);
#ifndef HAVE_MALLOC_SIZE
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index 69a160c4f..f764cea0a 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -21,6 +21,7 @@ proc main {} {
if {[catch main e]} {
puts $::errorInfo
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
index b9e772351..8811762c6 100644
--- a/tests/cluster/tests/04-resharding.tcl
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -66,9 +66,18 @@ test "Cluster consistency during live resharding" {
}
# Write random data to random list.
- set key "key:[randomInt $numkeys]"
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
set ele [randomValue]
- $cluster rpush $key $ele
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
lappend content($key) $ele
if {($j % 1000) == 0} {
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
index 65fc806e1..466ab8f25 100644
--- a/tests/cluster/tests/includes/init-tests.tcl
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -27,10 +27,17 @@ test "Cluster nodes are reachable" {
test "Cluster nodes hard reset" {
foreach_redis_id id {
+ if {$::valgrind} {
+ set node_timeout 10000
+ } else {
+ set node_timeout 3000
+ }
catch {R $id flushall} ; # May fail for readonly slaves.
+ R $id MULTI
R $id cluster reset hard
R $id cluster set-config-epoch [expr {$id+1}]
- R $id config set cluster-node-timeout 3000
+ R $id EXEC
+ R $id config set cluster-node-timeout $node_timeout
R $id config set cluster-slave-validity-factor 10
R $id config rewrite
}
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 426508f33..353d9b2d2 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -16,6 +16,7 @@ source ../support/server.tcl
source ../support/test.tcl
set ::verbose 0
+set ::valgrind 0
set ::pause_on_error 0
set ::simulate_error 0
set ::sentinel_instances {}
@@ -32,6 +33,25 @@ if {[catch {cd tmp}]} {
exit 1
}
+# Execute the specified instance of the server specified by 'type', using
+# the provided configuration file. Returns the PID of the process.
+#
+# 'type' must be "redis" or "sentinel"; any other value raises an error
+# before anything is launched. When the global ::valgrind flag is set the
+# same binary is started under valgrind instead of directly.
+proc exec_instance {type cfgfile} {
+ if {$type eq "redis"} {
+ set prgname redis-server
+ } elseif {$type eq "sentinel"} {
+ set prgname redis-sentinel
+ } else {
+ error "Unknown instance type."
+ }
+
+ # Both branches end the command with '&', so the instance is started in
+ # the background and the result of exec is kept as the pid.
+ if {$::valgrind} {
+ set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
+ } else {
+ set pid [exec ../../../src/${prgname} $cfgfile &]
+ }
+ return $pid
+}
+
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
for {set j 0} {$j < $count} {incr j} {
@@ -58,14 +78,7 @@ proc spawn_instance {type base_port count {conf {}}} {
close $cfg
# Finally exec it and remember the pid for later cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } elseif {$type eq "sentinel"} {
- set prgname redis-sentinel
- } else {
- error "Unknown instance type."
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
lappend ::pids $pid
# Check availability
@@ -98,6 +111,7 @@ proc cleanup {} {
proc abort_sentinel_test msg {
puts "WARNING: Aborting the test."
puts ">>>>>>>> $msg"
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
@@ -113,6 +127,8 @@ proc parse_options {} {
set ::pause_on_error 1
} elseif {$opt eq "--fail"} {
set ::simulate_error 1
+ } elseif {$opt eq {--valgrind}} {
+ set ::valgrind 1
} elseif {$opt eq "--help"} {
puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
puts "\nOptions:"
@@ -360,15 +376,31 @@ proc get_instance_id_by_port {type port} {
# The instance can be restarted with restart-instance.
proc kill_instance {type id} {
set pid [get_instance_attrib $type $id pid]
+ set port [get_instance_attrib $type $id port]
+
if {$pid == -1} {
error "You tried to kill $type $id twice."
}
+
exec kill -9 $pid
set_instance_attrib $type $id pid -1
set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
# Remove the PID from the list of pids to kill at exit.
set ::pids [lsearch -all -inline -not -exact $::pids $pid]
+
+ # Wait for the port it was using to be available again, so that's not
+ # an issue to start a new server ASAP with the same port.
+ # The probe is a plain connect to the loopback address: when the connect
+ # fails (catch returns non-zero) the port is considered free. The address
+ # must be a well-formed 127.0.0.1, otherwise a resolver that rejects it
+ # would make the port look free immediately.
+ set retry 10
+ while {[incr retry -1]} {
+ set port_is_free [catch {set s [socket 127.0.0.1 $port]}]
+ if {$port_is_free} break
+ catch {close $s}
+ after 1000
+ }
+ if {$retry == 0} {
+ error "Port $port does not return available after killing instance."
+ }
}
# Return true of the instance of the specified type/id is killed.
@@ -385,12 +417,7 @@ proc restart_instance {type id} {
# Execute the instance with its old setup and append the new pid
# file for cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } else {
- set prgname redis-sentinel
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
set_instance_attrib $type $id pid $pid
lappend ::pids $pid
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index 7ea70943c..832f996e1 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -204,6 +204,30 @@ tags {"aof"} {
}
}
+ ## Uses the alsoPropagate() API.
+ create_aof {
+ append_to_aof [formatCommand sadd set foo]
+ append_to_aof [formatCommand sadd set bar]
+ append_to_aof [formatCommand sadd set gah]
+ append_to_aof [formatCommand spop set 2]
+ }
+
+ start_server_aof [list dir $server_path] {
+ test "AOF+SPOP: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+SPOP: Set should have 1 member" {
+ set client [redis [dict get $srv host] [dict get $srv port]]
+ wait_for_condition 50 100 {
+ [catch {$client ping} e] == 0
+ } else {
+ fail "Loading DB is taking too much time."
+ }
+ assert_equal 1 [$client scard set]
+ }
+ }
+
## Test that EXPIREAT is loaded correctly
create_aof {
append_to_aof [formatCommand rpush list foo]
diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl
new file mode 100644
index 000000000..c1f4854d4
--- /dev/null
+++ b/tests/integration/logging.tcl
@@ -0,0 +1,24 @@
+# Integration test: after a blocking DEBUG SLEEP with the watchdog enabled,
+# the server log is expected to contain a stack trace mentioning
+# debugCommand.
+set server_path [tmpdir server.log]
+set system_name [string tolower [exec uname -s]]
+
+# The check is only attempted where `uname -s` reports Linux or Darwin.
+if {$system_name eq {linux} || $system_name eq {darwin}} {
+    start_server [list overrides [list dir $server_path]] {
+        test "Server is able to generate a stack trace on selected systems" {
+            # Set a watchdog period of 200, then keep the server blocked
+            # with DEBUG SLEEP 1.
+            r config set watchdog-period 200
+            r debug sleep 1
+            set pattern "*debugCommand*"
+            # Poll the last 100 lines of the server stdout file, once per
+            # second, for at most 10 attempts.
+            set retry 10
+            while {$retry} {
+                # Use the standard "-n <count>" option, like the other
+                # integration tests do, instead of the obsolescent "-100".
+                set result [exec tail -n 100 < [srv 0 stdout]]
+                if {[string match $pattern $result]} {
+                    break
+                }
+                incr retry -1
+                after 1000
+            }
+            # retry only reaches 0 when every attempt failed to match.
+            if {$retry == 0} {
+                error "assertion:expected stack trace not found into log file"
+            }
+        }
+    }
+}
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index 71876a6ed..a2f187ad4 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -90,7 +90,7 @@ start_server_and_kill_it [list "dir" $server_path] {
test {Server should not start if RDB is corrupted} {
wait_for_condition 50 100 {
[string match {*RDB checksum*} \
- [exec tail -n1 < [dict get $srv stdout]]]
+ [exec tail -n10 < [dict get $srv stdout]]]
} else {
fail "Server started even if RDB was corrupted!"
}
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index 6db9ffe2b..3af2e921d 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -132,5 +132,24 @@ start_server {tags {"repl"}} {
}
assert {[$master dbsize] > 0}
}
+
+ test {Replication of SPOP command -- alsoPropagate() API} {
+ $master del myset
+ set size [randomInt 100]
+ set content {}
+ for {set j 0} {$j < $size} {incr j} {
+ lappend content [randomValue]
+ }
+ $master sadd myset {*}$content
+
+ set count [randomInt 100]
+ set result [$master spop myset $count]
+
+ wait_for_condition 50 100 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "SPOP replication inconsistency"
+ }
+ }
}
}
diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index b007e3b05..d4e7d2e5d 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -226,6 +226,8 @@ proc ::redis_cluster::get_keys_from_command {cmd argv} {
# Special handling for other commands
switch -exact $cmd {
mget {return $argv}
+ eval {return [lrange $argv 2 1+[lindex $argv 1]]}
+ evalsha {return [lrange $argv 2 1+[lindex $argv 1]]}
}
# All the remaining commands are not handled.
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 67ee24528..317b40a84 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -207,7 +207,7 @@ proc start_server {options {code undefined}} {
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
if {$::valgrind} {
- set pid [exec valgrind --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
+ set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
} else {
set pid [exec src/redis-server $config_file > $stdout 2> $stderr &]
}
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index 7d390cc47..31371c567 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -19,9 +19,12 @@ proc assert_match {pattern value} {
}
}
-proc assert_equal {expected value} {
+proc assert_equal {expected value {detail ""}} {
if {$expected ne $value} {
- error "assertion:Expected '$value' to be equal to '$expected'"
+ if {$detail ne ""} {
+ set detail " (detail: $detail)"
+ }
+ error "assertion:Expected '$value' to be equal to '$expected'$detail"
}
}
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 212c95b4f..7e9e2cfaa 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -38,6 +38,7 @@ set ::all_tests {
integration/aof
integration/rdb
integration/convert-zipmap-hash-on-load
+ integration/logging
unit/pubsub
unit/slowlog
unit/scripting
diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl
index a2d74168f..4fdbdc6c6 100644
--- a/tests/unit/aofrw.tcl
+++ b/tests/unit/aofrw.tcl
@@ -77,10 +77,10 @@ start_server {tags {"aofrw"}} {
}
foreach d {string int} {
- foreach e {ziplist linkedlist} {
+ foreach e {quicklist} {
test "AOF rewrite of list with $e encoding, $d data" {
r flushall
- if {$e eq {ziplist}} {set len 10} else {set len 1000}
+ set len 1000
for {set j 0} {$j < $len} {incr j} {
if {$d eq {string}} {
set data [randstring 0 16 alpha]
diff --git a/tests/unit/basic.tcl b/tests/unit/basic.tcl
index b0b3b9bac..fec0df5ec 100644
--- a/tests/unit/basic.tcl
+++ b/tests/unit/basic.tcl
@@ -368,7 +368,18 @@ start_server {tags {"basic"}} {
format $err
} {ERR*}
- test {RENAME where source and dest key is the same} {
+ test {RENAME where source and dest key are the same (existing)} {
+ r set mykey foo
+ r rename mykey mykey
+ } {OK}
+
+ test {RENAMENX where source and dest key are the same (existing)} {
+ r set mykey foo
+ r renamenx mykey mykey
+ } {0}
+
+ test {RENAME where source and dest key are the same (non existing)} {
+ r del mykey
catch {r rename mykey mykey} err
format $err
} {ERR*}
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index d39204f9f..5af53db8d 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -157,7 +157,7 @@ start_server {tags {"dump"}} {
test {MIGRATE can correctly transfer large values} {
set first [srv 0 client]
r del key
- for {set j 0} {$j < 5000} {incr j} {
+ for {set j 0} {$j < 40000} {incr j} {
r rpush key 1 2 3 4 5 6 7 8 9 10
r rpush key "item 1" "item 2" "item 3" "item 4" "item 5" \
"item 6" "item 7" "item 8" "item 9" "item 10"
@@ -175,7 +175,7 @@ start_server {tags {"dump"}} {
assert {[$first exists key] == 0}
assert {[$second exists key] == 1}
assert {[$second ttl key] == -1}
- assert {[$second llen key] == 5000*20}
+ assert {[$second llen key] == 40000*20}
}
}
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 14e135ced..7ca9a705b 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -1,15 +1,20 @@
proc test_memory_efficiency {range} {
r flushall
+ set rd [redis_deferring_client]
set base_mem [s used_memory]
set written 0
for {set j 0} {$j < 10000} {incr j} {
set key key:$j
set val [string repeat A [expr {int(rand()*$range)}]]
- r set $key $val
+ $rd set $key $val
incr written [string length $key]
incr written [string length $val]
incr written 2 ;# A separator is the minimum to store key-value data.
}
+ for {set j 0} {$j < 10000} {incr j} {
+ $rd read ; # Discard replies
+ }
+
set current_mem [s used_memory]
set used [expr {$current_mem-$base_mem}]
set efficiency [expr {double($written)/$used}]
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index e1cd2174b..921382e34 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -413,7 +413,7 @@ start_server {tags {"scripting"}} {
r sadd myset a b c
r mset a 1 b 2 c 3 d 4
assert {[r spop myset] ne {}}
- assert {[r spop myset] ne {}}
+ assert {[r spop myset 1] ne {}}
assert {[r spop myset] ne {}}
assert {[r mget a b c d] eq {1 2 3 4}}
assert {[r spop myset] eq {}}
diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl
index 8ebd75965..083c4540d 100644
--- a/tests/unit/sort.tcl
+++ b/tests/unit/sort.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"sort"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 32
+ "list-max-ziplist-size" 32
"set-max-intset-entries" 32
}
} {
@@ -36,9 +35,9 @@ start_server {
}
foreach {num cmd enc title} {
- 16 lpush ziplist "Ziplist"
- 1000 lpush linkedlist "Linked list"
- 10000 lpush linkedlist "Big Linked list"
+ 16 lpush quicklist "Old Ziplist"
+ 1000 lpush quicklist "Old Linked list"
+ 10000 lpush quicklist "Old Big Linked list"
16 sadd intset "Intset"
1000 sadd hashtable "Hash table"
10000 sadd hashtable "Big Hash table"
@@ -85,14 +84,14 @@ start_server {
r sort tosort BY weight_* store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
test "SORT BY hash field STORE" {
r sort tosort BY wobj_*->weight store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
test "SORT extracts STORE correctly" {
diff --git a/tests/unit/type/list-2.tcl b/tests/unit/type/list-2.tcl
index bf6a055eb..4c7d6d91c 100644
--- a/tests/unit/type/list-2.tcl
+++ b/tests/unit/type/list-2.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 4
}
} {
source "tests/unit/type/list-common.tcl"
@@ -28,14 +27,18 @@ start_server {
for {set i 0} {$i < 1000} {incr i} {
set min [expr {int(rand()*$startlen)}]
set max [expr {$min+int(rand()*$startlen)}]
+ set before_len [llength $mylist]
+ set before_len_r [r llen mylist]
set mylist [lrange $mylist $min $max]
r ltrim mylist $min $max
- assert_equal $mylist [r lrange mylist 0 -1]
+ assert_equal $mylist [r lrange mylist 0 -1] "failed trim"
+ set starting [r llen mylist]
for {set j [r llen mylist]} {$j < $startlen} {incr j} {
set str [randomInt 9223372036854775807]
r rpush mylist $str
lappend mylist $str
+ assert_equal $mylist [r lrange mylist 0 -1] "failed append match"
}
}
}
diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl
index 94f9a0b79..ece6ea2d5 100644
--- a/tests/unit/type/list-3.tcl
+++ b/tests/unit/type/list-3.tcl
@@ -1,8 +1,7 @@
start_server {
tags {list ziplist}
overrides {
- "list-max-ziplist-value" 200000
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 16
}
} {
test {Explicit regression for a list bug} {
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index c8e26602b..e4d568cf1 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1,25 +1,24 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 5
}
} {
source "tests/unit/type/list-common.tcl"
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - ziplist} {
# first lpush then rpush
- assert_equal 1 [r lpush myziplist1 a]
- assert_equal 2 [r rpush myziplist1 b]
- assert_equal 3 [r rpush myziplist1 c]
+ assert_equal 1 [r lpush myziplist1 aa]
+ assert_equal 2 [r rpush myziplist1 bb]
+ assert_equal 3 [r rpush myziplist1 cc]
assert_equal 3 [r llen myziplist1]
- assert_equal a [r lindex myziplist1 0]
- assert_equal b [r lindex myziplist1 1]
- assert_equal c [r lindex myziplist1 2]
+ assert_equal aa [r lindex myziplist1 0]
+ assert_equal bb [r lindex myziplist1 1]
+ assert_equal cc [r lindex myziplist1 2]
assert_equal {} [r lindex myziplist2 3]
- assert_equal c [r rpop myziplist1]
- assert_equal a [r lpop myziplist1]
- assert_encoding ziplist myziplist1
+ assert_equal cc [r rpop myziplist1]
+ assert_equal aa [r lpop myziplist1]
+ assert_encoding quicklist myziplist1
# first rpush then lpush
assert_equal 1 [r rpush myziplist2 a]
@@ -32,13 +31,13 @@ start_server {
assert_equal {} [r lindex myziplist2 3]
assert_equal a [r rpop myziplist2]
assert_equal c [r lpop myziplist2]
- assert_encoding ziplist myziplist2
+ assert_encoding quicklist myziplist2
}
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - regular list} {
# first lpush then rpush
assert_equal 1 [r lpush mylist1 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist1
+ assert_encoding quicklist mylist1
assert_equal 2 [r rpush mylist1 b]
assert_equal 3 [r rpush mylist1 c]
assert_equal 3 [r llen mylist1]
@@ -51,7 +50,7 @@ start_server {
# first rpush then lpush
assert_equal 1 [r rpush mylist2 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist2
+ assert_encoding quicklist mylist2
assert_equal 2 [r lpush mylist2 b]
assert_equal 3 [r lpush mylist2 c]
assert_equal 3 [r llen mylist2]
@@ -74,34 +73,22 @@ start_server {
assert_equal {d c b a 0 1 2 3} [r lrange mylist 0 -1]
}
- test {DEL a list - ziplist} {
- assert_equal 1 [r del myziplist2]
- assert_equal 0 [r exists myziplist2]
- assert_equal 0 [r llen myziplist2]
- }
-
- test {DEL a list - regular list} {
+ test {DEL a list} {
assert_equal 1 [r del mylist2]
assert_equal 0 [r exists mylist2]
assert_equal 0 [r llen mylist2]
}
- proc create_ziplist {key entries} {
- r del $key
- foreach entry $entries { r rpush $key $entry }
- assert_encoding ziplist $key
- }
-
- proc create_linkedlist {key entries} {
+ proc create_list {key entries} {
r del $key
foreach entry $entries { r rpush $key $entry }
- assert_encoding linkedlist $key
+ assert_encoding quicklist $key
}
foreach {type large} [array get largevalue] {
test "BLPOP, BRPOP: single existing list - $type" {
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd blpop blist 1
assert_equal {blist a} [$rd read]
@@ -116,8 +103,8 @@ start_server {
test "BLPOP, BRPOP: multiple existing lists - $type" {
set rd [redis_deferring_client]
- create_$type blist1 "a $large c"
- create_$type blist2 "d $large f"
+ create_list blist1 "a $large c"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist1 a} [$rd read]
@@ -137,7 +124,7 @@ start_server {
test "BLPOP, BRPOP: second list has an entry - $type" {
set rd [redis_deferring_client]
r del blist1
- create_$type blist2 "d $large f"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist2 d} [$rd read]
@@ -151,7 +138,7 @@ start_server {
r del target
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd brpoplpush blist target 1
assert_equal d [$rd read]
@@ -517,28 +504,28 @@ start_server {
foreach {type large} [array get largevalue] {
test "LPUSHX, RPUSHX - $type" {
- create_$type xlist "$large c"
+ create_list xlist "$large c"
assert_equal 3 [r rpushx xlist d]
assert_equal 4 [r lpushx xlist a]
assert_equal "a $large c d" [r lrange xlist 0 -1]
}
test "LINSERT - $type" {
- create_$type xlist "a $large c d"
- assert_equal 5 [r linsert xlist before c zz]
- assert_equal "a $large zz c d" [r lrange xlist 0 10]
- assert_equal 6 [r linsert xlist after c yy]
- assert_equal "a $large zz c yy d" [r lrange xlist 0 10]
- assert_equal 7 [r linsert xlist after d dd]
- assert_equal -1 [r linsert xlist after bad ddd]
- assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10]
- assert_equal 8 [r linsert xlist before a aa]
- assert_equal -1 [r linsert xlist before bad aaa]
- assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10]
+ create_list xlist "a $large c d"
+ assert_equal 5 [r linsert xlist before c zz] "before c"
+ assert_equal "a $large zz c d" [r lrange xlist 0 10] "lrangeA"
+ assert_equal 6 [r linsert xlist after c yy] "after c"
+ assert_equal "a $large zz c yy d" [r lrange xlist 0 10] "lrangeB"
+ assert_equal 7 [r linsert xlist after d dd] "after d"
+ assert_equal -1 [r linsert xlist after bad ddd] "after bad"
+ assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeC"
+ assert_equal 8 [r linsert xlist before a aa] "before a"
+ assert_equal -1 [r linsert xlist before bad aaa] "before bad"
+ assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeD"
# check inserting integer encoded value
- assert_equal 9 [r linsert xlist before aa 42]
- assert_equal 42 [r lrange xlist 0 0]
+ assert_equal 9 [r linsert xlist before aa 42] "before aa"
+ assert_equal 42 [r lrange xlist 0 0] "lrangeE"
}
}
@@ -547,55 +534,7 @@ start_server {
set e
} {*ERR*syntax*error*}
- test {LPUSHX, RPUSHX convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is pushed
- create_ziplist xlist a
- assert_equal 2 [r rpushx xlist $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r lpushx xlist $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r rpushx xlist b]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r lpushx xlist b]
- assert_encoding linkedlist xlist
- }
-
- test {LINSERT convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is inserted
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist before a $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist after a $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist before a a]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist after a a]
- assert_encoding linkedlist xlist
-
- # don't convert when the value could not be inserted
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist before foo a]
- assert_encoding ziplist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist after foo a]
- assert_encoding ziplist xlist
- }
-
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
proc check_numbered_list_consistency {key} {
set len [r llen $key]
for {set i 0} {$i < $len} {incr i} {
@@ -664,16 +603,16 @@ start_server {
foreach {type large} [array get largevalue] {
test "RPOPLPUSH base case - $type" {
r del mylist1 mylist2
- create_$type mylist1 "a $large c d"
+ create_list mylist1 "a $large c d"
assert_equal d [r rpoplpush mylist1 mylist2]
assert_equal c [r rpoplpush mylist1 mylist2]
assert_equal "a $large" [r lrange mylist1 0 -1]
assert_equal "c d" [r lrange mylist2 0 -1]
- assert_encoding ziplist mylist2
+ assert_encoding quicklist mylist2
}
test "RPOPLPUSH with the same list as src and dst - $type" {
- create_$type mylist "a $large c"
+ create_list mylist "a $large c"
assert_equal "a $large c" [r lrange mylist 0 -1]
assert_equal c [r rpoplpush mylist mylist]
assert_equal "c a $large" [r lrange mylist 0 -1]
@@ -681,8 +620,8 @@ start_server {
foreach {othertype otherlarge} [array get largevalue] {
test "RPOPLPUSH with $type source and existing target $othertype" {
- create_$type srclist "a b c $large"
- create_$othertype dstlist "$otherlarge"
+ create_list srclist "a b c $large"
+ create_list dstlist "$otherlarge"
assert_equal $large [r rpoplpush srclist dstlist]
assert_equal c [r rpoplpush srclist dstlist]
assert_equal "a b" [r lrange srclist 0 -1]
@@ -691,7 +630,7 @@ start_server {
# When we rpoplpush'ed a large value, dstlist should be
# converted to the same encoding as srclist.
if {$type eq "linkedlist"} {
- assert_encoding linkedlist dstlist
+ assert_encoding quicklist dstlist
}
}
}
@@ -713,7 +652,7 @@ start_server {
}
test {RPOPLPUSH against non list dst key} {
- create_ziplist srclist {a b c d}
+ create_list srclist {a b c d}
r set dstlist x
assert_error WRONGTYPE* {r rpoplpush srclist dstlist}
assert_type string dstlist
@@ -727,7 +666,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "Basic LPOP/RPOP - $type" {
- create_$type mylist "$large 1 2"
+ create_list mylist "$large 1 2"
assert_equal $large [r lpop mylist]
assert_equal 2 [r rpop mylist]
assert_equal 1 [r lpop mylist]
@@ -745,7 +684,7 @@ start_server {
assert_error WRONGTYPE* {r rpop notalist}
}
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
test "Mass RPOP/LPOP - $type" {
r del mylist
set sum1 0
@@ -765,24 +704,24 @@ start_server {
foreach {type large} [array get largevalue] {
test "LRANGE basics - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {1 2 3 4 5 6 7 8} [r lrange mylist 1 -2]
assert_equal {7 8 9} [r lrange mylist -3 -1]
assert_equal {4} [r lrange mylist 4 4]
}
test "LRANGE inverted indexes - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {} [r lrange mylist 6 2]
}
test "LRANGE out of range indexes including the full list - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal "$large 1 2 3" [r lrange mylist -1000 1000]
}
test "LRANGE out of range negative end index - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal $large [r lrange mylist 0 -4]
assert_equal {} [r lrange mylist 0 -5]
}
@@ -796,7 +735,7 @@ start_server {
proc trim_list {type min max} {
upvar 1 large large
r del mylist
- create_$type mylist "1 2 3 4 $large"
+ create_list mylist "1 2 3 4 $large"
r ltrim mylist $min $max
r lrange mylist 0 -1
}
@@ -825,7 +764,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "LSET - $type" {
- create_$type mylist "99 98 $large 96 95"
+ create_list mylist "99 98 $large 96 95"
r lset mylist 1 foo
r lset mylist -1 bar
assert_equal "99 foo $large 96 bar" [r lrange mylist 0 -1]
@@ -847,7 +786,7 @@ start_server {
foreach {type e} [array get largevalue] {
test "LREM remove all the occurrences - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo"
assert_equal 2 [r lrem mylist 0 bar]
assert_equal "$e foo foobar foobared zap test foo" [r lrange mylist 0 -1]
}
@@ -863,7 +802,7 @@ start_server {
}
test "LREM starting from tail with negative count - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo foo"
assert_equal 1 [r lrem mylist -1 bar]
assert_equal "$e foo bar foobar foobared zap test foo foo" [r lrange mylist 0 -1]
}
@@ -874,7 +813,7 @@ start_server {
}
test "LREM deleting objects that may be int encoded - $type" {
- create_$type myotherlist "$e 1 2 3"
+ create_list myotherlist "$e 1 2 3"
assert_equal 1 [r lrem myotherlist 1 2]
assert_equal 3 [r llen myotherlist]
}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 162de0af7..a9a3d0835 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -293,6 +293,13 @@ start_server {
assert_equal 0 [r scard myset]
}
+ test "SPOP with <count>=1 - $type" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [list [r spop myset 1] [r spop myset 1] [r spop myset 1]]]
+ assert_equal 0 [r scard myset]
+ }
+
test "SRANDMEMBER - $type" {
create_set myset $contents
unset -nocomplain myset
@@ -304,6 +311,68 @@ start_server {
}
}
+ foreach {type contents} {
+ hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+ intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+ } {
+ test "SPOP with <count>" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
+ assert_equal 0 [r scard myset]
+ }
+ }
+
+ # As seen in intsetRandomMembers
+ test "SPOP using integers, testing Knuth's and Floyd's algorithm" {
+ create_set myset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ assert_encoding intset myset
+ assert_equal 20 [r scard myset]
+ r spop myset 1
+ assert_equal 19 [r scard myset]
+ r spop myset 2
+ assert_equal 17 [r scard myset]
+ r spop myset 3
+ assert_equal 14 [r scard myset]
+ r spop myset 10
+ assert_equal 4 [r scard myset]
+ r spop myset 10
+ assert_equal 0 [r scard myset]
+ r spop myset 1
+ assert_equal 0 [r scard myset]
+ } {}
+
+ test "SPOP using integers with Knuth's algorithm" {
+ r spop nonexisting_key 100
+ } {}
+
+ test "SPOP new implementation: code path #1" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 30]
+ assert {[lsort $content] eq [lsort $res]}
+ }
+
+ test "SPOP new implementation: code path #2" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 2]
+ assert {[llength $res] == 2}
+ assert {[r scard myset] == 18}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
+ test "SPOP new implementation: code path #3" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 18]
+ assert {[llength $res] == 18}
+ assert {[r scard myset] == 2}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
test "SRANDMEMBER with <count> against non existing key" {
r srandmember nonexisting_key 100
} {}
diff --git a/utils/cluster_fail_time.tcl b/utils/cluster_fail_time.tcl
new file mode 100644
index 000000000..87399495f
--- /dev/null
+++ b/utils/cluster_fail_time.tcl
@@ -0,0 +1,50 @@
+# This simple script is used in order to estimate the average PFAIL->FAIL
+# state switch after a failure.
+
+set ::sleep_time 10 ; # How much to sleep to trigger PFAIL.
+set ::fail_port 30016 ; # Node to put in sleep.
+set ::other_port 30001 ; # Node to use to monitor the flag switch.
+
+# Return the arithmetic mean of the numbers in the list 'vector'.
+# The accumulator starts at 0.0 so the division is a floating point one.
+proc avg vector {
+    set total 0.0
+    foreach sample $vector {
+        set total [expr {$total + $sample}]
+    }
+    return [expr {$total / [llength $vector]}]
+}
+
+# Endless measurement loop. Each iteration blocks one node with DEBUG SLEEP,
+# measures for how long the monitoring node reports the "fail?" flag, and
+# prints the running average of all the samples collected so far.
+set samples {}
+while {1} {
+    # Run DEBUG SLEEP on the target node in the background.
+    exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null &
+
+    # Poll CLUSTER NODES every 100 ms until "fail?" appears.
+    set output [exec redis-cli -p $::other_port cluster nodes]
+    while {![string match {*fail\?*} $output]} {
+        after 100
+        set output [exec redis-cli -p $::other_port cluster nodes]
+    }
+
+    puts "FAIL?"
+    set start [clock milliseconds]
+
+    # Keep polling until "fail?" is no longer part of the output.
+    set output [exec redis-cli -p $::other_port cluster nodes]
+    while {[string match {*fail\?*} $output]} {
+        after 100
+        set output [exec redis-cli -p $::other_port cluster nodes]
+    }
+
+    puts "FAIL"
+    set elapsed [expr {[clock milliseconds] - $start}]
+    puts $elapsed
+    lappend samples $elapsed
+
+    puts "AVG([llength $samples]): [avg $samples]"
+
+    # Wait for the instance to be available again.
+    exec redis-cli -p $::fail_port ping
+
+    # Wait for the fail flag to be cleared.
+    after 2000
+}
diff --git a/utils/create-cluster/.gitignore b/utils/create-cluster/.gitignore
new file mode 100644
index 000000000..cdd7c19c8
--- /dev/null
+++ b/utils/create-cluster/.gitignore
@@ -0,0 +1 @@
+config.sh
diff --git a/utils/create-cluster/README b/utils/create-cluster/README
new file mode 100644
index 000000000..1f43748ee
--- /dev/null
+++ b/utils/create-cluster/README
@@ -0,0 +1,27 @@
+Create-cluster is a small script used to easily start a big number of Redis
+instances configured to run in cluster mode. Its main goal is to allow manual
+testing in a condition which is not easy to replicate with the Redis cluster
+unit tests, for example when a lot of instances are needed in order to trigger
+a given bug.
+
+The tool can also be used just to easily create a number of instances in a
+Redis Cluster in order to experiment a bit with the system.
+
+USAGE
+---
+
+To create a cluster, follow these steps:
+
+1. Edit create-cluster and change the start / end port, depending on the
+number of instances you want to create.
+2. Use "./create-cluster start" in order to run the instances.
+3. Use "./create-cluster create" in order to execute redis-trib create, so that
+an actual Redis cluster will be created.
+4. Now you are ready to play with the cluster. AOF files and logs for each instance are created in the current directory.
+
+In order to stop a cluster:
+
+1. Use "./create-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change your mind.
+2. Use "./create-cluster clean" to remove all the AOF / log files to restart with a clean environment.
+
+Use the command "./create-cluster help" to get the full list of features.
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
new file mode 100755
index 000000000..efb3135d4
--- /dev/null
+++ b/utils/create-cluster/create-cluster
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# Start, create, stop and inspect a set of local Redis Cluster instances
+# listening on consecutive ports. All the paths are relative: the Redis
+# binaries are run from ../../src, while logs, AOF/RDB files and node
+# configs are handled in the current directory.
+
+# Settings
+PORT=30000
+TIMEOUT=2000
+NODES=6
+REPLICAS=1
+
+# You may want to put the above config parameters into config.sh in order to
+# override the defaults without modifying this script.
+
+# Use -f: a unary -a inside [ ] is easily confused with the binary AND.
+if [ -f config.sh ]
+then
+    source "config.sh"
+fi
+
+# Computed vars
+# Every loop below increments PORT before using it, so the instances use
+# ports PORT+1 .. PORT+NODES.
+ENDPORT=$((PORT+NODES))
+
+if [ "$1" == "start" ]
+then
+    while [ $((PORT < ENDPORT)) != "0" ]; do
+        PORT=$((PORT+1))
+        echo "Starting $PORT"
+        ../../src/redis-server --port $PORT --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes
+    done
+    exit 0
+fi
+
+if [ "$1" == "create" ]
+then
+    HOSTS=""
+    while [ $((PORT < ENDPORT)) != "0" ]; do
+        PORT=$((PORT+1))
+        HOSTS="$HOSTS 127.0.0.1:$PORT"
+    done
+    # HOSTS is intentionally unquoted: it must expand to one word per node.
+    ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS
+    exit 0
+fi
+
+if [ "$1" == "stop" ]
+then
+    while [ $((PORT < ENDPORT)) != "0" ]; do
+        PORT=$((PORT+1))
+        echo "Stopping $PORT"
+        # Use the redis-cli from the source tree, like "start" and "call"
+        # do, instead of whatever redis-cli happens to be in $PATH.
+        ../../src/redis-cli -p $PORT shutdown nosave
+    done
+    exit 0
+fi
+
+if [ "$1" == "watch" ]
+then
+    PORT=$((PORT+1))
+    # Runs until interrupted.
+    while true; do
+        clear
+        date
+        ../../src/redis-cli -p $PORT cluster nodes | head -30
+        sleep 1
+    done
+fi
+
+if [ "$1" == "tail" ]
+then
+    INSTANCE=$2
+    PORT=$((PORT+INSTANCE))
+    tail -f "${PORT}.log"
+    exit 0
+fi
+
+if [ "$1" == "call" ]
+then
+    while [ $((PORT < ENDPORT)) != "0" ]; do
+        PORT=$((PORT+1))
+        # Forward every argument after "call" exactly as given: quoting
+        # keeps arguments containing spaces intact and lifts the old
+        # eight-argument limit.
+        ../../src/redis-cli -p $PORT "${@:2}"
+    done
+    exit 0
+fi
+
+if [ "$1" == "clean" ]
+then
+    rm -rf *.log
+    rm -rf appendonly*.aof
+    rm -rf dump*.rdb
+    rm -rf nodes*.conf
+    exit 0
+fi
+
+echo "Usage: $0 [start|create|stop|watch|tail|call|clean]"
+echo "start -- Launch Redis Cluster instances."
+echo "create -- Create a cluster using redis-trib create."
+echo "stop -- Stop Redis Cluster instances."
+echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node."
+echo "tail <id> -- Run tail -f of instance at base port + ID."
+echo "call <cmd> -- Run a redis-cli command against every instance."
+echo "clean -- Remove all instances data, logs, configs."
diff --git a/utils/hashtable/README b/utils/hashtable/README
new file mode 100644
index 000000000..e2862f012
--- /dev/null
+++ b/utils/hashtable/README
@@ -0,0 +1,13 @@
+Hash table implementation related utilities.
+
+rehashing.c
+---
+
+Visually show buckets in the two hash tables between rehashings. Also stress
+test the getRandomKeys() implementation, which may actually disappear from
+Redis soon; however, some of the visualization code is reusable when
+investigating new bugs.
+
+Compile with:
+
+ cc -I ../../src/ rehashing.c ../../src/zmalloc.c ../../src/dict.c -o rehashing_test
diff --git a/utils/hashtable/rehashing.c b/utils/hashtable/rehashing.c
new file mode 100644
index 000000000..df1f52bb1
--- /dev/null
+++ b/utils/hashtable/rehashing.c
@@ -0,0 +1,132 @@
+#include "redis.h"
+#include "dict.h"
+
+/* Assertion failure handler for this test program: print the three values
+ * received on standard output, then terminate the process with exit
+ * status 1. */
+void _redisAssert(char *x, char *y, int l) {
+    fprintf(stdout,"ASSERT: %s %s %d\n",x,y,l);
+    exit(1);
+}
+
+/* Hash callback of the test dictionary type.
+ *
+ * The key is an integer carried directly inside the key pointer. Its bytes
+ * are first hashed with dictGenHashFunction(), then the result goes through
+ * a fixed sequence of shift/add/xor steps. The value is converted to
+ * unsigned int when returned. */
+unsigned int dictKeyHash(const void *keyp) {
+    unsigned long h = (unsigned long)keyp;
+
+    h = dictGenHashFunction(&h,sizeof(h));
+    h += ~(h << 15);
+    h ^= (h >> 10);
+    h += (h << 3);
+    h ^= (h >> 6);
+    h += ~(h << 11);
+    h ^= (h >> 16);
+    return h;
+}
+
+/* Key comparison callback of the test dictionary type.
+ *
+ * Keys are integers carried inside the key pointers, so two keys match when
+ * the pointer values themselves are equal. 'privdata' is not used.
+ * Returns 1 when the keys are the same, 0 otherwise. */
+int dictKeyCompare(void *privdata, const void *key1, const void *key2) {
+    return (unsigned long)key1 == (unsigned long)key2;
+}
+
+/* Dictionary type used by every dictionary created in this test program.
+ * Only the hash and key compare callbacks are provided: keys and values are
+ * neither duplicated nor destroyed by the dictionary (those callbacks are
+ * NULL). */
+dictType dictTypeTest = {
+ dictKeyHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+/* Print a one line map of a hash table: one character per bucket, '1' when
+ * the bucket is not empty and '0' when it is, followed by a newline.
+ * A hash table without a bucket array is printed as "NULL". */
+void showBuckets(dictht ht) {
+    int idx;
+
+    if (ht.table == NULL) {
+        printf("NULL\n");
+        return;
+    }
+    for (idx = 0; idx < ht.size; idx++)
+        putchar(ht.table[idx] ? '1' : '0');
+    printf("\n");
+}
+
+/* Print the state of the two hash tables of a dictionary.
+ *
+ * When rehashidx is not -1 a "rhidx:" line is printed first, made of one dot
+ * per index below rehashidx followed by a '|'. Then the bucket maps of ht[0]
+ * and ht[1] are printed, followed by an empty line. */
+void show(dict *d) {
+    if (d->rehashidx != -1) {
+        int dots = 0;
+
+        printf("rhidx: ");
+        while (dots < d->rehashidx) {
+            printf(".");
+            dots++;
+        }
+        printf("|\n");
+    }
+
+    printf("ht[0]: ");
+    showBuckets(d->ht[0]);
+
+    printf("ht[1]: ");
+    showBuckets(d->ht[1]);
+
+    printf("\n");
+}
+
+/* qsort() comparison function ordering an array of dictEntry pointers by
+ * address.
+ *
+ * 'a' and 'b' point to two elements of the array (each one a dictEntry
+ * pointer). Returns -1, 0 or 1 when the pointer at 'a' is respectively
+ * lower than, equal to, or greater than the pointer at 'b'.
+ *
+ * The result is computed with comparisons instead of returning the
+ * difference of the two addresses: the difference of two unsigned longs
+ * does not fit in an int, so once truncated it could have the wrong sign,
+ * or be zero for two different pointers, making the sort order
+ * inconsistent. */
+int sortPointers(const void *a, const void *b) {
+    unsigned long la, lb;
+
+    la = (unsigned long) (*((dictEntry * const *)a));
+    lb = (unsigned long) (*((dictEntry * const *)b));
+    return (la > lb) - (la < lb);
+}
+
+/* Stress test dictGetRandomKeys() against the dictionary 'd'.
+ *
+ * The function is called 'times' times, each time asking for a random number
+ * of entries between 0 and the current dictionary size. The program exits
+ * with status 1 after printing an error if the number of entries returned
+ * differs from the number requested, or if two equal entry pointers are
+ * found next to each other once the returned entries are sorted. */
+void stressGetKeys(dict *d, int times) {
+    dictEntry **entries = zmalloc(sizeof(dictEntry*)*dictSize(d));
+    int round;
+
+    for (round = 0; round < times; round++) {
+        int requested = rand() % (dictSize(d)+1);
+        int returned = dictGetRandomKeys(d, entries, requested);
+        int k;
+
+        if (requested != returned) {
+            printf("*** ERROR! Req: %d, Ret: %d\n", requested, returned);
+            exit(1);
+        }
+
+        /* Sort by address so that a repeated entry shows up as two equal
+         * neighbours. */
+        qsort(entries,returned,sizeof(dictEntry*),sortPointers);
+        for (k = 0; k+1 < returned; k++) {
+            if (entries[k] == entries[k+1]) {
+                printf("*** ERROR! Duplicated element detected\n");
+                exit(1);
+            }
+        }
+    }
+    zfree(entries);
+}
+
+#define MAX1 120    /* Number of keys used by the visualization phase. */
+#define MAX2 1000   /* Number of keys used by the stress test phase. */
+
+/* Test driver, made of two phases:
+ *
+ * 1) Add MAX1 integer keys to a dictionary and then delete them one by one
+ *    (calling dictResize() after each deletion), showing the buckets of the
+ *    two hash tables after every step.
+ * 2) Repeat the same add / delete pattern with MAX2 keys on a new
+ *    dictionary, running stressGetKeys() after every step instead of
+ *    showing the buckets.
+ *
+ * Keys are the integers 0, 1, 2, ... stored directly in the key pointer;
+ * values are always NULL. */
+int main(void) {
+    unsigned long k;
+    dict *d;
+
+    d = dictCreate(&dictTypeTest,NULL);
+    srand(time(NULL));
+
+    /* Phase 1: visualization while the dictionary grows... */
+    for (k = 0; k < MAX1; k++) {
+        dictAdd(d,(void*)k,NULL);
+        show(d);
+    }
+    printf("Size: %d\n", (int)dictSize(d));
+
+    /* ... and while it shrinks. */
+    for (k = 0; k < MAX1; k++) {
+        dictDelete(d,(void*)k);
+        dictResize(d);
+        show(d);
+    }
+    dictRelease(d);
+
+    /* Phase 2: random keys stress test on a fresh dictionary. */
+    d = dictCreate(&dictTypeTest,NULL);
+    printf("Getkeys stress test\n");
+
+    for (k = 0; k < MAX2; k++) {
+        dictAdd(d,(void*)k,NULL);
+        stressGetKeys(d,100);
+    }
+
+    for (k = 0; k < MAX2; k++) {
+        dictDelete(d,(void*)k);
+        dictResize(d);
+        stressGetKeys(d,100);
+    }
+    dictRelease(d);
+
+    printf("TEST PASSED!\n");
+    return 0;
+}