summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--CONTRIBUTING4
-rw-r--r--README19
-rw-r--r--deps/Makefile2
-rw-r--r--deps/lua/src/lua_cmsgpack.c95
-rw-r--r--redis.conf60
-rw-r--r--src/Makefile32
-rw-r--r--src/anet.c25
-rw-r--r--src/anet.h3
-rw-r--r--src/aof.c44
-rw-r--r--src/bitops.c30
-rw-r--r--src/cluster.c186
-rw-r--r--src/cluster.h15
-rw-r--r--src/config.c104
-rw-r--r--src/config.h10
-rw-r--r--src/crc64.c8
-rw-r--r--src/crc64.h4
-rw-r--r--src/db.c35
-rw-r--r--src/debug.c66
-rw-r--r--src/dict.c5
-rw-r--r--src/endianconv.c8
-rw-r--r--src/endianconv.h4
-rw-r--r--src/fmacros.h1
-rw-r--r--src/help.h4
-rw-r--r--src/hyperloglog.c6
-rw-r--r--src/intset.c135
-rw-r--r--src/intset.h5
-rw-r--r--src/latency.c4
-rw-r--r--src/lzfP.h56
-rw-r--r--src/lzf_c.c42
-rw-r--r--src/lzf_d.c59
-rw-r--r--src/memtest.c3
-rw-r--r--src/networking.c40
-rw-r--r--src/object.c66
-rw-r--r--src/quicklist.c2639
-rw-r--r--src/quicklist.h169
-rw-r--r--src/rdb.c378
-rw-r--r--src/rdb.h9
-rw-r--r--src/redis-benchmark.c85
-rw-r--r--src/redis-check-rdb.c (renamed from src/redis-check-dump.c)253
-rw-r--r--src/redis-cli.c109
-rwxr-xr-xsrc/redis-trib.rb90
-rw-r--r--src/redis.c336
-rw-r--r--src/redis.h78
-rw-r--r--src/replication.c5
-rw-r--r--src/scripting.c38
-rw-r--r--src/sds.c41
-rw-r--r--src/sds.h4
-rw-r--r--src/sentinel.c87
-rw-r--r--src/sha1.c13
-rw-r--r--src/sha1.h7
-rw-r--r--src/solarisfixes.h4
-rw-r--r--src/sort.c22
-rw-r--r--src/sparkline.c3
-rw-r--r--src/t_hash.c2
-rw-r--r--src/t_list.c362
-rw-r--r--src/t_set.c253
-rw-r--r--src/t_string.c2
-rw-r--r--src/t_zset.c2
-rw-r--r--src/util.c102
-rw-r--r--src/util.h4
-rw-r--r--src/ziplist.c312
-rw-r--r--src/ziplist.h7
-rw-r--r--src/zipmap.c15
-rw-r--r--src/zipmap.h4
-rw-r--r--src/zmalloc.c57
-rw-r--r--src/zmalloc.h1
-rw-r--r--tests/cluster/run.tcl1
-rw-r--r--tests/cluster/tests/04-resharding.tcl13
-rw-r--r--tests/cluster/tests/includes/init-tests.tcl9
-rw-r--r--tests/instances.tcl55
-rw-r--r--tests/integration/aof.tcl24
-rw-r--r--tests/integration/replication.tcl26
-rw-r--r--tests/support/cluster.tcl2
-rw-r--r--tests/support/server.tcl16
-rw-r--r--tests/support/test.tcl7
-rw-r--r--tests/test_helper.tcl73
-rw-r--r--tests/unit/aofrw.tcl4
-rw-r--r--tests/unit/dump.tcl4
-rw-r--r--tests/unit/scan.tcl10
-rw-r--r--tests/unit/scripting.tcl2
-rw-r--r--tests/unit/sort.tcl31
-rw-r--r--tests/unit/type/list-2.tcl9
-rw-r--r--tests/unit/type/list-3.tcl3
-rw-r--r--tests/unit/type/list.tcl169
-rw-r--r--tests/unit/type/set.tcl42
-rw-r--r--utils/cluster_fail_time.tcl50
-rw-r--r--utils/create-cluster/.gitignore1
-rw-r--r--utils/create-cluster/README27
-rwxr-xr-xutils/create-cluster/create-cluster95
-rwxr-xr-xutils/redis_init_script.tpl5
-rwxr-xr-xutils/whatisdoing.sh8
92 files changed, 6015 insertions, 1345 deletions
diff --git a/.gitignore b/.gitignore
index d3b1c2f24..3d346fbcf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
dump.rdb
redis-benchmark
redis-check-aof
+redis-check-rdb
redis-check-dump
redis-cli
redis-sentinel
diff --git a/CONTRIBUTING b/CONTRIBUTING
index f7b6836f7..b416b9561 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -20,7 +20,7 @@ each source file that you contribute.
# How to provide a patch for a new feature
-1. Drop a message to the Redis Google Group with a proposal of semantics/API.
+1. If it is a major feature or a semantical change, write an RCP (Redis Change Proposal). Check the documentation here: https://github.com/redis/redis-rcp
2. If in step 1 you get an acknowledge from the project leaders, use the
following procedure to submit a patch:
@@ -31,4 +31,6 @@ each source file that you contribute.
d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
e. Done :)
+For minor fixes just open a pull request on Github.
+
Thanks!
diff --git a/README b/README
index b7a12b828..369118631 100644
--- a/README
+++ b/README
@@ -26,6 +26,25 @@ After building Redis is a good idea to test it, using:
% make test
+Fixing build problems with dependencies or cached build options
+—--------
+Redis has some dependencies which are included into the "deps" directory.
+"make" does not rebuild dependencies automatically, even if something in the
+source code of dependencies is changes.
+
+When you update the source code with `git pull` or when code inside the
+dependencies tree is modified in any other way, make sure to use the following
+command in order to really clean everything and rebuild from scratch:
+
+ make distclean
+
+This will clean: jemalloc, lua, hiredis, linenoise.
+
+Also if you force certain build options like 32bit target, no C compiler
+optimizations (for debugging purposes), and other similar build time options,
+those options are cached indefinitely until you issue a "make distclean"
+command.
+
Fixing problems building 32 bit binaries
---------
diff --git a/deps/Makefile b/deps/Makefile
index 1f623ea7b..71f6d3a2c 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -58,7 +58,7 @@ ifeq ($(uname_S),SunOS)
LUA_CFLAGS= -D__C99FEATURES__=1
endif
-LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL $(CFLAGS)
+LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' $(CFLAGS)
LUA_LDFLAGS+= $(LDFLAGS)
# lua's Makefile defines AR="ar rcu", which is unusual, and makes it more
# challenging to cross-compile lua (and redis). These defines make it easier
diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c
index 32b9a0bc3..e13f053d2 100644
--- a/deps/lua/src/lua_cmsgpack.c
+++ b/deps/lua/src/lua_cmsgpack.c
@@ -13,26 +13,27 @@
#define LUACMSGPACK_COPYRIGHT "Copyright (C) 2012, Salvatore Sanfilippo"
#define LUACMSGPACK_DESCRIPTION "MessagePack C implementation for Lua"
-#define LUACMSGPACK_MAX_NESTING 16 /* Max tables nesting. */
-
/* Allows a preprocessor directive to override MAX_NESTING */
#ifndef LUACMSGPACK_MAX_NESTING
- #define LUACMSGPACK_MAX_NESTING 16
-#endif
-
-#if (_XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L)
- #define IS_FINITE(x) isfinite(x)
-#else
- #define IS_FINITE(x) ((x) == (x) && (x) + 1 > (x))
+ #define LUACMSGPACK_MAX_NESTING 16 /* Max tables nesting. */
#endif
/* Check if float or double can be an integer without loss of precision */
-#define IS_INT_TYPE_EQUIVALENT(x, T) (IS_FINITE(x) && (T)(x) == (x))
+#define IS_INT_TYPE_EQUIVALENT(x, T) (!isinf(x) && (T)(x) == (x))
#define IS_INT64_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int64_t)
#define IS_INT_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int)
-#if LUA_VERSION_NUM < 503
+/* If size of pointer is equal to a 4 byte integer, we're on 32 bits. */
+#if UINTPTR_MAX == UINT_MAX
+ #define BITS_32 1
+#else
+ #define BITS_32 0
+#endif
+
+#if BITS_32
+ #define lua_pushunsigned(L, n) lua_pushnumber(L, n)
+#else
#define lua_pushunsigned(L, n) lua_pushinteger(L, n)
#endif
@@ -72,7 +73,7 @@ static void memrevifle(void *ptr, size_t len) {
int test = 1;
unsigned char *testp = (unsigned char*) &test;
- if (testp[0] == 0) return; /* Big endian, nothign to do. */
+ if (testp[0] == 0) return; /* Big endian, nothing to do. */
len /= 2;
while(len--) {
aux = *p;
@@ -84,7 +85,7 @@ static void memrevifle(void *ptr, size_t len) {
}
/* ---------------------------- String buffer ----------------------------------
- * This is a simple implementation of string buffers. The only opereation
+ * This is a simple implementation of string buffers. The only operation
* supported is creating empty buffers and appending bytes to it.
* The string buffer uses 2x preallocation on every realloc for O(N) append
* behavior. */
@@ -108,7 +109,7 @@ static mp_buf *mp_buf_new(lua_State *L) {
mp_buf *buf = NULL;
/* Old size = 0; new size = sizeof(*buf) */
- buf = (mp_buf*)mp_realloc(L, buf, 0, sizeof(*buf));
+ buf = (mp_buf*)mp_realloc(L, NULL, 0, sizeof(*buf));
buf->L = L;
buf->b = NULL;
@@ -143,7 +144,7 @@ void mp_buf_free(mp_buf *buf) {
* be used to report errors. */
#define MP_CUR_ERROR_NONE 0
-#define MP_CUR_ERROR_EOF 1 /* Not enough data to complete opereation. */
+#define MP_CUR_ERROR_EOF 1 /* Not enough data to complete operation. */
#define MP_CUR_ERROR_BADFMT 2 /* Bad data format */
typedef struct mp_cur {
@@ -160,7 +161,7 @@ static void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
#define mp_cur_consume(_c,_len) do { _c->p += _len; _c->left -= _len; } while(0)
-/* When there is not enough room we set an error in the cursor and return, this
+/* When there is not enough room we set an error in the cursor and return. This
* is very common across the code so we have a macro to make the code look
* a bit simpler. */
#define mp_cur_need(_c,_len) do { \
@@ -253,7 +254,7 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
}
} else {
if (n >= -32) {
- b[0] = ((char)n); /* negative fixnum */
+ b[0] = ((signed char)n); /* negative fixnum */
enclen = 1;
} else if (n >= -128) {
b[0] = 0xd0; /* int 8 */
@@ -350,7 +351,11 @@ static void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
/* Lua 5.3 has a built in 64-bit integer type */
static void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
+#if (LUA_VERSION_NUM < 503) && BITS_32
+ lua_Number i = lua_tonumber(L,-1);
+#else
lua_Integer i = lua_tointeger(L,-1);
+#endif
mp_encode_int(buf, (int64_t)i);
}
@@ -392,7 +397,7 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
/* First step: count keys into table. No other way to do it with the
* Lua API, we need to iterate a first time. Note that an alternative
* would be to do a single run, and then hack the buffer to insert the
- * map opcodes for message pack. Too hachish for this lib. */
+ * map opcodes for message pack. Too hackish for this lib. */
lua_pushnil(L);
while(lua_next(L,-2)) {
lua_pop(L,1); /* remove value, keep key for next iteration. */
@@ -432,11 +437,12 @@ static int table_is_an_array(lua_State *L) {
lua_pop(L,1); /* Stack: ... key */
/* The <= 0 check is valid here because we're comparing indexes. */
#if LUA_VERSION_NUM < 503
- if (!lua_isnumber(L,-1) || (n = lua_tonumber(L, -1)) <= 0 ||
- !IS_INT_EQUIVALENT(n)) {
+ if ((LUA_TNUMBER != lua_type(L,-1)) || (n = lua_tonumber(L, -1)) <= 0 ||
+ !IS_INT_EQUIVALENT(n))
#else
- if (!lua_isinteger(L,-1) || (n = lua_tointeger(L, -1)) <= 0) {
+ if (!lua_isinteger(L,-1) || (n = lua_tointeger(L, -1)) <= 0)
#endif
+ {
lua_settop(L, stacktop);
return 0;
}
@@ -473,7 +479,7 @@ static void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
int t = lua_type(L,-1);
- /* Limit the encoding of nested tables to a specfiied maximum depth, so that
+ /* Limit the encoding of nested tables to a specified maximum depth, so that
* we survive when called against circular references in tables. */
if (t == LUA_TTABLE && level == LUACMSGPACK_MAX_NESTING) t = LUA_TNIL;
switch(t) {
@@ -536,6 +542,7 @@ static int mp_pack(lua_State *L) {
void mp_decode_to_lua_type(lua_State *L, mp_cur *c);
void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
int index = 1;
lua_newtable(L);
@@ -548,6 +555,7 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
}
void mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
lua_newtable(L);
while(len--) {
mp_decode_to_lua_type(L,c); /* key */
@@ -580,7 +588,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xd0: /* int 8 */
mp_cur_need(c,2);
- lua_pushinteger(L,(char)c->p[1]);
+ lua_pushinteger(L,(signed char)c->p[1]);
mp_cur_consume(c,2);
break;
case 0xcd: /* uint 16 */
@@ -630,7 +638,11 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xd3: /* int 64 */
mp_cur_need(c,9);
+#if LUA_VERSION_NUM < 503
+ lua_pushnumber(L,
+#else
lua_pushinteger(L,
+#endif
((int64_t)c->p[1] << 56) |
((int64_t)c->p[2] << 48) |
((int64_t)c->p[3] << 40) |
@@ -687,13 +699,14 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdb: /* raw 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
- mp_cur_need(c,5+l);
- lua_pushlstring(L,(char*)c->p+5,l);
- mp_cur_consume(c,5+l);
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
+ mp_cur_consume(c,5);
+ mp_cur_need(c,l);
+ lua_pushlstring(L,(char*)c->p,l);
+ mp_cur_consume(c,l);
}
break;
case 0xdc: /* array 16 */
@@ -707,10 +720,10 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdd: /* array 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_array(L,c,l);
}
@@ -726,10 +739,10 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdf: /* map 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_hash(L,c,l);
}
@@ -818,15 +831,15 @@ static int mp_unpack(lua_State *L) {
}
static int mp_unpack_one(lua_State *L) {
- int offset = luaL_optint(L, 2, 0);
+ int offset = luaL_optinteger(L, 2, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
return mp_unpack_full(L, 1, offset);
}
static int mp_unpack_limit(lua_State *L) {
- int limit = luaL_checkint(L, 2);
- int offset = luaL_optint(L, 3, 0);
+ int limit = luaL_checkinteger(L, 2);
+ int offset = luaL_optinteger(L, 3, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
diff --git a/redis.conf b/redis.conf
index 81dcf5a86..38e258698 100644
--- a/redis.conf
+++ b/redis.conf
@@ -36,6 +36,17 @@
# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
daemonize no
+# If you run Redis from upstart or systemd, Redis can interact with your
+# supervision tree. Options:
+# supervised no - no supervision interaction
+# supervised upstart - signal upstart by putting Redis into SIGSTOP mode
+# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
+# supervised auto - detect upstart or systemd method based on
+# UPSTART_JOB or NOTIFY_SOCKET environment variables
+# Note: these supervision methods only signal "process is ready."
+# They do not enable continuous liveness pings back to your supervisor.
+supervised no
+
# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
# default. You can specify a custom pid file location here.
pidfile /var/run/redis.pid
@@ -242,6 +253,10 @@ slave-read-only yes
# Replication SYNC strategy: disk or socket.
#
+# -------------------------------------------------------
+# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY
+# -------------------------------------------------------
+#
# New slaves and reconnecting slaves that are not able to continue the replication
# process just receiving differences, need to do what is called a "full
# synchronization". An RDB file is transmitted from the master to the slaves.
@@ -268,7 +283,7 @@ slave-read-only yes
repl-diskless-sync no
# When diskless replication is enabled, it is possible to configure the delay
-# the server waits in order to spawn the child that trnasfers the RDB via socket
+# the server waits in order to spawn the child that transfers the RDB via socket
# to the slaves.
#
# This is important since once the transfer starts, it is not possible to serve
@@ -615,6 +630,12 @@ lua-time-limit 5000
################################ REDIS CLUSTER ###############################
#
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however
+# in order to mark it as "mature" we need to wait for a non trivial percentage
+# of users to deploy it in production.
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#
# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
# started as cluster nodes can. In order to start a Redis instance as a
# cluster node enable the cluster support uncommenting the following:
@@ -756,7 +777,7 @@ slowlog-max-len 128
# By default latency monitoring is disabled since it is mostly not needed
# if you don't have latency issues, and collecting data has a performance
# impact, that while very small, can be measured under big load. Latency
-# monitoring can easily be enalbed at runtime using the command
+# monitoring can easily be enabled at runtime using the command
# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
latency-monitor-threshold 0
@@ -814,11 +835,36 @@ notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
-# Similarly to hashes, small lists are also encoded in a special way in order
-# to save a lot of space. The special representation is only used when
-# you are under the following limits:
-list-max-ziplist-entries 512
-list-max-ziplist-value 64
+# Lists are also encoded in a special way to save a lot of space.
+# The number of entries allowed per internal list node can be specified
+# as a fixed maximum size or a maximum number of elements.
+# For a fixed maximum size, use -5 through -1, meaning:
+# -5: max size: 64 Kb <-- not recommended for normal workloads
+# -4: max size: 32 Kb <-- not recommended
+# -3: max size: 16 Kb <-- probably not recommended
+# -2: max size: 8 Kb <-- good
+# -1: max size: 4 Kb <-- good
+# Positive numbers mean store up to _exactly_ that number of elements
+# per list node.
+# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),
+# but if your use case is unique, adjust the settings as necessary.
+list-max-ziplist-size -2
+
+# Lists may also be compressed.
+# Compress depth is the number of quicklist ziplist nodes from *each* side of
+# the list to *exclude* from compression. The head and tail of the list
+# are always uncompressed for fast push/pop operations. Settings are:
+# 0: disable all list compression
+# 1: depth 1 means "don't start compressing until after 1 node into the list,
+# going from either the head or tail"
+# So: [head]->node->node->...->node->[tail]
+# [head], [tail] will always be uncompressed; inner nodes will compress.
+# 2: [head]->[next]->node->node->...->node->[prev]->[tail]
+# 2 here means: don't compress head or head->next or tail->prev or tail,
+# but compress all nodes between them.
+# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail]
+# etc.
+list-compress-depth 0
# Sets have a special encoding in just one case: when a set is composed
# of just strings that happen to be integers in radix 10 in the range
diff --git a/src/Makefile b/src/Makefile
index 96af74afa..271ab34d8 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -18,7 +18,7 @@ OPTIMIZATION?=-O2
DEPENDENCY_TARGETS=hiredis linenoise lua
# Default settings
-STD=-std=c99 -pedantic
+STD=-std=c99 -pedantic -DREDIS_STATIC=''
WARN=-Wall -W
OPT=$(OPTIMIZATION)
@@ -46,6 +46,10 @@ ifeq ($(USE_JEMALLOC),yes)
MALLOC=jemalloc
endif
+ifeq ($(USE_JEMALLOC),no)
+ MALLOC=libc
+endif
+
# Override default settings if possible
-include .make-settings
@@ -58,7 +62,7 @@ ifeq ($(uname_S),SunOS)
# SunOS
INSTALL=cp -pf
FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6
- FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread
+ FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt
else
ifeq ($(uname_S),Darwin)
# Darwin (nothing to do)
@@ -113,17 +117,16 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
REDIS_BENCHMARK_NAME=redis-benchmark
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o redis-benchmark.o
-REDIS_CHECK_DUMP_NAME=redis-check-dump
-REDIS_CHECK_DUMP_OBJ=redis-check-dump.o lzf_c.o lzf_d.o crc64.o
+REDIS_CHECK_RDB_NAME=redis-check-rdb
REDIS_CHECK_AOF_NAME=redis-check-aof
REDIS_CHECK_AOF_OBJ=redis-check-aof.o
-all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME)
+all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME)
@echo ""
@echo "Hint: It's a good idea to run 'make test' ;)"
@echo ""
@@ -174,6 +177,10 @@ $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)
+# redis-check-rdb
+$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME)
+
# redis-cli
$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS)
@@ -182,10 +189,6 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a $(FINAL_LIBS)
-# redis-check-dump
-$(REDIS_CHECK_DUMP_NAME): $(REDIS_CHECK_DUMP_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
-
# redis-check-aof
$(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
@@ -197,7 +200,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_CC) -c $<
clean:
- rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
+ rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
.PHONY: clean
@@ -221,6 +224,10 @@ lcov:
@geninfo -o redis.info .
@genhtml --legend -o lcov-html redis.info
+test-sds: sds.c sds.h
+ $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN -o /tmp/sds_test
+ /tmp/sds_test
+
.PHONY: lcov
bench: $(REDIS_BENCHMARK_NAME)
@@ -249,5 +256,6 @@ install: all
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_BENCHMARK_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CLI_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_DUMP_NAME) $(INSTALL_BIN)
+ $(REDIS_INSTALL) $(REDIS_CHECK_RDB_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CHECK_AOF_NAME) $(INSTALL_BIN)
+ @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
diff --git a/src/anet.c b/src/anet.c
index 1e5d85495..76e9b67ae 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -589,6 +589,23 @@ error:
return -1;
}
+/* Format an IP,port pair into something easy to parse. If IP is IPv6
+ * (matches for ":"), the ip is surrounded by []. IP and port are just
+ * separated by colons. This the standard to display addresses within Redis. */
+int anetFormatAddr(char *buf, size_t buf_len, char *ip, int port) {
+ return snprintf(buf,buf_len, strchr(ip,':') ?
+ "[%s]:%d" : "%s:%d", ip, port);
+}
+
+/* Like anetFormatAddr() but extract ip and port from the socket's peer. */
+int anetFormatPeer(int fd, char *buf, size_t buf_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetPeerToString(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(buf, buf_len, ip, port);
+}
+
int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
struct sockaddr_storage sa;
socklen_t salen = sizeof(sa);
@@ -610,3 +627,11 @@ int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
}
return 0;
}
+
+int anetFormatSock(int fd, char *fmt, size_t fmt_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetSockName(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(fmt, fmt_len, ip, port);
+}
diff --git a/src/anet.h b/src/anet.h
index b94a0cd17..ea9c77f2e 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -70,5 +70,8 @@ int anetSendTimeout(char *err, int fd, long long ms);
int anetPeerToString(int fd, char *ip, size_t ip_len, int *port);
int anetKeepAlive(char *err, int fd, int interval);
int anetSockName(int fd, char *ip, size_t ip_len, int *port);
+int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port);
+int anetFormatPeer(int fd, char *fmt, size_t fmt_len);
+int anetFormatSock(int fd, char *fmt, size_t fmt_len);
#endif
diff --git a/src/aof.c b/src/aof.c
index 0af519bfa..dc7d11873 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -770,52 +770,29 @@ int rioWriteBulkObject(rio *r, robj *obj) {
int rewriteListObject(rio *r, robj *key, robj *o) {
long long count = 0, items = listTypeLength(o);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = o->ptr;
- unsigned char *p = ziplistIndex(zl,0);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *list = o->ptr;
+ quicklistIter *li = quicklistGetIterator(list, AL_START_HEAD);
+ quicklistEntry entry;
- while(ziplistGet(p,&vstr,&vlen,&vlong)) {
+ while (quicklistNext(li,&entry)) {
if (count == 0) {
int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
-
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (vstr) {
- if (rioWriteBulkString(r,(char*)vstr,vlen) == 0) return 0;
- } else {
- if (rioWriteBulkLongLong(r,vlong) == 0) return 0;
- }
- p = ziplistNext(zl,p);
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listNode *ln;
- listIter li;
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
-
- if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
- if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
- if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (entry.value) {
+ if (rioWriteBulkString(r,(char*)entry.value,entry.sz) == 0) return 0;
+ } else {
+ if (rioWriteBulkLongLong(r,entry.longval) == 0) return 0;
}
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
+ quicklistReleaseIterator(li);
} else {
redisPanic("Unknown list encoding");
}
@@ -1105,6 +1082,7 @@ int rewriteAppendOnlyFile(char *filename) {
}
}
dictReleaseIterator(di);
+ di = NULL;
}
/* Do an initial slow fsync here while the parent is still sending
diff --git a/src/bitops.c b/src/bitops.c
index 94c7f3537..4c8662244 100644
--- a/src/bitops.c
+++ b/src/bitops.c
@@ -70,16 +70,19 @@ size_t redisPopcount(void *s, long count) {
count--;
}
- /* Count bits 16 bytes at a time */
+ /* Count bits 28 bytes at a time */
p4 = (uint32_t*)p;
- while(count>=16) {
- uint32_t aux1, aux2, aux3, aux4;
+ while(count>=28) {
+ uint32_t aux1, aux2, aux3, aux4, aux5, aux6, aux7;
aux1 = *p4++;
aux2 = *p4++;
aux3 = *p4++;
aux4 = *p4++;
- count -= 16;
+ aux5 = *p4++;
+ aux6 = *p4++;
+ aux7 = *p4++;
+ count -= 28;
aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
@@ -89,10 +92,19 @@ size_t redisPopcount(void *s, long count) {
aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
- bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24);
+ aux5 = aux5 - ((aux5 >> 1) & 0x55555555);
+ aux5 = (aux5 & 0x33333333) + ((aux5 >> 2) & 0x33333333);
+ aux6 = aux6 - ((aux6 >> 1) & 0x55555555);
+ aux6 = (aux6 & 0x33333333) + ((aux6 >> 2) & 0x33333333);
+ aux7 = aux7 - ((aux7 >> 1) & 0x55555555);
+ aux7 = (aux7 & 0x33333333) + ((aux7 >> 2) & 0x33333333);
+ bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) +
+ ((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) +
+ ((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) +
+ ((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) +
+ ((aux5 + (aux5 >> 4)) & 0x0F0F0F0F) +
+ ((aux6 + (aux6 >> 4)) & 0x0F0F0F0F) +
+ ((aux7 + (aux7 >> 4)) & 0x0F0F0F0F))* 0x01010101) >> 24;
}
/* Count the remaining bytes. */
p = (unsigned char*)p4;
@@ -348,7 +360,7 @@ void bitopCommand(redisClient *c) {
* can take a fast path that performs much better than the
* vanilla algorithm. */
j = 0;
- if (minlen && numkeys <= 16) {
+ if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
unsigned long *lp[16];
unsigned long *lres = (unsigned long*) res;
diff --git a/src/cluster.c b/src/cluster.c
index 608f8d2c5..2da0ed5f6 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -40,6 +40,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
+#include <math.h>
/* A global reference to myself is handy to make code more clear.
* Myself always points to server.cluster->myself, that is, the clusterNode
@@ -479,6 +480,7 @@ void clusterInit(void) {
* the IP address via MEET messages. */
myself->port = server.port;
+ server.cluster->mf_end = 0;
resetManualFailover();
}
@@ -593,7 +595,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
redisLog(REDIS_VERBOSE,
- "Accepting cluster node: %s", server.neterr);
+ "Error accepting cluster node: %s", server.neterr);
return;
}
anetNonBlock(NULL,cfd);
@@ -782,8 +784,11 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
for (j = 0; j < master->numslaves; j++) {
if (master->slaves[j] == slave) {
- memmove(master->slaves+j,master->slaves+(j+1),
- (master->numslaves-1)-j);
+ if ((j+1) < master->numslaves) {
+ int remaining_slaves = (master->numslaves - j) - 1;
+ memmove(master->slaves+j,master->slaves+(j+1),
+ (sizeof(*master->slaves) * remaining_slaves));
+ }
master->numslaves--;
return REDIS_OK;
}
@@ -818,15 +823,30 @@ int clusterCountNonFailingSlaves(clusterNode *n) {
return okslaves;
}
+/* Low level cleanup of the node structure. Only called by clusterDelNode(). */
void freeClusterNode(clusterNode *n) {
sds nodename;
+ int j;
+
+ /* If the node is a master with associated slaves, we have to set
+ * all the slaves->slaveof fields to NULL (unknown). */
+ if (nodeIsMaster(n)) {
+ for (j = 0; j < n->numslaves; j++)
+ n->slaves[j]->slaveof = NULL;
+ }
+
+ /* Remove this node from the list of slaves of its master. */
+ if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
+ /* Unlink from the set of nodes. */
nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN);
redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
sdsfree(nodename);
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof, n);
+
+ /* Release link and associated data structures. */
if (n->link) freeClusterLink(n->link);
listRelease(n->fail_reports);
+ zfree(n->slaves);
zfree(n);
}
@@ -839,11 +859,16 @@ int clusterAddNode(clusterNode *node) {
return (retval == DICT_OK) ? REDIS_OK : REDIS_ERR;
}
-/* Remove a node from the cluster:
- * 1) Mark all the nodes handled by it as unassigned.
- * 2) Remove all the failure reports sent by this node.
- * 3) Free the node, that will in turn remove it from the hash table
- * and from the list of slaves of its master, if it is a slave node.
+/* Remove a node from the cluster. The functio performs the high level
+ * cleanup, calling freeClusterNode() for the low level cleanup.
+ * Here we do the following:
+ *
+ * 1) Mark all the slots handled by it as unassigned.
+ * 2) Remove all the failure reports sent by this node and referenced by
+ * other nodes.
+ * 3) Free the node with freeClusterNode() that will in turn remove it
+ * from the hash table and from the list of slaves of its master, if
+ * it is a slave node.
*/
void clusterDelNode(clusterNode *delnode) {
int j;
@@ -870,11 +895,7 @@ void clusterDelNode(clusterNode *delnode) {
}
dictReleaseIterator(di);
- /* 3) Remove this node from its master's slaves if needed. */
- if (nodeIsSlave(delnode) && delnode->slaveof)
- clusterNodeRemoveSlave(delnode->slaveof,delnode);
-
- /* 4) Free the node, unlinking it from the cluster. */
+ /* 3) Free the node, unlinking it from the cluster. */
freeClusterNode(delnode);
}
@@ -1118,6 +1139,7 @@ int clusterStartHandshake(char *ip, int port) {
/* Set norm_ip as the normalized string representation of the node
* IP address. */
+ memset(norm_ip,0,REDIS_IP_STR_LEN);
if (sa.ss_family == AF_INET)
inet_ntop(AF_INET,
(void*)&(((struct sockaddr_in *)&sa)->sin_addr),
@@ -1232,7 +1254,7 @@ void nodeIp2String(char *buf, clusterLink *link) {
* The function returns 0 if the node address is still the same,
* otherwise 1 is returned. */
int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
- char ip[REDIS_IP_STR_LEN];
+ char ip[REDIS_IP_STR_LEN] = {0};
/* We don't proceed if the link is the same as the sender link, as this
* function is designed to see if the node link is consistent with the
@@ -1463,7 +1485,8 @@ int clusterProcessPacket(clusterLink *link) {
/* Perform sanity checks */
if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
- if (ntohs(hdr->ver) != 0) return 1; /* Can't handle versions other than 0.*/
+ if (ntohs(hdr->ver) != CLUSTER_PROTO_VER)
+ return 1; /* Can't handle versions other than the current one.*/
if (totlen > sdslen(link->rcvbuf)) return 1;
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
@@ -1482,7 +1505,8 @@ int clusterProcessPacket(clusterLink *link) {
} else if (type == CLUSTERMSG_TYPE_PUBLISH) {
uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
- explen += sizeof(clusterMsgDataPublish) +
+ explen += sizeof(clusterMsgDataPublish) -
+ 8 +
ntohl(hdr->data.publish.msg.channel_len) +
ntohl(hdr->data.publish.msg.message_len);
if (totlen != explen) return 1;
@@ -1543,8 +1567,12 @@ int clusterProcessPacket(clusterLink *link) {
* later if we changed address, and those nodes will use our
* official address to connect to us. So by obtaining this address
* from the socket is a simple way to discover / update our own
- * address in the cluster without it being hardcoded in the config. */
- if (type == CLUSTERMSG_TYPE_MEET) {
+ * address in the cluster without it being hardcoded in the config.
+ *
+ * However if we don't have an address at all, we update the address
+ * even with a normal PING packet. If it's wrong it will be fixed
+ * by MEET later. */
+ if (type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') {
char ip[REDIS_IP_STR_LEN];
if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 &&
@@ -1603,7 +1631,7 @@ int clusterProcessPacket(clusterLink *link) {
}
/* Free this node as we already have it. This will
* cause the link to be freed as well. */
- freeClusterNode(link->node);
+ clusterDelNode(link->node);
return 0;
}
@@ -2010,7 +2038,8 @@ void clusterBroadcastMessage(void *buf, size_t len) {
dictReleaseIterator(di);
}
-/* Build the message header */
+/* Build the message header. hdr must point to a buffer at least
+ * sizeof(clusterMsg) in bytes. */
void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
int totlen = 0;
uint64_t offset;
@@ -2024,6 +2053,7 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
myself->slaveof : myself;
memset(hdr,0,sizeof(*hdr));
+ hdr->ver = htons(CLUSTER_PROTO_VER);
hdr->sig[0] = 'R';
hdr->sig[1] = 'C';
hdr->sig[2] = 'm';
@@ -2070,40 +2100,90 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
/* Send a PING or PONG packet to the specified node, making sure to add enough
* gossip informations. */
void clusterSendPing(clusterLink *link, int type) {
- unsigned char buf[sizeof(clusterMsg)];
- clusterMsg *hdr = (clusterMsg*) buf;
- int gossipcount = 0, totlen;
- /* freshnodes is the number of nodes we can still use to populate the
- * gossip section of the ping packet. Basically we start with the nodes
- * we have in memory minus two (ourself and the node we are sending the
- * message to). Every time we add a node we decrement the counter, so when
- * it will drop to <= zero we know there is no more gossip info we can
- * send. */
+ unsigned char *buf;
+ clusterMsg *hdr;
+ int gossipcount = 0; /* Number of gossip sections added so far. */
+ int wanted; /* Number of gossip sections we want to append if possible. */
+ int totlen; /* Total packet length. */
+ /* freshnodes is the max number of nodes we can hope to append at all:
+ * nodes available minus two (ourself and the node we are sending the
+ * message to). However practically there may be less valid nodes since
+ * nodes in handshake state, disconnected, are not considered. */
int freshnodes = dictSize(server.cluster->nodes)-2;
+ /* How many gossip sections we want to add? 1/10 of the number of nodes
+ * and anyway at least 3. Why 1/10?
+ *
+ * If we have N masters, with N/10 entries, and we consider that in
+ * node_timeout we exchange with each other node at least 4 packets
+ * (we ping in the worst case in node_timeout/2 time, and we also
+ * receive two pings from the host), we have a total of 8 packets
+ * in the node_timeout*2 falure reports validity time. So we have
+ * that, for a single PFAIL node, we can expect to receive the following
+ * number of failure reports (in the specified window of time):
+ *
+ * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS:
+ *
+ * PROB = probability of being featured in a single gossip entry,
+ * which is 1 / NUM_OF_NODES.
+ * ENTRIES = 10.
+ * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS.
+ *
+ * If we assume we have just masters (so num of nodes and num of masters
+ * is the same), with 1/10 we always get over the majority, and specifically
+ * 80% of the number of nodes, to account for many masters failing at the
+ * same time.
+ *
+ * Since we have non-voting slaves that lower the probability of an entry
+ * to feature our node, we set the number of entires per packet as
+ * 10% of the total nodes we have. */
+ wanted = floor(dictSize(server.cluster->nodes)/10);
+ if (wanted < 3) wanted = 3;
+ if (wanted > freshnodes) wanted = freshnodes;
+
+ /* Compute the maxium totlen to allocate our buffer. We'll fix the totlen
+ * later according to the number of gossip sections we really were able
+ * to put inside the packet. */
+ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ totlen += (sizeof(clusterMsgDataGossip)*wanted);
+ /* Note: clusterBuildMessageHdr() expects the buffer to be always at least
+ * sizeof(clusterMsg) or more. */
+ if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg);
+ buf = zcalloc(totlen);
+ hdr = (clusterMsg*) buf;
+
+ /* Populate the header. */
if (link->node && type == CLUSTERMSG_TYPE_PING)
link->node->ping_sent = mstime();
clusterBuildMessageHdr(hdr,type);
/* Populate the gossip fields */
- while(freshnodes > 0 && gossipcount < 3) {
+ int maxiterations = wanted*3;
+ while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
dictEntry *de = dictGetRandomKey(server.cluster->nodes);
clusterNode *this = dictGetVal(de);
clusterMsgDataGossip *gossip;
int j;
+ /* Don't include this node: the whole packet header is about us
+ * already, so we just gossip about other nodes. */
+ if (this == myself) continue;
+
+ /* Give a bias to FAIL/PFAIL nodes. */
+ if (maxiterations > wanted*2 &&
+ !(this->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL)))
+ continue;
+
/* In the gossip section don't include:
- * 1) Myself.
- * 2) Nodes in HANDSHAKE state.
+ * 1) Nodes in HANDSHAKE state.
* 3) Nodes with the NOADDR flag set.
* 4) Disconnected nodes if they don't have configured slots.
*/
- if (this == myself ||
- this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
+ if (this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
(this->link == NULL && this->numslots == 0))
{
- freshnodes--; /* otherwise we may loop forever. */
- continue;
+ freshnodes--; /* Tecnically not correct, but saves CPU. */
+ continue;
}
/* Check if we already added this node */
@@ -2122,13 +2202,19 @@ void clusterSendPing(clusterLink *link, int type) {
memcpy(gossip->ip,this->ip,sizeof(this->ip));
gossip->port = htons(this->port);
gossip->flags = htons(this->flags);
+ gossip->notused1 = 0;
+ gossip->notused2 = 0;
gossipcount++;
}
+
+ /* Ready to send... fix the totlen fiend and queue the message in the
+ * output buffer. */
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
totlen += (sizeof(clusterMsgDataGossip)*gossipcount);
hdr->count = htons(gossipcount);
hdr->totlen = htonl(totlen);
clusterSendMessage(link,buf,totlen);
+ zfree(buf);
}
/* Send a PONG packet to every connected node that's not in handshake state
@@ -2184,7 +2270,7 @@ void clusterSendPublish(clusterLink *link, robj *channel, robj *message) {
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_PUBLISH);
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
- totlen += sizeof(clusterMsgDataPublish) + channel_len + message_len;
+ totlen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
hdr->data.publish.msg.channel_len = htonl(channel_len);
hdr->data.publish.msg.message_len = htonl(message_len);
@@ -2517,7 +2603,7 @@ void clusterHandleSlaveFailover(void) {
/* Compute the failover timeout (the max time we have to send votes
* and wait for replies), and the failover retry time (the time to wait
- * before waiting again.
+ * before trying to get voted again).
*
* Timeout is MIN(NODE_TIMEOUT*2,2000) milliseconds.
* Retry is two times the Timeout.
@@ -2775,6 +2861,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
}
}
}
+ dictReleaseIterator(di);
/* Step 4: perform the migration if there is a target, and if I'm the
* candidate. */
@@ -2896,7 +2983,7 @@ void clusterCron(void) {
/* A Node in HANDSHAKE state has a limited lifespan equal to the
* configured node timeout. */
if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
- freeClusterNode(node);
+ clusterDelNode(node);
continue;
}
@@ -3883,10 +3970,7 @@ void clusterCommand(redisClient *c) {
server.cluster->stats_bus_messages_sent,
server.cluster->stats_bus_messages_received
);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, info);
} else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) {
int retval = clusterSaveConfig(1);
@@ -4010,6 +4094,18 @@ void clusterCommand(redisClient *c) {
addReplyBulkCString(c,ni);
sdsfree(ni);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
+ c->argc == 3)
+ {
+ /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr);
+
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else {
+ addReplyLongLong(c,clusterNodeFailureReportsCount(n));
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
(c->argc == 2 || c->argc == 3))
{
@@ -4408,7 +4504,7 @@ try_again:
/* Check if the key is here. If not we reply with success as there is
* nothing to migrate (for instance the key expired in the meantime), but
* we include such information in the reply string. */
- if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) {
+ if ((o = lookupKeyWrite(c->db,c->argv[3])) == NULL) {
addReplySds(c,sdsnew("+NOKEY\r\n"));
return;
}
diff --git a/src/cluster.h b/src/cluster.h
index b05a30ded..ef5caf0d6 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -163,10 +163,11 @@ typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
- char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
- uint16_t port; /* port last time it was seen */
- uint16_t flags;
- uint32_t notused; /* for 64 bit alignment */
+ char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
+ uint16_t port; /* port last time it was seen */
+ uint16_t flags; /* node->flags copy */
+ uint16_t notused1; /* Some room for future improvements. */
+ uint32_t notused2;
} clusterMsgDataGossip;
typedef struct {
@@ -176,7 +177,10 @@ typedef struct {
typedef struct {
uint32_t channel_len;
uint32_t message_len;
- unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
+ /* We can't reclare bulk_data as bulk_data[] since this structure is
+ * nested. The 8 bytes are removed from the count during the message
+ * length computation. */
+ unsigned char bulk_data[8];
} clusterMsgDataPublish;
typedef struct {
@@ -208,6 +212,7 @@ union clusterMsgData {
} update;
};
+#define CLUSTER_PROTO_VER 0 /* Cluster bus protocol version. */
typedef struct {
char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
diff --git a/src/config.c b/src/config.c
index 05cb7c9fe..8255a56b7 100644
--- a/src/config.c
+++ b/src/config.c
@@ -60,6 +60,8 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
* Config file parsing
*----------------------------------------------------------------------------*/
+int supervisedToMode(const char *str);
+
int yesnotoi(char *s) {
if (!strcasecmp(s,"yes")) return 1;
else if (!strcasecmp(s,"no")) return 0;
@@ -397,9 +399,13 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) {
server.hash_max_ziplist_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
- server.list_max_ziplist_entries = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) {
- server.list_max_ziplist_value = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
+ } else if (!strcasecmp(argv[0],"list-max-ziplist-size") && argc == 2) {
+ server.list_max_ziplist_size = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"list-compress-depth") && argc == 2) {
+ server.list_compress_depth = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"set-max-intset-entries") && argc == 2) {
server.set_max_intset_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"zset-max-ziplist-entries") && argc == 2) {
@@ -529,6 +535,15 @@ void loadServerConfigFromString(char *config) {
goto loaderr;
}
server.notify_keyspace_events = flags;
+ } else if (!strcasecmp(argv[0],"supervised") && argc == 2) {
+ int mode = supervisedToMode(argv[1]);
+
+ if (mode == -1) {
+ err = "Invalid option for 'supervised'. "
+ "Allowed values: 'upstart', 'systemd', 'auto', or 'no'";
+ goto loaderr;
+ }
+ server.supervised_mode = mode;
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@@ -795,12 +810,12 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.hash_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-entries")) {
+ } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-size")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-value")) {
+ server.list_max_ziplist_size = ll;
+ } else if (!strcasecmp(c->argv[2]->ptr,"list-compress-depth")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_value = ll;
+ server.list_compress_depth = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"set-max-intset-entries")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.set_max_intset_entries = ll;
@@ -1004,6 +1019,47 @@ badfmt: /* Bad format errors */
} \
} while(0);
+char *maxmemoryToString() {
+ char *s;
+ switch(server.maxmemory_policy) {
+ case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
+ case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
+ case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
+ case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
+ case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
+ case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
+ default: s = "unknown"; break;
+ }
+ return s;
+}
+
+int supervisedToMode(const char *str) {
+ int mode;
+ if (!strcasecmp(str,"upstart")) {
+ mode = REDIS_SUPERVISED_UPSTART;
+ } else if (!strcasecmp(str,"systemd")) {
+ mode = REDIS_SUPERVISED_SYSTEMD;
+ } else if (!strcasecmp(str,"auto")) {
+ mode = REDIS_SUPERVISED_AUTODETECT;
+ } else if (!strcasecmp(str,"no")) {
+ mode = REDIS_SUPERVISED_NONE;
+ } else {
+ mode = -1;
+ }
+ return mode;
+}
+
+char *supervisedToString(void) {
+ char *s;
+ switch(server.supervised_mode) {
+ case REDIS_SUPERVISED_UPSTART: s = "upstart"; break;
+ case REDIS_SUPERVISED_SYSTEMD: s = "systemd"; break;
+ case REDIS_SUPERVISED_AUTODETECT: s = "auto"; break;
+ case REDIS_SUPERVISED_NONE: s = "no"; break;
+ default: s = "no"; break;
+ }
+ return s;
+}
void configGetCommand(redisClient *c) {
robj *o = c->argv[2];
void *replylen = addDeferredMultiBulkLength(c);
@@ -1033,10 +1089,10 @@ void configGetCommand(redisClient *c) {
server.hash_max_ziplist_entries);
config_get_numerical_field("hash-max-ziplist-value",
server.hash_max_ziplist_value);
- config_get_numerical_field("list-max-ziplist-entries",
- server.list_max_ziplist_entries);
- config_get_numerical_field("list-max-ziplist-value",
- server.list_max_ziplist_value);
+ config_get_numerical_field("list-max-ziplist-size",
+ server.list_max_ziplist_size);
+ config_get_numerical_field("list-compress-depth",
+ server.list_compress_depth);
config_get_numerical_field("set-max-intset-entries",
server.set_max_intset_entries);
config_get_numerical_field("zset-max-ziplist-entries",
@@ -1112,19 +1168,8 @@ void configGetCommand(redisClient *c) {
matches++;
}
if (stringmatch(pattern,"maxmemory-policy",0)) {
- char *s;
-
- switch(server.maxmemory_policy) {
- case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
- case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
- case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
- case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
- case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
- case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
addReplyBulkCString(c,"maxmemory-policy");
- addReplyBulkCString(c,s);
+ addReplyBulkCString(c,maxmemoryToString());
matches++;
}
if (stringmatch(pattern,"appendfsync",0)) {
@@ -1170,6 +1215,11 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,s);
matches++;
}
+ if (stringmatch(pattern,"supervised",0)) {
+ addReplyBulkCString(c,"supervised");
+ addReplyBulkCString(c,supervisedToString());
+ matches++;
+ }
if (stringmatch(pattern,"client-output-buffer-limit",0)) {
sds buf = sdsempty();
int j;
@@ -1854,8 +1904,8 @@ int rewriteConfig(char *path) {
rewriteConfigNotifykeyspaceeventsOption(state);
rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,REDIS_HASH_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,REDIS_HASH_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"list-max-ziplist-entries",server.list_max_ziplist_entries,REDIS_LIST_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"list-max-ziplist-value",server.list_max_ziplist_value,REDIS_LIST_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,REDIS_LIST_MAX_ZIPLIST_SIZE);
+ rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,REDIS_LIST_COMPRESS_DEPTH);
rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
@@ -1865,6 +1915,12 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,REDIS_DEFAULT_AOF_LOAD_TRUNCATED);
+ rewriteConfigEnumOption(state,"supervised",server.supervised_mode,
+ "upstart", REDIS_SUPERVISED_UPSTART,
+ "systemd", REDIS_SUPERVISED_SYSTEMD,
+ "auto", REDIS_SUPERVISED_AUTODETECT,
+ "no", REDIS_SUPERVISED_NONE,
+ NULL, REDIS_SUPERVISED_NONE);
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
/* Step 3: remove all the orphaned lines in the old file, that is, lines
diff --git a/src/config.h b/src/config.h
index 57d07599a..1ed8ef301 100644
--- a/src/config.h
+++ b/src/config.h
@@ -48,6 +48,7 @@
#define HAVE_PROC_STAT 1
#define HAVE_PROC_MAPS 1
#define HAVE_PROC_SMAPS 1
+#define HAVE_PROC_SOMAXCONN 1
#endif
/* Test for task_info() */
@@ -56,10 +57,15 @@
#endif
/* Test for backtrace() */
-#if defined(__APPLE__) || defined(__linux__)
+#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__))
#define HAVE_BACKTRACE 1
#endif
+/* MSG_NOSIGNAL. */
+#ifdef __linux__
+#define HAVE_MSG_NOSIGNAL 1
+#endif
+
/* Test for polling API */
#ifdef __linux__
#define HAVE_EPOLL 1
@@ -112,7 +118,7 @@
#define USE_SETPROCTITLE
#endif
-#if (defined __linux || defined __APPLE__)
+#if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__)
#define USE_SETPROCTITLE
#define INIT_SETPROCTITLE_REPLACEMENT
void spt_init(int argc, char *argv[]);
diff --git a/src/crc64.c b/src/crc64.c
index ecdba90e0..f1f764922 100644
--- a/src/crc64.c
+++ b/src/crc64.c
@@ -181,9 +181,13 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
}
/* Test main */
-#ifdef TEST_MAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+
+#define UNUSED(x) (void)(x)
+int crc64Test(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
printf("e9c6d914c4b8d9ca == %016llx\n",
(unsigned long long) crc64(0,(unsigned char*)"123456789",9));
return 0;
diff --git a/src/crc64.h b/src/crc64.h
index ab375d3f4..c9fca519d 100644
--- a/src/crc64.h
+++ b/src/crc64.h
@@ -5,4 +5,8 @@
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+#ifdef REDIS_TEST
+int crc64Test(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/db.c b/src/db.c
index d5a381daf..69d1a7768 100644
--- a/src/db.c
+++ b/src/db.c
@@ -60,7 +60,32 @@ robj *lookupKey(redisDb *db, robj *key) {
robj *lookupKeyRead(redisDb *db, robj *key) {
robj *val;
- expireIfNeeded(db,key);
+ if (expireIfNeeded(db,key) == 1) {
+ /* Key expired. If we are in the context of a master, expireIfNeeded()
+ * returns 0 only when the key does not exist at all, so it's save
+ * to return NULL ASAP. */
+ if (server.masterhost == NULL) return NULL;
+
+ /* However if we are in the context of a slave, expireIfNeeded() will
+ * not really try to expire the key, it only returns information
+ * about the "logical" status of the key: key expiring is up to the
+ * master in order to have a consistent view of master's data set.
+ *
+ * However, if the command caller is not the master, and as additional
+ * safety measure, the command invoked is a read-only command, we can
+ * safely return NULL here, and provide a more consistent behavior
+ * to clients accessign expired values in a read-only fashion, that
+ * will say the key as non exisitng.
+ *
+ * Notably this covers GETs when slaves are used to scale reads. */
+ if (server.current_client &&
+ server.current_client != server.master &&
+ server.current_client->cmd &&
+ server.current_client->cmd->flags & REDIS_CMD_READONLY)
+ {
+ return NULL;
+ }
+ }
val = lookupKey(db,key);
if (val == NULL)
server.stat_keyspace_misses++;
@@ -381,7 +406,7 @@ void scanCallback(void *privdata, const dictEntry *de) {
} else if (o->type == REDIS_ZSET) {
key = dictGetKey(de);
incrRefCount(key);
- val = createStringObjectFromLongDouble(*(double*)dictGetVal(de));
+ val = createStringObjectFromLongDouble(*(double*)dictGetVal(de),0);
} else {
redisPanic("Type not handled in SCAN callback.");
}
@@ -425,8 +450,8 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
list *keys = listCreate();
listNode *node, *nextnode;
long count = 10;
- sds pat;
- int patlen, use_pattern = 0;
+ sds pat = NULL;
+ int patlen = 0, use_pattern = 0;
dict *ht;
/* Object must be NULL (to iterate keys names), or the type of the object
@@ -877,7 +902,7 @@ void expireGenericCommand(redisClient *c, long long basetime, int unit) {
when += basetime;
/* No key, return zero. */
- if (lookupKeyRead(c->db,key) == NULL) {
+ if (lookupKeyWrite(c->db,key) == NULL) {
addReply(c,shared.czero);
return;
}
diff --git a/src/debug.c b/src/debug.c
index d566f716d..162274cce 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -252,6 +252,12 @@ void computeDatasetDigest(unsigned char *final) {
}
}
+void inputCatSds(void *result, const char *str) {
+ /* result is actually a (sds *), so re-cast it here */
+ sds *info = (sds *)result;
+ *info = sdscat(*info, str);
+}
+
void debugCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
*((char*)-1) = 'x';
@@ -295,13 +301,46 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
strenc = strEncoding(val->encoding);
+ char extra[128] = {0};
+ if (val->encoding == REDIS_ENCODING_QUICKLIST) {
+ char *nextra = extra;
+ int remaining = sizeof(extra);
+ quicklist *ql = val->ptr;
+ /* Add number of quicklist nodes */
+ int used = snprintf(nextra, remaining, " ql_nodes:%u", ql->len);
+ nextra += used;
+ remaining -= used;
+ /* Add average quicklist fill factor */
+ double avg = (double)ql->count/ql->len;
+ used = snprintf(nextra, remaining, " ql_avg_node:%.2f", avg);
+ nextra += used;
+ remaining -= used;
+ /* Add quicklist fill level / max ziplist size */
+ used = snprintf(nextra, remaining, " ql_ziplist_max:%d", ql->fill);
+ nextra += used;
+ remaining -= used;
+ /* Add isCompressed? */
+ int compressed = ql->compress != 0;
+ used = snprintf(nextra, remaining, " ql_compressed:%d", compressed);
+ nextra += used;
+ remaining -= used;
+ /* Add total uncompressed size */
+ unsigned long sz = 0;
+ for (quicklistNode *node = ql->head; node; node = node->next) {
+ sz += node->sz;
+ }
+ used = snprintf(nextra, remaining, " ql_uncompressed_size:%lu", sz);
+ nextra += used;
+ remaining -= used;
+ }
+
addReplyStatusFormat(c,
"Value at:%p refcount:%d "
"encoding:%s serializedlength:%lld "
- "lru:%d lru_seconds_idle:%llu",
+ "lru:%d lru_seconds_idle:%llu%s",
(void*)val, val->refcount,
strenc, (long long) rdbSavedObjectLen(val),
- val->lru, estimateObjectIdleTime(val));
+ val->lru, estimateObjectIdleTime(val)/1000, extra);
} else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
dictEntry *de;
robj *val;
@@ -338,7 +377,7 @@ void debugCommand(redisClient *c) {
snprintf(buf,sizeof(buf),"%s:%lu",
(c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j);
key = createStringObject(buf,strlen(buf));
- if (lookupKeyRead(c->db,key) != NULL) {
+ if (lookupKeyWrite(c->db,key) != NULL) {
decrRefCount(key);
continue;
}
@@ -379,6 +418,25 @@ void debugCommand(redisClient *c) {
errstr = sdsmapchars(errstr,"\n\r"," ",2); /* no newlines in errors. */
errstr = sdscatlen(errstr,"\r\n",2);
addReplySds(c,errstr);
+ } else if (!strcasecmp(c->argv[1]->ptr,"structsize") && c->argc == 2) {
+ sds sizes = sdsempty();
+ sizes = sdscatprintf(sizes,"bits:%d ", (sizeof(void*) == 8)?64:32);
+ sizes = sdscatprintf(sizes,"robj:%d ", (int)sizeof(robj));
+ sizes = sdscatprintf(sizes,"dictentry:%d ", (int)sizeof(dictEntry));
+ sizes = sdscatprintf(sizes,"sdshdr:%d", (int)sizeof(struct sdshdr));
+ addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"jemalloc") && c->argc == 3) {
+#if defined(USE_JEMALLOC)
+ if (!strcasecmp(c->argv[2]->ptr, "info")) {
+ sds info = sdsempty();
+ je_malloc_stats_print(inputCatSds, &info, NULL);
+ addReplyBulkSds(c, info);
+ } else {
+ addReplyErrorFormat(c, "Valid jemalloc debug fields: info");
+ }
+#else
+ addReplyErrorFormat(c, "jemalloc support not available");
+#endif
} else {
addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'",
(char*)c->argv[1]->ptr);
@@ -857,7 +915,7 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
" Suspect RAM error? Use redis-server --test-memory to verify it.\n\n"
);
/* free(messages); Don't call free() with possibly corrupted memory. */
- if (server.daemonize) unlink(server.pidfile);
+ if (server.daemonize && server.supervised == 0) unlink(server.pidfile);
/* Make sure we exit with the right signal at the end. So for instance
* the core will be dumped if enabled. */
diff --git a/src/dict.c b/src/dict.c
index 29d400099..7d8db3631 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -342,7 +342,10 @@ dictEntry *dictAddRaw(dict *d, void *key)
if ((index = _dictKeyIndex(d, key)) == -1)
return NULL;
- /* Allocate the memory and store the new entry */
+ /* Allocate the memory and store the new entry.
+ * Insert the element in top, with the assumption that in a database
+ * system it is more likely that recently added entries are accessed
+ * more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
diff --git a/src/endianconv.c b/src/endianconv.c
index 9adf09c1f..f3b0b4730 100644
--- a/src/endianconv.c
+++ b/src/endianconv.c
@@ -101,12 +101,16 @@ uint64_t intrev64(uint64_t v) {
return v;
}
-#ifdef TESTMAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+#define UNUSED(x) (void)(x)
+int endianconvTest(int argc, char *argv[]) {
char buf[32];
+ UNUSED(argc);
+ UNUSED(argv);
+
sprintf(buf,"ciaoroma");
memrev16(buf);
printf("%s\n", buf);
diff --git a/src/endianconv.h b/src/endianconv.h
index d93cd99ba..08f553136 100644
--- a/src/endianconv.h
+++ b/src/endianconv.h
@@ -71,4 +71,8 @@ uint64_t intrev64(uint64_t v);
#define ntohu64(v) intrev64(v)
#endif
+#ifdef REDIS_TEST
+int endianconvTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/fmacros.h b/src/fmacros.h
index e49735ce5..6e56c759d 100644
--- a/src/fmacros.h
+++ b/src/fmacros.h
@@ -34,6 +34,7 @@
#if defined(__linux__)
#define _GNU_SOURCE
+#define _DEFAULT_SOURCE
#endif
#if defined(_AIX)
diff --git a/src/help.h b/src/help.h
index 8395c525b..9f4c979df 100644
--- a/src/help.h
+++ b/src/help.h
@@ -651,8 +651,8 @@ struct commandHelp {
0,
"1.0.0" },
{ "SPOP",
- "key",
- "Remove and return a random member from a set",
+ "key [count]",
+ "Remove and return one or multiple random members from a set",
3,
"1.0.0" },
{ "SRANDMEMBER",
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 005beb18f..b3542f997 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -1213,7 +1213,7 @@ void pfcountCommand(redisClient *c) {
for (j = 1; j < c->argc; j++) {
/* Check type and size. */
robj *o = lookupKeyRead(c->db,c->argv[j]);
- if (o == NULL) continue; /* Assume empty HLL for non existing var. */
+ if (o == NULL) continue; /* Assume empty HLL for non existing var.*/
if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
/* Merge with this HLL with our 'max' HHL by setting max[i]
@@ -1233,7 +1233,7 @@ void pfcountCommand(redisClient *c) {
*
* The user specified a single key. Either return the cached value
* or compute one and update the cache. */
- o = lookupKeyRead(c->db,c->argv[1]);
+ o = lookupKeyWrite(c->db,c->argv[1]);
if (o == NULL) {
/* No key? Cardinality is zero since no element was added, otherwise
* we would have a key as HLLADD creates it as a side effect. */
@@ -1458,7 +1458,7 @@ void pfdebugCommand(redisClient *c) {
robj *o;
int j;
- o = lookupKeyRead(c->db,c->argv[2]);
+ o = lookupKeyWrite(c->db,c->argv[2]);
if (o == NULL) {
addReplyError(c,"The specified key does not exist");
return;
diff --git a/src/intset.c b/src/intset.c
index 5d894e3cd..762bd48c8 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -261,6 +261,90 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%intrev32ifbe(is->length));
}
+/* How many times bigger should the set length be compared to the requested
+ * count of members for us to use the Floyd algorithm instead of
+ * the Knuth algorithm */
+#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2)
+
+/* Copies 'count' random members from the set into the 'values' array.
+ * 'values' must be an array of int64_t values, of length 'count'.
+ * Returns the amount of items returned. If this amount is less than 'count',
+ * then the remaining 'values' are left uninitialized. */
+int intsetRandomMembers(intset *is, int64_t* values, int count) {
+
+ /* We don't check that is and values are non-NULL - the caller must
+ * play nice. */
+
+ int length = intsetLen(is);
+
+ if (count > length) {
+ /* Return everything in the set */
+ count = length;
+ }
+
+ /* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time,
+ * and the Floyd algorithm, O(length) space, O(count) time. */
+ if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) {
+
+ /* If the count of members requested is almost the length of the set,
+ * use the Knuth shuffle algorithm, O(1) space, O(length) time. */
+
+ /* First, fill the values array with unique random indexes inside
+ * the set. */
+ int in, im, rn, rm;
+ im = 0;
+ for (in = 0; in < length && im < count; in++) {
+
+ rn = length - in;
+ rm = count - im;
+ if (rand() % rn < rm) {
+ values[im++] = in;
+ }
+ }
+
+ } else {
+
+ /* If the length is considerably more than the count of members
+ * requested, use Robert Floyd's algorithm, O(length) space,
+ * O(count) time.
+ * Based on Jon Bentley's Programming Pearls */
+
+ int64_t *is_used = zcalloc(sizeof(int64_t) * length);
+ int in, im, r;
+
+ r = 0;
+ im = 0;
+
+ for (in = length - count; in < length && im < count; in++) {
+
+ /* Generate a random number r */
+ r = rand() % (in + 1);
+
+ /* Do we already have the value in r? */
+ if (is_used[r]) {
+ /* Use in instead of the generated number */
+ r = in;
+ }
+
+ values[im++] = r ;
+
+ /* Mark it as used */
+ is_used[r] = 1;
+ }
+
+ zfree(is_used);
+ }
+
+ /* Replace each random index with the value stored there in the intset */
+ uint8_t encoding = intrev32ifbe(is->encoding);
+ for (int currentValue = 0; currentValue < count; currentValue++) {
+ values[currentValue] =
+ _intsetGetEncoded(is, values[currentValue], encoding);
+ }
+
+ return count;
+}
+
/* Sets the value to the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@@ -281,44 +365,46 @@ size_t intsetBlobLen(intset *is) {
return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
}
-#ifdef INTSET_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
+#include <time.h>
-void intsetRepr(intset *is) {
- int i;
- for (i = 0; i < intrev32ifbe(is->length); i++) {
+#if 0
+static void intsetRepr(intset *is) {
+ for (uint32_t i = 0; i < intrev32ifbe(is->length); i++) {
printf("%lld\n", (uint64_t)_intsetGet(is,i));
}
printf("\n");
}
-void error(char *err) {
+static void error(char *err) {
printf("%s\n", err);
exit(1);
}
+#endif
-void ok(void) {
+static void ok(void) {
printf("OK\n");
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
-void _assert(char *estr, char *file, int line) {
+static void _assert(char *estr, char *file, int line) {
printf("\n\n=== ASSERTION FAILED ===\n");
printf("==> %s:%d '%s' is not true\n",file,line,estr);
}
-intset *createSet(int bits, int size) {
+static intset *createSet(int bits, int size) {
uint64_t mask = (1<<bits)-1;
- uint64_t i, value;
+ uint64_t value;
intset *is = intsetNew();
- for (i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
if (bits > 32) {
value = (rand()*rand()) & mask;
} else {
@@ -329,10 +415,8 @@ intset *createSet(int bits, int size) {
return is;
}
-void checkConsistency(intset *is) {
- int i;
-
- for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
+static void checkConsistency(intset *is) {
+ for (uint32_t i = 0; i < (intrev32ifbe(is->length)-1); i++) {
uint32_t encoding = intrev32ifbe(is->encoding);
if (encoding == INTSET_ENC_INT16) {
@@ -348,11 +432,15 @@ void checkConsistency(intset *is) {
}
}
-int main(int argc, char **argv) {
+#define UNUSED(x) (void)(x)
+int intsetTest(int argc, char **argv) {
uint8_t success;
int i;
intset *is;
- sranddev();
+ srand(time(NULL));
+
+ UNUSED(argc);
+ UNUSED(argv);
printf("Value encodings: "); {
assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
@@ -363,8 +451,10 @@ int main(int argc, char **argv) {
assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(-9223372036854775808ull) ==
+ INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(+9223372036854775807ull) ==
+ INTSET_ENC_INT64);
ok();
}
@@ -378,7 +468,7 @@ int main(int argc, char **argv) {
}
printf("Large number of random adds: "); {
- int inserts = 0;
+ uint32_t inserts = 0;
is = intsetNew();
for (i = 0; i < 1024; i++) {
is = intsetAdd(is,rand()%0x800,&success);
@@ -461,7 +551,8 @@ int main(int argc, char **argv) {
start = usec();
for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
- printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
+ printf("%ld lookups, %ld element set, %lldusec\n",
+ num,size,usec()-start);
}
printf("Stress add+delete: "); {
@@ -479,5 +570,7 @@ int main(int argc, char **argv) {
checkConsistency(is);
ok();
}
+
+ return 0;
}
#endif
diff --git a/src/intset.h b/src/intset.h
index bd01ff22f..7550df303 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -43,8 +43,13 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
+int intsetRandomMembers(intset *is, int64_t* value, int count);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(intset *is);
size_t intsetBlobLen(intset *is);
+#ifdef REDIS_TEST
+int intsetTest(int argc, char *argv[]);
+#endif
+
#endif // __INTSET_H
diff --git a/src/latency.c b/src/latency.c
index 9875aa164..cb116fb90 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -512,7 +512,6 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
for (j = 0; j < LATENCY_TS_LEN; j++) {
int i = (ts->idx + j) % LATENCY_TS_LEN;
int elapsed;
- char *label;
char buf[64];
if (ts->samples[i].time == 0) continue;
@@ -534,8 +533,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
else
snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
- label = zstrdup(buf);
- sparklineSequenceAddSample(seq,ts->samples[i].latency,label);
+ sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
}
graph = sdscatprintf(graph,
diff --git a/src/lzfP.h b/src/lzfP.h
index c9eae3f6a..c6d2e096c 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -49,7 +49,7 @@
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
- * For best compression, use 15 or 16 (or more, up to 23).
+ * For best compression, use 15 or 16 (or more, up to 22).
*/
#ifndef HLOG
# define HLOG 16
@@ -94,7 +94,7 @@
/*
* Avoid assigning values to errno variable? for some embedding purposes
* (linux kernel for example), this is necessary. NOTE: this breaks
- * the documentation in lzf.h.
+ * the documentation in lzf.h. Avoiding errno has no speed impact.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
@@ -121,16 +121,52 @@
# define CHECK_INPUT 1
#endif
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architetcures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ */
+/*#define LZF_USER_OFFSETS autodetect */
+
/*****************************************************************************/
/* nothing should be changed below */
+#ifdef __cplusplus
+# include <cstring>
+# include <climits>
+using namespace std;
+#else
+# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+# define LZF_USE_OFFSETS defined(_M_X64)
+# else
+# if __cplusplus > 199711L
+# include <cstdint>
+# else
+# include <stdint.h>
+# endif
+# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
+#endif
+
typedef unsigned char u8;
-typedef const u8 *LZF_STATE[1 << (HLOG)];
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+ typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+ typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
-# include <limits.h>
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
@@ -142,17 +178,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
#endif
#if ULTRA_FAST
-# if defined(VERY_FAST)
-# undef VERY_FAST
-# endif
-#endif
-
-#if INIT_HTAB
-# ifdef __cplusplus
-# include <cstring>
-# else
-# include <string.h>
-# endif
+# undef VERY_FAST
#endif
#endif
diff --git a/src/lzf_c.c b/src/lzf_c.c
index 9e031ad0b..e9c69a0b8 100644
--- a/src/lzf_c.c
+++ b/src/lzf_c.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -40,8 +40,8 @@
/*
* don't play with this unless you benchmark!
- * decompression is not dependent on the hash function
- * the hashing function might seem strange, just believe me
+ * the data format is not dependent on the hash function.
+ * the hash function might seem strange, just believe me,
* it works ;)
*/
#ifndef FRST
@@ -89,9 +89,9 @@
/*
* compressed format
*
- * 000LLLLL <L+1> ; literal
- * LLLooooo oooooooo ; backref L
- * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
*
*/
@@ -106,7 +106,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
- const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
@@ -133,10 +132,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
-# if 0
- for (hslot = htab; hslot < htab + HSIZE; hslot++)
- *hslot++ = ip;
-# endif
#endif
lit = 0; op++; /* start run */
@@ -144,24 +139,23 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
while (ip < in_end - 2)
{
+ LZF_HSLOT *hslot;
+
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
- ref = *hslot; *hslot = ip;
+ ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
&& ref < ip /* the next test will actually take care of this, but this is faster */
#endif
&& (off = ip - ref - 1) < MAX_OFF
- && ip + 4 < in_end
&& ref > (u8 *)in_data
-#if STRICT_ALIGN
- && ref[0] == ip[0]
- && ref[1] == ip[1]
&& ref[2] == ip[2]
+#if STRICT_ALIGN
+ && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
#else
&& *(u16 *)ref == *(u16 *)ip
- && ref[2] == ip[2]
#endif
)
{
@@ -170,12 +164,13 @@ lzf_compress (const void *const in_data, unsigned int in_len,
unsigned int maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
op [- lit - 1] = lit - 1; /* stop run */
op -= !lit; /* undo run if length is zero */
- if (expect_false (op + 3 + 1 >= out_end))
- return 0;
-
for (;;)
{
if (expect_true (maxlen > 16))
@@ -222,6 +217,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
}
*op++ = off;
+
lit = 0; op++; /* start run */
ip += len + 1;
@@ -237,12 +233,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# endif
#else
@@ -251,7 +247,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
do
{
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
}
while (len--);
diff --git a/src/lzf_d.c b/src/lzf_d.c
index 6c723f5e0..c32be8e87 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -43,14 +43,14 @@
# define SET_ERRNO(n) errno = (n)
#endif
-/*
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
-*/
+#endif
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
@@ -86,9 +86,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
- do
- *op++ = *ip++;
- while (--ctrl);
+ switch (ctrl)
+ {
+ case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+ case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+ case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+ case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+ case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+ case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+ case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+ case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+ }
#endif
}
else /* back reference */
@@ -134,12 +142,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
len += 2;
lzf_movsb (op, ref, len);
#else
- *op++ = *ref++;
- *op++ = *ref++;
-
- do
- *op++ = *ref++;
- while (--len);
+ switch (len)
+ {
+ default:
+ len += 2;
+
+ if (op >= ref + len)
+ {
+ /* disjunct areas */
+ memcpy (op, ref, len);
+ op += len;
+ }
+ else
+ {
+ /* overlapping, use octte by octte copying */
+ do
+ *op++ = *ref++;
+ while (--len);
+ }
+
+ break;
+
+ case 9: *op++ = *ref++;
+ case 8: *op++ = *ref++;
+ case 7: *op++ = *ref++;
+ case 6: *op++ = *ref++;
+ case 5: *op++ = *ref++;
+ case 4: *op++ = *ref++;
+ case 3: *op++ = *ref++;
+ case 2: *op++ = *ref++;
+ case 1: *op++ = *ref++;
+ case 0: *op++ = *ref++; /* two octets more */
+ *op++ = *ref++;
+ }
#endif
}
}
diff --git a/src/memtest.c b/src/memtest.c
index 18d821b10..39fc4fcaa 100644
--- a/src/memtest.c
+++ b/src/memtest.c
@@ -35,6 +35,9 @@
#include <errno.h>
#include <termios.h>
#include <sys/ioctl.h>
+#if defined(__sun)
+#include <stropts.h>
+#endif
#include "config.h"
#if (ULONG_MAX == 4294967295UL)
diff --git a/src/networking.c b/src/networking.c
index f10a1c5e2..607d225fd 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -525,6 +525,14 @@ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
addReply(c,shared.crlf);
}
+/* Add sds to reply (takes ownership of sds and frees it) */
+void addReplyBulkSds(redisClient *c, sds s) {
+ addReplySds(c,sdscatfmt(sdsempty(),"$%u\r\n",
+ (unsigned long)sdslen(s)));
+ addReplySds(c,s);
+ addReply(c,shared.crlf);
+}
+
/* Add a C nul term string as bulk reply */
void addReplyBulkCString(redisClient *c, char *s) {
if (s == NULL) {
@@ -839,6 +847,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
*
* However if we are over the maxmemory limit we ignore that and
* just deliver as much data as it is possible to deliver. */
+ server.stat_net_output_bytes += totwritten;
if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
(server.maxmemory == 0 ||
zmalloc_used_memory() < server.maxmemory)) break;
@@ -926,8 +935,10 @@ int processInlineBuffer(redisClient *c) {
sdsrange(c->querybuf,querylen+2,-1);
/* Setup argv array on client structure */
- if (c->argv) zfree(c->argv);
- c->argv = zmalloc(sizeof(robj*)*argc);
+ if (argc) {
+ if (c->argv) zfree(c->argv);
+ c->argv = zmalloc(sizeof(robj*)*argc);
+ }
/* Create redis objects for all arguments. */
for (c->argc = 0, j = 0; j < argc; j++) {
@@ -1179,6 +1190,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
sdsIncrLen(c->querybuf,nread);
c->lastinteraction = server.unixtime;
if (c->flags & REDIS_MASTER) c->reploff += nread;
+ server.stat_net_input_bytes += nread;
} else {
server.current_client = NULL;
return;
@@ -1215,17 +1227,6 @@ void getClientsMaxBuffers(unsigned long *longest_output_list,
*biggest_input_buffer = bib;
}
-/* This is a helper function for genClientPeerId().
- * It writes the specified ip/port to "peerid" as a null termiated string
- * in the form ip:port if ip does not contain ":" itself, otherwise
- * [ip]:port format is used (for IPv6 addresses basically). */
-void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port) {
- if (strchr(ip,':'))
- snprintf(peerid,peerid_len,"[%s]:%d",ip,port);
- else
- snprintf(peerid,peerid_len,"%s:%d",ip,port);
-}
-
/* A Redis "Peer ID" is a colon separated ip:port pair.
* For IPv4 it's in the form x.y.z.k:port, example: "127.0.0.1:1234".
* For IPv6 addresses we use [] around the IP part, like in "[::1]:1234".
@@ -1234,24 +1235,17 @@ void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port) {
* A Peer ID always fits inside a buffer of REDIS_PEER_ID_LEN bytes, including
* the null term.
*
- * The function returns REDIS_OK on succcess, and REDIS_ERR on failure.
- *
* On failure the function still populates 'peerid' with the "?:0" string
* in case you want to relax error checking or need to display something
* anyway (see anetPeerToString implementation for more info). */
-int genClientPeerId(redisClient *client, char *peerid, size_t peerid_len) {
- char ip[REDIS_IP_STR_LEN];
- int port;
-
+void genClientPeerId(redisClient *client, char *peerid,
+ size_t peerid_len) {
if (client->flags & REDIS_UNIX_SOCKET) {
/* Unix socket client. */
snprintf(peerid,peerid_len,"%s:0",server.unixsocket);
- return REDIS_OK;
} else {
/* TCP client. */
- int retval = anetPeerToString(client->fd,ip,sizeof(ip),&port);
- formatPeerId(peerid,peerid_len,ip,port);
- return (retval == -1) ? REDIS_ERR : REDIS_OK;
+ anetFormatPeer(client->fd,peerid,peerid_len);
}
}
diff --git a/src/object.c b/src/object.c
index 6b8e42477..f75421ee8 100644
--- a/src/object.c
+++ b/src/object.c
@@ -109,26 +109,44 @@ robj *createStringObjectFromLongLong(long long value) {
return o;
}
-/* Note: this function is defined into object.c since here it is where it
- * belongs but it is actually designed to be used just for INCRBYFLOAT */
-robj *createStringObjectFromLongDouble(long double value) {
+/* Create a string object from a long double. If humanfriendly is non-zero
+ * it does not use exponential format and trims trailing zeroes at the end,
+ * however this results in loss of precision. Otherwise exp format is used
+ * and the output of snprintf() is not modified.
+ *
+ * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
char buf[256];
int len;
- /* We use 17 digits precision since with 128 bit floats that precision
- * after rounding is able to represent most small decimal numbers in a way
- * that is "non surprising" for the user (that is, most small decimal
- * numbers will be represented in a way that when converted back into
- * a string are exactly the same as what the user typed.) */
- len = snprintf(buf,sizeof(buf),"%.17Lf", value);
- /* Now remove trailing zeroes after the '.' */
- if (strchr(buf,'.') != NULL) {
- char *p = buf+len-1;
- while(*p == '0') {
- p--;
- len--;
+ if (isinf(value)) {
+ /* Libc in odd systems (Hi Solaris!) will format infinite in a
+ * different way, so better to handle it in an explicit way. */
+ if (value > 0) {
+ memcpy(buf,"inf",3);
+ len = 3;
+ } else {
+ memcpy(buf,"-inf",4);
+ len = 4;
}
- if (*p == '.') len--;
+ } else if (humanfriendly) {
+ /* We use 17 digits precision since with 128 bit floats that precision
+ * after rounding is able to represent most small decimal numbers in a
+ * way that is "non surprising" for the user (that is, most small
+ * decimal numbers will be represented in a way that when converted
+ * back into a string are exactly the same as what the user typed.) */
+ len = snprintf(buf,sizeof(buf),"%.17Lf", value);
+ /* Now remove trailing zeroes after the '.' */
+ if (strchr(buf,'.') != NULL) {
+ char *p = buf+len-1;
+ while(*p == '0') {
+ p--;
+ len--;
+ }
+ if (*p == '.') len--;
+ }
+ } else {
+ len = snprintf(buf,sizeof(buf),"%.17Lg", value);
}
return createStringObject(buf,len);
}
@@ -162,11 +180,10 @@ robj *dupStringObject(robj *o) {
}
}
-robj *createListObject(void) {
- list *l = listCreate();
+robj *createQuicklistObject(void) {
+ quicklist *l = quicklistCreate();
robj *o = createObject(REDIS_LIST,l);
- listSetFreeMethod(l,decrRefCountVoid);
- o->encoding = REDIS_ENCODING_LINKEDLIST;
+ o->encoding = REDIS_ENCODING_QUICKLIST;
return o;
}
@@ -224,11 +241,8 @@ void freeStringObject(robj *o) {
void freeListObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_LINKEDLIST:
- listRelease((list*) o->ptr);
- break;
- case REDIS_ENCODING_ZIPLIST:
- zfree(o->ptr);
+ case REDIS_ENCODING_QUICKLIST:
+ quicklistRelease(o->ptr);
break;
default:
redisPanic("Unknown list encoding type");
@@ -660,7 +674,7 @@ char *strEncoding(int encoding) {
case REDIS_ENCODING_RAW: return "raw";
case REDIS_ENCODING_INT: return "int";
case REDIS_ENCODING_HT: return "hashtable";
- case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
+ case REDIS_ENCODING_QUICKLIST: return "quicklist";
case REDIS_ENCODING_ZIPLIST: return "ziplist";
case REDIS_ENCODING_INTSET: return "intset";
case REDIS_ENCODING_SKIPLIST: return "skiplist";
diff --git a/src/quicklist.c b/src/quicklist.c
new file mode 100644
index 000000000..6682b2087
--- /dev/null
+++ b/src/quicklist.c
@@ -0,0 +1,2639 @@
+/* quicklist.c - A doubly linked list of ziplists
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must start the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h> /* for memcpy */
+#include "quicklist.h"
+#include "zmalloc.h"
+#include "ziplist.h"
+#include "util.h" /* for ll2string */
+#include "lzf.h"
+
+#if defined(REDIS_TEST) || defined(REDIS_TEST_VERBOSE)
+#include <stdio.h> /* for printf (debug printing), snprintf (genstr) */
+#endif
+
+#ifndef REDIS_STATIC
+#define REDIS_STATIC static
+#endif
+
+/* Optimization levels for size-based filling */
+static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};
+
+/* Maximum size in bytes of any multi-element ziplist.
+ * Larger values will live in their own isolated ziplists. */
+#define SIZE_SAFETY_LIMIT 8192
+
+/* Minimum ziplist size in bytes for attempting compression. */
+#define MIN_COMPRESS_BYTES 48
+
+/* Minimum size reduction in bytes to store compressed quicklistNode data.
+ * This also prevents us from storing compression if the compression
+ * resulted in a larger size than the original data. */
+#define MIN_COMPRESS_IMPROVE 8
+
+/* If not verbose testing, remove all debug printing. */
+#ifndef REDIS_TEST_VERBOSE
+#define D(...)
+#else
+#define D(...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+ printf("\n"); \
+ } while (0);
+#endif
+
+/* Simple way to give quicklistEntry structs default values with one call. */
+#define initEntry(e) \
+ do { \
+ (e)->zi = (e)->value = NULL; \
+ (e)->longval = -123456789; \
+ (e)->quicklist = NULL; \
+ (e)->node = NULL; \
+ (e)->offset = 123456789; \
+ (e)->sz = 0; \
+ } while (0)
+
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+/* Create a new quicklist.
+ * Free with quicklistRelease(). */
+quicklist *quicklistCreate(void) {
+ struct quicklist *quicklist;
+
+ quicklist = zmalloc(sizeof(*quicklist));
+ quicklist->head = quicklist->tail = NULL;
+ quicklist->len = 0;
+ quicklist->count = 0;
+ quicklist->compress = 0;
+ quicklist->fill = -2;
+ return quicklist;
+}
+
+#define COMPRESS_MAX (1 << 16)
+void quicklistSetCompressDepth(quicklist *quicklist, int compress) {
+ if (compress > COMPRESS_MAX) {
+ compress = COMPRESS_MAX;
+ } else if (compress < 0) {
+ compress = 0;
+ }
+ quicklist->compress = compress;
+}
+
+#define FILL_MAX (1 << 15)
+void quicklistSetFill(quicklist *quicklist, int fill) {
+ if (fill > FILL_MAX) {
+ fill = FILL_MAX;
+ } else if (fill < -5) {
+ fill = -5;
+ }
+ quicklist->fill = fill;
+}
+
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth) {
+ quicklistSetFill(quicklist, fill);
+ quicklistSetCompressDepth(quicklist, depth);
+}
+
+/* Create a new quicklist with some default parameters. */
+quicklist *quicklistNew(int fill, int compress) {
+ quicklist *quicklist = quicklistCreate();
+ quicklistSetOptions(quicklist, fill, compress);
+ return quicklist;
+}
+
+REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
+ quicklistNode *node;
+ node = zmalloc(sizeof(*node));
+ node->zl = NULL;
+ node->count = 0;
+ node->sz = 0;
+ node->next = node->prev = NULL;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ node->container = QUICKLIST_NODE_CONTAINER_ZIPLIST;
+ node->recompress = 0;
+ return node;
+}
+
+/* Return cached quicklist count */
+unsigned int quicklistCount(quicklist *ql) { return ql->count; }
+
+/* Free entire quicklist. */
+void quicklistRelease(quicklist *quicklist) {
+ unsigned long len;
+ quicklistNode *current, *next;
+
+ current = quicklist->head;
+ len = quicklist->len;
+ while (len--) {
+ next = current->next;
+
+ zfree(current->zl);
+ quicklist->count -= current->count;
+
+ zfree(current);
+
+ quicklist->len--;
+ current = next;
+ }
+ zfree(quicklist);
+}
+
+/* Compress the ziplist in 'node' and update encoding details.
+ * Returns 1 if ziplist compressed successfully.
+ * Returns 0 if compression failed or if ziplist too small to compress. */
+REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+ node->attempted_compress = 1;
+#endif
+
+ /* Don't bother compressing small values */
+ if (node->sz < MIN_COMPRESS_BYTES)
+ return 0;
+
+ quicklistLZF *lzf = zmalloc(sizeof(*lzf) + node->sz);
+
+ /* Cancel if compression fails or doesn't compress small enough */
+ if (((lzf->sz = lzf_compress(node->zl, node->sz, lzf->compressed,
+ node->sz)) == 0) ||
+ lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {
+ /* lzf_compress aborts/rejects compression if value not compressable. */
+ zfree(lzf);
+ return 0;
+ }
+ lzf = zrealloc(lzf, sizeof(*lzf) + lzf->sz);
+ zfree(node->zl);
+ node->zl = (unsigned char *)lzf;
+ node->encoding = QUICKLIST_NODE_ENCODING_LZF;
+ node->recompress = 0;
+ return 1;
+}
+
+/* Compress only uncompressed nodes. */
+#define quicklistCompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_RAW) { \
+ __quicklistCompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Uncompress the ziplist in 'node' and update encoding details.
+ * Returns 1 on successful decode, 0 on failure to decode. */
+REDIS_STATIC int __quicklistDecompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+ node->attempted_compress = 0;
+#endif
+
+ void *decompressed = zmalloc(node->sz);
+ quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) {
+ /* Someone requested decompress, but we can't decompress. Not good. */
+ zfree(decompressed);
+ return 0;
+ }
+ zfree(lzf);
+ node->zl = decompressed;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ return 1;
+}
+
+/* Decompress only compressed nodes. */
+#define quicklistDecompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Force node to not be immediately re-compresable */
+#define quicklistDecompressNodeForUse(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ (_node)->recompress = 1; \
+ } \
+ } while (0)
+
+/* Extract the raw LZF data from this quicklistNode.
+ * Pointer to LZF data is assigned to '*data'.
+ * Return value is the length of compressed LZF data. */
+size_t quicklistGetLzf(const quicklistNode *node, void **data) {
+ quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ *data = lzf->compressed;
+ return lzf->sz;
+}
+
+#define quicklistAllowsCompression(_ql) ((_ql)->compress != 0)
+
+/* Force 'quicklist' to meet compression guidelines set by compress depth.
+ * The only way to guarantee interior nodes get compressed is to iterate
+ * to our "interior" compress depth then compress the next node we find.
+ * If compress depth is larger than the entire list, we return immediately. */
+REDIS_STATIC void __quicklistCompress(const quicklist *quicklist,
+ quicklistNode *node) {
+ /* If length is less than our compress depth (from both sides),
+ * we can't compress anything. */
+ if (!quicklistAllowsCompression(quicklist) ||
+ quicklist->len < (unsigned int)(quicklist->compress * 2))
+ return;
+
+#if 0
+ /* Optimized cases for small depth counts */
+ if (quicklist->compress == 1) {
+ quicklistNode *h = quicklist->head, *t = quicklist->tail;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(t);
+ if (h != node && t != node)
+ quicklistCompressNode(node);
+ return;
+ } else if (quicklist->compress == 2) {
+ quicklistNode *h = quicklist->head, *hn = h->next, *hnn = hn->next;
+ quicklistNode *t = quicklist->tail, *tp = t->prev, *tpp = tp->prev;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(hn);
+ quicklistDecompressNode(t);
+ quicklistDecompressNode(tp);
+ if (h != node && hn != node && t != node && tp != node) {
+ quicklistCompressNode(node);
+ }
+ if (hnn != t) {
+ quicklistCompressNode(hnn);
+ }
+ if (tpp != h) {
+ quicklistCompressNode(tpp);
+ }
+ return;
+ }
+#endif
+
+ /* Iterate until we reach compress depth for both sides of the list.a
+ * Note: because we do length checks at the *top* of this function,
+ * we can skip explicit null checks below. Everything exists. */
+ quicklistNode *forward = quicklist->head;
+ quicklistNode *reverse = quicklist->tail;
+ int depth = 0;
+ int in_depth = 0;
+ while (depth++ < quicklist->compress) {
+ quicklistDecompressNode(forward);
+ quicklistDecompressNode(reverse);
+
+ if (forward == node || reverse == node)
+ in_depth = 1;
+
+ if (forward == reverse)
+ return;
+
+ forward = forward->next;
+ reverse = reverse->prev;
+ }
+
+ if (!in_depth)
+ quicklistCompressNode(node);
+
+ if (depth > 2) {
+ /* At this point, forward and reverse are one node beyond depth */
+ quicklistCompressNode(forward);
+ quicklistCompressNode(reverse);
+ }
+}
+
+#define quicklistCompress(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ else \
+ __quicklistCompress((_ql), (_node)); \
+ } while (0)
+
+/* If we previously used quicklistDecompressNodeForUse(), just recompress. */
+#define quicklistRecompressOnly(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ } while (0)
+
+/* Insert 'new_node' after 'old_node' if 'after' is 1.
+ * Insert 'new_node' before 'old_node' if 'after' is 0.
+ * Note: 'new_node' is *always* uncompressed, so if we assign it to
+ * head or tail, we do not need to uncompress it. */
+REDIS_STATIC void __quicklistInsertNode(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node, int after) {
+ if (after) {
+ new_node->prev = old_node;
+ if (old_node) {
+ new_node->next = old_node->next;
+ if (old_node->next)
+ old_node->next->prev = new_node;
+ old_node->next = new_node;
+ }
+ if (quicklist->tail == old_node)
+ quicklist->tail = new_node;
+ } else {
+ new_node->next = old_node;
+ if (old_node) {
+ new_node->prev = old_node->prev;
+ if (old_node->prev)
+ old_node->prev->next = new_node;
+ old_node->prev = new_node;
+ }
+ if (quicklist->head == old_node)
+ quicklist->head = new_node;
+ }
+ /* If this insert creates the only element so far, initialize head/tail. */
+ if (quicklist->len == 0) {
+ quicklist->head = quicklist->tail = new_node;
+ }
+
+ if (old_node)
+ quicklistCompress(quicklist, old_node);
+
+ quicklist->len++;
+}
+
+/* Wrappers for node inserting around existing node. */
+REDIS_STATIC void _quicklistInsertNodeBefore(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 0);
+}
+
+REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 1);
+}
+
+REDIS_STATIC int
+_quicklistNodeSizeMeetsOptimizationRequirement(const size_t sz,
+ const int fill) {
+ if (fill >= 0)
+ return 0;
+
+ size_t offset = (-fill) - 1;
+ if (offset < (sizeof(optimization_level) / sizeof(*optimization_level))) {
+ if (sz <= optimization_level[offset]) {
+ return 1;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
+
+#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)
+
+REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node,
+ const int fill, const size_t sz) {
+ if (unlikely(!node))
+ return 0;
+
+ int ziplist_overhead;
+ /* size of previous offset */
+ if (sz < 254)
+ ziplist_overhead = 1;
+ else
+ ziplist_overhead = 5;
+
+ /* size of forward offset */
+ if (sz < 64)
+ ziplist_overhead += 1;
+ else if (likely(sz < 16384))
+ ziplist_overhead += 2;
+ else
+ ziplist_overhead += 5;
+
+ /* new_sz overestimates if 'sz' encodes to an integer type */
+ unsigned int new_sz = node->sz + sz + ziplist_overhead;
+ if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(new_sz, fill)))
+ return 1;
+ else if (!sizeMeetsSafetyLimit(new_sz))
+ return 0;
+ else if ((int)node->count < fill)
+ return 1;
+ else
+ return 0;
+}
+
+REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a,
+ const quicklistNode *b,
+ const int fill) {
+ if (!a || !b)
+ return 0;
+
+ /* approximate merged ziplist size (- 11 to remove one ziplist
+ * header/trailer) */
+ unsigned int merge_sz = a->sz + b->sz - 11;
+ if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(merge_sz, fill)))
+ return 1;
+ else if (!sizeMeetsSafetyLimit(merge_sz))
+ return 0;
+ else if ((int)(a->count + b->count) <= fill)
+ return 1;
+ else
+ return 0;
+}
+
+#define quicklistNodeUpdateSz(node) \
+ do { \
+ (node)->sz = ziplistBlobLen((node)->zl); \
+ } while (0)
+
+/* Add new entry to head node of quicklist.
+ *
+ * Returns 0 if used existing head.
+ * Returns 1 if new head created. */
+int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_head = quicklist->head;
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->head, quicklist->fill, sz))) {
+ quicklist->head->zl =
+ ziplistPush(quicklist->head->zl, value, sz, ZIPLIST_HEAD);
+ quicklistNodeUpdateSz(quicklist->head);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeBefore(quicklist, quicklist->head, node);
+ }
+ quicklist->count++;
+ quicklist->head->count++;
+ return (orig_head != quicklist->head);
+}
+
+/* Add new entry to tail node of quicklist.
+ *
+ * Returns 0 if used existing tail.
+ * Returns 1 if new tail created. */
+int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_tail = quicklist->tail;
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->tail, quicklist->fill, sz))) {
+ quicklist->tail->zl =
+ ziplistPush(quicklist->tail->zl, value, sz, ZIPLIST_TAIL);
+ quicklistNodeUpdateSz(quicklist->tail);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_TAIL);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ }
+ quicklist->count++;
+ quicklist->tail->count++;
+ return (orig_tail != quicklist->tail);
+}
+
+/* Create new node consisting of a pre-formed ziplist.
+ * Used for loading RDBs where entire ziplists have been stored
+ * to be retrieved later. */
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl) {
+ quicklistNode *node = quicklistCreateNode();
+
+ node->zl = zl;
+ node->count = ziplistLen(node->zl);
+ node->sz = ziplistBlobLen(zl);
+
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ quicklist->count += node->count;
+}
+
+/* Append all values of ziplist 'zl' individually into 'quicklist'.
+ *
+ * This allows us to restore old RDB ziplists into new quicklists
+ * with smaller ziplist sizes than the saved RDB ziplist.
+ *
+ * Returns 'quicklist' argument. Frees passed-in ziplist 'zl' */
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+ unsigned char *zl) {
+ unsigned char *value;
+ unsigned int sz;
+ long long longval;
+ char longstr[32] = {0};
+
+ unsigned char *p = ziplistIndex(zl, 0);
+ while (ziplistGet(p, &value, &sz, &longval)) {
+ if (!value) {
+ /* Write the longval as a string so we can re-add it */
+ sz = ll2string(longstr, sizeof(longstr), longval);
+ value = (unsigned char *)longstr;
+ }
+ quicklistPushTail(quicklist, value, sz);
+ p = ziplistNext(zl, p);
+ }
+ zfree(zl);
+ return quicklist;
+}
+
+/* Create new (potentially multi-node) quicklist from a single existing ziplist.
+ *
+ * Returns new quicklist. Frees passed-in ziplist 'zl'. */
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+ unsigned char *zl) {
+ return quicklistAppendValuesFromZiplist(quicklistNew(fill, compress), zl);
+}
+
+#define quicklistDeleteIfEmpty(ql, n) \
+ do { \
+ if ((n)->count == 0) { \
+ __quicklistDelNode((ql), (n)); \
+ (n) = NULL; \
+ } \
+ } while (0)
+
+REDIS_STATIC void __quicklistDelNode(quicklist *quicklist,
+ quicklistNode *node) {
+ if (node->next)
+ node->next->prev = node->prev;
+ if (node->prev)
+ node->prev->next = node->next;
+
+ if (node == quicklist->tail) {
+ quicklist->tail = node->prev;
+ }
+
+ if (node == quicklist->head) {
+ quicklist->head = node->next;
+ }
+
+ /* If we deleted a node within our compress depth, we
+ * now have compressed nodes needing to be decompressed. */
+ __quicklistCompress(quicklist, NULL);
+
+ quicklist->count -= node->count;
+
+ zfree(node->zl);
+ zfree(node);
+ quicklist->len--;
+}
+
+/* Delete one entry from list given the node for the entry and a pointer
+ * to the entry in the node.
+ *
+ * Note: quicklistDelIndex() *requires* uncompressed nodes because you
+ * already had to get *p from an uncompressed node somewhere.
+ *
+ * Returns 1 if the entire node was deleted, 0 if node still exists.
+ * Also updates in/out param 'p' with the next offset in the ziplist. */
+REDIS_STATIC int quicklistDelIndex(quicklist *quicklist, quicklistNode *node,
+ unsigned char **p) {
+ int gone = 0;
+
+ node->zl = ziplistDelete(node->zl, p);
+ node->count--;
+ if (node->count == 0) {
+ gone = 1;
+ __quicklistDelNode(quicklist, node);
+ } else {
+ quicklistNodeUpdateSz(node);
+ }
+ quicklist->count--;
+ /* If we deleted the node, the original node is no longer valid */
+ return gone ? 1 : 0;
+}
+
+/* Delete one element represented by 'entry'
+ *
+ * 'entry' stores enough metadata to delete the proper position in
+ * the correct ziplist in the correct quicklist node. */
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry) {
+ quicklistNode *prev = entry->node->prev;
+ quicklistNode *next = entry->node->next;
+ int deleted_node = quicklistDelIndex((quicklist *)entry->quicklist,
+ entry->node, &entry->zi);
+
+ /* after delete, the zi is now invalid for any future usage. */
+ iter->zi = NULL;
+
+ /* If current node is deleted, we must update iterator node and offset. */
+ if (deleted_node) {
+ if (iter->direction == AL_START_HEAD) {
+ iter->current = next;
+ iter->offset = 0;
+ } else if (iter->direction == AL_START_TAIL) {
+ iter->current = prev;
+ iter->offset = -1;
+ }
+ }
+ /* else if (!deleted_node), no changes needed.
+ * we already reset iter->zi above, and the existing iter->offset
+ * doesn't move again because:
+ * - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1
+ * - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0
+ * if we deleted the last element at offet N and now
+ * length of this ziplist is N-1, the next call into
+ * quicklistNext() will jump to the next node. */
+}
+
+/* Replace quicklist entry at offset 'index' by 'data' with length 'sz'.
+ *
+ * Returns 1 if replace happened.
+ * Returns 0 if replace failed and no changes happened. */
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ int sz) {
+ quicklistEntry entry;
+ if (likely(quicklistIndex(quicklist, index, &entry))) {
+ /* quicklistIndex provides an uncompressed node */
+ entry.node->zl = ziplistDelete(entry.node->zl, &entry.zi);
+ entry.node->zl = ziplistInsert(entry.node->zl, entry.zi, data, sz);
+ quicklistCompress(quicklist, entry.node);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Given two nodes, try to merge their ziplists.
+ *
+ * This helps us not have a quicklist with 3 element ziplists if
+ * our fill factor can handle much higher levels.
+ *
+ * Note: 'a' must be to the LEFT of 'b'.
+ *
+ * After calling this function, both 'a' and 'b' should be considered
+ * unusable. The return value from this function must be used
+ * instead of re-using any of the quicklistNode input arguments.
+ *
+ * Returns the input node picked to merge against or NULL if
+ * merging was not possible. */
+REDIS_STATIC quicklistNode *_quicklistZiplistMerge(quicklist *quicklist,
+ quicklistNode *a,
+ quicklistNode *b) {
+ D("Requested merge (a,b) (%u, %u)", a->count, b->count);
+
+ quicklistDecompressNode(a);
+ quicklistDecompressNode(b);
+ if ((ziplistMerge(&a->zl, &b->zl))) {
+ /* We merged ziplists! Now remove the unused quicklistNode. */
+ quicklistNode *keep = NULL, *nokeep = NULL;
+ if (!a->zl) {
+ nokeep = a;
+ keep = b;
+ } else if (!b->zl) {
+ nokeep = b;
+ keep = a;
+ }
+ keep->count = ziplistLen(keep->zl);
+ quicklistNodeUpdateSz(keep);
+
+ nokeep->count = 0;
+ __quicklistDelNode(quicklist, nokeep);
+ quicklistCompress(quicklist, keep);
+ return keep;
+ } else {
+ /* else, the merge returned NULL and nothing changed. */
+ return NULL;
+ }
+}
+
+/* Attempt to merge ziplists within two nodes on either side of 'center'.
+ *
+ * We attempt to merge:
+ * - (center->prev->prev, center->prev)
+ * - (center->next, center->next->next)
+ * - (center->prev, center)
+ * - (center, center->next)
+ */
+REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist,
+ quicklistNode *center) {
+ int fill = quicklist->fill;
+ quicklistNode *prev, *prev_prev, *next, *next_next, *target;
+ prev = prev_prev = next = next_next = target = NULL;
+
+ if (center->prev) {
+ prev = center->prev;
+ if (center->prev->prev)
+ prev_prev = center->prev->prev;
+ }
+
+ if (center->next) {
+ next = center->next;
+ if (center->next->next)
+ next_next = center->next->next;
+ }
+
+ /* Try to merge prev_prev and prev */
+ if (_quicklistNodeAllowMerge(prev, prev_prev, fill)) {
+ _quicklistZiplistMerge(quicklist, prev_prev, prev);
+ prev_prev = prev = NULL; /* they could have moved, invalidate them. */
+ }
+
+ /* Try to merge next and next_next */
+ if (_quicklistNodeAllowMerge(next, next_next, fill)) {
+ _quicklistZiplistMerge(quicklist, next, next_next);
+ next = next_next = NULL; /* they could have moved, invalidate them. */
+ }
+
+ /* Try to merge center node and previous node */
+ if (_quicklistNodeAllowMerge(center, center->prev, fill)) {
+ target = _quicklistZiplistMerge(quicklist, center->prev, center);
+ center = NULL; /* center could have been deleted, invalidate it. */
+ } else {
+ /* else, we didn't merge here, but target needs to be valid below. */
+ target = center;
+ }
+
+ /* Use result of center merge (or original) to merge with next node. */
+ if (_quicklistNodeAllowMerge(target, target->next, fill)) {
+ _quicklistZiplistMerge(quicklist, target, target->next);
+ }
+}
+
+/* Split 'node' into two parts, parameterized by 'offset' and 'after'.
+ *
+ * The 'after' argument controls which quicklistNode gets returned.
+ * If 'after'==1, returned node has elements after 'offset'.
+ * input node keeps elements up to 'offset', including 'offset'.
+ * If 'after'==0, returned node has elements up to 'offset', including 'offset'.
+ * input node keeps elements after 'offset'.
+ *
+ * If 'after'==1, returned node will have elements _after_ 'offset'.
+ * The returned node will have elements [OFFSET+1, END].
+ * The input node keeps elements [0, OFFSET].
+ *
+ * If 'after'==0, returned node will keep elements up to and including 'offset'.
+ * The returned node will have elements [0, OFFSET].
+ * The input node keeps elements [OFFSET+1, END].
+ *
+ * The input node keeps all elements not taken by the returned node.
+ *
+ * Returns newly created node or NULL if split not possible. */
+REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset,
+ int after) {
+ size_t zl_sz = node->sz;
+
+ quicklistNode *new_node = quicklistCreateNode();
+ new_node->zl = zmalloc(zl_sz);
+
+ /* Copy original ziplist so we can split it */
+ memcpy(new_node->zl, node->zl, zl_sz);
+
+ /* -1 here means "continue deleting until the list ends" */
+ int orig_start = after ? offset + 1 : 0;
+ int orig_extent = after ? -1 : offset;
+ int new_start = after ? 0 : offset;
+ int new_extent = after ? offset + 1 : -1;
+
+ D("After %d (%d); ranges: [%d, %d], [%d, %d]", after, offset, orig_start,
+ orig_extent, new_start, new_extent);
+
+ node->zl = ziplistDeleteRange(node->zl, orig_start, orig_extent);
+ node->count = ziplistLen(node->zl);
+ quicklistNodeUpdateSz(node);
+
+ new_node->zl = ziplistDeleteRange(new_node->zl, new_start, new_extent);
+ new_node->count = ziplistLen(new_node->zl);
+ quicklistNodeUpdateSz(new_node);
+
+ D("After split lengths: orig (%d), new (%d)", node->count, new_node->count);
+ return new_node;
+}
+
+/* Insert a new entry before or after existing entry 'entry'.
+ *
+ * If after==1, the new value is inserted after 'entry', otherwise
+ * the new value is inserted before 'entry'. */
+REDIS_STATIC void _quicklistInsert(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz, int after) {
+ int full = 0, at_tail = 0, at_head = 0, full_next = 0, full_prev = 0;
+ int fill = quicklist->fill;
+ quicklistNode *node = entry->node;
+ quicklistNode *new_node = NULL;
+
+ if (!node) {
+ /* we have no reference node, so let's create only node in the list */
+ D("No node given!");
+ new_node = quicklistCreateNode();
+ new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+ __quicklistInsertNode(quicklist, NULL, new_node, after);
+ new_node->count++;
+ quicklist->count++;
+ return;
+ }
+
+ /* Populate accounting flags for easier boolean checks later */
+ if (!_quicklistNodeAllowInsert(node, fill, sz)) {
+ D("Current node is full with count %d with requested fill %lu",
+ node->count, fill);
+ full = 1;
+ }
+
+ if (after && (entry->offset == node->count)) {
+ D("At Tail of current ziplist");
+ at_tail = 1;
+ if (!_quicklistNodeAllowInsert(node->next, fill, sz)) {
+ D("Next node is full too.");
+ full_next = 1;
+ }
+ }
+
+ if (!after && (entry->offset == 0)) {
+ D("At Head");
+ at_head = 1;
+ if (!_quicklistNodeAllowInsert(node->prev, fill, sz)) {
+ D("Prev node is full too.");
+ full_prev = 1;
+ }
+ }
+
+ /* Now determine where and how to insert the new element */
+ if (!full && after) {
+ D("Not full, inserting after current position.");
+ quicklistDecompressNodeForUse(node);
+ unsigned char *next = ziplistNext(node->zl, entry->zi);
+ if (next == NULL) {
+ node->zl = ziplistPush(node->zl, value, sz, ZIPLIST_TAIL);
+ } else {
+ node->zl = ziplistInsert(node->zl, next, value, sz);
+ }
+ node->count++;
+ quicklistNodeUpdateSz(node);
+ quicklistRecompressOnly(quicklist, node);
+ } else if (!full && !after) {
+ D("Not full, inserting before current position.");
+ quicklistDecompressNodeForUse(node);
+ node->zl = ziplistInsert(node->zl, entry->zi, value, sz);
+ node->count++;
+ quicklistNodeUpdateSz(node);
+ quicklistRecompressOnly(quicklist, node);
+ } else if (full && at_tail && node->next && !full_next && after) {
+ /* If we are: at tail, next has free space, and inserting after:
+ * - insert entry at head of next node. */
+ D("Full and tail, but next isn't full; inserting next node head");
+ new_node = node->next;
+ quicklistDecompressNodeForUse(new_node);
+ new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_HEAD);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ quicklistRecompressOnly(quicklist, new_node);
+ } else if (full && at_head && node->prev && !full_prev && !after) {
+ /* If we are: at head, previous has free space, and inserting before:
+ * - insert entry at tail of previous node. */
+ D("Full and head, but prev isn't full, inserting prev node tail");
+ new_node = node->prev;
+ quicklistDecompressNodeForUse(new_node);
+ new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_TAIL);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ quicklistRecompressOnly(quicklist, new_node);
+ } else if (full && ((at_tail && node->next && full_next && after) ||
+ (at_head && node->prev && full_prev && !after))) {
+ /* If we are: full, and our prev/next is full, then:
+ * - create new node and attach to quicklist */
+ D("\tprovisioning new node...");
+ new_node = quicklistCreateNode();
+ new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ __quicklistInsertNode(quicklist, node, new_node, after);
+ } else if (full) {
+ /* else, node is full we need to split it. */
+ /* covers both after and !after cases */
+ D("\tsplitting node...");
+ quicklistDecompressNodeForUse(node);
+ new_node = _quicklistSplitNode(node, entry->offset, after);
+ new_node->zl = ziplistPush(new_node->zl, value, sz,
+ after ? ZIPLIST_HEAD : ZIPLIST_TAIL);
+ new_node->count++;
+ quicklistNodeUpdateSz(new_node);
+ __quicklistInsertNode(quicklist, node, new_node, after);
+ _quicklistMergeNodes(quicklist, node);
+ }
+
+ quicklist->count++;
+}
+
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 0);
+}
+
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 1);
+}
+
+/* Delete a range of elements from the quicklist.
+ *
+ * elements may span across multiple quicklistNodes, so we
+ * have to be careful about tracking where we start and end.
+ *
+ * Returns 1 if entries were deleted, 0 if nothing was deleted. */
+int quicklistDelRange(quicklist *quicklist, const long start,
+ const long count) {
+ if (count <= 0)
+ return 0;
+
+ unsigned long extent = count; /* range is inclusive of start position */
+
+ if (start >= 0 && extent > (quicklist->count - start)) {
+ /* if requesting delete more elements than exist, limit to list size. */
+ extent = quicklist->count - start;
+ } else if (start < 0 && extent > (unsigned long)(-start)) {
+ /* else, if at negative offset, limit max size to rest of list. */
+ extent = -start; /* c.f. LREM -29 29; just delete until end. */
+ }
+
+ quicklistEntry entry;
+ if (!quicklistIndex(quicklist, start, &entry))
+ return 0;
+
+ D("Quicklist delete request for start %ld, count %ld, extent: %ld", start,
+ count, extent);
+ quicklistNode *node = entry.node;
+
+ /* iterate over next nodes until everything is deleted. */
+ while (extent) {
+ quicklistNode *next = node->next;
+
+ unsigned long del;
+ int delete_entire_node = 0;
+ if (entry.offset == 0 && extent >= node->count) {
+ /* If we are deleting more than the count of this node, we
+ * can just delete the entire node without ziplist math. */
+ delete_entire_node = 1;
+ del = node->count;
+ } else if (entry.offset >= 0 && extent >= node->count) {
+ /* If deleting more nodes after this one, calculate delete based
+ * on size of current node. */
+ del = node->count - entry.offset;
+ } else if (entry.offset < 0) {
+ /* If offset is negative, we are in the first run of this loop
+ * and we are deleting the entire range
+ * from this start offset to end of list. Since the Negative
+ * offset is the number of elements until the tail of the list,
+ * just use it directly as the deletion count. */
+ del = -entry.offset;
+
+ /* If the positive offset is greater than the remaining extent,
+ * we only delete the remaining extent, not the entire offset.
+ */
+ if (del > extent)
+ del = extent;
+ } else {
+ /* else, we are deleting less than the extent of this node, so
+ * use extent directly. */
+ del = extent;
+ }
+
+ D("[%ld]: asking to del: %ld because offset: %d; (ENTIRE NODE: %d), "
+ "node count: %u",
+ extent, del, entry.offset, delete_entire_node, node->count);
+
+ if (delete_entire_node) {
+ __quicklistDelNode(quicklist, node);
+ } else {
+ quicklistDecompressNodeForUse(node);
+ node->zl = ziplistDeleteRange(node->zl, entry.offset, del);
+ quicklistNodeUpdateSz(node);
+ node->count -= del;
+ quicklist->count -= del;
+ quicklistDeleteIfEmpty(quicklist, node);
+ if (node)
+ quicklistRecompressOnly(quicklist, node);
+ }
+
+ extent -= del;
+
+ node = next;
+
+ entry.offset = 0;
+ }
+ return 1;
+}
+
+/* Passthrough to ziplistCompare() */
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len) {
+ return ziplistCompare(p1, p2, p2_len);
+}
+
+/* Returns a quicklist iterator 'iter'. After the initialization every
+ * call to quicklistNext() will return the next element of the quicklist. */
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction) {
+ quicklistIter *iter;
+
+ iter = zmalloc(sizeof(*iter));
+
+ if (direction == AL_START_HEAD) {
+ iter->current = quicklist->head;
+ iter->offset = 0;
+ } else if (direction == AL_START_TAIL) {
+ iter->current = quicklist->tail;
+ iter->offset = -1;
+ }
+
+ iter->direction = direction;
+ iter->quicklist = quicklist;
+
+ iter->zi = NULL;
+
+ return iter;
+}
+
+/* Initialize an iterator at a specific offset 'idx' and make the iterator
+ * return nodes in 'direction' direction. */
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+ const int direction,
+ const long long idx) {
+ quicklistEntry entry;
+
+ if (quicklistIndex(quicklist, idx, &entry)) {
+ quicklistIter *base = quicklistGetIterator(quicklist, direction);
+ base->zi = NULL;
+ base->current = entry.node;
+ base->offset = entry.offset;
+ return base;
+ } else {
+ return NULL;
+ }
+}
+
+/* Release iterator.
+ * If we still have a valid current node, then re-encode current node. */
+void quicklistReleaseIterator(quicklistIter *iter) {
+ if (iter->current)
+ quicklistCompress(iter->quicklist, iter->current);
+
+ zfree(iter);
+}
+
+/* Get next element in iterator.
+ *
+ * Note: You must NOT insert into the list while iterating over it.
+ * You *may* delete from the list while iterating using the
+ * quicklistDelEntry() function.
+ * If you insert into the quicklist while iterating, you should
+ * re-create the iterator after your addition.
+ *
+ * iter = quicklistGetIterator(quicklist,<direction>);
+ * quicklistEntry entry;
+ * while (quicklistNext(iter, &entry)) {
+ * if (entry.value)
+ * [[ use entry.value with entry.sz ]]
+ * else
+ * [[ use entry.longval ]]
+ * }
+ *
+ * Populates 'entry' with values for this iteration.
+ * Returns 0 when iteration is complete or if iteration not possible.
+ * If return value is 0, the contents of 'entry' are not valid.
+ */
+int quicklistNext(quicklistIter *iter, quicklistEntry *entry) {
+ initEntry(entry);
+
+ if (!iter) {
+ D("Returning because no iter!");
+ return 0;
+ }
+
+ entry->quicklist = iter->quicklist;
+ entry->node = iter->current;
+
+ if (!iter->current) {
+ D("Returning because current node is NULL")
+ return 0;
+ }
+
+ unsigned char *(*nextFn)(unsigned char *, unsigned char *) = NULL;
+ int offset_update = 0;
+
+ if (!iter->zi) {
+ /* If !zi, use current index. */
+ quicklistDecompressNodeForUse(iter->current);
+ iter->zi = ziplistIndex(iter->current->zl, iter->offset);
+ } else {
+ /* else, use existing iterator offset and get prev/next as necessary. */
+ if (iter->direction == AL_START_HEAD) {
+ nextFn = ziplistNext;
+ offset_update = 1;
+ } else if (iter->direction == AL_START_TAIL) {
+ nextFn = ziplistPrev;
+ offset_update = -1;
+ }
+ iter->zi = nextFn(iter->current->zl, iter->zi);
+ iter->offset += offset_update;
+ }
+
+ entry->zi = iter->zi;
+ entry->offset = iter->offset;
+
+ if (iter->zi) {
+ /* Populate value from existing ziplist position */
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ return 1;
+ } else {
+ /* We ran out of ziplist entries.
+ * Pick next node, update offset, then re-run retrieval. */
+ quicklistCompress(iter->quicklist, iter->current);
+ if (iter->direction == AL_START_HEAD) {
+ /* Forward traversal */
+ D("Jumping to start of next node");
+ iter->current = iter->current->next;
+ iter->offset = 0;
+ } else if (iter->direction == AL_START_TAIL) {
+ /* Reverse traversal */
+ D("Jumping to end of previous node");
+ iter->current = iter->current->prev;
+ iter->offset = -1;
+ }
+ iter->zi = NULL;
+ return quicklistNext(iter, entry);
+ }
+}
+
+/* Duplicate the quicklist.
+ * On success a copy of the original quicklist is returned.
+ *
+ * The original quicklist both on success or error is never modified.
+ *
+ * Returns newly allocated quicklist. */
+quicklist *quicklistDup(quicklist *orig) {
+ quicklist *copy;
+
+ copy = quicklistNew(orig->fill, orig->compress);
+
+ for (quicklistNode *current = orig->head; current;
+ current = current->next) {
+ quicklistNode *node = quicklistCreateNode();
+
+ if (node->encoding == QUICKLIST_NODE_ENCODING_LZF) {
+ quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ size_t lzf_sz = sizeof(*lzf) + lzf->sz;
+ node->zl = zmalloc(lzf_sz);
+ memcpy(node->zl, current->zl, lzf_sz);
+ } else if (node->encoding == QUICKLIST_NODE_ENCODING_RAW) {
+ node->zl = zmalloc(current->sz);
+ memcpy(node->zl, current->zl, current->sz);
+ }
+
+ node->count = current->count;
+ copy->count += node->count;
+ node->sz = current->sz;
+ node->encoding = current->encoding;
+
+ _quicklistInsertNodeAfter(copy, copy->tail, node);
+ }
+
+ /* copy->count must equal orig->count here */
+ return copy;
+}
+
+/* Populate 'entry' with the element at the specified zero-based index
+ * where 0 is the head, 1 is the element next to head
+ * and so on. Negative integers are used in order to count
+ * from the tail, -1 is the last element, -2 the penultimate
+ * and so on. If the index is out of range 0 is returned.
+ *
+ * Returns 1 if element found
+ * Returns 0 if element not found */
+int quicklistIndex(const quicklist *quicklist, const long long idx,
+ quicklistEntry *entry) {
+ quicklistNode *n;
+ unsigned long long accum = 0;
+ unsigned long long index;
+ int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */
+
+ initEntry(entry);
+ entry->quicklist = quicklist;
+
+ if (!forward) {
+ index = (-idx) - 1;
+ n = quicklist->tail;
+ } else {
+ index = idx;
+ n = quicklist->head;
+ }
+
+ if (index >= quicklist->count)
+ return 0;
+
+ while (likely(n)) {
+ if ((accum + n->count) > index) {
+ break;
+ } else {
+ D("Skipping over (%p) %u at accum %lld", (void *)n, n->count,
+ accum);
+ accum += n->count;
+ n = forward ? n->next : n->prev;
+ }
+ }
+
+ if (!n)
+ return 0;
+
+ D("Found node: %p at accum %llu, idx %llu, sub+ %llu, sub- %llu", (void *)n,
+ accum, index, index - accum, (-index) - 1 + accum);
+
+ entry->node = n;
+ if (forward) {
+ /* forward = normal head-to-tail offset. */
+ entry->offset = index - accum;
+ } else {
+ /* reverse = need negative offset for tail-to-head, so undo
+ * the result of the original if (index < 0) above. */
+ entry->offset = (-index) - 1 + accum;
+ }
+
+ quicklistDecompressNodeForUse(entry->node);
+ entry->zi = ziplistIndex(entry->node->zl, entry->offset);
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ /* The caller will use our result, so we don't re-compress here.
+ * The caller can recompress or delete the node as needed. */
+ return 1;
+}
+
+/* Rotate quicklist by moving the tail element to the head. */
+void quicklistRotate(quicklist *quicklist) {
+ if (quicklist->count <= 1)
+ return;
+
+ /* First, get the tail entry */
+ unsigned char *p = ziplistIndex(quicklist->tail->zl, -1);
+ unsigned char *value;
+ long long longval;
+ unsigned int sz;
+ char longstr[32] = {0};
+ ziplistGet(p, &value, &sz, &longval);
+
+ /* If value found is NULL, then ziplistGet populated longval instead */
+ if (!value) {
+ /* Write the longval as a string so we can re-add it */
+ sz = ll2string(longstr, sizeof(longstr), longval);
+ value = (unsigned char *)longstr;
+ }
+
+ /* Add tail entry to head (must happen before tail is deleted). */
+ quicklistPushHead(quicklist, value, sz);
+
+ /* If quicklist has only one node, the head ziplist is also the
+ * tail ziplist and PushHead() could have reallocated our single ziplist,
+ * which would make our pre-existing 'p' unusable. */
+ if (quicklist->len == 1) {
+ p = ziplistIndex(quicklist->tail->zl, -1);
+ }
+
+ /* Remove tail entry. */
+ quicklistDelIndex(quicklist, quicklist->tail, &p);
+}
+
+/* pop from quicklist and return result in 'data' ptr. Value of 'data'
+ * is the return value of 'saver' function pointer if the data is NOT a number.
+ *
+ * If the quicklist element is a long long, then the return value is returned in
+ * 'sval'.
+ *
+ * Return value of 0 means no elements available.
+ * Return value of 1 means check 'data' and 'sval' for values.
+ * If 'data' is set, use 'data' and 'sz'. Otherwise, use 'sval'. */
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *sval,
+ void *(*saver)(unsigned char *data, unsigned int sz)) {
+ unsigned char *p;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ int pos = (where == QUICKLIST_HEAD) ? 0 : -1;
+
+ if (quicklist->count == 0)
+ return 0;
+
+ if (data)
+ *data = NULL;
+ if (sz)
+ *sz = 0;
+ if (sval)
+ *sval = -123456789;
+
+ quicklistNode *node;
+ if (where == QUICKLIST_HEAD && quicklist->head) {
+ node = quicklist->head;
+ } else if (where == QUICKLIST_TAIL && quicklist->tail) {
+ node = quicklist->tail;
+ } else {
+ return 0;
+ }
+
+ p = ziplistIndex(node->zl, pos);
+ if (ziplistGet(p, &vstr, &vlen, &vlong)) {
+ if (vstr) {
+ if (data)
+ *data = saver(vstr, vlen);
+ if (sz)
+ *sz = vlen;
+ } else {
+ if (data)
+ *data = NULL;
+ if (sval)
+ *sval = vlong;
+ }
+ quicklistDelIndex(quicklist, node, &p);
+ return 1;
+ }
+ return 0;
+}
+
+/* Return a malloc'd copy of data passed in */
+REDIS_STATIC void *_quicklistSaver(unsigned char *data, unsigned int sz) {
+ unsigned char *vstr;
+ if (data) {
+ vstr = zmalloc(sz);
+ memcpy(data, vstr, sz);
+ return vstr;
+ }
+ return NULL;
+}
+
+/* Default pop function
+ *
+ * Returns malloc'd value from quicklist */
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *slong) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ if (quicklist->count == 0)
+ return 0;
+ int ret = quicklistPopCustom(quicklist, where, &vstr, &vlen, &vlong,
+ _quicklistSaver);
+ if (data)
+ *data = vstr;
+ if (slong)
+ *slong = vlong;
+ if (sz)
+ *sz = vlen;
+ return ret;
+}
+
+/* Wrapper to allow argument-based switching between HEAD/TAIL pop */
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where) {
+ if (where == QUICKLIST_HEAD) {
+ quicklistPushHead(quicklist, value, sz);
+ } else if (where == QUICKLIST_TAIL) {
+ quicklistPushTail(quicklist, value, sz);
+ }
+}
+
+/* The rest of this file is test cases and test helpers. */
+#ifdef REDIS_TEST
+#include <stdint.h>
+#include <sys/time.h>
+
+#define assert(_e) \
+ do { \
+ if (!(_e)) { \
+ printf("\n\n=== ASSERTION FAILED ===\n"); \
+ printf("==> %s:%d '%s' is not true\n", __FILE__, __LINE__, #_e); \
+ err++; \
+ } \
+ } while (0)
+
+#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
+
+#define OK printf("\tOK\n")
+
+#define ERROR \
+ do { \
+ printf("\tERROR!\n"); \
+ err++; \
+ } while (0)
+
+#define ERR(x, ...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("ERROR! " x "\n", __VA_ARGS__); \
+ err++; \
+ } while (0)
+
+#define TEST(name) printf("test — %s\n", name);
+#define TEST_DESC(name, ...) printf("test — " name "\n", __VA_ARGS__);
+
+#define QL_TEST_VERBOSE 0
+
+#define UNUSED(x) (void)(x)
+static void ql_info(quicklist *ql) {
+#if QL_TEST_VERBOSE
+ printf("Container length: %lu\n", ql->len);
+ printf("Container size: %lu\n", ql->count);
+ if (ql->head)
+ printf("\t(zsize head: %d)\n", ziplistLen(ql->head->zl));
+ if (ql->tail)
+ printf("\t(zsize tail: %d)\n", ziplistLen(ql->tail->zl));
+ printf("\n");
+#else
+ UNUSED(ql);
+#endif
+}
+
+/* Return the UNIX time in microseconds */
+static long long ustime(void) {
+ struct timeval tv;
+ long long ust;
+
+ gettimeofday(&tv, NULL);
+ ust = ((long long)tv.tv_sec) * 1000000;
+ ust += tv.tv_usec;
+ return ust;
+}
+
+/* Return the UNIX time in milliseconds */
+static long long mstime(void) { return ustime() / 1000; }
+
+/* Iterate over an entire quicklist.
+ * Print the list if 'print' == 1.
+ *
+ * Returns physical count of elements found by iterating over the list. */
+static int _itrprintr(quicklist *ql, int print, int forward) {
+ quicklistIter *iter =
+ quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ int p = 0;
+ quicklistNode *prev = NULL;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.node != prev) {
+ /* Count the number of list nodes too */
+ p++;
+ prev = entry.node;
+ }
+ if (print) {
+ printf("[%3d (%2d)]: [%.*s] (%lld)\n", i, p, entry.sz,
+ (char *)entry.value, entry.longval);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ return i;
+}
+static int itrprintr(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 1);
+}
+
+static int itrprintr_rev(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 0);
+}
+
+#define ql_verify(a, b, c, d, e) \
+ do { \
+ err += _ql_verify((a), (b), (c), (d), (e)); \
+ } while (0)
+
+/* Verify list metadata matches physical list contents. */
+static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
+ uint32_t head_count, uint32_t tail_count) {
+ int errors = 0;
+
+ ql_info(ql);
+ if (len != ql->len) {
+ yell("quicklist length wrong: expected %d, got %u", len, ql->len);
+ errors++;
+ }
+
+ if (count != ql->count) {
+ yell("quicklist count wrong: expected %d, got %lu", count, ql->count);
+ errors++;
+ }
+
+ int loopr = itrprintr(ql, 0);
+ if (loopr != (int)ql->count) {
+ yell("quicklist cached count not match actual count: expected %lu, got "
+ "%d",
+ ql->count, loopr);
+ errors++;
+ }
+
+ int rloopr = itrprintr_rev(ql, 0);
+ if (loopr != rloopr) {
+ yell("quicklist has different forward count than reverse count! "
+ "Forward count is %d, reverse count is %d.",
+ loopr, rloopr);
+ errors++;
+ }
+
+ if (ql->len == 0 && !errors) {
+ OK;
+ return errors;
+ }
+
+ if (ql->head && head_count != ql->head->count &&
+ head_count != ziplistLen(ql->head->zl)) {
+ yell("quicklist head count wrong: expected %d, "
+ "got cached %d vs. actual %d",
+ head_count, ql->head->count, ziplistLen(ql->head->zl));
+ errors++;
+ }
+
+ if (ql->tail && tail_count != ql->tail->count &&
+ tail_count != ziplistLen(ql->tail->zl)) {
+ yell("quicklist tail count wrong: expected %d, "
+ "got cached %u vs. actual %d",
+ tail_count, ql->tail->count, ziplistLen(ql->tail->zl));
+ errors++;
+ }
+
+ if (quicklistAllowsCompression(ql)) {
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len; at++, node = node->next) {
+ if (node && (at < low_raw || at >= high_raw)) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ yell("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress);
+ errors++;
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF &&
+ !node->attempted_compress) {
+ yell("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d; attempted: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress, node->attempted_compress);
+ errors++;
+ }
+ }
+ }
+ }
+
+ if (!errors)
+ OK;
+ return errors;
+}
+
+/* Generate new string concatenating integer i against string 'prefix' */
+static char *genstr(char *prefix, int i) {
+ static char result[64] = {0};
+ snprintf(result, sizeof(result), "%s%d", prefix, i);
+ return result;
+}
+
+/* main test, but callable from other files */
+int quicklistTest(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
+
+ unsigned int err = 0;
+ int optimize_start =
+ -(int)(sizeof(optimization_level) / sizeof(*optimization_level));
+
+ printf("Starting optimization offset at: %d\n", optimize_start);
+
+ int options[] = {0, 1, 2, 3, 4, 5, 6, 10};
+ size_t option_count = sizeof(options) / sizeof(*options);
+ long long runtime[option_count];
+
+ for (int _i = 0; _i < (int)option_count; _i++) {
+ printf("Testing Option %d\n", options[_i]);
+ long long start = mstime();
+
+ TEST("create list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to tail of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "hello", 6);
+ /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to head of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to tail 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to head 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to tail 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 64);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to head 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("rotate empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistRotate(ql);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST("rotate one val once") {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistRotate(ql);
+ /* Ignore compression verify because ziplist is
+ * too small to compress. */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 3; f++) {
+ TEST_DESC("rotate 500 val 5000 times at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "900", 3);
+ quicklistPushHead(ql, "7000", 4);
+ quicklistPushHead(ql, "-1200", 5);
+ quicklistPushHead(ql, "42", 2);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 64);
+ ql_info(ql);
+ for (int i = 0; i < 5000; i++) {
+ ql_info(ql);
+ quicklistRotate(ql);
+ }
+ if (f == 1)
+ ql_verify(ql, 504, 504, 1, 1);
+ else if (f == 2)
+ ql_verify(ql, 252, 504, 2, 2);
+ else if (f == 32)
+ ql_verify(ql, 16, 504, 32, 24);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("pop empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop 1 string from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, genstr("hello", 331), 32);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data != NULL);
+ assert(sz == 32);
+ zfree(data);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 1 number from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "55513", 5);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data == NULL);
+ assert(lv == 55513);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 500 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_info(ql);
+ for (int i = 0; i < 500; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ zfree(data);
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 5000 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 5000; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ if (i < 500) {
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ zfree(data);
+ } else {
+ assert(ret == 0);
+ }
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate forward over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 499, count = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i--;
+ count++;
+ }
+ if (count != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate reverse over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i++;
+ }
+ if (i != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertBefore(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 12; f++) {
+ TEST_DESC("insert once in elements while iterating at fill %d at "
+ "compress %d\n",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistSetFill(ql, 1);
+ quicklistPushTail(ql, "def", 3); /* force to unique node */
+ quicklistSetFill(ql, f);
+ quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */
+ quicklistPushTail(ql, "foo", 3);
+ quicklistPushTail(ql, "zoo", 3);
+
+ itrprintr(ql, 0);
+ /* insert "bar" before "bob" while iterating over list. */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ while (quicklistNext(iter, &entry)) {
+ if (!strncmp((char *)entry.value, "bob", 3)) {
+ /* Insert as fill = 1 so it spills into new node. */
+ quicklistInsertBefore(ql, &entry, "bar", 3);
+ break; /* didn't we fix insert-while-iterating? */
+ }
+ }
+ itrprintr(ql, 0);
+
+ /* verify results */
+ quicklistIndex(ql, 0, &entry);
+ if (strncmp((char *)entry.value, "abc", 3))
+ ERR("Value 0 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 1, &entry);
+ if (strncmp((char *)entry.value, "def", 3))
+ ERR("Value 1 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 2, &entry);
+ if (strncmp((char *)entry.value, "bar", 3))
+ ERR("Value 2 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 3, &entry);
+ if (strncmp((char *)entry.value, "bob", 3))
+ ERR("Value 3 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 4, &entry);
+ if (strncmp((char *)entry.value, "foo", 3))
+ ERR("Value 4 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 5, &entry);
+ if (strncmp((char *)entry.value, "zoo", 3))
+ ERR("Value 5 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC(
+ "insert [before] 250 new in middle of 500 elements at fill"
+ " %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertBefore(ql, &entry, genstr("abc", i), 32);
+ }
+ if (f == 32)
+ ql_verify(ql, 25, 750, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC("insert [after] 250 new in middle of 500 elements at "
+ "fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertAfter(ql, &entry, genstr("abc", i), 32);
+ }
+
+ if (ql->count != 750)
+ ERR("List size not 750, but rather %ld", ql->count);
+
+ if (f == 32)
+ ql_verify(ql, 26, 750, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("duplicate empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, genstr("hello", 3), 32);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 16, 500, 20, 32);
+
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("index 1,200 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, 1, &entry);
+ if (!strcmp((char *)entry.value, "hello2"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, 200, &entry);
+ if (!strcmp((char *)entry.value, "hello201"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -1,-2 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -1, &entry);
+ if (!strcmp((char *)entry.value, "hello500"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, -2, &entry);
+ if (!strcmp((char *)entry.value, "hello499"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -100 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -100, &entry);
+ if (!strcmp((char *)entry.value, "hello401"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index too big +1 from 50 list at fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ if (quicklistIndex(ql, 50, &entry))
+ ERR("Index found at 50 with 50 list: %.*s", entry.sz,
+ entry.value);
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("delete range empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistDelRange(ql, 5, 20);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node in list of one node") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 32);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 128);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete middle 100 of 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, 200, 100);
+ ql_verify(ql, 14, 400, 32, 20);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 1);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 128);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 100 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistDelRange(ql, -100, 100);
+ ql_verify(ql, 13, 400, 32, 16);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete -10 count 5 from 50 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 2, 50, 32, 18);
+ quicklistDelRange(ql, -10, 5);
+ ql_verify(ql, 2, 45, 32, 13);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers only list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "1111", 4);
+ quicklistPushTail(ql, "2222", 4);
+ quicklistPushTail(ql, "3333", 4);
+ quicklistPushTail(ql, "4444", 4);
+ ql_verify(ql, 1, 4, 4, 4);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111, %lld", entry.longval);
+ quicklistIndex(ql, 1, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222, %lld", entry.longval);
+ quicklistIndex(ql, 2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333, %lld", entry.longval);
+ quicklistIndex(ql, 3, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444, %lld", entry.longval);
+ if (quicklistIndex(ql, 4, &entry))
+ ERR("Index past elements: %lld", entry.longval);
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -3, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -4, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111 (reverse), %lld", entry.longval);
+ if (quicklistIndex(ql, -5, &entry))
+ ERR("Index past elements (reverse), %lld", entry.longval);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 5000; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistEntry entry;
+ for (int i = 0; i < 5000; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[i])
+ ERR("[%d] Not longval %lld but rather %lld", i, nums[i],
+ entry.longval);
+ entry.longval = 0xdeadbeef;
+ }
+ quicklistIndex(ql, 5000, &entry);
+ if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20))
+ ERR("String val not match: %s", entry.value);
+ ql_verify(ql, 157, 5001, 32, 9);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read B") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "99", 2);
+ quicklistPushTail(ql, "98", 2);
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistPushTail(ql, "96", 2);
+ quicklistPushTail(ql, "95", 2);
+ quicklistReplaceAtIndex(ql, 1, "foo", 3);
+ quicklistReplaceAtIndex(ql, -1, "bar", 3);
+ quicklistRelease(ql);
+ OK;
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("lrem test at fill %d at compress %d", f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char *words[] = {"abc", "foo", "bar", "foobar", "foobared",
+ "zap", "bar", "test", "foo"};
+ char *result[] = {"abc", "foo", "foobar", "foobared",
+ "zap", "test", "foo"};
+ char *resultB[] = {"abc", "foo", "foobar",
+ "foobared", "zap", "test"};
+ for (int i = 0; i < 9; i++)
+ quicklistPushTail(ql, words[i], strlen(words[i]));
+
+ /* lrem 0 bar */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"bar", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem 0 bar */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ int ok = 1;
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, result[i], entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, result[i]);
+ ok = 0;
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ quicklistPushTail(ql, "foo", 3);
+
+ /* lrem -2 foo */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ int del = 2;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"foo", 3)) {
+ quicklistDelEntry(iter, &entry);
+ del--;
+ }
+ if (!del)
+ break;
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem -2 foo */
+ /* (we're ignoring the '2' part and still deleting all foo
+ * because
+ * we only have two foo) */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ size_t resB = sizeof(resultB) / sizeof(*resultB);
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, resultB[resB - 1 - i],
+ entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, resultB[resB - 1 - i]);
+ ok = 0;
+ }
+ i++;
+ }
+
+ quicklistReleaseIterator(iter);
+ /* final result of all tests */
+ if (ok)
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("iterate reverse + delete at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistPushTail(ql, "def", 3);
+ quicklistPushTail(ql, "hij", 3);
+ quicklistPushTail(ql, "jkl", 3);
+ quicklistPushTail(ql, "oop", 3);
+
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"hij", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ if (i != 5)
+ ERR("Didn't iterate 5 times, iterated %d times.", i);
+
+ /* Check results after deletion of "hij" */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ char *vals[] = {"abc", "def", "jkl", "oop"};
+ while (quicklistNext(iter, &entry)) {
+ if (!quicklistCompare(entry.zi, (unsigned char *)vals[i],
+ 3)) {
+ ERR("Value at %d didn't match %s\n", i, vals[i]);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 800; f++) {
+ TEST_DESC("iterator at index test at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 760; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+
+ quicklistEntry entry;
+ quicklistIter *iter =
+ quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437);
+ int i = 437;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.longval != nums[i])
+ ERR("Expected %lld, but got %lld", entry.longval,
+ nums[i]);
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test A at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 32; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 32, 32, 32);
+ /* ltrim 25 53 (keep [25,32] inclusive = 7 remaining) */
+ quicklistDelRange(ql, 0, 25);
+ quicklistDelRange(ql, 0, 0);
+ quicklistEntry entry;
+ for (int i = 0; i < 7; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[25 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[25 + i]);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 7, 7, 7);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test B at fill %d at compress %d", f,
+ options[_i]) {
+ /* Force-disable compression because our 33 sequential
+ * integers don't compress and the check always fails. */
+ quicklist *ql = quicklistNew(f, QUICKLIST_NOCOMPRESS);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */
+ quicklistDelRange(ql, 0, 5);
+ quicklistDelRange(ql, -16, 16);
+ if (f == 32)
+ ql_verify(ql, 1, 12, 12, 12);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 5)
+ ERR("A: longval not 5, but %lld", entry.longval);
+ else
+ OK;
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 16)
+ ERR("B! got instead: %lld", entry.longval);
+ else
+ OK;
+ quicklistPushTail(ql, "bobobob", 7);
+ quicklistIndex(ql, -1, &entry);
+ if (strncmp((char *)entry.value, "bobobob", 7))
+ ERR("Tail doesn't match bobobob, it's %.*s instead",
+ entry.sz, entry.value);
+ for (int i = 0; i < 12; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[5 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[5 + i]);
+ }
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test C at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */
+ quicklistDelRange(ql, 0, 3);
+ quicklistDelRange(ql, -29,
+ 4000); /* make sure not loop forever */
+ if (f == 32)
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != -5157318210846258173)
+ ERROR;
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test D at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ quicklistDelRange(ql, -12, 3);
+ if (ql->count != 30)
+ ERR("Didn't delete exactly three elements! Count is: %lu",
+ ql->count);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 72; f++) {
+ TEST_DESC("create quicklist from ziplist at fill %d at compress %d",
+ f, options[_i]) {
+ unsigned char *zl = ziplistNew();
+ long long nums[64];
+ char num[64];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ zl =
+ ziplistPush(zl, (unsigned char *)num, sz, ZIPLIST_TAIL);
+ }
+ for (int i = 0; i < 33; i++) {
+ zl = ziplistPush(zl, (unsigned char *)genstr("hello", i),
+ 32, ZIPLIST_TAIL);
+ }
+ quicklist *ql = quicklistCreateFromZiplist(f, options[_i], zl);
+ if (f == 1)
+ ql_verify(ql, 66, 66, 1, 1);
+ else if (f == 32)
+ ql_verify(ql, 3, 66, 32, 2);
+ else if (f == 66)
+ ql_verify(ql, 1, 66, 66, 66);
+ quicklistRelease(ql);
+ }
+ }
+
+ long long stop = mstime();
+ runtime[_i] = stop - start;
+ }
+
+ /* Run a longer test of compression depth outside of primary test loop. */
+ int list_sizes[] = {250, 251, 500, 999, 1000};
+ long long start = mstime();
+ for (int list = 0; list < (int)(sizeof(list_sizes) / sizeof(*list_sizes));
+ list++) {
+ for (int f = optimize_start; f < 128; f++) {
+ for (int depth = 1; depth < 40; depth++) {
+ /* skip over many redundant test cases */
+ TEST_DESC("verify specific compression of interior nodes with "
+ "%d list "
+ "at fill %d at compress %d",
+ list_sizes[list], f, depth) {
+ quicklist *ql = quicklistNew(f, depth);
+ for (int i = 0; i < list_sizes[list]; i++) {
+ quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64);
+ quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64);
+ }
+
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len;
+ at++, node = node->next) {
+ if (at < low_raw || at >= high_raw) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ ERR("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz);
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) {
+ ERR("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; attempted: %d)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz, node->attempted_compress);
+ }
+ }
+ }
+ quicklistRelease(ql);
+ }
+ }
+ }
+ }
+ long long stop = mstime();
+
+ printf("\n");
+ for (size_t i = 0; i < option_count; i++)
+ printf("Test Loop %02d: %0.2f seconds.\n", options[i],
+ (float)runtime[i] / 1000);
+ printf("Compressions: %0.2f seconds.\n", (float)(stop - start) / 1000);
+ printf("\n");
+
+ if (!err)
+ printf("ALL TESTS PASSED!\n");
+ else
+ ERR("Sorry, not all tests passed! In fact, %d tests failed.", err);
+
+ return err;
+}
+#endif
diff --git a/src/quicklist.h b/src/quicklist.h
new file mode 100644
index 000000000..5c9530ccd
--- /dev/null
+++ b/src/quicklist.h
@@ -0,0 +1,169 @@
+/* quicklist.h - A generic doubly linked quicklist implementation
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __QUICKLIST_H__
+#define __QUICKLIST_H__
+
+/* Node, quicklist, and Iterator are the only data structures used currently. */
+
+/* quicklistNode is a 32 byte struct describing a ziplist for a quicklist.
+ * We use bit fields keep the quicklistNode at 32 bytes.
+ * count: 16 bits, max 65536 (max zl bytes is 65k, so max count actually < 32k).
+ * encoding: 2 bits, RAW=1, LZF=2.
+ * container: 2 bits, NONE=1, ZIPLIST=2.
+ * recompress: 1 bit, bool, true if node is temporarry decompressed for usage.
+ * attempted_compress: 1 bit, boolean, used for verifying during testing.
+ * extra: 12 bits, free for future use; pads out the remainder of 32 bits */
+typedef struct quicklistNode {
+ struct quicklistNode *prev;
+ struct quicklistNode *next;
+ unsigned char *zl;
+ unsigned int sz; /* ziplist size in bytes */
+ unsigned int count : 16; /* count of items in ziplist */
+ unsigned int encoding : 2; /* RAW==1 or LZF==2 */
+ unsigned int container : 2; /* NONE==1 or ZIPLIST==2 */
+ unsigned int recompress : 1; /* was this node previous compressed? */
+ unsigned int attempted_compress : 1; /* node can't compress; too small */
+ unsigned int extra : 10; /* more bits to steal for future usage */
+} quicklistNode;
+
+/* quicklistLZF is a 4+N byte struct holding 'sz' followed by 'compressed'.
+ * 'sz' is byte length of 'compressed' field.
+ * 'compressed' is LZF data with total (compressed) length 'sz'
+ * NOTE: uncompressed length is stored in quicklistNode->sz.
+ * When quicklistNode->zl is compressed, node->zl points to a quicklistLZF */
+typedef struct quicklistLZF {
+ unsigned int sz; /* LZF size in bytes*/
+ char compressed[];
+} quicklistLZF;
+
+/* quicklist is a 32 byte struct (on 64-bit systems) describing a quicklist.
+ * 'count' is the number of total entries.
+ * 'len' is the number of quicklist nodes.
+ * 'compress' is: -1 if compression disabled, otherwise it's the number
+ * of quicklistNodes to leave uncompressed at ends of quicklist.
+ * 'fill' is the user-requested (or default) fill factor. */
+typedef struct quicklist {
+ quicklistNode *head;
+ quicklistNode *tail;
+ unsigned long count; /* total count of all entries in all ziplists */
+ unsigned int len; /* number of quicklistNodes */
+ int fill : 16; /* fill factor for individual nodes */
+ unsigned int compress : 16; /* depth of end nodes not to compress;0=off */
+} quicklist;
+
+typedef struct quicklistIter {
+ const quicklist *quicklist;
+ quicklistNode *current;
+ unsigned char *zi;
+ long offset; /* offset in current ziplist */
+ int direction;
+} quicklistIter;
+
+typedef struct quicklistEntry {
+ const quicklist *quicklist;
+ quicklistNode *node;
+ unsigned char *zi;
+ unsigned char *value;
+ unsigned int sz;
+ long long longval;
+ int offset;
+} quicklistEntry;
+
+#define QUICKLIST_HEAD 0
+#define QUICKLIST_TAIL -1
+
+/* quicklist node encodings */
+#define QUICKLIST_NODE_ENCODING_RAW 1
+#define QUICKLIST_NODE_ENCODING_LZF 2
+
+/* quicklist compression disable */
+#define QUICKLIST_NOCOMPRESS 0
+
+/* quicklist container formats */
+#define QUICKLIST_NODE_CONTAINER_NONE 1
+#define QUICKLIST_NODE_CONTAINER_ZIPLIST 2
+
+#define quicklistNodeIsCompressed(node) \
+ ((node)->encoding == QUICKLIST_NODE_ENCODING_LZF)
+
+/* Prototypes */
+quicklist *quicklistCreate(void);
+quicklist *quicklistNew(int fill, int compress);
+void quicklistSetCompressDepth(quicklist *quicklist, int depth);
+void quicklistSetFill(quicklist *quicklist, int fill);
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth);
+void quicklistRelease(quicklist *quicklist);
+int quicklistPushHead(quicklist *quicklist, void *value, const size_t sz);
+int quicklistPushTail(quicklist *quicklist, void *value, const size_t sz);
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where);
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl);
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+ unsigned char *zl);
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+ unsigned char *zl);
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry);
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ int sz);
+int quicklistDelRange(quicklist *quicklist, const long start, const long stop);
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction);
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+ int direction, const long long idx);
+int quicklistNext(quicklistIter *iter, quicklistEntry *node);
+void quicklistReleaseIterator(quicklistIter *iter);
+quicklist *quicklistDup(quicklist *orig);
+int quicklistIndex(const quicklist *quicklist, const long long index,
+ quicklistEntry *entry);
+void quicklistRewind(quicklist *quicklist, quicklistIter *li);
+void quicklistRewindTail(quicklist *quicklist, quicklistIter *li);
+void quicklistRotate(quicklist *quicklist);
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *sval,
+ void *(*saver)(unsigned char *data, unsigned int sz));
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *slong);
+unsigned int quicklistCount(quicklist *ql);
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len);
+size_t quicklistGetLzf(const quicklistNode *node, void **data);
+
+#ifdef REDIS_TEST
+int quicklistTest(int argc, char *argv[]);
+#endif
+
+/* Directions for iterators */
+#define AL_START_HEAD 0
+#define AL_START_TAIL 1
+
+#endif /* __QUICKLIST_H__ */
diff --git a/src/rdb.c b/src/rdb.c
index d2fd405db..caa06db87 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -40,6 +40,20 @@
#include <arpa/inet.h>
#include <sys/stat.h>
+#define RDB_LOAD_NONE 0
+#define RDB_LOAD_ENC (1<<0)
+#define RDB_LOAD_PLAIN (1<<1)
+
+#define rdbExitReportCorruptRDB(reason) rdbCheckThenExit(reason, __LINE__);
+
+void rdbCheckThenExit(char *reason, int where) {
+ redisLog(REDIS_WARNING, "Corrupt RDB detected at rdb.c:%d (%s). "
+ "Running 'redis-check-rdb %s'",
+ where, reason, server.rdb_filename);
+ redis_check_rdb(server.rdb_filename);
+ exit(1);
+}
+
static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
if (rdb && rioWrite(rdb,p,len) == 0)
return -1;
@@ -161,9 +175,11 @@ int rdbEncodeInteger(long long value, unsigned char *enc) {
}
/* Loads an integer-encoded object with the specified encoding type "enctype".
- * If the "encode" argument is set the function may return an integer-encoded
- * string object, otherwise it always returns a raw string object. */
-robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
+ * The returned value changes according to the flags, see
+ * rdbGenerincLoadStringObject() for more info. */
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int encode = flags & RDB_LOAD_ENC;
unsigned char enc[4];
long long val;
@@ -182,12 +198,19 @@ robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
val = (int32_t)v;
} else {
val = 0; /* anti-warning */
- redisPanic("Unknown RDB integer encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB integer encoding type");
}
- if (encode)
+ if (plain) {
+ char buf[REDIS_LONGSTR_SIZE], *p;
+ int len = ll2string(buf,sizeof(buf),val);
+ p = zmalloc(len);
+ memcpy(p,buf,len);
+ return p;
+ } else if (encode) {
return createStringObjectFromLongLong(val);
- else
+ } else {
return createObject(REDIS_STRING,sdsfromlonglong(val));
+ }
}
/* String objects in the form "2391" "-100" without any space and with a
@@ -209,44 +232,54 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}
-int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
- size_t comprlen, outlen;
+int rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+ size_t original_len) {
unsigned char byte;
int n, nwritten = 0;
- void *out;
- /* We require at least four bytes compression for this to be worth it */
- if (len <= 4) return 0;
- outlen = len-4;
- if ((out = zmalloc(outlen+1)) == NULL) return 0;
- comprlen = lzf_compress(s, len, out, outlen);
- if (comprlen == 0) {
- zfree(out);
- return 0;
- }
/* Data compressed! Let's save it on disk */
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,compress_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,original_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
+ if ((n = rdbWriteRaw(rdb,data,compress_len)) == -1) goto writeerr;
nwritten += n;
- zfree(out);
return nwritten;
writeerr:
- zfree(out);
return -1;
}
-robj *rdbLoadLzfStringObject(rio *rdb) {
+int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+ size_t comprlen, outlen;
+ void *out;
+
+ /* We require at least four bytes compression for this to be worth it */
+ if (len <= 4) return 0;
+ outlen = len-4;
+ if ((out = zmalloc(outlen+1)) == NULL) return 0;
+ comprlen = lzf_compress(s, len, out, outlen);
+ if (comprlen == 0) {
+ zfree(out);
+ return 0;
+ }
+ size_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+ zfree(out);
+ return nwritten;
+}
+
+/* Load an LZF compressed string in RDB format. The returned value
+ * changes according to 'flags'. For more info check the
+ * rdbGenericLoadStringObject() function. */
+void *rdbLoadLzfStringObject(rio *rdb, int flags) {
+ int plain = flags & RDB_LOAD_PLAIN;
unsigned int len, clen;
unsigned char *c = NULL;
sds val = NULL;
@@ -254,14 +287,29 @@ robj *rdbLoadLzfStringObject(rio *rdb) {
if ((clen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
if ((c = zmalloc(clen)) == NULL) goto err;
- if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+
+ /* Allocate our target according to the uncompressed size. */
+ if (plain) {
+ val = zmalloc(len);
+ } else {
+ if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+ }
+
+ /* Load the compressed representation and uncompress it to target. */
if (rioRead(rdb,c,clen) == 0) goto err;
if (lzf_decompress(c,clen,val,len) == 0) goto err;
zfree(c);
- return createObject(REDIS_STRING,val);
+
+ if (plain)
+ return val;
+ else
+ return createObject(REDIS_STRING,val);
err:
zfree(c);
- sdsfree(val);
+ if (plain)
+ zfree(val);
+ else
+ sdsfree(val);
return NULL;
}
@@ -330,10 +378,21 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
}
}
-robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
+/* Load a string object from an RDB file according to flags:
+ *
+ * RDB_LOAD_NONE (no flags): load an RDB object, unencoded.
+ * RDB_LOAD_ENC: If the returned type is a Redis object, try to
+ * encode it in a special way to be more memory
+ * efficient. When this flag is passed the function
+ * no longer guarantees that obj->ptr is an SDS string.
+ * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
+ * instead of a Redis object.
+ */
+void *rdbGenericLoadStringObject(rio *rdb, int flags) {
+ int encode = flags & RDB_LOAD_ENC;
+ int plain = flags & RDB_LOAD_PLAIN;
int isencoded;
uint32_t len;
- robj *o;
len = rdbLoadLen(rdb,&isencoded);
if (isencoded) {
@@ -341,30 +400,39 @@ robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
case REDIS_RDB_ENC_INT8:
case REDIS_RDB_ENC_INT16:
case REDIS_RDB_ENC_INT32:
- return rdbLoadIntegerObject(rdb,len,encode);
+ return rdbLoadIntegerObject(rdb,len,flags);
case REDIS_RDB_ENC_LZF:
- return rdbLoadLzfStringObject(rdb);
+ return rdbLoadLzfStringObject(rdb,flags);
default:
- redisPanic("Unknown RDB encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type");
}
}
if (len == REDIS_RDB_LENERR) return NULL;
- o = encode ? createStringObject(NULL,len) :
- createRawStringObject(NULL,len);
- if (len && rioRead(rdb,o->ptr,len) == 0) {
- decrRefCount(o);
- return NULL;
+ if (!plain) {
+ robj *o = encode ? createStringObject(NULL,len) :
+ createRawStringObject(NULL,len);
+ if (len && rioRead(rdb,o->ptr,len) == 0) {
+ decrRefCount(o);
+ return NULL;
+ }
+ return o;
+ } else {
+ void *buf = zmalloc(len);
+ if (len && rioRead(rdb,buf,len) == 0) {
+ zfree(buf);
+ return NULL;
+ }
+ return buf;
}
- return o;
}
robj *rdbLoadStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,0);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE);
}
robj *rdbLoadEncodedStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,1);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC);
}
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
@@ -433,10 +501,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
case REDIS_STRING:
return rdbSaveType(rdb,REDIS_RDB_TYPE_STRING);
case REDIS_LIST:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_LINKEDLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST)
+ return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_QUICKLIST);
else
redisPanic("Unknown list encoding");
case REDIS_SET:
@@ -477,7 +543,7 @@ int rdbLoadObjectType(rio *rdb) {
/* Save a Redis object. Returns -1 on error, number of bytes written on success. */
int rdbSaveObject(rio *rdb, robj *o) {
- int n, nwritten = 0;
+ int n = 0, nwritten = 0;
if (o->type == REDIS_STRING) {
/* Save a string value */
@@ -485,25 +551,24 @@ int rdbSaveObject(rio *rdb, robj *o) {
nwritten += n;
} else if (o->type == REDIS_LIST) {
/* Save a list value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- size_t l = ziplistBlobLen((unsigned char*)o->ptr);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
- if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listIter li;
- listNode *ln;
- if ((n = rdbSaveLen(rdb,listLength(list))) == -1) return -1;
- nwritten += n;
-
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
- nwritten += n;
- }
+ do {
+ if (quicklistNodeIsCompressed(node)) {
+ void *data;
+ size_t compress_len = quicklistGetLzf(node, &data);
+ if ((n = rdbSaveLzfBlob(rdb,data,compress_len,node->sz)) == -1) return -1;
+ nwritten += n;
+ } else {
+ if ((n = rdbSaveRawString(rdb,node->zl,node->sz)) == -1) return -1;
+ nwritten += n;
+ }
+ } while ((node = node->next));
} else {
redisPanic("Unknown list encoding");
}
@@ -627,6 +692,39 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
return 1;
}
+/* Save an AUX field. */
+int rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ if (rdbSaveType(rdb,REDIS_RDB_OPCODE_AUX) == -1) return -1;
+ if (rdbSaveRawString(rdb,key,keylen) == -1) return -1;
+ if (rdbSaveRawString(rdb,val,vallen) == -1) return -1;
+ return 1;
+}
+
+/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
+ * with strlen(). */
+int rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
+}
+
+/* Wrapper for strlen(key) + integer type (up to long long range). */
+int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[REDIS_LONGSTR_SIZE];
+ int vlen = ll2string(buf,sizeof(buf),val);
+ return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
+}
+
+/* Save a few default AUX fields with information about the RDB generated. */
+int rdbSaveInfoAuxFields(rio *rdb) {
+ int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
+
+ /* Add a few fields about the state when the RDB was created. */
+ if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;
+ return 1;
+}
+
/* Produces a dump of the database in RDB format sending it to the specified
* Redis I/O channel. On success REDIS_OK is returned, otherwise REDIS_ERR
* is returned and part of the output, or all the output, can be
@@ -647,6 +745,7 @@ int rdbSaveRio(rio *rdb, int *error) {
rdb->update_cksum = rioGenericUpdateChecksum;
snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
+ if (rdbSaveInfoAuxFields(rdb) == -1) goto werr;
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
@@ -659,6 +758,21 @@ int rdbSaveRio(rio *rdb, int *error) {
if (rdbSaveType(rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
if (rdbSaveLen(rdb,j) == -1) goto werr;
+ /* Write the RESIZE DB opcode. We trim the size to UINT32_MAX, which
+ * is currently the largest type we are able to represent in RDB sizes.
+ * However this does not limit the actual size of the DB to load since
+ * these sizes are just hints to resize the hash tables. */
+ uint32_t db_size, expires_size;
+ db_size = (dictSize(db->dict) <= UINT32_MAX) ?
+ dictSize(db->dict) :
+ UINT32_MAX;
+ expires_size = (dictSize(db->dict) <= UINT32_MAX) ?
+ dictSize(db->expires) :
+ UINT32_MAX;
+ if (rdbSaveType(rdb,REDIS_RDB_OPCODE_RESIZEDB) == -1) goto werr;
+ if (rdbSaveLen(rdb,db_size) == -1) goto werr;
+ if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
+
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
sds keystr = dictGetKey(de);
@@ -720,7 +834,7 @@ int rdbSave(char *filename) {
char tmpfile[256];
FILE *fp;
rio rdb;
- int error;
+ int error = 0;
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
@@ -819,7 +933,7 @@ void rdbRemoveTempFile(pid_t childpid) {
/* Load a Redis object of the specified type from the specified file.
* On success a newly allocated object is returned, otherwise NULL. */
robj *rdbLoadObject(int rdbtype, rio *rdb) {
- robj *o, *ele, *dec;
+ robj *o = NULL, *ele, *dec;
size_t len;
unsigned int i;
@@ -831,33 +945,18 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
/* Read list value */
if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
- /* Use a real list when there are too many entries */
- if (len > server.list_max_ziplist_entries) {
- o = createListObject();
- } else {
- o = createZiplistObject();
- }
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
/* Load every single element of the list */
while(len--) {
if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
-
- /* If we are using a ziplist and the value is too big, convert
- * the object to a real list. */
- if (o->encoding == REDIS_ENCODING_ZIPLIST &&
- sdsEncodedObject(ele) &&
- sdslen(ele->ptr) > server.list_max_ziplist_value)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
-
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- dec = getDecodedObject(ele);
- o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
- decrRefCount(dec);
- decrRefCount(ele);
- } else {
- ele = tryObjectEncoding(ele);
- listAddNodeTail(o->ptr,ele);
- }
+ dec = getDecodedObject(ele);
+ size_t len = sdslen(dec->ptr);
+ quicklistPushTail(o->ptr, dec->ptr, len);
+ decrRefCount(dec);
+ decrRefCount(ele);
}
} else if (rdbtype == REDIS_RDB_TYPE_SET) {
/* Read list/set value */
@@ -989,25 +1088,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
/* Add pair to hash table */
ret = dictAdd((dict*)o->ptr, field, value);
- redisAssert(ret == DICT_OK);
+ if (ret == DICT_ERR) {
+ rdbExitReportCorruptRDB("Duplicate keys detected");
+ }
}
/* All pairs should be read by now */
redisAssert(len == 0);
-
+ } else if (rdbtype == REDIS_RDB_TYPE_LIST_QUICKLIST) {
+ if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+
+ while (len--) {
+ unsigned char *zl = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ if (zl == NULL) return NULL;
+ quicklistAppendZiplist(o->ptr, zl);
+ }
} else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP ||
rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST ||
rdbtype == REDIS_RDB_TYPE_SET_INTSET ||
rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST ||
rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST)
{
- robj *aux = rdbLoadStringObject(rdb);
-
- if (aux == NULL) return NULL;
- o = createObject(REDIS_STRING,NULL); /* string is just placeholder */
- o->ptr = zmalloc(sdslen(aux->ptr));
- memcpy(o->ptr,aux->ptr,sdslen(aux->ptr));
- decrRefCount(aux);
+ unsigned char *encoded = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ if (encoded == NULL) return NULL;
+ o = createObject(REDIS_STRING,encoded); /* Obj type fixed below. */
/* Fix the object encoding, and make sure to convert the encoded
* data type into the base type if accordingly to the current
@@ -1048,8 +1155,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
case REDIS_RDB_TYPE_LIST_ZIPLIST:
o->type = REDIS_LIST;
o->encoding = REDIS_ENCODING_ZIPLIST;
- if (ziplistLen(o->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
+ listTypeConvert(o,REDIS_ENCODING_QUICKLIST);
break;
case REDIS_RDB_TYPE_SET_INTSET:
o->type = REDIS_SET;
@@ -1070,11 +1176,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
hashTypeConvert(o, REDIS_ENCODING_HT);
break;
default:
- redisPanic("Unknown encoding");
+ rdbExitReportCorruptRDB("Unknown encoding");
break;
}
} else {
- redisPanic("Unknown object type");
+ rdbExitReportCorruptRDB("Unknown object type");
}
return o;
}
@@ -1087,8 +1193,9 @@ void startLoading(FILE *fp) {
/* Load the DB */
server.loading = 1;
server.loading_start_time = time(NULL);
+ server.loading_loaded_bytes = 0;
if (fstat(fileno(fp), &sb) == -1) {
- server.loading_total_bytes = 1; /* just to avoid division by zero */
+ server.loading_total_bytes = 0;
} else {
server.loading_total_bytes = sb.st_size;
}
@@ -1162,7 +1269,12 @@ int rdbLoad(char *filename) {
/* Read type. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
if (type == REDIS_RDB_OPCODE_EXPIRETIME) {
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
@@ -1170,27 +1282,67 @@ int rdbLoad(char *filename) {
* into milliseconds. */
expiretime *= 1000;
} else if (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
- /* Milliseconds precision expire times introduced with RDB
- * version 3. */
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+ * with RDB v3. Like EXPIRETIME but no with more precision. */
if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- }
-
- if (type == REDIS_RDB_OPCODE_EOF)
+ } else if (type == REDIS_RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
break;
-
- /* Handle SELECT DB opcode as a special case */
- if (type == REDIS_RDB_OPCODE_SELECTDB) {
+ } else if (type == REDIS_RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
goto eoferr;
if (dbid >= (unsigned)server.dbnum) {
- redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
+ redisLog(REDIS_WARNING,
+ "FATAL: Data file was created with a Redis "
+ "server configured to handle more than %d "
+ "databases. Exiting\n", server.dbnum);
exit(1);
}
db = server.db+dbid;
- continue;
+ continue; /* Read type again. */
+ } else if (type == REDIS_RDB_OPCODE_RESIZEDB) {
+ /* RESIZEDB: Hint about the size of the keys in the currently
+ * selected data base, in order to avoid useless rehashing. */
+ uint32_t db_size, expires_size;
+ if ((db_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ goto eoferr;
+ dictExpand(db->dict,db_size);
+ dictExpand(db->expires,expires_size);
+ continue; /* Read type again. */
+ } else if (type == REDIS_RDB_OPCODE_AUX) {
+ /* AUX: generic string-string fields. Use to add state to RDB
+ * which is backward compatible. Implementations of RDB loading
+ * are requierd to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+
+ if (((char*)auxkey->ptr)[0] == '%') {
+ /* All the fields with a name staring with '%' are considered
+ * information fields and are logged at startup with a log
+ * level of NOTICE. */
+ redisLog(REDIS_NOTICE,"RDB '%s': %s",
+ (char*)auxkey->ptr,
+ (char*)auxval->ptr);
+ } else {
+ /* We ignore fields we don't understand, as by AUX field
+ * contract. */
+ redisLog(REDIS_DEBUG,"Unrecognized RDB AUX field: '%s'",
+ (char*)auxkey->ptr);
+ }
+
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
}
+
/* Read key */
if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
/* Read value */
@@ -1223,7 +1375,7 @@ int rdbLoad(char *filename) {
redisLog(REDIS_WARNING,"RDB file was saved with checksum disabled: no check performed.");
} else if (cksum != expected) {
redisLog(REDIS_WARNING,"Wrong RDB checksum. Aborting now.");
- exit(1);
+ rdbExitReportCorruptRDB("RDB CRC error");
}
}
@@ -1233,7 +1385,7 @@ int rdbLoad(char *filename) {
eoferr: /* unexpected end of file is handled here with a fatal exit */
redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
- exit(1);
+ rdbExitReportCorruptRDB("Unexpected EOF reading RDB file");
return REDIS_ERR; /* Just to avoid warning */
}
@@ -1491,7 +1643,9 @@ int rdbSaveToSlavesSockets(void) {
{
retval = REDIS_ERR;
}
+ zfree(msg);
}
+ zfree(clientids);
exitFromChild((retval == REDIS_OK) ? 0 : 1);
} else {
/* Parent */
diff --git a/src/rdb.h b/src/rdb.h
index eb40d4993..6319f5d02 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -38,7 +38,7 @@
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define REDIS_RDB_VERSION 6
+#define REDIS_RDB_VERSION 7
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -74,6 +74,7 @@
#define REDIS_RDB_TYPE_SET 2
#define REDIS_RDB_TYPE_ZSET 3
#define REDIS_RDB_TYPE_HASH 4
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Object types for encoded objects. */
#define REDIS_RDB_TYPE_HASH_ZIPMAP 9
@@ -81,11 +82,15 @@
#define REDIS_RDB_TYPE_SET_INTSET 11
#define REDIS_RDB_TYPE_ZSET_ZIPLIST 12
#define REDIS_RDB_TYPE_HASH_ZIPLIST 13
+#define REDIS_RDB_TYPE_LIST_QUICKLIST 14
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 13))
+#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 14))
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
+#define REDIS_RDB_OPCODE_AUX 250
+#define REDIS_RDB_OPCODE_RESIZEDB 251
#define REDIS_RDB_OPCODE_EXPIRETIME_MS 252
#define REDIS_RDB_OPCODE_EXPIRETIME 253
#define REDIS_RDB_OPCODE_SELECTDB 254
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
index 2e67f1021..7567e0181 100644
--- a/src/redis-benchmark.c
+++ b/src/redis-benchmark.c
@@ -90,9 +90,10 @@ typedef struct _client {
long long start; /* Start time of a request */
long long latency; /* Request latency */
int pending; /* Number of pending requests (replies to consume) */
- int selectlen; /* If non-zero, a SELECT of 'selectlen' bytes is currently
- used as a prefix of the pipline of commands. This gets
- discarded the first time it's sent. */
+ int prefix_pending; /* If non-zero, number of pending prefix commands. Commands
+ such as auth and select are prefixed to the pipeline of
+ benchmark commands and discarded after the first send. */
+ int prefixlen; /* Size in bytes of the pending prefix commands */
} *client;
/* Prototypes */
@@ -212,20 +213,21 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
}
freeReplyObject(reply);
-
- if (c->selectlen) {
- size_t j;
-
- /* This is the OK from SELECT. Just discard the SELECT
- * from the buffer. */
+ /* This is an OK for prefix commands such as auth and select.*/
+ if (c->prefix_pending > 0) {
+ c->prefix_pending--;
c->pending--;
- sdsrange(c->obuf,c->selectlen,-1);
- /* We also need to fix the pointers to the strings
- * we need to randomize. */
- for (j = 0; j < c->randlen; j++)
- c->randptr[j] -= c->selectlen;
- c->selectlen = 0;
- continue;
+ /* Discard prefix commands on first response.*/
+ if (c->prefixlen > 0) {
+ size_t j;
+ sdsrange(c->obuf, c->prefixlen, -1);
+ /* We also need to fix the pointers to the strings
+ * we need to randomize. */
+ for (j = 0; j < c->randlen; j++)
+ c->randptr[j] -= c->prefixlen;
+ c->prefixlen = 0;
+ }
+ continue;
}
if (config.requests_finished < config.requests)
@@ -299,8 +301,7 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* 2) The offsets of the __rand_int__ elements inside the command line, used
* for arguments randomization.
*
- * Even when cloning another client, the SELECT command is automatically prefixed
- * if needed. */
+ * Even when cloning another client, prefix commands are applied if needed.*/
static client createClient(char *cmd, size_t len, client from) {
int j;
client c = zmalloc(sizeof(struct _client));
@@ -325,12 +326,16 @@ static client createClient(char *cmd, size_t len, client from) {
* Queue N requests accordingly to the pipeline size, or simply clone
* the example client buffer. */
c->obuf = sdsempty();
-
+ /* Prefix the request buffer with AUTH and/or SELECT commands, if applicable.
+ * These commands are discarded after the first response, so if the client is
+ * reused the commands will not be used again. */
+ c->prefix_pending = 0;
if (config.auth) {
char *buf = NULL;
int len = redisFormatCommand(&buf, "AUTH %s", config.auth);
c->obuf = sdscatlen(c->obuf, buf, len);
free(buf);
+ c->prefix_pending++;
}
/* If a DB number different than zero is selected, prefix our request
@@ -340,26 +345,23 @@ static client createClient(char *cmd, size_t len, client from) {
if (config.dbnum != 0) {
c->obuf = sdscatprintf(c->obuf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
(int)sdslen(config.dbnumstr),config.dbnumstr);
- c->selectlen = sdslen(c->obuf);
- } else {
- c->selectlen = 0;
+ c->prefix_pending++;
}
-
+ c->prefixlen = sdslen(c->obuf);
/* Append the request itself. */
if (from) {
c->obuf = sdscatlen(c->obuf,
- from->obuf+from->selectlen,
- sdslen(from->obuf)-from->selectlen);
+ from->obuf+from->prefixlen,
+ sdslen(from->obuf)-from->prefixlen);
} else {
for (j = 0; j < config.pipeline; j++)
c->obuf = sdscatlen(c->obuf,cmd,len);
}
c->written = 0;
- c->pending = config.pipeline;
+ c->pending = config.pipeline+c->prefix_pending;
c->randptr = NULL;
c->randlen = 0;
- if (c->selectlen) c->pending++;
/* Find substrings in the output buffer that need to be randomized. */
if (config.randomkeys) {
@@ -371,7 +373,7 @@ static client createClient(char *cmd, size_t len, client from) {
for (j = 0; j < (int)c->randlen; j++) {
c->randptr[j] = c->obuf + (from->randptr[j]-from->obuf);
/* Adjust for the different select prefix length. */
- c->randptr[j] += c->selectlen - from->selectlen;
+ c->randptr[j] += c->prefixlen - from->prefixlen;
}
} else {
char *p = c->obuf;
@@ -390,7 +392,8 @@ static client createClient(char *cmd, size_t len, client from) {
}
}
}
- aeCreateFileEvent(config.el,c->context->fd,AE_WRITABLE,writeHandler,c);
+ if (config.idlemode == 0)
+ aeCreateFileEvent(config.el,c->context->fd,AE_WRITABLE,writeHandler,c);
listAddNodeTail(config.clients,c);
config.liveclients++;
return c;
@@ -555,7 +558,7 @@ usage:
" -s <socket> Server socket (overrides host and port)\n"
" -a <password> Password for Redis Auth\n"
" -c <clients> Number of parallel connections (default 50)\n"
-" -n <requests> Total number of requests (default 10000)\n"
+" -n <requests> Total number of requests (default 100000)\n"
" -d <size> Data size of SET/GET value in bytes (default 2)\n"
" -dbnum <db> SELECT the specified db number (default 0)\n"
" -k <boolean> 1=keep alive 0=reconnect (default 1)\n"
@@ -600,8 +603,12 @@ int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData
fprintf(stderr,"All clients disconnected... aborting.\n");
exit(1);
}
-
if (config.csv) return 250;
+ if (config.idlemode == 1) {
+ printf("clients: %d\r", config.liveclients);
+ fflush(stdout);
+ return 250;
+ }
float dt = (float)(mstime()-config.start)/1000.0;
float rps = (float)config.requests_finished/dt;
printf("%s: %.2f\r", config.title, rps);
@@ -635,7 +642,7 @@ int main(int argc, const char **argv) {
signal(SIGPIPE, SIG_IGN);
config.numclients = 50;
- config.requests = 10000;
+ config.requests = 100000;
config.liveclients = 0;
config.el = aeCreateEventLoop(1024*10);
aeCreateTimeEvent(config.el,1,showThroughput,NULL,NULL);
@@ -693,8 +700,8 @@ int main(int argc, const char **argv) {
}
/* Run default benchmark suite. */
+ data = zmalloc(config.datasize+1);
do {
- data = zmalloc(config.datasize+1);
memset(data,'x',config.datasize);
data[config.datasize] = '\0';
@@ -731,12 +738,24 @@ int main(int argc, const char **argv) {
free(cmd);
}
+ if (test_is_selected("rpush")) {
+ len = redisFormatCommand(&cmd,"RPUSH mylist %s",data);
+ benchmark("RPUSH",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("lpop")) {
len = redisFormatCommand(&cmd,"LPOP mylist");
benchmark("LPOP",cmd,len);
free(cmd);
}
+ if (test_is_selected("rpop")) {
+ len = redisFormatCommand(&cmd,"RPOP mylist");
+ benchmark("RPOP",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("sadd")) {
len = redisFormatCommand(&cmd,
"SADD myset element:__rand_int__");
diff --git a/src/redis-check-dump.c b/src/redis-check-rdb.c
index 546462001..21f72c222 100644
--- a/src/redis-check-dump.c
+++ b/src/redis-check-rdb.c
@@ -29,74 +29,19 @@
*/
+#include "redis.h"
+#include "rdb.h"
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <stdint.h>
-#include <limits.h>
#include "lzf.h"
#include "crc64.h"
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-#define REDIS_HASH_ZIPMAP 9
-#define REDIS_LIST_ZIPLIST 10
-#define REDIS_SET_INTSET 11
-#define REDIS_ZSET_ZIPLIST 12
-#define REDIS_HASH_ZIPLIST 13
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
-#define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME_MS 252
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
#define ERROR(...) { \
- printf(__VA_ARGS__); \
+ redisLog(REDIS_WARNING, __VA_ARGS__); \
exit(1); \
}
@@ -133,28 +78,23 @@ typedef struct {
char success;
} entry;
-/* Global vars that are actually used as constants. The following double
- * values are used for double on-disk serialization, and are initialized
- * at runtime to avoid strange compiler optimizations. */
-static double R_Zero, R_PosInf, R_NegInf, R_Nan;
-
#define MAX_TYPES_NUM 256
#define MAX_TYPE_NAME_LEN 16
/* store string types for output */
static char types[MAX_TYPES_NUM][MAX_TYPE_NAME_LEN];
/* Return true if 't' is a valid object type. */
-int checkType(unsigned char t) {
+static int rdbCheckType(unsigned char t) {
/* In case a new object type is added, update the following
* condition as necessary. */
return
- (t >= REDIS_HASH_ZIPMAP && t <= REDIS_HASH_ZIPLIST) ||
- t <= REDIS_HASH ||
- t >= REDIS_EXPIRETIME_MS;
+ (t >= REDIS_RDB_TYPE_HASH_ZIPMAP && t <= REDIS_RDB_TYPE_HASH_ZIPLIST) ||
+ t <= REDIS_RDB_TYPE_HASH ||
+ t >= REDIS_RDB_OPCODE_EXPIRETIME_MS;
}
/* when number of bytes to read is negative, do a peek */
-int readBytes(void *target, long num) {
+static int readBytes(void *target, long num) {
char peek = (num < 0) ? 1 : 0;
num = (num < 0) ? -num : num;
@@ -173,28 +113,28 @@ int processHeader(void) {
int dump_version;
if (!readBytes(buf, 9)) {
- ERROR("Cannot read header\n");
+ ERROR("Cannot read header");
}
/* expect the first 5 bytes to equal REDIS */
if (memcmp(buf,"REDIS",5) != 0) {
- ERROR("Wrong signature in header\n");
+ ERROR("Wrong signature in header");
}
dump_version = (int)strtol(buf + 5, NULL, 10);
if (dump_version < 1 || dump_version > 6) {
- ERROR("Unknown RDB format version: %d\n", dump_version);
+ ERROR("Unknown RDB format version: %d", dump_version);
}
return dump_version;
}
-int loadType(entry *e) {
+static int loadType(entry *e) {
uint32_t offset = CURR_OFFSET;
/* this byte needs to qualify as type */
unsigned char t;
if (readBytes(&t, 1)) {
- if (checkType(t)) {
+ if (rdbCheckType(t)) {
e->type = t;
return 1;
} else {
@@ -208,18 +148,18 @@ int loadType(entry *e) {
return 0;
}
-int peekType() {
+static int peekType() {
unsigned char t;
- if (readBytes(&t, -1) && (checkType(t)))
+ if (readBytes(&t, -1) && (rdbCheckType(t)))
return t;
return -1;
}
/* discard time, just consume the bytes */
-int processTime(int type) {
+static int processTime(int type) {
uint32_t offset = CURR_OFFSET;
unsigned char t[8];
- int timelen = (type == REDIS_EXPIRETIME_MS) ? 8 : 4;
+ int timelen = (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) ? 8 : 4;
if (readBytes(t,timelen)) {
return 1;
@@ -231,7 +171,7 @@ int processTime(int type) {
return 0;
}
-uint32_t loadLength(int *isencoded) {
+static uint32_t loadLength(int *isencoded) {
unsigned char buf[2];
uint32_t len;
int type;
@@ -257,7 +197,7 @@ uint32_t loadLength(int *isencoded) {
}
}
-char *loadIntegerObject(int enctype) {
+static char *loadIntegerObject(int enctype) {
uint32_t offset = CURR_OFFSET;
unsigned char enc[4];
long long val;
@@ -284,36 +224,36 @@ char *loadIntegerObject(int enctype) {
/* convert val into string */
char *buf;
- buf = malloc(sizeof(char) * 128);
+ buf = zmalloc(sizeof(char) * 128);
sprintf(buf, "%lld", val);
return buf;
}
-char* loadLzfStringObject() {
+static char* loadLzfStringObject() {
unsigned int slen, clen;
char *c, *s;
if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
- c = malloc(clen);
+ c = zmalloc(clen);
if (!readBytes(c, clen)) {
- free(c);
+ zfree(c);
return NULL;
}
- s = malloc(slen+1);
+ s = zmalloc(slen+1);
if (lzf_decompress(c,clen,s,slen) == 0) {
- free(c); free(s);
+ zfree(c); zfree(s);
return NULL;
}
- free(c);
+ zfree(c);
return s;
}
/* returns NULL when not processable, char* when valid */
-char* loadStringObject() {
+static char* loadStringObject() {
uint32_t offset = CURR_OFFSET;
int isencoded;
uint32_t len;
@@ -336,48 +276,48 @@ char* loadStringObject() {
if (len == REDIS_RDB_LENERR) return NULL;
- char *buf = malloc(sizeof(char) * (len+1));
+ char *buf = zmalloc(sizeof(char) * (len+1));
if (buf == NULL) return NULL;
buf[len] = '\0';
if (!readBytes(buf, len)) {
- free(buf);
+ zfree(buf);
return NULL;
}
return buf;
}
-int processStringObject(char** store) {
+static int processStringObject(char** store) {
unsigned long offset = CURR_OFFSET;
char *key = loadStringObject();
if (key == NULL) {
SHIFT_ERROR(offset, "Error reading string object");
- free(key);
+ zfree(key);
return 0;
}
if (store != NULL) {
*store = key;
} else {
- free(key);
+ zfree(key);
}
return 1;
}
-double* loadDoubleValue() {
+static double* loadDoubleValue() {
char buf[256];
unsigned char len;
double* val;
if (!readBytes(&len,1)) return NULL;
- val = malloc(sizeof(double));
+ val = zmalloc(sizeof(double));
switch(len) {
case 255: *val = R_NegInf; return val;
case 254: *val = R_PosInf; return val;
case 253: *val = R_Nan; return val;
default:
if (!readBytes(buf, len)) {
- free(val);
+ zfree(val);
return NULL;
}
buf[len] = '\0';
@@ -386,24 +326,24 @@ double* loadDoubleValue() {
}
}
-int processDoubleValue(double** store) {
+static int processDoubleValue(double** store) {
unsigned long offset = CURR_OFFSET;
double *val = loadDoubleValue();
if (val == NULL) {
SHIFT_ERROR(offset, "Error reading double value");
- free(val);
+ zfree(val);
return 0;
}
if (store != NULL) {
*store = val;
} else {
- free(val);
+ zfree(val);
}
return 1;
}
-int loadPair(entry *e) {
+static int loadPair(entry *e) {
uint32_t offset = CURR_OFFSET;
uint32_t i;
@@ -417,10 +357,10 @@ int loadPair(entry *e) {
}
uint32_t length = 0;
- if (e->type == REDIS_LIST ||
- e->type == REDIS_SET ||
- e->type == REDIS_ZSET ||
- e->type == REDIS_HASH) {
+ if (e->type == REDIS_RDB_TYPE_LIST ||
+ e->type == REDIS_RDB_TYPE_SET ||
+ e->type == REDIS_RDB_TYPE_ZSET ||
+ e->type == REDIS_RDB_TYPE_HASH) {
if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
return 0;
@@ -428,19 +368,19 @@ int loadPair(entry *e) {
}
switch(e->type) {
- case REDIS_STRING:
- case REDIS_HASH_ZIPMAP:
- case REDIS_LIST_ZIPLIST:
- case REDIS_SET_INTSET:
- case REDIS_ZSET_ZIPLIST:
- case REDIS_HASH_ZIPLIST:
+ case REDIS_RDB_TYPE_STRING:
+ case REDIS_RDB_TYPE_HASH_ZIPMAP:
+ case REDIS_RDB_TYPE_LIST_ZIPLIST:
+ case REDIS_RDB_TYPE_SET_INTSET:
+ case REDIS_RDB_TYPE_ZSET_ZIPLIST:
+ case REDIS_RDB_TYPE_HASH_ZIPLIST:
if (!processStringObject(NULL)) {
SHIFT_ERROR(offset, "Error reading entry value");
return 0;
}
break;
- case REDIS_LIST:
- case REDIS_SET:
+ case REDIS_RDB_TYPE_LIST:
+ case REDIS_RDB_TYPE_SET:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -449,7 +389,7 @@ int loadPair(entry *e) {
}
}
break;
- case REDIS_ZSET:
+ case REDIS_RDB_TYPE_ZSET:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -463,7 +403,7 @@ int loadPair(entry *e) {
}
}
break;
- case REDIS_HASH:
+ case REDIS_RDB_TYPE_HASH:
for (i = 0; i < length; i++) {
offset = CURR_OFFSET;
if (!processStringObject(NULL)) {
@@ -486,7 +426,7 @@ int loadPair(entry *e) {
return 1;
}
-entry loadEntry() {
+static entry loadEntry() {
entry e = { NULL, -1, 0 };
uint32_t length, offset[4];
@@ -499,7 +439,7 @@ entry loadEntry() {
}
offset[1] = CURR_OFFSET;
- if (e.type == REDIS_SELECTDB) {
+ if (e.type == REDIS_RDB_OPCODE_SELECTDB) {
if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
SHIFT_ERROR(offset[1], "Error reading database number");
return e;
@@ -508,7 +448,7 @@ entry loadEntry() {
SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
return e;
}
- } else if (e.type == REDIS_EOF) {
+ } else if (e.type == REDIS_RDB_OPCODE_EOF) {
if (positions[level].offset < positions[level].size) {
SHIFT_ERROR(offset[0], "Unexpected EOF");
} else {
@@ -517,8 +457,8 @@ entry loadEntry() {
return e;
} else {
/* optionally consume expire */
- if (e.type == REDIS_EXPIRETIME ||
- e.type == REDIS_EXPIRETIME_MS) {
+ if (e.type == REDIS_RDB_OPCODE_EXPIRETIME ||
+ e.type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
if (!processTime(e.type)) return e;
if (!loadType(&e)) return e;
}
@@ -544,31 +484,31 @@ entry loadEntry() {
return e;
}
-void printCentered(int indent, int width, char* body) {
+static void printCentered(int indent, int width, char* body) {
char head[256], tail[256];
memset(head, '\0', 256);
memset(tail, '\0', 256);
memset(head, '=', indent);
memset(tail, '=', width - 2 - indent - strlen(body));
- printf("%s %s %s\n", head, body, tail);
+ redisLog(REDIS_WARNING, "%s %s %s", head, body, tail);
}
-void printValid(uint64_t ops, uint64_t bytes) {
+static void printValid(uint64_t ops, uint64_t bytes) {
char body[80];
sprintf(body, "Processed %llu valid opcodes (in %llu bytes)",
(unsigned long long) ops, (unsigned long long) bytes);
printCentered(4, 80, body);
}
-void printSkipped(uint64_t bytes, uint64_t offset) {
+static void printSkipped(uint64_t bytes, uint64_t offset) {
char body[80];
sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)",
(unsigned long long) bytes, (unsigned long long) offset);
printCentered(4, 80, body);
}
-void printErrorStack(entry *e) {
+static void printErrorStack(entry *e) {
unsigned int i;
char body[64];
@@ -598,20 +538,20 @@ void printErrorStack(entry *e) {
/* display error stack */
for (i = 0; i < errors.level; i++) {
- printf("0x%08lx - %s\n",
+ redisLog(REDIS_WARNING, "0x%08lx - %s",
(unsigned long) errors.offset[i], errors.error[i]);
}
}
void process(void) {
uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
- entry entry;
+ entry entry = { NULL, -1, 0 };
int dump_version = processHeader();
/* Exclude the final checksum for RDB >= 5. Will be checked at the end. */
if (dump_version >= 5) {
if (positions[0].size < 8) {
- printf("RDB version >= 5 but no room for checksum.\n");
+ redisLog(REDIS_WARNING, "RDB version >= 5 but no room for checksum.");
exit(1);
}
positions[0].size -= 8;
@@ -660,7 +600,7 @@ void process(void) {
/* advance position */
positions[0] = positions[1];
}
- free(entry.key);
+ zfree(entry.key);
}
/* because there is another potential error,
@@ -668,7 +608,7 @@ void process(void) {
printValid(num_valid_ops, num_valid_bytes);
/* expect an eof */
- if (entry.type != REDIS_EOF) {
+ if (entry.type != REDIS_RDB_OPCODE_EOF) {
/* last byte should be EOF, add error */
errors.level = 0;
SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
@@ -696,47 +636,40 @@ void process(void) {
if (crc != crc2) {
SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match.");
} else {
- printf("CRC64 checksum is OK\n");
+ redisLog(REDIS_WARNING, "CRC64 checksum is OK");
}
}
/* print summary on errors */
if (num_errors) {
- printf("\n");
- printf("Total unprocessable opcodes: %llu\n",
+ redisLog(REDIS_WARNING, "Total unprocessable opcodes: %llu",
(unsigned long long) num_errors);
}
}
-int main(int argc, char **argv) {
- /* expect the first argument to be the dump file */
- if (argc <= 1) {
- printf("Usage: %s <dump.rdb>\n", argv[0]);
- exit(0);
- }
-
+int redis_check_rdb(char *rdbfilename) {
int fd;
off_t size;
struct stat stat;
void *data;
- fd = open(argv[1], O_RDONLY);
+ fd = open(rdbfilename, O_RDONLY);
if (fd < 1) {
- ERROR("Cannot open file: %s\n", argv[1]);
+ ERROR("Cannot open file: %s", rdbfilename);
}
if (fstat(fd, &stat) == -1) {
- ERROR("Cannot stat: %s\n", argv[1]);
+ ERROR("Cannot stat: %s", rdbfilename);
} else {
size = stat.st_size;
}
if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) {
- ERROR("Cannot check dump files >2GB on a 32-bit platform\n");
+ ERROR("Cannot check dump files >2GB on a 32-bit platform");
}
data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
- ERROR("Cannot mmap: %s\n", argv[1]);
+ ERROR("Cannot mmap: %s", rdbfilename);
}
/* Initialize static vars */
@@ -746,22 +679,16 @@ int main(int argc, char **argv) {
errors.level = 0;
/* Object types */
- sprintf(types[REDIS_STRING], "STRING");
- sprintf(types[REDIS_LIST], "LIST");
- sprintf(types[REDIS_SET], "SET");
- sprintf(types[REDIS_ZSET], "ZSET");
- sprintf(types[REDIS_HASH], "HASH");
+ sprintf(types[REDIS_RDB_TYPE_STRING], "STRING");
+ sprintf(types[REDIS_RDB_TYPE_LIST], "LIST");
+ sprintf(types[REDIS_RDB_TYPE_SET], "SET");
+ sprintf(types[REDIS_RDB_TYPE_ZSET], "ZSET");
+ sprintf(types[REDIS_RDB_TYPE_HASH], "HASH");
/* Object types only used for dumping to disk */
- sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
- sprintf(types[REDIS_SELECTDB], "SELECTDB");
- sprintf(types[REDIS_EOF], "EOF");
-
- /* Double constants initialization */
- R_Zero = 0.0;
- R_PosInf = 1.0/R_Zero;
- R_NegInf = -1.0/R_Zero;
- R_Nan = R_Zero/R_Zero;
+ sprintf(types[REDIS_RDB_OPCODE_EXPIRETIME], "EXPIRETIME");
+ sprintf(types[REDIS_RDB_OPCODE_SELECTDB], "SELECTDB");
+ sprintf(types[REDIS_RDB_OPCODE_EOF], "EOF");
process();
@@ -769,3 +696,15 @@ int main(int argc, char **argv) {
close(fd);
return 0;
}
+
+/* RDB check main: called form redis.c when Redis is executed with the
+ * redis-check-rdb alias. */
+int redis_check_rdb_main(char **argv, int argc) {
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
+ exit(1);
+ }
+ redisLog(REDIS_WARNING, "Checking RDB file %s", argv[1]);
+ exit(redis_check_rdb(argv[1]));
+ return 0;
+}
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 34070ab80..e243db451 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -60,6 +60,8 @@
#define OUTPUT_CSV 2
#define REDIS_CLI_KEEPALIVE_INTERVAL 15 /* seconds */
#define REDIS_CLI_DEFAULT_PIPE_TIMEOUT 30 /* seconds */
+#define REDIS_CLI_HISTFILE_ENV "REDISCLI_HISTFILE"
+#define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history"
static redisContext *context;
static struct config {
@@ -128,9 +130,8 @@ static void cliRefreshPrompt(void) {
len = snprintf(config.prompt,sizeof(config.prompt),"redis %s",
config.hostsocket);
else
- len = snprintf(config.prompt,sizeof(config.prompt),
- strchr(config.hostip,':') ? "[%s]:%d" : "%s:%d",
- config.hostip, config.hostport);
+ len = anetFormatAddr(config.prompt, sizeof(config.prompt),
+ config.hostip, config.hostport);
/* Add [dbnum] if needed */
if (config.dbnum != 0 && config.last_cmd_type != REDIS_REPLY_ERROR)
len += snprintf(config.prompt+len,sizeof(config.prompt)-len,"[%d]",
@@ -138,6 +139,30 @@ static void cliRefreshPrompt(void) {
snprintf(config.prompt+len,sizeof(config.prompt)-len,"> ");
}
+static sds getHistoryPath() {
+ char *path = NULL;
+ sds historyPath = NULL;
+
+ /* check the env for a histfile override */
+ path = getenv(REDIS_CLI_HISTFILE_ENV);
+ if (path != NULL && *path != '\0') {
+ if (!strcmp("/dev/null", path)) {
+ return NULL;
+ }
+
+ /* if the env is set, return it */
+ historyPath = sdscatprintf(sdsempty(), "%s", path);
+ } else {
+ char *home = getenv("HOME");
+ if (home != NULL && *home != '\0') {
+ /* otherwise, return the default */
+ historyPath = sdscatprintf(sdsempty(), "%s/%s", home, REDIS_CLI_HISTFILE_DEFAULT);
+ }
+ }
+
+ return historyPath;
+}
+
/*------------------------------------------------------------------------------
* Help functions
*--------------------------------------------------------------------------- */
@@ -302,7 +327,7 @@ static void completionCallback(const char *buf, linenoiseCompletions *lc) {
*--------------------------------------------------------------------------- */
/* Send AUTH command to the server */
-static int cliAuth() {
+static int cliAuth(void) {
redisReply *reply;
if (config.auth == NULL) return REDIS_OK;
@@ -315,7 +340,7 @@ static int cliAuth() {
}
/* Send SELECT dbnum to the server */
-static int cliSelect() {
+static int cliSelect(void) {
redisReply *reply;
if (config.dbnum == 0) return REDIS_OK;
@@ -493,7 +518,7 @@ static sds cliFormatReplyCSV(redisReply *r) {
out = sdscatrepr(out,r->str,r->len);
break;
case REDIS_REPLY_NIL:
- out = sdscat(out,"NIL\n");
+ out = sdscat(out,"NIL");
break;
case REDIS_REPLY_ARRAY:
for (i = 0; i < r->elements; i++) {
@@ -604,6 +629,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
output_raw = 0;
if (!strcasecmp(command,"info") ||
+ (argc == 3 && !strcasecmp(command,"debug") &&
+ (!strcasecmp(argv[1],"jemalloc") &&
+ !strcasecmp(argv[2],"info"))) ||
(argc == 2 && !strcasecmp(command,"cluster") &&
(!strcasecmp(argv[1],"nodes") ||
!strcasecmp(argv[1],"info"))) ||
@@ -830,6 +858,7 @@ static void usage(void) {
" not a tty).\n"
" --no-raw Force formatted output even when STDOUT is not a tty.\n"
" --csv Output in CSV format.\n"
+" --stat Print rolling stats about server: mem, clients, ...\n"
" --latency Enter a special mode continuously sampling latency.\n"
" --latency-history Like --latency but tracking latency changes over time.\n"
" Default time interval is 15 sec. Change it using -i.\n"
@@ -877,6 +906,33 @@ static char **convertToSds(int count, char** args) {
return sds;
}
+static int issueCommandRepeat(int argc, char **argv, long repeat) {
+ while (1) {
+ config.cluster_reissue_command = 0;
+ if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
+ cliConnect(1);
+
+ /* If we still cannot send the command print error.
+ * We'll try to reconnect the next time. */
+ if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
+ cliPrintContextError();
+ return REDIS_ERR;
+ }
+ }
+ /* Issue the command again if we got redirected in cluster mode */
+ if (config.cluster_mode && config.cluster_reissue_command) {
+ cliConnect(1);
+ } else {
+ break;
+ }
+ }
+ return REDIS_OK;
+}
+
+static int issueCommand(int argc, char **argv) {
+ return issueCommandRepeat(argc, argv, config.repeat);
+}
+
static void repl(void) {
sds historyfile = NULL;
int history = 0;
@@ -890,10 +946,9 @@ static void repl(void) {
/* Only use history when stdin is a tty. */
if (isatty(fileno(stdin))) {
- history = 1;
-
- if (getenv("HOME") != NULL) {
- historyfile = sdscatprintf(sdsempty(),"%s/.rediscli_history",getenv("HOME"));
+ historyfile = getHistoryPath();
+ if (historyfile != NULL) {
+ history = 1;
linenoiseHistoryLoad(historyfile);
}
}
@@ -933,26 +988,8 @@ static void repl(void) {
repeat = 1;
}
- while (1) {
- config.cluster_reissue_command = 0;
- if (cliSendCommand(argc-skipargs,argv+skipargs,repeat)
- != REDIS_OK)
- {
- cliConnect(1);
-
- /* If we still cannot send the command print error.
- * We'll try to reconnect the next time. */
- if (cliSendCommand(argc-skipargs,argv+skipargs,repeat)
- != REDIS_OK)
- cliPrintContextError();
- }
- /* Issue the command again if we got redirected in cluster mode */
- if (config.cluster_mode && config.cluster_reissue_command) {
- cliConnect(1);
- } else {
- break;
- }
- }
+ issueCommandRepeat(argc-skipargs, argv+skipargs, repeat);
+
elapsed = mstime()-start_time;
if (elapsed >= 500) {
printf("(%.2fs)\n",(double)elapsed/1000);
@@ -973,10 +1010,9 @@ static int noninteractive(int argc, char **argv) {
if (config.stdinarg) {
argv = zrealloc(argv, (argc+1)*sizeof(char*));
argv[argc] = readArgFromStdin();
- retval = cliSendCommand(argc+1, argv, config.repeat);
+ retval = issueCommand(argc+1, argv);
} else {
- /* stdin is probably a tty, can be tested with S_ISCHR(s.st_mode) */
- retval = cliSendCommand(argc, argv, config.repeat);
+ retval = issueCommand(argc, argv);
}
return retval;
}
@@ -1020,7 +1056,7 @@ static int evalMode(int argc, char **argv) {
argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
/* Call it */
- return cliSendCommand(argc+3-got_comma, argv2, config.repeat);
+ return issueCommand(argc+3-got_comma, argv2);
}
/*------------------------------------------------------------------------------
@@ -1915,8 +1951,6 @@ int main(int argc, char **argv) {
argc -= firstarg;
argv += firstarg;
- signal(SIGPIPE, SIG_IGN);
-
/* Latency mode */
if (config.latency_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
@@ -1965,6 +1999,9 @@ int main(int argc, char **argv) {
/* Start interactive mode when no command is provided */
if (argc == 0 && !config.eval) {
+ /* Ignore SIGPIPE in interactive mode to force a reconnect */
+ signal(SIGPIPE, SIG_IGN);
+
/* Note that in repl mode we don't abort on connection error.
* A new attempt will be performed for every command send. */
cliConnect(0);
diff --git a/src/redis-trib.rb b/src/redis-trib.rb
index 466a81137..6002e4caa 100755
--- a/src/redis-trib.rb
+++ b/src/redis-trib.rb
@@ -72,7 +72,7 @@ class ClusterNode
@friends
end
- def slots
+ def slots
@info[:slots]
end
@@ -154,7 +154,7 @@ class ClusterNode
end
} if slots
@dirty = false
- @r.cluster("info").split("\n").each{|e|
+ @r.cluster("info").split("\n").each{|e|
k,v=e.split(":")
k = k.to_sym
v.chop!
@@ -213,7 +213,7 @@ class ClusterNode
#
# Note: this could be easily written without side effects,
# we use 'slots' just to split the computation into steps.
-
+
# First step: we want an increasing array of integers
# for instance: [1,2,3,4,5,8,9,20,21,22,23,24,25,30]
slots = @info[:slots].keys.sort
@@ -273,7 +273,7 @@ class ClusterNode
def info
@info
end
-
+
def is_dirty?
@dirty
end
@@ -540,7 +540,6 @@ class RedisTrib
nodes_count = @nodes.length
masters_count = @nodes.length / (@replicas+1)
masters = []
- slaves = []
# The first step is to split instances by IP. This is useful as
# we'll try to allocate master nodes in different physical machines
@@ -558,16 +557,31 @@ class RedisTrib
# Select master instances
puts "Using #{masters_count} masters:"
- while masters.length < masters_count
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- masters << nodes_list.shift
- puts masters[-1]
- nodes_count -= 1
- break if masters.length == masters_count
- }
+ interleaved = []
+ stop = false
+ while not stop do
+ # Take one node from each IP until we run out of nodes
+ # across every IP.
+ ips.each do |ip,nodes|
+ if nodes.empty?
+ # if this IP has no remaining nodes, check for termination
+ if interleaved.length == nodes_count
+ # stop when 'interleaved' has accumulated all nodes
+ stop = true
+ next
+ end
+ else
+ # else, move one node from this IP to 'interleaved'
+ interleaved.push nodes.shift
+ end
+ end
end
+ masters = interleaved.slice!(0, masters_count)
+ nodes_count -= masters.length
+
+ masters.each{|m| puts m}
+
# Alloc slots on masters
slots_per_node = ClusterHashSlots.to_f / masters_count
first = 0
@@ -594,8 +608,8 @@ class RedisTrib
# all nodes will be used.
assignment_verbose = false
- [:requested,:unused].each{|assign|
- masters.each{|m|
+ [:requested,:unused].each do |assign|
+ masters.each do |m|
assigned_replicas = 0
while assigned_replicas < @replicas
break if nodes_count == 0
@@ -609,21 +623,33 @@ class RedisTrib
"role too (#{nodes_count} remaining)."
end
end
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- # Skip instances with the same IP as the master if we
- # have some more IPs available.
- next if ip == m.info[:host] && nodes_count > nodes_list.length
- slave = nodes_list.shift
- slave.set_as_replica(m.info[:name])
- nodes_count -= 1
- assigned_replicas += 1
- puts "Adding replica #{slave} to #{m}"
- break
- }
+
+ # Return the first node not matching our current master
+ node = interleaved.find{|n| n.info[:host] != m.info[:host]}
+
+ # If we found a node, use it as a best-first match.
+ # Otherwise, we didn't find a node on a different IP, so we
+ # go ahead and use a same-IP replica.
+ if node
+ slave = node
+ interleaved.delete node
+ else
+ slave = interleaved.shift
+ end
+ slave.set_as_replica(m.info[:name])
+ nodes_count -= 1
+ assigned_replicas += 1
+ puts "Adding replica #{slave} to #{m}"
+
+ # If we are in the "assign extra nodes" loop,
+ # we want to assign one extra replica to each
+ # master before repeating masters.
+ # This break lets us assign extra replicas to masters
+ # in a round-robin way.
+ break if assign == :unused
end
- }
- }
+ end
+ end
end
def flush_nodes_config
@@ -763,7 +789,7 @@ class RedisTrib
# Move slots between source and target nodes using MIGRATE.
#
- # Options:
+ # Options:
# :verbose -- Print a dot for every moved key.
# :fix -- We are moving in the context of a fix. Use REPLACE.
# :cold -- Move keys without opening / reconfiguring the nodes.
@@ -1139,7 +1165,7 @@ class RedisTrib
# right node as needed.
cursor = nil
while cursor != 0
- cursor,keys = source.scan(cursor,:count,1000)
+ cursor,keys = source.scan(cursor, :count => 1000)
cursor = cursor.to_i
keys.each{|k|
# Migrate keys using the MIGRATE command.
@@ -1206,7 +1232,7 @@ end
#################################################################################
# Libraries
-#
+#
# We try to don't depend on external libs since this is a critical part
# of Redis Cluster.
#################################################################################
diff --git a/src/redis.c b/src/redis.c
index 83e0946ef..b2f9ffc68 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -46,12 +46,14 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
+#include <sys/un.h>
#include <limits.h>
#include <float.h>
#include <math.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#include <locale.h>
+#include <sys/sysctl.h>
/* Our shared "common" objects */
@@ -160,7 +162,7 @@ struct redisCommand redisCommandTable[] = {
{"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
{"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"spop",spopCommand,2,"wRsF",0,NULL,1,1,1,0,0},
+ {"spop",spopCommand,-2,"wRsF",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
@@ -247,7 +249,7 @@ struct redisCommand redisCommandTable[] = {
{"pttl",pttlCommand,2,"rF",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
{"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
- {"role",roleCommand,1,"last",0,NULL,0,0,0,0,0},
+ {"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
{"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
{"config",configCommand,-2,"art",0,NULL,0,0,0,0,0},
{"subscribe",subscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
@@ -259,19 +261,19 @@ struct redisCommand redisCommandTable[] = {
{"watch",watchCommand,-2,"rsF",0,NULL,1,-1,1,0,0},
{"unwatch",unwatchCommand,1,"rsF",0,NULL,0,0,0,0,0},
{"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
- {"restore",restoreCommand,-4,"awm",0,NULL,1,1,1,0,0},
- {"restore-asking",restoreCommand,-4,"awmk",0,NULL,1,1,1,0,0},
- {"migrate",migrateCommand,-6,"aw",0,NULL,0,0,0,0,0},
+ {"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
+ {"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
+ {"migrate",migrateCommand,-6,"w",0,NULL,0,0,0,0,0},
{"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
{"readonly",readonlyCommand,1,"rF",0,NULL,0,0,0,0,0},
{"readwrite",readwriteCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
+ {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0},
{"object",objectCommand,3,"r",0,NULL,2,2,2,0,0},
- {"client",clientCommand,-2,"ars",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
- {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
+ {"script",scriptCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"time",timeCommand,1,"rRF",0,NULL,0,0,0,0,0},
{"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
{"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
@@ -280,7 +282,7 @@ struct redisCommand redisCommandTable[] = {
{"command",commandCommand,0,"rlt",0,NULL,0,0,0,0,0},
{"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0},
{"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
- {"pfcount",pfcountCommand,-2,"w",0,NULL,1,1,1,0,0},
+ {"pfcount",pfcountCommand,-2,"r",0,NULL,1,1,1,0,0},
{"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
{"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
{"latency",latencyCommand,-2,"arslt",0,NULL,0,0,0,0,0}
@@ -876,27 +878,30 @@ unsigned int getLRUClock(void) {
}
/* Add a sample to the operations per second array of samples. */
-void trackOperationsPerSecond(void) {
- long long t = mstime() - server.ops_sec_last_sample_time;
- long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
+void trackInstantaneousMetric(int metric, long long current_reading) {
+ long long t = mstime() - server.inst_metric[metric].last_sample_time;
+ long long ops = current_reading -
+ server.inst_metric[metric].last_sample_count;
long long ops_sec;
ops_sec = t > 0 ? (ops*1000/t) : 0;
- server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
- server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
- server.ops_sec_last_sample_time = mstime();
- server.ops_sec_last_sample_ops = server.stat_numcommands;
+ server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
+ ops_sec;
+ server.inst_metric[metric].idx++;
+ server.inst_metric[metric].idx %= REDIS_METRIC_SAMPLES;
+ server.inst_metric[metric].last_sample_time = mstime();
+ server.inst_metric[metric].last_sample_count = current_reading;
}
/* Return the mean of all the samples. */
-long long getOperationsPerSecond(void) {
+long long getInstantaneousMetric(int metric) {
int j;
long long sum = 0;
- for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
- sum += server.ops_sec_samples[j];
- return sum / REDIS_OPS_SEC_SAMPLES;
+ for (j = 0; j < REDIS_METRIC_SAMPLES; j++)
+ sum += server.inst_metric[metric].samples[j];
+ return sum / REDIS_METRIC_SAMPLES;
}
/* Check for timeouts. Returns non-zero if the client was terminated */
@@ -1068,7 +1073,13 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Update the time cache. */
updateCachedTime();
- run_with_period(100) trackOperationsPerSecond();
+ run_with_period(100) {
+ trackInstantaneousMetric(REDIS_METRIC_COMMAND,server.stat_numcommands);
+ trackInstantaneousMetric(REDIS_METRIC_NET_INPUT,
+ server.stat_net_input_bytes);
+ trackInstantaneousMetric(REDIS_METRIC_NET_OUTPUT,
+ server.stat_net_output_bytes);
+ }
/* We have just REDIS_LRU_BITS bits per object for LRU information.
* So we use an (eventually wrapping) LRU clock.
@@ -1404,6 +1415,8 @@ void initServerConfig(void) {
server.syslog_ident = zstrdup(REDIS_DEFAULT_SYSLOG_IDENT);
server.syslog_facility = LOG_LOCAL0;
server.daemonize = REDIS_DEFAULT_DAEMONIZE;
+ server.supervised = 0;
+ server.supervised_mode = REDIS_SUPERVISED_NONE;
server.aof_state = REDIS_AOF_OFF;
server.aof_fsync = REDIS_DEFAULT_AOF_FSYNC;
server.aof_no_fsync_on_rewrite = REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
@@ -1421,7 +1434,7 @@ void initServerConfig(void) {
server.aof_flush_postponed_start = 0;
server.aof_rewrite_incremental_fsync = REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
server.aof_load_truncated = REDIS_DEFAULT_AOF_LOAD_TRUNCATED;
- server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
+ server.pidfile = NULL;
server.rdb_filename = zstrdup(REDIS_DEFAULT_RDB_FILENAME);
server.aof_filename = zstrdup(REDIS_DEFAULT_AOF_FILENAME);
server.requirepass = NULL;
@@ -1437,8 +1450,8 @@ void initServerConfig(void) {
server.maxmemory_samples = REDIS_DEFAULT_MAXMEMORY_SAMPLES;
server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
- server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
- server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
+ server.list_max_ziplist_size = REDIS_LIST_MAX_ZIPLIST_SIZE;
+ server.list_compress_depth = REDIS_LIST_COMPRESS_DEPTH;
server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
@@ -1554,33 +1567,33 @@ void adjustOpenFilesLimit(void) {
/* Set the max number of files if the current limit is not enough
* for our needs. */
if (oldlimit < maxfiles) {
- rlim_t f;
+ rlim_t bestlimit;
int setrlimit_error = 0;
/* Try to set the file limit to match 'maxfiles' or at least
* to the higher value supported less than maxfiles. */
- f = maxfiles;
- while(f > oldlimit) {
+ bestlimit = maxfiles;
+ while(bestlimit > oldlimit) {
rlim_t decr_step = 16;
- limit.rlim_cur = f;
- limit.rlim_max = f;
+ limit.rlim_cur = bestlimit;
+ limit.rlim_max = bestlimit;
if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
setrlimit_error = errno;
- /* We failed to set file limit to 'f'. Try with a
+ /* We failed to set file limit to 'bestlimit'. Try with a
* smaller limit decrementing by a few FDs per iteration. */
- if (f < decr_step) break;
- f -= decr_step;
+ if (bestlimit < decr_step) break;
+ bestlimit -= decr_step;
}
/* Assume that the limit we get initially is still valid if
* our last try was even lower. */
- if (f < oldlimit) f = oldlimit;
+ if (bestlimit < oldlimit) bestlimit = oldlimit;
- if (f != maxfiles) {
+ if (bestlimit < maxfiles) {
int old_maxclients = server.maxclients;
- server.maxclients = f-REDIS_MIN_RESERVED_FDS;
+ server.maxclients = bestlimit-REDIS_MIN_RESERVED_FDS;
if (server.maxclients < 1) {
redisLog(REDIS_WARNING,"Your current 'ulimit -n' "
"of %llu is not enough for Redis to start. "
@@ -1601,7 +1614,7 @@ void adjustOpenFilesLimit(void) {
"maxclients has been reduced to %d to compensate for "
"low ulimit. "
"If you need higher maxclients increase 'ulimit -n'.",
- (unsigned long long) oldlimit, server.maxclients);
+ (unsigned long long) bestlimit, server.maxclients);
} else {
redisLog(REDIS_NOTICE,"Increased maximum number of open files "
"to %llu (it was originally set to %llu).",
@@ -1612,6 +1625,23 @@ void adjustOpenFilesLimit(void) {
}
}
+/* Check that server.tcp_backlog can be actually enforced in Linux according
+ * to the value of /proc/sys/net/core/somaxconn, or warn about it. */
+void checkTcpBacklogSettings(void) {
+#ifdef HAVE_PROC_SOMAXCONN
+ FILE *fp = fopen("/proc/sys/net/core/somaxconn","r");
+ char buf[1024];
+ if (!fp) return;
+ if (fgets(buf,sizeof(buf),fp) != NULL) {
+ int somaxconn = atoi(buf);
+ if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
+ redisLog(REDIS_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
+ }
+ }
+ fclose(fp);
+#endif
+}
+
/* Initialize a set of file descriptors to listen to the specified 'port'
* binding the addresses specified in the Redis server configuration.
*
@@ -1682,6 +1712,8 @@ int listenToPort(int port, int *fds, int *count) {
* to reset via CONFIG RESETSTAT. The function is also used in order to
* initialize these fields in initServer() at server startup. */
void resetServerStats(void) {
+ int j;
+
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
@@ -1694,10 +1726,15 @@ void resetServerStats(void) {
server.stat_sync_full = 0;
server.stat_sync_partial_ok = 0;
server.stat_sync_partial_err = 0;
- memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
- server.ops_sec_idx = 0;
- server.ops_sec_last_sample_time = mstime();
- server.ops_sec_last_sample_ops = 0;
+ for (j = 0; j < REDIS_METRIC_COUNT; j++) {
+ server.inst_metric[j].idx = 0;
+ server.inst_metric[j].last_sample_time = mstime();
+ server.inst_metric[j].last_sample_count = 0;
+ memset(server.inst_metric[j].samples,0,
+ sizeof(server.inst_metric[j].samples));
+ }
+ server.stat_net_input_bytes = 0;
+ server.stat_net_output_bytes = 0;
}
void initServer(void) {
@@ -1724,6 +1761,7 @@ void initServer(void) {
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
server.clients_paused = 0;
+ server.system_memory_size = zmalloc_get_memory_size();
createSharedObjects();
adjustOpenFilesLimit();
@@ -1998,7 +2036,7 @@ void call(redisClient *c, int flags) {
* not generated from reading an AOF. */
if (listLength(server.monitors) &&
!server.loading &&
- !(c->cmd->flags & REDIS_CMD_SKIP_MONITOR))
+ !(c->cmd->flags & (REDIS_CMD_SKIP_MONITOR|REDIS_CMD_ADMIN)))
{
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
}
@@ -2337,7 +2375,7 @@ int prepareForShutdown(int flags) {
return REDIS_ERR;
}
}
- if (server.daemonize) {
+ if (server.daemonize || server.pidfile) {
redisLog(REDIS_NOTICE,"Removing the pid file.");
unlink(server.pidfile);
}
@@ -2444,7 +2482,6 @@ void timeCommand(redisClient *c) {
addReplyBulkLongLong(c,tv.tv_usec);
}
-
/* Helper function for addReplyCommand() to output flags. */
int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *reply) {
if (cmd->flags & f) {
@@ -2663,7 +2700,10 @@ sds genRedisInfoString(char *section) {
if (allsections || defsections || !strcasecmp(section,"memory")) {
char hmem[64];
char peak_hmem[64];
+ char total_system_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
+ size_t total_system_mem = server.system_memory_size;
+ char *evict_policy = maxmemoryToString();
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
@@ -2674,6 +2714,8 @@ sds genRedisInfoString(char *section) {
bytesToHuman(hmem,zmalloc_used);
bytesToHuman(peak_hmem,server.stat_peak_memory);
+ bytesToHuman(total_system_hmem,total_system_mem);
+
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Memory\r\n"
@@ -2682,17 +2724,23 @@ sds genRedisInfoString(char *section) {
"used_memory_rss:%zu\r\n"
"used_memory_peak:%zu\r\n"
"used_memory_peak_human:%s\r\n"
+ "total_system_memory:%lu\r\n"
+ "total_system_memory_human:%s\r\n"
"used_memory_lua:%lld\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
- "mem_allocator:%s\r\n",
+ "mem_allocator:%s\r\n"
+ "maxmemory_policy:%s\r\n",
zmalloc_used,
hmem,
server.resident_set_size,
server.stat_peak_memory,
peak_hmem,
+ (unsigned long)total_system_mem,
+ total_system_hmem,
((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
zmalloc_get_fragmentation_ratio(server.resident_set_size),
- ZMALLOC_LIB
+ ZMALLOC_LIB,
+ evict_policy
);
}
@@ -2757,14 +2805,14 @@ sds genRedisInfoString(char *section) {
server.loading_loaded_bytes;
perc = ((double)server.loading_loaded_bytes /
- server.loading_total_bytes) * 100;
+ (server.loading_total_bytes+1)) * 100;
- elapsed = server.unixtime-server.loading_start_time;
+ elapsed = time(NULL)-server.loading_start_time;
if (elapsed == 0) {
eta = 1; /* A fake 1 second figure if we don't have
enough info */
} else {
- eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
+ eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
}
info = sdscatprintf(info,
@@ -2790,6 +2838,10 @@ sds genRedisInfoString(char *section) {
"total_connections_received:%lld\r\n"
"total_commands_processed:%lld\r\n"
"instantaneous_ops_per_sec:%lld\r\n"
+ "total_net_input_bytes:%lld\r\n"
+ "total_net_output_bytes:%lld\r\n"
+ "instantaneous_input_kbps:%.2f\r\n"
+ "instantaneous_output_kbps:%.2f\r\n"
"rejected_connections:%lld\r\n"
"sync_full:%lld\r\n"
"sync_partial_ok:%lld\r\n"
@@ -2804,7 +2856,11 @@ sds genRedisInfoString(char *section) {
"migrate_cached_sockets:%ld\r\n",
server.stat_numconnections,
server.stat_numcommands,
- getOperationsPerSecond(),
+ getInstantaneousMetric(REDIS_METRIC_COMMAND),
+ server.stat_net_input_bytes,
+ server.stat_net_output_bytes,
+ (float)getInstantaneousMetric(REDIS_METRIC_NET_INPUT)/1024,
+ (float)getInstantaneousMetric(REDIS_METRIC_NET_OUTPUT)/1024,
server.stat_rejected_conn,
server.stat_sync_full,
server.stat_sync_partial_ok,
@@ -3003,11 +3059,7 @@ void infoCommand(redisClient *c) {
addReply(c,shared.syntaxerr);
return;
}
- sds info = genRedisInfoString(section);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, genRedisInfoString(section));
}
void monitorCommand(redisClient *c) {
@@ -3342,6 +3394,10 @@ void linuxMemoryWarnings(void) {
#endif /* __linux__ */
void createPidFile(void) {
+ /* If pidfile requested, but no pidfile defined, use
+ * default pidfile path */
+ if (!server.pidfile) server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
+
/* Try to write the pid file in a best-effort way. */
FILE *fp = fopen(server.pidfile,"w");
if (fp) {
@@ -3404,15 +3460,27 @@ void redisAsciiArt(void) {
else if (server.sentinel_mode) mode = "sentinel";
else mode = "standalone";
- snprintf(buf,1024*16,ascii_logo,
- REDIS_VERSION,
- redisGitSHA1(),
- strtol(redisGitDirty(),NULL,10) > 0,
- (sizeof(long) == 8) ? "64" : "32",
- mode, server.port,
- (long) getpid()
- );
- redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
+ if (server.syslog_enabled) {
+ redisLog(REDIS_NOTICE,
+ "Redis %s (%s/%d) %s bit, %s mode, port %d, pid %ld ready to start.",
+ REDIS_VERSION,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (sizeof(long) == 8) ? "64" : "32",
+ mode, server.port,
+ (long) getpid()
+ );
+ } else {
+ snprintf(buf,1024*16,ascii_logo,
+ REDIS_VERSION,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (sizeof(long) == 8) ? "64" : "32",
+ mode, server.port,
+ (long) getpid()
+ );
+ redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
+ }
zfree(buf);
}
@@ -3521,9 +3589,131 @@ void redisSetProcTitle(char *title) {
#endif
}
+/*
+ * Check whether systemd or upstart have been used to start redis.
+ */
+
+int redisSupervisedUpstart(void) {
+ const char *upstart_job = getenv("UPSTART_JOB");
+
+ if (!upstart_job) {
+ redisLog(REDIS_WARNING,
+ "upstart supervision requested, but UPSTART_JOB not found");
+ return 0;
+ }
+
+ redisLog(REDIS_NOTICE, "supervised by upstart, will stop to signal readyness");
+ raise(SIGSTOP);
+ unsetenv("UPSTART_JOB");
+ return 1;
+}
+
+int redisSupervisedSystemd(void) {
+ const char *notify_socket = getenv("NOTIFY_SOCKET");
+ int fd = 1;
+ struct sockaddr_un su;
+ struct iovec iov;
+ struct msghdr hdr;
+ int sendto_flags = 0;
+
+ if (!notify_socket) {
+ redisLog(REDIS_WARNING,
+ "systemd supervision requested, but NOTIFY_SOCKET not found");
+ return 0;
+ }
+
+ if ((strchr("@/", notify_socket[0])) == NULL || strlen(notify_socket) < 2) {
+ return 0;
+ }
+
+ redisLog(REDIS_NOTICE, "supervised by systemd, will signal readyness");
+ if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) {
+ redisLog(REDIS_WARNING,
+ "Can't connect to systemd socket %s", notify_socket);
+ return 0;
+ }
+
+ memset(&su, 0, sizeof(su));
+ su.sun_family = AF_UNIX;
+ strncpy (su.sun_path, notify_socket, sizeof(su.sun_path) -1);
+ su.sun_path[sizeof(su.sun_path) - 1] = '\0';
+
+ if (notify_socket[0] == '@')
+ su.sun_path[0] = '\0';
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = "READY=1";
+ iov.iov_len = strlen("READY=1");
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = &su;
+ hdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) +
+ strlen(notify_socket);
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ unsetenv("NOTIFY_SOCKET");
+#ifdef HAVE_MSG_NOSIGNAL
+ sendto_flags |= MSG_NOSIGNAL;
+#endif
+ if (sendmsg(fd, &hdr, sendto_flags) < 0) {
+ redisLog(REDIS_WARNING, "Can't send notification to systemd");
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ return 1;
+}
+
+int redisIsSupervised(int mode) {
+ if (mode == REDIS_SUPERVISED_AUTODETECT) {
+ const char *upstart_job = getenv("UPSTART_JOB");
+ const char *notify_socket = getenv("NOTIFY_SOCKET");
+
+ if (upstart_job) {
+ redisSupervisedUpstart();
+ } else if (notify_socket) {
+ redisSupervisedSystemd();
+ }
+ } else if (mode == REDIS_SUPERVISED_UPSTART) {
+ return redisSupervisedUpstart();
+ } else if (mode == REDIS_SUPERVISED_SYSTEMD) {
+ return redisSupervisedSystemd();
+ }
+
+ return 0;
+}
+
+
int main(int argc, char **argv) {
struct timeval tv;
+#ifdef REDIS_TEST
+ if (argc == 3 && !strcasecmp(argv[1], "test")) {
+ if (!strcasecmp(argv[2], "ziplist")) {
+ return ziplistTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "quicklist")) {
+ quicklistTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "intset")) {
+ return intsetTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "zipmap")) {
+ return zipmapTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "sha1test")) {
+ return sha1Test(argc, argv);
+ } else if (!strcasecmp(argv[2], "util")) {
+ return utilTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "sds")) {
+ return sdsTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "endianconv")) {
+ return endianconvTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "crc64")) {
+ return crc64Test(argc, argv);
+ }
+
+ return -1; /* test not found */
+ }
+#endif
+
/* We need to initialize our libraries, and the server configuration. */
#ifdef INIT_SETPROCTITLE_REPLACEMENT
spt_init(argc, argv);
@@ -3545,6 +3735,12 @@ int main(int argc, char **argv) {
initSentinel();
}
+ /* Check if we need to start in redis-check-rdb mode. We just execute
+ * the program main. However the program is part of the Redis executable
+ * so that we can easily execute an RDB check on loading errors. */
+ if (strstr(argv[0],"redis-check-rdb") != NULL)
+ exit(redis_check_rdb_main(argv,argc));
+
if (argc >= 2) {
int j = 1; /* First option to parse in argv[] */
sds options = sdsempty();
@@ -3576,6 +3772,11 @@ int main(int argc, char **argv) {
while(j != argc) {
if (argv[j][0] == '-' && argv[j][1] == '-') {
/* Option name */
+ if (!strcmp(argv[j], "--check-rdb")) {
+ /* Argument has no options, need to skip for parsing. */
+ j++;
+ continue;
+ }
if (sdslen(options)) options = sdscat(options,"\n");
options = sdscat(options,argv[j]+2);
options = sdscat(options," ");
@@ -3600,9 +3801,13 @@ int main(int argc, char **argv) {
} else {
redisLog(REDIS_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
}
- if (server.daemonize) daemonize();
+
+ server.supervised = redisIsSupervised(server.supervised_mode);
+ int background = server.daemonize && !server.supervised;
+ if (background) daemonize();
+
initServer();
- if (server.daemonize) createPidFile();
+ if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
redisAsciiArt();
@@ -3612,6 +3817,7 @@ int main(int argc, char **argv) {
#ifdef __linux__
linuxMemoryWarnings();
#endif
+ checkTcpBacklogSettings();
loadDataFromDisk();
if (server.cluster_enabled) {
if (verifyClusterConfigWithData() == REDIS_ERR) {
diff --git a/src/redis.h b/src/redis.h
index 855ae5742..2170c5d29 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -32,10 +32,7 @@
#include "fmacros.h"
#include "config.h"
-
-#if defined(__sun)
#include "solarisfixes.h"
-#endif
#include <stdio.h>
#include <stdlib.h>
@@ -65,6 +62,13 @@ typedef long long mstime_t; /* millisecond time type. */
#include "util.h" /* Misc functions useful in many places */
#include "latency.h" /* Latency monitor API */
#include "sparkline.h" /* ASII graphs API */
+#include "quicklist.h"
+
+/* Following includes allow test functions to be called from Redis main() */
+#include "zipmap.h"
+#include "sha1.h"
+#include "endianconv.h"
+#include "crc64.h"
/* Error codes */
#define REDIS_OK 0
@@ -97,7 +101,6 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_REPL_PING_SLAVE_PERIOD 10
#define REDIS_RUN_ID_SIZE 40
#define REDIS_EOF_MARK_SIZE 40
-#define REDIS_OPS_SEC_SAMPLES 16
#define REDIS_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
#define REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
#define REDIS_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
@@ -128,7 +131,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
#define REDIS_DEFAULT_MIN_SLAVES_TO_WRITE 0
#define REDIS_DEFAULT_MIN_SLAVES_MAX_LAG 10
-#define REDIS_IP_STR_LEN INET6_ADDRSTRLEN
+#define REDIS_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
#define REDIS_PEER_ID_LEN (REDIS_IP_STR_LEN+32) /* Must be enough for ip:port */
#define REDIS_BINDADDR_MAX 16
#define REDIS_MIN_RESERVED_FDS 32
@@ -140,6 +143,13 @@ typedef long long mstime_t; /* millisecond time type. */
#define ACTIVE_EXPIRE_CYCLE_SLOW 0
#define ACTIVE_EXPIRE_CYCLE_FAST 1
+/* Instantaneous metrics tracking. */
+#define REDIS_METRIC_SAMPLES 16 /* Number of samples per metric. */
+#define REDIS_METRIC_COMMAND 0 /* Number of commands executed. */
+#define REDIS_METRIC_NET_INPUT 1 /* Bytes read to network .*/
+#define REDIS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
+#define REDIS_METRIC_COUNT 3
+
/* Protocol and I/O related defines */
#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
#define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
@@ -192,6 +202,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
+#define REDIS_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -302,6 +313,12 @@ typedef long long mstime_t; /* millisecond time type. */
#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */
#define REDIS_DEFAULT_VERBOSITY REDIS_NOTICE
+/* Supervision options */
+#define REDIS_SUPERVISED_NONE 0
+#define REDIS_SUPERVISED_AUTODETECT 1
+#define REDIS_SUPERVISED_SYSTEMD 2
+#define REDIS_SUPERVISED_UPSTART 3
+
/* Anti-warning macro... */
#define REDIS_NOTUSED(V) ((void) V)
@@ -317,12 +334,14 @@ typedef long long mstime_t; /* millisecond time type. */
/* Zip structure related defaults */
#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512
#define REDIS_HASH_MAX_ZIPLIST_VALUE 64
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
#define REDIS_SET_MAX_INTSET_ENTRIES 512
#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
+/* List defaults */
+#define REDIS_LIST_MAX_ZIPLIST_SIZE -2
+#define REDIS_LIST_COMPRESS_DEPTH 0
+
/* HyperLogLog defines */
#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
@@ -710,12 +729,16 @@ struct redisServer {
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
size_t resident_set_size; /* RSS sampled in serverCron(). */
- /* The following two are used to track instantaneous "load" in terms
- * of operations per second. */
- long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */
- long long ops_sec_last_sample_ops; /* numcommands in last sample */
- long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES];
- int ops_sec_idx;
+ long long stat_net_input_bytes; /* Bytes read from network. */
+ long long stat_net_output_bytes; /* Bytes written to network. */
+ /* The following two are used to track instantaneous metrics, like
+ * number of operations per second, network traffic. */
+ struct {
+ long long last_sample_time; /* Timestamp of last sample in ms */
+ long long last_sample_count;/* Count in last sample */
+ long long samples[REDIS_METRIC_SAMPLES];
+ int idx;
+ } inst_metric[REDIS_METRIC_COUNT];
/* Configuration */
int verbosity; /* Loglevel in redis.conf */
int maxidletime; /* Client timeout in seconds */
@@ -723,6 +746,8 @@ struct redisServer {
int active_expire_enabled; /* Can be disabled for testing purposes. */
size_t client_max_querybuf_len; /* Limit for client query buffer length */
int dbnum; /* Total number of configured DBs */
+ int supervised; /* 1 if supervised, 0 otherwise. */
+ int supervised_mode; /* See REDIS_SUPERVISED_* */
int daemonize; /* True if running as a daemon */
clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_TYPE_COUNT];
/* AOF persistence */
@@ -852,12 +877,14 @@ struct redisServer {
/* Zip structure config, see redis.conf for more information */
size_t hash_max_ziplist_entries;
size_t hash_max_ziplist_value;
- size_t list_max_ziplist_entries;
- size_t list_max_ziplist_value;
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
size_t hll_sparse_max_bytes;
+ /* List parameters */
+ int list_max_ziplist_size;
+ int list_compress_depth;
+ /* time cache */
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
@@ -897,6 +924,8 @@ struct redisServer {
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
+ /* System hardware info */
+ size_t system_memory_size; /* Total memory in system as reported by OS */
};
typedef struct pubsubPattern {
@@ -945,15 +974,13 @@ typedef struct {
robj *subject;
unsigned char encoding;
unsigned char direction; /* Iteration direction */
- unsigned char *zi;
- listNode *ln;
+ quicklistIter *iter;
} listTypeIterator;
/* Structure for an entry while iterating over a list. */
typedef struct {
listTypeIterator *li;
- unsigned char *zi; /* Entry in ziplist */
- listNode *ln; /* Entry in linked list */
+ quicklistEntry entry; /* Entry in quicklist */
} listTypeEntry;
/* Structure to hold set iteration abstraction. */
@@ -1030,6 +1057,7 @@ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
void addReplyBulkLongLong(redisClient *c, long long ll);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
+void addReplyBulkSds(redisClient *c, sds s);
void addReplyError(redisClient *c, char *err);
void addReplyStatus(redisClient *c, char *status);
void addReplyDouble(redisClient *c, double d);
@@ -1079,7 +1107,7 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
robj *listTypeGet(listTypeEntry *entry);
void listTypeInsert(listTypeEntry *entry, robj *value, int where);
int listTypeEqual(listTypeEntry *entry, robj *o);
-void listTypeDelete(listTypeEntry *entry);
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
void listTypeConvert(robj *subject, int enc);
void unblockClientWaitingData(redisClient *c);
void handleClientsBlockedOnLists(void);
@@ -1116,8 +1144,8 @@ robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
-robj *createStringObjectFromLongDouble(long double value);
-robj *createListObject(void);
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
+robj *createQuicklistObject(void);
robj *createZiplistObject(void);
robj *createSetObject(void);
robj *createIntsetObject(void);
@@ -1244,6 +1272,7 @@ void closeListeningSockets(int unlink_unix_socket);
void updateCachedTime(void);
void resetServerStats(void);
unsigned int getLRUClock(void);
+char *maxmemoryToString(void);
/* Set data type */
robj *setTypeCreate(robj *value);
@@ -1255,6 +1284,7 @@ void setTypeReleaseIterator(setTypeIterator *si);
int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
robj *setTypeNextObject(setTypeIterator *si);
int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
+unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
unsigned long setTypeSize(robj *subject);
void setTypeConvert(robj *subject, int enc);
@@ -1350,6 +1380,10 @@ void sentinelTimer(void);
char *sentinelHandleConfiguration(char **argv, int argc);
void sentinelIsRunning(void);
+/* redis-check-rdb */
+int redis_check_rdb(char *rdbfilename);
+int redis_check_rdb_main(char **argv, int argc);
+
/* Scripting */
void scriptingInit(void);
diff --git a/src/replication.c b/src/replication.c
index c0e833263..7e36c3e99 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -56,7 +56,7 @@ char *replicationGetSlaveName(redisClient *c) {
buf[0] = '\0';
if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) {
if (c->slave_listening_port)
- snprintf(buf,sizeof(buf),"%s:%d",ip,c->slave_listening_port);
+ anetFormatAddr(buf,sizeof(buf),ip,c->slave_listening_port);
else
snprintf(buf,sizeof(buf),"%s:<unknown-slave-port>",ip);
} else {
@@ -690,6 +690,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
freeClient(slave);
return;
}
+ server.stat_net_output_bytes += nwritten;
sdsrange(slave->replpreamble,nwritten,-1);
if (sdslen(slave->replpreamble) == 0) {
sdsfree(slave->replpreamble);
@@ -718,6 +719,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
}
slave->repldboff += nwritten;
+ server.stat_net_output_bytes += nwritten;
if (slave->repldboff == slave->repldbsize) {
close(slave->repldbfd);
slave->repldbfd = -1;
@@ -938,6 +940,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
replicationAbortSyncTransfer();
return;
}
+ server.stat_net_input_bytes += nread;
/* When a mark is used, we want to detect EOF asap in order to avoid
* writing the EOF mark into the file... */
diff --git a/src/scripting.c b/src/scripting.c
index 39bfe5fa7..c5dd4e718 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -30,6 +30,7 @@
#include "redis.h"
#include "sha1.h"
#include "rand.h"
+#include "cluster.h"
#include <lua.h>
#include <lauxlib.h>
@@ -213,11 +214,27 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
static int argv_size = 0;
static robj *cached_objects[LUA_CMD_OBJCACHE_SIZE];
static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+ static int inuse = 0; /* Recursive calls detection. */
+
+ /* By using Lua debug hooks it is possible to trigger a recursive call
+ * to luaRedisGenericCommand(), which normally should never happen.
+ * To make this function reentrant is futile and makes it slower, but
+ * we should at least detect such a misuse, and abort. */
+ if (inuse) {
+ char *recursion_warning =
+ "luaRedisGenericCommand() recursive call detected. "
+ "Are you doing funny stuff with Lua debug hooks?";
+ redisLog(REDIS_WARNING,"%s",recursion_warning);
+ luaPushError(lua,recursion_warning);
+ return 1;
+ }
+ inuse++;
/* Require at least one argument */
if (argc == 0) {
luaPushError(lua,
"Please specify at least one argument for redis.call()");
+ inuse--;
return 1;
}
@@ -272,6 +289,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
luaPushError(lua,
"Lua redis() command arguments must be strings or integers");
+ inuse--;
return 1;
}
@@ -291,6 +309,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
luaPushError(lua,"Unknown Redis command called from Lua script");
goto cleanup;
}
+ c->cmd = cmd;
/* There are commands that are not allowed inside scripts. */
if (cmd->flags & REDIS_CMD_NOSCRIPT) {
@@ -337,8 +356,23 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1;
if (cmd->flags & REDIS_CMD_WRITE) server.lua_write_dirty = 1;
+ /* If this is a Redis Cluster node, we need to make sure Lua is not
+ * trying to access non-local keys. */
+ if (server.cluster_enabled) {
+ /* Duplicate relevant flags in the lua client. */
+ c->flags &= ~(REDIS_READONLY|REDIS_ASKING);
+ c->flags |= server.lua_caller->flags & (REDIS_READONLY|REDIS_ASKING);
+ if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+ server.cluster->myself)
+ {
+ luaPushError(lua,
+ "Lua script attempted to access a non local key in a "
+ "cluster node");
+ goto cleanup;
+ }
+ }
+
/* Run the command */
- c->cmd = cmd;
call(c,REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
/* Convert the result of the Redis command into a suitable Lua type.
@@ -409,8 +443,10 @@ cleanup:
* return the plain error. */
lua_pushstring(lua,"err");
lua_gettable(lua,-2);
+ inuse--;
return lua_error(lua);
}
+ inuse--;
return 1;
}
diff --git a/src/sds.c b/src/sds.c
index 0ad925b4a..05ee0ad56 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -295,7 +295,7 @@ sds sdscpy(sds s, const char *t) {
* conversion. 's' must point to a string with room for at least
* SDS_LLSTR_SIZE bytes.
*
- * The function returns the lenght of the null-terminated string
+ * The function returns the length of the null-terminated string
* representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) {
@@ -369,7 +369,7 @@ sds sdsfromlonglong(long long value) {
return sdsnewlen(buf,len);
}
-/* Like sdscatpritf() but gets va_list instead of being variadic. */
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char staticbuf[1024], *buf = staticbuf, *t;
@@ -390,7 +390,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
buf[buflen-2] = '\0';
va_copy(cpy,ap);
vsnprintf(buf, buflen, fmt, cpy);
- va_end(ap);
+ va_end(cpy);
if (buf[buflen-2] != '\0') {
if (buf != staticbuf) zfree(buf);
buflen *= 2;
@@ -415,7 +415,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
*
* Example:
*
- * s = sdsempty("Sum is: ");
+ * s = sdsnew("Sum is: ");
* s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
*
* Often you need to create a string from scratch with the printf-alike
@@ -570,7 +570,7 @@ sds sdstrim(sds s, const char *cset) {
sp = start = s;
ep = end = s+sdslen(s)-1;
while(sp <= end && strchr(cset, *sp)) sp++;
- while(ep > start && strchr(cset, *ep)) ep--;
+ while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
if (sh->buf != sp) memmove(sh->buf, sp, len);
sh->buf[len] = '\0';
@@ -643,8 +643,8 @@ void sdstoupper(sds s) {
*
* Return value:
*
- * 1 if s1 > s2.
- * -1 if s1 < s2.
+ * positive if s1 > s2.
+ * negative if s1 < s2.
* 0 if s1 and s2 are exactly the same binary string.
*
* If two strings share exactly the same prefix, but one of the two has
@@ -962,12 +962,15 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#ifdef SDS_TEST_MAIN
+#if defined(REDIS_TEST) || defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
-int main(void) {
+#define UNUSED(x) (void)(x)
+int sdsTest(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
{
struct sdshdr *sh;
sds x = sdsnew("foo"), y;
@@ -1014,6 +1017,18 @@ int main(void) {
memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," x");
+ test_cond("sdstrim() works when all chars match",
+ sdslen(x) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," ");
+ test_cond("sdstrim() works when a single char remains",
+ sdslen(x) == 1 && x[0] == 'x')
+
+ sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
@@ -1080,7 +1095,7 @@ int main(void) {
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
- int oldfree;
+ unsigned int oldfree;
sdsfree(x);
x = sdsnew("0");
@@ -1101,3 +1116,9 @@ int main(void) {
return 0;
}
#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+ return sdsTest();
+}
+#endif
diff --git a/src/sds.h b/src/sds.h
index 37aaf7a28..93dd4f28e 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -98,4 +98,8 @@ void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/sentinel.c b/src/sentinel.c
index 8e78a2263..c693a5862 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -190,6 +190,7 @@ typedef struct sentinelRedisInstance {
* are set to NULL no script is executed. */
char *notification_script;
char *client_reconfig_script;
+ sds info; /* cached INFO output */
} sentinelRedisInstance;
/* Main state. */
@@ -576,7 +577,7 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
if (level == REDIS_WARNING && ri != NULL) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
ri : ri->master;
- if (master->notification_script) {
+ if (master && master->notification_script) {
sentinelScheduleScriptExecution(master->notification_script,
type,msg,NULL);
}
@@ -896,7 +897,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
sentinelRedisInstance *ri;
sentinelAddr *addr;
dict *table = NULL;
- char slavename[128], *sdsname;
+ char slavename[REDIS_PEER_ID_LEN], *sdsname;
redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
redisAssert((flags & SRI_MASTER) || master != NULL);
@@ -907,9 +908,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
/* For slaves and sentinel we use ip:port as name. */
if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
- snprintf(slavename,sizeof(slavename),
- strchr(hostname,':') ? "[%s]:%d" : "%s:%d",
- hostname,port);
+ anetFormatAddr(slavename, sizeof(slavename), hostname, port);
name = slavename;
}
@@ -983,6 +982,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->promoted_slave = NULL;
ri->notification_script = NULL;
ri->client_reconfig_script = NULL;
+ ri->info = NULL;
/* Role */
ri->role_reported = ri->flags & (SRI_MASTER|SRI_SLAVE);
@@ -1015,6 +1015,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
sdsfree(ri->slave_master_host);
sdsfree(ri->leader);
sdsfree(ri->auth_pass);
+ sdsfree(ri->info);
releaseSentinelAddr(ri->addr);
/* Clear state into the master if needed. */
@@ -1030,11 +1031,11 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
{
sds key;
sentinelRedisInstance *slave;
+ char buf[REDIS_PEER_ID_LEN];
redisAssert(ri->flags & SRI_MASTER);
- key = sdscatprintf(sdsempty(),
- strchr(ip,':') ? "[%s]:%d" : "%s:%d",
- ip,port);
+ anetFormatAddr(buf,sizeof(buf),ip,port);
+ key = sdsnew(buf);
slave = dictFetchValue(ri->slaves,key);
sdsfree(key);
return slave;
@@ -1785,6 +1786,10 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int numlines, j;
int role = 0;
+ /* cache full INFO output for instance */
+ sdsfree(ri->info);
+ ri->info = sdsnew(info);
+
/* The following fields must be reset to a given value in the case they
* are not found at all in the INFO output. */
ri->master_link_down_time = 0;
@@ -2777,6 +2782,67 @@ void sentinelCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
sentinelSetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
+ if (c->argc < 2) goto numargserr;
+ mstime_t now = mstime();
+
+ /* Create an ad-hoc dictionary type so that we can iterate
+ * a dictionary composed of just the master groups the user
+ * requested. */
+ dictType copy_keeper = instancesDictType;
+ copy_keeper.valDestructor = NULL;
+ dict *masters_local = sentinel.masters;
+ if (c->argc > 2) {
+ masters_local = dictCreate(&copy_keeper, NULL);
+
+ for (int i = 2; i < c->argc; i++) {
+ sentinelRedisInstance *ri;
+ ri = sentinelGetMasterByName(c->argv[i]->ptr);
+ if (!ri) continue; /* ignore non-existing names */
+ dictAdd(masters_local, ri->name, ri);
+ }
+ }
+
+ /* Reply format:
+ * 1.) master name
+ * 2.) 1.) info from master
+ * 2.) info from replica
+ * ...
+ * 3.) other master name
+ * ...
+ */
+ addReplyMultiBulkLen(c,dictSize(masters_local) * 2);
+
+ dictIterator *di;
+ dictEntry *de;
+ di = dictGetIterator(masters_local);
+ while ((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ addReplyBulkCBuffer(c,ri->name,strlen(ri->name));
+ addReplyMultiBulkLen(c,dictSize(ri->slaves) + 1); /* +1 for self */
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - ri->info_refresh);
+ if (ri->info)
+ addReplyBulkCBuffer(c,ri->info,sdslen(ri->info));
+ else
+ addReply(c,shared.nullbulk);
+
+ dictIterator *sdi;
+ dictEntry *sde;
+ sdi = dictGetIterator(ri->slaves);
+ while ((sde = dictNext(sdi)) != NULL) {
+ sentinelRedisInstance *sri = dictGetVal(sde);
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - sri->info_refresh);
+ if (sri->info)
+ addReplyBulkCBuffer(c,sri->info,sdslen(sri->info));
+ else
+ addReply(c,shared.nullbulk);
+ }
+ dictReleaseIterator(sdi);
+ }
+ dictReleaseIterator(di);
+ if (masters_local != sentinel.masters) dictRelease(masters_local);
} else {
addReplyErrorFormat(c,"Unknown sentinel subcommand '%s'",
(char*)c->argv[1]->ptr);
@@ -2842,10 +2908,7 @@ void sentinelInfoCommand(redisClient *c) {
dictReleaseIterator(di);
}
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, info);
}
/* Implements Sentinel verison of the ROLE command. The output is
diff --git a/src/sha1.c b/src/sha1.c
index 59e6f461d..7f73b40d3 100644
--- a/src/sha1.c
+++ b/src/sha1.c
@@ -24,9 +24,7 @@ A million repetitions of "a"
#include <stdio.h>
#include <string.h>
#include <sys/types.h> /* for u_int*_t */
-#if defined(__sun)
#include "solarisfixes.h"
-#endif
#include "sha1.h"
#include "config.h"
@@ -199,16 +197,19 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
}
/* ================ end of sha1.c ================ */
-#if 0
+#ifdef REDIS_TEST
#define BUFSIZE 4096
-int
-main(int argc, char **argv)
+#define UNUSED(x) (void)(x)
+int sha1Test(int argc, char **argv)
{
SHA1_CTX ctx;
unsigned char hash[20], buf[BUFSIZE];
int i;
+ UNUSED(argc);
+ UNUSED(argv);
+
for(i=0;i<BUFSIZE;i++)
buf[i] = i;
@@ -223,6 +224,4 @@ main(int argc, char **argv)
printf("\n");
return 0;
}
-
#endif
-
diff --git a/src/sha1.h b/src/sha1.h
index 9d6f12965..4c76d19da 100644
--- a/src/sha1.h
+++ b/src/sha1.h
@@ -1,3 +1,5 @@
+#ifndef SHA1_H
+#define SHA1_H
/* ================ sha1.h ================ */
/*
SHA-1 in C
@@ -15,3 +17,8 @@ void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+
+#ifdef REDIS_TEST
+int sha1Test(int argc, char **argv);
+#endif
+#endif
diff --git a/src/solarisfixes.h b/src/solarisfixes.h
index 23025257a..3e53ba67c 100644
--- a/src/solarisfixes.h
+++ b/src/solarisfixes.h
@@ -28,6 +28,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#if defined(__sun)
+
#if defined(__GNUC__)
#include <math.h>
#undef isnan
@@ -48,3 +50,5 @@
#define u_int uint
#define u_int32_t uint32_t
#endif /* __GNUC__ */
+
+#endif /* __sun */
diff --git a/src/sort.c b/src/sort.c
index fedf0cf3a..74b27cb67 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -220,7 +220,7 @@ void sortCommand(redisClient *c) {
if (sortval)
incrRefCount(sortval);
else
- sortval = createListObject();
+ sortval = createQuicklistObject();
/* The SORT command has an SQL-alike syntax, parse it */
while(j < c->argc) {
@@ -285,16 +285,15 @@ void sortCommand(redisClient *c) {
return;
}
- /* For the STORE option, or when SORT is called from a Lua script,
- * we want to force a specific ordering even when no explicit ordering
- * was asked (SORT BY nosort). This guarantees that replication / AOF
- * is deterministic.
+ /* When sorting a set with no sort specified, we must sort the output
+ * so the result is consistent across scripting and replication.
*
- * However in the case 'dontsort' is true, but the type to sort is a
- * sorted set, we don't need to do anything as ordering is guaranteed
- * in this special case. */
- if ((storekey || c->flags & REDIS_LUA_CLIENT) &&
- (dontsort && sortval->type != REDIS_ZSET))
+ * The other types (list, sorted set) will retain their native order
+ * even if no sort order is requested, so they remain stable across
+ * scripting and replication. */
+ if (dontsort &&
+ sortval->type == REDIS_SET &&
+ (storekey || c->flags & REDIS_LUA_CLIENT))
{
/* Force ALPHA sorting */
dontsort = 0;
@@ -421,6 +420,7 @@ void sortCommand(redisClient *c) {
} else {
redisPanic("Unknown type");
}
+ printf("j: %d; vectorlen: %d\n", j, vectorlen);
redisAssertWithInfo(c,sortval,j == vectorlen);
/* Now it's time to load the right scores in the sorting vector */
@@ -510,7 +510,7 @@ void sortCommand(redisClient *c) {
}
}
} else {
- robj *sobj = createZiplistObject();
+ robj *sobj = createQuicklistObject();
/* STORE option specified, set the sorting result as a List object */
for (j = start; j <= end; j++) {
diff --git a/src/sparkline.c b/src/sparkline.c
index 900f26ab7..8e2764aee 100644
--- a/src/sparkline.c
+++ b/src/sparkline.c
@@ -49,7 +49,7 @@ static int label_margin_top = 1;
* sparklineSequenceAddSample(seq, 10, NULL);
* sparklineSequenceAddSample(seq, 20, NULL);
* sparklineSequenceAddSample(seq, 30, "last sample label");
- * sds output = sparklineRender(seq, 80, 4);
+ * sds output = sparklineRender(sdsempty(), seq, 80, 4, SPARKLINE_FILL);
* freeSparklineSequence(seq);
* ------------------------------------------------------------------------- */
@@ -63,6 +63,7 @@ struct sequence *createSparklineSequence(void) {
/* Add a new sample into a sequence. */
void sparklineSequenceAddSample(struct sequence *seq, double value, char *label) {
+ label = (label == NULL || label[0] == '\0') ? NULL : zstrdup(label);
if (seq->length == 0) {
seq->min = seq->max = value;
} else {
diff --git a/src/t_hash.c b/src/t_hash.c
index f5ceb36e9..7f33bba0c 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -565,7 +565,7 @@ void hincrbyfloatCommand(redisClient *c) {
}
value += incr;
- new = createStringObjectFromLongDouble(value);
+ new = createStringObjectFromLongDouble(value,1);
hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
hashTypeSet(o,c->argv[2],new);
addReplyBulk(c,new);
diff --git a/src/t_list.c b/src/t_list.c
index 7c79185fd..232cb5c52 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -33,75 +33,37 @@
* List API
*----------------------------------------------------------------------------*/
-/* Check the argument length to see if it requires us to convert the ziplist
- * to a real list. Only check raw-encoded objects because integer encoded
- * objects are never too long. */
-void listTypeTryConversion(robj *subject, robj *value) {
- if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
- if (sdsEncodedObject(value) &&
- sdslen(value->ptr) > server.list_max_ziplist_value)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-}
-
/* The function pushes an element to the specified list object 'subject',
* at head or tail position as specified by 'where'.
*
* There is no need for the caller to increment the refcount of 'value' as
* the function takes care of it if needed. */
void listTypePush(robj *subject, robj *value, int where) {
- /* Check if we need to convert the ziplist */
- listTypeTryConversion(subject,value);
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ int pos = (where == REDIS_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
value = getDecodedObject(value);
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
+ size_t len = sdslen(value->ptr);
+ quicklistPush(subject->ptr, value->ptr, len, pos);
decrRefCount(value);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_HEAD) {
- listAddNodeHead(subject->ptr,value);
- } else {
- listAddNodeTail(subject->ptr,value);
- }
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}
+void *listPopSaver(unsigned char *data, unsigned int sz) {
+ return createStringObject((char*)data,sz);
+}
+
robj *listTypePop(robj *subject, int where) {
+ long long vlong;
robj *value = NULL;
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- int pos = (where == REDIS_HEAD) ? 0 : -1;
- p = ziplistIndex(subject->ptr,pos);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
+
+ int ql_where = where == REDIS_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
+ NULL, &vlong, listPopSaver)) {
+ if (!value)
value = createStringObjectFromLongLong(vlong);
- }
- /* We only need to delete an element when it exists */
- subject->ptr = ziplistDelete(subject->ptr,&p);
- }
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = subject->ptr;
- listNode *ln;
- if (where == REDIS_HEAD) {
- ln = listFirst(list);
- } else {
- ln = listLast(list);
- }
- if (ln != NULL) {
- value = listNodeValue(ln);
- incrRefCount(value);
- listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
@@ -110,25 +72,28 @@ robj *listTypePop(robj *subject, int where) {
}
unsigned long listTypeLength(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- return ziplistLen(subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- return listLength((list*)subject->ptr);
+ if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ return quicklistCount(subject->ptr);
} else {
redisPanic("Unknown list encoding");
}
}
/* Initialize an iterator at the specified index. */
-listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction) {
+listTypeIterator *listTypeInitIterator(robj *subject, long index,
+ unsigned char direction) {
listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
li->subject = subject;
li->encoding = subject->encoding;
li->direction = direction;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- li->zi = ziplistIndex(subject->ptr,index);
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- li->ln = listIndex(subject->ptr,index);
+ li->iter = NULL;
+ /* REDIS_HEAD means start at TAIL and move *towards* head.
+ * REDIS_TAIL means start at HEAD and move *towards tail. */
+ int iter_direction =
+ direction == REDIS_HEAD ? AL_START_TAIL : AL_START_HEAD;
+ if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ li->iter = quicklistGetIteratorAtIdx(li->subject->ptr,
+ iter_direction, index);
} else {
redisPanic("Unknown list encoding");
}
@@ -137,6 +102,7 @@ listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char
/* Clean up the iterator. */
void listTypeReleaseIterator(listTypeIterator *li) {
+ zfree(li->iter);
zfree(li);
}
@@ -148,24 +114,8 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
redisAssert(li->subject->encoding == li->encoding);
entry->li = li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- entry->zi = li->zi;
- if (entry->zi != NULL) {
- if (li->direction == REDIS_TAIL)
- li->zi = ziplistNext(li->subject->ptr,li->zi);
- else
- li->zi = ziplistPrev(li->subject->ptr,li->zi);
- return 1;
- }
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- entry->ln = li->ln;
- if (entry->ln != NULL) {
- if (li->direction == REDIS_TAIL)
- li->ln = li->ln->next;
- else
- li->ln = li->ln->prev;
- return 1;
- }
+ if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ return quicklistNext(li->iter, &entry->entry);
} else {
redisPanic("Unknown list encoding");
}
@@ -174,24 +124,14 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Return entry or NULL at the current position of the iterator. */
robj *listTypeGet(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
robj *value = NULL;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- redisAssert(entry->zi != NULL);
- if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (entry->entry.value) {
+ value = createStringObject((char *)entry->entry.value,
+ entry->entry.sz);
+ } else {
+ value = createStringObjectFromLongLong(entry->entry.longval);
}
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- redisAssert(entry->ln != NULL);
- value = listNodeValue(entry->ln);
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
@@ -199,30 +139,18 @@ robj *listTypeGet(listTypeEntry *entry) {
}
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
- robj *subject = entry->li->subject;
- if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
value = getDecodedObject(value);
+ sds str = value->ptr;
+ size_t len = sdslen(str);
if (where == REDIS_TAIL) {
- unsigned char *next = ziplistNext(subject->ptr,entry->zi);
-
- /* When we insert after the current element, but the current element
- * is the tail of the list, we need to do a push. */
- if (next == NULL) {
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
- } else {
- subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
- }
- } else {
- subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
+ quicklistInsertAfter((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
+ } else if (where == REDIS_HEAD) {
+ quicklistInsertBefore((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
}
decrRefCount(value);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_TAIL) {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
- } else {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
- }
- incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
@@ -230,59 +158,33 @@ void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
/* Compare the given object with the entry at the current position. */
int listTypeEqual(listTypeEntry *entry, robj *o) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
redisAssertWithInfo(NULL,o,sdsEncodedObject(o));
- return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- return equalStringObjects(o,listNodeValue(entry->ln));
+ return quicklistCompare(entry->entry.zi,o->ptr,sdslen(o->ptr));
} else {
redisPanic("Unknown list encoding");
}
}
/* Delete the element pointed to. */
-void listTypeDelete(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = entry->zi;
- li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
-
- /* Update position of the iterator depending on the direction */
- if (li->direction == REDIS_TAIL)
- li->zi = p;
- else
- li->zi = ziplistPrev(li->subject->ptr,p);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *next;
- if (li->direction == REDIS_TAIL)
- next = entry->ln->next;
- else
- next = entry->ln->prev;
- listDelNode(li->subject->ptr,entry->ln);
- li->ln = next;
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry) {
+ if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistDelEntry(iter->iter, &entry->entry);
} else {
redisPanic("Unknown list encoding");
}
}
+/* Create a quicklist from a single ziplist */
void listTypeConvert(robj *subject, int enc) {
- listTypeIterator *li;
- listTypeEntry entry;
- redisAssertWithInfo(NULL,subject,subject->type == REDIS_LIST);
-
- if (enc == REDIS_ENCODING_LINKEDLIST) {
- list *l = listCreate();
- listSetFreeMethod(l,decrRefCountVoid);
-
- /* listTypeGet returns a robj with incremented refcount */
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
- listTypeReleaseIterator(li);
-
- subject->encoding = REDIS_ENCODING_LINKEDLIST;
- zfree(subject->ptr);
- subject->ptr = l;
+ redisAssertWithInfo(NULL,subject,subject->type==REDIS_LIST);
+ redisAssertWithInfo(NULL,subject,subject->encoding==REDIS_ENCODING_ZIPLIST);
+
+ if (enc == REDIS_ENCODING_QUICKLIST) {
+ size_t zlen = server.list_max_ziplist_size;
+ int depth = server.list_compress_depth;
+ subject->ptr = quicklistCreateFromZiplist(zlen, depth, subject->ptr);
+ subject->encoding = REDIS_ENCODING_QUICKLIST;
} else {
redisPanic("Unsupported list conversion");
}
@@ -304,7 +206,9 @@ void pushGenericCommand(redisClient *c, int where) {
for (j = 2; j < c->argc; j++) {
c->argv[j] = tryObjectEncoding(c->argv[j]);
if (!lobj) {
- lobj = createZiplistObject();
+ lobj = createQuicklistObject();
+ quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,c->argv[1],lobj);
}
listTypePush(lobj,c->argv[j],where);
@@ -334,17 +238,10 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
listTypeEntry entry;
int inserted = 0;
- if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,subject,REDIS_LIST)) return;
if (refval != NULL) {
- /* We're not sure if this value can be inserted yet, but we cannot
- * convert the list inside the iterator. We don't want to loop over
- * the list twice (once to see if the value can be inserted and once
- * to do the actual insert), so we assume this value can be inserted
- * and convert the ziplist to a regular list if necessary. */
- listTypeTryConversion(subject,val);
-
/* Seek refval from head to tail */
iter = listTypeInitIterator(subject,0,REDIS_TAIL);
while (listTypeNext(iter,&entry)) {
@@ -357,10 +254,6 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
listTypeReleaseIterator(iter);
if (inserted) {
- /* Check if the length exceeds the ziplist length threshold. */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"linsert",
c->argv[1],c->db->id);
@@ -418,31 +311,19 @@ void lindexCommand(redisClient *c) {
if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
return;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- p = ziplistIndex(o->ptr,index);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistEntry entry;
+ if (quicklistIndex(o->ptr, index, &entry)) {
+ if (entry.value) {
+ value = createStringObject((char*)entry.value,entry.sz);
} else {
- value = createStringObjectFromLongLong(vlong);
+ value = createStringObjectFromLongLong(entry.longval);
}
addReplyBulk(c,value);
decrRefCount(value);
} else {
addReply(c,shared.nullbulk);
}
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln != NULL) {
- value = listNodeValue(ln);
- addReplyBulk(c,value);
- } else {
- addReply(c,shared.nullbulk);
- }
} else {
redisPanic("Unknown list encoding");
}
@@ -452,35 +333,18 @@ void lsetCommand(redisClient *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
long index;
- robj *value = (c->argv[3] = tryObjectEncoding(c->argv[3]));
+ robj *value = c->argv[3];
if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
return;
- listTypeTryConversion(o,value);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p, *zl = o->ptr;
- p = ziplistIndex(zl,index);
- if (p == NULL) {
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ int replaced = quicklistReplaceAtIndex(ql, index,
+ value->ptr, sdslen(value->ptr));
+ if (!replaced) {
addReply(c,shared.outofrangeerr);
} else {
- o->ptr = ziplistDelete(o->ptr,&p);
- value = getDecodedObject(value);
- o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
- decrRefCount(value);
- addReply(c,shared.ok);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
- server.dirty++;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln == NULL) {
- addReply(c,shared.outofrangeerr);
- } else {
- decrRefCount((robj*)listNodeValue(ln));
- listNodeValue(ln) = value;
- incrRefCount(value);
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
@@ -549,43 +413,28 @@ void lrangeCommand(redisClient *c) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = ziplistIndex(o->ptr,start);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ listTypeIterator *iter = listTypeInitIterator(o, start, REDIS_TAIL);
while(rangelen--) {
- ziplistGet(p,&vstr,&vlen,&vlong);
- if (vstr) {
- addReplyBulkCBuffer(c,vstr,vlen);
+ listTypeEntry entry;
+ listTypeNext(iter, &entry);
+ quicklistEntry *qe = &entry.entry;
+ if (qe->value) {
+ addReplyBulkCBuffer(c,qe->value,qe->sz);
} else {
- addReplyBulkLongLong(c,vlong);
+ addReplyBulkLongLong(c,qe->longval);
}
- p = ziplistNext(o->ptr,p);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln;
-
- /* If we are nearest to the end of the list, reach the element
- * starting from tail and going backward, as it is faster. */
- if (start > llen/2) start -= llen;
- ln = listIndex(o->ptr,start);
-
- while(rangelen--) {
- addReplyBulk(c,ln->value);
- ln = ln->next;
}
+ listTypeReleaseIterator(iter);
} else {
- redisPanic("List encoding is not LINKEDLIST nor ZIPLIST!");
+ redisPanic("List encoding is not QUICKLIST!");
}
}
void ltrimCommand(redisClient *c) {
robj *o;
- long start, end, llen, j, ltrim, rtrim;
- list *list;
- listNode *ln;
+ long start, end, llen, ltrim, rtrim;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
@@ -612,19 +461,9 @@ void ltrimCommand(redisClient *c) {
}
/* Remove list elements to perform the trim */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
- o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list = o->ptr;
- for (j = 0; j < ltrim; j++) {
- ln = listFirst(list);
- listDelNode(list,ln);
- }
- for (j = 0; j < rtrim; j++) {
- ln = listLast(list);
- listDelNode(list,ln);
- }
+ if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ quicklistDelRange(o->ptr,0,ltrim);
+ quicklistDelRange(o->ptr,-rtrim,rtrim);
} else {
redisPanic("Unknown list encoding");
}
@@ -641,10 +480,9 @@ void ltrimCommand(redisClient *c) {
void lremCommand(redisClient *c) {
robj *subject, *obj;
- obj = c->argv[3] = tryObjectEncoding(c->argv[3]);
+ obj = c->argv[3];
long toremove;
long removed = 0;
- listTypeEntry entry;
if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != REDIS_OK))
return;
@@ -652,10 +490,6 @@ void lremCommand(redisClient *c) {
subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
- /* Make sure obj is raw when we're dealing with a ziplist */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- obj = getDecodedObject(obj);
-
listTypeIterator *li;
if (toremove < 0) {
toremove = -toremove;
@@ -664,9 +498,10 @@ void lremCommand(redisClient *c) {
li = listTypeInitIterator(subject,0,REDIS_TAIL);
}
+ listTypeEntry entry;
while (listTypeNext(li,&entry)) {
if (listTypeEqual(&entry,obj)) {
- listTypeDelete(&entry);
+ listTypeDelete(li, &entry);
server.dirty++;
removed++;
if (toremove && removed == toremove) break;
@@ -674,11 +509,10 @@ void lremCommand(redisClient *c) {
}
listTypeReleaseIterator(li);
- /* Clean up raw encoded object */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- decrRefCount(obj);
+ if (listTypeLength(subject) == 0) {
+ dbDelete(c->db,c->argv[1]);
+ }
- if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
addReplyLongLong(c,removed);
if (removed) signalModifiedKey(c->db,c->argv[1]);
}
@@ -702,7 +536,9 @@ void lremCommand(redisClient *c) {
void rpoplpushHandlePush(redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
/* Create the list if the key does not exist */
if (!dstobj) {
- dstobj = createZiplistObject();
+ dstobj = createQuicklistObject();
+ quicklistSetOptions(dstobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,dstkey,dstobj);
}
signalModifiedKey(c->db,dstkey);
@@ -1010,7 +846,9 @@ void handleClientsBlockedOnLists(void) {
}
}
- if (listTypeLength(o) == 0) dbDelete(rl->db,rl->key);
+ if (listTypeLength(o) == 0) {
+ dbDelete(rl->db,rl->key);
+ }
/* We don't call signalModifiedKey() as it was already called
* when an element was pushed on the list. */
}
diff --git a/src/t_set.c b/src/t_set.c
index c530d6923..f3f8bbaca 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -33,7 +33,8 @@
* Set Commands
*----------------------------------------------------------------------------*/
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);
+void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+ robj *dstkey, int op);
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
@@ -68,7 +69,8 @@ int setTypeAdd(robj *subject, robj *value) {
/* The set *was* an intset and this value is not integer
* encodable, so dictAdd should always work. */
- redisAssertWithInfo(NULL,value,dictAdd(subject->ptr,value,NULL) == DICT_OK);
+ redisAssertWithInfo(NULL,value,
+ dictAdd(subject->ptr,value,NULL) == DICT_OK);
incrRefCount(value);
return 1;
}
@@ -205,6 +207,106 @@ int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele) {
return setobj->encoding;
}
+/* Return a number of random elements from a non empty set.
+ *
+ * This is a version of setTypeRandomElement() that is modified in order to
+ * return multiple entries, using dictGetRandomKeys() and intsetRandomMembers().
+ *
+ * The elements are stored into 'aux_set' which should be of a set type.
+ *
+ * The function returns the number of items stored into 'aux_set', that may
+ * be less than 'count' if the hash table has less than 'count' elements
+ * inside.
+ *
+ * Note that this function is not suitable when you need a good distribution
+ * of the returned items, but only when you need to "sample" a given number
+ * of continuous elements to run some kind of algorithm or to produce
+ * statistics. However the function is much faster than setTypeRandomElement()
+ * at producing N elements, and the elements are guaranteed to be non
+ * repeating.
+ */
+unsigned long setTypeRandomElements(robj *set, unsigned long count,
+ robj *aux_set) {
+ unsigned long set_size;
+ unsigned long elements_to_return = count;
+ unsigned long elements_copied = 0;
+ unsigned long current_element = 0;
+
+ /* Like all setType* functions, we assume good behavior on part of the
+ * caller, so no extra parameter checks are made. */
+
+ /* If the number of elements in the the set is less than the count
+ * requested, just return all of them. */
+ set_size = setTypeSize(set);
+ if (set_size < count) {
+ elements_to_return = set_size;
+ }
+
+ /* TODO: It is definitely faster adding items to the set by directly
+ * handling the Dict or intset inside it, avoiding the constant encoding
+ * checks inside setTypeAdd(). However, We don't want to touch the set
+ * internals in non setType* functions. So, we just call setTypeAdd()
+ * multiple times, but this isn't an optimal solution.
+ * Another option would be to create a bulk-add function:
+ * setTypeAddBulk(). */
+ if (set->encoding == REDIS_ENCODING_HT) {
+ /* Allocate result array */
+ dictEntry **random_elements =
+ zmalloc(sizeof(dictEntry*) * elements_to_return);
+
+ /* Get the random elements */
+ elements_copied =
+ dictGetRandomKeys(set->ptr, random_elements, elements_to_return);
+ redisAssert(elements_copied == elements_to_return);
+
+ /* Put them into the set */
+ for (current_element = 0; current_element < elements_copied;
+ current_element++) {
+
+ /* We get the key and duplicate it, as we know it is a string */
+ setTypeAdd(aux_set,
+ dictGetKey(random_elements[current_element]));
+ }
+
+ zfree(random_elements);
+
+ } else if (set->encoding == REDIS_ENCODING_INTSET) {
+ /* Allocate result array */
+ int64_t *random_elements =
+ zmalloc(sizeof(int64_t) * elements_to_return);
+ robj* element_as_str = NULL;
+
+ elements_copied =
+ intsetRandomMembers((intset*) set->ptr,
+ random_elements,
+ elements_to_return);
+
+ redisAssert(elements_copied == elements_to_return);
+
+ /* Put them into the set */
+ for (current_element = 0; current_element < elements_copied;
+ current_element++) {
+
+ element_as_str = createStringObjectFromLongLong(
+ random_elements[current_element]);
+
+ /* Put the values in the set */
+ setTypeAdd(aux_set,
+ element_as_str);
+
+ decrRefCount(element_as_str);
+ }
+
+ zfree(random_elements);
+ } else {
+ redisPanic("Unknown set encoding");
+ }
+
+ /* We have a set with random elements. Return the actual elements in
+ the aux_set. */
+ return elements_copied;
+}
+
unsigned long setTypeSize(robj *subject) {
if (subject->encoding == REDIS_ENCODING_HT) {
return dictSize((dict*)subject->ptr);
@@ -235,7 +337,8 @@ void setTypeConvert(robj *setobj, int enc) {
si = setTypeInitIterator(setobj);
while (setTypeNext(si,NULL,&intele) != -1) {
element = createStringObjectFromLongLong(intele);
- redisAssertWithInfo(NULL,element,dictAdd(d,element,NULL) == DICT_OK);
+ redisAssertWithInfo(NULL,element,
+ dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
@@ -377,15 +480,147 @@ void scardCommand(redisClient *c) {
addReplyLongLong(c,setTypeSize(o));
}
+/* handle the "SPOP key <count>" variant. The normal version of the
+ * command is handled by the spopCommand() function itself. */
+
+void spopWithCountCommand(redisClient *c) {
+ long l;
+ unsigned long count, size;
+ unsigned long elements_returned;
+ robj *set, *aux, *aux_set;
+ int64_t llele;
+
+ /* Get the count argument */
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+ if (l >= 0) {
+ count = (unsigned) l;
+ } else {
+ addReply(c,shared.outofrangeerr);
+ return;
+ }
+
+ /* Make sure a key with the name inputted exists, and that it's type is
+ * indeed a set. Otherwise, return nil */
+ if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
+ == NULL || checkType(c,set,REDIS_SET)) return;
+
+ /* If count is zero, serve an empty multibulk ASAP to avoid special
+ * cases later. */
+ if (count == 0) {
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+
+ /* Get the size of the set. It is always > 0, as empty sets get
+ * deleted. */
+ size = setTypeSize(set);
+
+ /* Generate an SPOP keyspace notification */
+ notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
+
+ /* CASE 1:
+ * The number of requested elements is greater than or equal to
+ * the number of elements inside the set: simply return the whole set. */
+ if (count >= size) {
+
+ /* We just return the entire set */
+ sunionDiffGenericCommand(c,c->argv+1,1,NULL,REDIS_OP_UNION);
+
+ /* Delete the set as it is now empty */
+ dbDelete(c->db,c->argv[1]);
+ notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+
+ /* Replicate/AOF this command as an SREM operation */
+ aux = createStringObject("DEL",3);
+ rewriteClientCommandVector(c,2,aux,c->argv[1]);
+ decrRefCount(aux);
+
+ return;
+ }
+
+ /* CASE 2:
+ * The number of requested elements is less than the number
+ * of elements inside the set. */
+
+ /* We need an auxiliary set. Optimistically, we create a set using an
+ * Intset internally. */
+ aux = createStringObjectFromLongLong(0);
+ aux_set = setTypeCreate(aux);
+ decrRefCount(aux);
+
+ /* Get the count requested of random elements from the set into our
+ * auxiliary set. */
+ elements_returned = setTypeRandomElements(set, count, aux_set);
+ redisAssert(elements_returned == count);
+
+ {
+ setTypeIterator *si;
+ robj *objele;
+ int element_encoding;
+
+ addReplyMultiBulkLen(c, elements_returned);
+
+ /* Replicate/AOF this command as an SREM operation */
+ aux = createStringObject("SREM",4);
+
+ si = setTypeInitIterator(aux_set);
+ while ((element_encoding = setTypeNext(si, &objele, &llele)) != -1) {
+ if (element_encoding == REDIS_ENCODING_HT) {
+
+ addReplyBulk(c, objele);
+
+ /* Replicate/AOF this command as an SREM commands */
+ rewriteClientCommandVector(c, 3, aux, c->argv[1], objele);
+ setTypeRemove(set, objele);
+ }
+ else if (element_encoding == REDIS_ENCODING_INTSET) {
+ /* TODO: setTypeRemove() forces us to convert all of the ints
+ * to string... isn't there a nicer way to do this? */
+ objele = createStringObjectFromLongLong(llele);
+ addReplyBulk(c, objele);
+
+ /* Replicate/AOF this command as an SREM commands */
+ rewriteClientCommandVector(c, 3, aux, c->argv[1], objele);
+ setTypeRemove(set, objele);
+
+ /* We created it, we kill it. */
+ decrRefCount(objele);
+ }
+ else {
+ redisPanic("Unknown set encoding");
+ }
+ }
+ setTypeReleaseIterator(si);
+
+ decrRefCount(aux);
+ }
+
+ /* Free the auxiliary set - we need it no more. */
+ decrRefCount(aux_set);
+}
+
void spopCommand(redisClient *c) {
robj *set, *ele, *aux;
int64_t llele;
int encoding;
+ if (c->argc == 3) {
+ spopWithCountCommand(c);
+ return;
+ } else if (c->argc > 3) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Make sure a key with the name inputted exists, and that it's type is
+ * indeed a set */
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,set,REDIS_SET)) return;
+ /* Get a random element from the set */
encoding = setTypeRandomElement(set,&ele,&llele);
+
+ /* Remove the element from the set */
if (encoding == REDIS_ENCODING_INTSET) {
ele = createStringObjectFromLongLong(llele);
set->ptr = intsetRemove(set->ptr,llele,NULL);
@@ -393,6 +628,7 @@ void spopCommand(redisClient *c) {
incrRefCount(ele);
setTypeRemove(set,ele);
}
+
notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
/* Replicate/AOF this command as an SREM operation */
@@ -401,11 +637,16 @@ void spopCommand(redisClient *c) {
decrRefCount(ele);
decrRefCount(aux);
+ /* Add the element to the reply */
addReplyBulk(c,ele);
+
+ /* Delete the set if it's empty */
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
+
+ /* Set has been modified */
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
@@ -587,7 +828,8 @@ int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
return (o2 ? setTypeSize(o2) : 0) - (o1 ? setTypeSize(o1) : 0);
}
-void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) {
+void sinterGenericCommand(redisClient *c, robj **setkeys,
+ unsigned long setnum, robj *dstkey) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *eleobj, *dstset = NULL;
@@ -734,7 +976,8 @@ void sinterstoreCommand(redisClient *c) {
#define REDIS_OP_DIFF 1
#define REDIS_OP_INTER 2
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) {
+void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+ robj *dstkey, int op) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *ele, *dstset = NULL;
diff --git a/src/t_string.c b/src/t_string.c
index 067aa10e3..34ab11b51 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -405,7 +405,7 @@ void incrbyfloatCommand(redisClient *c) {
addReplyError(c,"increment would produce NaN or Infinity");
return;
}
- new = createStringObjectFromLongDouble(value);
+ new = createStringObjectFromLongDouble(value,1);
if (o)
dbOverwrite(c->db,c->argv[1],new);
else
diff --git a/src/t_zset.c b/src/t_zset.c
index d3c7214bd..64418c9b4 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1382,7 +1382,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
robj *key = c->argv[1];
robj *zobj;
int keyremoved = 0;
- unsigned long deleted;
+ unsigned long deleted = 0;
zrangespec range;
zlexrangespec lexrange;
long start, end, llen;
diff --git a/src/util.c b/src/util.c
index 80242ff71..543de112b 100644
--- a/src/util.c
+++ b/src/util.c
@@ -40,6 +40,7 @@
#include <stdint.h>
#include "util.h"
+#include "sha1.h"
/* Glob-style pattern matching. */
int stringmatchlen(const char *pattern, int patternLen,
@@ -428,11 +429,44 @@ int d2string(char *buf, size_t len, double value) {
* having run_id == A, and you reconnect and it has run_id == B, you can be
* sure that it is either a different instance or it was restarted. */
void getRandomHexChars(char *p, unsigned int len) {
- FILE *fp = fopen("/dev/urandom","r");
char *charset = "0123456789abcdef";
unsigned int j;
- if (fp == NULL || fread(p,len,1,fp) == 0) {
+ /* Global state. */
+ static int seed_initialized = 0;
+ static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */
+ static uint64_t counter = 0; /* The counter we hash with the seed. */
+
+ if (!seed_initialized) {
+ /* Initialize a seed and use SHA1 in counter mode, where we hash
+ * the same seed with a progressive counter. For the goals of this
+ * function we just need non-colliding strings, there are no
+ * cryptographic security needs. */
+ FILE *fp = fopen("/dev/urandom","r");
+ if (fp && fread(seed,sizeof(seed),1,fp) == 1)
+ seed_initialized = 1;
+ if (fp) fclose(fp);
+ }
+
+ if (seed_initialized) {
+ while(len) {
+ unsigned char digest[20];
+ SHA1_CTX ctx;
+ unsigned int copylen = len > 20 ? 20 : len;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, seed, sizeof(seed));
+ SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter));
+ SHA1Final(digest, &ctx);
+ counter++;
+
+ memcpy(p,digest,copylen);
+ /* Convert to hex digits. */
+ for (j = 0; j < copylen; j++) p[j] = charset[p[j] & 0x0F];
+ len -= copylen;
+ p += copylen;
+ }
+ } else {
/* If we can't read from /dev/urandom, do some reasonable effort
* in order to create some entropy, since this function is used to
* generate run_id and cluster instance IDs */
@@ -459,14 +493,12 @@ void getRandomHexChars(char *p, unsigned int len) {
x += sizeof(pid);
}
/* Finally xor it with rand() output, that was already seeded with
- * time() at startup. */
- for (j = 0; j < len; j++)
+ * time() at startup, and convert to hex digits. */
+ for (j = 0; j < len; j++) {
p[j] ^= rand();
+ p[j] = charset[p[j] & 0x0F];
+ }
}
- /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */
- for (j = 0; j < len; j++)
- p[j] = charset[p[j] & 0x0F];
- if (fp) fclose(fp);
}
/* Given the filename, return the absolute path as an SDS string, or NULL
@@ -529,10 +561,10 @@ int pathIsBaseName(char *path) {
return strchr(path,'/') == NULL && strchr(path,'\\') == NULL;
}
-#ifdef UTIL_TEST_MAIN
+#ifdef REDIS_TEST
#include <assert.h>
-void test_string2ll(void) {
+static void test_string2ll(void) {
char buf[32];
long long v;
@@ -587,7 +619,7 @@ void test_string2ll(void) {
assert(string2ll(buf,strlen(buf),&v) == 0);
}
-void test_string2l(void) {
+static void test_string2l(void) {
char buf[32];
long v;
@@ -636,9 +668,55 @@ void test_string2l(void) {
#endif
}
-int main(int argc, char **argv) {
+static void test_ll2string(void) {
+ char buf[32];
+ long long v;
+ int sz;
+
+ v = 0;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 1);
+ assert(!strcmp(buf, "0"));
+
+ v = -1;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "-1"));
+
+ v = 99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "99"));
+
+ v = -99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 3);
+ assert(!strcmp(buf, "-99"));
+
+ v = -2147483648;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 11);
+ assert(!strcmp(buf, "-2147483648"));
+
+ v = LLONG_MIN;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 20);
+ assert(!strcmp(buf, "-9223372036854775808"));
+
+ v = LLONG_MAX;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 19);
+ assert(!strcmp(buf, "9223372036854775807"));
+}
+
+#define UNUSED(x) (void)(x)
+int utilTest(int argc, char **argv) {
+ UNUSED(argc);
+ UNUSED(argv);
+
test_string2ll();
test_string2l();
+ test_ll2string();
return 0;
}
#endif
diff --git a/src/util.h b/src/util.h
index b3667cd6f..666042c9b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -42,4 +42,8 @@ int d2string(char *buf, size_t len, double value);
sds getAbsolutePath(char *filename);
int pathIsBaseName(char *path);
+#ifdef REDIS_TEST
+int utilTest(int argc, char **argv);
+#endif
+
#endif
diff --git a/src/ziplist.c b/src/ziplist.c
index 64a22adfc..7428d30e9 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -143,6 +143,7 @@
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
+#define ZIPLIST_END_SIZE (sizeof(uint8_t))
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
@@ -162,6 +163,13 @@ typedef struct zlentry {
unsigned char *p;
} zlentry;
+#define ZIPLIST_ENTRY_ZERO(zle) { \
+ (zle)->prevrawlensize = (zle)->prevrawlen = 0; \
+ (zle)->lensize = (zle)->len = (zle)->headersize = 0; \
+ (zle)->encoding = 0; \
+ (zle)->p = NULL; \
+}
+
/* Extract the encoding from the byte pointed by 'ptr' and set it into
* 'encoding'. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \
@@ -169,6 +177,8 @@ typedef struct zlentry {
if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)
+void ziplistRepr(unsigned char *zl);
+
/* Return bytes needed to store integer encoded by 'encoding' */
static unsigned int zipIntSize(unsigned char encoding) {
switch(encoding) {
@@ -404,14 +414,12 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
}
/* Return a struct with all information about an entry. */
-static zlentry zipEntry(unsigned char *p) {
- zlentry e;
-
- ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen);
- ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len);
- e.headersize = e.prevrawlensize + e.lensize;
- e.p = p;
- return e;
+static void zipEntry(unsigned char *p, zlentry *e) {
+
+ ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
+ ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
+ e->headersize = e->prevrawlensize + e->lensize;
+ e->p = p;
}
/* Create a new empty ziplist. */
@@ -460,13 +468,13 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
zlentry cur, next;
while (p[0] != ZIP_END) {
- cur = zipEntry(p);
+ zipEntry(p, &cur);
rawlen = cur.headersize + cur.len;
rawlensize = zipPrevEncodeLength(NULL,rawlen);
/* Abort if there is no next entry. */
if (p[rawlen] == ZIP_END) break;
- next = zipEntry(p+rawlen);
+ zipEntry(p+rawlen, &next);
/* Abort when "prevlen" has not changed. */
if (next.prevrawlen == rawlen) break;
@@ -521,7 +529,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
int nextdiff = 0;
zlentry first, tail;
- first = zipEntry(p);
+ zipEntry(p, &first);
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
@@ -545,7 +553,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p);
+ zipEntry(p, &tail);
if (p[tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -635,7 +643,7 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p+reqlen);
+ zipEntry(p+reqlen, &tail);
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -665,6 +673,121 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
return zl;
}
+/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.
+ *
+ * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.
+ * Either 'first' or 'second' can be used for the result. The parameter not
+ * used will be free'd and set to NULL.
+ *
+ * After calling this function, the input parameters are no longer valid since
+ * they are changed and free'd in-place.
+ *
+ * The result ziplist is the contents of 'first' followed by 'second'.
+ *
+ * On failure: returns NULL if the merge is impossible.
+ * On success: returns the merged ziplist (which is expanded version of either
+ * 'first' or 'second', also frees the other unused input ziplist, and sets the
+ * input ziplist argument equal to newly reallocated ziplist return value. */
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {
+ /* If any params are null, we can't merge, so NULL. */
+ if (first == NULL || *first == NULL || second == NULL || *second == NULL)
+ return NULL;
+
+ /* Can't merge same list into itself. */
+ if (*first == *second)
+ return NULL;
+
+ size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));
+ size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));
+
+ size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));
+ size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));
+
+ int append;
+ unsigned char *source, *target;
+ size_t target_bytes, source_bytes;
+ /* Pick the largest ziplist so we can resize easily in-place.
+ * We must also track if we are now appending or prepending to
+ * the target ziplist. */
+ if (first_len >= second_len) {
+ /* retain first, append second to first. */
+ target = *first;
+ target_bytes = first_bytes;
+ source = *second;
+ source_bytes = second_bytes;
+ append = 1;
+ } else {
+ /* else, retain second, prepend first to second. */
+ target = *second;
+ target_bytes = second_bytes;
+ source = *first;
+ source_bytes = first_bytes;
+ append = 0;
+ }
+
+ /* Calculate final bytes (subtract one pair of metadata) */
+ size_t zlbytes = first_bytes + second_bytes -
+ ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;
+ size_t zllength = first_len + second_len;
+
+ /* Combined zl length should be limited within UINT16_MAX */
+ zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;
+
+ /* Save offset positions before we start ripping memory apart. */
+ size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));
+ size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));
+
+ /* Extend target to new zlbytes then append or prepend source. */
+ target = zrealloc(target, zlbytes);
+ if (append) {
+ /* append == appending to target */
+ /* Copy source after target (copying over original [END]):
+ * [TARGET - END, SOURCE - HEADER] */
+ memcpy(target + target_bytes - ZIPLIST_END_SIZE,
+ source + ZIPLIST_HEADER_SIZE,
+ source_bytes - ZIPLIST_HEADER_SIZE);
+ } else {
+ /* !append == prepending to target */
+ /* Move target *contents* exactly size of (source - [END]),
+ * then copy source into vacataed space (source - [END]):
+ * [SOURCE - END, TARGET - HEADER] */
+ memmove(target + source_bytes - ZIPLIST_END_SIZE,
+ target + ZIPLIST_HEADER_SIZE,
+ target_bytes - ZIPLIST_HEADER_SIZE);
+ memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);
+ }
+
+ /* Update header metadata. */
+ ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);
+ ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);
+ /* New tail offset is:
+ * + N bytes of first ziplist
+ * - 1 byte for [END] of first ziplist
+ * + M bytes for the offset of the original tail of the second ziplist
+ * - J bytes for HEADER because second_offset keeps no header. */
+ ZIPLIST_TAIL_OFFSET(target) = intrev32ifbe(
+ (first_bytes - ZIPLIST_END_SIZE) +
+ (second_offset - ZIPLIST_HEADER_SIZE));
+
+ /* __ziplistCascadeUpdate just fixes the prev length values until it finds a
+ * correct prev length value (then it assumes the rest of the list is okay).
+ * We tell CascadeUpdate to start at the first ziplist's tail element to fix
+ * the merge seam. */
+ target = __ziplistCascadeUpdate(target, target+first_offset);
+
+ /* Now free and NULL out what we didn't realloc */
+ if (append) {
+ zfree(*second);
+ *second = NULL;
+ *first = target;
+ } else {
+ zfree(*first);
+ *first = NULL;
+ *second = target;
+ }
+ return target;
+}
+
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
unsigned char *p;
p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
@@ -748,7 +871,7 @@ unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *sl
if (p == NULL || p[0] == ZIP_END) return 0;
if (sstr) *sstr = NULL;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
if (sstr) {
*slen = entry.len;
@@ -783,7 +906,7 @@ unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
}
/* Delete a range of entries from the ziplist. */
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num) {
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
unsigned char *p = ziplistIndex(zl,index);
return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
}
@@ -796,7 +919,7 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int
long long zval, sval;
if (p[0] == ZIP_END) return 0;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
/* Raw compare */
if (entry.len == slen) {
@@ -913,7 +1036,7 @@ void ziplistRepr(unsigned char *zl) {
intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
p = ZIPLIST_ENTRY_HEAD(zl);
while(*p != ZIP_END) {
- entry = zipEntry(p);
+ zipEntry(p, &entry);
printf(
"{"
"addr 0x%08lx, "
@@ -952,14 +1075,14 @@ void ziplistRepr(unsigned char *zl) {
printf("{end}\n\n");
}
-#ifdef ZIPLIST_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"
#include "sds.h"
#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); }
-unsigned char *createList() {
+static unsigned char *createList() {
unsigned char *zl = ziplistNew();
zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL);
zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL);
@@ -968,7 +1091,7 @@ unsigned char *createList() {
return zl;
}
-unsigned char *createIntList() {
+static unsigned char *createIntList() {
unsigned char *zl = ziplistNew();
char buf[32];
@@ -987,13 +1110,13 @@ unsigned char *createIntList() {
return zl;
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
-void stress(int pos, int num, int maxsize, int dnum) {
+static void stress(int pos, int num, int maxsize, int dnum) {
int i,j,k;
unsigned char *zl;
char posstr[2][5] = { "HEAD", "TAIL" };
@@ -1016,7 +1139,7 @@ void stress(int pos, int num, int maxsize, int dnum) {
}
}
-void pop(unsigned char *zl, int where) {
+static unsigned char *pop(unsigned char *zl, int where) {
unsigned char *p, *vstr;
unsigned int vlen;
long long vlong;
@@ -1028,20 +1151,22 @@ void pop(unsigned char *zl, int where) {
else
printf("Pop tail: ");
- if (vstr)
+ if (vstr) {
if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite");
- else
+ }
+ else {
printf("%lld", vlong);
+ }
printf("\n");
- ziplistDeleteRange(zl,-1,1);
+ return ziplistDelete(zl,&p);
} else {
printf("ERROR: Could not pop\n");
exit(1);
}
}
-int randstring(char *target, unsigned int min, unsigned int max) {
+static int randstring(char *target, unsigned int min, unsigned int max) {
int p = 0;
int len = min+rand()%(max-min+1);
int minval, maxval;
@@ -1067,23 +1192,24 @@ int randstring(char *target, unsigned int min, unsigned int max) {
return len;
}
-void verify(unsigned char *zl, zlentry *e) {
- int i;
+static void verify(unsigned char *zl, zlentry *e) {
int len = ziplistLen(zl);
zlentry _e;
- for (i = 0; i < len; i++) {
+ ZIPLIST_ENTRY_ZERO(&_e);
+
+ for (int i = 0; i < len; i++) {
memset(&e[i], 0, sizeof(zlentry));
- e[i] = zipEntry(ziplistIndex(zl, i));
+ zipEntry(ziplistIndex(zl, i), &e[i]);
memset(&_e, 0, sizeof(zlentry));
- _e = zipEntry(ziplistIndex(zl, -len+i));
+ zipEntry(ziplistIndex(zl, -len+i), &_e);
assert(memcmp(&e[i], &_e, sizeof(zlentry)) == 0);
}
}
-int main(int argc, char **argv) {
+int ziplistTest(int argc, char **argv) {
unsigned char *zl, *p;
unsigned char *entry;
unsigned int elen;
@@ -1096,21 +1222,25 @@ int main(int argc, char **argv) {
zl = createIntList();
ziplistRepr(zl);
+ zfree(zl);
+
zl = createList();
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_HEAD);
+ zl = pop(zl,ZIPLIST_HEAD);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
+ zfree(zl);
+
printf("Get element at index 3:\n");
{
zl = createList();
@@ -1126,6 +1256,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index 4 (out of range):\n");
@@ -1139,6 +1270,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -1 (last element):\n");
@@ -1156,6 +1288,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -4 (first element):\n");
@@ -1173,6 +1306,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -5 (reverse out of range):\n");
@@ -1186,6 +1320,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 0 to end:\n");
@@ -1203,6 +1338,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 1 to end:\n");
@@ -1220,6 +1356,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 2 to end:\n");
@@ -1237,6 +1374,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate starting out of range:\n");
@@ -1249,6 +1387,7 @@ int main(int argc, char **argv) {
printf("ERROR\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front:\n");
@@ -1266,6 +1405,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front, deleting all items:\n");
@@ -1284,6 +1424,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Delete inclusive range 0,0:\n");
@@ -1291,6 +1432,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 0,1:\n");
@@ -1298,6 +1440,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 1,2:\n");
@@ -1305,6 +1448,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with start index out of range:\n");
@@ -1312,6 +1456,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 5, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with num overflow:\n");
@@ -1319,6 +1464,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 5);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete foo while iterating:\n");
@@ -1343,11 +1489,12 @@ int main(int argc, char **argv) {
}
printf("\n");
ziplistRepr(zl);
+ zfree(zl);
}
printf("Regression test for >255 byte strings:\n");
{
- char v1[257],v2[257];
+ char v1[257] = {0}, v2[257] = {0};
memset(v1,'x',256);
memset(v2,'y',256);
zl = ziplistNew();
@@ -1362,13 +1509,15 @@ int main(int argc, char **argv) {
assert(ziplistGet(p,&entry,&elen,&value));
assert(strncmp(v2,(char*)entry,elen) == 0);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Regression test deleting next to last entries:\n");
{
- char v[3][257];
- zlentry e[3];
- int i;
+ char v[3][257] = {{0}};
+ zlentry e[3] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0,
+ .len = 0, .headersize = 0, .encoding = 0, .p = NULL}};
+ size_t i;
for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) {
memset(v[i], 'a' + i, sizeof(v[0]));
@@ -1399,6 +1548,7 @@ int main(int argc, char **argv) {
assert(e[1].prevrawlensize == 5);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Create long list and check indices:\n");
@@ -1420,6 +1570,7 @@ int main(int argc, char **argv) {
assert(999-i == value);
}
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Compare strings with ziplist entries:\n");
@@ -1445,6 +1596,82 @@ int main(int argc, char **argv) {
return 1;
}
printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Merge test:\n");
+ {
+ /* create list gives us: [hello, foo, quux, 1024] */
+ zl = createList();
+ unsigned char *zl2 = createList();
+
+ unsigned char *zl3 = ziplistNew();
+ unsigned char *zl4 = ziplistNew();
+
+ if (ziplistMerge(&zl4, &zl4)) {
+ printf("ERROR: Allowed merging of one ziplist into itself.\n");
+ return 1;
+ }
+
+ /* Merge two empty ziplists, get empty result back. */
+ zl4 = ziplistMerge(&zl3, &zl4);
+ ziplistRepr(zl4);
+ if (ziplistLen(zl4)) {
+ printf("ERROR: Merging two empty ziplists created entries.\n");
+ return 1;
+ }
+ zfree(zl4);
+
+ zl2 = ziplistMerge(&zl, &zl2);
+ /* merge gives us: [hello, foo, quux, 1024, hello, foo, quux, 1024] */
+ ziplistRepr(zl2);
+
+ if (ziplistLen(zl2) != 8) {
+ printf("ERROR: Merged length not 8, but: %u\n", ziplistLen(zl2));
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,0);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,3);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,4);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,7);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+ printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Stress with random payloads of different encoding:\n");
@@ -1464,7 +1691,7 @@ int main(int argc, char **argv) {
for (i = 0; i < 20000; i++) {
zl = ziplistNew();
ref = listCreate();
- listSetFreeMethod(ref,sdsfree);
+ listSetFreeMethod(ref,(void (*)(void*))sdsfree);
len = rand() % 256;
/* Create lists */
@@ -1532,5 +1759,4 @@ int main(int argc, char **argv) {
return 0;
}
-
#endif
diff --git a/src/ziplist.h b/src/ziplist.h
index b29c34167..e92b5e783 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -32,6 +32,7 @@
#define ZIPLIST_TAIL 1
unsigned char *ziplistNew(void);
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
unsigned char *ziplistIndex(unsigned char *zl, int index);
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
@@ -39,8 +40,12 @@ unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num);
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
unsigned int ziplistLen(unsigned char *zl);
size_t ziplistBlobLen(unsigned char *zl);
+
+#ifdef REDIS_TEST
+int ziplistTest(int argc, char *argv[]);
+#endif
diff --git a/src/zipmap.c b/src/zipmap.c
index 803fedeec..22bfa1a46 100644
--- a/src/zipmap.c
+++ b/src/zipmap.c
@@ -51,10 +51,9 @@
* <len> is the length of the following string (key or value).
* <len> lengths are encoded in a single value or in a 5 bytes value.
* If the first byte value (as an unsigned 8 bit value) is between 0 and
- * 252, it's a single-byte length. If it is 253 then a four bytes unsigned
+ * 253, it's a single-byte length. If it is 254 then a four bytes unsigned
* integer follows (in the host byte ordering). A value of 255 is used to
- * signal the end of the hash. The special value 254 is used to mark
- * empty space that can be used to add new key/value pairs.
+ * signal the end of the hash.
*
* <free> is the number of free unused bytes after the string, resulting
* from modification of values associated to a key. For instance if "foo"
@@ -371,8 +370,8 @@ size_t zipmapBlobLen(unsigned char *zm) {
return totlen;
}
-#ifdef ZIPMAP_TEST_MAIN
-void zipmapRepr(unsigned char *p) {
+#ifdef REDIS_TEST
+static void zipmapRepr(unsigned char *p) {
unsigned int l;
printf("{status %u}",*p++);
@@ -405,9 +404,13 @@ void zipmapRepr(unsigned char *p) {
printf("\n");
}
-int main(void) {
+#define UNUSED(x) (void)(x)
+int zipmapTest(int argc, char *argv[]) {
unsigned char *zm;
+ UNUSED(argc);
+ UNUSED(argv);
+
zm = zipmapNew();
zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL);
diff --git a/src/zipmap.h b/src/zipmap.h
index 9cf1b2484..ac588f05a 100644
--- a/src/zipmap.h
+++ b/src/zipmap.h
@@ -46,4 +46,8 @@ unsigned int zipmapLen(unsigned char *zm);
size_t zipmapBlobLen(unsigned char *zm);
void zipmapRepr(unsigned char *p);
+#ifdef REDIS_TEST
+int zipmapTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 6df51a80f..640ee19e2 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -364,3 +364,60 @@ size_t zmalloc_get_smap_bytes_by_field(char *field) {
size_t zmalloc_get_private_dirty(void) {
return zmalloc_get_smap_bytes_by_field("Private_Dirty:");
}
+
+/* Returns the size of physical memory (RAM) in bytes.
+ * It looks ugly, but this is the cleanest way to achive cross platform results.
+ * Cleaned up from:
+ *
+ * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
+ *
+ * Note that this function:
+ * 1) Was released under the following CC attribution license:
+ * http://creativecommons.org/licenses/by/3.0/deed.en_US.
+ * 2) Was originally implemented by David Robert Nadeau.
+ * 3) Was modified for Redis by Matt Stancliff.
+ * 4) This note exists in order to comply with the original license.
+ */
+size_t zmalloc_get_memory_size(void) {
+#if defined(__unix__) || defined(__unix) || defined(unix) || \
+ (defined(__APPLE__) && defined(__MACH__))
+#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_MEMSIZE)
+ mib[1] = HW_MEMSIZE; /* OSX. --------------------- */
+#elif defined(HW_PHYSMEM64)
+ mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */
+#endif
+ int64_t size = 0; /* 64-bit */
+ size_t len = sizeof(size);
+ if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+ /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
+ return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE);
+
+#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
+ /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_REALMEM)
+ mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
+#elif defined(HW_PYSMEM)
+ mib[1] = HW_PHYSMEM; /* Others. ------------------ */
+#endif
+ unsigned int size = 0; /* 32-bit */
+ size_t len = sizeof(size);
+ if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+#endif /* sysctl and sysconf variants */
+
+#else
+ return 0L; /* Unknown OS. */
+#endif
+}
+
+
diff --git a/src/zmalloc.h b/src/zmalloc.h
index 4de2cffea..a47ea6ccf 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -77,6 +77,7 @@ float zmalloc_get_fragmentation_ratio(size_t rss);
size_t zmalloc_get_rss(void);
size_t zmalloc_get_private_dirty(void);
size_t zmalloc_get_smap_bytes_by_field(char *field);
+size_t zmalloc_get_memory_size(void);
void zlibc_free(void *ptr);
#ifndef HAVE_MALLOC_SIZE
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index 69a160c4f..f764cea0a 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -21,6 +21,7 @@ proc main {} {
if {[catch main e]} {
puts $::errorInfo
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
index b9e772351..8811762c6 100644
--- a/tests/cluster/tests/04-resharding.tcl
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -66,9 +66,18 @@ test "Cluster consistency during live resharding" {
}
# Write random data to random list.
- set key "key:[randomInt $numkeys]"
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
set ele [randomValue]
- $cluster rpush $key $ele
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
lappend content($key) $ele
if {($j % 1000) == 0} {
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
index 65fc806e1..466ab8f25 100644
--- a/tests/cluster/tests/includes/init-tests.tcl
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -27,10 +27,17 @@ test "Cluster nodes are reachable" {
test "Cluster nodes hard reset" {
foreach_redis_id id {
+ if {$::valgrind} {
+ set node_timeout 10000
+ } else {
+ set node_timeout 3000
+ }
catch {R $id flushall} ; # May fail for readonly slaves.
+ R $id MULTI
R $id cluster reset hard
R $id cluster set-config-epoch [expr {$id+1}]
- R $id config set cluster-node-timeout 3000
+ R $id EXEC
+ R $id config set cluster-node-timeout $node_timeout
R $id config set cluster-slave-validity-factor 10
R $id config rewrite
}
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 426508f33..353d9b2d2 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -16,6 +16,7 @@ source ../support/server.tcl
source ../support/test.tcl
set ::verbose 0
+set ::valgrind 0
set ::pause_on_error 0
set ::simulate_error 0
set ::sentinel_instances {}
@@ -32,6 +33,25 @@ if {[catch {cd tmp}]} {
exit 1
}
+# Execute the specified instance of the server specified by 'type', using
+# the provided configuration file. Returns the PID of the process.
+proc exec_instance {type cfgfile} {
+ if {$type eq "redis"} {
+ set prgname redis-server
+ } elseif {$type eq "sentinel"} {
+ set prgname redis-sentinel
+ } else {
+ error "Unknown instance type."
+ }
+
+ if {$::valgrind} {
+ set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
+ } else {
+ set pid [exec ../../../src/${prgname} $cfgfile &]
+ }
+ return $pid
+}
+
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
for {set j 0} {$j < $count} {incr j} {
@@ -58,14 +78,7 @@ proc spawn_instance {type base_port count {conf {}}} {
close $cfg
# Finally exec it and remember the pid for later cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } elseif {$type eq "sentinel"} {
- set prgname redis-sentinel
- } else {
- error "Unknown instance type."
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
lappend ::pids $pid
# Check availability
@@ -98,6 +111,7 @@ proc cleanup {} {
proc abort_sentinel_test msg {
puts "WARNING: Aborting the test."
puts ">>>>>>>> $msg"
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
@@ -113,6 +127,8 @@ proc parse_options {} {
set ::pause_on_error 1
} elseif {$opt eq "--fail"} {
set ::simulate_error 1
+ } elseif {$opt eq {--valgrind}} {
+ set ::valgrind 1
} elseif {$opt eq "--help"} {
puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
puts "\nOptions:"
@@ -360,15 +376,31 @@ proc get_instance_id_by_port {type port} {
# The instance can be restarted with restart-instance.
proc kill_instance {type id} {
set pid [get_instance_attrib $type $id pid]
+ set port [get_instance_attrib $type $id port]
+
if {$pid == -1} {
error "You tried to kill $type $id twice."
}
+
exec kill -9 $pid
set_instance_attrib $type $id pid -1
set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
# Remove the PID from the list of pids to kill at exit.
set ::pids [lsearch -all -inline -not -exact $::pids $pid]
+
+ # Wait for the port it was using to be available again, so that's not
+ # an issue to start a new server ASAP with the same port.
+ set retry 10
+ while {[incr retry -1]} {
+ set port_is_free [catch {set s [socket 127.0.01 $port]}]
+ if {$port_is_free} break
+ catch {close $s}
+ after 1000
+ }
+ if {$retry == 0} {
+ error "Port $port does not return available after killing instance."
+ }
}
# Return true of the instance of the specified type/id is killed.
@@ -385,12 +417,7 @@ proc restart_instance {type id} {
# Execute the instance with its old setup and append the new pid
# file for cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } else {
- set prgname redis-sentinel
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
set_instance_attrib $type $id pid $pid
lappend ::pids $pid
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index 7ea70943c..01b928bb5 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -204,6 +204,30 @@ tags {"aof"} {
}
}
+ ## Test that SPOP with <count> (that modifies the client's argc/argv) is correctly free'd
+ create_aof {
+ append_to_aof [formatCommand sadd set foo]
+ append_to_aof [formatCommand sadd set bar]
+ append_to_aof [formatCommand sadd set gah]
+ append_to_aof [formatCommand spop set 2]
+ }
+
+ start_server_aof [list dir $server_path] {
+ test "AOF+SPOP: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+SPOP: Set should have 1 member" {
+ set client [redis [dict get $srv host] [dict get $srv port]]
+ wait_for_condition 50 100 {
+ [catch {$client ping} e] == 0
+ } else {
+ fail "Loading DB is taking too much time."
+ }
+ assert_equal 1 [$client scard set]
+ }
+ }
+
## Test that EXPIREAT is loaded correctly
create_aof {
append_to_aof [formatCommand rpush list foo]
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index d2668d736..71a7ec60a 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -118,7 +118,8 @@ foreach dl {no yes} {
[lindex $slaves 1] slaveof $master_host $master_port
[lindex $slaves 2] slaveof $master_host $master_port
- # Wait for all the three slaves to reach the "online" state
+ # Wait for all the three slaves to reach the "online"
+ # state from the POV of the master.
set retry 500
while {$retry} {
set info [r -3 info]
@@ -133,6 +134,17 @@ foreach dl {no yes} {
error "assertion:Slaves not correctly synchronized"
}
+ # Wait that slaves acknowledge they are online so
+ # we are sure that DBSIZE and DEBUG DIGEST will not
+ # fail because of timing issues.
+ wait_for_condition 500 100 {
+ [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
+ [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
+ [lindex [[lindex $slaves 2] role] 3] eq {connected}
+ } else {
+ fail "Slaves still not connected after some time"
+ }
+
# Stop the write load
stop_write_load $load_handle0
stop_write_load $load_handle1
@@ -140,16 +152,8 @@ foreach dl {no yes} {
stop_write_load $load_handle3
stop_write_load $load_handle4
- # Wait that slaves exit the "loading" state
- wait_for_condition 500 100 {
- ![string match {*loading:1*} [[lindex $slaves 0] info]] &&
- ![string match {*loading:1*} [[lindex $slaves 1] info]] &&
- ![string match {*loading:1*} [[lindex $slaves 2] info]]
- } else {
- fail "Slaves still loading data after too much time"
- }
-
- # Make sure that slaves and master have same number of keys
+ # Make sure that slaves and master have same
+ # number of keys
wait_for_condition 500 100 {
[$master dbsize] == [[lindex $slaves 0] dbsize] &&
[$master dbsize] == [[lindex $slaves 1] dbsize] &&
diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index b007e3b05..d4e7d2e5d 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -226,6 +226,8 @@ proc ::redis_cluster::get_keys_from_command {cmd argv} {
# Special handling for other commands
switch -exact $cmd {
mget {return $argv}
+ eval {return [lrange $argv 2 1+[lindex $argv 1]]}
+ evalsha {return [lrange $argv 2 1+[lindex $argv 1]]}
}
# All the remaining commands are not handled.
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 0e2e2982a..317b40a84 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -70,6 +70,9 @@ proc kill_server config {
if {$::valgrind} {
check_valgrind_errors [dict get $config stderr]
}
+
+ # Remove this pid from the set of active pids in the test server.
+ send_data_packet $::test_server_fd server-killed $pid
}
proc is_alive config {
@@ -204,11 +207,14 @@ proc start_server {options {code undefined}} {
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
if {$::valgrind} {
- exec valgrind --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &
+ set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
} else {
- exec src/redis-server $config_file > $stdout 2> $stderr &
+ set pid [exec src/redis-server $config_file > $stdout 2> $stderr &]
}
+ # Tell the test server about this new instance.
+ send_data_packet $::test_server_fd server-spawned $pid
+
# check that the server actually started
# ugly but tries to be as fast as possible...
if {$::valgrind} {set retrynum 1000} else {set retrynum 100}
@@ -234,9 +240,9 @@ proc start_server {options {code undefined}} {
return
}
- # find out the pid
- while {![info exists pid]} {
- regexp {PID:\s(\d+)} [exec cat $stdout] _ pid
+ # Wait for actual startup
+ while {![info exists _pid]} {
+ regexp {PID:\s(\d+)} [exec cat $stdout] _ _pid
after 100
}
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index 7d390cc47..31371c567 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -19,9 +19,12 @@ proc assert_match {pattern value} {
}
}
-proc assert_equal {expected value} {
+proc assert_equal {expected value {detail ""}} {
if {$expected ne $value} {
- error "assertion:Expected '$value' to be equal to '$expected'"
+ if {$detail ne ""} {
+ set detail " (detail: $detail)"
+ }
+ error "assertion:Expected '$value' to be equal to '$expected'$detail"
}
}
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 9d975cfb7..212c95b4f 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -65,6 +65,9 @@ set ::file ""; # If set, runs only the tests in this comma separated list
set ::curfile ""; # Hold the filename of the current suite
set ::accurate 0; # If true runs fuzz tests with more iterations
set ::force_failure 0
+set ::timeout 600; # 10 minutes without progresses will quit the test.
+set ::last_progress [clock seconds]
+set ::active_servers {} ; # Pids of active Redis instances.
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@@ -200,11 +203,19 @@ proc test_server_main {} {
vwait forever
}
-# This function gets called 10 times per second, for now does nothing but
-# may be used in the future in order to detect test clients taking too much
-# time to execute the task.
+# This function gets called 10 times per second.
proc test_server_cron {} {
- # Do some work here.
+ set elapsed [expr {[clock seconds]-$::last_progress}]
+
+ if {$elapsed > $::timeout} {
+ set err "\[[colorstr red TIMEOUT]\]: clients state report follows."
+ puts $err
+ show_clients_state
+ kill_clients
+ force_kill_all_servers
+ the_end
+ }
+
after 100 test_server_cron
}
@@ -230,6 +241,8 @@ proc read_from_test_client fd {
set bytes [gets $fd]
set payload [read $fd $bytes]
foreach {status data} $payload break
+ set ::last_progress [clock seconds]
+
if {$status eq {ready}} {
if {!$::quiet} {
puts "\[$status\]: $data"
@@ -256,12 +269,15 @@ proc read_from_test_client fd {
set ::active_clients_task($fd) "(ERR) $data"
} elseif {$status eq {exception}} {
puts "\[[colorstr red $status]\]: $data"
- foreach p $::clients_pids {
- catch {exec kill -9 $p}
- }
+ kill_clients
+ force_kill_all_servers
exit 1
} elseif {$status eq {testing}} {
set ::active_clients_task($fd) "(IN PROGRESS) $data"
+ } elseif {$status eq {server-spawned}} {
+ lappend ::active_servers $data
+ } elseif {$status eq {server-killed}} {
+ set ::active_servers [lsearch -all -inline -not -exact $::active_servers $data]
} else {
if {!$::quiet} {
puts "\[$status\]: $data"
@@ -269,6 +285,31 @@ proc read_from_test_client fd {
}
}
+proc show_clients_state {} {
+ # The following loop is only useful for debugging tests that may
+ # enter an infinite loop. Commented out normally.
+ foreach x $::active_clients {
+ if {[info exist ::active_clients_task($x)]} {
+ puts "$x => $::active_clients_task($x)"
+ } else {
+ puts "$x => ???"
+ }
+ }
+}
+
+proc kill_clients {} {
+ foreach p $::clients_pids {
+ catch {exec kill $p}
+ }
+}
+
+proc force_kill_all_servers {} {
+ foreach p $::active_servers {
+ puts "Killing still running Redis server $p"
+ catch {exec kill -9 $p}
+ }
+}
+
# A new client is idle. Remove it from the list of active clients and
# if there are still test units to run, launch them.
proc signal_idle_client fd {
@@ -276,17 +317,7 @@ proc signal_idle_client fd {
set ::active_clients \
[lsearch -all -inline -not -exact $::active_clients $fd]
- if 0 {
- # The following loop is only useful for debugging tests that may
- # enter an infinite loop. Commented out normally.
- foreach x $::active_clients {
- if {[info exist ::active_clients_task($x)]} {
- puts "$x => $::active_clients_task($x)"
- } else {
- puts "$x => ???"
- }
- }
- }
+ if 0 {show_clients_state}
# New unit to process?
if {$::next_test != [llength $::all_tests]} {
@@ -361,7 +392,8 @@ proc print_help_screen {} {
"--quiet Don't show individual tests."
"--single <unit> Just execute the specified unit (see next option)."
"--list-tests List all the available test units."
- "--clients <num> Number of test clients (16)."
+ "--clients <num> Number of test clients (default 16)."
+ "--timeout <sec> Test timeout in seconds (default 10 min)."
"--force-failure Force the execution of a test that always fails."
"--help Print this help screen."
} "\n"]
@@ -410,6 +442,9 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--clients}} {
set ::numclients $arg
incr j
+ } elseif {$opt eq {--timeout}} {
+ set ::timeout $arg
+ incr j
} elseif {$opt eq {--help}} {
print_help_screen
exit 0
diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl
index a2d74168f..4fdbdc6c6 100644
--- a/tests/unit/aofrw.tcl
+++ b/tests/unit/aofrw.tcl
@@ -77,10 +77,10 @@ start_server {tags {"aofrw"}} {
}
foreach d {string int} {
- foreach e {ziplist linkedlist} {
+ foreach e {quicklist} {
test "AOF rewrite of list with $e encoding, $d data" {
r flushall
- if {$e eq {ziplist}} {set len 10} else {set len 1000}
+ set len 1000
for {set j 0} {$j < $len} {incr j} {
if {$d eq {string}} {
set data [randstring 0 16 alpha]
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index d39204f9f..5af53db8d 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -157,7 +157,7 @@ start_server {tags {"dump"}} {
test {MIGRATE can correctly transfer large values} {
set first [srv 0 client]
r del key
- for {set j 0} {$j < 5000} {incr j} {
+ for {set j 0} {$j < 40000} {incr j} {
r rpush key 1 2 3 4 5 6 7 8 9 10
r rpush key "item 1" "item 2" "item 3" "item 4" "item 5" \
"item 6" "item 7" "item 8" "item 9" "item 10"
@@ -175,7 +175,7 @@ start_server {tags {"dump"}} {
assert {[$first exists key] == 0}
assert {[$second exists key] == 1}
assert {[$second ttl key] == -1}
- assert {[$second llen key] == 5000*20}
+ assert {[$second llen key] == 40000*20}
}
}
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 2b1033e39..1d84f128d 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -226,4 +226,14 @@ start_server {tags {"scan"}} {
set res [r zscan mykey 0 MATCH foo* COUNT 10000]
lsort -unique [lindex $res 1]
}
+
+ test "ZSCAN scores: regression test for issue #2175" {
+ r del mykey
+ for {set j 0} {$j < 500} {incr j} {
+ r zadd mykey 9.8813129168249309e-323 $j
+ }
+ set res [lindex [r zscan mykey 0] 1]
+ set first_score [lindex $res 1]
+ assert {$first_score != 0}
+ }
}
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index e1cd2174b..921382e34 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -413,7 +413,7 @@ start_server {tags {"scripting"}} {
r sadd myset a b c
r mset a 1 b 2 c 3 d 4
assert {[r spop myset] ne {}}
- assert {[r spop myset] ne {}}
+ assert {[r spop myset 1] ne {}}
assert {[r spop myset] ne {}}
assert {[r mget a b c d] eq {1 2 3 4}}
assert {[r spop myset] eq {}}
diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl
index 490158f14..083c4540d 100644
--- a/tests/unit/sort.tcl
+++ b/tests/unit/sort.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"sort"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 32
+ "list-max-ziplist-size" 32
"set-max-intset-entries" 32
}
} {
@@ -36,9 +35,9 @@ start_server {
}
foreach {num cmd enc title} {
- 16 lpush ziplist "Ziplist"
- 1000 lpush linkedlist "Linked list"
- 10000 lpush linkedlist "Big Linked list"
+ 16 lpush quicklist "Old Ziplist"
+ 1000 lpush quicklist "Old Linked list"
+ 10000 lpush quicklist "Old Big Linked list"
16 sadd intset "Intset"
1000 sadd hashtable "Hash table"
10000 sadd hashtable "Big Hash table"
@@ -85,14 +84,14 @@ start_server {
r sort tosort BY weight_* store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
test "SORT BY hash field STORE" {
r sort tosort BY wobj_*->weight store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
test "SORT extracts STORE correctly" {
@@ -246,6 +245,24 @@ start_server {
r sort mylist by num get x:*->
} {100}
+ test "SORT by nosort retains native order for lists" {
+ r del testa
+ r lpush testa 2 1 4 3 5
+ r sort testa by nosort
+ } {5 3 4 1 2}
+
+ test "SORT by nosort plus store retains native order for lists" {
+ r del testa
+ r lpush testa 2 1 4 3 5
+ r sort testa by nosort store testb
+ r lrange testb 0 -1
+ } {5 3 4 1 2}
+
+ test "SORT by nosort with limit returns based on original list order" {
+ r sort testa by nosort limit 0 3 store testb
+ r lrange testb 0 -1
+ } {5 3 4}
+
tags {"slow"} {
set num 100
set res [create_random_dataset $num lpush]
diff --git a/tests/unit/type/list-2.tcl b/tests/unit/type/list-2.tcl
index bf6a055eb..4c7d6d91c 100644
--- a/tests/unit/type/list-2.tcl
+++ b/tests/unit/type/list-2.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 4
}
} {
source "tests/unit/type/list-common.tcl"
@@ -28,14 +27,18 @@ start_server {
for {set i 0} {$i < 1000} {incr i} {
set min [expr {int(rand()*$startlen)}]
set max [expr {$min+int(rand()*$startlen)}]
+ set before_len [llength $mylist]
+ set before_len_r [r llen mylist]
set mylist [lrange $mylist $min $max]
r ltrim mylist $min $max
- assert_equal $mylist [r lrange mylist 0 -1]
+ assert_equal $mylist [r lrange mylist 0 -1] "failed trim"
+ set starting [r llen mylist]
for {set j [r llen mylist]} {$j < $startlen} {incr j} {
set str [randomInt 9223372036854775807]
r rpush mylist $str
lappend mylist $str
+ assert_equal $mylist [r lrange mylist 0 -1] "failed append match"
}
}
}
diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl
index 94f9a0b79..ece6ea2d5 100644
--- a/tests/unit/type/list-3.tcl
+++ b/tests/unit/type/list-3.tcl
@@ -1,8 +1,7 @@
start_server {
tags {list ziplist}
overrides {
- "list-max-ziplist-value" 200000
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 16
}
} {
test {Explicit regression for a list bug} {
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index c8e26602b..e4d568cf1 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1,25 +1,24 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 5
}
} {
source "tests/unit/type/list-common.tcl"
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - ziplist} {
# first lpush then rpush
- assert_equal 1 [r lpush myziplist1 a]
- assert_equal 2 [r rpush myziplist1 b]
- assert_equal 3 [r rpush myziplist1 c]
+ assert_equal 1 [r lpush myziplist1 aa]
+ assert_equal 2 [r rpush myziplist1 bb]
+ assert_equal 3 [r rpush myziplist1 cc]
assert_equal 3 [r llen myziplist1]
- assert_equal a [r lindex myziplist1 0]
- assert_equal b [r lindex myziplist1 1]
- assert_equal c [r lindex myziplist1 2]
+ assert_equal aa [r lindex myziplist1 0]
+ assert_equal bb [r lindex myziplist1 1]
+ assert_equal cc [r lindex myziplist1 2]
assert_equal {} [r lindex myziplist2 3]
- assert_equal c [r rpop myziplist1]
- assert_equal a [r lpop myziplist1]
- assert_encoding ziplist myziplist1
+ assert_equal cc [r rpop myziplist1]
+ assert_equal aa [r lpop myziplist1]
+ assert_encoding quicklist myziplist1
# first rpush then lpush
assert_equal 1 [r rpush myziplist2 a]
@@ -32,13 +31,13 @@ start_server {
assert_equal {} [r lindex myziplist2 3]
assert_equal a [r rpop myziplist2]
assert_equal c [r lpop myziplist2]
- assert_encoding ziplist myziplist2
+ assert_encoding quicklist myziplist2
}
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - regular list} {
# first lpush then rpush
assert_equal 1 [r lpush mylist1 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist1
+ assert_encoding quicklist mylist1
assert_equal 2 [r rpush mylist1 b]
assert_equal 3 [r rpush mylist1 c]
assert_equal 3 [r llen mylist1]
@@ -51,7 +50,7 @@ start_server {
# first rpush then lpush
assert_equal 1 [r rpush mylist2 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist2
+ assert_encoding quicklist mylist2
assert_equal 2 [r lpush mylist2 b]
assert_equal 3 [r lpush mylist2 c]
assert_equal 3 [r llen mylist2]
@@ -74,34 +73,22 @@ start_server {
assert_equal {d c b a 0 1 2 3} [r lrange mylist 0 -1]
}
- test {DEL a list - ziplist} {
- assert_equal 1 [r del myziplist2]
- assert_equal 0 [r exists myziplist2]
- assert_equal 0 [r llen myziplist2]
- }
-
- test {DEL a list - regular list} {
+ test {DEL a list} {
assert_equal 1 [r del mylist2]
assert_equal 0 [r exists mylist2]
assert_equal 0 [r llen mylist2]
}
- proc create_ziplist {key entries} {
- r del $key
- foreach entry $entries { r rpush $key $entry }
- assert_encoding ziplist $key
- }
-
- proc create_linkedlist {key entries} {
+ proc create_list {key entries} {
r del $key
foreach entry $entries { r rpush $key $entry }
- assert_encoding linkedlist $key
+ assert_encoding quicklist $key
}
foreach {type large} [array get largevalue] {
test "BLPOP, BRPOP: single existing list - $type" {
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd blpop blist 1
assert_equal {blist a} [$rd read]
@@ -116,8 +103,8 @@ start_server {
test "BLPOP, BRPOP: multiple existing lists - $type" {
set rd [redis_deferring_client]
- create_$type blist1 "a $large c"
- create_$type blist2 "d $large f"
+ create_list blist1 "a $large c"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist1 a} [$rd read]
@@ -137,7 +124,7 @@ start_server {
test "BLPOP, BRPOP: second list has an entry - $type" {
set rd [redis_deferring_client]
r del blist1
- create_$type blist2 "d $large f"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist2 d} [$rd read]
@@ -151,7 +138,7 @@ start_server {
r del target
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd brpoplpush blist target 1
assert_equal d [$rd read]
@@ -517,28 +504,28 @@ start_server {
foreach {type large} [array get largevalue] {
test "LPUSHX, RPUSHX - $type" {
- create_$type xlist "$large c"
+ create_list xlist "$large c"
assert_equal 3 [r rpushx xlist d]
assert_equal 4 [r lpushx xlist a]
assert_equal "a $large c d" [r lrange xlist 0 -1]
}
test "LINSERT - $type" {
- create_$type xlist "a $large c d"
- assert_equal 5 [r linsert xlist before c zz]
- assert_equal "a $large zz c d" [r lrange xlist 0 10]
- assert_equal 6 [r linsert xlist after c yy]
- assert_equal "a $large zz c yy d" [r lrange xlist 0 10]
- assert_equal 7 [r linsert xlist after d dd]
- assert_equal -1 [r linsert xlist after bad ddd]
- assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10]
- assert_equal 8 [r linsert xlist before a aa]
- assert_equal -1 [r linsert xlist before bad aaa]
- assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10]
+ create_list xlist "a $large c d"
+ assert_equal 5 [r linsert xlist before c zz] "before c"
+ assert_equal "a $large zz c d" [r lrange xlist 0 10] "lrangeA"
+ assert_equal 6 [r linsert xlist after c yy] "after c"
+ assert_equal "a $large zz c yy d" [r lrange xlist 0 10] "lrangeB"
+ assert_equal 7 [r linsert xlist after d dd] "after d"
+ assert_equal -1 [r linsert xlist after bad ddd] "after bad"
+ assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeC"
+ assert_equal 8 [r linsert xlist before a aa] "before a"
+ assert_equal -1 [r linsert xlist before bad aaa] "before bad"
+ assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeD"
# check inserting integer encoded value
- assert_equal 9 [r linsert xlist before aa 42]
- assert_equal 42 [r lrange xlist 0 0]
+ assert_equal 9 [r linsert xlist before aa 42] "before aa"
+ assert_equal 42 [r lrange xlist 0 0] "lrangeE"
}
}
@@ -547,55 +534,7 @@ start_server {
set e
} {*ERR*syntax*error*}
- test {LPUSHX, RPUSHX convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is pushed
- create_ziplist xlist a
- assert_equal 2 [r rpushx xlist $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r lpushx xlist $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r rpushx xlist b]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r lpushx xlist b]
- assert_encoding linkedlist xlist
- }
-
- test {LINSERT convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is inserted
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist before a $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist after a $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist before a a]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist after a a]
- assert_encoding linkedlist xlist
-
- # don't convert when the value could not be inserted
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist before foo a]
- assert_encoding ziplist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist after foo a]
- assert_encoding ziplist xlist
- }
-
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
proc check_numbered_list_consistency {key} {
set len [r llen $key]
for {set i 0} {$i < $len} {incr i} {
@@ -664,16 +603,16 @@ start_server {
foreach {type large} [array get largevalue] {
test "RPOPLPUSH base case - $type" {
r del mylist1 mylist2
- create_$type mylist1 "a $large c d"
+ create_list mylist1 "a $large c d"
assert_equal d [r rpoplpush mylist1 mylist2]
assert_equal c [r rpoplpush mylist1 mylist2]
assert_equal "a $large" [r lrange mylist1 0 -1]
assert_equal "c d" [r lrange mylist2 0 -1]
- assert_encoding ziplist mylist2
+ assert_encoding quicklist mylist2
}
test "RPOPLPUSH with the same list as src and dst - $type" {
- create_$type mylist "a $large c"
+ create_list mylist "a $large c"
assert_equal "a $large c" [r lrange mylist 0 -1]
assert_equal c [r rpoplpush mylist mylist]
assert_equal "c a $large" [r lrange mylist 0 -1]
@@ -681,8 +620,8 @@ start_server {
foreach {othertype otherlarge} [array get largevalue] {
test "RPOPLPUSH with $type source and existing target $othertype" {
- create_$type srclist "a b c $large"
- create_$othertype dstlist "$otherlarge"
+ create_list srclist "a b c $large"
+ create_list dstlist "$otherlarge"
assert_equal $large [r rpoplpush srclist dstlist]
assert_equal c [r rpoplpush srclist dstlist]
assert_equal "a b" [r lrange srclist 0 -1]
@@ -691,7 +630,7 @@ start_server {
# When we rpoplpush'ed a large value, dstlist should be
# converted to the same encoding as srclist.
if {$type eq "linkedlist"} {
- assert_encoding linkedlist dstlist
+ assert_encoding quicklist dstlist
}
}
}
@@ -713,7 +652,7 @@ start_server {
}
test {RPOPLPUSH against non list dst key} {
- create_ziplist srclist {a b c d}
+ create_list srclist {a b c d}
r set dstlist x
assert_error WRONGTYPE* {r rpoplpush srclist dstlist}
assert_type string dstlist
@@ -727,7 +666,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "Basic LPOP/RPOP - $type" {
- create_$type mylist "$large 1 2"
+ create_list mylist "$large 1 2"
assert_equal $large [r lpop mylist]
assert_equal 2 [r rpop mylist]
assert_equal 1 [r lpop mylist]
@@ -745,7 +684,7 @@ start_server {
assert_error WRONGTYPE* {r rpop notalist}
}
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
test "Mass RPOP/LPOP - $type" {
r del mylist
set sum1 0
@@ -765,24 +704,24 @@ start_server {
foreach {type large} [array get largevalue] {
test "LRANGE basics - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {1 2 3 4 5 6 7 8} [r lrange mylist 1 -2]
assert_equal {7 8 9} [r lrange mylist -3 -1]
assert_equal {4} [r lrange mylist 4 4]
}
test "LRANGE inverted indexes - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {} [r lrange mylist 6 2]
}
test "LRANGE out of range indexes including the full list - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal "$large 1 2 3" [r lrange mylist -1000 1000]
}
test "LRANGE out of range negative end index - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal $large [r lrange mylist 0 -4]
assert_equal {} [r lrange mylist 0 -5]
}
@@ -796,7 +735,7 @@ start_server {
proc trim_list {type min max} {
upvar 1 large large
r del mylist
- create_$type mylist "1 2 3 4 $large"
+ create_list mylist "1 2 3 4 $large"
r ltrim mylist $min $max
r lrange mylist 0 -1
}
@@ -825,7 +764,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "LSET - $type" {
- create_$type mylist "99 98 $large 96 95"
+ create_list mylist "99 98 $large 96 95"
r lset mylist 1 foo
r lset mylist -1 bar
assert_equal "99 foo $large 96 bar" [r lrange mylist 0 -1]
@@ -847,7 +786,7 @@ start_server {
foreach {type e} [array get largevalue] {
test "LREM remove all the occurrences - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo"
assert_equal 2 [r lrem mylist 0 bar]
assert_equal "$e foo foobar foobared zap test foo" [r lrange mylist 0 -1]
}
@@ -863,7 +802,7 @@ start_server {
}
test "LREM starting from tail with negative count - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo foo"
assert_equal 1 [r lrem mylist -1 bar]
assert_equal "$e foo bar foobar foobared zap test foo foo" [r lrange mylist 0 -1]
}
@@ -874,7 +813,7 @@ start_server {
}
test "LREM deleting objects that may be int encoded - $type" {
- create_$type myotherlist "$e 1 2 3"
+ create_list myotherlist "$e 1 2 3"
assert_equal 1 [r lrem myotherlist 1 2]
assert_equal 3 [r llen myotherlist]
}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 162de0af7..74a8fb318 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -293,6 +293,13 @@ start_server {
assert_equal 0 [r scard myset]
}
+ test "SPOP with <count>=1 - $type" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [list [r spop myset 1] [r spop myset 1] [r spop myset 1]]]
+ assert_equal 0 [r scard myset]
+ }
+
test "SRANDMEMBER - $type" {
create_set myset $contents
unset -nocomplain myset
@@ -304,6 +311,41 @@ start_server {
}
}
+ foreach {type contents} {
+ hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+ intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+ } {
+ test "SPOP with <count>" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
+ assert_equal 0 [r scard myset]
+ }
+ }
+
+ # As seen in intsetRandomMembers
+ test "SPOP using integers, testing Knuth's and Floyd's algorithm" {
+ create_set myset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ assert_encoding intset myset
+ assert_equal 20 [r scard myset]
+ r spop myset 1
+ assert_equal 19 [r scard myset]
+ r spop myset 2
+ assert_equal 17 [r scard myset]
+ r spop myset 3
+ assert_equal 14 [r scard myset]
+ r spop myset 10
+ assert_equal 4 [r scard myset]
+ r spop myset 10
+ assert_equal 0 [r scard myset]
+ r spop myset 1
+ assert_equal 0 [r scard myset]
+ } {}
+
+ test "SPOP using integers with Knuth's algorithm" {
+ r spop nonexisting_key 100
+ } {}
+
test "SRANDMEMBER with <count> against non existing key" {
r srandmember nonexisting_key 100
} {}
diff --git a/utils/cluster_fail_time.tcl b/utils/cluster_fail_time.tcl
new file mode 100644
index 000000000..87399495f
--- /dev/null
+++ b/utils/cluster_fail_time.tcl
@@ -0,0 +1,50 @@
+# This simple script is used in order to estimate the average PFAIL->FAIL
+# state switch after a failure.
+
+set ::sleep_time 10 ; # How much to sleep to trigger PFAIL.
+set ::fail_port 30016 ; # Node to put in sleep.
+set ::other_port 30001 ; # Node to use to monitor the flag switch.
+
+proc avg vector {
+ set sum 0.0
+ foreach x $vector {
+ set sum [expr {$sum+$x}]
+ }
+ expr {$sum/[llength $vector]}
+}
+
+set samples {}
+while 1 {
+ exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null &
+
+ # Wait for fail? to appear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {[string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL?"
+ set start [clock milliseconds]
+
+ # Wait for fail? to disappear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {![string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL"
+ set now [clock milliseconds]
+ set elapsed [expr {$now-$start}]
+ puts $elapsed
+ lappend samples $elapsed
+
+ puts "AVG([llength $samples]): [avg $samples]"
+
+ # Wait for the instance to be available again.
+ exec redis-cli -p $::fail_port ping
+
+ # Wait for the fail flag to be cleared.
+ after 2000
+}
diff --git a/utils/create-cluster/.gitignore b/utils/create-cluster/.gitignore
new file mode 100644
index 000000000..cdd7c19c8
--- /dev/null
+++ b/utils/create-cluster/.gitignore
@@ -0,0 +1 @@
+config.sh
diff --git a/utils/create-cluster/README b/utils/create-cluster/README
new file mode 100644
index 000000000..1f43748ee
--- /dev/null
+++ b/utils/create-cluster/README
@@ -0,0 +1,27 @@
+Create-custer is a small script used to easily start a big number of Redis
+instances configured to run in cluster mode. Its main goal is to allow manual
+testing in a condition which is not easy to replicate with the Redis cluster
+unit tests, for example when a lot of instances are needed in order to trigger
+a give bug.
+
+The tool can also be used just to easily create a number of instances in a
+Redis Cluster in order to experiment a bit with the system.
+
+USAGE
+---
+
+To create a cluster, follow this steps:
+
+1. Edit create-cluster and change the start / end port, depending on the
+number of instances you want to create.
+2. Use "./create-cluster start" in order to run the instances.
+3. Use "./create-cluster create" in order to execute redis-trib create, so that
+an actual Redis cluster will be created.
+4. Now you are ready to play with the cluster. AOF files and logs for each instances are created in the current directory.
+
+In order to stop a cluster:
+
+1. Use "./craete-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change ideas.
+2. Use "./create-cluster clean" to remove all the AOF / log files to restat with a clean environment.
+
+Use the command "./create-cluster help" to get the full list of features.
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
new file mode 100755
index 000000000..efb3135d4
--- /dev/null
+++ b/utils/create-cluster/create-cluster
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# Settings
+PORT=30000
+TIMEOUT=2000
+NODES=6
+REPLICAS=1
+
+# You may want to put the above config parameters into config.sh in order to
+# override the defaults without modifying this script.
+
+if [ -a config.sh ]
+then
+ source "config.sh"
+fi
+
+# Computed vars
+ENDPORT=$((PORT+NODES))
+
+if [ "$1" == "start" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Starting $PORT"
+ ../../src/redis-server --port $PORT --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes
+ done
+ exit 0
+fi
+
+if [ "$1" == "create" ]
+then
+ HOSTS=""
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ HOSTS="$HOSTS 127.0.0.1:$PORT"
+ done
+ ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS
+ exit 0
+fi
+
+if [ "$1" == "stop" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Stopping $PORT"
+ redis-cli -p $PORT shutdown nosave
+ done
+ exit 0
+fi
+
+if [ "$1" == "watch" ]
+then
+ PORT=$((PORT+1))
+ while [ 1 ]; do
+ clear
+ date
+ redis-cli -p $PORT cluster nodes | head -30
+ sleep 1
+ done
+ exit 0
+fi
+
+if [ "$1" == "tail" ]
+then
+ INSTANCE=$2
+ PORT=$((PORT+INSTANCE))
+ tail -f ${PORT}.log
+ exit 0
+fi
+
+if [ "$1" == "call" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ ../../src/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9
+ done
+ exit 0
+fi
+
+if [ "$1" == "clean" ]
+then
+ rm -rf *.log
+ rm -rf appendonly*.aof
+ rm -rf dump*.rdb
+ rm -rf nodes*.conf
+ exit 0
+fi
+
+echo "Usage: $0 [start|create|stop|watch|tail|clean]"
+echo "start -- Launch Redis Cluster instances."
+echo "create -- Create a cluster using redis-trib create."
+echo "stop -- Stop Redis Cluster instances."
+echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node."
+echo "tail <id> -- Run tail -f of instance at base port + ID."
+echo "clean -- Remove all instances data, logs, configs."
diff --git a/utils/redis_init_script.tpl b/utils/redis_init_script.tpl
index d65086312..2e5b61301 100755
--- a/utils/redis_init_script.tpl
+++ b/utils/redis_init_script.tpl
@@ -26,11 +26,12 @@ case "$1" in
fi
;;
status)
- if [ ! -f $PIDFILE ]
+ PID=$(cat $PIDFILE)
+ if [ ! -x /proc/${PID} ]
then
echo 'Redis is not running'
else
- echo "Redis is running ($(<$PIDFILE))"
+ echo "Redis is running ($PID)"
fi
;;
restart)
diff --git a/utils/whatisdoing.sh b/utils/whatisdoing.sh
index 8f441cfc0..e4059caed 100755
--- a/utils/whatisdoing.sh
+++ b/utils/whatisdoing.sh
@@ -1,9 +1,15 @@
# This script is from http://poormansprofiler.org/
+#
+# NOTE: Instead of using this script, you should use the Redis
+# Software Watchdog, which provides a similar functionality but in
+# a more reliable / easy to use way.
+#
+# Check http://redis.io/topics/latency for more information.
#!/bin/bash
nsamples=1
sleeptime=0
-pid=$(pidof redis-server)
+pid=$(ps auxww | grep '[r]edis-server' | awk '{print $2}')
for x in $(seq 1 $nsamples)
do